From 64ee2574e02329acb28161931403fa7391e98cec Mon Sep 17 00:00:00 2001 From: Chengyuan Zhang Date: Fri, 11 Jun 2021 14:53:18 -0700 Subject: [PATCH] grpclb: skip fallback if the LB is already in fallback mode (#8253) Manually checks if the gRPCLB policy is already in fallback mode when trying to fallback due to receiving address update without LB addresses. Commit b956f8852d36adc54eb60649d7b82896b399d723 added an invariant check in the FallbackModeTask runnable to ensure the task is fired only when the LB is not already in fallback mode. However, that commit missed the case that receiving address updates without LB addresses can trigger the run of FallbackModeTask runnable, because the existing implementation chose to reuse the code in FallbackModeTask. In such case, running FallbackModeTask could break the invariant check as the LB policy may already in fallback mode. This change eliminates the reuse of FallbackModeTask for handling address update without LB address. That is, every time receiving address update, we manually check if it is already in fallback instead of reusing to FallbackModeTask perform the check. Note there was a discussion brought up whether we should force entering fallback (shutdown existing subchannels) or we should still keep the balancer connection. Different languages have already diverged on this. Go shuts down the balancer connection and all subchannel connections to force using fallback addresses. C-core keep the balancer connection working and does not shutdown subchannels, only let fallback happens after the existing balancer connection and subchannel connections become broken. Java shuts down the balancer connection but not subchannels. This change does not try to change the existing behavior, but only fixes the invariant check breakage. ------------------- See bug reported in b/190700476 --- .../main/java/io/grpc/grpclb/GrpclbState.java | 15 +++++++--- .../grpc/grpclb/GrpclbLoadBalancerTest.java | 29 +++++++++++++++++-- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/grpclb/src/main/java/io/grpc/grpclb/GrpclbState.java b/grpclb/src/main/java/io/grpc/grpclb/GrpclbState.java index 59fa67dc2dc..19bc35b373f 100644 --- a/grpclb/src/main/java/io/grpc/grpclb/GrpclbState.java +++ b/grpclb/src/main/java/io/grpc/grpclb/GrpclbState.java @@ -259,11 +259,19 @@ void handleAddresses( serviceName, newLbAddressGroups, newBackendServers); + fallbackBackendList = newBackendServers; if (newLbAddressGroups.isEmpty()) { - // No balancer address: close existing balancer connection and enter fallback mode - // immediately. + // No balancer address: close existing balancer connection and prepare to enter fallback + // mode. If there is no successful backend connection, it enters fallback mode immediately. + // Otherwise, fallback does not happen until backend connections are lost. This behavior + // might be different from other languages (e.g., existing balancer connection is not + // closed in C-core), but we aren't changing it at this time. shutdownLbComm(); - syncContext.execute(new FallbackModeTask(NO_LB_ADDRESS_PROVIDED_STATUS)); + if (!usingFallbackBackends) { + fallbackReason = NO_LB_ADDRESS_PROVIDED_STATUS; + cancelFallbackTimer(); + maybeUseFallbackBackends(); + } } else { startLbComm(newLbAddressGroups); // Avoid creating a new RPC just because the addresses were updated, as it can cause a @@ -281,7 +289,6 @@ void handleAddresses( TimeUnit.MILLISECONDS, timerService); } } - fallbackBackendList = newBackendServers; if (usingFallbackBackends) { // Populate the new fallback backends to round-robin list. useFallbackBackends(); diff --git a/grpclb/src/test/java/io/grpc/grpclb/GrpclbLoadBalancerTest.java b/grpclb/src/test/java/io/grpc/grpclb/GrpclbLoadBalancerTest.java index 7dba20be1d0..f664aad0a7a 100644 --- a/grpclb/src/test/java/io/grpc/grpclb/GrpclbLoadBalancerTest.java +++ b/grpclb/src/test/java/io/grpc/grpclb/GrpclbLoadBalancerTest.java @@ -1452,8 +1452,11 @@ public void grpclbFallback_breakLbStreamBeforeFallbackTimerExpires() { public void grpclbFallback_noBalancerAddress() { InOrder inOrder = inOrder(helper, subchannelPool); - // Create just backend addresses - List backendList = createResolvedBackendAddresses(2); + // Create 5 distinct backends + List backends = createResolvedBackendAddresses(5); + + // Name resolver gives the first two backend addresses + List backendList = backends.subList(0, 2); deliverResolvedAddresses(backendList, Collections.emptyList()); assertThat(logs).containsAtLeast( @@ -1474,6 +1477,28 @@ public void grpclbFallback_noBalancerAddress() { .createOobChannel(ArgumentMatchers.anyList(), anyString()); logs.clear(); + ///////////////////////////////////////////////////////////////////////////////////////// + // Name resolver sends new resolution results with new backend addr but no balancer addr + ///////////////////////////////////////////////////////////////////////////////////////// + // Name resolver then gives the last three backends + backendList = backends.subList(2, 5); + deliverResolvedAddresses(backendList, Collections.emptyList()); + + assertThat(logs).containsAtLeast( + "INFO: [grpclb-] Using fallback backends", + "INFO: [grpclb-] " + + "Using RR list=[[[FakeSocketAddress-fake-address-2]/{}], " + + "[[FakeSocketAddress-fake-address-3]/{}], " + + "[[FakeSocketAddress-fake-address-4]/{}]], drop=[null, null, null]", + "INFO: [grpclb-] " + + "Update balancing state to CONNECTING: picks=[BUFFER_ENTRY], " + + "drops=[null, null, null]") + .inOrder(); + + // Shift to use updated backends + fallbackTestVerifyUseOfFallbackBackendLists(inOrder, backendList); + logs.clear(); + /////////////////////////////////////////////////////////////////////////////////////// // Name resolver sends new resolution results without any backend addr or balancer addr ///////////////////////////////////////////////////////////////////////////////////////