From ede7e9a4841e1f2a2aa1e2d85274f59125d81cc2 Mon Sep 17 00:00:00 2001 From: Chengyuan Zhang Date: Thu, 11 Mar 2021 15:35:06 -0800 Subject: [PATCH] Fix bug of ignoring subchannels with CONNECTING state when aggregating the overall load balancing state. We should treat both IDLE and CONNECTING subchannels as "connection in progress" when aggregating for the overall load balancing state. Otherwise, RPCs could fail prematurely if one subchannel enters TF while all the others are still in CONNECTING. 23d279660c1d370beea7aa53d1b1b2bc20518253 made each individual subchannel stay in TF until READY if it previously was in TF. So subchannels with CONNECTING state are those in first time conncting. We should give time for them to connect. --- grpclb/src/main/java/io/grpc/grpclb/GrpclbState.java | 8 ++++---- .../test/java/io/grpc/grpclb/GrpclbLoadBalancerTest.java | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/grpclb/src/main/java/io/grpc/grpclb/GrpclbState.java b/grpclb/src/main/java/io/grpc/grpclb/GrpclbState.java index b2d8b453de5e..744d96203215 100644 --- a/grpclb/src/main/java/io/grpc/grpclb/GrpclbState.java +++ b/grpclb/src/main/java/io/grpc/grpclb/GrpclbState.java @@ -776,7 +776,7 @@ private void maybeUpdatePicker() { case ROUND_ROBIN: pickList = new ArrayList<>(backendList.size()); Status error = null; - boolean hasIdle = false; + boolean hasPending = false; for (BackendEntry entry : backendList) { Subchannel subchannel = entry.subchannel; Attributes attrs = subchannel.getAttributes(); @@ -785,12 +785,12 @@ private void maybeUpdatePicker() { pickList.add(entry); } else if (stateInfo.getState() == TRANSIENT_FAILURE) { error = stateInfo.getStatus(); - } else if (stateInfo.getState() == IDLE) { - hasIdle = true; + } else { + hasPending = true; } } if (pickList.isEmpty()) { - if (error != null && !hasIdle) { + if (error != null && !hasPending) { pickList.add(new ErrorEntry(error)); state = TRANSIENT_FAILURE; } else { diff --git a/grpclb/src/test/java/io/grpc/grpclb/GrpclbLoadBalancerTest.java b/grpclb/src/test/java/io/grpc/grpclb/GrpclbLoadBalancerTest.java index e768cf018e3d..501735b30b11 100644 --- a/grpclb/src/test/java/io/grpc/grpclb/GrpclbLoadBalancerTest.java +++ b/grpclb/src/test/java/io/grpc/grpclb/GrpclbLoadBalancerTest.java @@ -1175,9 +1175,10 @@ public void roundRobinMode_subchannelStayTransientFailureUntilReady() { deliverSubchannelState(subchannel2, ConnectivityStateInfo.forNonError(CONNECTING)); inOrder.verify(helper).updateBalancingState(eq(CONNECTING), any(SubchannelPicker.class)); - // Switch subchannel1 to TRANSIENT_FAILURE, making the general state TRANSIENT_FAILURE too. - Status error = Status.UNAVAILABLE.withDescription("error1"); + // Switch all subchannels to TRANSIENT_FAILURE, making the general state TRANSIENT_FAILURE too. + Status error = Status.UNAVAILABLE.withDescription("error"); deliverSubchannelState(subchannel1, ConnectivityStateInfo.forTransientFailure(error)); + deliverSubchannelState(subchannel2, ConnectivityStateInfo.forTransientFailure(error)); inOrder.verify(helper).updateBalancingState(eq(TRANSIENT_FAILURE), pickerCaptor.capture()); assertThat(((RoundRobinPicker) pickerCaptor.getValue()).pickList) .containsExactly(new ErrorEntry(error));