From 9c562c8a6fb216e6de80310a3f00b10edbc16e93 Mon Sep 17 00:00:00 2001 From: Chengyuan Zhang Date: Thu, 11 Mar 2021 16:48:01 -0800 Subject: [PATCH] grpclb: should not ignore subchannels with CONNECTING state in aggregating the overall LB state (#7959) We should treat both IDLE and CONNECTING subchannels as "connection in progress" when aggregating for the overall load balancing state. Otherwise, RPCs could fail prematurely if one subchannel enters TF while all the others are still in CONNECTING. 23d279660c1d370beea7aa53d1b1b2bc20518253 made each individual subchannel stay in TF until READY if it previously was in TF. So subchannels with CONNECTING state are those in first time connecting. We should give them time to connect. --- grpclb/src/main/java/io/grpc/grpclb/GrpclbState.java | 8 ++++---- .../test/java/io/grpc/grpclb/GrpclbLoadBalancerTest.java | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/grpclb/src/main/java/io/grpc/grpclb/GrpclbState.java b/grpclb/src/main/java/io/grpc/grpclb/GrpclbState.java index b2d8b453de5..744d9620321 100644 --- a/grpclb/src/main/java/io/grpc/grpclb/GrpclbState.java +++ b/grpclb/src/main/java/io/grpc/grpclb/GrpclbState.java @@ -776,7 +776,7 @@ private void maybeUpdatePicker() { case ROUND_ROBIN: pickList = new ArrayList<>(backendList.size()); Status error = null; - boolean hasIdle = false; + boolean hasPending = false; for (BackendEntry entry : backendList) { Subchannel subchannel = entry.subchannel; Attributes attrs = subchannel.getAttributes(); @@ -785,12 +785,12 @@ private void maybeUpdatePicker() { pickList.add(entry); } else if (stateInfo.getState() == TRANSIENT_FAILURE) { error = stateInfo.getStatus(); - } else if (stateInfo.getState() == IDLE) { - hasIdle = true; + } else { + hasPending = true; } } if (pickList.isEmpty()) { - if (error != null && !hasIdle) { + if (error != null && !hasPending) { pickList.add(new ErrorEntry(error)); state = TRANSIENT_FAILURE; } else { diff --git a/grpclb/src/test/java/io/grpc/grpclb/GrpclbLoadBalancerTest.java b/grpclb/src/test/java/io/grpc/grpclb/GrpclbLoadBalancerTest.java index e768cf018e3..501735b30b1 100644 --- a/grpclb/src/test/java/io/grpc/grpclb/GrpclbLoadBalancerTest.java +++ b/grpclb/src/test/java/io/grpc/grpclb/GrpclbLoadBalancerTest.java @@ -1175,9 +1175,10 @@ public void roundRobinMode_subchannelStayTransientFailureUntilReady() { deliverSubchannelState(subchannel2, ConnectivityStateInfo.forNonError(CONNECTING)); inOrder.verify(helper).updateBalancingState(eq(CONNECTING), any(SubchannelPicker.class)); - // Switch subchannel1 to TRANSIENT_FAILURE, making the general state TRANSIENT_FAILURE too. - Status error = Status.UNAVAILABLE.withDescription("error1"); + // Switch all subchannels to TRANSIENT_FAILURE, making the general state TRANSIENT_FAILURE too. + Status error = Status.UNAVAILABLE.withDescription("error"); deliverSubchannelState(subchannel1, ConnectivityStateInfo.forTransientFailure(error)); + deliverSubchannelState(subchannel2, ConnectivityStateInfo.forTransientFailure(error)); inOrder.verify(helper).updateBalancingState(eq(TRANSIENT_FAILURE), pickerCaptor.capture()); assertThat(((RoundRobinPicker) pickerCaptor.getValue()).pickList) .containsExactly(new ErrorEntry(error));