Skip to content

Commit

Permalink
fix: A stuck when the client fail to get DoneCallback (#1637)
Browse files Browse the repository at this point in the history
Add a timeout of one minute waiting for done callback to be called. Same timeout as client close.
The donecallback mainly gives back the server side error status, so it is not critical. In Dataflow connector, we saw hang because the DoneCallback is lost and we wait forever on it.

Stack trace in b/230501926
  • Loading branch information
yirutang committed May 2, 2022
1 parent 73ddd7b commit 3baa84e
Showing 1 changed file with 17 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ private void appendLoop() {
// We can close the stream connection and handle the remaining inflight requests.
if (streamConnection != null) {
this.streamConnection.close();
waitForDoneCallback();
waitForDoneCallback(1, TimeUnit.MINUTES);
}

// At this point, there cannot be more callback. It is safe to clean up all inflight requests.
Expand All @@ -491,9 +491,10 @@ private boolean waitingQueueDrained() {
}
}

private void waitForDoneCallback() {
private void waitForDoneCallback(long duration, TimeUnit timeUnit) {
log.fine("Waiting for done callback from stream connection. Stream: " + streamName);
while (true) {
long deadline = System.nanoTime() + timeUnit.toNanos(duration);
while (System.nanoTime() <= deadline) {
this.lock.lock();
try {
if (connectionFinalStatus != null) {
Expand All @@ -505,6 +506,19 @@ private void waitForDoneCallback() {
}
Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
}
this.lock.lock();
try {
if (connectionFinalStatus == null) {
connectionFinalStatus =
new StatusRuntimeException(
Status.fromCode(Code.CANCELLED)
.withDescription("Timeout waiting for DoneCallback."));
}
} finally {
this.lock.unlock();
}

return;
}

private AppendRowsRequest prepareRequestBasedOnPosition(
Expand Down

0 comments on commit 3baa84e

Please sign in to comment.