Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: at connection level, retry for internal errors #1965

Merged
merged 10 commits into from Jan 31, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions README.md
Expand Up @@ -49,20 +49,20 @@ If you are using Maven without BOM, add this to your dependencies:
If you are using Gradle 5.x or later, add this to your dependencies:

```Groovy
implementation platform('com.google.cloud:libraries-bom:26.4.0')
implementation platform('com.google.cloud:libraries-bom:26.5.0')

implementation 'com.google.cloud:google-cloud-bigquerystorage'
```
If you are using Gradle without BOM, add this to your dependencies:

```Groovy
implementation 'com.google.cloud:google-cloud-bigquerystorage:2.28.3'
implementation 'com.google.cloud:google-cloud-bigquerystorage:2.28.4'
```

If you are using SBT, add this to your dependencies:

```Scala
libraryDependencies += "com.google.cloud" % "google-cloud-bigquerystorage" % "2.28.3"
libraryDependencies += "com.google.cloud" % "google-cloud-bigquerystorage" % "2.28.4"
```

## Authentication
Expand Down
5 changes: 5 additions & 0 deletions google-cloud-bigquerystorage/clirr-ignored-differences.xml
Expand Up @@ -142,4 +142,9 @@
<className>com/google/cloud/bigquery/storage/v1/ConnectionWorkerPool</className>
<method>long getInflightWaitSeconds(com.google.cloud.bigquery.storage.v1.StreamWriter)</method>
</difference>
<difference>
<differenceType>7009</differenceType>
<className>com/google/cloud/bigquery/storage/v1/ConnectionWorkerPool</className>
<method>ConnectionWorkerPool(long, long, java.time.Duration, com.google.api.gax.batching.FlowController$LimitExceededBehavior, java.lang.String, com.google.cloud.bigquery.storage.v1.BigQueryWriteSettings)</method>
</difference>
</differences>
Expand Up @@ -19,7 +19,6 @@
import com.google.api.core.SettableApiFuture;
import com.google.api.gax.batching.FlowController;
import com.google.auto.value.AutoValue;
import com.google.cloud.bigquery.storage.util.Errors;
import com.google.cloud.bigquery.storage.v1.AppendRowsRequest.ProtoData;
import com.google.cloud.bigquery.storage.v1.Exceptions.AppendSerializtionError;
import com.google.cloud.bigquery.storage.v1.StreamConnection.DoneCallback;
Expand Down Expand Up @@ -716,14 +715,15 @@ private void requestCallback(AppendRowsResponse response) {
});
}

private boolean isRetriableError(Throwable t) {
private boolean isConnectionErrorRetriable(Throwable t) {
Status status = Status.fromThrowable(t);
if (Errors.isRetryableInternalStatus(status)) {
return true;
}
return status.getCode() == Code.ABORTED
|| status.getCode() == Code.UNAVAILABLE
|| status.getCode() == Code.CANCELLED;
|| status.getCode() == Code.CANCELLED
|| status.getCode() == Code.INTERNAL
|| status.getCode() == Code.FAILED_PRECONDITION
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this error for?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to this CL: https://critique.corp.google.com/cl/483521407/depot/google3/cloud/helix/vortex/frontend/base/vortex_error_util.cc, this error is expected when two clients are trying to talk to the same stream. I think in this case, retrying with a timeout also applies from the client side's point of view. If the situation persists, the request eventually fails out, but if it is just transient, as in a race between two workers, then a retry still works.

|| status.getCode() == Code.DEADLINE_EXCEEDED
|| status.getCode() == Code.RESOURCE_EXHAUSTED;
}

private void doneCallback(Throwable finalStatus) {
Expand All @@ -740,7 +740,7 @@ private void doneCallback(Throwable finalStatus) {
connectionRetryStartTime = System.currentTimeMillis();
}
// If the error can be retried, don't set it here, let it try to retry later on.
if (isRetriableError(finalStatus)
if (isConnectionErrorRetriable(finalStatus)
&& !userClosed
&& (maxRetryDuration.toMillis() == 0f
|| System.currentTimeMillis() - connectionRetryStartTime
Expand Down
Expand Up @@ -461,9 +461,17 @@ public void testShortenStreamNameAllowed() throws Exception {

@Test
public void testAppendSuccessAndConnectionError() throws Exception {
StreamWriter writer = getTestStreamWriter();
StreamWriter writer =
StreamWriter.newBuilder(TEST_STREAM_1, client)
.setWriterSchema(createProtoSchema())
.setTraceId(TEST_TRACE_ID)
// Retry expire immediately.
.setMaxRetryDuration(java.time.Duration.ofMillis(1L))
.build();
testBigQueryWrite.addResponse(createAppendResponse(0));
testBigQueryWrite.addException(Status.INTERNAL.asException());
testBigQueryWrite.addException(Status.INTERNAL.asException());
testBigQueryWrite.addException(Status.INTERNAL.asException());

ApiFuture<AppendRowsResponse> appendFuture1 = sendTestMessage(writer, new String[] {"A"});
ApiFuture<AppendRowsResponse> appendFuture2 = sendTestMessage(writer, new String[] {"B"});
Expand Down Expand Up @@ -581,11 +589,11 @@ public void testAppendAfterUserClose() throws Exception {
@Test
public void testAppendAfterServerClose() throws Exception {
StreamWriter writer = getTestStreamWriter();
testBigQueryWrite.addException(Status.INTERNAL.asException());
testBigQueryWrite.addException(Status.INVALID_ARGUMENT.asException());

ApiFuture<AppendRowsResponse> appendFuture1 = sendTestMessage(writer, new String[] {"A"});
ApiException error1 = assertFutureException(ApiException.class, appendFuture1);
assertEquals(Code.INTERNAL, error1.getStatusCode().getCode());
assertEquals(Code.INVALID_ARGUMENT, error1.getStatusCode().getCode());

ApiFuture<AppendRowsResponse> appendFuture2 = sendTestMessage(writer, new String[] {"B"});
assertTrue(appendFuture2.isDone());
Expand Down Expand Up @@ -637,7 +645,7 @@ public void serverCloseWhileRequestsInflight() throws Exception {
StreamWriter writer = getTestStreamWriter();
// Server will sleep 2 seconds before closing the connection.
testBigQueryWrite.setResponseSleep(Duration.ofSeconds(2));
testBigQueryWrite.addException(Status.INTERNAL.asException());
testBigQueryWrite.addException(Status.INVALID_ARGUMENT.asException());

// Send 10 requests, so that there are 10 inflight requests.
int appendCount = 10;
Expand All @@ -649,7 +657,7 @@ public void serverCloseWhileRequestsInflight() throws Exception {
// Server close should properly handle all inflight requests.
for (int i = 0; i < appendCount; i++) {
ApiException actualError = assertFutureException(ApiException.class, futures.get(i));
assertEquals(Code.INTERNAL, actualError.getStatusCode().getCode());
assertEquals(Code.INVALID_ARGUMENT, actualError.getStatusCode().getCode());
}

writer.close();
Expand Down