New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Reduce the flakiness of the OkHttp tests (though not eliminate). #10973
base: master
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -106,6 +106,7 @@ final class OkHttpServerTransport implements ServerTransport, | |
private final KeepAliveEnforcer keepAliveEnforcer; | ||
|
||
private final Object lock = new Object(); | ||
private final Object handShakeLock = new Object(); | ||
@GuardedBy("lock") | ||
private boolean abruptShutdown; | ||
@GuardedBy("lock") | ||
|
@@ -164,15 +165,17 @@ private void startIo(SerializingExecutor serializingExecutor) { | |
// The socket implementation is lazily initialized, but had broken thread-safety | ||
// for that laziness https://bugs.openjdk.org/browse/JDK-8278326. | ||
// As a workaround, we lock to synchronize initialization with shutdown(). | ||
HandshakerSocketFactory.HandshakeResult result; | ||
synchronized (lock) { | ||
socket.setTcpNoDelay(true); | ||
} | ||
HandshakerSocketFactory.HandshakeResult result = | ||
config.handshakerSocketFactory.handshake(socket, Attributes.EMPTY); | ||
synchronized (handShakeLock) { | ||
result = config.handshakerSocketFactory.handshake(socket, Attributes.EMPTY); | ||
} | ||
synchronized (lock) { | ||
this.socket = result.socket; | ||
this.attributes = result.attributes; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did TSAN show this was needed? This field shouldn't be looked at until |
||
} | ||
this.attributes = result.attributes; | ||
|
||
int maxQueuedControlFrames = 10000; | ||
AsyncSink asyncSink = AsyncSink.sink(serializingExecutor, this, maxQueuedControlFrames); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ | |
|
||
import static com.google.common.base.Charsets.UTF_8; | ||
import static com.google.common.truth.Truth.assertThat; | ||
import static com.google.common.truth.Truth.assertWithMessage; | ||
import static io.grpc.internal.ClientStreamListener.RpcProgress.MISCARRIED; | ||
import static io.grpc.internal.ClientStreamListener.RpcProgress.PROCESSED; | ||
import static io.grpc.internal.ClientStreamListener.RpcProgress.REFUSED; | ||
|
@@ -293,7 +294,7 @@ public void close() throws SecurityException { | |
Buffer buffer = createMessageFrame(message); | ||
frameHandler().data(false, 3, buffer, (int) buffer.size(), | ||
(int) buffer.size()); | ||
assertThat(logs).hasSize(1); | ||
assertWithMessage("log messages: " + logs).that(logs).hasSize(1); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How does this help? I tried it out and it seems to change:
into:
The point of Truth is that it prints the value of what your testing. Why print it twice? |
||
log = logs.remove(0); | ||
assertThat(log.getMessage()).startsWith(Direction.INBOUND + " DATA: streamId=" + 3); | ||
assertThat(log.getLevel()).isEqualTo(Level.FINE); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -117,6 +117,7 @@ public void mtls_succeeds() throws Exception { | |
ManagedChannel channel = grpcCleanupRule.register(clientChannel(server, channelCreds)); | ||
|
||
SimpleServiceGrpc.newBlockingStub(channel).unaryRpc(SimpleRequest.getDefaultInstance()); | ||
server.shutdown(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What do these do? The Cleanup rule will shut down the server just after this line. Do we have to shut down the server before the client? It isn't like OkHttpServer.acceptConnecitons() is spinning tightly in its loop. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But because the client shuts down before the server, after it falls out of the accept it goes through the logic that is skipped when the shutdown flag is set. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried this and I saw no change in failure rate. (1000 runs) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Darn reality not matching the theory .... :( |
||
} | ||
|
||
@Test | ||
|
@@ -144,6 +145,7 @@ public void untrustedClient_fails() throws Exception { | |
ManagedChannel channel = grpcCleanupRule.register(clientChannel(server, channelCreds)); | ||
|
||
assertRpcFails(channel); | ||
server.shutdown(); | ||
} | ||
|
||
@Test | ||
|
@@ -168,6 +170,7 @@ public void missingOptionalClientCert_succeeds() throws Exception { | |
ManagedChannel channel = grpcCleanupRule.register(clientChannel(server, channelCreds)); | ||
|
||
SimpleServiceGrpc.newBlockingStub(channel).unaryRpc(SimpleRequest.getDefaultInstance()); | ||
server.shutdown(); | ||
} | ||
|
||
@Test | ||
|
@@ -192,6 +195,7 @@ public void missingRequiredClientCert_fails() throws Exception { | |
ManagedChannel channel = grpcCleanupRule.register(clientChannel(server, channelCreds)); | ||
|
||
assertRpcFails(channel); | ||
server.shutdown(); | ||
} | ||
|
||
@Test | ||
|
@@ -208,6 +212,7 @@ public void untrustedServer_fails() throws Exception { | |
ManagedChannel channel = grpcCleanupRule.register(clientChannel(server, channelCreds)); | ||
|
||
assertRpcFails(channel); | ||
server.shutdown(); | ||
} | ||
|
||
@Test | ||
|
@@ -231,6 +236,7 @@ public void unmatchedServerSubjectAlternativeNames_fails() throws Exception { | |
.build()); | ||
|
||
assertRpcFails(channel); | ||
server.shutdown(); | ||
} | ||
|
||
@Test | ||
|
@@ -255,6 +261,7 @@ public void hostnameVerifierTrusts_succeeds() | |
.build()); | ||
|
||
SimpleServiceGrpc.newBlockingStub(channel).unaryRpc(SimpleRequest.getDefaultInstance()); | ||
server.shutdown(); | ||
} | ||
|
||
@Test | ||
|
@@ -280,6 +287,7 @@ public void hostnameVerifierFails_fails() | |
|
||
Status status = assertRpcFails(channel); | ||
assertThat(status.getCause()).isInstanceOf(SSLPeerUnverifiedException.class); | ||
server.shutdown(); | ||
} | ||
|
||
private static Server server(ServerCredentials creds) throws IOException { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This does nothing. This method is only called once ever per
handShakeLock
instance, and the lock isn't used anywhere else.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks like a JDK bug. I don't know what JDK was used when TSAN failed, but based on the line numbers it looks to be JDK 11 or earlier. And it adds the new session to the session cache before setting the context, which does seem racy.
https://github.com/openjdk/jdk/blob/da75f3c4ad5bdf25167a3ed80e51f567ab3dbd01/src/java.base/share/classes/sun/security/ssl/SSLSessionContextImpl.java#L175
JDK master does the same, without any extra synchronization. So I think we need to report this and get it fixed.