From 84d4a5b97aa14835dbb02741664844164e166308 Mon Sep 17 00:00:00 2001 From: Justin Mathews Date: Mon, 7 Jun 2021 13:57:15 -0400 Subject: [PATCH 01/16] failing test for linkcheck of US Patent page --- .../conf.py | 1 + .../index.rst | 1 + tests/test_build_linkcheck.py | 17 +++++++++++++++++ 3 files changed, 19 insertions(+) create mode 100644 tests/roots/test-linkcheck-connection-error-on-http-head/conf.py create mode 100644 tests/roots/test-linkcheck-connection-error-on-http-head/index.rst diff --git a/tests/roots/test-linkcheck-connection-error-on-http-head/conf.py b/tests/roots/test-linkcheck-connection-error-on-http-head/conf.py new file mode 100644 index 00000000000..a45d22e2821 --- /dev/null +++ b/tests/roots/test-linkcheck-connection-error-on-http-head/conf.py @@ -0,0 +1 @@ +exclude_patterns = ['_build'] diff --git a/tests/roots/test-linkcheck-connection-error-on-http-head/index.rst b/tests/roots/test-linkcheck-connection-error-on-http-head/index.rst new file mode 100644 index 00000000000..bdb5fecb207 --- /dev/null +++ b/tests/roots/test-linkcheck-connection-error-on-http-head/index.rst @@ -0,0 +1 @@ +This is `a link to the US Patent Website `_ diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py index fd7a5482abd..810d5555ede 100644 --- a/tests/test_build_linkcheck.py +++ b/tests/test_build_linkcheck.py @@ -575,3 +575,20 @@ def test_limit_rate_bails_out_after_waiting_max_time(app): rate_limits) next_check = worker.limit_rate(FakeResponse()) assert next_check is None + + +@pytest.mark.sphinx('linkcheck', testroot='linkcheck-connection-error-on-http-head', freshenv=True) +def test_get_after_head_raises_connection_error(app): + class InternalServerErrorHandler(http.server.BaseHTTPRequestHandler): + def do_GET(self): + self.send_error(500, "Internal Server Error") + + with http_server(InternalServerErrorHandler): + app.build() + content = (app.outdir / 'output.txt').read_text() + assert "broken" not in content + # assert content == ( + # "index.rst:1: [broken] http://localhost:7777/#anchor: " + # "500 Server Error: Internal Server Error " + # "for url: http://localhost:7777/\n" + # ) From 57c866caf1b64a23d08436b49c5e23313ec68259 Mon Sep 17 00:00:00 2001 From: Justin Mathews Date: Mon, 7 Jun 2021 16:50:42 -0400 Subject: [PATCH 02/16] catch ConnectionError on HEAD --- sphinx/builders/linkcheck.py | 4 ++-- tests/test_build_linkcheck.py | 14 ++------------ 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 05e12c1730e..a3d2eb93599 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -26,7 +26,7 @@ from docutils import nodes from docutils.nodes import Element from requests import Response -from requests.exceptions import HTTPError, TooManyRedirects +from requests.exceptions import HTTPError, TooManyRedirects, ConnectionError from sphinx.application import Sphinx from sphinx.builders.dummy import DummyBuilder @@ -460,7 +460,7 @@ def check_uri() -> Tuple[str, str, int]: config=self.config, auth=auth_info, **kwargs) response.raise_for_status() - except (HTTPError, TooManyRedirects) as err: + except (HTTPError, TooManyRedirects, ConnectionError) as err: if isinstance(err, HTTPError) and err.response.status_code == 429: raise # retry with GET request if that fails, some servers diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py index 810d5555ede..9966d21be49 100644 --- a/tests/test_build_linkcheck.py +++ b/tests/test_build_linkcheck.py @@ -579,16 +579,6 @@ def test_limit_rate_bails_out_after_waiting_max_time(app): @pytest.mark.sphinx('linkcheck', testroot='linkcheck-connection-error-on-http-head', freshenv=True) def test_get_after_head_raises_connection_error(app): - class InternalServerErrorHandler(http.server.BaseHTTPRequestHandler): - def do_GET(self): - self.send_error(500, "Internal Server Error") - - with http_server(InternalServerErrorHandler): - app.build() + app.build() content = (app.outdir / 'output.txt').read_text() - assert "broken" not in content - # assert content == ( - # "index.rst:1: [broken] http://localhost:7777/#anchor: " - # "500 Server Error: Internal Server Error " - # "for url: http://localhost:7777/\n" - # ) + assert not content From 11b4345ff5fd412dab621bae5fd7a7ced15ab326 Mon Sep 17 00:00:00 2001 From: Justin Mathews Date: Mon, 7 Jun 2021 17:35:25 -0400 Subject: [PATCH 03/16] Resolves bug #9306 --- .../test-linkcheck-connection-error-on-http-head/index.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/roots/test-linkcheck-connection-error-on-http-head/index.rst b/tests/roots/test-linkcheck-connection-error-on-http-head/index.rst index bdb5fecb207..ea799681512 100644 --- a/tests/roots/test-linkcheck-connection-error-on-http-head/index.rst +++ b/tests/roots/test-linkcheck-connection-error-on-http-head/index.rst @@ -1 +1,3 @@ +This tests bug #9306. + This is `a link to the US Patent Website `_ From db9adaceafe9769509d9b482ee8d7afdced09a44 Mon Sep 17 00:00:00 2001 From: Justin Mathews Date: Wed, 9 Jun 2021 23:40:40 -0400 Subject: [PATCH 04/16] Trigger the bad web server behaviour with the http_server context manager instead of relying on an external website. --- .../index.rst | 4 +--- tests/test_build_linkcheck.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/roots/test-linkcheck-connection-error-on-http-head/index.rst b/tests/roots/test-linkcheck-connection-error-on-http-head/index.rst index ea799681512..0d25e5d260a 100644 --- a/tests/roots/test-linkcheck-connection-error-on-http-head/index.rst +++ b/tests/roots/test-linkcheck-connection-error-on-http-head/index.rst @@ -1,3 +1 @@ -This tests bug #9306. - -This is `a link to the US Patent Website `_ +`This `_ tests bug #9306. diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py index 9966d21be49..e0e7982b39e 100644 --- a/tests/test_build_linkcheck.py +++ b/tests/test_build_linkcheck.py @@ -577,8 +577,19 @@ def test_limit_rate_bails_out_after_waiting_max_time(app): assert next_check is None +class ConnectionResetHandler(http.server.BaseHTTPRequestHandler): + def do_HEAD(self): + raise requests.ConnectionError + + def do_GET(self): + self.send_response(200, "OK") + self.end_headers() + + @pytest.mark.sphinx('linkcheck', testroot='linkcheck-connection-error-on-http-head', freshenv=True) def test_get_after_head_raises_connection_error(app): - app.build() + app.config.tls_verify = False + with https_server(ConnectionResetHandler): + app.build() content = (app.outdir / 'output.txt').read_text() assert not content From e10a31b10aced3771c9c3594c163cfcd22d770db Mon Sep 17 00:00:00 2001 From: Justin Mathews Date: Wed, 9 Jun 2021 23:45:32 -0400 Subject: [PATCH 05/16] making a note in the change log --- CHANGES | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES b/CHANGES index 5b890fe8420..0fbc2f2df32 100644 --- a/CHANGES +++ b/CHANGES @@ -16,6 +16,8 @@ Features added Bugs fixed ---------- +* #9306: Linkcheck Reports Broken Link when Remote Server Closes on HEAD Request + Testing -------- From 935df33d950c2388e9d5a30c1c902bbd1d23f22e Mon Sep 17 00:00:00 2001 From: Justin Mathews Date: Wed, 9 Jun 2021 23:56:02 -0400 Subject: [PATCH 06/16] comment explaining why try GET when HEAD got a ConnectionError --- sphinx/builders/linkcheck.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index a3d2eb93599..65a9a88c95d 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -460,6 +460,11 @@ def check_uri() -> Tuple[str, str, int]: config=self.config, auth=auth_info, **kwargs) response.raise_for_status() + # When there is a ConnectionError from the HEAD request, it might be due to + # the webserver intentionally dropping the connection when it sees HEAD requests + # but still responding properly to GET requests. This has been observed in the + # wild with at https://patft.uspto.gov/. + # See https://github.com/sphinx-doc/sphinx/issues/9306 for more details. except (HTTPError, TooManyRedirects, ConnectionError) as err: if isinstance(err, HTTPError) and err.response.status_code == 429: raise From 68ff1b603458cf23cc8739e6fabc945cdaea084a Mon Sep 17 00:00:00 2001 From: Justin Mathews Date: Thu, 10 Jun 2021 11:39:03 -0400 Subject: [PATCH 07/16] Update CHANGES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: François Freitag --- CHANGES | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index 0fbc2f2df32..c81afeacb00 100644 --- a/CHANGES +++ b/CHANGES @@ -16,7 +16,7 @@ Features added Bugs fixed ---------- -* #9306: Linkcheck Reports Broken Link when Remote Server Closes on HEAD Request +* #9306: Linkcheck reports broken link when remote server closes on HEAD request Testing -------- From 193ea9153e842d3154c6439cdcd98c041a41763f Mon Sep 17 00:00:00 2001 From: Justin Mathews Date: Thu, 10 Jun 2021 11:40:01 -0400 Subject: [PATCH 08/16] alphabetical ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: François Freitag --- sphinx/builders/linkcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 65a9a88c95d..8d9a476f02e 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -26,7 +26,7 @@ from docutils import nodes from docutils.nodes import Element from requests import Response -from requests.exceptions import HTTPError, TooManyRedirects, ConnectionError +from requests.exceptions import ConnectionError, HTTPError, TooManyRedirects from sphinx.application import Sphinx from sphinx.builders.dummy import DummyBuilder From d804981a377b05de287f4b9941c76d9a530570d0 Mon Sep 17 00:00:00 2001 From: Justin Mathews Date: Thu, 10 Jun 2021 11:40:16 -0400 Subject: [PATCH 09/16] alphabetical ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: François Freitag --- sphinx/builders/linkcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 8d9a476f02e..bc4b2204268 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -465,7 +465,7 @@ def check_uri() -> Tuple[str, str, int]: # but still responding properly to GET requests. This has been observed in the # wild with at https://patft.uspto.gov/. # See https://github.com/sphinx-doc/sphinx/issues/9306 for more details. - except (HTTPError, TooManyRedirects, ConnectionError) as err: + except (ConnectionError, HTTPError, TooManyRedirects) as err: if isinstance(err, HTTPError) and err.response.status_code == 429: raise # retry with GET request if that fails, some servers From ce305190c561bafc369aeb717f2646b3f4364af5 Mon Sep 17 00:00:00 2001 From: Justin Mathews Date: Thu, 10 Jun 2021 11:40:44 -0400 Subject: [PATCH 10/16] shorter explanatory comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: François Freitag --- sphinx/builders/linkcheck.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index bc4b2204268..a81a0253d9e 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -460,11 +460,8 @@ def check_uri() -> Tuple[str, str, int]: config=self.config, auth=auth_info, **kwargs) response.raise_for_status() - # When there is a ConnectionError from the HEAD request, it might be due to - # the webserver intentionally dropping the connection when it sees HEAD requests - # but still responding properly to GET requests. This has been observed in the - # wild with at https://patft.uspto.gov/. - # See https://github.com/sphinx-doc/sphinx/issues/9306 for more details. + # Servers drop the connection on HEAD requests, causing + # ConnectionError. except (ConnectionError, HTTPError, TooManyRedirects) as err: if isinstance(err, HTTPError) and err.response.status_code == 429: raise From a4621fb73eef079959229c7fbc215639db202b4d Mon Sep 17 00:00:00 2001 From: Justin Mathews Date: Thu, 10 Jun 2021 11:41:11 -0400 Subject: [PATCH 11/16] test with http MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: François Freitag --- tests/test_build_linkcheck.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py index e0e7982b39e..f9a874577c2 100644 --- a/tests/test_build_linkcheck.py +++ b/tests/test_build_linkcheck.py @@ -588,8 +588,7 @@ def do_GET(self): @pytest.mark.sphinx('linkcheck', testroot='linkcheck-connection-error-on-http-head', freshenv=True) def test_get_after_head_raises_connection_error(app): - app.config.tls_verify = False - with https_server(ConnectionResetHandler): + with http_server(ConnectionResetHandler): app.build() content = (app.outdir / 'output.txt').read_text() assert not content From 134a8d8f545b5161060798d50d2c981e64d1db77 Mon Sep 17 00:00:00 2001 From: Justin Mathews Date: Thu, 10 Jun 2021 11:41:47 -0400 Subject: [PATCH 12/16] reuse existing test link MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: François Freitag --- tests/test_build_linkcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py index f9a874577c2..1fea96d6c76 100644 --- a/tests/test_build_linkcheck.py +++ b/tests/test_build_linkcheck.py @@ -586,7 +586,7 @@ def do_GET(self): self.end_headers() -@pytest.mark.sphinx('linkcheck', testroot='linkcheck-connection-error-on-http-head', freshenv=True) +@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True) def test_get_after_head_raises_connection_error(app): with http_server(ConnectionResetHandler): app.build() From 9b2a1e20e23dc9830540bae6c707d8349de4e922 Mon Sep 17 00:00:00 2001 From: Justin Mathews Date: Thu, 10 Jun 2021 11:42:36 -0400 Subject: [PATCH 13/16] explicitly close the connection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: François Freitag --- tests/test_build_linkcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py index 1fea96d6c76..5a146b831cf 100644 --- a/tests/test_build_linkcheck.py +++ b/tests/test_build_linkcheck.py @@ -579,7 +579,7 @@ def test_limit_rate_bails_out_after_waiting_max_time(app): class ConnectionResetHandler(http.server.BaseHTTPRequestHandler): def do_HEAD(self): - raise requests.ConnectionError + self.connection.close() def do_GET(self): self.send_response(200, "OK") From 36c662eca58d36579ee854a1f9f440724b91aca2 Mon Sep 17 00:00:00 2001 From: Justin Mathews Date: Thu, 10 Jun 2021 11:43:14 -0400 Subject: [PATCH 14/16] positive test assertion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: François Freitag --- tests/test_build_linkcheck.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py index 5a146b831cf..78d6fe57b78 100644 --- a/tests/test_build_linkcheck.py +++ b/tests/test_build_linkcheck.py @@ -592,3 +592,12 @@ def test_get_after_head_raises_connection_error(app): app.build() content = (app.outdir / 'output.txt').read_text() assert not content + content = (app.outdir / 'output.json').read_text() + assert json.loads(content) == { + "filename": "index.rst", + "lineno": 1, + "status": "working", + "code": 0, + "uri": "http://localhost:7777/", + "info": "", + } From cf8f5fce371318123679ef14fd4a242485b733cf Mon Sep 17 00:00:00 2001 From: Justin Mathews Date: Thu, 10 Jun 2021 11:46:17 -0400 Subject: [PATCH 15/16] delete redundant test content --- tests/roots/test-linkcheck-connection-error-on-http-head/conf.py | 1 - .../roots/test-linkcheck-connection-error-on-http-head/index.rst | 1 - 2 files changed, 2 deletions(-) delete mode 100644 tests/roots/test-linkcheck-connection-error-on-http-head/conf.py delete mode 100644 tests/roots/test-linkcheck-connection-error-on-http-head/index.rst diff --git a/tests/roots/test-linkcheck-connection-error-on-http-head/conf.py b/tests/roots/test-linkcheck-connection-error-on-http-head/conf.py deleted file mode 100644 index a45d22e2821..00000000000 --- a/tests/roots/test-linkcheck-connection-error-on-http-head/conf.py +++ /dev/null @@ -1 +0,0 @@ -exclude_patterns = ['_build'] diff --git a/tests/roots/test-linkcheck-connection-error-on-http-head/index.rst b/tests/roots/test-linkcheck-connection-error-on-http-head/index.rst deleted file mode 100644 index 0d25e5d260a..00000000000 --- a/tests/roots/test-linkcheck-connection-error-on-http-head/index.rst +++ /dev/null @@ -1 +0,0 @@ -`This `_ tests bug #9306. From 862d876c849925829691eba2bd975bf330f7567d Mon Sep 17 00:00:00 2001 From: Justin Mathews Date: Thu, 10 Jun 2021 12:54:17 -0400 Subject: [PATCH 16/16] Update CHANGES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: François Freitag --- CHANGES | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES b/CHANGES index c81afeacb00..379334d9182 100644 --- a/CHANGES +++ b/CHANGES @@ -16,7 +16,8 @@ Features added Bugs fixed ---------- -* #9306: Linkcheck reports broken link when remote server closes on HEAD request +* #9306: Linkcheck reports broken link when remote server closes the connection + on HEAD request Testing --------