diff --git a/CHANGES b/CHANGES index f06f9bc0ac..4a2f61e7cb 100644 --- a/CHANGES +++ b/CHANGES @@ -50,6 +50,8 @@ Features added * #9016: linkcheck: Support checking anchors on github.com * #9016: linkcheck: Add a new event :event:`linkcheck-process-uri` to modify URIs before checking hyperlinks +* #6525: linkcheck: Add :confval:`linkcheck_allowed_redirects` to mark + hyperlinks that are redirected to expected URLs as "working" * #1874: py domain: Support union types using ``|`` in info-field-list * #9268: py domain: :confval:`python_use_unqualified_type_names` supports type field in info-field-list diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index c5723f95e0..18eae9c190 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -2527,6 +2527,29 @@ Options for the linkcheck builder .. versionadded:: 1.1 +.. confval:: linkcheck_allowed_redirects + + A dictionary that maps a pattern of the source URI to a pattern of the canonical + URI. The linkcheck builder treats the redirected link as "working" when: + + - the link in the document matches the source URI pattern, and + - the redirect location matches the canonical URI pattern. + + Example: + + .. code-block:: python + + linkcheck_allowed_redirects = { + # All HTTP redirections from the source URI to the canonical URI will be treated as "working". + r'https://sphinx-doc\.org/.*': r'https://sphinx-doc\.org/en/master/.*' + } + + If set, the linkcheck builder will emit a warning when a disallowed redirection + is found. It's useful to detect unexpected redirects under :option:`the + warn-is-error mode <sphinx-build -W>`. + + .. versionadded:: 4.1 + .. confval:: linkcheck_request_headers A dictionary that maps baseurls to HTTP request headers. 
diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 722b1e69a5..6819bd3df9 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -272,8 +272,12 @@ def process_result(self, result: CheckResult) -> None: except KeyError: text, color = ('with unknown code', purple) linkstat['text'] = text - logger.info(color('redirect ') + result.uri + - color(' - ' + text + ' to ' + result.message)) + if self.config.linkcheck_allowed_redirects: + logger.warning('redirect ' + result.uri + ' - ' + text + ' to ' + + result.message, location=(filename, result.lineno)) + else: + logger.info(color('redirect ') + result.uri + + color(' - ' + text + ' to ' + result.message)) self.write_entry('redirected ' + text, result.docname, filename, result.lineno, result.uri + ' to ' + result.message) else: @@ -496,13 +500,23 @@ def check_uri() -> Tuple[str, str, int]: new_url = response.url if anchor: new_url += '#' + anchor - # history contains any redirects, get last - if response.history: + + if allowed_redirect(req_url, new_url): + return 'working', '', 0 + elif response.history: + # history contains any redirects, get last code = response.history[-1].status_code return 'redirected', new_url, code else: return 'redirected', new_url, 0 + def allowed_redirect(url: str, new_url: str) -> bool: + for from_url, to_url in self.config.linkcheck_allowed_redirects.items(): + if from_url.match(url) and to_url.match(new_url): + return True + + return False + def check(docname: str) -> Tuple[str, str, int]: # check for various conditions without bothering the network if len(uri) == 0 or uri.startswith(('#', 'mailto:', 'tel:')): @@ -667,11 +681,25 @@ def rewrite_github_anchor(app: Sphinx, uri: str) -> Optional[str]: return None +def compile_linkcheck_allowed_redirects(app: Sphinx, config: Config) -> None: + """Compile patterns in linkcheck_allowed_redirects to the regexp objects.""" + for url, pattern in list(app.config.linkcheck_allowed_redirects.items()): + 
try: + app.config.linkcheck_allowed_redirects[re.compile(url)] = re.compile(pattern) + except re.error as exc: + logger.warning(__('Failed to compile regex in linkcheck_allowed_redirects: %r %s'), + exc.pattern, exc.msg) + finally: + # Remove the original regexp-string + app.config.linkcheck_allowed_redirects.pop(url) + + def setup(app: Sphinx) -> Dict[str, Any]: app.add_builder(CheckExternalLinksBuilder) app.add_post_transform(HyperlinkCollector) app.add_config_value('linkcheck_ignore', [], None) + app.add_config_value('linkcheck_allowed_redirects', {}, None) app.add_config_value('linkcheck_auth', [], None) app.add_config_value('linkcheck_request_headers', {}, None) app.add_config_value('linkcheck_retries', 1, None) @@ -684,6 +712,8 @@ def setup(app: Sphinx) -> Dict[str, Any]: app.add_config_value('linkcheck_rate_limit_timeout', 300.0, None) app.add_event('linkcheck-process-uri') + + app.connect('config-inited', compile_linkcheck_allowed_redirects, priority=800) app.connect('linkcheck-process-uri', rewrite_github_anchor) return { diff --git a/tests/roots/test-linkcheck-localserver-warn-redirects/conf.py b/tests/roots/test-linkcheck-localserver-warn-redirects/conf.py new file mode 100644 index 0000000000..a45d22e282 --- /dev/null +++ b/tests/roots/test-linkcheck-localserver-warn-redirects/conf.py @@ -0,0 +1 @@ +exclude_patterns = ['_build'] diff --git a/tests/roots/test-linkcheck-localserver-warn-redirects/index.rst b/tests/roots/test-linkcheck-localserver-warn-redirects/index.rst new file mode 100644 index 0000000000..7c57d56712 --- /dev/null +++ b/tests/roots/test-linkcheck-localserver-warn-redirects/index.rst @@ -0,0 +1,2 @@ +`local server1 <http://localhost:7777/path1>`_ +`local server2 <http://localhost:7777/path2>`_ diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py index a67bca1702..6db0e75120 100644 --- a/tests/test_build_linkcheck.py +++ b/tests/test_build_linkcheck.py @@ -23,6 +23,7 @@ import requests from sphinx.builders.linkcheck import HyperlinkAvailabilityCheckWorker, RateLimit +from 
sphinx.testing.util import strip_escseq from sphinx.util.console import strip_colors from .utils import CERT_FILE, http_server, https_server @@ -254,7 +255,7 @@ def log_date_time_string(self): @pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True) -def test_follows_redirects_on_HEAD(app, capsys): +def test_follows_redirects_on_HEAD(app, capsys, warning): with http_server(make_redirect_handler(support_head=True)): app.build() stdout, stderr = capsys.readouterr() @@ -269,10 +270,11 @@ def test_follows_redirects_on_HEAD(app, capsys): 127.0.0.1 - - [] "HEAD /?redirected=1 HTTP/1.1" 204 - """ ) + assert warning.getvalue() == '' @pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True) -def test_follows_redirects_on_GET(app, capsys): +def test_follows_redirects_on_GET(app, capsys, warning): with http_server(make_redirect_handler(support_head=False)): app.build() stdout, stderr = capsys.readouterr() @@ -288,6 +290,28 @@ def test_follows_redirects_on_GET(app, capsys): 127.0.0.1 - - [] "GET /?redirected=1 HTTP/1.1" 204 - """ ) + assert warning.getvalue() == '' + + +@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-warn-redirects', + freshenv=True, confoverrides={ + 'linkcheck_allowed_redirects': {'http://localhost:7777/.*1': '.*'} + }) +def test_linkcheck_allowed_redirects(app, warning): + with http_server(make_redirect_handler(support_head=False)): + app.build() + + with open(app.outdir / 'output.json') as fp: + records = [json.loads(l) for l in fp.readlines()] + + assert len(records) == 2 + result = {r["uri"]: r["status"] for r in records} + assert result["http://localhost:7777/path1"] == "working" + assert result["http://localhost:7777/path2"] == "redirected" + + assert ("index.rst.rst:1: WARNING: redirect http://localhost:7777/path2 - with Found to " + "http://localhost:7777/?redirected=1\n" in strip_escseq(warning.getvalue())) + assert len(warning.getvalue().splitlines()) == 1 class 
OKHandler(http.server.BaseHTTPRequestHandler):