sphinx-doc · tk0miya · Jul 6, 2021 · Apr 29, 2021 · Apr 29, 2021 · May 20, 2021
diff --git a/CHANGES b/CHANGES
@@ -43,6 +43,10 @@ Features added
   text
 * #9176: i18n: Emit a debug message if message catalog file not found under
   :confval:`locale_dirs`
+* #6525: linkcheck: Add :confval:`linkcheck_warn_redirects` to emit a warning
+  when the hyperlink is redirected
+* #6525: linkcheck: Add :confval:`linkcheck_allowed_redirects` to mark
+  hyperlinks that are redirected to expected URLs as "working"
 * #1874: py domain: Support union types using ``|`` in info-field-list
 * #9097: Optimize the paralell build
 * #9131: Add :confval:`nitpick_ignore_regex` to ignore nitpicky warnings using

diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst
@@ -2527,6 +2527,28 @@ Options for the linkcheck builder
 
    .. versionadded:: 1.1
 
+.. confval:: linkcheck_allowed_redirects
+
+   A dictionary that maps a pattern of the source URI to a pattern of the canonical
+   URI. The linkcheck builder treats the redirected link as "working" when:
+
+   - the link in the document matches the source URI pattern, and
+   - the redirect location matches the canonical URI pattern.
+
+   Example:
+
+   .. code-block:: python
+
+      linkcheck_allowed_redirects = {
+          # All HTTP redirections from the source URI to the canonical URI will be treated as "working".
+          r'https://sphinx-doc\.org/.*': r'https://sphinx-doc\.org/en/master/.*'
+      }
+
+   It's helpful to enable :confval:`linkcheck_warn_redirects` to warn for URIs
+   causing unexpected HTTP redirection.
+
+   .. versionadded:: 4.1
+
 .. confval:: linkcheck_request_headers
 
    A dictionary that maps baseurls to HTTP request headers.
@@ -2647,6 +2669,14 @@ Options for the linkcheck builder
 
    .. versionadded:: 3.4
 
+.. confval:: linkcheck_warn_redirects
+
+   If true, emits a warning when the response for a hyperlink is a redirect.
+   It's useful to detect unexpected redirects under :option:`the warn-is-error
+   mode <sphinx-build -W>`.  Default is ``False``.
+
+   .. versionadded:: 4.1
+
 
 Options for the XML builder
 ---------------------------

diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py
@@ -272,8 +272,12 @@ def process_result(self, result: CheckResult) -> None:
             except KeyError:
                 text, color = ('with unknown code', purple)
             linkstat['text'] = text
-            logger.info(color('redirect  ') + result.uri +
-                        color(' - ' + text + ' to ' + result.message))
+            if self.config.linkcheck_warn_redirects:
+                logger.warning('redirect  ' + result.uri + ' - ' + text + ' to ' +
+                               result.message, location=(filename, result.lineno))
+            else:
+                logger.info(color('redirect  ') + result.uri +
+                            color(' - ' + text + ' to ' + result.message))
             self.write_entry('redirected ' + text, result.docname, filename,
                              result.lineno, result.uri + ' to ' + result.message)
         else:
@@ -494,13 +498,23 @@ def check_uri() -> Tuple[str, str, int]:
                 new_url = response.url
                 if anchor:
                     new_url += '#' + anchor
-                # history contains any redirects, get last
-                if response.history:
+
+                if allowed_redirect(req_url, new_url):
+                    return 'working', '', 0
+                elif response.history:
+                    # history contains any redirects, get last
                     code = response.history[-1].status_code
                     return 'redirected', new_url, code
                 else:
                     return 'redirected', new_url, 0
 
+        def allowed_redirect(url: str, new_url: str) -> bool:
+            for from_url, to_url in self.config.linkcheck_allowed_redirects.items():
+                if from_url.match(url) and to_url.match(new_url):
+                    return True
+
+            return False
+
         def check(docname: str) -> Tuple[str, str, int]:
             # check for various conditions without bothering the network
             if len(uri) == 0 or uri.startswith(('#', 'mailto:', 'tel:')):
@@ -642,11 +656,25 @@ def run(self, **kwargs: Any) -> None:
                     hyperlinks[uri] = uri_info
 
 
+def compile_linkcheck_allowed_redirects(app: Sphinx, config: Config) -> None:
+    """Compile patterns in linkcheck_allowed_redirects to the regexp objects."""
+    for url, pattern in list(app.config.linkcheck_allowed_redirects.items()):
+        try:
+            app.config.linkcheck_allowed_redirects[re.compile(url)] = re.compile(pattern)
+        except re.error as exc:
+            logger.warning(__('Failed to compile regex in linkcheck_allowed_redirects: %r %s'),
+                           exc.pattern, exc.msg)
+        finally:
+            # Remove the original regexp-string
+            app.config.linkcheck_allowed_redirects.pop(url)
+
+
 def setup(app: Sphinx) -> Dict[str, Any]:
     app.add_builder(CheckExternalLinksBuilder)
     app.add_post_transform(HyperlinkCollector)
 
     app.add_config_value('linkcheck_ignore', [], None)
+    app.add_config_value('linkcheck_allowed_redirects', {}, None)
     app.add_config_value('linkcheck_auth', [], None)
     app.add_config_value('linkcheck_request_headers', {}, None)
     app.add_config_value('linkcheck_retries', 1, None)
@@ -657,6 +685,9 @@ def setup(app: Sphinx) -> Dict[str, Any]:
     # commonly used for dynamic pages
     app.add_config_value('linkcheck_anchors_ignore', ["^!"], None)
     app.add_config_value('linkcheck_rate_limit_timeout', 300.0, None)
+    app.add_config_value('linkcheck_warn_redirects', False, None)
+
+    app.connect('config-inited', compile_linkcheck_allowed_redirects, priority=800)
 
     return {
         'version': 'builtin',

diff --git a/tests/roots/test-linkcheck-localserver-warn-redirects/conf.py b/tests/roots/test-linkcheck-localserver-warn-redirects/conf.py
@@ -0,0 +1 @@
+exclude_patterns = ['_build']
diff --git a/tests/roots/test-linkcheck-localserver-warn-redirects/index.rst b/tests/roots/test-linkcheck-localserver-warn-redirects/index.rst
@@ -0,0 +1,2 @@
+`local server1 <http://localhost:7777/path1>`_
+`local server2 <http://localhost:7777/path2>`_
diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py
@@ -23,6 +23,7 @@
 import requests
 
 from sphinx.builders.linkcheck import HyperlinkAvailabilityCheckWorker, RateLimit
+from sphinx.testing.util import strip_escseq
 from sphinx.util.console import strip_colors
 
 from .utils import CERT_FILE, http_server, https_server
@@ -250,7 +251,7 @@ def log_date_time_string(self):
 
 
 @pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
-def test_follows_redirects_on_HEAD(app, capsys):
+def test_follows_redirects_on_HEAD(app, capsys, warning):
     with http_server(make_redirect_handler(support_head=True)):
         app.build()
     stdout, stderr = capsys.readouterr()
@@ -265,10 +266,11 @@ def test_follows_redirects_on_HEAD(app, capsys):
         127.0.0.1 - - [] "HEAD /?redirected=1 HTTP/1.1" 204 -
         """
     )
+    assert warning.getvalue() == ''
 
 
 @pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
-def test_follows_redirects_on_GET(app, capsys):
+def test_follows_redirects_on_GET(app, capsys, warning):
     with http_server(make_redirect_handler(support_head=False)):
         app.build()
     stdout, stderr = capsys.readouterr()
@@ -284,6 +286,58 @@ def test_follows_redirects_on_GET(app, capsys):
         127.0.0.1 - - [] "GET /?redirected=1 HTTP/1.1" 204 -
         """
     )
+    assert warning.getvalue() == ''
+
+
+@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-warn-redirects',
+                    freshenv=True, confoverrides={'linkcheck_warn_redirects': True})
+def test_linkcheck_warn_redirects(app, warning):
+    with http_server(make_redirect_handler(support_head=False)):
+        app.build()
+    assert ("index.rst.rst:1: WARNING: redirect  http://localhost:7777/path1 - with Found to "
+            "http://localhost:7777/?redirected=1\n" in strip_escseq(warning.getvalue()))
+    assert ("index.rst.rst:1: WARNING: redirect  http://localhost:7777/path2 - with Found to "
+            "http://localhost:7777/?redirected=1\n" in strip_escseq(warning.getvalue()))
+    assert len(warning.getvalue().splitlines()) == 2
+
+
+@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-warn-redirects',
+                    freshenv=True, confoverrides={
+                        'linkcheck_allowed_redirects': {'http://localhost:7777/.*1': '.*'}
+                    })
+def test_linkcheck_allowed_redirects(app, warning):
+    with http_server(make_redirect_handler(support_head=False)):
+        app.build()
+
+    with open(app.outdir / 'output.json') as fp:
+        records = [json.loads(l) for l in fp.readlines()]
+
+    assert len(records) == 2
+    result = {r["uri"]: r["status"] for r in records}
+    assert result["http://localhost:7777/path1"] == "working"
+    assert result["http://localhost:7777/path2"] == "redirected"
+    assert warning.getvalue() == ''
+
+
+@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-warn-redirects',
+                    freshenv=True, confoverrides={
+                        'linkcheck_allowed_redirects': {'http://localhost:7777/.*1': '.*'},
+                        'linkcheck_warn_redirects': True,
+                    })
+def test_linkcheck_allowed_redirects_and_linkcheck_warn_redirects(app, warning):
+    with http_server(make_redirect_handler(support_head=False)):
+        app.build()
+
+    with open(app.outdir / 'output.json') as fp:
+        records = [json.loads(l) for l in fp.readlines()]
+
+    assert len(records) == 2
+    result = {r["uri"]: r["status"] for r in records}
+    assert result["http://localhost:7777/path1"] == "working"
+    assert result["http://localhost:7777/path2"] == "redirected"
+    assert ("index.rst.rst:1: WARNING: redirect  http://localhost:7777/path2 - with Found to "
+            "http://localhost:7777/?redirected=1\n" in strip_escseq(warning.getvalue()))
+    assert len(warning.getvalue().splitlines()) == 1
 
 
 class OKHandler(http.server.BaseHTTPRequestHandler):