Skip to content

Commit

Permalink
Merge pull request #9234 from tk0miya/6525_linkcheck_warn_redirects
Browse files Browse the repository at this point in the history
Close #6525: linkcheck: Add linkcheck_ignore_redirects and linkcheck_warn_redirects
  • Loading branch information
tk0miya committed Jul 6, 2021
2 parents f0fef96 + 5e5bca9 commit b09acab
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 6 deletions.
2 changes: 2 additions & 0 deletions CHANGES
Expand Up @@ -50,6 +50,8 @@ Features added
* #9016: linkcheck: Support checking anchors on github.com
* #9016: linkcheck: Add a new event :event:`linkcheck-process-uri` to modify
URIs before checking hyperlinks
* #6525: linkcheck: Add :confval:`linkcheck_allowed_redirects` to mark
hyperlinks that are redirected to expected URLs as "working"
* #1874: py domain: Support union types using ``|`` in info-field-list
* #9268: py domain: :confval:`python_use_unqualified_type_names` supports type
field in info-field-list
Expand Down
23 changes: 23 additions & 0 deletions doc/usage/configuration.rst
Expand Up @@ -2527,6 +2527,29 @@ Options for the linkcheck builder

.. versionadded:: 1.1

.. confval:: linkcheck_allowed_redirects

A dictionary that maps a pattern of the source URI to a pattern of the canonical
URI. The linkcheck builder treats the redirected link as "working" when:

- the link in the document matches the source URI pattern, and
- the redirect location matches the canonical URI pattern.

Example:

.. code-block:: python

   linkcheck_allowed_redirects = {
       # All HTTP redirections from the source URI to the canonical URI will be treated as "working".
       r'https://sphinx-doc\.org/.*': r'https://sphinx-doc\.org/en/master/.*'
   }

If set, the linkcheck builder will emit a warning when a disallowed
redirection is found. This is useful for detecting unexpected redirects under
:option:`the warn-is-error mode <sphinx-build -W>`.

.. versionadded:: 4.1

.. confval:: linkcheck_request_headers

A dictionary that maps baseurls to HTTP request headers.
Expand Down
38 changes: 34 additions & 4 deletions sphinx/builders/linkcheck.py
Expand Up @@ -272,8 +272,12 @@ def process_result(self, result: CheckResult) -> None:
except KeyError:
text, color = ('with unknown code', purple)
linkstat['text'] = text
logger.info(color('redirect ') + result.uri +
color(' - ' + text + ' to ' + result.message))
if self.config.linkcheck_allowed_redirects:
logger.warning('redirect ' + result.uri + ' - ' + text + ' to ' +
result.message, location=(filename, result.lineno))
else:
logger.info(color('redirect ') + result.uri +
color(' - ' + text + ' to ' + result.message))
self.write_entry('redirected ' + text, result.docname, filename,
result.lineno, result.uri + ' to ' + result.message)
else:
Expand Down Expand Up @@ -496,13 +500,23 @@ def check_uri() -> Tuple[str, str, int]:
new_url = response.url
if anchor:
new_url += '#' + anchor
# history contains any redirects, get last
if response.history:

if allowed_redirect(req_url, new_url):
return 'working', '', 0
elif response.history:
# history contains any redirects, get last
code = response.history[-1].status_code
return 'redirected', new_url, code
else:
return 'redirected', new_url, 0

def allowed_redirect(url: str, new_url: str) -> bool:
    """Tell whether a redirect from *url* to *new_url* is an allowed one.

    A redirect is allowed when at least one entry of
    ``self.config.linkcheck_allowed_redirects`` has a key that matches
    *url* and a value that matches *new_url*.  Both sides of every entry are
    used through their ``match()`` method.
    """
    return any(
        source.match(url) and target.match(new_url)
        for source, target in self.config.linkcheck_allowed_redirects.items()
    )

def check(docname: str) -> Tuple[str, str, int]:
# check for various conditions without bothering the network
if len(uri) == 0 or uri.startswith(('#', 'mailto:', 'tel:')):
Expand Down Expand Up @@ -667,11 +681,25 @@ def rewrite_github_anchor(app: Sphinx, uri: str) -> Optional[str]:
return None


def compile_linkcheck_allowed_redirects(app: Sphinx, config: Config) -> None:
    """Compile patterns in linkcheck_allowed_redirects to the regexp objects.

    Every ``source pattern -> canonical pattern`` pair stored in
    ``app.config.linkcheck_allowed_redirects`` is replaced, in place, by a
    pair of compiled regexp objects.  A pair whose source or canonical
    pattern cannot be compiled is reported with a warning and dropped.

    :param app: the application whose configuration is rewritten
    :param config: unused here; the mapping is read from ``app.config``
    """
    allowed_redirects = app.config.linkcheck_allowed_redirects
    compiled = {}
    for url, pattern in allowed_redirects.items():
        try:
            compiled[re.compile(url)] = re.compile(pattern)
        except re.error as exc:
            logger.warning(__('Failed to compile regex in linkcheck_allowed_redirects: %r %s'),
                           exc.pattern, exc.msg)

    # Swap the contents only once every pair has been processed.  Inserting
    # the compiled key and then popping the original key loses any entry whose
    # key is already a compiled pattern: re.compile() hands such an object
    # back unchanged, so the pop would remove the entry that was just added.
    allowed_redirects.clear()
    allowed_redirects.update(compiled)


def setup(app: Sphinx) -> Dict[str, Any]:
app.add_builder(CheckExternalLinksBuilder)
app.add_post_transform(HyperlinkCollector)

app.add_config_value('linkcheck_ignore', [], None)
app.add_config_value('linkcheck_allowed_redirects', {}, None)
app.add_config_value('linkcheck_auth', [], None)
app.add_config_value('linkcheck_request_headers', {}, None)
app.add_config_value('linkcheck_retries', 1, None)
Expand All @@ -684,6 +712,8 @@ def setup(app: Sphinx) -> Dict[str, Any]:
app.add_config_value('linkcheck_rate_limit_timeout', 300.0, None)

app.add_event('linkcheck-process-uri')

app.connect('config-inited', compile_linkcheck_allowed_redirects, priority=800)
app.connect('linkcheck-process-uri', rewrite_github_anchor)

return {
Expand Down
@@ -0,0 +1 @@
exclude_patterns = ['_build']
@@ -0,0 +1,2 @@
`local server1 <http://localhost:7777/path1>`_
`local server2 <http://localhost:7777/path2>`_
28 changes: 26 additions & 2 deletions tests/test_build_linkcheck.py
Expand Up @@ -23,6 +23,7 @@
import requests

from sphinx.builders.linkcheck import HyperlinkAvailabilityCheckWorker, RateLimit
from sphinx.testing.util import strip_escseq
from sphinx.util.console import strip_colors

from .utils import CERT_FILE, http_server, https_server
Expand Down Expand Up @@ -254,7 +255,7 @@ def log_date_time_string(self):


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_follows_redirects_on_HEAD(app, capsys):
def test_follows_redirects_on_HEAD(app, capsys, warning):
with http_server(make_redirect_handler(support_head=True)):
app.build()
stdout, stderr = capsys.readouterr()
Expand All @@ -269,10 +270,11 @@ def test_follows_redirects_on_HEAD(app, capsys):
127.0.0.1 - - [] "HEAD /?redirected=1 HTTP/1.1" 204 -
"""
)
assert warning.getvalue() == ''


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_follows_redirects_on_GET(app, capsys):
def test_follows_redirects_on_GET(app, capsys, warning):
with http_server(make_redirect_handler(support_head=False)):
app.build()
stdout, stderr = capsys.readouterr()
Expand All @@ -288,6 +290,28 @@ def test_follows_redirects_on_GET(app, capsys):
127.0.0.1 - - [] "GET /?redirected=1 HTTP/1.1" 204 -
"""
)
assert warning.getvalue() == ''


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-warn-redirects',
                    freshenv=True, confoverrides={
                        'linkcheck_allowed_redirects': {'http://localhost:7777/.*1': '.*'}
                    })
def test_linkcheck_allowed_redirects(app, warning):
    """Check allowed redirects are "working" and others are warned about.

    The configured source pattern only covers URIs ending in ``1``, so
    ``/path1`` must be reported as working while ``/path2`` must be reported
    as redirected and be the subject of the single emitted warning.
    """
    with http_server(make_redirect_handler(support_head=False)):
        app.build()

    # output.json holds one JSON document per line.
    with open(app.outdir / 'output.json') as report:
        entries = [json.loads(line) for line in report]

    assert len(entries) == 2
    status_by_uri = {entry["uri"]: entry["status"] for entry in entries}
    assert status_by_uri["http://localhost:7777/path1"] == "working"
    assert status_by_uri["http://localhost:7777/path2"] == "redirected"

    emitted = warning.getvalue()
    assert ("index.rst.rst:1: WARNING: redirect http://localhost:7777/path2 - with Found to "
            "http://localhost:7777/?redirected=1\n" in strip_escseq(emitted))
    assert len(emitted.splitlines()) == 1


class OKHandler(http.server.BaseHTTPRequestHandler):
Expand Down

0 comments on commit b09acab

Please sign in to comment.