Skip to content

Commit

Permalink
Close #9016: linkcheck builder failed to check the anchors of github.com
Browse files Browse the repository at this point in the history
  • Loading branch information
tk0miya committed May 31, 2021
1 parent 5c27519 commit 92335bd
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 3 deletions.
3 changes: 3 additions & 0 deletions CHANGES
Expand Up @@ -42,6 +42,9 @@ Features added
text
* #9176: i18n: Emit a debug message if message catalog file not found under
:confval:`locale_dirs`
* #9016: linkcheck: failed to check the anchor of github.com
* #9016: linkcheck: Add a new event :event:`linkcheck-process-uri` to modify
URIs before checking hyperlinks
* #1874: py domain: Support union types using ``|`` in info-field-list
* #9097: Optimize the parallel build
* #9131: Add :confval:`nitpick_ignore_regex` to ignore nitpicky warnings using
Expand Down
8 changes: 8 additions & 0 deletions doc/extdev/appapi.rst
Expand Up @@ -384,6 +384,14 @@ Here is a more detailed list of these events.
.. versionchanged:: 1.3
The return value can now specify a template name.

.. event:: linkcheck-process-uri (app, uri)

Emitted when the linkcheck builder collects hyperlinks from the document. *uri*
is a collected URI. The event handlers can modify the URI by returning a
string.

.. versionadded:: 4.1

.. event:: build-finished (app, exception)

Emitted when a build has finished, before Sphinx exits, usually used for
Expand Down
28 changes: 28 additions & 0 deletions sphinx/builders/linkcheck.py
Expand Up @@ -627,6 +627,10 @@ def run(self, **kwargs: Any) -> None:
if 'refuri' not in refnode:
continue
uri = refnode['refuri']
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri)
if newuri:
uri = newuri

lineno = get_node_line(refnode)
uri_info = Hyperlink(uri, self.env.docname, lineno)
if uri not in hyperlinks:
Expand All @@ -636,12 +640,33 @@ def run(self, **kwargs: Any) -> None:
for imgnode in self.document.traverse(nodes.image):
uri = imgnode['candidates'].get('?')
if uri and '://' in uri:
newuri = self.app.emit_firstresult('linkcheck-process-uri', uri)
if newuri:
uri = newuri

lineno = get_node_line(imgnode)
uri_info = Hyperlink(uri, self.env.docname, lineno)
if uri not in hyperlinks:
hyperlinks[uri] = uri_info


def rewrite_github_anchor(app: Sphinx, uri: str) -> Optional[str]:
    """Rewrite the anchor name of a hyperlink to github.com.

    The hyperlink anchors in github.com are dynamically generated.  This
    rewrites them before checking and makes them comparable, by adding the
    ``user-content-`` prefix to the anchor.

    :param app: The application object; unused, but required by the
                ``linkcheck-process-uri`` event handler signature.
    :param uri: The collected URI.
    :return: The rewritten URI, or ``None`` to leave the URI untouched (it is
             not hosted on github.com, has no anchor, or the anchor is
             already prefixed).
    """
    # Imported locally so the block does not depend on the module's imports.
    from urllib.parse import urlsplit

    baseuri, _, anchor = uri.partition('#')
    if not anchor:
        # No anchor, or an empty one ("...#"): nothing to rewrite.
        return None

    try:
        hostname = urlsplit(baseuri).hostname
    except ValueError:
        # Malformed authority (e.g. an unbalanced IPv6 bracket); leave as is.
        return None

    # Compare the parsed host exactly.  A substring/regex search would also
    # match look-alike hosts ("githubxcom") and GitHub URLs embedded in the
    # query string or path of an unrelated site.
    if hostname != 'github.com':
        return None

    if anchor.startswith('user-content-'):
        # Ignored when URI is already prefixed.
        return None

    return f'{baseuri}#user-content-{anchor}'


def setup(app: Sphinx) -> Dict[str, Any]:
app.add_builder(CheckExternalLinksBuilder)
app.add_post_transform(HyperlinkCollector)
Expand All @@ -658,6 +683,9 @@ def setup(app: Sphinx) -> Dict[str, Any]:
app.add_config_value('linkcheck_anchors_ignore', ["^!"], None)
app.add_config_value('linkcheck_rate_limit_timeout', 300.0, None)

app.add_event('linkcheck-process-uri')
app.connect('linkcheck-process-uri', rewrite_github_anchor)

return {
'version': 'builtin',
'parallel_read_safe': True,
Expand Down
2 changes: 2 additions & 0 deletions tests/roots/test-linkcheck/links.txt
Expand Up @@ -13,6 +13,8 @@ Some additional anchors to exercise ignore code
* `Complete nonsense <https://localhost:7777/doesnotexist>`_
* `Example valid local file <conf.py>`_
* `Example invalid local file <path/to/notfound>`_
* https://github.com/sphinx-doc/sphinx#documentation
* https://github.com/sphinx-doc/sphinx#user-content-testing

.. image:: https://www.google.com/image.png
.. figure:: https://www.google.com/image2.png
10 changes: 7 additions & 3 deletions tests/test_build_linkcheck.py
Expand Up @@ -65,8 +65,8 @@ def test_defaults_json(app):
"info"]:
assert attr in row

assert len(content.splitlines()) == 10
assert len(rows) == 10
assert len(content.splitlines()) == 12
assert len(rows) == 12
# the output order of the rows is not stable
# due to possible variance in network latency
rowsby = {row["uri"]: row for row in rows}
Expand All @@ -87,7 +87,7 @@ def test_defaults_json(app):
assert dnerow['uri'] == 'https://localhost:7777/doesnotexist'
assert rowsby['https://www.google.com/image2.png'] == {
'filename': 'links.txt',
'lineno': 18,
'lineno': 20,
'status': 'broken',
'code': 0,
'uri': 'https://www.google.com/image2.png',
Expand All @@ -101,6 +101,10 @@ def test_defaults_json(app):
# images should fail
assert "Not Found for url: https://www.google.com/image.png" in \
rowsby["https://www.google.com/image.png"]["info"]
# The anchor of the URI for github.com is automatically modified
assert 'https://github.com/sphinx-doc/sphinx#documentation' not in rowsby
assert 'https://github.com/sphinx-doc/sphinx#user-content-documentation' in rowsby
assert 'https://github.com/sphinx-doc/sphinx#user-content-testing' in rowsby


@pytest.mark.sphinx(
Expand Down

0 comments on commit 92335bd

Please sign in to comment.