diff --git a/CHANGES b/CHANGES index 035a4f2a496..40fa83b447b 100644 --- a/CHANGES +++ b/CHANGES @@ -43,6 +43,9 @@ Features added text * #9176: i18n: Emit a debug message if message catalog file not found under :confval:`locale_dirs` +* #9016: linkcheck: Support checking anchors on github.com +* #9016: linkcheck: Add a new event :event:`linkcheck-process-uri` to modify + URIs before checking hyperlinks * #1874: py domain: Support union types using ``|`` in info-field-list * #9268: py domain: :confval:`python_use_unqualified_type_names` supports type field in info-field-list diff --git a/doc/extdev/appapi.rst b/doc/extdev/appapi.rst index 41318e9d62b..ae81fc5628f 100644 --- a/doc/extdev/appapi.rst +++ b/doc/extdev/appapi.rst @@ -384,6 +384,14 @@ Here is a more detailed list of these events. .. versionchanged:: 1.3 The return value can now specify a template name. +.. event:: linkcheck-process-uri (app, uri) + + Emitted when the linkcheck builder collects hyperlinks from the document. *uri* + is a collected URI. The event handlers can modify the URI by returning a + string. + + .. versionadded:: 4.1 + .. 
event:: build-finished (app, exception) Emitted when a build has finished, before Sphinx exits, usually used for diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index a46b80c081b..a635e79a98e 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -21,7 +21,7 @@ from threading import Thread from typing import (Any, Dict, Generator, List, NamedTuple, Optional, Pattern, Set, Tuple, Union, cast) -from urllib.parse import unquote, urlparse +from urllib.parse import unquote, urlparse, urlunparse from docutils import nodes from docutils.nodes import Element @@ -627,6 +627,10 @@ def run(self, **kwargs: Any) -> None: if 'refuri' not in refnode: continue uri = refnode['refuri'] + newuri = self.app.emit_firstresult('linkcheck-process-uri', uri) + if newuri: + uri = newuri + lineno = get_node_line(refnode) uri_info = Hyperlink(uri, self.env.docname, lineno) if uri not in hyperlinks: @@ -636,12 +640,31 @@ def run(self, **kwargs: Any) -> None: for imgnode in self.document.traverse(nodes.image): uri = imgnode['candidates'].get('?') if uri and '://' in uri: + newuri = self.app.emit_firstresult('linkcheck-process-uri', uri) + if newuri: + uri = newuri + lineno = get_node_line(imgnode) uri_info = Hyperlink(uri, self.env.docname, lineno) if uri not in hyperlinks: hyperlinks[uri] = uri_info +def rewrite_github_anchor(app: Sphinx, uri: str) -> Optional[str]: + """Rewrite the anchor of a hyperlink to github.com + + The anchors on github.com are dynamically generated. A fragment lacking the + ``user-content-`` prefix gets it added; for any other URI, None is returned. 
+ """ + parsed = urlparse(uri) + if parsed.hostname == "github.com" and parsed.fragment: + prefixed = parsed.fragment.startswith('user-content-') + if not prefixed: + fragment = f'user-content-{parsed.fragment}' + return urlunparse(parsed._replace(fragment=fragment)) + return None + + def setup(app: Sphinx) -> Dict[str, Any]: app.add_builder(CheckExternalLinksBuilder) app.add_post_transform(HyperlinkCollector) @@ -658,6 +681,9 @@ def setup(app: Sphinx) -> Dict[str, Any]: app.add_config_value('linkcheck_anchors_ignore', ["^!"], None) app.add_config_value('linkcheck_rate_limit_timeout', 300.0, None) + app.add_event('linkcheck-process-uri') + app.connect('linkcheck-process-uri', rewrite_github_anchor) + return { 'version': 'builtin', 'parallel_read_safe': True, diff --git a/tests/roots/test-linkcheck/links.txt b/tests/roots/test-linkcheck/links.txt index b389414c90c..c21968250d7 100644 --- a/tests/roots/test-linkcheck/links.txt +++ b/tests/roots/test-linkcheck/links.txt @@ -13,6 +13,8 @@ Some additional anchors to exercise ignore code * `Complete nonsense `_ * `Example valid local file `_ * `Example invalid local file `_ +* https://github.com/sphinx-doc/sphinx#documentation +* https://github.com/sphinx-doc/sphinx#user-content-testing .. image:: https://www.google.com/image.png .. 
figure:: https://www.google.com/image2.png diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py index fd7a5482abd..0d24c1dde52 100644 --- a/tests/test_build_linkcheck.py +++ b/tests/test_build_linkcheck.py @@ -65,8 +65,8 @@ def test_defaults_json(app): "info"]: assert attr in row - assert len(content.splitlines()) == 10 - assert len(rows) == 10 + assert len(content.splitlines()) == 12 + assert len(rows) == 12 # the output order of the rows is not stable # due to possible variance in network latency rowsby = {row["uri"]: row for row in rows} @@ -87,7 +87,7 @@ def test_defaults_json(app): assert dnerow['uri'] == 'https://localhost:7777/doesnotexist' assert rowsby['https://www.google.com/image2.png'] == { 'filename': 'links.txt', - 'lineno': 18, + 'lineno': 20, 'status': 'broken', 'code': 0, 'uri': 'https://www.google.com/image2.png', @@ -101,6 +101,10 @@ def test_defaults_json(app): # images should fail assert "Not Found for url: https://www.google.com/image.png" in \ rowsby["https://www.google.com/image.png"]["info"] + # The anchor of the URI for github.com is automatically modified + assert 'https://github.com/sphinx-doc/sphinx#documentation' not in rowsby + assert 'https://github.com/sphinx-doc/sphinx#user-content-documentation' in rowsby + assert 'https://github.com/sphinx-doc/sphinx#user-content-testing' in rowsby @pytest.mark.sphinx(