diff --git a/sphinx/io.py b/sphinx/io.py index 459d250e45f..135010b962b 100644 --- a/sphinx/io.py +++ b/sphinx/io.py @@ -143,6 +143,11 @@ def setup(self, app: Sphinx) -> None: if transform in self.transforms: self.transforms.remove(transform) + def parse(self) -> None: + """Override the BaseReader parse method to call self.parser.parse_inline().""" + self.document = document = self.new_document() + self.parser.parse_inline(self.input, document, 1) + class SphinxDummyWriter(UnfilteredWriter): """Dummy writer module used for generating doctree.""" diff --git a/sphinx/parsers.py b/sphinx/parsers.py index 955d59b3b79..7375059d68c 100644 --- a/sphinx/parsers.py +++ b/sphinx/parsers.py @@ -7,7 +7,7 @@ import docutils.parsers import docutils.parsers.rst from docutils import nodes -from docutils.parsers.rst import states +from docutils.parsers.rst import languages, states from docutils.statemachine import StringList from docutils.transforms.universal import SmartQuotes @@ -46,6 +46,11 @@ def set_application(self, app: Sphinx) -> None: self.config = app.config self.env = app.env + def parse_inline(self, inputstring: str, document: nodes.document, lineno: int) -> None: + """Parse the inline elements of a text block and generate a document tree.""" + msg = 'Parser subclasses must implement parse_inline' + raise NotImplementedError(msg) + class RSTParser(docutils.parsers.rst.Parser, Parser): """A reST parser for Sphinx.""" @@ -60,6 +65,30 @@ def get_transforms(self) -> list[type[Transform]]: transforms.remove(SmartQuotes) return transforms + def parse_inline(self, inputstring: str, document: nodes.document, lineno: int) -> None: + """Parse inline syntax from text and generate a document tree.""" + # Avoid "Literal block expected; none found." warnings. + if inputstring.endswith('::'): + inputstring = inputstring[:-1] + + reporter = document.reporter + reporter.get_source_and_line = lambda x: (document['source'], x) # type: ignore[attr-defined] + language = languages.get_language(document.settings.language_code, reporter) + if self.inliner is None: + inliner = states.Inliner() + else: + inliner = self.inliner + inliner.init_customizations(document.settings) + memo = states.Struct( + document=document, + reporter=reporter, + language=language, + ) + textnodes, messages = inliner.parse(inputstring, lineno, memo, document) + p = nodes.paragraph(inputstring, '', *textnodes) + p.source, p.line = document['source'], lineno + document += [p, *messages] + def parse(self, inputstring: str | StringList, document: nodes.document) -> None: """Parse text and generate a document tree.""" self.setup_parse(inputstring, document) # type: ignore[arg-type] diff --git a/sphinx/transforms/i18n.py b/sphinx/transforms/i18n.py index 88b7f416e4b..5c78e0e7468 100644 --- a/sphinx/transforms/i18n.py +++ b/sphinx/transforms/i18n.py @@ -5,7 +5,6 @@ import contextlib from os import path from re import DOTALL, match -from textwrap import indent from typing import TYPE_CHECKING, Any, TypeVar from docutils import nodes @@ -21,7 +20,6 @@ from sphinx.util.i18n import docname_to_domain from sphinx.util.index_entries import split_index_msg from sphinx.util.nodes import ( - IMAGE_TYPE_NODES, LITERAL_TYPE_NODES, NodeMatcher, extract_messages, @@ -380,25 +378,12 @@ def apply(self, **kwargs: Any) -> None: node['translated'] = True continue - # Avoid "Literal block expected; none found." warnings. - # If msgstr ends with '::' then it cause warning message at - # parser.parse() processing. - # literal-block-warning is only appear in avobe case. - if msgstr.strip().endswith('::'): - msgstr += '\n\n dummy literal' - # dummy literal node will discard by 'patch = patch[0]' - - # literalblock need literal block notation to avoid it become - # paragraph. + # literalblock can not contain references or terms if isinstance(node, LITERAL_TYPE_NODES): - msgstr = '::\n\n' + indent(msgstr, ' ' * 3) + continue patch = publish_msgstr(self.app, msgstr, source, node.line, self.config, settings) # type: ignore[arg-type] - # FIXME: no warnings about inconsistent references in this part - # XXX doctest and other block markup - if not isinstance(patch, nodes.paragraph): - continue # skip for now updater = _NodeUpdater(node, patch, self.document, noqa=False) processed = updater.update_title_mapping() @@ -453,45 +438,25 @@ def apply(self, **kwargs: Any) -> None: node['alt'] = msgstr continue - # Avoid "Literal block expected; none found." warnings. - # If msgstr ends with '::' then it cause warning message at - # parser.parse() processing. - # literal-block-warning is only appear in avobe case. - if msgstr.strip().endswith('::'): - msgstr += '\n\n dummy literal' - # dummy literal node will discard by 'patch = patch[0]' - - # literalblock need literal block notation to avoid it become - # paragraph. - if isinstance(node, LITERAL_TYPE_NODES): - msgstr = '::\n\n' + indent(msgstr, ' ' * 3) + if isinstance(node, nodes.image) and node.get('uri') == msg: + node['uri'] = msgstr + continue - # Structural Subelements phase1 - # There is a possibility that only the title node is created. - # see: https://docutils.sourceforge.io/docs/ref/doctree.html#structural-subelements - if isinstance(node, nodes.title): - # This generates:
msgstr
- msgstr = msgstr + '\n' + '=' * len(msgstr) * 2 + # literalblock do not need to be parsed as they do not contain inline syntax, + # except for parsed-literals, but they use the same node type, so we differentiate + # them based on their number of children. + if isinstance(node, LITERAL_TYPE_NODES) and len(node.children) <= 1: + node.children = [nodes.Text(msgstr)] + # for highlighting that expects .rawsource and .astext() are same. + node.rawsource = node.astext() + node['translated'] = True + continue patch = publish_msgstr(self.app, msgstr, source, node.line, self.config, settings) # type: ignore[arg-type] - # Structural Subelements phase2 - if isinstance(node, nodes.title): - # get node that placed as a first child - patch = patch.next_node() # type: ignore[assignment] # ignore unexpected markups in translation message - unexpected: tuple[type[nodes.Element], ...] = ( - nodes.paragraph, # expected form of translation - nodes.title, # generated by above "Subelements phase2" - ) - - # following types are expected if - # config.gettext_additional_targets is configured - unexpected += LITERAL_TYPE_NODES - unexpected += IMAGE_TYPE_NODES - - if not isinstance(patch, unexpected): + if not isinstance(patch, nodes.paragraph): continue # skip updater = _NodeUpdater(node, patch, self.document, noqa) @@ -502,15 +467,6 @@ def apply(self, **kwargs: Any) -> None: updater.update_pending_xrefs() updater.update_leaves() - # for highlighting that expects .rawsource and .astext() are same. - if isinstance(node, LITERAL_TYPE_NODES): - node.rawsource = node.astext() - - if isinstance(node, nodes.image) and node.get('alt') != msg: - node['uri'] = patch['uri'] - node['translated'] = False - continue # do not mark translated - node['translated'] = True # to avoid double translation if 'index' in self.config.gettext_additional_targets: diff --git a/sphinx/util/nodes.py b/sphinx/util/nodes.py index bbc1f64e481..528f1729bef 100644 --- a/sphinx/util/nodes.py +++ b/sphinx/util/nodes.py @@ -266,8 +266,7 @@ def extract_messages(doctree: Element) -> Iterable[tuple[Element, str]]: if node.get('alt'): yield node, node['alt'] if node.get('translatable'): - image_uri = node.get('original_uri', node['uri']) - msg = f'.. image:: {image_uri}' + msg = node.get('original_uri', node['uri']) else: msg = '' elif isinstance(node, nodes.meta): diff --git a/tests/roots/test-intl/literalblock.txt b/tests/roots/test-intl/literalblock.txt index 583b5b61072..b930eb773e8 100644 --- a/tests/roots/test-intl/literalblock.txt +++ b/tests/roots/test-intl/literalblock.txt @@ -69,3 +69,11 @@ doctest blocks >>> if __name__ == '__main__': # if run this py file as python script ... main() # call main + +parsed literal +============== + +.. parsed-literal:: + + **this** *is* + `parsed literal`_ diff --git a/tests/roots/test-intl/markup.txt b/tests/roots/test-intl/markup.txt index d167a042bca..6d6f6e51bbe 100644 --- a/tests/roots/test-intl/markup.txt +++ b/tests/roots/test-intl/markup.txt @@ -4,3 +4,5 @@ i18n with strange markup 1. title starting with 1. ------------------------- +A. Einstein was a really +smart dude. diff --git a/tests/roots/test-intl/xx/LC_MESSAGES/figure.po b/tests/roots/test-intl/xx/LC_MESSAGES/figure.po index 64bbdf763db..4678aaf69a5 100644 --- a/tests/roots/test-intl/xx/LC_MESSAGES/figure.po +++ b/tests/roots/test-intl/xx/LC_MESSAGES/figure.po @@ -40,14 +40,14 @@ msgstr "IMAGE URL AND ALT" msgid "img" msgstr "IMG -> I18N" -msgid ".. image:: img.png" -msgstr ".. image:: i18n.png" +msgid "img.png" +msgstr "i18n.png" msgid "i18n" msgstr "I18N -> IMG" -msgid ".. image:: i18n.png" -msgstr ".. image:: img.png" +msgid "i18n.png" +msgstr "img.png" msgid "image on substitution" msgstr "IMAGE ON SUBSTITUTION" diff --git a/tests/roots/test-intl/xx/LC_MESSAGES/literalblock.po b/tests/roots/test-intl/xx/LC_MESSAGES/literalblock.po index d320d957e42..18f83185c7f 100644 --- a/tests/roots/test-intl/xx/LC_MESSAGES/literalblock.po +++ b/tests/roots/test-intl/xx/LC_MESSAGES/literalblock.po @@ -125,3 +125,13 @@ msgstr "" ">>> if __name__ == '__main__': # IF RUN THIS PY FILE AS PYTHON SCRIPT\n" "... main() # CALL MAIN" +msgid "parsed literal" +msgstr "PARSED LITERAL" + +msgid "" +"**this** *is*\n" +"`parsed literal`_" +msgstr "" +"**THIS** *IS*\n" +"`PARSED LITERAL`_" + diff --git a/tests/roots/test-intl/xx/LC_MESSAGES/markup.po b/tests/roots/test-intl/xx/LC_MESSAGES/markup.po index ad6de9b4417..0235b458189 100644 --- a/tests/roots/test-intl/xx/LC_MESSAGES/markup.po +++ b/tests/roots/test-intl/xx/LC_MESSAGES/markup.po @@ -23,3 +23,6 @@ msgstr "I18N WITH STRANGE MARKUP" msgid "1. title starting with 1." msgstr "1. TITLE STARTING WITH 1." +msgid "A. Einstein was a really smart dude." +msgstr "A. EINSTEIN WAS A REALLY SMART DUDE." + diff --git a/tests/roots/test-intl_substitution_definitions/xx/LC_MESSAGES/prolog_epilog_substitution.po b/tests/roots/test-intl_substitution_definitions/xx/LC_MESSAGES/prolog_epilog_substitution.po index 3ce51fe4ffc..7b32be8da7e 100644 --- a/tests/roots/test-intl_substitution_definitions/xx/LC_MESSAGES/prolog_epilog_substitution.po +++ b/tests/roots/test-intl_substitution_definitions/xx/LC_MESSAGES/prolog_epilog_substitution.po @@ -28,11 +28,11 @@ msgstr "SUBSTITUTED IMAGE |subst_epilog_2| HERE." msgid "subst_prolog_2" msgstr "SUBST_PROLOG_2 TRANSLATED" -msgid ".. image:: /img.png" -msgstr ".. image:: /i18n.png" +msgid "/img.png" +msgstr "/i18n.png" msgid "subst_epilog_2" msgstr "SUBST_EPILOG_2 TRANSLATED" -msgid ".. image:: /i18n.png" -msgstr ".. image:: /img.png" +msgid "/i18n.png" +msgstr "/img.png" diff --git a/tests/test_builders/test_build_gettext.py b/tests/test_builders/test_build_gettext.py index dc8f4c9dcbd..15c9204134d 100644 --- a/tests/test_builders/test_build_gettext.py +++ b/tests/test_builders/test_build_gettext.py @@ -206,11 +206,11 @@ def test_gettext_prolog_epilog_substitution(app): "This is content that contains |subst_prolog_1|.", "Substituted image |subst_prolog_2| here.", "subst_prolog_2", - ".. image:: /img.png", + "/img.png", "This is content that contains |subst_epilog_1|.", "Substituted image |subst_epilog_2| here.", "subst_epilog_2", - ".. image:: /i18n.png", + "/i18n.png", ] @@ -265,4 +265,6 @@ def test_gettext_literalblock_additional(app): "function\\n... sys.stdout.write('hello') # call write method of " "stdout object\\n>>>\\n>>> if __name__ == '__main__': # if run this py " 'file as python script\\n... main() # call main', + 'parsed literal', + '**this** *is*\\n`parsed literal`_', ] diff --git a/tests/test_intl/test_intl.py b/tests/test_intl/test_intl.py index 540af2e62cb..03f0c0af693 100644 --- a/tests/test_intl/test_intl.py +++ b/tests/test_intl/test_intl.py @@ -244,7 +244,7 @@ def test_text_definition_terms(app): app.build() # --- definition terms: regression test for #975, #2198, #2205 result = (app.outdir / 'definition_terms.txt').read_text(encoding='utf8') - expect = ("13. I18N WITH DEFINITION TERMS" + expect = ("14. I18N WITH DEFINITION TERMS" "\n******************************\n" "\nSOME TERM" "\n THE CORRESPONDING DEFINITION\n" @@ -264,7 +264,7 @@ def test_text_glossary_term(app, warning): app.build() # --- glossary terms: regression test for #1090 result = (app.outdir / 'glossary_terms.txt').read_text(encoding='utf8') - expect = (r"""18. I18N WITH GLOSSARY TERMS + expect = (r"""19. I18N WITH GLOSSARY TERMS **************************** SOME NEW TERM @@ -299,7 +299,7 @@ def test_text_glossary_term_inconsistencies(app, warning): app.build() # --- glossary term inconsistencies: regression test for #1090 result = (app.outdir / 'glossary_terms_inconsistency.txt').read_text(encoding='utf8') - expect = ("19. I18N WITH GLOSSARY TERMS INCONSISTENCY" + expect = ("20. I18N WITH GLOSSARY TERMS INCONSISTENCY" "\n******************************************\n" "\n1. LINK TO *SOME NEW TERM*.\n" "\n2. LINK TO *TERM NOT IN GLOSSARY*.\n") @@ -349,7 +349,7 @@ def test_text_seealso(app): app.build() # --- seealso result = (app.outdir / 'seealso.txt').read_text(encoding='utf8') - expect = ("12. I18N WITH SEEALSO" + expect = ("13. I18N WITH SEEALSO" "\n*********************\n" "\nSee also: SHORT TEXT 1\n" "\nSee also: LONG TEXT 1\n" @@ -366,13 +366,13 @@ def test_text_figure_captions(app): app.build() # --- figure captions: regression test for #940 result = (app.outdir / 'figure.txt').read_text(encoding='utf8') - expect = ("14. I18N WITH FIGURE CAPTION" + expect = ("15. I18N WITH FIGURE CAPTION" "\n****************************\n" "\n [image]MY CAPTION OF THE FIGURE\n" "\n MY DESCRIPTION PARAGRAPH1 OF THE FIGURE.\n" "\n MY DESCRIPTION PARAGRAPH2 OF THE FIGURE.\n" "\n" - "\n14.1. FIGURE IN THE BLOCK" + "\n15.1. FIGURE IN THE BLOCK" "\n=========================\n" "\nBLOCK\n" "\n [image]MY CAPTION OF THE FIGURE\n" @@ -380,7 +380,7 @@ def test_text_figure_captions(app): "\n MY DESCRIPTION PARAGRAPH2 OF THE FIGURE.\n" "\n" "\n" - "14.2. IMAGE URL AND ALT\n" + "15.2. IMAGE URL AND ALT\n" "=======================\n" "\n" "[image: I18N -> IMG][image]\n" @@ -388,11 +388,11 @@ def test_text_figure_captions(app): " [image: IMG -> I18N][image]\n" "\n" "\n" - "14.3. IMAGE ON SUBSTITUTION\n" + "15.3. IMAGE ON SUBSTITUTION\n" "===========================\n" "\n" "\n" - "14.4. IMAGE UNDER NOTE\n" + "15.4. IMAGE UNDER NOTE\n" "======================\n" "\n" "Note:\n" @@ -428,7 +428,7 @@ def test_text_docfields(app): app.build() # --- docfields result = (app.outdir / 'docfields.txt').read_text(encoding='utf8') - expect = ("21. I18N WITH DOCFIELDS" + expect = ("22. I18N WITH DOCFIELDS" "\n***********************\n" "\nclass Cls1\n" "\n Parameters:" @@ -1322,6 +1322,9 @@ def test_xml_strange_markup(app): title1, = subsec1.findall('title') assert_elem(title1, ['1. TITLE STARTING WITH 1.']) + pars = subsec1.findall('paragraph') + assert_elem(pars[0], ['A. EINSTEIN WAS A REALLY SMART DUDE.']) + @sphinx_intl @pytest.mark.sphinx('html') @@ -1443,7 +1446,7 @@ def test_additional_targets_should_be_translated(app): """<span class="no">LIST</span>""") assert_count(expected_expr, result, 1) - # doctest block should not be translated but be highlighted + # doctest block should be translated and highlighted expected_expr = ( """<span class="gp">>>> </span>""" """<span class="kn">import</span> <span class="nn">sys</span> """ @@ -1453,6 +1456,11 @@ def test_additional_targets_should_be_translated(app): # '#noqa' should remain in literal blocks. assert_count("#noqa", result, 1) + # parsed literal should be translated + expected_expr = ('<strong>THIS</strong> <em>IS</em>\n' + '<a class="reference internal" href="#parsed-literal">PARSED LITERAL</a>') + assert_count(expected_expr, result, 1) + # [raw.txt] result = (app.outdir / 'raw.html').read_text(encoding='utf8') diff --git a/tests/test_markup/test_parse_inline.py b/tests/test_markup/test_parse_inline.py new file mode 100644 index 00000000000..3f1c86b3ddc --- /dev/null +++ b/tests/test_markup/test_parse_inline.py @@ -0,0 +1,142 @@ +import os + +import docutils +import pytest +from docutils import frontend, nodes +from docutils.io import StringInput + +from sphinx import addnodes, parsers +from sphinx.io import SphinxStandaloneReader +from sphinx.util.console import strip_colors + +docutils_version = pytest.mark.skipif( + docutils.__version_info__ < (0, 19), reason="at least docutils 0.19 required" +) + + +@pytest.fixture() +def document(app): + settings = frontend.get_default_settings(parsers.RSTParser) + settings.env = app.builder.env + reader = SphinxStandaloneReader() + reader.setup(app) + reader.source = StringInput(source_path='dummy/document.rst') + reader.settings = settings + document = reader.new_document() + return document + + +@pytest.fixture() +def parser(app): + parser = parsers.RSTParser() + parser.set_application(app) + return parser + + +@docutils_version() +@pytest.mark.parametrize(('rst', 'expected'), [ + ( + # pep role + ':pep:`8`', + [addnodes.index, nodes.target, nodes.reference] + ), + ( + # rfc role + ':rfc:`2324`', + [addnodes.index, nodes.target, nodes.reference] + ), + ( + # correct interpretation of code with whitespace + '``code sample``', + [nodes.literal] + ), + ( + # no ampersands in guilabel + ':guilabel:`Foo`', + [nodes.inline] + ), + ( + # kbd role + ':kbd:`space`', + [nodes.literal] + ), + ( + # description list: simple + 'term\n description', + [nodes.Text] + ), + ( + # description list: with classifiers + 'term : class1 : class2\n description', + [nodes.Text] + ), + ( + # glossary (description list): multiple terms + '.. glossary::\n\n term1\n term2\n description', + [nodes.Text] + ), + ( + # basic inline markup + '**Strong** text and *emphasis*', + [nodes.strong, nodes.Text, nodes.emphasis] + ), + ( + # literal block + 'see this code block::\n\n hello world!', + [nodes.Text] + ), + ( + # missing literal block + 'see this code block::', + [nodes.Text] + ), + ( + # section title + 'This is a title\n================\n', + [nodes.Text] + ), + ( + # footnote reference + 'Reference a footnote [1]_', + [nodes.Text, nodes.footnote_reference] + ), + ( + # substitution reference + 'here is a |substituted| text', + [nodes.Text, nodes.substitution_reference, nodes.Text] + ), +]) +def test_inline_no_error(rst, expected, parser, document): + parser.parse_inline(rst, document, 1) + assert len(document.children) == 1 + paragraph = document.children[0] + assert paragraph.__class__ == nodes.paragraph + assert len(paragraph.children) == len(expected) + for i, child in enumerate(paragraph.children): + assert child.__class__ == expected[i] + + +@docutils_version() +@pytest.mark.parametrize(('rst', 'expected'), [ + ( + # invalid unfinished literal + '``code sample', + 'WARNING: Inline literal start-string without end-string.' + ), +]) +def test_inline_errors(rst, expected, parser, document, warning): + lineno = 5 + expected = f'dummy/document.rst:{lineno}: {expected}' + parser.parse_inline(rst, document, lineno) + assert len(document.children) > 1 + paragraph = document.children[0] + messages = document.children[1:] + assert paragraph.__class__ == nodes.paragraph + for message in messages: + assert message.__class__ == nodes.system_message + warnings = getwarning(warning) + assert expected in warnings + + +def getwarning(warnings): + return strip_colors(warnings.getvalue().replace(os.sep, '/'))