Close #3985: Implement #noqa for i18n

When the cross-references in the original paragraph and in the translated paragraph do not match, a warning is emitted. This is useful because it catches mistakes, but it can also be an annoyance, since sometimes the cross-references are expected not to match. For example, a reference that is repeated in the original text may need to be factored out for good style in the target language. Another example: if a term that is universally understood in the source language has to be translated into a term that not every reader will recognize as its translation, adding a reference to the glossary can be warranted. This commit allows the translated message to start with '#noqa' in order to disable the warning.
sphinx-doc · Jan 16, 2022 · dd6a074 · dd6a074
1 parent 9cecd91
commit dd6a074
Show file tree

Hide file tree

Showing 7 changed files with 150 additions and 6 deletions.
diff --git a/doc/usage/advanced/intl.rst b/doc/usage/advanced/intl.rst
@@ -68,6 +68,23 @@ be translated you need to follow these instructions:
 * Run your desired build.
 
 
+In order to protect against mistakes, a warning is emitted if
+cross-references in the translated paragraph do not match those from the
+original.  This can be turned off globally using the
+:confval:`suppress_warnings` configuration variable.  Alternatively, to
+turn it off for one message only, start the message with ``#noqa`` like
+this::
+
+   #noqa Lorem ipsum dolor sit amet ...
+
+(Write ``\#noqa`` in case you want to have "#noqa" literally in the
+text.  This does not apply to code blocks, where ``#noqa`` is ignored
+because code blocks do not contain references anyway.)
+
+.. versionadded:: 4.4
+   The ``#noqa`` mechanism.
+
+
 Translating with sphinx-intl
 ----------------------------
 

diff --git a/sphinx/transforms/i18n.py b/sphinx/transforms/i18n.py
@@ -9,6 +9,7 @@
 """
 
 from os import path
+from re import match
 from textwrap import indent
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, TypeVar
 
@@ -82,6 +83,14 @@ def publish_msgstr(app: "Sphinx", source: str, source_path: str, source_line: in
         config.rst_prolog = rst_prolog  # type: ignore
 
 
+def parse_noqa(source: str) -> Tuple[str, bool]:
+    m = match(r"\s*#\s*noqa(\s+|$)", source)
+    if m:
+        return source[m.end():], True
+    else:
+        return source, False
+
+
 class PreserveTranslatableMessages(SphinxTransform):
     """
     Preserve original translatable messages before translation
@@ -119,6 +128,14 @@ def apply(self, **kwargs: Any) -> None:
         # phase1: replace reference ids with translated names
         for node, msg in extract_messages(self.document):
             msgstr = catalog.gettext(msg)
+
+            # There is no point in having #noqa on literal blocks because
+            # they cannot contain references.  Recognizing it would just
+            # completely prevent escaping the #noqa.  Outside of literal
+            # blocks, one can always write \#noqa.
+            if not isinstance(node, LITERAL_TYPE_NODES):
+                msgstr, _ = parse_noqa(msgstr)
+
             # XXX add marker to untranslated parts
             if not msgstr or msgstr == msg or not msgstr.strip():
                 # as-of-yet untranslated
@@ -139,6 +156,7 @@ def apply(self, **kwargs: Any) -> None:
 
             patch = publish_msgstr(self.app, msgstr, source,
                                    node.line, self.config, settings)
+            # FIXME: no warnings about inconsistent references in this part
             # XXX doctest and other block markup
             if not isinstance(patch, nodes.paragraph):
                 continue  # skip for now
@@ -228,6 +246,11 @@ def apply(self, **kwargs: Any) -> None:
                 continue  # skip if the node is already translated by phase1
 
             msgstr = catalog.gettext(msg)
+
+            # See above.
+            if not isinstance(node, LITERAL_TYPE_NODES):
+                msgstr, noqa = parse_noqa(msgstr)
+
             # XXX add marker to untranslated parts
             if not msgstr or msgstr == msg:  # as-of-yet untranslated
                 continue
@@ -273,7 +296,6 @@ def apply(self, **kwargs: Any) -> None:
 
             patch = publish_msgstr(self.app, msgstr, source,
                                    node.line, self.config, settings)
-
             # Structural Subelements phase2
             if isinstance(node, nodes.title):
                 # get <title> node that placed as a first child
@@ -303,7 +325,7 @@ def list_replace_or_append(lst: List[N], old: N, new: N) -> None:
             is_autofootnote_ref = NodeMatcher(nodes.footnote_reference, auto=Any)
             old_foot_refs: List[nodes.footnote_reference] = list(node.findall(is_autofootnote_ref))  # NOQA
             new_foot_refs: List[nodes.footnote_reference] = list(patch.findall(is_autofootnote_ref))  # NOQA
-            if len(old_foot_refs) != len(new_foot_refs):
+            if not noqa and len(old_foot_refs) != len(new_foot_refs):
                 old_foot_ref_rawsources = [ref.rawsource for ref in old_foot_refs]
                 new_foot_ref_rawsources = [ref.rawsource for ref in new_foot_refs]
                 logger.warning(__('inconsistent footnote references in translated message.' +
@@ -346,7 +368,7 @@ def list_replace_or_append(lst: List[N], old: N, new: N) -> None:
             is_refnamed_ref = NodeMatcher(nodes.reference, refname=Any)
             old_refs: List[nodes.reference] = list(node.findall(is_refnamed_ref))
             new_refs: List[nodes.reference] = list(patch.findall(is_refnamed_ref))
-            if len(old_refs) != len(new_refs):
+            if not noqa and len(old_refs) != len(new_refs):
                 old_ref_rawsources = [ref.rawsource for ref in old_refs]
                 new_ref_rawsources = [ref.rawsource for ref in new_refs]
                 logger.warning(__('inconsistent references in translated message.' +
@@ -374,7 +396,7 @@ def list_replace_or_append(lst: List[N], old: N, new: N) -> None:
             old_foot_refs = list(node.findall(is_refnamed_footnote_ref))
             new_foot_refs = list(patch.findall(is_refnamed_footnote_ref))
             refname_ids_map: Dict[str, List[str]] = {}
-            if len(old_foot_refs) != len(new_foot_refs):
+            if not noqa and len(old_foot_refs) != len(new_foot_refs):
                 old_foot_ref_rawsources = [ref.rawsource for ref in old_foot_refs]
                 new_foot_ref_rawsources = [ref.rawsource for ref in new_foot_refs]
                 logger.warning(__('inconsistent footnote references in translated message.' +
@@ -393,7 +415,7 @@ def list_replace_or_append(lst: List[N], old: N, new: N) -> None:
             old_cite_refs: List[nodes.citation_reference] = list(node.findall(is_citation_ref))
             new_cite_refs: List[nodes.citation_reference] = list(patch.findall(is_citation_ref))  # NOQA
             refname_ids_map = {}
-            if len(old_cite_refs) != len(new_cite_refs):
+            if not noqa and len(old_cite_refs) != len(new_cite_refs):
                 old_cite_ref_rawsources = [ref.rawsource for ref in old_cite_refs]
                 new_cite_ref_rawsources = [ref.rawsource for ref in new_cite_refs]
                 logger.warning(__('inconsistent citation references in translated message.' +
@@ -413,7 +435,7 @@ def list_replace_or_append(lst: List[N], old: N, new: N) -> None:
             old_xrefs = list(node.findall(addnodes.pending_xref))
             new_xrefs = list(patch.findall(addnodes.pending_xref))
             xref_reftarget_map = {}
-            if len(old_xrefs) != len(new_xrefs):
+            if not noqa and len(old_xrefs) != len(new_xrefs):
                 old_xref_rawsources = [xref.rawsource for xref in old_xrefs]
                 new_xref_rawsources = [xref.rawsource for xref in new_xrefs]
                 logger.warning(__('inconsistent term references in translated message.' +

diff --git a/tests/roots/test-intl/literalblock.txt b/tests/roots/test-intl/literalblock.txt
@@ -49,6 +49,14 @@ code blocks
       literal-block
       in list
 
+.. highlight:: none
+
+::
+
+   test_code_for_noqa()
+   continued()
+
+
 doctest blocks
 ==============
 

diff --git a/tests/roots/test-intl/noqa.txt b/tests/roots/test-intl/noqa.txt
@@ -0,0 +1,16 @@
+First section
+=============
+
+Some text with a reference, :ref:`next-section`.
+
+Another reference: :ref:`next-section`.
+
+This should allow to test escaping ``#noqa``.
+
+.. _next-section:
+
+Next section
+============
+
+Some text, again referring to the section: :ref:`next-section`.
+
diff --git a/tests/roots/test-intl/xx/LC_MESSAGES/literalblock.po b/tests/roots/test-intl/xx/LC_MESSAGES/literalblock.po
@@ -77,6 +77,11 @@ msgid "literal-block\n"
 msgstr "LITERAL-BLOCK\n"
 "IN LIST"
 
+msgid "test_code_for_noqa()\n"
+"continued()"
+msgstr "#noqa should not get stripped\n"
+"# from this block."
+
 msgid "doctest blocks"
 msgstr "DOCTEST-BLOCKS"
 

diff --git a/tests/roots/test-intl/xx/LC_MESSAGES/noqa.po b/tests/roots/test-intl/xx/LC_MESSAGES/noqa.po
@@ -0,0 +1,48 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C)
+# This file is distributed under the same license as the Sphinx intl <Tests> package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: \n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2022-01-16 15:23+0100\n"
+"PO-Revision-Date: 2022-01-16 15:23+0100\n"
+"Last-Translator: Jean Abou Samra <jean@abou-samra.fr>\n"
+"Language-Team: \n"
+"Language: xx\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"X-Generator: Poedit 3.0\n"
+
+#: ../tests/roots/test-intl/noqa.txt:2
+msgid "First section"
+msgstr ""
+
+#: ../tests/roots/test-intl/noqa.txt:4
+msgid "Some text with a reference, :ref:`next-section`."
+msgstr "#noqa TRANSLATED TEXT WITHOUT REFERENCE."
+
+#: ../tests/roots/test-intl/noqa.txt:6
+msgid "Another reference: :ref:`next-section`."
+msgstr ""
+" \n"
+"# noqa\n"
+"  \n"
+"\n"
+"  TEST WHITESPACE INSENSITIVITY."
+
+#: ../tests/roots/test-intl/noqa.txt:8
+msgid "This should allow to test escaping ``#noqa``."
+msgstr "\\#noqa ``#noqa`` is escaped at the beginning of this string."
+
+#: ../tests/roots/test-intl/noqa.txt:13
+msgid "Next section"
+msgstr ""
+
+# This edge case should not fail.
+#: ../tests/roots/test-intl/noqa.txt:15
+msgid "Some text, again referring to the section: :ref:`next-section`."
+msgstr "#noqa"
diff --git a/tests/test_intl.py b/tests/test_intl.py
@@ -192,6 +192,31 @@ def test_text_inconsistency_warnings(app, warning):
     assert_re_search(expected_citation_warning_expr, warnings)
 
 
+@sphinx_intl
+@pytest.mark.sphinx('text')
+@pytest.mark.test_params(shared_result='test_intl_basic')
+def test_noqa(app, warning):
+    app.build()
+    result = (app.outdir / 'noqa.txt').read_text()
+    expect = r"""First section
+*************
+
+TRANSLATED TEXT WITHOUT REFERENCE.
+
+TEST WHITESPACE INSENSITIVITY.
+
+#noqa "#noqa" is escaped at the beginning of this string.
+
+
+Next section
+************
+
+Some text, again referring to the section: Next section.
+"""
+    assert result == expect
+    assert "next-section" not in getwarning(warning)
+
+
 @sphinx_intl
 @pytest.mark.sphinx('text')
 @pytest.mark.test_params(shared_result='test_intl_basic')
@@ -1186,6 +1211,9 @@ def test_additional_targets_should_be_translated(app):
         """<span class="c1"># SYS IMPORTING</span>""")
     assert_count(expected_expr, result, 1)
 
+    # '#noqa' should remain in literal blocks.
+    assert_count("#noqa", result, 1)
+
     # [raw.txt]
 
     result = (app.outdir / 'raw.html').read_text()