Skip to content

Commit

Permalink
Close #3985: Implement #noqa for i18n
Browse files Browse the repository at this point in the history
When cross-references in the original paragraph and the translated
paragraph do not match, a warning is emitted.  This is useful because
it helps catch mistakes, but it can also be an annoyance since
sometimes it is expected that the cross-references will not match.
For example, a reference that is repeated in the original text may
need to be factored out for good style in the target language.
Another example: if the translator needs to translate a universally
understood term in the source language into a term that not everyone
knows is the translation of this original term, adding a reference to
the glossary can be warranted.  This allows the translated message to
start with '#noqa' in order to disable the warning.
  • Loading branch information
jeanas committed Jan 16, 2022
1 parent 9cecd91 commit dd6a074
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 6 deletions.
17 changes: 17 additions & 0 deletions doc/usage/advanced/intl.rst
Expand Up @@ -68,6 +68,23 @@ be translated you need to follow these instructions:
* Run your desired build.


In order to protect against mistakes, a warning is emitted if
cross-references in the translated paragraph do not match those from the
original. This can be turned off globally using the
:confval:`suppress_warnings` configuration variable. Alternatively, to
turn it off for one message only, start the message with ``#noqa`` like
this::

   #noqa Lorem ipsum dolor sit amet ...

(Write ``\#noqa`` in case you want to have "#noqa" literally in the
text. This does not apply to code blocks, where ``#noqa`` is ignored
because code blocks do not contain references anyway.)

.. versionadded:: 4.4
   The ``#noqa`` mechanism.


Translating with sphinx-intl
----------------------------

Expand Down
34 changes: 28 additions & 6 deletions sphinx/transforms/i18n.py
Expand Up @@ -9,6 +9,7 @@
"""

from os import path
from re import match
from textwrap import indent
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, TypeVar

Expand Down Expand Up @@ -82,6 +83,14 @@ def publish_msgstr(app: "Sphinx", source: str, source_path: str, source_line: in
config.rst_prolog = rst_prolog # type: ignore


def parse_noqa(source: str) -> Tuple[str, bool]:
    """Detect and strip a leading ``#noqa`` marker from a message.

    The marker is recognized only at the very beginning of *source*:
    optional whitespace, ``#``, optional whitespace, ``noqa``, and then
    either at least one whitespace character or the end of the string.

    :param source: the message text to inspect
    :return: a ``(text, found)`` pair; when the marker is present, *text*
             is *source* with the marker (and the whitespace that follows
             it) removed and *found* is ``True``, otherwise *source* is
             returned unchanged together with ``False``
    """
    marker = match(r"\s*#\s*noqa(\s+|$)", source)
    if marker is None:
        # No marker at the start: hand the message back untouched.
        return source, False
    remainder = source[marker.end():]
    return remainder, True


class PreserveTranslatableMessages(SphinxTransform):
"""
Preserve original translatable messages before translation
Expand Down Expand Up @@ -119,6 +128,14 @@ def apply(self, **kwargs: Any) -> None:
# phase1: replace reference ids with translated names
for node, msg in extract_messages(self.document):
msgstr = catalog.gettext(msg)

# There is no point in having #noqa on literal blocks because
# they cannot contain references. Recognizing it would just
# completely prevent escaping the #noqa. Outside of literal
# blocks, one can always write \#noqa.
if not isinstance(node, LITERAL_TYPE_NODES):
msgstr, _ = parse_noqa(msgstr)

# XXX add marker to untranslated parts
if not msgstr or msgstr == msg or not msgstr.strip():
# as-of-yet untranslated
Expand All @@ -139,6 +156,7 @@ def apply(self, **kwargs: Any) -> None:

patch = publish_msgstr(self.app, msgstr, source,
node.line, self.config, settings)
# FIXME: no warnings about inconsistent references in this part
# XXX doctest and other block markup
if not isinstance(patch, nodes.paragraph):
continue # skip for now
Expand Down Expand Up @@ -228,6 +246,11 @@ def apply(self, **kwargs: Any) -> None:
continue # skip if the node is already translated by phase1

msgstr = catalog.gettext(msg)

# See above.
if not isinstance(node, LITERAL_TYPE_NODES):
msgstr, noqa = parse_noqa(msgstr)

# XXX add marker to untranslated parts
if not msgstr or msgstr == msg: # as-of-yet untranslated
continue
Expand Down Expand Up @@ -273,7 +296,6 @@ def apply(self, **kwargs: Any) -> None:

patch = publish_msgstr(self.app, msgstr, source,
node.line, self.config, settings)

# Structural Subelements phase2
if isinstance(node, nodes.title):
# get <title> node that placed as a first child
Expand Down Expand Up @@ -303,7 +325,7 @@ def list_replace_or_append(lst: List[N], old: N, new: N) -> None:
is_autofootnote_ref = NodeMatcher(nodes.footnote_reference, auto=Any)
old_foot_refs: List[nodes.footnote_reference] = list(node.findall(is_autofootnote_ref)) # NOQA
new_foot_refs: List[nodes.footnote_reference] = list(patch.findall(is_autofootnote_ref)) # NOQA
if len(old_foot_refs) != len(new_foot_refs):
if not noqa and len(old_foot_refs) != len(new_foot_refs):
old_foot_ref_rawsources = [ref.rawsource for ref in old_foot_refs]
new_foot_ref_rawsources = [ref.rawsource for ref in new_foot_refs]
logger.warning(__('inconsistent footnote references in translated message.' +
Expand Down Expand Up @@ -346,7 +368,7 @@ def list_replace_or_append(lst: List[N], old: N, new: N) -> None:
is_refnamed_ref = NodeMatcher(nodes.reference, refname=Any)
old_refs: List[nodes.reference] = list(node.findall(is_refnamed_ref))
new_refs: List[nodes.reference] = list(patch.findall(is_refnamed_ref))
if len(old_refs) != len(new_refs):
if not noqa and len(old_refs) != len(new_refs):
old_ref_rawsources = [ref.rawsource for ref in old_refs]
new_ref_rawsources = [ref.rawsource for ref in new_refs]
logger.warning(__('inconsistent references in translated message.' +
Expand Down Expand Up @@ -374,7 +396,7 @@ def list_replace_or_append(lst: List[N], old: N, new: N) -> None:
old_foot_refs = list(node.findall(is_refnamed_footnote_ref))
new_foot_refs = list(patch.findall(is_refnamed_footnote_ref))
refname_ids_map: Dict[str, List[str]] = {}
if len(old_foot_refs) != len(new_foot_refs):
if not noqa and len(old_foot_refs) != len(new_foot_refs):
old_foot_ref_rawsources = [ref.rawsource for ref in old_foot_refs]
new_foot_ref_rawsources = [ref.rawsource for ref in new_foot_refs]
logger.warning(__('inconsistent footnote references in translated message.' +
Expand All @@ -393,7 +415,7 @@ def list_replace_or_append(lst: List[N], old: N, new: N) -> None:
old_cite_refs: List[nodes.citation_reference] = list(node.findall(is_citation_ref))
new_cite_refs: List[nodes.citation_reference] = list(patch.findall(is_citation_ref)) # NOQA
refname_ids_map = {}
if len(old_cite_refs) != len(new_cite_refs):
if not noqa and len(old_cite_refs) != len(new_cite_refs):
old_cite_ref_rawsources = [ref.rawsource for ref in old_cite_refs]
new_cite_ref_rawsources = [ref.rawsource for ref in new_cite_refs]
logger.warning(__('inconsistent citation references in translated message.' +
Expand All @@ -413,7 +435,7 @@ def list_replace_or_append(lst: List[N], old: N, new: N) -> None:
old_xrefs = list(node.findall(addnodes.pending_xref))
new_xrefs = list(patch.findall(addnodes.pending_xref))
xref_reftarget_map = {}
if len(old_xrefs) != len(new_xrefs):
if not noqa and len(old_xrefs) != len(new_xrefs):
old_xref_rawsources = [xref.rawsource for xref in old_xrefs]
new_xref_rawsources = [xref.rawsource for xref in new_xrefs]
logger.warning(__('inconsistent term references in translated message.' +
Expand Down
8 changes: 8 additions & 0 deletions tests/roots/test-intl/literalblock.txt
Expand Up @@ -49,6 +49,14 @@ code blocks
literal-block
in list

.. highlight:: none

::

test_code_for_noqa()
continued()


doctest blocks
==============

Expand Down
16 changes: 16 additions & 0 deletions tests/roots/test-intl/noqa.txt
@@ -0,0 +1,16 @@
First section
=============

Some text with a reference, :ref:`next-section`.

Another reference: :ref:`next-section`.

This should allow to test escaping ``#noqa``.

.. _next-section:

Next section
============

Some text, again referring to the section: :ref:`next-section`.

5 changes: 5 additions & 0 deletions tests/roots/test-intl/xx/LC_MESSAGES/literalblock.po
Expand Up @@ -77,6 +77,11 @@ msgid "literal-block\n"
msgstr "LITERAL-BLOCK\n"
"IN LIST"

msgid "test_code_for_noqa()\n"
"continued()"
msgstr "#noqa should not get stripped\n"
"# from this block."

msgid "doctest blocks"
msgstr "DOCTEST-BLOCKS"

Expand Down
48 changes: 48 additions & 0 deletions tests/roots/test-intl/xx/LC_MESSAGES/noqa.po
@@ -0,0 +1,48 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C)
# This file is distributed under the same license as the Sphinx intl <Tests> package.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-01-16 15:23+0100\n"
"PO-Revision-Date: 2022-01-16 15:23+0100\n"
"Last-Translator: Jean Abou Samra <jean@abou-samra.fr>\n"
"Language-Team: \n"
"Language: xx\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Generator: Poedit 3.0\n"

#: ../tests/roots/test-intl/noqa.txt:2
msgid "First section"
msgstr ""

#: ../tests/roots/test-intl/noqa.txt:4
msgid "Some text with a reference, :ref:`next-section`."
msgstr "#noqa TRANSLATED TEXT WITHOUT REFERENCE."

#: ../tests/roots/test-intl/noqa.txt:6
msgid "Another reference: :ref:`next-section`."
msgstr ""
" \n"
"# noqa\n"
" \n"
"\n"
" TEST WHITESPACE INSENSITIVITY."

#: ../tests/roots/test-intl/noqa.txt:8
msgid "This should allow to test escaping ``#noqa``."
msgstr "\\#noqa ``#noqa`` is escaped at the beginning of this string."

#: ../tests/roots/test-intl/noqa.txt:13
msgid "Next section"
msgstr ""

# This edge case should not fail.
#: ../tests/roots/test-intl/noqa.txt:15
msgid "Some text, again referring to the section: :ref:`next-section`."
msgstr "#noqa"
28 changes: 28 additions & 0 deletions tests/test_intl.py
Expand Up @@ -192,6 +192,31 @@ def test_text_inconsistency_warnings(app, warning):
assert_re_search(expected_citation_warning_expr, warnings)


# Test for the ``#noqa`` marker in translated messages: runs a build with the
# 'text' builder and compares the generated noqa.txt with the expected text.
@sphinx_intl
@pytest.mark.sphinx('text')
@pytest.mark.test_params(shared_result='test_intl_basic')
def test_noqa(app, warning):
app.build()
result = (app.outdir / 'noqa.txt').read_text()
# Expected output: the leading ``#noqa`` markers are absent from the first two
# paragraphs, while the third one still shows "#noqa" at its start.
expect = r"""First section
*************
TRANSLATED TEXT WITHOUT REFERENCE.
TEST WHITESPACE INSENSITIVITY.
#noqa "#noqa" is escaped at the beginning of this string.
Next section
************
Some text, again referring to the section: Next section.
"""
assert result == expect
# No emitted warning may mention the "next-section" target.
assert "next-section" not in getwarning(warning)


@sphinx_intl
@pytest.mark.sphinx('text')
@pytest.mark.test_params(shared_result='test_intl_basic')
Expand Down Expand Up @@ -1186,6 +1211,9 @@ def test_additional_targets_should_be_translated(app):
"""<span class="c1"># SYS IMPORTING</span>""")
assert_count(expected_expr, result, 1)

# '#noqa' should remain in literal blocks.
assert_count("#noqa", result, 1)

# [raw.txt]

result = (app.outdir / 'raw.html').read_text()
Expand Down

0 comments on commit dd6a074

Please sign in to comment.