From 30a46713500c97828f132af3f99c23633156ab29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 30 Mar 2024 11:05:30 +0100 Subject: [PATCH 01/66] enhance ANSI functions --- sphinx/util/_io.py | 4 +- sphinx/util/console.py | 104 +++++++++++++++++++++------ sphinx/util/exceptions.py | 4 +- tests/test_util/test_util_console.py | 102 ++++++++++++++++++++++++++ 4 files changed, 189 insertions(+), 25 deletions(-) create mode 100644 tests/test_util/test_util_console.py diff --git a/sphinx/util/_io.py b/sphinx/util/_io.py index e140cf12081..3689d9e4511 100644 --- a/sphinx/util/_io.py +++ b/sphinx/util/_io.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING -from sphinx.util.console import _strip_escape_sequences +from sphinx.util.console import strip_escape_sequences if TYPE_CHECKING: from typing import Protocol @@ -25,7 +25,7 @@ def __init__( def write(self, text: str, /) -> None: self.stream_term.write(text) - self.stream_file.write(_strip_escape_sequences(text)) + self.stream_file.write(strip_escape_sequences(text)) def flush(self) -> None: if hasattr(self.stream_term, 'flush'): diff --git a/sphinx/util/console.py b/sphinx/util/console.py index 8a5fe3d51fc..dfd336383db 100644 --- a/sphinx/util/console.py +++ b/sphinx/util/console.py @@ -11,24 +11,71 @@ if TYPE_CHECKING: from typing import Final + # fmt: off + def reset(text: str) -> str: ... # NoQA: E704 + def bold(text: str) -> str: ... # NoQA: E704 + def faint(text: str) -> str: ... # NoQA: E704 + def standout(text: str) -> str: ... # NoQA: E704 + def underline(text: str) -> str: ... # NoQA: E704 + def blink(text: str) -> str: ... # NoQA: E704 + + def black(text: str) -> str: ... # NoQA: E704 + def white(text: str) -> str: ... # NoQA: E704 + def red(text: str) -> str: ... # NoQA: E704 + def yellow(text: str) -> str: ... # NoQA: E704 + def blue(text: str) -> str: ... # NoQA: E704 + def purple(text: str) -> str: ... 
# NoQA: E704 + def turquoise(text: str) -> str: ... # NoQA: E704 + + def darkgray(text: str) -> str: ... # NoQA: E704 + def lightgray(text: str) -> str: ... # NoQA: E704 + def darkred(text: str) -> str: ... # NoQA: E704 + def brown(text: str) -> str: ... # NoQA: E704 + def darkblue(text: str) -> str: ... # NoQA: E704 + def fuchsia(text: str) -> str: ... # NoQA: E704 + def teal(text: str) -> str: ... # NoQA: E704 + # fmt: on + try: # check if colorama is installed to support color on Windows import colorama except ImportError: colorama = None +_CSI: Final[str] = re.escape('\x1b[') # 'ESC [': Control Sequence Introducer +_OSC: Final[str] = re.escape('\x1b]') # 'ESC ]': Operating System Command +_BELL: Final[str] = re.escape('\x07') # bell command -_CSI = re.escape('\x1b[') # 'ESC [': Control Sequence Introducer -_ansi_re: re.Pattern[str] = re.compile( - _CSI + r""" - ( - (\d\d;){0,2}\d\dm # ANSI colour code - | - \dK # ANSI Erase in Line - )""", - re.VERBOSE | re.ASCII) +# ANSI escape sequences for colors _ansi_color_re: Final[re.Pattern[str]] = re.compile('\x1b.*?m') +# ANSI escape sequences supported by vt100 terminal (non-colors) +_ansi_other_re: Final[re.Pattern[str]] = re.compile( + _CSI + + r"""(?: + H # HOME + |\?\d+[hl] # enable/disable features (e.g., cursor, mouse, etc) + |[1-6] q # cursor shape (e.g., blink) (note the space before 'q') + |2?J # erase down (J) or clear screen (2J) + |\d*[ABCD] # cursor up/down/forward/backward + |\d+G # move to column + |(?:\d;)?\d+;\d+H # move to (x, y) + |\dK # erase in line + ) | """ + + _OSC + + r"""(?: + \d;.*?\x07 # set window title + ) | """ + + _BELL, + re.VERBOSE | re.ASCII, +) + +# ANSI escape sequences +_ansi_re: Final[re.Pattern[str]] = re.compile( + ' | '.join((_ansi_color_re.pattern, _ansi_other_re.pattern)), + re.VERBOSE | re.ASCII, +) + codes: dict[str, str] = {} @@ -99,38 +146,53 @@ def escseq(name: str) -> str: def strip_colors(s: str) -> str: + """Strip all color escape sequences from *s*.""" + # TODO: 
deprecate parameter *s* in favor of a positional-only parameter *text* return _ansi_color_re.sub('', s) -def _strip_escape_sequences(s: str) -> str: - return _ansi_re.sub('', s) +def strip_control_sequences(text: str, /) -> str: + """Strip non-color escape sequences from *text*.""" + return _ansi_other_re.sub('', text) + + +def strip_escape_sequences(text: str, /) -> str: + """Strip all control sequences from *text*.""" + # Remove control sequences first so that text of the form + # + # '\x1b[94m' + '\x1bA' + TEXT + '\x1b[0m' + # + # is cleaned to TEXT and not '' (otherwise '[94m\x1bAabc\x1b[0' + # is considered by :data:`_ansi_color_re` and removed altogther). + return strip_colors(strip_control_sequences(text)) def create_color_func(name: str) -> None: def inner(text: str) -> str: return colorize(name, text) + globals()[name] = inner _attrs = { - 'reset': '39;49;00m', - 'bold': '01m', - 'faint': '02m', - 'standout': '03m', + 'reset': '39;49;00m', + 'bold': '01m', + 'faint': '02m', + 'standout': '03m', 'underline': '04m', - 'blink': '05m', + 'blink': '05m', } for _name, _value in _attrs.items(): codes[_name] = '\x1b[' + _value _colors = [ - ('black', 'darkgray'), - ('darkred', 'red'), + ('black', 'darkgray'), + ('darkred', 'red'), ('darkgreen', 'green'), - ('brown', 'yellow'), - ('darkblue', 'blue'), - ('purple', 'fuchsia'), + ('brown', 'yellow'), + ('darkblue', 'blue'), + ('purple', 'fuchsia'), ('turquoise', 'teal'), ('lightgray', 'white'), ] diff --git a/sphinx/util/exceptions.py b/sphinx/util/exceptions.py index 08281389bf3..577ec734e59 100644 --- a/sphinx/util/exceptions.py +++ b/sphinx/util/exceptions.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING from sphinx.errors import SphinxParallelError -from sphinx.util.console import _strip_escape_sequences +from sphinx.util.console import strip_escape_sequences if TYPE_CHECKING: from sphinx.application import Sphinx @@ -31,7 +31,7 @@ def save_traceback(app: Sphinx | None, exc: BaseException) -> str: last_msgs = 
exts_list = '' else: extensions = app.extensions.values() - last_msgs = '\n'.join(f'# {_strip_escape_sequences(s).strip()}' + last_msgs = '\n'.join(f'# {strip_escape_sequences(s).strip()}' for s in app.messagelog) exts_list = '\n'.join(f'# {ext.name} ({ext.version})' for ext in extensions if ext.version != 'builtin') diff --git a/tests/test_util/test_util_console.py b/tests/test_util/test_util_console.py new file mode 100644 index 00000000000..e9f878d9b26 --- /dev/null +++ b/tests/test_util/test_util_console.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +import itertools +import string +from typing import TYPE_CHECKING + +import pytest + +import sphinx.util.console as term +from sphinx.util.console import strip_colors, strip_control_sequences, strip_escape_sequences + +if TYPE_CHECKING: + from collections.abc import Sequence + from typing import Any, Final + +ESC: Final[str] = '\x1b' +CSI: Final[str] = '\x1b[' +OSC: Final[str] = '\x1b]' +BELL: Final[str] = '\x07' + + +def osc_title(title: str) -> str: + return f'{OSC}2;{title}{BELL}' + + +def insert_ansi(text: str, codes: list[str]) -> str: + for code in codes: + text = f'{CSI}{code}{text}' + return text + + +def apply_style(text: str, style: list[str]) -> str: + for code in style: + if code in term.codes: + text = term.colorize(code, text) + else: + text = insert_ansi(text, [code]) + return text + + +def poweroder(seq: Sequence[Any], *, permutations: bool = True) -> list[tuple[Any, ...]]: + generator = itertools.permutations if permutations else itertools.combinations + return list(itertools.chain.from_iterable((generator(seq, i) for i in range(len(seq))))) + + +@pytest.mark.parametrize('invariant', [ESC, CSI, OSC]) +def test_strip_invariants(invariant: str) -> None: + assert strip_colors(invariant) == invariant + assert strip_control_sequences(invariant) == invariant + assert strip_escape_sequences(invariant) == invariant + + +# some color/style codes to use (but not composed) + + +_STYLES = ['m', 
'0m', '2m', '02m', '002m', '40m', '040m', '0;1m', '40;50m', '50;30;40m'] +# some non-color ESC codes to use (will be composed) +_CNTRLS = ['A', '0G', '1;20;128H'] + + +@pytest.mark.parametrize('prefix', ['', '#', 'def ']) # non-formatted part +@pytest.mark.parametrize('source', [string.printable, BELL]) +@pytest.mark.parametrize('style', [_STYLES, *poweroder(['bold', 'blink', 'blue', 'red'])]) +def test_strip_style(prefix: str, source: str, style: list[str]) -> None: + expect = prefix + source + pretty = prefix + apply_style(source, style) + assert strip_colors(pretty) == expect, (pretty, expect) + + +@pytest.mark.parametrize('prefix', ['', '#', 'def ']) # non-formatted part +@pytest.mark.parametrize('source', ['', 'abc', string.printable]) +@pytest.mark.parametrize('style', [_STYLES, *poweroder(['blue', 'bold'])]) +@pytest.mark.parametrize('cntrl', poweroder(_CNTRLS), ids='-'.join) +def test_strip_cntrl(prefix: str, source: str, style: list[str], cntrl: list[str]) -> None: + expect = pretty = prefix + apply_style(source, style) + # does nothing since there are only color sequences + assert strip_control_sequences(pretty) == expect, (pretty, expect) + + expect = prefix + source + pretty = prefix + insert_ansi(source, cntrl) + # all non-color codes are removed correctly + assert strip_control_sequences(pretty) == expect, (pretty, expect) + + +@pytest.mark.parametrize('prefix', ['', '#', 'def ']) # non-formatted part +@pytest.mark.parametrize('source', ['', 'abc', string.printable]) +@pytest.mark.parametrize('style', [_STYLES, *poweroder(['blue', 'bold'])]) +@pytest.mark.parametrize('cntrl', poweroder(_CNTRLS), ids='-'.join) +def test_strip_ansi(prefix: str, source: str, style: list[str], cntrl: list[str]) -> None: + expect = prefix + source + + with_style = prefix + apply_style(source, style) + assert strip_escape_sequences(with_style) == expect, (with_style, expect) + + with_cntrl = prefix + insert_ansi(source, cntrl) + assert strip_escape_sequences(with_cntrl) == 
expect, (with_cntrl, expect) + + composed = insert_ansi(with_style, cntrl) # add some cntrl sequences + assert strip_escape_sequences(composed) == expect, (composed, expect) + + composed = apply_style(with_cntrl, style) # add some color sequences + assert strip_escape_sequences(composed) == expect, (composed, expect) From 45ea39b9ab5d77e77f0d506301786cd38432ae00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 30 Mar 2024 11:19:09 +0100 Subject: [PATCH 02/66] remove ``# type: ignore[attr-defined]`` for colors --- sphinx/application.py | 2 +- sphinx/builders/__init__.py | 2 +- sphinx/builders/changes.py | 2 +- sphinx/builders/gettext.py | 2 +- sphinx/builders/latex/__init__.py | 2 +- sphinx/builders/linkcheck.py | 8 +------- sphinx/builders/manpage.py | 2 +- sphinx/builders/singlehtml.py | 2 +- sphinx/builders/texinfo.py | 2 +- sphinx/cmd/build.py | 7 +------ sphinx/cmd/make_mode.py | 7 +------ sphinx/cmd/quickstart.py | 8 +------- sphinx/ext/coverage.py | 2 +- sphinx/ext/doctest.py | 2 +- sphinx/util/console.py | 10 ++++++---- sphinx/util/display.py | 2 +- 16 files changed, 21 insertions(+), 41 deletions(-) diff --git a/sphinx/application.py b/sphinx/application.py index 1de0693baa2..7d16d9ab2b2 100644 --- a/sphinx/application.py +++ b/sphinx/application.py @@ -33,7 +33,7 @@ from sphinx.util import docutils, logging from sphinx.util._pathlib import _StrPath from sphinx.util.build_phase import BuildPhase -from sphinx.util.console import bold # type: ignore[attr-defined] +from sphinx.util.console import bold from sphinx.util.display import progress_message from sphinx.util.i18n import CatalogRepository from sphinx.util.logging import prefixed_warnings diff --git a/sphinx/builders/__init__.py b/sphinx/builders/__init__.py index 878f5d6acfb..9bcc8c7b1eb 100644 --- a/sphinx/builders/__init__.py +++ b/sphinx/builders/__init__.py @@ -17,7 +17,7 @@ from sphinx.locale import __ from sphinx.util 
import UnicodeDecodeErrorHandler, get_filetype, import_object, logging, rst from sphinx.util.build_phase import BuildPhase -from sphinx.util.console import bold # type: ignore[attr-defined] +from sphinx.util.console import bold from sphinx.util.display import progress_message, status_iterator from sphinx.util.docutils import sphinx_domains from sphinx.util.i18n import CatalogInfo, CatalogRepository, docname_to_domain diff --git a/sphinx/builders/changes.py b/sphinx/builders/changes.py index 7d5e0044e23..b233e85cd73 100644 --- a/sphinx/builders/changes.py +++ b/sphinx/builders/changes.py @@ -12,7 +12,7 @@ from sphinx.locale import _, __ from sphinx.theming import HTMLThemeFactory from sphinx.util import logging -from sphinx.util.console import bold # type: ignore[attr-defined] +from sphinx.util.console import bold from sphinx.util.fileutil import copy_asset_file from sphinx.util.osutil import ensuredir, os_path diff --git a/sphinx/builders/gettext.py b/sphinx/builders/gettext.py index 3928f9f9308..35abf1fd7e5 100644 --- a/sphinx/builders/gettext.py +++ b/sphinx/builders/gettext.py @@ -16,7 +16,7 @@ from sphinx.errors import ThemeError from sphinx.locale import __ from sphinx.util import logging -from sphinx.util.console import bold # type: ignore[attr-defined] +from sphinx.util.console import bold from sphinx.util.display import status_iterator from sphinx.util.i18n import CatalogInfo, docname_to_domain from sphinx.util.index_entries import split_index_msg diff --git a/sphinx/builders/latex/__init__.py b/sphinx/builders/latex/__init__.py index fd140dd062e..2b176f92504 100644 --- a/sphinx/builders/latex/__init__.py +++ b/sphinx/builders/latex/__init__.py @@ -20,7 +20,7 @@ from sphinx.errors import NoUri, SphinxError from sphinx.locale import _, __ from sphinx.util import logging, texescape -from sphinx.util.console import bold, darkgreen # type: ignore[attr-defined] +from sphinx.util.console import bold, darkgreen from sphinx.util.display import progress_message, 
status_iterator from sphinx.util.docutils import SphinxFileOutput, new_document from sphinx.util.fileutil import copy_asset_file diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 83f45e4719c..89a3543e822 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -24,13 +24,7 @@ from sphinx.locale import __ from sphinx.transforms.post_transforms import SphinxPostTransform from sphinx.util import encode_uri, logging, requests -from sphinx.util.console import ( # type: ignore[attr-defined] - darkgray, - darkgreen, - purple, - red, - turquoise, -) +from sphinx.util.console import darkgray, darkgreen, purple, red, turquoise from sphinx.util.http_date import rfc1123_to_epoch from sphinx.util.nodes import get_node_line diff --git a/sphinx/builders/manpage.py b/sphinx/builders/manpage.py index bf01d017c24..93b381d3864 100644 --- a/sphinx/builders/manpage.py +++ b/sphinx/builders/manpage.py @@ -13,7 +13,7 @@ from sphinx.builders import Builder from sphinx.locale import __ from sphinx.util import logging -from sphinx.util.console import darkgreen # type: ignore[attr-defined] +from sphinx.util.console import darkgreen from sphinx.util.display import progress_message from sphinx.util.nodes import inline_all_toctrees from sphinx.util.osutil import ensuredir, make_filename_from_project diff --git a/sphinx/builders/singlehtml.py b/sphinx/builders/singlehtml.py index f9ce8cea28d..87590544f6d 100644 --- a/sphinx/builders/singlehtml.py +++ b/sphinx/builders/singlehtml.py @@ -11,7 +11,7 @@ from sphinx.environment.adapters.toctree import global_toctree_for_doc from sphinx.locale import __ from sphinx.util import logging -from sphinx.util.console import darkgreen # type: ignore[attr-defined] +from sphinx.util.console import darkgreen from sphinx.util.display import progress_message from sphinx.util.nodes import inline_all_toctrees diff --git a/sphinx/builders/texinfo.py b/sphinx/builders/texinfo.py index 226ce690fad..8d5a1aa6df0 100644 
--- a/sphinx/builders/texinfo.py +++ b/sphinx/builders/texinfo.py @@ -17,7 +17,7 @@ from sphinx.errors import NoUri from sphinx.locale import _, __ from sphinx.util import logging -from sphinx.util.console import darkgreen # type: ignore[attr-defined] +from sphinx.util.console import darkgreen from sphinx.util.display import progress_message, status_iterator from sphinx.util.docutils import new_document from sphinx.util.fileutil import copy_asset_file diff --git a/sphinx/cmd/build.py b/sphinx/cmd/build.py index bf3fa3400a4..be23e0b90a4 100644 --- a/sphinx/cmd/build.py +++ b/sphinx/cmd/build.py @@ -22,12 +22,7 @@ from sphinx.errors import SphinxError, SphinxParallelError from sphinx.locale import __ from sphinx.util._io import TeeStripANSI -from sphinx.util.console import ( # type: ignore[attr-defined] - color_terminal, - nocolor, - red, - terminal_safe, -) +from sphinx.util.console import color_terminal, nocolor, red, terminal_safe from sphinx.util.docutils import docutils_namespace, patch_docutils from sphinx.util.exceptions import format_exception_cut_frames, save_traceback from sphinx.util.osutil import ensuredir diff --git a/sphinx/cmd/make_mode.py b/sphinx/cmd/make_mode.py index ee237ae15c0..01929469cca 100644 --- a/sphinx/cmd/make_mode.py +++ b/sphinx/cmd/make_mode.py @@ -17,12 +17,7 @@ import sphinx from sphinx.cmd.build import build_main -from sphinx.util.console import ( # type: ignore[attr-defined] - blue, - bold, - color_terminal, - nocolor, -) +from sphinx.util.console import blue, bold, color_terminal, nocolor from sphinx.util.osutil import rmtree if sys.version_info >= (3, 11): diff --git a/sphinx/cmd/quickstart.py b/sphinx/cmd/quickstart.py index fe1a29a14d3..8fb7eebae48 100644 --- a/sphinx/cmd/quickstart.py +++ b/sphinx/cmd/quickstart.py @@ -31,13 +31,7 @@ import sphinx.locale from sphinx import __display_version__, package_dir from sphinx.locale import __ -from sphinx.util.console import ( # type: ignore[attr-defined] - bold, - color_terminal, - 
colorize, - nocolor, - red, -) +from sphinx.util.console import bold, color_terminal, colorize, nocolor, red from sphinx.util.osutil import ensuredir from sphinx.util.template import SphinxRenderer diff --git a/sphinx/ext/coverage.py b/sphinx/ext/coverage.py index 92afd868ca4..cfe093623c1 100644 --- a/sphinx/ext/coverage.py +++ b/sphinx/ext/coverage.py @@ -19,7 +19,7 @@ from sphinx.builders import Builder from sphinx.locale import __ from sphinx.util import logging -from sphinx.util.console import red # type: ignore[attr-defined] +from sphinx.util.console import red from sphinx.util.inspect import safe_getattr if TYPE_CHECKING: diff --git a/sphinx/ext/doctest.py b/sphinx/ext/doctest.py index fe133900c05..e6ba27439b2 100644 --- a/sphinx/ext/doctest.py +++ b/sphinx/ext/doctest.py @@ -22,7 +22,7 @@ from sphinx.builders import Builder from sphinx.locale import __ from sphinx.util import logging -from sphinx.util.console import bold # type: ignore[attr-defined] +from sphinx.util.console import bold from sphinx.util.docutils import SphinxDirective from sphinx.util.osutil import relpath diff --git a/sphinx/util/console.py b/sphinx/util/console.py index dfd336383db..23c42d7cc79 100644 --- a/sphinx/util/console.py +++ b/sphinx/util/console.py @@ -22,18 +22,20 @@ def blink(text: str) -> str: ... # NoQA: E704 def black(text: str) -> str: ... # NoQA: E704 def white(text: str) -> str: ... # NoQA: E704 def red(text: str) -> str: ... # NoQA: E704 + def green(text: str) -> str: ... # NoQA: E704 def yellow(text: str) -> str: ... # NoQA: E704 def blue(text: str) -> str: ... # NoQA: E704 - def purple(text: str) -> str: ... # NoQA: E704 - def turquoise(text: str) -> str: ... # NoQA: E704 + def fuchsia(text: str) -> str: ... # NoQA: E704 + def teal(text: str) -> str: ... # NoQA: E704 def darkgray(text: str) -> str: ... # NoQA: E704 def lightgray(text: str) -> str: ... # NoQA: E704 def darkred(text: str) -> str: ... # NoQA: E704 + def darkgreen(text: str) -> str: ... 
# NoQA: E704 def brown(text: str) -> str: ... # NoQA: E704 def darkblue(text: str) -> str: ... # NoQA: E704 - def fuchsia(text: str) -> str: ... # NoQA: E704 - def teal(text: str) -> str: ... # NoQA: E704 + def purple(text: str) -> str: ... # NoQA: E704 + def turquoise(text: str) -> str: ... # NoQA: E704 # fmt: on try: diff --git a/sphinx/util/display.py b/sphinx/util/display.py index 967c8057313..3cb8d9729b4 100644 --- a/sphinx/util/display.py +++ b/sphinx/util/display.py @@ -5,7 +5,7 @@ from sphinx.locale import __ from sphinx.util import logging -from sphinx.util.console import bold, color_terminal # type: ignore[attr-defined] +from sphinx.util.console import bold, color_terminal if False: from collections.abc import Iterable, Iterator From 86c3efeaa6916c9563da1f33ba1005067cb97897 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 30 Mar 2024 11:06:17 +0100 Subject: [PATCH 03/66] add tests for ANSI strippers --- tests/test_util/test_util_console.py | 125 +++++++++++++++++++++------ 1 file changed, 99 insertions(+), 26 deletions(-) diff --git a/tests/test_util/test_util_console.py b/tests/test_util/test_util_console.py index e9f878d9b26..ec049d6e821 100644 --- a/tests/test_util/test_util_console.py +++ b/tests/test_util/test_util_console.py @@ -1,7 +1,6 @@ from __future__ import annotations import itertools -import string from typing import TYPE_CHECKING import pytest @@ -11,7 +10,14 @@ if TYPE_CHECKING: from collections.abc import Sequence - from typing import Any, Final + from typing import Any, Final, TypeVar + + _T = TypeVar('_T') + + Style = str + """An ANSI style (color or format) known by :mod:`sphinx.util.console`.""" + AnsiCode = str + """An ANSI escape sequence.""" ESC: Final[str] = '\x1b' CSI: Final[str] = '\x1b[' @@ -20,17 +26,35 @@ def osc_title(title: str) -> str: + """OSC string for changing the terminal title.""" return f'{OSC}2;{title}{BELL}' -def insert_ansi(text: str, 
codes: list[str]) -> str: +def insert_ansi(text: str, codes: Sequence[AnsiCode], *, reset: bool = False) -> str: + """Add ANSI escape sequences codes to *text*. + + If *reset* is True, the reset code is added at the end. + + :param text: The text to decorate. + :param codes: A list of ANSI esc. seq. to use deprived of their CSI prefix. + :param reset: Indicate whether to add the reset esc. seq. + :return: The decorated text. + """ for code in codes: - text = f'{CSI}{code}{text}' + text = f'{code}{text}' + if reset: + text = term.reset(text) return text -def apply_style(text: str, style: list[str]) -> str: - for code in style: +def apply_style(text: str, codes: Sequence[AnsiCode | Style]) -> str: + """Apply one or more ANSI esc. seq. to *text*. + + Each item in *codes* can either be a color name (e.g., 'blue'), + a text decoration (e.g., 'blink') or an ANSI esc. seq. deprived + of its CSI prefix (e.g., '34m'). + """ + for code in codes: if code in term.codes: text = term.colorize(code, text) else: @@ -38,9 +62,30 @@ def apply_style(text: str, style: list[str]) -> str: return text -def poweroder(seq: Sequence[Any], *, permutations: bool = True) -> list[tuple[Any, ...]]: - generator = itertools.permutations if permutations else itertools.combinations - return list(itertools.chain.from_iterable((generator(seq, i) for i in range(len(seq))))) +def powerset( + elems: Sequence[_T], *, n: int | None = None, total: bool = True +) -> list[tuple[_T, ...]]: + r"""Generate the powerset over *seq*. + + :param elems: The elements to get the powerset over. + :param n: Optional maximum size of a subset. + :param total: If false, quotient the result by :math:`\mathfrak{S}_n`. + + Example: + ------- + + .. 
code-block:: python + + powerset([1, 2], total=True) + [(), (1,), (2,), (1, 2), (2, 1)] + + powerset([1, 2], total=False) + [(), (1,), (2,), (1, 2)] + """ + if n is None: + n = len(elems) + gen = itertools.permutations if total else itertools.combinations + return list(itertools.chain.from_iterable(gen(elems, i) for i in range(n + 1))) @pytest.mark.parametrize('invariant', [ESC, CSI, OSC]) @@ -51,27 +96,50 @@ def test_strip_invariants(invariant: str) -> None: # some color/style codes to use (but not composed) +_STYLES: list[tuple[AnsiCode, ...]] = [ + *[(f'{CSI}{";".join(map(str, s))}m',) for s in [range(s) for s in range(4)]], + *powerset(['blue', 'bold']) +] +# some non-color ESC codes to use (will be composed) +_CNTRLS: list[tuple[AnsiCode, ...]] = powerset([f'{CSI}A', f'{CSI}0G', f'{CSI}1;20;128H']) -_STYLES = ['m', '0m', '2m', '02m', '002m', '40m', '040m', '0;1m', '40;50m', '50;30;40m'] -# some non-color ESC codes to use (will be composed) -_CNTRLS = ['A', '0G', '1;20;128H'] +# For some reason that I (picnixz) do not understand, it is not possible to +# create a mark decorator using pytest.mark.parametrize.with_args(ids=...). +# +# As such, in order not to lose autocompletion from PyCharm, we will pass +# the custom id function to each call to `pytest.mark.parametrize`. 
+def _clean_id(value: Any) -> str: + if isinstance(value, str) and not value: + return '' + + if isinstance(value, (list, tuple)): + if not value: + return '()' + return '-'.join(map(_clean_id, value)) + + return repr(value) -@pytest.mark.parametrize('prefix', ['', '#', 'def ']) # non-formatted part -@pytest.mark.parametrize('source', [string.printable, BELL]) -@pytest.mark.parametrize('style', [_STYLES, *poweroder(['bold', 'blink', 'blue', 'red'])]) -def test_strip_style(prefix: str, source: str, style: list[str]) -> None: +@pytest.mark.parametrize('prefix', ['', 'raw'], ids=_clean_id) # non-formatted part +@pytest.mark.parametrize('source', ['', 'abc\ndef', BELL], ids=_clean_id) +@pytest.mark.parametrize('style', _STYLES, ids=_clean_id) +def test_strip_style(prefix: str, source: str, style: tuple[AnsiCode, ...]) -> None: expect = prefix + source pretty = prefix + apply_style(source, style) assert strip_colors(pretty) == expect, (pretty, expect) -@pytest.mark.parametrize('prefix', ['', '#', 'def ']) # non-formatted part -@pytest.mark.parametrize('source', ['', 'abc', string.printable]) -@pytest.mark.parametrize('style', [_STYLES, *poweroder(['blue', 'bold'])]) -@pytest.mark.parametrize('cntrl', poweroder(_CNTRLS), ids='-'.join) -def test_strip_cntrl(prefix: str, source: str, style: list[str], cntrl: list[str]) -> None: +@pytest.mark.parametrize('prefix', ['', 'raw'], ids=_clean_id) # non-formatted part +@pytest.mark.parametrize('source', ['', 'abc\ndef'], ids=_clean_id) +@pytest.mark.parametrize('style', _STYLES, ids=_clean_id) +@pytest.mark.parametrize('cntrl', _CNTRLS, ids=_clean_id) +def test_strip_cntrl( + prefix: str, + source: str, + style: tuple[AnsiCode, ...], + cntrl: tuple[AnsiCode, ...] 
+) -> None: expect = pretty = prefix + apply_style(source, style) # does nothing since there are only color sequences assert strip_control_sequences(pretty) == expect, (pretty, expect) @@ -82,11 +150,16 @@ def test_strip_cntrl(prefix: str, source: str, style: list[str], cntrl: list[str assert strip_control_sequences(pretty) == expect, (pretty, expect) -@pytest.mark.parametrize('prefix', ['', '#', 'def ']) # non-formatted part -@pytest.mark.parametrize('source', ['', 'abc', string.printable]) -@pytest.mark.parametrize('style', [_STYLES, *poweroder(['blue', 'bold'])]) -@pytest.mark.parametrize('cntrl', poweroder(_CNTRLS), ids='-'.join) -def test_strip_ansi(prefix: str, source: str, style: list[str], cntrl: list[str]) -> None: +@pytest.mark.parametrize('prefix', ['', 'raw'], ids=_clean_id) # non-formatted part +@pytest.mark.parametrize('source', ['', 'abc\ndef'], ids=_clean_id) +@pytest.mark.parametrize('style', _STYLES, ids=_clean_id) +@pytest.mark.parametrize('cntrl', _CNTRLS, ids=_clean_id) +def test_strip_ansi( + prefix: str, + source: str, + style: tuple[AnsiCode, ...], + cntrl: tuple[AnsiCode, ...] 
+) -> None: expect = prefix + source with_style = prefix + apply_style(source, style) From 55099de271cac87f2f77526b0ff04981ff4eb630 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 29 Mar 2024 17:36:10 +0100 Subject: [PATCH 04/66] add matcher objects for tests --- sphinx/testing/_matcher/__init__.py | 5 + sphinx/testing/_matcher/buffer.py | 409 ++++++++++++++++++++ sphinx/testing/_matcher/options.py | 172 +++++++++ sphinx/testing/_matcher/util.py | 200 ++++++++++ sphinx/testing/matcher.py | 359 ++++++++++++++++++ sphinx/testing/util.py | 11 + tests/test_testing/__init__.py | 0 tests/test_testing/test_matcher.py | 555 ++++++++++++++++++++++++++++ 8 files changed, 1711 insertions(+) create mode 100644 sphinx/testing/_matcher/__init__.py create mode 100644 sphinx/testing/_matcher/buffer.py create mode 100644 sphinx/testing/_matcher/options.py create mode 100644 sphinx/testing/_matcher/util.py create mode 100644 sphinx/testing/matcher.py create mode 100644 tests/test_testing/__init__.py create mode 100644 tests/test_testing/test_matcher.py diff --git a/sphinx/testing/_matcher/__init__.py b/sphinx/testing/_matcher/__init__.py new file mode 100644 index 00000000000..9aa65596b80 --- /dev/null +++ b/sphinx/testing/_matcher/__init__.py @@ -0,0 +1,5 @@ +"""Private package for :class:`~sphinx.testing.matcher.LineMatcher`. + +Any object provided in this package or any of its submodules can +be removed, changed, moved without prior notice. 
+""" diff --git a/sphinx/testing/_matcher/buffer.py b/sphinx/testing/_matcher/buffer.py new file mode 100644 index 00000000000..244d6aab445 --- /dev/null +++ b/sphinx/testing/_matcher/buffer.py @@ -0,0 +1,409 @@ +from __future__ import annotations + +__all__ = ('Line', 'Block') + +import abc +import sys +from collections.abc import Sequence +from itertools import starmap +from typing import TYPE_CHECKING, Generic, TypeVar, final, overload + +from sphinx.testing._matcher.util import windowed + +if TYPE_CHECKING: + from collections.abc import Iterable, Iterator + from typing import SupportsIndex + + from typing_extensions import Self + +_T = TypeVar('_T', bound=Sequence[str]) + + +class _TextView(Generic[_T], abc.ABC): + # add __weakref__ to allow the object being weak-referencable + __slots__ = ('_buffer', '_offset', '__weakref__') + + def __init__(self, buffer: _T, offset: int = 0, /) -> None: + if not isinstance(offset, int): + msg = f'offset must be an integer, got {offset!r}' + raise TypeError(msg) + + if offset < 0: + msg = f'offset must be >= 0, got {offset!r}' + raise ValueError(msg) + + self._buffer = buffer + self._offset = offset + + @property + def buffer(self) -> _T: + """The internal (immutable) buffer.""" + return self._buffer + + @property + def offset(self) -> int: + """The index of this object in the original source.""" + return self._offset + + def __copy__(self) -> Self: + return self.__class__(self.buffer, self.offset) + + def __bool__(self) -> bool: + return bool(len(self)) + + def __iter__(self) -> Iterator[str]: + return iter(self.buffer) + + def __reversed__(self) -> Iterator[str]: + return reversed(self.buffer) + + def __len__(self) -> int: + return len(self.buffer) + + def __contains__(self, value: object, /) -> bool: + return value in self.buffer + + @abc.abstractmethod + def __lt__(self, other: object, /) -> bool: + pass + + def __le__(self, other: object, /) -> bool: + return self == other or self < other + + def __ge__(self, other: 
object, /) -> bool: + return self == other or self > other + + @abc.abstractmethod + def __gt__(self, other: object, /) -> bool: + pass + + +@final +class Line(_TextView[str]): + """A line found by :meth:`~sphinx.testing.matcher.LineMatcher.match`. + + A :class:`Line` can be compared to :class:`str`, :class:`Line` objects or + a pair (i.e., a two-length sequence) ``(line, offset)`` where *line* is a + native :class:`str` (not a subclass thereof) and *offset* is an integer. + + By convention, the comparison result (except for ``!=``) of :class:`Line` + objects with distinct :attr:`offset` is always ``False``. Use :class:`str` + objects instead if the offset is not relevant. + """ + + def __init__(self, line: str, offset: int = 0, /) -> None: + """Construct a :class:`Line` object. + + The *line* must be a native :class:`str` object. + """ + if type(line) is not str: + # force the line to be a true string and not another string-like + msg = f'expecting a native string, got {line!r}' + raise TypeError(msg) + + super().__init__(line, offset) + + @classmethod + def view(cls, index: int, line: str, /) -> Self: + """Alternative constructor flipping the order of the arguments. 
+
+        This is typically useful with :func:`enumerate`, namely this makes::
+
+            from itertools import starmap
+            lines = list(starmap(Line.view, enumerate(lines)))
+
+        equivalent to::
+
+            lines = [Line(line, index) for index, line in enumerate(lines)]
+        """
+        return cls(line, index)
+
+    # dunder methods
+
+    def __str__(self) -> str:
+        """The line as a string."""
+        return self.buffer
+
+    def __repr__(self) -> str:
+        return f'{self.__class__.__name__}({self!s}, offset={self.offset})'
+
+    def __getitem__(self, index: int | slice, /) -> str:
+        return self.buffer[index]
+
+    def __add__(self, other: object, /) -> Line:
+        if isinstance(other, str):
+            return Line(str(self) + other, self.offset)
+        if isinstance(other, Line):
+            if self.offset != other.offset:
+                msg = 'cannot concatenate lines with different offsets'
+                raise ValueError(msg)
+            return Line(str(self) + str(other), self.offset)
+        return NotImplemented
+
+    def __mul__(self, other: object, /) -> Line:
+        if isinstance(other, int):
+            return Line(str(self) * other, self.offset)
+        return NotImplemented
+
+    def __eq__(self, other: object, /) -> bool:
+        other = self.__cast(other)
+        if isinstance(other, Line):
+            # check offsets before calling str()
+            return self.offset == other.offset and str(self) == str(other)
+        return False
+
+    def __lt__(self, other: object, /) -> bool:
+        other = self.__cast(other)
+        if isinstance(other, Line):
+            # check offsets before calling str()
+            return self.offset == other.offset and str(self) < str(other)
+        return NotImplemented
+
+    def __gt__(self, other: object, /) -> bool:
+        other = self.__cast(other)
+        if isinstance(other, Line):
+            # check offsets before calling str()
+            return self.offset == other.offset and str(self) > str(other)
+        return NotImplemented
+
+    # exposed :class:`str` interface
+
+    def startswith(self, prefix: str, start: int = 0, end: int | None = None, /) -> bool:
+        """See :meth:`str.startswith`."""
+        return self.buffer.startswith(prefix, start, end)
+
+    def endswith(self, suffix: 
str, start: int = 0, end: int | None = None, /) -> bool: + """See :meth:`str.endswith`.""" + return self.buffer.endswith(suffix, start, end) + + def count(self, sub: str, start: int = 0, end: int | None = None, /) -> int: + """See :meth:`str.count`.""" + return self.buffer.count(sub, start, end) + + def find(self, sub: str, start: int = 0, end: int | None = None, /) -> int: + """See :meth:`str.find`.""" + return self.buffer.find(sub, start, end) + + def rfind(self, sub: str, start: int = 0, end: int | None = None, /) -> int: + """See :meth:`str.rfind`.""" + return self.buffer.rfind(sub, start, end) + + def index(self, sub: str, start: int = 0, end: int | None = None, /) -> int: + """See :meth:`str.index`.""" + return self.buffer.index(sub, start, end) + + def rindex(self, sub: str, start: int = 0, end: int | None = None, /) -> int: + """See :meth:`str.rindex`.""" + return self.buffer.rindex(sub, start, end) + + def __cast(self, other: object, /) -> Self | object: + """Try to parse *object* as a :class:`Line`.""" + if isinstance(other, Line): + return other + if isinstance(other, str): + return Line(other, self.offset) + if isinstance(other, (list, tuple)) and len(other) == 2: + # type checking is handled by the Line constructor + return Line(other[0], other[1]) + return other + + +@final +class Block(_TextView[tuple[str, ...]]): + """Block found by :meth:`~sphinx.testing.matcher.LineMatcher.find`. + + A block can be compared to list of strings (e.g., ``['line1', 'line2']``), + a :class:`Block` object or a sequence of pairs ``(block, offset)`` (the + pair can be given as any two-length sequence) where: + + - *block* -- a sequence (e.g., list, tuple, deque, ...) consisting + of :class:`str`, :class:`Line` or ``(line, line_offset)`` objects. + + Here, ``(line, line_offset)`` follows the same conventions + for comparing :class:`Line` objects. + + - *offset* -- an integer (matched against :attr:`offset`). 
+
+    For instance,::
+
+        assert Block(['a', 'b'], 2) == [Line('a', 2), Line('b', 3)]
+    """
+
+    def __init__(self, buffer: Iterable[str], offset: int = 0, /) -> None:
+        buffer = tuple(buffer)
+        for line in buffer:
+            if not isinstance(line, str):
+                err = f'expecting a native string, got {line!r}'
+                raise TypeError(err)
+        super().__init__(buffer, offset)
+
+    @property
+    def length(self) -> int:
+        """The number of lines in this block."""
+        return len(self)
+
+    @classmethod
+    def view(cls, index: int, buffer: Iterable[str], /) -> Self:
+        """Alternative constructor flipping the order of the arguments.
+
+        This is typically useful with :func:`enumerate`, namely this makes::
+
+            from itertools import starmap
+            blocks = list(starmap(Block.view, enumerate(src)))
+
+        equivalent to::
+
+            blocks = [Block(lines, index) for index, lines in enumerate(src)]
+        """
+        return cls(buffer, index)
+
+    @property
+    def window(self) -> slice:
+        """A slice representing this block in its source.
+
+        If *source* is the original source this block is contained within,
+        then ``assert source[block.window] == block`` is always satisfied.
+
+        Example::
+
+            source = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
+            block = Block(['4', '5', '6'], 3)
+            assert source[block.window] == ['4', '5', '6']
+        """
+        return slice(self.offset, self.offset + self.length)
+
+    def context(self, delta: int, limit: int) -> tuple[slice, slice]:
+        """A slice object indicating a context around this block.
+
+        :param delta: The number of context lines to show.
+        :param limit: The number of lines in the source the block belongs to.
+        :return: The slices for the 'before' and 'after' lines.
+
+        Example::
+
+            source = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
+            block = Block(['4', '5', '6'], 3)
+            before, after = block.context(2, 10)
+            assert source[before] == ['2', '3']
+            assert source[after] == ['7', '8']
+        """
+        block_stop = self.offset + self.length
+        before_slice = slice(max(0, self.offset - delta), min(self.offset, limit))
+        after_slice = slice(min(block_stop, limit), min(block_stop + delta, limit))
+        return before_slice, after_slice
+
+    def __repr__(self) -> str:
+        return f'{self.__class__.__name__}({self.buffer!r}, offset={self.offset})'
+
+    # fmt: off
+    @overload
+    def __getitem__(self, index: int, /) -> str: ...  # NoQA: E704
+    @overload
+    def __getitem__(self, index: slice, /) -> tuple[str, ...]: ...  # NoQA: E704
+    # fmt: on
+    def __getitem__(self, index: int | slice, /) -> str | tuple[str, ...]:  # NoQA: E301
+        """Get a line or a subset of the block's lines."""
+        return self.buffer[index]
+
+    def __eq__(self, other: object, /) -> bool:
+        if isinstance(other, Block):
+            # more efficient to first check the offsets
+            return (self.offset, self.buffer) == (other.offset, other.buffer)
+
+        if isinstance(other, (list, tuple)):
+            lines, offset = self.__cast(other)
+            # check offsets before computing len(lines) or len(self)
+            if (offset, len(lines)) != (self.offset, self.length):
+                return False
+
+            expect = starmap(Line.view, enumerate(self, offset))
+            # xref py310+: use strict=True
+            return all(starmap(Line.__eq__, zip(expect, lines)))
+        return False
+
+    def __lt__(self, other: object, /) -> bool:
+        # nothing can be a strict subset of the empty block
+        if not self:
+            return False
+
+        if isinstance(other, Block):
+            # more efficient to first check if the indices are valid before checking the lines
+            if _can_be_strict_in(self.offset, self.length, other.offset, other.length):
+                return self.buffer < other.buffer
+            return False
+
+        if isinstance(other, (list, tuple)):
+            lines, offset = self.__cast(other)
+            if not _can_be_strict_in(self.offset, 
self.length, offset, len(lines)):
+                return False
+
+            expect = list(starmap(Line.view, enumerate(self, self.offset)))
+            for candidate in windowed(lines, self.length):
+                # xref py310+: use strict=True
+                if all(starmap(Line.__eq__, zip(expect, candidate))):
+                    return True
+            return False
+
+        # other types are not supported for comparison
+        return NotImplemented
+
+    def __gt__(self, other: object, /) -> bool:
+        if isinstance(other, Block):
+            return other < self
+
+        if isinstance(other, (list, tuple)):
+            expecting, offset = self.__cast(other)
+            batchsize = len(expecting)
+            if not _can_be_strict_in(offset, batchsize, self.offset, self.length):
+                return False
+
+            for batch in windowed(self, batchsize):
+                candidate = starmap(Line.view, enumerate(batch, offset))
+                if all(actual == expect for actual, expect in zip(candidate, expecting)):
+                    return True
+            return False
+
+        # other types are not supported for comparison
+        return NotImplemented
+
+    # exposed :class:`tuple` interface
+
+    def index(
+        self,
+        value: object,
+        start: SupportsIndex = 0,
+        stop: SupportsIndex = sys.maxsize,
+        /,
+    ) -> int:
+        """See :meth:`tuple.index`."""
+        return self.buffer.index(value, start, stop)
+
+    def count(self, value: object, /) -> int:
+        """See :meth:`tuple.count`."""
+        return self.buffer.count(value)
+
+    def __cast(
+        self, other: Sequence[object] | tuple[Sequence[object], int], /
+    ) -> tuple[Sequence[object], int]:
+        """Try to parse *object* as a pair ``(lines, block offset)``."""
+        if len(other) == 2 and isinstance(other[0], Sequence) and isinstance(other[1], int):
+            # mypy does not know how to deduce that the length is 2
+            if isinstance(other, str):
+                msg = f'expecting a sequence of lines, got {other!r}'
+                raise TypeError(msg)
+            return other[0], other[1]
+        return other, self.offset
+
+
+def _can_be_strict_in(i1: int, l1: int, i2: int, l2: int) -> bool:
+    """Check that a block can be strictly contained in another block.
+
+    :param i1: The address (index) of the first block.
+ :param l1: The length of the first block. + :param i2: The address (index) of the second block. + :param l2: The length of the second block. + """ + j1, j2 = i1 + l1, i2 + l2 + # Case 1: i1 == i2 and j1 < j2 (block1 is at most block2[:-1]) + # Case 2: i1 > i2 and j1 <= j2 (block1 is at most block2[1:]) + return l1 < l2 and ((i1 >= i2) and (j1 < j2) or (i1 > i2) and (j1 <= j2)) diff --git a/sphinx/testing/_matcher/options.py b/sphinx/testing/_matcher/options.py new file mode 100644 index 00000000000..3376266e4f5 --- /dev/null +++ b/sphinx/testing/_matcher/options.py @@ -0,0 +1,172 @@ +from __future__ import annotations + +__all__ = ('Options', 'get_option') + +from typing import TYPE_CHECKING, TypedDict, final, overload + +if TYPE_CHECKING: + from typing import Final, Literal, TypeVar, Union + + FlagOption = Literal['color', 'ctrl', 'keepends', 'empty', 'compress', 'unique'] + + StripOption = Literal['strip', 'stripline'] + StripChars = Union[bool, str, None] + + FlavorOption = Literal['flavor'] + Flavor = Literal['re', 'fnmatch', 'exact'] + + # For some reason, mypy does not like Union of Literal, + # so we wrap the Literal types inside a bigger Literal. + OptionName = Literal[FlagOption, StripOption, FlavorOption] + + DT = TypeVar('DT') + + +@final +class Options(TypedDict, total=False): + """Options for a :class:`~sphinx.testing.matcher.LineMatcher` object. + + Some options directly act on the original string (e.g., :attr:`strip`), + while others (e.g., :attr:`stripline`) act on the lines obtained after + splitting the (transformed) original string. + """ + + color: bool + """Indicate whether to keep the ANSI escape sequences for colors. + + The default value is ``False``. + """ + + ctrl: bool + """Indicate whether to keep the non-color ANSI escape sequences. + + The default value is ``True``. + """ + + strip: StripChars + """Call :meth:`str.strip` on the original source. 
+ + The allowed values for :attr:`strip` are: + + * ``True`` -- remove leading and trailing whitespaces (the default). + * ``False`` -- keep leading and trailing whitespaces. + * a string (*chars*) -- remove leading and trailing characters in *chars*. + """ + + stripline: StripChars + """Call :meth:`str.strip` on the lines obtained after splitting the source. + + The allowed values for :attr:`strip` are: + + * ``True`` -- remove leading and trailing whitespaces. + * ``False`` -- keep leading and trailing whitespaces (the default). + * a string (*chars*) -- remove leading and trailing characters in *chars*. + """ + + keepends: bool + """If true, keep line breaks in the output. + + The default value is ``False``. + """ + + empty: bool + """If true, keep empty lines in the output. + + The default value is ``True``. + """ + + compress: bool + """Eliminate duplicated consecutive lines in the output. + + The default value is ``False``. + + For instance, ``['a', 'b', 'b', 'c'] -> ['a', 'b', 'c']``. + + Note that if :attr:`empty` is ``False``, empty lines are removed *before* + the duplicated lines, i.e., ``['a', 'b', '', 'b'] -> ['a', 'b']``. + """ + + unique: bool + """Eliminate multiple occurrences of lines in the output. + + The default value is ``False``. + + This option is only applied at the very end of the transformation chain, + after empty and duplicated consecutive lines might have been eliminated. + """ + + flavor: Flavor + """Indicate how strings are matched against non-compiled patterns. + + The allowed values for :attr:`flavor` are: + + * ``'exact'`` -- match lines using string equality (the default). + * ``'fnmatch'`` -- match lines using :mod:`fnmatch`-style patterns. + * ``'re'`` -- match lines using :mod:`re`-style patterns. + + This option only affects non-compiled patterns (i.e., those given + as :class:`str` and not :class:`~re.Pattern` objects). 
+ """ + + +@final +class CompleteOptions(TypedDict): + """Same as :class:`Options` but as a total dictionary.""" + + # Whenever a new option in :class:`Options` is added, do not + # forget to add it here and in :data:`DEFAULT_OPTIONS`. + + color: bool + ctrl: bool + strip: StripChars + stripline: StripChars + keepends: bool + empty: bool + compress: bool + unique: bool + flavor: Flavor + + +DEFAULT_OPTIONS: Final[CompleteOptions] = CompleteOptions( + color=False, + ctrl=True, + strip=True, + stripline=False, + keepends=False, + empty=True, + compress=False, + unique=False, + flavor='exact', +) +"""The default (read-only) options values.""" + +if TYPE_CHECKING: + _OptionsView = Union[Options, CompleteOptions] + + +# Disable the ruff formatter to minimize the number of empty lines. +# +# When an option is added, add an overloaded definition +# so that mypy can correctly deduce the option's type. +# +# fmt: off +# boolean-like options +@overload +def get_option(options: _OptionsView, name: FlagOption, /) -> bool: ... # NoQA: E704 +@overload +def get_option(options: _OptionsView, name: FlagOption, default: DT, /) -> bool | DT: ... # NoQA: E704 +# strip-like options +@overload +def get_option(options: _OptionsView, name: StripOption, /) -> StripChars: ... # NoQA: E501, E704 +@overload +def get_option(options: _OptionsView, name: StripOption, default: DT, /) -> StripChars | DT: ... # NoQA: E501, E704 +# miscellaneous options +@overload +def get_option(options: _OptionsView, name: FlavorOption, /) -> Flavor: ... # NoQA: E704 +@overload +def get_option(options: _OptionsView, name: FlavorOption, default: DT, /) -> Flavor | DT: ... 
# NoQA: E704 +# fmt: on +def get_option(options: _OptionsView, name: OptionName, /, *default: DT) -> object | DT: # NoQA: E302 + if name in options: + return options[name] + return default[0] if default else DEFAULT_OPTIONS[name] diff --git a/sphinx/testing/_matcher/util.py b/sphinx/testing/_matcher/util.py new file mode 100644 index 00000000000..71461f682c3 --- /dev/null +++ b/sphinx/testing/_matcher/util.py @@ -0,0 +1,200 @@ +"""Private utility functions for :mod:`sphinx.testing.matcher`.""" + +from __future__ import annotations + +__all__ = () + +import itertools +import textwrap +from collections import deque +from operator import itemgetter +from typing import TYPE_CHECKING, overload + +if TYPE_CHECKING: + import re + from collections.abc import Iterable, Iterator, Mapping, Sequence + from typing import TypeVar, Union + + from typing_extensions import Never + + from sphinx.testing._matcher.buffer import Block + + LinePattern = Union[str, re.Pattern[str]] + + _T = TypeVar('_T') + + +def consume(iterator: Iterator[object], /, n: int | None = None) -> None: + """Consume *n* values from *iterator*.""" + # use the C API to efficiently consume iterators + if n is None: + deque(iterator, maxlen=0) + else: + n = max(n, 0) + next(itertools.islice(iterator, n, n), None) + + +def unique_justseen(iterable: Iterable[_T], /) -> Iterator[_T]: + """Yields elements in order, ignoring serial duplicates. + + Credits go to :func:`!more_itertools.recipes.unique_justseen`. + """ + return map(next, map(itemgetter(1), itertools.groupby(iterable))) + + +def unique_everseen(iterable: Iterable[_T], /) -> Iterator[_T]: + """Yields elements in order, ignoring duplicates. + + Credits go to :func:`!more_itertools.recipes.unique_everseen`. 
+ """ + seen: set[_T] = set() + mark, pred = seen.add, seen.__contains__ + for element in itertools.filterfalse(pred, iterable): + mark(element) + yield element + + +def windowed(iterable: Iterable[_T], n: int, /) -> Iterator[deque[_T]]: + """Return a sliding window of width *n* over the given iterable. + + Credits go to :func:`!more_itertools.more.windowed`. + """ + iterator = iter(iterable) + window = deque(itertools.islice(iterator, n), maxlen=n) + if len(window) == n: + yield window + for group in iterator: + window.append(group) + yield window + + +def plural_form(noun: str, n: int, /) -> str: + """Append ``'s'`` to *noun* if *n* is more than *1*.""" + return noun + 's' if n > 1 else noun + + +def omit_message(n: int, /) -> str: + """The message to indicate that *n* lines where omitted.""" + noun = plural_form('line', n) + return f'... (omitted {n} {noun}) ...' + + +def omit_line(n: int, /) -> list[str]: + """Wrap :func:`omit_message` in a list, if any. + + If no lines are omitted, this returns the empty list. This is typically + useful when used in combination to ``lines.extend(omit_line(n))``. + """ + return [omit_message(n)] if n else [] + + +def make_prefix(indent: int, /, *, highlight: bool = False) -> str: + """Create the prefix used for indentation or highlighting.""" + prefix = ' ' * indent + return f'>{prefix[1:]}' if highlight else prefix + + +# fmt: off +@overload +def indent_source(text: str, /, *, sep: Never = ..., indent: int = ..., highlight: bool = ...) -> str: ... # NoQA: E501, E704 +@overload +def indent_source(lines: Iterable[str], /, *, sep: str = ..., indent: int = ..., highlight: bool = ...) -> str: ... # NoQA: E501, E704 +# fmt: on +def indent_source( # NoQA: E302 + src: Iterable[str], /, *, sep: str = '\n', indent: int = 4, highlight: bool = False +) -> str: + """Indent a string or an iterable of lines, returning a single string. + + :param indent: The number of indentation spaces. 
+ :param highlight: Indicate whether the prefix is a highlighter. + :return: An indented line, possibly highlighted. + """ + if isinstance(src, str): + prefix = make_prefix(indent, highlight=highlight) + return textwrap.indent(src, prefix) + return sep.join(indent_lines(src, indent=indent, highlight=highlight)) + + +def indent_lines( + lines: Iterable[str], /, *, indent: int = 4, highlight: bool = False +) -> list[str]: + """Return a list of lines prefixed by an indentation string. + + :param lines: The lines to indent. + :param indent: The number of indentation spaces. + :param highlight: Indicate whether the prefix is a highlighter. + :return: A list of lines, possibly highlighted. + """ + prefix = make_prefix(indent, highlight=highlight) + return [prefix + line for line in lines] + + +def prettify_patterns(patterns: Sequence[LinePattern], /, *, indent: int = 4) -> str: + """Prettify the *patterns* as a string to print.""" + source = (p if isinstance(p, str) else p.pattern for p in patterns) + return indent_source(source, indent=indent, highlight=False) + + +def get_debug_context( + source: Sequence[str], block: Block, /, *, context: int, indent: int = 4 +) -> list[str]: + """Get some context lines around *block* and highlight the *block*. + + :param source: The source containing the *block*. + :param block: A block to highlight. + :param context: The number of lines to display around the block. + :param indent: The number of indentation spaces. + :return: A list of formatted lines. 
+ """ + assert block <= source, 'the block must be contained in the source' + + logs: list[str] = [] + writelines = logs.extend + has_context = int(context > 0) + before, after = block.context(context, limit := len(source)) + + writelines(omit_line(has_context * before.start)) + writelines(indent_lines(source[before], indent=indent, highlight=False)) + writelines(indent_lines(block, indent=indent, highlight=True)) + writelines(indent_lines(source[after], indent=indent, highlight=False)) + writelines(omit_line(has_context * (limit - after.stop))) + + return logs + + +def _highlight( + source: Iterable[str], sections: Mapping[int, int], *, prefix: str, highlight_prefix: str +) -> Iterator[str]: + iterator = enumerate(source) + for index, line in iterator: + if count := sections.get(index, None): + yield highlight_prefix + line # the first line of the block + # yield the remaining lines of the block + tail = map(itemgetter(1), itertools.islice(iterator, count - 1)) + yield from map(highlight_prefix.__add__, tail) + else: + yield prefix + line + + +def highlight( + source: Iterable[str], + sections: Mapping[int, int] | None = None, + /, + *, + indent: int = 4, + keepends: bool = False, +) -> str: + """Highlight one or more blocks in *source*. + + :param source: The source to format. + :param sections: The blocks to highlight given as their offset and size. + :param indent: The number of indentation spaces. + :param keepends: Indicate whether the *source* contains line breaks or not. + :return: An indented text. 
+ """ + sep = '' if keepends else '\n' + if sections: + tab, accent = make_prefix(indent), make_prefix(indent, highlight=True) + lines = _highlight(source, sections, prefix=tab, highlight_prefix=accent) + return sep.join(lines) + return indent_source(source, sep=sep, indent=indent, highlight=False) diff --git a/sphinx/testing/matcher.py b/sphinx/testing/matcher.py new file mode 100644 index 00000000000..99b6fe2c3b8 --- /dev/null +++ b/sphinx/testing/matcher.py @@ -0,0 +1,359 @@ +from __future__ import annotations + +__all__ = ('Options', 'LineMatcher') + +import contextlib +import fnmatch +import re +from itertools import starmap +from types import MappingProxyType +from typing import TYPE_CHECKING, final + +import pytest + +from sphinx.testing._matcher import util +from sphinx.testing._matcher.buffer import Block, Line +from sphinx.testing._matcher.options import DEFAULT_OPTIONS, Options, get_option +from sphinx.util.console import strip_colors, strip_control_sequences + +if TYPE_CHECKING: + from collections.abc import Collection, Generator, Iterable, Iterator, Sequence + from typing import Literal + + from typing_extensions import Self, Unpack + + from sphinx.testing._matcher.options import CompleteOptions + from sphinx.testing._matcher.util import LinePattern + + PatternType = Literal['line', 'block'] + + +def clean(text: str, /, **options: Unpack[Options]) -> tuple[str, ...]: + """Split *text* into lines. + + :param text: The text to get the cleaned lines of. + :param options: The processing options. + :return: A list of cleaned lines. + """ + if not get_option(options, 'ctrl'): + # Non-color ANSI esc. seq. must be stripped before colors; + # see :func:`sphinx.util.console.strip_escape_sequences`. 
+ text = strip_control_sequences(text) + + if not get_option(options, 'color'): + text = strip_colors(text) + + chars = get_option(options, 'strip') + if isinstance(chars, bool) and chars: + text = text.strip() + elif isinstance(chars, str) or chars is None: + text = text.strip(chars) + elif chars is not False: + msg = 'expecting a boolean, a string or None for %r, got: %r' % ('strip', chars) + raise TypeError(msg) + + keepends = get_option(options, 'keepends') + lines: Iterable[str] = text.splitlines(keepends=keepends) + + chars = get_option(options, 'stripline') + if isinstance(chars, bool) and chars: + lines = map(str.strip, lines) + elif isinstance(chars, str) or chars is None: + lines = (line.strip(chars) for line in lines) + elif chars is not False: + msg = 'expecting a boolean, a string or None for %r, got: %r' % ('stripline', chars) + raise TypeError(msg) + + # Removing empty lines first ensures that serial duplicates can + # be eliminated in one cycle. Inverting the order of operations + # is not possible since empty lines may 'hide' duplicated lines. 
+ if not get_option(options, 'empty'): + lines = filter(None, lines) + + if get_option(options, 'unique'): + # 'compress' has no effect when 'unique' is set + lines = util.unique_everseen(lines) + elif get_option(options, 'compress'): + lines = util.unique_justseen(lines) + + return tuple(lines) + + +def _to_lines_pattern(expect: LinePattern | Collection[LinePattern]) -> Sequence[LinePattern]: + return [expect] if isinstance(expect, (str, re.Pattern)) else list(expect) + + +def _to_block_pattern(expect: LinePattern | Sequence[LinePattern]) -> Sequence[LinePattern]: + """Make *pattern* compatible for block-matching.""" + if isinstance(expect, str): + return expect.splitlines() + if isinstance(expect, re.Pattern): + return [expect] + return expect + + +@final +class LineMatcher: + """Helper object for matching output lines.""" + + __slots__ = ('_content', '_options', '_stack') + + def __init__(self, content: str, /, **options: Unpack[Options]) -> None: + """Construct a :class:`LineMatcher` for the given string content. + + :param content: The source string. + :param options: The matcher options. + """ + self._content = content + # always complete the set of options for this object + self._options: CompleteOptions = DEFAULT_OPTIONS | options + # stack of cached cleaned lines (with a possible indirection) + self._stack: list[int | tuple[str, ...] | None] = [None] + + @classmethod + def parse( + cls, lines: Iterable[str], sep: str = '\n', /, **options: Unpack[Options] + ) -> Self: + """Construct a :class:`LineMatcher` object from a list of lines. + + This is typically useful when writing tests for :class:`LineMatcher` + since writing the lines instead of a long string is usually cleaner. 
+ """ + return cls(sep.join(lines), **options) + + def __iter__(self) -> Iterator[Line]: + """The cached lines as :class:`~sphinx.testing._matcher.Line` instances.""" + return starmap(Line.view, enumerate(self.lines())) + + @property + def content(self) -> str: + """The raw content.""" + return self._content + + @property + def options(self) -> CompleteOptions: + """Return a *read-only* view on the (complete) set of options. + + The runtime type of this field is a :class:`!MappingProxyType` and + protects against *runtime* destructive operations (which would not + have been the case solely with a type annotation). + """ + return MappingProxyType(self._options) # type: ignore[return-value] + + @contextlib.contextmanager + def use(self, /, **options: Unpack[Options]) -> Generator[None, None, None]: + """Temporarily set the set of options for this object to *options*. + + If an option is not specified in *options*, its default value is used. + """ + local_options = DEFAULT_OPTIONS | options + with self.override(**local_options): + yield + + @contextlib.contextmanager + def override(self, /, **options: Unpack[Options]) -> Generator[None, None, None]: + """Temporarily extend the set of options for this object using *options*.""" + saved_options = self._options.copy() + self._options |= options + self._stack.append(None) # prepare the next cache entry + try: + yield + finally: + self._stack.pop() # pop the cached lines for this scope + self._options = saved_options + + def lines(self) -> tuple[str, ...]: + """The content lines, cleaned up according to the current options. + + This method is efficient in the sense that the lines are computed + once per set of options and cached for subsequent calls. 
+ """ + stack = self._stack + assert stack, 'invalid stack state' + cached = stack[-1] + + if cached is None: + # compute for the first time the value + cached = clean(self.content, **self.options) + # check if the value is the same as any of a previously cached value + for addr, value in enumerate(stack): + if value == cached: + stack[-1] = addr # indirection + return cached + # the value did not exist yet, so we store it at most once + stack[-1] = cached + return cached + + if isinstance(cached, int): + value = self._stack[cached] + assert isinstance(value, tuple) + return value + + assert isinstance(cached, tuple) + return cached + + def match(self, expect: LinePattern | Collection[LinePattern], /) -> Sequence[Line]: + """Same as :meth:`itermatch` but returns a sequence of lines.""" + return list(self.itermatch(expect)) + + def itermatch(self, expect: LinePattern | Collection[LinePattern], /) -> Iterator[Line]: + """Yield the lines that match one (or more) of the given patterns. + + When one or more patterns are given, the order of evaluation is the + same as they are given (or arbitrary if they are given in a set). + """ + patterns = _to_lines_pattern(expect) + matchers = [pattern.match for pattern in self.__compile(patterns)] + + def predicate(line: Line) -> bool: + return any(matcher(str(line)) for matcher in matchers) + + yield from filter(predicate, self) + + def find(self, expect: str | Sequence[LinePattern], /) -> Sequence[Block]: + """Same as :meth:`iterfind` but returns a sequence of blocks.""" + return list(self.iterfind(expect)) + + def iterfind(self, expect: str | Sequence[LinePattern], /) -> Iterator[Block]: + """Yield non-overlapping blocks matching the given line patterns. + + :param expect: The line patterns that a block must satisfy. + :return: An iterator on the matching blocks. + + When *expect* is a single string, it is split into lines, each of + which corresponding to the pattern a block's line must satisfy. + + .. 
note:: + + This interface does not support single :class:`~re.Pattern` + objects as they could be interpreted as a line or a block + pattern. + """ + patterns = _to_block_pattern(expect) + + lines = self.lines() + # early abort if there are more expected lines than actual ones + if (width := len(patterns)) > len(lines): + return + + compiled_patterns = self.__compile(patterns) + + block_iterator = enumerate(util.windowed(lines, width)) + for start, block in block_iterator: + # check if the block matches the pattern line by line + if all(pattern.match(line) for pattern, line in zip(compiled_patterns, block)): + yield Block(block, start) + # Consume the iterator so that the next block consists + # of lines just after the block that was just yielded. + # + # Note that since the iterator yielded *block*, its + # state is already on the "next" line, so we need to + # advance by the block size - 1 only. + util.consume(block_iterator, width - 1) + + def assert_match( + self, expect: LinePattern | Collection[LinePattern], /, *, count: int | None = None + ) -> None: + """Assert that there exist one or more lines matching *pattern*. + + :param expect: One or more patterns the lines must satisfy. + :param count: If specified, the exact number of matching lines. + """ + patterns = _to_lines_pattern(expect) + self._assert_found('line', patterns, count=count) + + def assert_not_match( + self, expect: LinePattern | Collection[LinePattern], /, *, context: int = 3 + ) -> None: + """Assert that there are no lines matching *pattern*. + + :param expect: One or more patterns the lines must not satisfy. + :param context: Number of lines to print around a failing line. + """ + patterns = _to_lines_pattern(expect) + self._assert_not_found('line', patterns, context_size=context) + + def assert_block( + self, expect: str | Sequence[LinePattern], /, *, count: int | None = None + ) -> None: + """Assert that there exist one or more blocks matching the *patterns*. 
+ + :param expect: The line patterns that a block must satisfy. + :param count: The number of blocks that should be found. + + When *expect* is a single string, it is split into lines, each + of which corresponding to the pattern a block's line must satisfy. + """ + patterns = _to_block_pattern(expect) + self._assert_found('block', patterns, count=count) + + def assert_not_block( + self, expect: str | Sequence[LinePattern], /, *, context: int = 1 + ) -> None: + """Assert that no block matches the *patterns*. + + :param expect: The line patterns that a block must satisfy. + :param context: Number of lines to print around a failing block. + + When *expect* is a single string, it is split into lines, each + of which corresponding to the pattern a block's line must satisfy. + + Use :data:`sys.maxsize` to show all capture lines. + """ + patterns = _to_block_pattern(expect) + self._assert_not_found('block', patterns, context_size=context) + + def _assert_found( + self, what: PatternType, /, patterns: Sequence[LinePattern], *, count: int | None + ) -> None: + blocks = self.iterfind(patterns) + + if count is None: + if next(blocks, None): + return + + keepends = get_option(self.options, 'keepends') + ctx = util.highlight(self.lines(), keepends=keepends) + pat = util.prettify_patterns(patterns) + logs = [f'{what} pattern', pat, 'not found in', ctx] + pytest.fail('\n\n'.join(logs)) + + indices = {block.offset: len(block) for block in blocks} + if (found := len(indices)) == count: + return + + keepends = get_option(self.options, 'keepends') + ctx = util.highlight(self.lines(), indices, keepends=keepends) + pat = util.prettify_patterns(patterns) + noun = util.plural_form(what, count) + logs = [f'found {found} != {count} {noun} matching', pat, 'in', ctx] + pytest.fail('\n\n'.join(logs)) + + def _assert_not_found( + self, what: PatternType, /, patterns: Sequence[LinePattern], *, context_size: int + ) -> None: + lines = self.lines() + if (count := len(patterns)) > len(lines): + 
return + + compiled_patterns = self.__compile(patterns) + + for start, block in enumerate(util.windowed(lines, count)): + if all(pattern.match(line) for pattern, line in zip(compiled_patterns, block)): + pattern = util.prettify_patterns(patterns) + context = util.get_debug_context( + lines, Block(block, start), context=context_size + ) + logs = [f'{what} pattern', pattern, 'found in', '\n'.join(context)] + pytest.fail('\n\n'.join(logs)) + + def __compile(self, patterns: Iterable[LinePattern], /) -> Sequence[re.Pattern[str]]: + flavor = get_option(self.options, 'flavor') + if flavor == 'fnmatch': + patterns = [fnmatch.translate(p) if isinstance(p, str) else p for p in patterns] + elif flavor == 'exact': + patterns = [re.escape(p) if isinstance(p, str) else p for p in patterns] + + # mypy does not like map + re.compile() although it is correct but + # this is likely due to https://github.com/python/mypy/issues/11880 + return [re.compile(pattern) for pattern in patterns] diff --git a/sphinx/testing/util.py b/sphinx/testing/util.py index d1de8ea2b74..a6a0519b33a 100644 --- a/sphinx/testing/util.py +++ b/sphinx/testing/util.py @@ -18,6 +18,7 @@ import sphinx.application import sphinx.locale import sphinx.pycode +from sphinx.testing.matcher import LineMatcher from sphinx.util.console import strip_colors from sphinx.util.docutils import additional_nodes @@ -190,6 +191,16 @@ def warning(self) -> StringIO: assert isinstance(self._warning, StringIO) return self._warning + @property + def stdout(self) -> LineMatcher: + """The line-matcher object on the status messages.""" + return LineMatcher(self.status.getvalue()) + + @property + def stderr(self) -> LineMatcher: + """The line-matcher object on the warning messages.""" + return LineMatcher(self.warning.getvalue()) + def cleanup(self, doctrees: bool = False) -> None: sys.path[:] = self._saved_path _clean_up_global_state() diff --git a/tests/test_testing/__init__.py b/tests/test_testing/__init__.py new file mode 100644 index 
00000000000..e69de29bb2d diff --git a/tests/test_testing/test_matcher.py b/tests/test_testing/test_matcher.py new file mode 100644 index 00000000000..b89049f8879 --- /dev/null +++ b/tests/test_testing/test_matcher.py @@ -0,0 +1,555 @@ +from __future__ import annotations + +import dataclasses +import itertools +from functools import cached_property +from typing import TYPE_CHECKING, cast + +import _pytest.outcomes +import pytest + +import sphinx.util.console as term +from sphinx.testing._matcher import util +from sphinx.testing._matcher.buffer import Block, Line +from sphinx.testing._matcher.options import DEFAULT_OPTIONS, CompleteOptions, Options +from sphinx.testing.matcher import LineMatcher, clean + +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + from typing import Final + + from sphinx.testing._matcher.options import Flavor + from sphinx.testing._matcher.util import LinePattern + + +@dataclasses.dataclass +class Source: + total: int + """The total number of lines in the source.""" + start: int + """The start index of the main block.""" + width: int + """The size of the main block.""" + dedup: int + """Number of times the main block is duplicated.""" + + @property + def ncopy(self) -> int: + """The number of copies of the base block in the main block.""" + return self.dedup + 1 + + @property + def stop(self) -> int: + """Stop index of the main block.""" + # possibly out of bounds if the test fixture requires + # more copies than possible + return self.start + self.ncopy * self.width + + @cached_property + def lines(self) -> list[str]: + """The source's lines.""" + return [*self.head, *self.main, *self.tail] + + @cached_property + def text(self) -> str: + """The source as a single string.""" + return '\n'.join(self.lines) + + @cached_property + def head(self) -> list[str]: + """The lines before the highlighted block.""" + return list(map(self.outer_line, range(self.start))) + + @cached_property + def tail(self) -> list[str]: + """The lines 
after the highlighted block.""" + return list(map(self.outer_line, range(self.stop, self.total))) + + @cached_property + def base(self) -> list[str]: + """Single main block (no duplication).""" + return list(map(self.block_line, range(self.start, self.start + self.width))) + + @cached_property + def main(self) -> list[str]: + """The block that could be highlighted (possibly duplicated).""" + parts = itertools.repeat(self.base, self.ncopy) + block = list(itertools.chain.from_iterable(parts)) + assert len(block) == self.ncopy * self.width, 'ill-formed block' + return block + + def peek_prev(self, context_size: int) -> list[str]: + """The context lines before the main block.""" + imin = max(0, self.start - context_size) + peek = [Source.outer_line(i) for i in range(imin, self.start)] + assert len(peek) <= context_size + return peek + + def peek_next(self, context_size: int) -> list[str]: + """The context lines after the main block.""" + imax = min(self.stop + context_size, self.total) + peek = [Source.outer_line(i) for i in range(self.stop, imax)] + assert len(peek) <= context_size + return peek + + @staticmethod + def outer_line(i: int) -> str: + """Line not in the main block.""" + return f'L{i}' + + @staticmethod + def block_line(i: int) -> str: + """Line in the main block.""" + return f'B{i}' + + +def make_debug_context( + block: list[str], + /, + view_prev: list[str], + omit_prev: int, + view_next: list[str], + omit_next: int, + *, + context_size: int, + indent: int = 4, +) -> list[str]: + """Other API for :func:`sphinx.testing._matcher.util.get_debug_context`.""" + lines: list[str] = [] + writelines = lines.extend + writelines(util.omit_line(bool(context_size) * omit_prev)) + writelines(util.indent_lines(view_prev, indent=indent, highlight=False)) + writelines(util.indent_lines(block, indent=indent, highlight=True)) + writelines(util.indent_lines(view_next, indent=indent, highlight=False)) + writelines(util.omit_line(bool(context_size) * omit_next)) + return 
lines + + +class TestClean: + # options with no cleaning phase (equivalent to text.striplines(True)) + noop: Final[CompleteOptions] = CompleteOptions( + color=True, + ctrl=True, + strip=False, + stripline=False, + keepends=True, + empty=True, + compress=False, + unique=False, + flavor='exact', + ) + + @classmethod + def check(cls, text: str, options: Options, expect: Sequence[str]) -> None: + options = cast(Options, cls.noop) | options + assert clean(text, **options) == tuple(expect) + + @pytest.mark.parametrize( + ('text', 'options', 'expect'), + [ + ('a ', Options(), ['a ']), + ('a\nb ', Options(), ['a\n', 'b ']), + ( + '\n'.join(['a', 'a', '', 'a', 'b', 'c', 'a']), + Options(keepends=False), + ['a', 'a', '', 'a', 'b', 'c', 'a'], + ), + ], + ) + def test_base(self, text, options, expect): + self.check(text, options, expect) + + @pytest.mark.parametrize( + ('text', 'options', 'expect'), + [ + ('a\nb ', Options(strip=True, stripline=False), ['a\n', 'b']), + ('a\nb ', Options(strip=False, stripline=True), ['a', 'b']), + ('a\n b ', Options(strip=True, stripline=True), ['a', 'b']), + ], + ) + def test_strip(self, text, options, expect): + self.check(text, options, expect) + + @pytest.mark.parametrize( + ('text', 'options', 'expect'), + [ + ( + '\n'.join(['a', 'a', '', 'a', 'b', 'c', 'a']), + Options(keepends=False, compress=True), + ['a', '', 'a', 'b', 'c', 'a'], + ), + ( + '\n'.join(['a', 'a', '', 'a', 'b', 'c', 'a']), + Options(keepends=False, unique=True), + ['a', '', 'b', 'c'], + ), + ( + '\n'.join(['a', 'a', '', 'a', 'b', 'c', 'a']), + Options(keepends=False, compress=False, unique=True), + ['a', '', 'b', 'c'], + ), + ], + ) + def test_eliminate_keep_empty(self, text, options, expect): + self.check(text, options, expect) + + @pytest.mark.parametrize( + ('text', 'options', 'expect'), + [ + ( + '\n'.join(['a', 'a', '', 'a', 'b', 'c', 'a']), + Options(keepends=False, empty=False, compress=True), + ['a', 'b', 'c', 'a'], + ), + ( + '\n'.join(['a', 'a', '', 'a', 'b', 
'c', 'a']), + Options(keepends=False, empty=False, unique=True), + ['a', 'b', 'c'], + ), + ( + '\n'.join(['a', 'a', '', 'a', 'b', 'c', 'a']), + Options(keepends=False, empty=False, compress=False, unique=True), + ['a', 'b', 'c'], + ), + ], + ) + def test_eliminate(self, text, options, expect): + self.check(text, options, expect) + + +def test_line_operators(): + assert Line('a', 1) == 'a' + assert Line('a', 1) == ('a', 1) + assert Line('a', 1) == ['a', 1] + + assert Line('a', 2) != 'b' + assert Line('a', 2) != ('a', 1) + assert Line('a', 2) != ['a', 1] + + # order + assert Line('ab', 1) > 'a' + assert Line('a', 1) < 'ab' + assert Line('a', 1) <= 'a' + assert Line('a', 1) >= 'a' + + assert Line('ab', 1) > ('a', 1) + assert Line('a', 1) < ('ab', 1) + assert Line('a', 1) <= ('a', 1) + assert Line('a', 1) >= ('a', 1) + + +@pytest.mark.parametrize('expect', [('a', 'b', 'c'), ('a', ('b', 2), Line('c', 3))]) +def test_block_operators(expect: Sequence[str]) -> None: + lines = ['a', 'b', 'c'] + assert Block(lines, 1) == expect + assert Block(lines, 1) == [expect, 1] + + assert Block(lines, 1) != [*expect, 'x'] + assert Block(lines, 1) != [expect, 2] + + assert Block(lines, 1) <= expect + assert Block(lines, 1) <= [expect, 1] + + assert Block(lines[:2], 1) <= expect + assert Block(lines[:2], 1) <= [expect, 1] + + assert Block(lines[:2], 1) < expect + assert Block(lines[:2], 1) < [expect, 1] + + assert Block(lines, 1) >= expect + assert Block(lines, 1) >= [expect, 1] + + assert Block([*lines, 'd'], 1) > expect + assert Block([*lines, 'd'], 1) > [expect, 1] + + assert Block(['a', 'b'], 1).context(delta=4, limit=5) == (slice(0, 1), slice(3, 5)) + assert Block(['a', 'b'], 3).context(delta=2, limit=9) == (slice(1, 3), slice(5, 7)) + + +def test_options_class(): + # ensure that the classes are kept synchronized + missing_keys = Options.__annotations__.keys() - CompleteOptions.__annotations__ + assert not missing_keys, f'missing fields in proxy class: {", ".join(missing_keys)}' + + 
 foreign_keys = CompleteOptions.__annotations__.keys() - Options.__annotations__ + assert not foreign_keys, f'foreign fields in proxy class: {", ".join(foreign_keys)}' + + +@pytest.mark.parametrize('options', [DEFAULT_OPTIONS, LineMatcher('').options]) +def test_matcher_default_options(options: Mapping[str, object]) -> None: + """Check the synchronization of default options and classes in Sphinx.""" + processed = set() + + def check(option: str, default: object) -> None: + assert option in options + assert options[option] == default + processed.add(option) + + check('color', False) + check('ctrl', True) + check('strip', True) + check('stripline', False) + check('keepends', False) + check('empty', True) + check('compress', False) + check('unique', False) + check('flavor', 'exact') + + # check that there are no left over options + assert sorted(processed) == sorted(Options.__annotations__) + + +def test_matcher_cache(): + source = [term.blue('hello'), '', 'world'] + # keep colors and empty lines + matcher = LineMatcher.parse(source, color=True, empty=True) + + stack = matcher._stack + + assert len(stack) == 1 + assert stack[0] is None + + cached = matcher.lines() + assert len(stack) == 1 + assert stack[0] is cached + assert cached == (term.blue('hello'), '', 'world') + + assert matcher.lines() is cached # cached result + assert len(stack) == 1 + + with matcher.override(): + assert len(stack) == 2 + assert stack[0] is cached + assert stack[1] is None + + assert matcher.lines() == cached + assert len(stack) == 2 + assert stack[1] == 0 # do not duplicate the lines + + assert matcher.lines() is cached + assert len(stack) == 2 + + assert len(stack) == 1 + assert stack[0] is cached + assert matcher.lines() is cached + + with matcher.override(color=False): + assert len(stack) == 2 + assert stack[0] is cached + assert stack[1] is None + + assert matcher.lines() == ('hello', '', 'world') + assert len(stack) == 2 + assert stack[1] == ('hello', '', 'world') + + +def 
test_matcher_match(): + lines = ['hello', 'world', 'yay', '!', '!', '!'] + matcher = LineMatcher.parse(lines, flavor='exact') + assert matcher.match({'hello', 'yay'}) == [('hello', 0), ('yay', 2)] + + +def test_matcher_find(): + lines = ['hello', 'world', 'yay', 'hello', 'world', '!', 'yay'] + matcher = LineMatcher.parse(lines) + assert matcher.find(['hello', 'world']) == [ + [('hello', 0), ('world', 1)], + [('hello', 3), ('world', 4)], + ] + + +@pytest.mark.parametrize( + ('lines', 'flavor', 'pattern', 'expect'), + [ + (['1', 'b', '3', 'a', '5', '!'], 'exact', ('a', 'b'), [('b', 1), ('a', 3)]), + (['blbl', 'yay', 'hihi', '^o^'], 'fnmatch', '*[ao]*', [('yay', 1), ('^o^', 3)]), + (['111', 'hello', 'world', '222'], 're', r'\d+', [('111', 0), ('222', 3)]), + ], +) +def test_matcher_flavor( + lines: list[str], + flavor: Flavor, + pattern: Sequence[LinePattern], + expect: Sequence[tuple[str, int]], +) -> None: + matcher = LineMatcher.parse(lines, flavor=flavor) + assert matcher.match(pattern) == expect + + +@pytest.mark.parametrize('dedup', range(3)) +@pytest.mark.parametrize(('maxsize', 'start', 'count'), [(10, 3, 4)]) +def test_block_exists(maxsize, start, count, dedup): + # 'maxsize' might be smaller than start + (dedup + 1) * count + # but it is fine since stop indices are clamped internally + source = Source(maxsize, start, count, dedup=dedup) + matcher = LineMatcher(source.text) + + # the main block is matched exactly once + matcher.assert_block(source.main, count=1) + assert source.base * source.ncopy == source.main + matcher.assert_block(source.base, count=source.ncopy) + + for subidx in range(1, count + 1): + # check that the sub-blocks are matched correctly + subblock = [Source.block_line(start + i) for i in range(subidx)] + matcher.assert_block(subblock, count=source.ncopy) + + +@pytest.mark.parametrize( + ('pattern', 'count', 'expect'), + [ + ( + ['x', 'y'], + None, + [ + 'block pattern', + '', + ' x', + ' y', + '', + 'not found in', + '', + ' a', + ' b', + 
' c', + ' a', + ' b', + ' d', + ], + ), + ( + ['a', 'b'], + 1, + [ + 'found 2 != 1 block matching', + '', + ' a', + ' b', + '', + 'in', + '', + '> a', + '> b', + ' c', + '> a', + '> b', + ' d', + ], + ), + (['a', 'b'], 2, None), + ( + ['a', 'b'], + 3, + [ + 'found 2 != 3 blocks matching', + '', + ' a', + ' b', + '', + 'in', + '', + '> a', + '> b', + ' c', + '> a', + '> b', + ' d', + ], + ), + ], +) +def test_block_exists_error(pattern, count, expect): + lines = ['a', 'b', 'c', 'a', 'b', 'd'] + matcher = LineMatcher.parse(lines) + + if expect is None: + matcher.assert_block(pattern, count=count) + return + + with pytest.raises(_pytest.outcomes.Failed, match='.*') as exc_info: + matcher.assert_block(pattern, count=count) + + actual = exc_info.value.msg + assert actual is not None + assert actual.splitlines() == expect + + +# fmt: off +@pytest.mark.parametrize(('maxsize', 'start', 'count'), [ + # combinations of integers (a, b, c) such that c >= 1 and a >= b + c + (1, 0, 1), + (2, 0, 1), (2, 0, 2), (2, 1, 1), + (3, 0, 1), (3, 0, 2), (3, 0, 3), (3, 1, 1), (3, 1, 2), (3, 2, 1), +]) +# fmt: on +@pytest.mark.parametrize('dedup', range(3)) +def test_block_not_exist(maxsize, start, count, dedup): + # 'maxsize' might be smaller than start + (dedup + 1) * count + # but it is fine since stop indices are clamped internally + source = Source(maxsize, start, count, dedup=dedup) + matcher = LineMatcher(source.text) + # do not use 'match' with pytest.raises() since the diff + # output is hard to parse, but use == with lists instead + with pytest.raises(_pytest.outcomes.Failed, match='.*') as exc_info: + matcher.assert_not_block(source.main, context=0) + + actual = exc_info.value.msg + assert actual is not None + + expect: list[str] = ['block pattern', ''] + expect.extend(util.indent_lines(source.main)) + expect.extend(['', 'found in', '']) + expect.extend(util.indent_lines(source.main, highlight=True)) + assert actual.splitlines() == expect + + +@pytest.mark.parametrize( + 
('maxsize', 'start', 'count', 'dedup', 'omit_prev', 'omit_next', 'context_size'), + [ + # with small context + (10, 2, 4, 0, 1, 3, 1), # [--, L1, B2, B3, B4, B5, L6, --, --, --] + (10, 3, 4, 0, 2, 2, 1), # [--, --, L2, B3, B4, B5, B6, L7, --, --] + (10, 4, 4, 0, 3, 1, 1), # [--, --, --, L3, B4, B5, B6, B7, L8, --] + # with large context + (10, 2, 4, 0, 0, 1, 3), # [L0, L1, B2, B3, B4, B5, L6, L7, L8, --] + (10, 4, 4, 0, 0, 0, 5), # [L0, L1, L2, L3, B4, B5, B6, B7, L8, L9] + (10, 4, 4, 0, 1, 0, 3), # [--, L1, L2, L3, B4, B5, B6, B7, L8, L9] + # with duplicated block and small context + # [--, L1, (B2, B3, B4, B5) (2x), L10, -- (9x)] + (20, 2, 4, 1, 1, 9, 1), + # [--, --, L2, (B3, B4, B5, B6) (2x), L10, -- (8x)] + (20, 3, 4, 1, 2, 8, 1), + # [--, --, --, L3, (B4, B5, B6, B7) (2x), L11, -- (7x)] + (20, 4, 4, 1, 3, 7, 1), + # with duplicated block and large context + # [L0, L1, (B2, B3, B4, B5) (2x), L10, L11, L12, L13, L14, -- (5x)] + (20, 2, 4, 1, 0, 5, 5), + # [L0, L1, (B2, B3, B4, B5) (3x), L17, L18, L19] + (20, 2, 4, 2, 0, 0, 10), + # [--, --, --, --, --, L5, L6, L7, (B8, B9) (5x), L18, L19] + (20, 8, 2, 4, 5, 0, 3), + ], +) +def test_block_not_exist_debug( + maxsize, start, count, dedup, omit_prev, omit_next, context_size +): + source = Source(maxsize, start, count, dedup=dedup) + matcher = LineMatcher(source.text) + # do not use 'match' with pytest.raises() since the diff + # output is hard to parse, but use == with lists instead + with pytest.raises(_pytest.outcomes.Failed, match='.*') as exc_info: + matcher.assert_not_block(source.main, context=context_size) + + actual = exc_info.value.msg + assert actual is not None + + expect: list[str] = ['block pattern', ''] + expect.extend(util.indent_lines(source.main)) + expect.extend(['', 'found in', '']) + expect.extend(make_debug_context( + source.main, + source.peek_prev(context_size), omit_prev, + source.peek_next(context_size), omit_next, + context_size=context_size, indent=4, + )) + assert actual.splitlines() == 
expect From 0fe03c033c0eec6e25d0459307a2062cc204f75c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 30 Mar 2024 11:05:30 +0100 Subject: [PATCH 05/66] enhance ANSI functions --- sphinx/util/_io.py | 4 +- sphinx/util/console.py | 104 +++++++++++++++++++++------ sphinx/util/exceptions.py | 4 +- tests/test_util/test_util_console.py | 102 ++++++++++++++++++++++++++ 4 files changed, 189 insertions(+), 25 deletions(-) create mode 100644 tests/test_util/test_util_console.py diff --git a/sphinx/util/_io.py b/sphinx/util/_io.py index e140cf12081..3689d9e4511 100644 --- a/sphinx/util/_io.py +++ b/sphinx/util/_io.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING -from sphinx.util.console import _strip_escape_sequences +from sphinx.util.console import strip_escape_sequences if TYPE_CHECKING: from typing import Protocol @@ -25,7 +25,7 @@ def __init__( def write(self, text: str, /) -> None: self.stream_term.write(text) - self.stream_file.write(_strip_escape_sequences(text)) + self.stream_file.write(strip_escape_sequences(text)) def flush(self) -> None: if hasattr(self.stream_term, 'flush'): diff --git a/sphinx/util/console.py b/sphinx/util/console.py index 8a5fe3d51fc..da4647c9586 100644 --- a/sphinx/util/console.py +++ b/sphinx/util/console.py @@ -11,24 +11,71 @@ if TYPE_CHECKING: from typing import Final + # fmt: off + def reset(text: str) -> str: ... # NoQA: E704 + def bold(text: str) -> str: ... # NoQA: E704 + def faint(text: str) -> str: ... # NoQA: E704 + def standout(text: str) -> str: ... # NoQA: E704 + def underline(text: str) -> str: ... # NoQA: E704 + def blink(text: str) -> str: ... # NoQA: E704 + + def black(text: str) -> str: ... # NoQA: E704 + def white(text: str) -> str: ... # NoQA: E704 + def red(text: str) -> str: ... # NoQA: E704 + def yellow(text: str) -> str: ... # NoQA: E704 + def blue(text: str) -> str: ... # NoQA: E704 + def purple(text: str) -> str: ... 
# NoQA: E704 + def turquoise(text: str) -> str: ... # NoQA: E704 + + def darkgray(text: str) -> str: ... # NoQA: E704 + def lightgray(text: str) -> str: ... # NoQA: E704 + def darkred(text: str) -> str: ... # NoQA: E704 + def brown(text: str) -> str: ... # NoQA: E704 + def darkblue(text: str) -> str: ... # NoQA: E704 + def fuchsia(text: str) -> str: ... # NoQA: E704 + def teal(text: str) -> str: ... # NoQA: E704 + # fmt: on + try: # check if colorama is installed to support color on Windows import colorama except ImportError: colorama = None +_CSI: Final[str] = re.escape('\x1b[') # 'ESC [': Control Sequence Introducer +_OSC: Final[str] = re.escape('\x1b]') # 'ESC ]': Operating System Command +_BELL: Final[str] = re.escape('\x07') # bell command -_CSI = re.escape('\x1b[') # 'ESC [': Control Sequence Introducer -_ansi_re: re.Pattern[str] = re.compile( - _CSI + r""" - ( - (\d\d;){0,2}\d\dm # ANSI colour code - | - \dK # ANSI Erase in Line - )""", - re.VERBOSE | re.ASCII) +# ANSI escape sequences for colors _ansi_color_re: Final[re.Pattern[str]] = re.compile('\x1b.*?m') +# ANSI escape sequences supported by vt100 terminal (non-colors) +_ansi_other_re: Final[re.Pattern[str]] = re.compile( + _CSI + + r"""(?: + H # HOME + |\?\d+[hl] # enable/disable features (e.g., cursor, mouse, etc) + |[1-6] q # cursor shape (e.g., blink) (note the space before 'q') + |2?J # erase down (J) or clear screen (2J) + |\d*[ABCD] # cursor up/down/forward/backward + |\d+G # move to column + |(?:\d;)?\d+;\d+H # move to (x, y) + |\dK # erase in line + ) | """ + + _OSC + + r"""(?: + \d;.+?\x07 # set window title + ) | """ + + _BELL, + re.VERBOSE | re.ASCII, +) + +# ANSI escape sequences +_ansi_re: Final[re.Pattern[str]] = re.compile( + ' | '.join((_ansi_color_re.pattern, _ansi_other_re.pattern)), + re.VERBOSE | re.ASCII, +) + codes: dict[str, str] = {} @@ -99,38 +146,53 @@ def escseq(name: str) -> str: def strip_colors(s: str) -> str: + """Strip all color escape sequences from *s*.""" + # TODO: 
deprecate parameter *s* in favor of a positional-only parameter *text* return _ansi_color_re.sub('', s) -def _strip_escape_sequences(s: str) -> str: - return _ansi_re.sub('', s) +def strip_control_sequences(text: str, /) -> str: + """Strip non-color escape sequences from *text*.""" + return _ansi_other_re.sub('', text) + + +def strip_escape_sequences(text: str, /) -> str: + """Strip all control sequences from *text*.""" + # Remove control sequences first so that text of the form + # + # '\x1b[94m' + '\x1bA' + TEXT + '\x1b[0m' + # + # is cleaned to TEXT and not '' (otherwise '[94m\x1bAabc\x1b[0' + # is considered by :data:`_ansi_color_re` and removed altogther). + return strip_colors(strip_control_sequences(text)) def create_color_func(name: str) -> None: def inner(text: str) -> str: return colorize(name, text) + globals()[name] = inner _attrs = { - 'reset': '39;49;00m', - 'bold': '01m', - 'faint': '02m', - 'standout': '03m', + 'reset': '39;49;00m', + 'bold': '01m', + 'faint': '02m', + 'standout': '03m', 'underline': '04m', - 'blink': '05m', + 'blink': '05m', } for _name, _value in _attrs.items(): codes[_name] = '\x1b[' + _value _colors = [ - ('black', 'darkgray'), - ('darkred', 'red'), + ('black', 'darkgray'), + ('darkred', 'red'), ('darkgreen', 'green'), - ('brown', 'yellow'), - ('darkblue', 'blue'), - ('purple', 'fuchsia'), + ('brown', 'yellow'), + ('darkblue', 'blue'), + ('purple', 'fuchsia'), ('turquoise', 'teal'), ('lightgray', 'white'), ] diff --git a/sphinx/util/exceptions.py b/sphinx/util/exceptions.py index 08281389bf3..577ec734e59 100644 --- a/sphinx/util/exceptions.py +++ b/sphinx/util/exceptions.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING from sphinx.errors import SphinxParallelError -from sphinx.util.console import _strip_escape_sequences +from sphinx.util.console import strip_escape_sequences if TYPE_CHECKING: from sphinx.application import Sphinx @@ -31,7 +31,7 @@ def save_traceback(app: Sphinx | None, exc: BaseException) -> str: last_msgs = 
exts_list = '' else: extensions = app.extensions.values() - last_msgs = '\n'.join(f'# {_strip_escape_sequences(s).strip()}' + last_msgs = '\n'.join(f'# {strip_escape_sequences(s).strip()}' for s in app.messagelog) exts_list = '\n'.join(f'# {ext.name} ({ext.version})' for ext in extensions if ext.version != 'builtin') diff --git a/tests/test_util/test_util_console.py b/tests/test_util/test_util_console.py new file mode 100644 index 00000000000..e9f878d9b26 --- /dev/null +++ b/tests/test_util/test_util_console.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +import itertools +import string +from typing import TYPE_CHECKING + +import pytest + +import sphinx.util.console as term +from sphinx.util.console import strip_colors, strip_control_sequences, strip_escape_sequences + +if TYPE_CHECKING: + from collections.abc import Sequence + from typing import Any, Final + +ESC: Final[str] = '\x1b' +CSI: Final[str] = '\x1b[' +OSC: Final[str] = '\x1b]' +BELL: Final[str] = '\x07' + + +def osc_title(title: str) -> str: + return f'{OSC}2;{title}{BELL}' + + +def insert_ansi(text: str, codes: list[str]) -> str: + for code in codes: + text = f'{CSI}{code}{text}' + return text + + +def apply_style(text: str, style: list[str]) -> str: + for code in style: + if code in term.codes: + text = term.colorize(code, text) + else: + text = insert_ansi(text, [code]) + return text + + +def poweroder(seq: Sequence[Any], *, permutations: bool = True) -> list[tuple[Any, ...]]: + generator = itertools.permutations if permutations else itertools.combinations + return list(itertools.chain.from_iterable((generator(seq, i) for i in range(len(seq))))) + + +@pytest.mark.parametrize('invariant', [ESC, CSI, OSC]) +def test_strip_invariants(invariant: str) -> None: + assert strip_colors(invariant) == invariant + assert strip_control_sequences(invariant) == invariant + assert strip_escape_sequences(invariant) == invariant + + +# some color/style codes to use (but not composed) + + +_STYLES = ['m', 
'0m', '2m', '02m', '002m', '40m', '040m', '0;1m', '40;50m', '50;30;40m'] +# some non-color ESC codes to use (will be composed) +_CNTRLS = ['A', '0G', '1;20;128H'] + + +@pytest.mark.parametrize('prefix', ['', '#', 'def ']) # non-formatted part +@pytest.mark.parametrize('source', [string.printable, BELL]) +@pytest.mark.parametrize('style', [_STYLES, *poweroder(['bold', 'blink', 'blue', 'red'])]) +def test_strip_style(prefix: str, source: str, style: list[str]) -> None: + expect = prefix + source + pretty = prefix + apply_style(source, style) + assert strip_colors(pretty) == expect, (pretty, expect) + + +@pytest.mark.parametrize('prefix', ['', '#', 'def ']) # non-formatted part +@pytest.mark.parametrize('source', ['', 'abc', string.printable]) +@pytest.mark.parametrize('style', [_STYLES, *poweroder(['blue', 'bold'])]) +@pytest.mark.parametrize('cntrl', poweroder(_CNTRLS), ids='-'.join) +def test_strip_cntrl(prefix: str, source: str, style: list[str], cntrl: list[str]) -> None: + expect = pretty = prefix + apply_style(source, style) + # does nothing since there are only color sequences + assert strip_control_sequences(pretty) == expect, (pretty, expect) + + expect = prefix + source + pretty = prefix + insert_ansi(source, cntrl) + # all non-color codes are removed correctly + assert strip_control_sequences(pretty) == expect, (pretty, expect) + + +@pytest.mark.parametrize('prefix', ['', '#', 'def ']) # non-formatted part +@pytest.mark.parametrize('source', ['', 'abc', string.printable]) +@pytest.mark.parametrize('style', [_STYLES, *poweroder(['blue', 'bold'])]) +@pytest.mark.parametrize('cntrl', poweroder(_CNTRLS), ids='-'.join) +def test_strip_ansi(prefix: str, source: str, style: list[str], cntrl: list[str]) -> None: + expect = prefix + source + + with_style = prefix + apply_style(source, style) + assert strip_escape_sequences(with_style) == expect, (with_style, expect) + + with_cntrl = prefix + insert_ansi(source, cntrl) + assert strip_escape_sequences(with_cntrl) == 
expect, (with_cntrl, expect) + + composed = insert_ansi(with_style, cntrl) # add some cntrl sequences + assert strip_escape_sequences(composed) == expect, (composed, expect) + + composed = apply_style(with_cntrl, style) # add some color sequences + assert strip_escape_sequences(composed) == expect, (composed, expect) From 2c1df5099900cda4458495287183a7af415395c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 30 Mar 2024 11:19:09 +0100 Subject: [PATCH 06/66] remove ``# type: ignore[attr-defined]`` for colors --- sphinx/application.py | 2 +- sphinx/builders/__init__.py | 2 +- sphinx/builders/changes.py | 2 +- sphinx/builders/gettext.py | 2 +- sphinx/builders/latex/__init__.py | 2 +- sphinx/builders/linkcheck.py | 8 +------- sphinx/builders/manpage.py | 2 +- sphinx/builders/singlehtml.py | 2 +- sphinx/builders/texinfo.py | 2 +- sphinx/cmd/build.py | 7 +------ sphinx/cmd/make_mode.py | 7 +------ sphinx/cmd/quickstart.py | 8 +------- sphinx/ext/coverage.py | 2 +- sphinx/ext/doctest.py | 2 +- sphinx/util/console.py | 10 ++++++---- sphinx/util/display.py | 2 +- 16 files changed, 21 insertions(+), 41 deletions(-) diff --git a/sphinx/application.py b/sphinx/application.py index 1de0693baa2..7d16d9ab2b2 100644 --- a/sphinx/application.py +++ b/sphinx/application.py @@ -33,7 +33,7 @@ from sphinx.util import docutils, logging from sphinx.util._pathlib import _StrPath from sphinx.util.build_phase import BuildPhase -from sphinx.util.console import bold # type: ignore[attr-defined] +from sphinx.util.console import bold from sphinx.util.display import progress_message from sphinx.util.i18n import CatalogRepository from sphinx.util.logging import prefixed_warnings diff --git a/sphinx/builders/__init__.py b/sphinx/builders/__init__.py index 878f5d6acfb..9bcc8c7b1eb 100644 --- a/sphinx/builders/__init__.py +++ b/sphinx/builders/__init__.py @@ -17,7 +17,7 @@ from sphinx.locale import __ from sphinx.util 
import UnicodeDecodeErrorHandler, get_filetype, import_object, logging, rst from sphinx.util.build_phase import BuildPhase -from sphinx.util.console import bold # type: ignore[attr-defined] +from sphinx.util.console import bold from sphinx.util.display import progress_message, status_iterator from sphinx.util.docutils import sphinx_domains from sphinx.util.i18n import CatalogInfo, CatalogRepository, docname_to_domain diff --git a/sphinx/builders/changes.py b/sphinx/builders/changes.py index 7d5e0044e23..b233e85cd73 100644 --- a/sphinx/builders/changes.py +++ b/sphinx/builders/changes.py @@ -12,7 +12,7 @@ from sphinx.locale import _, __ from sphinx.theming import HTMLThemeFactory from sphinx.util import logging -from sphinx.util.console import bold # type: ignore[attr-defined] +from sphinx.util.console import bold from sphinx.util.fileutil import copy_asset_file from sphinx.util.osutil import ensuredir, os_path diff --git a/sphinx/builders/gettext.py b/sphinx/builders/gettext.py index 3928f9f9308..35abf1fd7e5 100644 --- a/sphinx/builders/gettext.py +++ b/sphinx/builders/gettext.py @@ -16,7 +16,7 @@ from sphinx.errors import ThemeError from sphinx.locale import __ from sphinx.util import logging -from sphinx.util.console import bold # type: ignore[attr-defined] +from sphinx.util.console import bold from sphinx.util.display import status_iterator from sphinx.util.i18n import CatalogInfo, docname_to_domain from sphinx.util.index_entries import split_index_msg diff --git a/sphinx/builders/latex/__init__.py b/sphinx/builders/latex/__init__.py index fd140dd062e..2b176f92504 100644 --- a/sphinx/builders/latex/__init__.py +++ b/sphinx/builders/latex/__init__.py @@ -20,7 +20,7 @@ from sphinx.errors import NoUri, SphinxError from sphinx.locale import _, __ from sphinx.util import logging, texescape -from sphinx.util.console import bold, darkgreen # type: ignore[attr-defined] +from sphinx.util.console import bold, darkgreen from sphinx.util.display import progress_message, 
status_iterator from sphinx.util.docutils import SphinxFileOutput, new_document from sphinx.util.fileutil import copy_asset_file diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 83f45e4719c..89a3543e822 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -24,13 +24,7 @@ from sphinx.locale import __ from sphinx.transforms.post_transforms import SphinxPostTransform from sphinx.util import encode_uri, logging, requests -from sphinx.util.console import ( # type: ignore[attr-defined] - darkgray, - darkgreen, - purple, - red, - turquoise, -) +from sphinx.util.console import darkgray, darkgreen, purple, red, turquoise from sphinx.util.http_date import rfc1123_to_epoch from sphinx.util.nodes import get_node_line diff --git a/sphinx/builders/manpage.py b/sphinx/builders/manpage.py index bf01d017c24..93b381d3864 100644 --- a/sphinx/builders/manpage.py +++ b/sphinx/builders/manpage.py @@ -13,7 +13,7 @@ from sphinx.builders import Builder from sphinx.locale import __ from sphinx.util import logging -from sphinx.util.console import darkgreen # type: ignore[attr-defined] +from sphinx.util.console import darkgreen from sphinx.util.display import progress_message from sphinx.util.nodes import inline_all_toctrees from sphinx.util.osutil import ensuredir, make_filename_from_project diff --git a/sphinx/builders/singlehtml.py b/sphinx/builders/singlehtml.py index f9ce8cea28d..87590544f6d 100644 --- a/sphinx/builders/singlehtml.py +++ b/sphinx/builders/singlehtml.py @@ -11,7 +11,7 @@ from sphinx.environment.adapters.toctree import global_toctree_for_doc from sphinx.locale import __ from sphinx.util import logging -from sphinx.util.console import darkgreen # type: ignore[attr-defined] +from sphinx.util.console import darkgreen from sphinx.util.display import progress_message from sphinx.util.nodes import inline_all_toctrees diff --git a/sphinx/builders/texinfo.py b/sphinx/builders/texinfo.py index 226ce690fad..8d5a1aa6df0 100644 
--- a/sphinx/builders/texinfo.py +++ b/sphinx/builders/texinfo.py @@ -17,7 +17,7 @@ from sphinx.errors import NoUri from sphinx.locale import _, __ from sphinx.util import logging -from sphinx.util.console import darkgreen # type: ignore[attr-defined] +from sphinx.util.console import darkgreen from sphinx.util.display import progress_message, status_iterator from sphinx.util.docutils import new_document from sphinx.util.fileutil import copy_asset_file diff --git a/sphinx/cmd/build.py b/sphinx/cmd/build.py index bf3fa3400a4..be23e0b90a4 100644 --- a/sphinx/cmd/build.py +++ b/sphinx/cmd/build.py @@ -22,12 +22,7 @@ from sphinx.errors import SphinxError, SphinxParallelError from sphinx.locale import __ from sphinx.util._io import TeeStripANSI -from sphinx.util.console import ( # type: ignore[attr-defined] - color_terminal, - nocolor, - red, - terminal_safe, -) +from sphinx.util.console import color_terminal, nocolor, red, terminal_safe from sphinx.util.docutils import docutils_namespace, patch_docutils from sphinx.util.exceptions import format_exception_cut_frames, save_traceback from sphinx.util.osutil import ensuredir diff --git a/sphinx/cmd/make_mode.py b/sphinx/cmd/make_mode.py index ee237ae15c0..01929469cca 100644 --- a/sphinx/cmd/make_mode.py +++ b/sphinx/cmd/make_mode.py @@ -17,12 +17,7 @@ import sphinx from sphinx.cmd.build import build_main -from sphinx.util.console import ( # type: ignore[attr-defined] - blue, - bold, - color_terminal, - nocolor, -) +from sphinx.util.console import blue, bold, color_terminal, nocolor from sphinx.util.osutil import rmtree if sys.version_info >= (3, 11): diff --git a/sphinx/cmd/quickstart.py b/sphinx/cmd/quickstart.py index fe1a29a14d3..8fb7eebae48 100644 --- a/sphinx/cmd/quickstart.py +++ b/sphinx/cmd/quickstart.py @@ -31,13 +31,7 @@ import sphinx.locale from sphinx import __display_version__, package_dir from sphinx.locale import __ -from sphinx.util.console import ( # type: ignore[attr-defined] - bold, - color_terminal, - 
colorize, - nocolor, - red, -) +from sphinx.util.console import bold, color_terminal, colorize, nocolor, red from sphinx.util.osutil import ensuredir from sphinx.util.template import SphinxRenderer diff --git a/sphinx/ext/coverage.py b/sphinx/ext/coverage.py index 92afd868ca4..cfe093623c1 100644 --- a/sphinx/ext/coverage.py +++ b/sphinx/ext/coverage.py @@ -19,7 +19,7 @@ from sphinx.builders import Builder from sphinx.locale import __ from sphinx.util import logging -from sphinx.util.console import red # type: ignore[attr-defined] +from sphinx.util.console import red from sphinx.util.inspect import safe_getattr if TYPE_CHECKING: diff --git a/sphinx/ext/doctest.py b/sphinx/ext/doctest.py index fe133900c05..e6ba27439b2 100644 --- a/sphinx/ext/doctest.py +++ b/sphinx/ext/doctest.py @@ -22,7 +22,7 @@ from sphinx.builders import Builder from sphinx.locale import __ from sphinx.util import logging -from sphinx.util.console import bold # type: ignore[attr-defined] +from sphinx.util.console import bold from sphinx.util.docutils import SphinxDirective from sphinx.util.osutil import relpath diff --git a/sphinx/util/console.py b/sphinx/util/console.py index da4647c9586..1e4ce4e810f 100644 --- a/sphinx/util/console.py +++ b/sphinx/util/console.py @@ -22,18 +22,20 @@ def blink(text: str) -> str: ... # NoQA: E704 def black(text: str) -> str: ... # NoQA: E704 def white(text: str) -> str: ... # NoQA: E704 def red(text: str) -> str: ... # NoQA: E704 + def green(text: str) -> str: ... # NoQA: E704 def yellow(text: str) -> str: ... # NoQA: E704 def blue(text: str) -> str: ... # NoQA: E704 - def purple(text: str) -> str: ... # NoQA: E704 - def turquoise(text: str) -> str: ... # NoQA: E704 + def fuchsia(text: str) -> str: ... # NoQA: E704 + def teal(text: str) -> str: ... # NoQA: E704 def darkgray(text: str) -> str: ... # NoQA: E704 def lightgray(text: str) -> str: ... # NoQA: E704 def darkred(text: str) -> str: ... # NoQA: E704 + def darkgreen(text: str) -> str: ... 
# NoQA: E704 def brown(text: str) -> str: ... # NoQA: E704 def darkblue(text: str) -> str: ... # NoQA: E704 - def fuchsia(text: str) -> str: ... # NoQA: E704 - def teal(text: str) -> str: ... # NoQA: E704 + def purple(text: str) -> str: ... # NoQA: E704 + def turquoise(text: str) -> str: ... # NoQA: E704 # fmt: on try: diff --git a/sphinx/util/display.py b/sphinx/util/display.py index 967c8057313..3cb8d9729b4 100644 --- a/sphinx/util/display.py +++ b/sphinx/util/display.py @@ -5,7 +5,7 @@ from sphinx.locale import __ from sphinx.util import logging -from sphinx.util.console import bold, color_terminal # type: ignore[attr-defined] +from sphinx.util.console import bold, color_terminal if False: from collections.abc import Iterable, Iterator From 73f3d882f73ab9105c6ef082f3fb5ea8cb6f9f81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 30 Mar 2024 11:06:17 +0100 Subject: [PATCH 07/66] add tests for ANSI strippers --- tests/test_util/test_util_console.py | 119 +++++++++++++++++++++------ 1 file changed, 93 insertions(+), 26 deletions(-) diff --git a/tests/test_util/test_util_console.py b/tests/test_util/test_util_console.py index e9f878d9b26..4055d7d0235 100644 --- a/tests/test_util/test_util_console.py +++ b/tests/test_util/test_util_console.py @@ -1,7 +1,6 @@ from __future__ import annotations import itertools -import string from typing import TYPE_CHECKING import pytest @@ -11,7 +10,14 @@ if TYPE_CHECKING: from collections.abc import Sequence - from typing import Any, Final + from typing import Any, Final, TypeVar + + _T = TypeVar('_T') + + Style = str + """An ANSI style (color or format) known by :mod:`sphinx.util.console`.""" + AnsiCode = str + """An ANSI escape sequence.""" ESC: Final[str] = '\x1b' CSI: Final[str] = '\x1b[' @@ -20,17 +26,35 @@ def osc_title(title: str) -> str: + """OSC string for changing the terminal title.""" return f'{OSC}2;{title}{BELL}' -def insert_ansi(text: str, 
codes: list[str]) -> str: +def insert_ansi(text: str, codes: Sequence[AnsiCode], *, reset: bool = False) -> str: + """Add ANSI escape sequences codes to *text*. + + If *reset* is True, the reset code is added at the end. + + :param text: The text to decorate. + :param codes: A list of ANSI esc. seq. to use deprived of their CSI prefix. + :param reset: Indicate whether to add the reset esc. seq. + :return: The decorated text. + """ for code in codes: - text = f'{CSI}{code}{text}' + text = f'{code}{text}' + if reset: + text = term.reset(text) return text -def apply_style(text: str, style: list[str]) -> str: - for code in style: +def apply_style(text: str, codes: Sequence[AnsiCode | Style]) -> str: + """Apply one or more ANSI esc. seq. to *text*. + + Each item in *codes* can either be a color name (e.g., 'blue'), + a text decoration (e.g., 'blink') or an ANSI esc. seq. deprived + of its CSI prefix (e.g., '34m'). + """ + for code in codes: if code in term.codes: text = term.colorize(code, text) else: @@ -38,9 +62,30 @@ def apply_style(text: str, style: list[str]) -> str: return text -def poweroder(seq: Sequence[Any], *, permutations: bool = True) -> list[tuple[Any, ...]]: - generator = itertools.permutations if permutations else itertools.combinations - return list(itertools.chain.from_iterable((generator(seq, i) for i in range(len(seq))))) +def powerset( + elems: Sequence[_T], *, n: int | None = None, total: bool = True +) -> list[tuple[_T, ...]]: + r"""Generate the powerset over *seq*. + + :param elems: The elements to get the powerset over. + :param n: Optional maximum size of a subset. + :param total: If false, quotient the result by :math:`\mathfrak{S}_n`. + + Example: + ------- + + .. 
code-block:: python + + powerset([1, 2], total=True) + [(), (1,), (2,), (1, 2), (2, 1)] + + powerset([1, 2], total=False) + [(), (1,), (2,), (1, 2)] + """ + if n is None: + n = len(elems) + gen = itertools.permutations if total else itertools.combinations + return list(itertools.chain.from_iterable(gen(elems, i) for i in range(n + 1))) @pytest.mark.parametrize('invariant', [ESC, CSI, OSC]) @@ -51,27 +96,47 @@ def test_strip_invariants(invariant: str) -> None: # some color/style codes to use (but not composed) +_STYLES: list[tuple[AnsiCode, ...]] = [ + *[(f'{CSI}{";".join(map(str, s))}m',) for s in [range(s) for s in range(4)]], + *powerset(['blue', 'bold']), +] +# some non-color ESC codes to use (will be composed) +_CNTRLS: list[tuple[AnsiCode, ...]] = powerset([f'{CSI}A', f'{CSI}0G', f'{CSI}1;20;128H']) -_STYLES = ['m', '0m', '2m', '02m', '002m', '40m', '040m', '0;1m', '40;50m', '50;30;40m'] -# some non-color ESC codes to use (will be composed) -_CNTRLS = ['A', '0G', '1;20;128H'] +# For some reason that I (picnixz) do not understand, it is not possible to +# create a mark decorator using pytest.mark.parametrize.with_args(ids=...). +# +# As such, in order not to lose autocompletion from PyCharm, we will pass +# the custom id function to each call to `pytest.mark.parametrize`. 
+def _clean_id(value: Any) -> str: + if isinstance(value, str) and not value: + return '' + + if isinstance(value, (list, tuple)): + if not value: + return '()' + return '-'.join(map(_clean_id, value)) + + return repr(value) -@pytest.mark.parametrize('prefix', ['', '#', 'def ']) # non-formatted part -@pytest.mark.parametrize('source', [string.printable, BELL]) -@pytest.mark.parametrize('style', [_STYLES, *poweroder(['bold', 'blink', 'blue', 'red'])]) -def test_strip_style(prefix: str, source: str, style: list[str]) -> None: +@pytest.mark.parametrize('prefix', ['', 'raw'], ids=_clean_id) # non-formatted part +@pytest.mark.parametrize('source', ['', 'abc\ndef', BELL], ids=_clean_id) +@pytest.mark.parametrize('style', _STYLES, ids=_clean_id) +def test_strip_style(prefix: str, source: str, style: tuple[AnsiCode, ...]) -> None: expect = prefix + source pretty = prefix + apply_style(source, style) assert strip_colors(pretty) == expect, (pretty, expect) -@pytest.mark.parametrize('prefix', ['', '#', 'def ']) # non-formatted part -@pytest.mark.parametrize('source', ['', 'abc', string.printable]) -@pytest.mark.parametrize('style', [_STYLES, *poweroder(['blue', 'bold'])]) -@pytest.mark.parametrize('cntrl', poweroder(_CNTRLS), ids='-'.join) -def test_strip_cntrl(prefix: str, source: str, style: list[str], cntrl: list[str]) -> None: +@pytest.mark.parametrize('prefix', ['', 'raw'], ids=_clean_id) # non-formatted part +@pytest.mark.parametrize('source', ['', 'abc\ndef'], ids=_clean_id) +@pytest.mark.parametrize('style', _STYLES, ids=_clean_id) +@pytest.mark.parametrize('cntrl', _CNTRLS, ids=_clean_id) +def test_strip_cntrl( + prefix: str, source: str, style: tuple[AnsiCode, ...], cntrl: tuple[AnsiCode, ...] 
+) -> None: expect = pretty = prefix + apply_style(source, style) # does nothing since there are only color sequences assert strip_control_sequences(pretty) == expect, (pretty, expect) @@ -82,11 +147,13 @@ def test_strip_cntrl(prefix: str, source: str, style: list[str], cntrl: list[str assert strip_control_sequences(pretty) == expect, (pretty, expect) -@pytest.mark.parametrize('prefix', ['', '#', 'def ']) # non-formatted part -@pytest.mark.parametrize('source', ['', 'abc', string.printable]) -@pytest.mark.parametrize('style', [_STYLES, *poweroder(['blue', 'bold'])]) -@pytest.mark.parametrize('cntrl', poweroder(_CNTRLS), ids='-'.join) -def test_strip_ansi(prefix: str, source: str, style: list[str], cntrl: list[str]) -> None: +@pytest.mark.parametrize('prefix', ['', 'raw'], ids=_clean_id) # non-formatted part +@pytest.mark.parametrize('source', ['', 'abc\ndef'], ids=_clean_id) +@pytest.mark.parametrize('style', _STYLES, ids=_clean_id) +@pytest.mark.parametrize('cntrl', _CNTRLS, ids=_clean_id) +def test_strip_ansi( + prefix: str, source: str, style: tuple[AnsiCode, ...], cntrl: tuple[AnsiCode, ...] 
+) -> None: expect = prefix + source with_style = prefix + apply_style(source, style) From fb92ab56ac0df716025d9297b8e46a7ad8e95ee1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 31 Mar 2024 17:44:54 +0200 Subject: [PATCH 08/66] update --- sphinx/testing/_matcher/options.py | 57 ++++++++- sphinx/testing/_matcher/util.py | 9 +- sphinx/testing/matcher.py | 196 +++++++++++++++++++++-------- sphinx/testing/util.py | 17 +-- tests/test_testing/test_matcher.py | 63 +++++++--- 5 files changed, 256 insertions(+), 86 deletions(-) diff --git a/sphinx/testing/_matcher/options.py b/sphinx/testing/_matcher/options.py index 3376266e4f5..08ae39dee22 100644 --- a/sphinx/testing/_matcher/options.py +++ b/sphinx/testing/_matcher/options.py @@ -5,19 +5,28 @@ from typing import TYPE_CHECKING, TypedDict, final, overload if TYPE_CHECKING: + from collections.abc import Callable, Sequence from typing import Final, Literal, TypeVar, Union + from sphinx.testing._matcher.util import LinePattern + FlagOption = Literal['color', 'ctrl', 'keepends', 'empty', 'compress', 'unique'] StripOption = Literal['strip', 'stripline'] StripChars = Union[bool, str, None] + DeleteOption = Literal['delete'] + DeletePattern = Union[LinePattern, Sequence[LinePattern]] + + FilteringOption = Literal['ignore'] + LinePredicate = Callable[[str], object] + FlavorOption = Literal['flavor'] Flavor = Literal['re', 'fnmatch', 'exact'] # For some reason, mypy does not like Union of Literal, # so we wrap the Literal types inside a bigger Literal. - OptionName = Literal[FlagOption, StripOption, FlavorOption] + OptionName = Literal[FlagOption, StripOption, DeleteOption, FilteringOption, FlavorOption] DT = TypeVar('DT') @@ -95,6 +104,32 @@ class Options(TypedDict, total=False): after empty and duplicated consecutive lines might have been eliminated. """ + delete: DeletePattern + """Strings or patterns to remove from the beginning of the line. 
+
+    When :attr:`delete` is a single string, it is considered as a
+    prefix to remove from the output lines.
+
+    When :attr:`delete` is a single pattern, the matching groups
+    are removed from each line.
+
+    When :attr:`delete` consists of one or more elements, either
+    strings or :class:`~re.Pattern` objects, then all matching
+    groups and prefixes are removed until none remains.
+
+    This transformation is applied at the end of the transformation
+    chain, just before the output lines are filtered with
+    the :attr:`ignore` predicate.
+    """
+
+    ignore: LinePredicate | None
+    """A predicate for filtering the output lines.
+
+    Lines that satisfy this predicate are not included in the output.
+
+    The default is ``None``, meaning that all lines are included.
+    """
+
     flavor: Flavor
     """Indicate how strings are matched against non-compiled patterns.
 
@@ -118,12 +153,18 @@ class CompleteOptions(TypedDict):
 
     color: bool
     ctrl: bool
+
     strip: StripChars
     stripline: StripChars
+
     keepends: bool
     empty: bool
     compress: bool
     unique: bool
+
+    delete: DeletePattern
+    ignore: LinePredicate | None
+
     flavor: Flavor
 
 
@@ -136,6 +177,8 @@ class CompleteOptions(TypedDict):
     empty=True,
     compress=False,
     unique=False,
+    delete=(),
+    ignore=None,
     flavor='exact',
 )
 """The default (read-only) options values."""
@@ -157,9 +200,19 @@ def get_option(options: _OptionsView, name: FlagOption, /) -> bool: ...  # NoQA:
 def get_option(options: _OptionsView, name: FlagOption, default: DT, /) -> bool | DT: ...  # NoQA: E704
 # strip-like options
 @overload
-def get_option(options: _OptionsView, name: StripOption, /) -> StripChars: ...  # NoQA: E501, E704
+def get_option(options: _OptionsView, name: StripOption, /) -> StripChars: ...  # NoQA: E704
 @overload
 def get_option(options: _OptionsView, name: StripOption, default: DT, /) -> StripChars | DT: ...  # NoQA: E501, E704
+# delete prefix/suffix option
+@overload
+def get_option(options: _OptionsView, name: DeleteOption, /) -> DeletePattern: ...
# NoQA: E704 +@overload +def get_option(options: _OptionsView, name: DeleteOption, default: DT, /) -> DeletePattern | DT: ... # NoQA: E501, E704 +# filtering options +@overload +def get_option(options: _OptionsView, name: FilteringOption, /) -> LinePredicate | None: ... # NoQA: E704 +@overload +def get_option(options: _OptionsView, name: FilteringOption, default: DT, /) -> LinePredicate | None | DT: ... # NoQA: E501, E704 # miscellaneous options @overload def get_option(options: _OptionsView, name: FlavorOption, /) -> Flavor: ... # NoQA: E704 diff --git a/sphinx/testing/_matcher/util.py b/sphinx/testing/_matcher/util.py index 71461f682c3..9c6724f6149 100644 --- a/sphinx/testing/_matcher/util.py +++ b/sphinx/testing/_matcher/util.py @@ -129,14 +129,17 @@ def indent_lines( return [prefix + line for line in lines] -def prettify_patterns(patterns: Sequence[LinePattern], /, *, indent: int = 4) -> str: +def prettify_patterns( + patterns: Sequence[LinePattern], /, *, indent: int = 4, sort: bool = False, +) -> str: """Prettify the *patterns* as a string to print.""" - source = (p if isinstance(p, str) else p.pattern for p in patterns) + lines = (p if isinstance(p, str) else p.pattern for p in patterns) + source = sorted(lines) if sort else lines return indent_source(source, indent=indent, highlight=False) def get_debug_context( - source: Sequence[str], block: Block, /, *, context: int, indent: int = 4 + source: Sequence[str], block: Block, /, context: int, *, indent: int = 4 ) -> list[str]: """Get some context lines around *block* and highlight the *block*. 
diff --git a/sphinx/testing/matcher.py b/sphinx/testing/matcher.py index 99b6fe2c3b8..6bc0c2c0220 100644 --- a/sphinx/testing/matcher.py +++ b/sphinx/testing/matcher.py @@ -1,10 +1,11 @@ from __future__ import annotations -__all__ = ('Options', 'LineMatcher') +__all__ = ('clean', 'Options', 'LineMatcher') import contextlib import fnmatch import re +from functools import reduce from itertools import starmap from types import MappingProxyType from typing import TYPE_CHECKING, final @@ -18,11 +19,12 @@ if TYPE_CHECKING: from collections.abc import Collection, Generator, Iterable, Iterator, Sequence + from io import StringIO from typing import Literal from typing_extensions import Self, Unpack - from sphinx.testing._matcher.options import CompleteOptions + from sphinx.testing._matcher.options import CompleteOptions, Flavor from sphinx.testing._matcher.util import LinePattern PatternType = Literal['line', 'block'] @@ -76,11 +78,38 @@ def clean(text: str, /, **options: Unpack[Options]) -> tuple[str, ...]: elif get_option(options, 'compress'): lines = util.unique_justseen(lines) + if delete := get_option(options, 'delete'): + flavor = get_option(options, 'flavor') + patterns = _translate(_to_line_patterns(delete), flavor=flavor) + # ensure that we are using the beginning of the string + compiled = [re.compile(rf'^{p}') if isinstance(p, str) else p for p in patterns] + + def sub(line: str, pattern: re.Pattern[str]) -> str: + return pattern.sub('', line) + + def reduction(line: str) -> str: + temp = reduce(sub, compiled, line) + while line != temp: + line, temp = temp, reduce(sub, compiled, temp) + return temp + + lines = map(reduction, lines) + + if callable(ignore := get_option(options, 'ignore')): + lines = (line for line in lines if not ignore(line)) + return tuple(lines) -def _to_lines_pattern(expect: LinePattern | Collection[LinePattern]) -> Sequence[LinePattern]: - return [expect] if isinstance(expect, (str, re.Pattern)) else list(expect) +def 
_to_line_patterns(expect: LinePattern | Collection[LinePattern]) -> Sequence[LinePattern]: + """Make *pattern* compatible for line-matching.""" + if isinstance(expect, (str, re.Pattern)): + return [expect] + + def key(x: str | re.Pattern[str]) -> str: + return x if isinstance(x, str) else x.pattern + + return [expect] if isinstance(expect, (str, re.Pattern)) else sorted(set(expect), key=key) def _to_block_pattern(expect: LinePattern | Sequence[LinePattern]) -> Sequence[LinePattern]: @@ -92,19 +121,36 @@ def _to_block_pattern(expect: LinePattern | Sequence[LinePattern]) -> Sequence[L return expect +def _translate(patterns: Iterable[LinePattern], *, flavor: Flavor) -> Iterable[LinePattern]: + if flavor == 'fnmatch': + return [fnmatch.translate(p) if isinstance(p, str) else p for p in patterns] + + if flavor == 'exact': + return [re.escape(p) if isinstance(p, str) else p for p in patterns] + + return patterns + + +def _compile(patterns: Iterable[LinePattern], *, flavor: Flavor) -> Sequence[re.Pattern[str]]: + patterns = _translate(patterns, flavor=flavor) + # mypy does not like map + re.compile() although it is correct but + # this is likely due to https://github.com/python/mypy/issues/11880 + return [re.compile(pattern) for pattern in patterns] + + @final class LineMatcher: """Helper object for matching output lines.""" __slots__ = ('_content', '_options', '_stack') - def __init__(self, content: str, /, **options: Unpack[Options]) -> None: + def __init__(self, content: str | StringIO, /, **options: Unpack[Options]) -> None: """Construct a :class:`LineMatcher` for the given string content. :param content: The source string. :param options: The matcher options. 
""" - self._content = content + self._content = content if isinstance(content, str) else content.getvalue() # always complete the set of options for this object self._options: CompleteOptions = DEFAULT_OPTIONS | options # stack of cached cleaned lines (with a possible indirection) @@ -192,32 +238,41 @@ def lines(self) -> tuple[str, ...]: assert isinstance(cached, tuple) return cached - def match(self, expect: LinePattern | Collection[LinePattern], /) -> Sequence[Line]: - """Same as :meth:`itermatch` but returns a sequence of lines.""" - return list(self.itermatch(expect)) + def find( + self, expect: LinePattern | Collection[LinePattern], /, *, flavor: Flavor | None = None + ) -> Sequence[Line]: + """Same as :meth:`iterfind` but returns a sequence of lines.""" + return list(self.iterfind(expect, flavor=flavor)) - def itermatch(self, expect: LinePattern | Collection[LinePattern], /) -> Iterator[Line]: + def iterfind( + self, expect: LinePattern | Collection[LinePattern], /, *, flavor: Flavor | None = None + ) -> Iterator[Line]: """Yield the lines that match one (or more) of the given patterns. When one or more patterns are given, the order of evaluation is the same as they are given (or arbitrary if they are given in a set). 
""" - patterns = _to_lines_pattern(expect) - matchers = [pattern.match for pattern in self.__compile(patterns)] + patterns = _to_line_patterns(expect) + matchers = [pattern.match for pattern in self.__compile(patterns, flavor=flavor)] def predicate(line: Line) -> bool: - return any(matcher(str(line)) for matcher in matchers) + return any(matcher(line.buffer) for matcher in matchers) yield from filter(predicate, self) - def find(self, expect: str | Sequence[LinePattern], /) -> Sequence[Block]: - """Same as :meth:`iterfind` but returns a sequence of blocks.""" - return list(self.iterfind(expect)) + def find_blocks( + self, expect: str | Sequence[LinePattern], /, *, flavor: Flavor | None = None + ) -> Sequence[Block]: + """Same as :meth:`iterfind_blocks` but returns a sequence of blocks.""" + return list(self.iterfind_blocks(expect, flavor=flavor)) - def iterfind(self, expect: str | Sequence[LinePattern], /) -> Iterator[Block]: + def iterfind_blocks( + self, expect: str | Sequence[LinePattern], /, *, flavor: Flavor | None = None + ) -> Iterator[Block]: """Yield non-overlapping blocks matching the given line patterns. :param expect: The line patterns that a block must satisfy. + :param flavor: Optional temporary flavor for string patterns. :return: An iterator on the matching blocks. When *expect* is a single string, it is split into lines, each of @@ -236,7 +291,7 @@ def iterfind(self, expect: str | Sequence[LinePattern], /) -> Iterator[Block]: if (width := len(patterns)) > len(lines): return - compiled_patterns = self.__compile(patterns) + compiled_patterns = self.__compile(patterns, flavor=flavor) block_iterator = enumerate(util.windowed(lines, width)) for start, block in block_iterator: @@ -251,49 +306,75 @@ def iterfind(self, expect: str | Sequence[LinePattern], /) -> Iterator[Block]: # advance by the block size - 1 only. 
util.consume(block_iterator, width - 1) + # assert methods + def assert_match( - self, expect: LinePattern | Collection[LinePattern], /, *, count: int | None = None + self, + expect: LinePattern | Collection[LinePattern], + /, + *, + count: int | None = None, + flavor: Flavor | None = None, ) -> None: """Assert that there exist one or more lines matching *pattern*. :param expect: One or more patterns the lines must satisfy. :param count: If specified, the exact number of matching lines. + :param flavor: Optional temporary flavor for string patterns. """ - patterns = _to_lines_pattern(expect) - self._assert_found('line', patterns, count=count) - - def assert_not_match( - self, expect: LinePattern | Collection[LinePattern], /, *, context: int = 3 + patterns = _to_line_patterns(expect) + self._assert_found('line', patterns, count=count, flavor=flavor) + + def assert_no_match( + self, + expect: LinePattern | Collection[LinePattern], + /, + *, + context: int = 3, + flavor: Flavor | None = None, ) -> None: """Assert that there are no lines matching *pattern*. :param expect: One or more patterns the lines must not satisfy. :param context: Number of lines to print around a failing line. + :param flavor: Optional temporary flavor for string patterns. """ - patterns = _to_lines_pattern(expect) - self._assert_not_found('line', patterns, context_size=context) - - def assert_block( - self, expect: str | Sequence[LinePattern], /, *, count: int | None = None + patterns = _to_line_patterns(expect) + self._assert_not_found('line', patterns, context_size=context, flavor=flavor) + + def assert_lines( + self, + expect: str | Sequence[LinePattern], + /, + *, + count: int | None = None, + flavor: Flavor | None = None, ) -> None: """Assert that there exist one or more blocks matching the *patterns*. :param expect: The line patterns that a block must satisfy. :param count: The number of blocks that should be found. + :param flavor: Optional temporary flavor for string patterns. 
When *expect* is a single string, it is split into lines, each of which corresponding to the pattern a block's line must satisfy. """ patterns = _to_block_pattern(expect) - self._assert_found('block', patterns, count=count) - - def assert_not_block( - self, expect: str | Sequence[LinePattern], /, *, context: int = 1 + self._assert_found('block', patterns, count=count, flavor=flavor) + + def assert_no_lines( + self, + expect: str | Sequence[LinePattern], + /, + *, + context: int = 3, + flavor: Flavor | None = None, ) -> None: """Assert that no block matches the *patterns*. :param expect: The line patterns that a block must satisfy. :param context: Number of lines to print around a failing block. + :param flavor: Optional temporary flavor for string patterns. When *expect* is a single string, it is split into lines, each of which corresponding to the pattern a block's line must satisfy. @@ -301,12 +382,18 @@ def assert_not_block( Use :data:`sys.maxsize` to show all capture lines. """ patterns = _to_block_pattern(expect) - self._assert_not_found('block', patterns, context_size=context) + self._assert_not_found('block', patterns, context_size=context, flavor=flavor) def _assert_found( - self, what: PatternType, /, patterns: Sequence[LinePattern], *, count: int | None + self, + what: PatternType, + /, + patterns: Sequence[LinePattern], + *, + count: int | None, + flavor: Flavor | None = None, ) -> None: - blocks = self.iterfind(patterns) + blocks = self.iterfind_blocks(patterns, flavor=flavor) if count is None: if next(blocks, None): @@ -314,7 +401,7 @@ def _assert_found( keepends = get_option(self.options, 'keepends') ctx = util.highlight(self.lines(), keepends=keepends) - pat = util.prettify_patterns(patterns) + pat = util.prettify_patterns(patterns, sort=what == 'line') logs = [f'{what} pattern', pat, 'not found in', ctx] pytest.fail('\n\n'.join(logs)) @@ -324,36 +411,35 @@ def _assert_found( keepends = get_option(self.options, 'keepends') ctx = 
util.highlight(self.lines(), indices, keepends=keepends) - pat = util.prettify_patterns(patterns) + pat = util.prettify_patterns(patterns, sort=what == 'line') noun = util.plural_form(what, count) logs = [f'found {found} != {count} {noun} matching', pat, 'in', ctx] pytest.fail('\n\n'.join(logs)) def _assert_not_found( - self, what: PatternType, /, patterns: Sequence[LinePattern], *, context_size: int + self, + what: PatternType, + /, + patterns: Sequence[LinePattern], + *, + context_size: int, + flavor: Flavor | None = None, ) -> None: lines = self.lines() if (count := len(patterns)) > len(lines): return - compiled_patterns = self.__compile(patterns) + compiled_patterns = self.__compile(patterns, flavor=flavor) for start, block in enumerate(util.windowed(lines, count)): if all(pattern.match(line) for pattern, line in zip(compiled_patterns, block)): - pattern = util.prettify_patterns(patterns) - context = util.get_debug_context( - lines, Block(block, start), context=context_size - ) - logs = [f'{what} pattern', pattern, 'found in', '\n'.join(context)] + pat = util.prettify_patterns(patterns, sort=what == 'line') + ctx = util.get_debug_context(lines, Block(block, start), context_size) + logs = [f'{what} pattern', pat, 'found in', '\n'.join(ctx)] pytest.fail('\n\n'.join(logs)) - def __compile(self, patterns: Iterable[LinePattern], /) -> Sequence[re.Pattern[str]]: - flavor = get_option(self.options, 'flavor') - if flavor == 'fnmatch': - patterns = [fnmatch.translate(p) if isinstance(p, str) else p for p in patterns] - elif flavor == 'exact': - patterns = [re.escape(p) if isinstance(p, str) else p for p in patterns] - - # mypy does not like map + re.compile() although it is correct but - # this is likely due to https://github.com/python/mypy/issues/11880 - return [re.compile(pattern) for pattern in patterns] + def __compile( + self, patterns: Iterable[LinePattern], *, flavor: Flavor | None + ) -> Sequence[re.Pattern[str]]: + flavor = get_option(self.options, 'flavor') 
if flavor is None else flavor + return _compile(patterns, flavor=flavor) diff --git a/sphinx/testing/util.py b/sphinx/testing/util.py index a6a0519b33a..131bfe1a998 100644 --- a/sphinx/testing/util.py +++ b/sphinx/testing/util.py @@ -29,6 +29,9 @@ from xml.etree.ElementTree import ElementTree from docutils.nodes import Node + from typing_extensions import Unpack + + from sphinx.testing._matcher.options import Options def assert_node(node: Node, cls: Any = None, xpath: str = "", **kwargs: Any) -> None: @@ -191,15 +194,13 @@ def warning(self) -> StringIO: assert isinstance(self._warning, StringIO) return self._warning - @property - def stdout(self) -> LineMatcher: - """The line-matcher object on the status messages.""" - return LineMatcher(self.status.getvalue()) + def stdout(self, /, **options: Unpack[Options]) -> LineMatcher: + """Create a line matcher object for the status messages.""" + return LineMatcher(self.status, **options) - @property - def stderr(self) -> LineMatcher: - """The line-matcher object on the warning messages.""" - return LineMatcher(self.warning.getvalue()) + def stderr(self, /, **options: Unpack[Options]) -> LineMatcher: + """Create a line matcher object for the warning messages.""" + return LineMatcher(self.warning, **options) def cleanup(self, doctrees: bool = False) -> None: sys.path[:] = self._saved_path diff --git a/tests/test_testing/test_matcher.py b/tests/test_testing/test_matcher.py index b89049f8879..e5e3fe488ba 100644 --- a/tests/test_testing/test_matcher.py +++ b/tests/test_testing/test_matcher.py @@ -18,7 +18,7 @@ from collections.abc import Mapping, Sequence from typing import Final - from sphinx.testing._matcher.options import Flavor + from sphinx.testing._matcher.options import Flavor, OptionName from sphinx.testing._matcher.util import LinePattern @@ -136,6 +136,9 @@ class TestClean: empty=True, compress=False, unique=False, + delete_prefix=(), + delete_suffix=(), + ignore=None, flavor='exact', ) @@ -280,19 +283,25 @@ def 
test_matcher_default_options(options: Mapping[str, object]) -> None: """Check the synchronization of default options and classes in Sphinx.""" processed = set() - def check(option: str, default: object) -> None: + def check(option: OptionName, default: object) -> None: assert option in options assert options[option] == default processed.add(option) check('color', False) check('ctrl', True) + check('strip', True) check('stripline', False) + check('keepends', False) check('empty', True) check('compress', False) check('unique', False) + + check('delete', ()) + check('ignore', None) + check('flavor', 'exact') # check that there are no left over options @@ -343,16 +352,16 @@ def test_matcher_cache(): assert stack[1] == ('hello', '', 'world') -def test_matcher_match(): +def test_matcher_find(): lines = ['hello', 'world', 'yay', '!', '!', '!'] matcher = LineMatcher.parse(lines, flavor='exact') - assert matcher.match({'hello', 'yay'}) == [('hello', 0), ('yay', 2)] + assert matcher.find({'hello', 'yay'}) == [('hello', 0), ('yay', 2)] -def test_matcher_find(): +def test_matcher_find_blocks(): lines = ['hello', 'world', 'yay', 'hello', 'world', '!', 'yay'] matcher = LineMatcher.parse(lines) - assert matcher.find(['hello', 'world']) == [ + assert matcher.find_blocks(['hello', 'world']) == [ [('hello', 0), ('world', 1)], [('hello', 3), ('world', 4)], ] @@ -373,26 +382,44 @@ def test_matcher_flavor( expect: Sequence[tuple[str, int]], ) -> None: matcher = LineMatcher.parse(lines, flavor=flavor) - assert matcher.match(pattern) == expect + assert matcher.find(pattern) == expect + + +def test_assert_match(): + matcher = LineMatcher.parse(['a', 'b', 'c', 'd']) + matcher.assert_match('.+', flavor='re') + matcher.assert_match('[abcd]', flavor='fnmatch') + + +def test_assert_match_debug(): + pass + + +def test_assert_no_match(): + pass + + +def test_assert_no_match_debug(): + pass @pytest.mark.parametrize('dedup', range(3)) @pytest.mark.parametrize(('maxsize', 'start', 'count'), [(10, 
3, 4)]) -def test_block_exists(maxsize, start, count, dedup): +def test_assert_lines(maxsize, start, count, dedup): # 'maxsize' might be smaller than start + (dedup + 1) * count # but it is fine since stop indices are clamped internally source = Source(maxsize, start, count, dedup=dedup) matcher = LineMatcher(source.text) # the main block is matched exactly once - matcher.assert_block(source.main, count=1) + matcher.assert_lines(source.main, count=1) assert source.base * source.ncopy == source.main - matcher.assert_block(source.base, count=source.ncopy) + matcher.assert_lines(source.base, count=source.ncopy) for subidx in range(1, count + 1): # check that the sub-blocks are matched correctly subblock = [Source.block_line(start + i) for i in range(subidx)] - matcher.assert_block(subblock, count=source.ncopy) + matcher.assert_lines(subblock, count=source.ncopy) @pytest.mark.parametrize( @@ -458,16 +485,16 @@ def test_block_exists(maxsize, start, count, dedup): ), ], ) -def test_block_exists_error(pattern, count, expect): +def test_assert_lines_debug(pattern, count, expect): lines = ['a', 'b', 'c', 'a', 'b', 'd'] matcher = LineMatcher.parse(lines) if expect is None: - matcher.assert_block(pattern, count=count) + matcher.assert_lines(pattern, count=count) return with pytest.raises(_pytest.outcomes.Failed, match='.*') as exc_info: - matcher.assert_block(pattern, count=count) + matcher.assert_lines(pattern, count=count) actual = exc_info.value.msg assert actual is not None @@ -483,7 +510,7 @@ def test_block_exists_error(pattern, count, expect): ]) # fmt: on @pytest.mark.parametrize('dedup', range(3)) -def test_block_not_exist(maxsize, start, count, dedup): +def test_assert_no_lines(maxsize, start, count, dedup): # 'maxsize' might be smaller than start + (dedup + 1) * count # but it is fine since stop indices are clamped internally source = Source(maxsize, start, count, dedup=dedup) @@ -491,7 +518,7 @@ def test_block_not_exist(maxsize, start, count, dedup): # do not use 
'match' with pytest.raises() since the diff # output is hard to parse, but use == with lists instead with pytest.raises(_pytest.outcomes.Failed, match='.*') as exc_info: - matcher.assert_not_block(source.main, context=0) + matcher.assert_no_lines(source.main, context=0) actual = exc_info.value.msg assert actual is not None @@ -530,7 +557,7 @@ def test_block_not_exist(maxsize, start, count, dedup): (20, 8, 2, 4, 5, 0, 3), ], ) -def test_block_not_exist_debug( +def test_assert_no_lines_debug( maxsize, start, count, dedup, omit_prev, omit_next, context_size ): source = Source(maxsize, start, count, dedup=dedup) @@ -538,7 +565,7 @@ def test_block_not_exist_debug( # do not use 'match' with pytest.raises() since the diff # output is hard to parse, but use == with lists instead with pytest.raises(_pytest.outcomes.Failed, match='.*') as exc_info: - matcher.assert_not_block(source.main, context=context_size) + matcher.assert_no_lines(source.main, context=context_size) actual = exc_info.value.msg assert actual is not None From f4309fbfb67766ced79f83e06de6ff8ccb268bd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 31 Mar 2024 19:49:18 +0200 Subject: [PATCH 09/66] cleanup --- sphinx/testing/_matcher/options.py | 10 +++++----- tests/test_testing/test_matcher.py | 3 +-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/sphinx/testing/_matcher/options.py b/sphinx/testing/_matcher/options.py index 08ae39dee22..f2db6936d5c 100644 --- a/sphinx/testing/_matcher/options.py +++ b/sphinx/testing/_matcher/options.py @@ -106,19 +106,19 @@ class Options(TypedDict, total=False): delete: DeletePattern """Strings or patterns to remove from the beginning of the line. - + When :attr:`delete` is a single string, it is considered as a prefix to remove from the output lines. When :attr:`delete` is a single pattern, each line removes the matching groups. 
- + When :attr:`delete` consists of one or more elements, either a string or a :class:`~re.Pattern` objects, then all matching groups and prefixes are removed until none remains. - This transformation is applied at the end of the transformation - chain, just before filtering the output lines are filtered with + This transformation is applied at the end of the transformation + chain, just before filtering the output lines are filtered with the :attr:`ignore` predicate """ @@ -126,7 +126,7 @@ class Options(TypedDict, total=False): """A predicate for filtering the output lines. Lines that satisfy this predicate are not included in the output. - + The default is ``None``, meaning that all lines are included. """ diff --git a/tests/test_testing/test_matcher.py b/tests/test_testing/test_matcher.py index e5e3fe488ba..54a856e4e38 100644 --- a/tests/test_testing/test_matcher.py +++ b/tests/test_testing/test_matcher.py @@ -136,8 +136,7 @@ class TestClean: empty=True, compress=False, unique=False, - delete_prefix=(), - delete_suffix=(), + delete=(), ignore=None, flavor='exact', ) From b9ce7ec2ab280bcbc162afca2f0870ebbccf6706 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 31 Mar 2024 19:51:17 +0200 Subject: [PATCH 10/66] split tests and utils into modules --- sphinx/testing/_matcher/__init__.py | 6 +- sphinx/testing/_matcher/buffer.py | 34 +- sphinx/testing/_matcher/cleaner.py | 181 +++++++++++ sphinx/testing/_matcher/engine.py | 112 +++++++ sphinx/testing/_matcher/options.py | 44 ++- sphinx/testing/_matcher/util.py | 13 +- sphinx/testing/matcher.py | 167 ++-------- tests/test_testing/test_matcher.py | 351 +++++++-------------- tests/test_testing/test_matcher_buffer.py | 133 ++++++++ tests/test_testing/test_matcher_cleaner.py | 28 ++ tests/test_testing/test_matcher_options.py | 52 +++ 11 files changed, 706 insertions(+), 415 deletions(-) create mode 100644 sphinx/testing/_matcher/cleaner.py create 
mode 100644 sphinx/testing/_matcher/engine.py create mode 100644 tests/test_testing/test_matcher_buffer.py create mode 100644 tests/test_testing/test_matcher_cleaner.py create mode 100644 tests/test_testing/test_matcher_options.py diff --git a/sphinx/testing/_matcher/__init__.py b/sphinx/testing/_matcher/__init__.py index 9aa65596b80..c7552fd3ebf 100644 --- a/sphinx/testing/_matcher/__init__.py +++ b/sphinx/testing/_matcher/__init__.py @@ -1,5 +1,7 @@ """Private package for :class:`~sphinx.testing.matcher.LineMatcher`. -Any object provided in this package or any of its submodules can -be removed, changed, moved without prior notice. +Unless explicitly exported in ``__all__`` or stated otherwise at a module +level, any object provided in this package or any of its submodules is +considered an implementation detail and can be removed, changed, moved +without prior notice (even if its name is public). """ diff --git a/sphinx/testing/_matcher/buffer.py b/sphinx/testing/_matcher/buffer.py index 244d6aab445..049bc4a6d58 100644 --- a/sphinx/testing/_matcher/buffer.py +++ b/sphinx/testing/_matcher/buffer.py @@ -19,17 +19,17 @@ _T = TypeVar('_T', bound=Sequence[str]) -class _TextView(Generic[_T], abc.ABC): +class TextView(Generic[_T], abc.ABC): # add __weakref__ to allow the object being weak-referencable __slots__ = ('_buffer', '_offset', '__weakref__') def __init__(self, buffer: _T, offset: int = 0, /) -> None: if not isinstance(offset, int): - msg = f'offset must be an integer, got {offset!r}' + msg = f'offset must be an integer, got: {offset!r}' raise TypeError(msg) if offset < 0: - msg = f'offset must be >= 0, got {offset!r}' + msg = f'offset must be >= 0, got: {offset!r}' raise ValueError(msg) self._buffer = buffer @@ -79,7 +79,7 @@ def __gt__(self, other: object, /) -> bool: @final -class Line(_TextView[str]): +class Line(TextView[str]): """A line found by :meth:`~sphinx.testing.matcher.LineMatcher.match`. 
A :class:`Line` can be compared to :class:`str`, :class:`Line` objects or @@ -91,14 +91,14 @@ class Line(_TextView[str]): objects instead if the offset is not relevant. """ - def __init__(self, line: str, offset: int = 0, /) -> None: + def __init__(self, line: str = '', offset: int = 0, /) -> None: """Construct a :class:`Line` object. The *line* must be a native :class:`str` object. """ if type(line) is not str: # force the line to be a true string and not another string-like - msg = f'expecting a native string, got {line!r}' + msg = f'expecting a native string, got: {line!r}' raise TypeError(msg) super().__init__(line, offset) @@ -209,7 +209,7 @@ def __cast(self, other: object, /) -> Self | object: @final -class Block(_TextView[tuple[str, ...]]): +class Block(TextView[tuple[str, ...]]): """Block found by :meth:`~sphinx.testing.matcher.LineMatcher.find`. A block can be compared to list of strings (e.g., ``['line1', 'line2']``), @@ -229,19 +229,14 @@ class Block(_TextView[tuple[str, ...]]): assert Block(['a', 'b'], 2) == [Line('a', 2), Line('b', 3)] """ - def __init__(self, buffer: Iterable[str], offset: int = 0, /) -> None: + def __init__(self, buffer: Iterable[str] = (), offset: int = 0, /) -> None: buffer = tuple(buffer) for line in buffer: - if not isinstance(line, str): - err = f'expecting a native string, got {line!r}' + if type(line) is not str: + err = f'expecting a native string, got: {line!r}' raise TypeError(err) super().__init__(buffer, offset) - @property - def length(self) -> int: - """The number of lines in this block.""" - return len(self) - @classmethod def view(cls, index: int, buffer: Iterable[str], /) -> Self: """Alternative constructor flipping the order of the arguments. 
@@ -257,6 +252,11 @@ def view(cls, index: int, buffer: Iterable[str], /) -> Self: """ return cls(buffer, index) + @property + def length(self) -> int: + """The number of lines in this block.""" + return len(self) + @property def window(self) -> slice: """A slice representing this block in its source. @@ -389,8 +389,8 @@ def __cast( if len(other) == 2 and isinstance(other[0], Sequence) and isinstance(other[1], int): # mypy does not know how to deduce that the lenght is 2 if isinstance(other, str): - msg = f'expecting a sequence of lines, got {other!r}' - raise TypeError(msg) + msg = f'expecting a sequence of lines, got: {other!r}' + raise ValueError(msg) return other[0], other[1] return other, self.offset diff --git a/sphinx/testing/_matcher/cleaner.py b/sphinx/testing/_matcher/cleaner.py new file mode 100644 index 00000000000..575f98e8b4d --- /dev/null +++ b/sphinx/testing/_matcher/cleaner.py @@ -0,0 +1,181 @@ +from __future__ import annotations + +__all__ = () + +from functools import reduce +from itertools import filterfalse +from typing import TYPE_CHECKING + +from sphinx.testing._matcher import engine, util +from sphinx.testing._matcher.options import get_option +from sphinx.util.console import strip_colors, strip_control_sequences + +if TYPE_CHECKING: + import re + from collections.abc import Iterable + + from typing_extensions import Unpack + + from sphinx.testing._matcher.options import ( + DeletePattern, + Flavor, + LinePredicate, + Options, + StripChars, + ) + + +def clean_text(text: str, /, **options: Unpack[Options]) -> Iterable[str]: + """Clean a text.""" + ctrl = get_option(options, 'ctrl') + color = get_option(options, 'color') + text = strip_ansi(text, ctrl=ctrl, color=color) + + text = strip_chars(text, get_option(options, 'strip')) + lines = splitlines(text, get_option(options, 'keepends')) + return clean_lines(lines, **options) + + +def clean_lines(lines: Iterable[str], /, **options: Unpack[Options]) -> Iterable[str]: + """Clean a list of 
lines.""" + lines = strip_lines(lines, get_option(options, 'stripline')) + # Removing empty lines first ensures that serial duplicates can + # be eliminated in one cycle. Inverting the order of operations + # is not possible since empty lines may 'hide' duplicated lines. + empty = get_option(options, 'empty') + compress = get_option(options, 'compress') + unique = get_option(options, 'unique') + lines = filterlines(lines, empty=empty, compress=compress, unique=unique) + + delete = get_option(options, 'delete') + flavor = get_option(options, 'flavor') + lines = prune(lines, delete, flavor=flavor) + + return ignorelines(lines, get_option(options, 'ignore')) + + +def strip_ansi(text: str, /, ctrl: bool = False, color: bool = False) -> str: + """Strip ANSI escape sequences. + + :param text: The text to clean. + :param ctrl: If true, remove non-color ANSI escape sequences. + :param color: If true, remove color ANSI escape sequences. + :return: The cleaned text. + """ + # non-color control sequences must be stripped before colors + text = text if ctrl else strip_control_sequences(text) + text = text if color else strip_colors(text) + return text + + +def strip_chars(text: str, chars: StripChars = True, /) -> str: + """Strip expected characters from *text*.""" + if isinstance(chars, bool): + return text.strip() if chars else text + + if isinstance(chars, str) or chars is None: + return text.strip(chars) + + msg = 'expecting a boolean, a string or None for %r, got: %r' % ('strip', chars) + raise ValueError(msg) + + +def strip_lines(lines: Iterable[str], chars: StripChars = True, /) -> Iterable[str]: + """Call :meth:`str.strip` to each line in *lines*.""" + if isinstance(chars, bool): + return map(str.strip, lines) if chars else lines + + if isinstance(chars, str) or chars is None: + return (line.strip(chars) for line in lines) + + msg = 'expecting a boolean, a string or None for %r, got: %r' % ('stripline', chars) + raise ValueError(msg) + + +def splitlines(text: str, /, 
keepends: bool = False) -> Iterable[str]: + """Split *text* into lines.""" + return text.splitlines(keepends=keepends) + + +def filterlines( + lines: Iterable[str], + /, + *, + empty: bool = False, + compress: bool = False, + unique: bool = False, +) -> Iterable[str]: + """Filter the lines. + + :param lines: The lines to filter. + :param empty: If true, remove empty lines. + :param unique: If true, remove duplicated lines. + :param compress: If true, remove consecutive duplicated lines. + :return: An iterable of filtered lines. + + By convention, duplicates elimination is performed *after* empty lines + are removed. To reverse the behaviour, consider using:: + + lines = filterlines(lines, compress=True) + lines = filterlines(lines, empty=True) + """ + # Removing empty lines first ensures that serial duplicates can + # be eliminated in one cycle. Inverting the order of operations + # is not possible since empty lines may 'hide' duplicated lines. + if not empty: + lines = filter(None, lines) + + if unique: + # 'compress' has no effect when 'unique' is set + return util.unique_everseen(lines) + + if compress: + return util.unique_justseen(lines) + + return lines + + +def ignorelines(lines: Iterable[str], predicate: LinePredicate | None, /) -> Iterable[str]: + """Ignore lines satisfying the *predicate*. + + :param lines: The lines to filter. + :param predicate: An optional predicate. + :return: An iterable of filtered lines. + """ + return filterfalse(predicate, lines) if callable(predicate) else lines + + +def prune( + lines: Iterable[str], delete: DeletePattern, /, flavor: Flavor = 'none' +) -> Iterable[str]: + r"""Remove substrings from *lines* satisfying some patterns. + + :param lines: The lines to transform. + :param delete: One or more prefixes to remove or substitution patterns. + :param flavor: Indicate the flavor of prefix regular expressions. + :return: An iterable of transformed lines. 
+ + Usage:: + + lines = prune(['1111a', 'b'], r'\d+', flavor='re') + assert list(lines) == ['a', 'b'] + + lines = prune(['a123b', 'c123d'], re.compile(r'\d+')) + assert list(lines) == ['ab', 'cd'] + """ + delete_patterns = engine.to_line_patterns(delete) + patterns = engine.translate(delete_patterns, flavor=flavor) + # ensure that we are using the beginning of the string (this must + # be done *after* the regular expression translation, since fnmatch + # patterns do not support 'start of the string' syntax) + patterns = (engine.transform(lambda p: rf'^{p}', p) for p in patterns) + compiled = engine.compile(patterns, flavor='re') + + def sub(line: str, pattern: re.Pattern[str]) -> str: + return pattern.sub('', line) + + for line in lines: + ret = reduce(sub, compiled, line) + while line != ret: + line, ret = ret, reduce(sub, compiled, ret) + yield ret diff --git a/sphinx/testing/_matcher/engine.py b/sphinx/testing/_matcher/engine.py new file mode 100644 index 00000000000..2c0c9be1935 --- /dev/null +++ b/sphinx/testing/_matcher/engine.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +__all__ = () + +import fnmatch +import re +from typing import TYPE_CHECKING, overload + +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Sequence + from typing import TypeVar + + from sphinx.testing._matcher.options import Flavor + from sphinx.testing._matcher.util import LinePattern + + _LinePatternT = TypeVar('_LinePatternT', str, re.Pattern[str]) + + +def _check_flavor(flavor: Flavor) -> None: + allowed = ('none', 'fnmatch', 're') + if flavor not in allowed: + msg = f'unknown flavor: {flavor!r} (choose from {tuple(map(repr, allowed))})' + raise ValueError(msg) + + +# fmt: off +@overload +def to_line_patterns(expect: _LinePatternT) -> tuple[_LinePatternT]: ... # NoQA: E704 +@overload +def to_line_patterns(expect: Iterable[LinePattern]) -> tuple[LinePattern, ...]: ... 
# NoQA: E704 +# fmt: on +def to_line_patterns(expect: LinePattern | Iterable[LinePattern]) -> Sequence[LinePattern]: # NoQA: E302 + """Get a read-only sequence of line-matching patterns. + + :param expect: One or more patterns a line should match. + :return: The possible line patterns. + + .. note:: The order of *expect* is not retained and duplicates are removed. + """ + if isinstance(expect, (str, re.Pattern)): + return (expect,) + + def key(x: str | re.Pattern[str]) -> str: + return x if isinstance(x, str) else x.pattern + + # Do not make the patterns unique by their pattern string since + # string patterns compiled in different flavors might not have + # the same underlying pattern's string, e.g.:: + # + # re.compile('a*').pattern != fnmatch.translate('a*') + return tuple(sorted(set(expect), key=key)) + + +# fmt: off +@overload +def to_block_pattern(expect: str) -> tuple[str, ...]: ... # NoQA: E704 +@overload +def to_block_pattern(expect: re.Pattern[str]) -> tuple[re.Pattern[str]]: ... # NoQA: E704 +@overload +def to_block_pattern(expect: Sequence[LinePattern]) -> Sequence[LinePattern]: ... # NoQA: E704 +# fmt: on +def to_block_pattern(expect: LinePattern | Iterable[LinePattern]) -> Sequence[LinePattern]: # NoQA: E302 + """Get a read-only sequence for a s single block pattern. + + :param expect: A single string, a single pattern or one or more patterns. + :return: The line patterns of the block. + """ + if isinstance(expect, str): + return tuple(expect.splitlines()) + if isinstance(expect, re.Pattern): + return (expect,) + return tuple(expect) + + +# fmt: off +@overload +def transform(fn: Callable[[str], str], x: str) -> str: ... # NoQA: E704 +@overload +def transform(fn: Callable[..., str], x: re.Pattern[str]) -> re.Pattern[str]: ... 
# NoQA: E704 +# fmt: on +def transform(fn: Callable[..., str], x: LinePattern) -> LinePattern: # NoQA: E302 + """Transform regular expressions, leaving compiled patterns untouched.""" + return fn(x) if isinstance(x, str) else x + + +def translate(patterns: Iterable[LinePattern], *, flavor: Flavor) -> Iterable[LinePattern]: + r"""Translate regular expressions in *patterns* according to *flavor*. + + :param patterns: An iterable of patterns to translate if needed. + :return: An iterable of :class:`re`-style patterns. + + Usage:: + + patterns = list(_translate(['a*', re.compile('b')], flavor='fnmatch')) + patterns == ['(?:a.*)\\Z', re.compile('b')] + """ + _check_flavor(flavor) + + if flavor == 'none': + return (transform(re.escape, pattern) for pattern in patterns) + if flavor == 'fnmatch': + return (transform(fnmatch.translate, pattern) for pattern in patterns) + + return patterns + + +def compile(patterns: Iterable[LinePattern], *, flavor: Flavor) -> Sequence[re.Pattern[str]]: + """Compile one or more patterns into :class:`~re.Pattern` objects.""" + patterns = translate(patterns, flavor=flavor) + # mypy does not like map + re.compile() although it is correct but + # this is likely due to https://github.com/python/mypy/issues/11880 + return [re.compile(pattern) for pattern in patterns] diff --git a/sphinx/testing/_matcher/options.py b/sphinx/testing/_matcher/options.py index f2db6936d5c..5a02c408623 100644 --- a/sphinx/testing/_matcher/options.py +++ b/sphinx/testing/_matcher/options.py @@ -1,6 +1,6 @@ from __future__ import annotations -__all__ = ('Options', 'get_option') +__all__ = ('Options',) from typing import TYPE_CHECKING, TypedDict, final, overload @@ -22,7 +22,7 @@ LinePredicate = Callable[[str], object] FlavorOption = Literal['flavor'] - Flavor = Literal['re', 'fnmatch', 'exact'] + Flavor = Literal['re', 'fnmatch', 'none'] # For some reason, mypy does not like Union of Literal, # so we wrap the Literal types inside a bigger Literal. 
@@ -105,21 +105,28 @@ class Options(TypedDict, total=False): """ delete: DeletePattern - """Strings or patterns to remove from the beginning of the line. + r"""Prefixes or patterns to remove from the output lines. - When :attr:`delete` is a single string, it is considered as a - prefix to remove from the output lines. + The transformation is described for one or more :class:`str` + or :class:`~re.Pattern` objects as follows: - When :attr:`delete` is a single pattern, each line removes the - matching groups. + - Compile :class:`str` pattern into :class:`~re.Pattern` according + to the pattern :attr:`flavor` and remove prefixes matching those + patterns from the output lines. + - Replace substrings in the output lines matching one or more + patterns directly given as :class:`~re.Pattern` objects. - When :attr:`delete` consists of one or more elements, either - a string or a :class:`~re.Pattern` objects, then all matching - groups and prefixes are removed until none remains. + The process is repeated until no output lines starts by any + of the given strings or matches any of the given patterns. This transformation is applied at the end of the transformation chain, just before filtering the output lines are filtered with - the :attr:`ignore` predicate + the :attr:`ignore` predicate. + + Example:: + + clean('abcdA\n1', delete='abcd') == ['A', '1'] + clean('1234A\nxyzt', delete=r'\d+', flavor='re') == ['A', 'xyzt'] """ ignore: LinePredicate | None @@ -135,7 +142,7 @@ class Options(TypedDict, total=False): The allowed values for :attr:`flavor` are: - * ``'exact'`` -- match lines using string equality (the default). + * ``'none'`` -- match lines using string equality (the default). * ``'fnmatch'`` -- match lines using :mod:`fnmatch`-style patterns. * ``'re'`` -- match lines using :mod:`re`-style patterns. 
@@ -146,7 +153,10 @@ class Options(TypedDict, total=False): @final class CompleteOptions(TypedDict): - """Same as :class:`Options` but as a total dictionary.""" + """Same as :class:`Options` but as a total dictionary. + + :meta private: + """ # Whenever a new option in :class:`Options` is added, do not # forget to add it here and in :data:`DEFAULT_OPTIONS`. @@ -179,7 +189,7 @@ class CompleteOptions(TypedDict): unique=False, delete=(), ignore=None, - flavor='exact', + flavor='none', ) """The default (read-only) options values.""" @@ -220,6 +230,12 @@ def get_option(options: _OptionsView, name: FlavorOption, /) -> Flavor: ... # N def get_option(options: _OptionsView, name: FlavorOption, default: DT, /) -> Flavor | DT: ... # NoQA: E704 # fmt: on def get_option(options: _OptionsView, name: OptionName, /, *default: DT) -> object | DT: # NoQA: E302 + """Get an option value or *default*. + + If *default* is not specified, an internal default value is returned. + + :meta private: + """ if name in options: return options[name] return default[0] if default else DEFAULT_OPTIONS[name] diff --git a/sphinx/testing/_matcher/util.py b/sphinx/testing/_matcher/util.py index 9c6724f6149..42d976a2a78 100644 --- a/sphinx/testing/_matcher/util.py +++ b/sphinx/testing/_matcher/util.py @@ -1,4 +1,8 @@ -"""Private utility functions for :mod:`sphinx.testing.matcher`.""" +"""Private utility functions for :mod:`sphinx.testing.matcher`. + +All objects provided by this module are considered an implementation detail +and are not meant to be used by external libraries. 
+""" from __future__ import annotations @@ -20,6 +24,7 @@ from sphinx.testing._matcher.buffer import Block LinePattern = Union[str, re.Pattern[str]] + """A regular expression or a compiled pattern.""" _T = TypeVar('_T') @@ -130,7 +135,11 @@ def indent_lines( def prettify_patterns( - patterns: Sequence[LinePattern], /, *, indent: int = 4, sort: bool = False, + patterns: Sequence[LinePattern], + /, + *, + indent: int = 4, + sort: bool = False, ) -> str: """Prettify the *patterns* as a string to print.""" lines = (p if isinstance(p, str) else p.pattern for p in patterns) diff --git a/sphinx/testing/matcher.py b/sphinx/testing/matcher.py index 6bc0c2c0220..6a08786d479 100644 --- a/sphinx/testing/matcher.py +++ b/sphinx/testing/matcher.py @@ -1,25 +1,20 @@ from __future__ import annotations -__all__ = ('clean', 'Options', 'LineMatcher') +__all__ = ('Options', 'LineMatcher') import contextlib -import fnmatch -import re -from functools import reduce from itertools import starmap from types import MappingProxyType -from typing import TYPE_CHECKING, final +from typing import TYPE_CHECKING -import pytest - -from sphinx.testing._matcher import util +from sphinx.testing._matcher import cleaner, engine, util from sphinx.testing._matcher.buffer import Block, Line from sphinx.testing._matcher.options import DEFAULT_OPTIONS, Options, get_option -from sphinx.util.console import strip_colors, strip_control_sequences if TYPE_CHECKING: from collections.abc import Collection, Generator, Iterable, Iterator, Sequence from io import StringIO + from re import Pattern from typing import Literal from typing_extensions import Self, Unpack @@ -30,115 +25,6 @@ PatternType = Literal['line', 'block'] -def clean(text: str, /, **options: Unpack[Options]) -> tuple[str, ...]: - """Split *text* into lines. - - :param text: The text to get the cleaned lines of. - :param options: The processing options. - :return: A list of cleaned lines. 
- """ - if not get_option(options, 'ctrl'): - # Non-color ANSI esc. seq. must be stripped before colors; - # see :func:`sphinx.util.console.strip_escape_sequences`. - text = strip_control_sequences(text) - - if not get_option(options, 'color'): - text = strip_colors(text) - - chars = get_option(options, 'strip') - if isinstance(chars, bool) and chars: - text = text.strip() - elif isinstance(chars, str) or chars is None: - text = text.strip(chars) - elif chars is not False: - msg = 'expecting a boolean, a string or None for %r, got: %r' % ('strip', chars) - raise TypeError(msg) - - keepends = get_option(options, 'keepends') - lines: Iterable[str] = text.splitlines(keepends=keepends) - - chars = get_option(options, 'stripline') - if isinstance(chars, bool) and chars: - lines = map(str.strip, lines) - elif isinstance(chars, str) or chars is None: - lines = (line.strip(chars) for line in lines) - elif chars is not False: - msg = 'expecting a boolean, a string or None for %r, got: %r' % ('stripline', chars) - raise TypeError(msg) - - # Removing empty lines first ensures that serial duplicates can - # be eliminated in one cycle. Inverting the order of operations - # is not possible since empty lines may 'hide' duplicated lines. 
- if not get_option(options, 'empty'): - lines = filter(None, lines) - - if get_option(options, 'unique'): - # 'compress' has no effect when 'unique' is set - lines = util.unique_everseen(lines) - elif get_option(options, 'compress'): - lines = util.unique_justseen(lines) - - if delete := get_option(options, 'delete'): - flavor = get_option(options, 'flavor') - patterns = _translate(_to_line_patterns(delete), flavor=flavor) - # ensure that we are using the beginning of the string - compiled = [re.compile(rf'^{p}') if isinstance(p, str) else p for p in patterns] - - def sub(line: str, pattern: re.Pattern[str]) -> str: - return pattern.sub('', line) - - def reduction(line: str) -> str: - temp = reduce(sub, compiled, line) - while line != temp: - line, temp = temp, reduce(sub, compiled, temp) - return temp - - lines = map(reduction, lines) - - if callable(ignore := get_option(options, 'ignore')): - lines = (line for line in lines if not ignore(line)) - - return tuple(lines) - - -def _to_line_patterns(expect: LinePattern | Collection[LinePattern]) -> Sequence[LinePattern]: - """Make *pattern* compatible for line-matching.""" - if isinstance(expect, (str, re.Pattern)): - return [expect] - - def key(x: str | re.Pattern[str]) -> str: - return x if isinstance(x, str) else x.pattern - - return [expect] if isinstance(expect, (str, re.Pattern)) else sorted(set(expect), key=key) - - -def _to_block_pattern(expect: LinePattern | Sequence[LinePattern]) -> Sequence[LinePattern]: - """Make *pattern* compatible for block-matching.""" - if isinstance(expect, str): - return expect.splitlines() - if isinstance(expect, re.Pattern): - return [expect] - return expect - - -def _translate(patterns: Iterable[LinePattern], *, flavor: Flavor) -> Iterable[LinePattern]: - if flavor == 'fnmatch': - return [fnmatch.translate(p) if isinstance(p, str) else p for p in patterns] - - if flavor == 'exact': - return [re.escape(p) if isinstance(p, str) else p for p in patterns] - - return patterns - - 
-def _compile(patterns: Iterable[LinePattern], *, flavor: Flavor) -> Sequence[re.Pattern[str]]: - patterns = _translate(patterns, flavor=flavor) - # mypy does not like map + re.compile() although it is correct but - # this is likely due to https://github.com/python/mypy/issues/11880 - return [re.compile(pattern) for pattern in patterns] - - -@final class LineMatcher: """Helper object for matching output lines.""" @@ -157,7 +43,7 @@ def __init__(self, content: str | StringIO, /, **options: Unpack[Options]) -> No self._stack: list[int | tuple[str, ...] | None] = [None] @classmethod - def parse( + def from_lines( cls, lines: Iterable[str], sep: str = '\n', /, **options: Unpack[Options] ) -> Self: """Construct a :class:`LineMatcher` object from a list of lines. @@ -220,7 +106,7 @@ def lines(self) -> tuple[str, ...]: if cached is None: # compute for the first time the value - cached = clean(self.content, **self.options) + cached = tuple(cleaner.clean_text(self.content, **self.options)) # check if the value is the same as any of a previously cached value for addr, value in enumerate(stack): if value == cached: @@ -252,7 +138,7 @@ def iterfind( When one or more patterns are given, the order of evaluation is the same as they are given (or arbitrary if they are given in a set). """ - patterns = _to_line_patterns(expect) + patterns = engine.to_line_patterns(expect) matchers = [pattern.match for pattern in self.__compile(patterns, flavor=flavor)] def predicate(line: Line) -> bool: @@ -284,7 +170,7 @@ def iterfind_blocks( objects as they could be interpreted as a line or a block pattern. 
""" - patterns = _to_block_pattern(expect) + patterns = engine.to_block_pattern(expect) lines = self.lines() # early abort if there are more expected lines than actual ones @@ -312,7 +198,6 @@ def assert_match( self, expect: LinePattern | Collection[LinePattern], /, - *, count: int | None = None, flavor: Flavor | None = None, ) -> None: @@ -322,7 +207,7 @@ def assert_match( :param count: If specified, the exact number of matching lines. :param flavor: Optional temporary flavor for string patterns. """ - patterns = _to_line_patterns(expect) + patterns = engine.to_line_patterns(expect) self._assert_found('line', patterns, count=count, flavor=flavor) def assert_no_match( @@ -339,7 +224,7 @@ def assert_no_match( :param context: Number of lines to print around a failing line. :param flavor: Optional temporary flavor for string patterns. """ - patterns = _to_line_patterns(expect) + patterns = engine.to_line_patterns(expect) self._assert_not_found('line', patterns, context_size=context, flavor=flavor) def assert_lines( @@ -359,7 +244,7 @@ def assert_lines( When *expect* is a single string, it is split into lines, each of which corresponding to the pattern a block's line must satisfy. """ - patterns = _to_block_pattern(expect) + patterns = engine.to_block_pattern(expect) self._assert_found('block', patterns, count=count, flavor=flavor) def assert_no_lines( @@ -381,13 +266,12 @@ def assert_no_lines( Use :data:`sys.maxsize` to show all capture lines. 
""" - patterns = _to_block_pattern(expect) + patterns = engine.to_block_pattern(expect) self._assert_not_found('block', patterns, context_size=context, flavor=flavor) def _assert_found( self, - what: PatternType, - /, + pattern_type: PatternType, patterns: Sequence[LinePattern], *, count: int | None, @@ -401,9 +285,9 @@ def _assert_found( keepends = get_option(self.options, 'keepends') ctx = util.highlight(self.lines(), keepends=keepends) - pat = util.prettify_patterns(patterns, sort=what == 'line') - logs = [f'{what} pattern', pat, 'not found in', ctx] - pytest.fail('\n\n'.join(logs)) + pat = util.prettify_patterns(patterns, sort=pattern_type == 'line') + logs = [f'{pattern_type} pattern', pat, 'not found in', ctx] + raise AssertionError('\n\n'.join(logs)) indices = {block.offset: len(block) for block in blocks} if (found := len(indices)) == count: @@ -411,15 +295,14 @@ def _assert_found( keepends = get_option(self.options, 'keepends') ctx = util.highlight(self.lines(), indices, keepends=keepends) - pat = util.prettify_patterns(patterns, sort=what == 'line') - noun = util.plural_form(what, count) + pat = util.prettify_patterns(patterns, sort=pattern_type == 'line') + noun = util.plural_form(pattern_type, count) logs = [f'found {found} != {count} {noun} matching', pat, 'in', ctx] - pytest.fail('\n\n'.join(logs)) + raise AssertionError('\n\n'.join(logs)) def _assert_not_found( self, - what: PatternType, - /, + pattern_type: PatternType, patterns: Sequence[LinePattern], *, context_size: int, @@ -433,13 +316,13 @@ def _assert_not_found( for start, block in enumerate(util.windowed(lines, count)): if all(pattern.match(line) for pattern, line in zip(compiled_patterns, block)): - pat = util.prettify_patterns(patterns, sort=what == 'line') + pat = util.prettify_patterns(patterns, sort=pattern_type == 'line') ctx = util.get_debug_context(lines, Block(block, start), context_size) - logs = [f'{what} pattern', pat, 'found in', '\n'.join(ctx)] - pytest.fail('\n\n'.join(logs)) + 
logs = [f'{pattern_type} pattern', pat, 'found in', '\n'.join(ctx)] + raise AssertionError('\n\n'.join(logs)) def __compile( self, patterns: Iterable[LinePattern], *, flavor: Flavor | None - ) -> Sequence[re.Pattern[str]]: + ) -> Sequence[Pattern[str]]: flavor = get_option(self.options, 'flavor') if flavor is None else flavor - return _compile(patterns, flavor=flavor) + return engine.compile(patterns, flavor=flavor) diff --git a/tests/test_testing/test_matcher.py b/tests/test_testing/test_matcher.py index 54a856e4e38..fc825ce1edf 100644 --- a/tests/test_testing/test_matcher.py +++ b/tests/test_testing/test_matcher.py @@ -3,22 +3,20 @@ import dataclasses import itertools from functools import cached_property -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING -import _pytest.outcomes import pytest import sphinx.util.console as term from sphinx.testing._matcher import util -from sphinx.testing._matcher.buffer import Block, Line -from sphinx.testing._matcher.options import DEFAULT_OPTIONS, CompleteOptions, Options -from sphinx.testing.matcher import LineMatcher, clean +from sphinx.testing.matcher import LineMatcher if TYPE_CHECKING: - from collections.abc import Mapping, Sequence - from typing import Final + from collections.abc import Sequence - from sphinx.testing._matcher.options import Flavor, OptionName + from _pytest._code import ExceptionInfo + + from sphinx.testing._matcher.options import Flavor from sphinx.testing._matcher.util import LinePattern @@ -125,192 +123,16 @@ def make_debug_context( return lines -class TestClean: - # options with no cleaning phase (equivalent to text.striplines(True)) - noop: Final[CompleteOptions] = CompleteOptions( - color=True, - ctrl=True, - strip=False, - stripline=False, - keepends=True, - empty=True, - compress=False, - unique=False, - delete=(), - ignore=None, - flavor='exact', - ) - - @classmethod - def check(cls, text: str, options: Options, expect: Sequence[str]) -> None: - options = cast(Options, 
cls.noop) | options - assert clean(text, **options) == tuple(expect) - - @pytest.mark.parametrize( - ('text', 'options', 'expect'), - [ - ('a ', Options(), ['a ']), - ('a\nb ', Options(), ['a\n', 'b ']), - ( - '\n'.join(['a', 'a', '', 'a', 'b', 'c', 'a']), - Options(keepends=False), - ['a', 'a', '', 'a', 'b', 'c', 'a'], - ), - ], - ) - def test_base(self, text, options, expect): - self.check(text, options, expect) - - @pytest.mark.parametrize( - ('text', 'options', 'expect'), - [ - ('a\nb ', Options(strip=True, stripline=False), ['a\n', 'b']), - ('a\nb ', Options(strip=False, stripline=True), ['a', 'b']), - ('a\n b ', Options(strip=True, stripline=True), ['a', 'b']), - ], - ) - def test_strip(self, text, options, expect): - self.check(text, options, expect) - - @pytest.mark.parametrize( - ('text', 'options', 'expect'), - [ - ( - '\n'.join(['a', 'a', '', 'a', 'b', 'c', 'a']), - Options(keepends=False, compress=True), - ['a', '', 'a', 'b', 'c', 'a'], - ), - ( - '\n'.join(['a', 'a', '', 'a', 'b', 'c', 'a']), - Options(keepends=False, unique=True), - ['a', '', 'b', 'c'], - ), - ( - '\n'.join(['a', 'a', '', 'a', 'b', 'c', 'a']), - Options(keepends=False, compress=False, unique=True), - ['a', '', 'b', 'c'], - ), - ], - ) - def test_eliminate_keep_empty(self, text, options, expect): - self.check(text, options, expect) - - @pytest.mark.parametrize( - ('text', 'options', 'expect'), - [ - ( - '\n'.join(['a', 'a', '', 'a', 'b', 'c', 'a']), - Options(keepends=False, empty=False, compress=True), - ['a', 'b', 'c', 'a'], - ), - ( - '\n'.join(['a', 'a', '', 'a', 'b', 'c', 'a']), - Options(keepends=False, empty=False, unique=True), - ['a', 'b', 'c'], - ), - ( - '\n'.join(['a', 'a', '', 'a', 'b', 'c', 'a']), - Options(keepends=False, empty=False, compress=False, unique=True), - ['a', 'b', 'c'], - ), - ], - ) - def test_eliminate(self, text, options, expect): - self.check(text, options, expect) - - -def test_line_operators(): - assert Line('a', 1) == 'a' - assert Line('a', 1) == 
('a', 1) - assert Line('a', 1) == ['a', 1] - - assert Line('a', 2) != 'b' - assert Line('a', 2) != ('a', 1) - assert Line('a', 2) != ['a', 1] - - # order - assert Line('ab', 1) > 'a' - assert Line('a', 1) < 'ab' - assert Line('a', 1) <= 'a' - assert Line('a', 1) >= 'a' - - assert Line('ab', 1) > ('a', 1) - assert Line('a', 1) < ('ab', 1) - assert Line('a', 1) <= ('a', 1) - assert Line('a', 1) >= ('a', 1) - - -@pytest.mark.parametrize('expect', [('a', 'b', 'c'), ('a', ('b', 2), Line('c', 3))]) -def test_block_operators(expect: Sequence[str]) -> None: - lines = ['a', 'b', 'c'] - assert Block(lines, 1) == expect - assert Block(lines, 1) == [expect, 1] - - assert Block(lines, 1) != [*expect, 'x'] - assert Block(lines, 1) != [expect, 2] - - assert Block(lines, 1) <= expect - assert Block(lines, 1) <= [expect, 1] - - assert Block(lines[:2], 1) <= expect - assert Block(lines[:2], 1) <= [expect, 1] - - assert Block(lines[:2], 1) < expect - assert Block(lines[:2], 1) < [expect, 1] - - assert Block(lines, 1) >= expect - assert Block(lines, 1) >= [expect, 1] - - assert Block([*lines, 'd'], 1) > expect - assert Block([*lines, 'd'], 1) > [expect, 1] - - assert Block(['a', 'b'], 1).context(delta=4, limit=5) == (slice(0, 1), slice(3, 5)) - assert Block(['a', 'b'], 3).context(delta=2, limit=9) == (slice(1, 3), slice(5, 7)) - - -def test_options_class(): - # ensure that the classes are kept synchronized - missing_keys = Options.__annotations__.keys() - CompleteOptions.__annotations__ - assert not missing_keys, f'missing fields in proxy class: {", ".join(missing_keys)}' - - foreign_keys = CompleteOptions.__annotations__.keys() - Options.__annotations__ - assert not missing_keys, f'foreign fields in proxy class: {", ".join(foreign_keys)}' - - -@pytest.mark.parametrize('options', [DEFAULT_OPTIONS, LineMatcher('').options]) -def test_matcher_default_options(options: Mapping[str, object]) -> None: - """Check the synchronization of default options and classes in Sphinx.""" - processed = 
set() - - def check(option: OptionName, default: object) -> None: - assert option in options - assert options[option] == default - processed.add(option) - - check('color', False) - check('ctrl', True) - - check('strip', True) - check('stripline', False) - - check('keepends', False) - check('empty', True) - check('compress', False) - check('unique', False) - - check('delete', ()) - check('ignore', None) - - check('flavor', 'exact') - - # check that there are no left over options - assert sorted(processed) == sorted(Options.__annotations__) +def parse_excinfo(excinfo: ExceptionInfo[AssertionError]) -> list[str]: + # see: https://github.com/pytest-dev/pytest/issues/12175 + assert excinfo.value is not None + return str(excinfo.value).removeprefix('AssertionError: ').splitlines() def test_matcher_cache(): source = [term.blue('hello'), '', 'world'] # keep colors and empty lines - matcher = LineMatcher.parse(source, color=True, empty=True) + matcher = LineMatcher.from_lines(source, color=True, empty=True) stack = matcher._stack @@ -353,13 +175,13 @@ def test_matcher_cache(): def test_matcher_find(): lines = ['hello', 'world', 'yay', '!', '!', '!'] - matcher = LineMatcher.parse(lines, flavor='exact') + matcher = LineMatcher.from_lines(lines, flavor='none') assert matcher.find({'hello', 'yay'}) == [('hello', 0), ('yay', 2)] def test_matcher_find_blocks(): lines = ['hello', 'world', 'yay', 'hello', 'world', '!', 'yay'] - matcher = LineMatcher.parse(lines) + matcher = LineMatcher.from_lines(lines) assert matcher.find_blocks(['hello', 'world']) == [ [('hello', 0), ('world', 1)], [('hello', 3), ('world', 4)], @@ -369,7 +191,7 @@ def test_matcher_find_blocks(): @pytest.mark.parametrize( ('lines', 'flavor', 'pattern', 'expect'), [ - (['1', 'b', '3', 'a', '5', '!'], 'exact', ('a', 'b'), [('b', 1), ('a', 3)]), + (['1', 'b', '3', 'a', '5', '!'], 'none', ('a', 'b'), [('b', 1), ('a', 3)]), (['blbl', 'yay', 'hihi', '^o^'], 'fnmatch', '*[ao]*', [('yay', 1), ('^o^', 3)]), (['111', 
'hello', 'world', '222'], 're', r'\d+', [('111', 0), ('222', 3)]), ], @@ -380,26 +202,86 @@ def test_matcher_flavor( pattern: Sequence[LinePattern], expect: Sequence[tuple[str, int]], ) -> None: - matcher = LineMatcher.parse(lines, flavor=flavor) + matcher = LineMatcher.from_lines(lines, flavor=flavor) assert matcher.find(pattern) == expect def test_assert_match(): - matcher = LineMatcher.parse(['a', 'b', 'c', 'd']) + matcher = LineMatcher.from_lines(['a', 'b', 'c', 'd']) matcher.assert_match('.+', flavor='re') matcher.assert_match('[abcd]', flavor='fnmatch') -def test_assert_match_debug(): - pass +@pytest.mark.parametrize( + ('lines', 'pattern', 'flavor', 'expect'), + [ + ( + ['a', 'b', 'c', 'd', 'e'], + '[a-z]{3,}', + 're', + [ + 'line pattern', + '', + ' [a-z]{3,}', + '', + 'not found in', + '', + ' a', + ' b', + ' c', + ' d', + ' e', + ], + ), + ], +) +def test_assert_match_debug(lines, pattern, flavor, expect): + matcher = LineMatcher.from_lines(lines) + + with pytest.raises(AssertionError) as exc_info: + matcher.assert_match(pattern, flavor=flavor) + + assert parse_excinfo(exc_info) == expect def test_assert_no_match(): - pass + matcher = LineMatcher.from_lines(['a', 'b', 'c', 'd']) + matcher.assert_no_match(r'\d+', flavor='re') + matcher.assert_no_match('[1-9]', flavor='fnmatch') -def test_assert_no_match_debug(): - pass +@pytest.mark.parametrize( + ('lines', 'pattern', 'flavor', 'context', 'expect'), + [ + ( + ['a', 'b', '11X', '22Y', '33Z', 'c', 'd'], + '[1-9]{2}[A-Z]', + 're', + 2, + [ + 'line pattern', + '', + ' [1-9]{2}[A-Z]', + '', + 'found in', + '', + ' a', + ' b', + '> 11X', + ' 22Y', + ' 33Z', + '... 
(omitted 2 lines) ...', + ], + ), + ], +) +def test_assert_no_match_debug(lines, pattern, flavor, context, expect): + matcher = LineMatcher.from_lines(lines) + + with pytest.raises(AssertionError, match='.*') as exc_info: + matcher.assert_no_match(pattern, context=context, flavor=flavor) + + assert parse_excinfo(exc_info) == expect @pytest.mark.parametrize('dedup', range(3)) @@ -411,20 +293,21 @@ def test_assert_lines(maxsize, start, count, dedup): matcher = LineMatcher(source.text) # the main block is matched exactly once - matcher.assert_lines(source.main, count=1) + matcher.assert_lines(source.main, count=1, flavor='none') assert source.base * source.ncopy == source.main - matcher.assert_lines(source.base, count=source.ncopy) + matcher.assert_lines(source.base, count=source.ncopy, flavor='none') for subidx in range(1, count + 1): # check that the sub-blocks are matched correctly subblock = [Source.block_line(start + i) for i in range(subidx)] - matcher.assert_lines(subblock, count=source.ncopy) + matcher.assert_lines(subblock, count=source.ncopy, flavor='none') @pytest.mark.parametrize( - ('pattern', 'count', 'expect'), + ('lines', 'pattern', 'count', 'expect'), [ ( + ['a', 'b', 'c', 'a', 'b', 'd'], ['x', 'y'], None, [ @@ -444,6 +327,7 @@ def test_assert_lines(maxsize, start, count, dedup): ], ), ( + ['a', 'b', 'c', 'a', 'b', 'd'], ['a', 'b'], 1, [ @@ -462,8 +346,9 @@ def test_assert_lines(maxsize, start, count, dedup): ' d', ], ), - (['a', 'b'], 2, None), + (['a', 'b', 'c', 'a', 'b', 'd'], ['a', 'b'], 2, None), ( + ['a', 'b', 'c', 'a', 'b', 'd'], ['a', 'b'], 3, [ @@ -484,20 +369,17 @@ def test_assert_lines(maxsize, start, count, dedup): ), ], ) -def test_assert_lines_debug(pattern, count, expect): - lines = ['a', 'b', 'c', 'a', 'b', 'd'] - matcher = LineMatcher.parse(lines) +def test_assert_lines_debug(lines, pattern, count, expect): + matcher = LineMatcher.from_lines(lines) if expect is None: matcher.assert_lines(pattern, count=count) return - with 
pytest.raises(_pytest.outcomes.Failed, match='.*') as exc_info: + with pytest.raises(AssertionError, match='.*') as exc_info: matcher.assert_lines(pattern, count=count) - actual = exc_info.value.msg - assert actual is not None - assert actual.splitlines() == expect + assert parse_excinfo(exc_info) == expect # fmt: off @@ -514,19 +396,16 @@ def test_assert_no_lines(maxsize, start, count, dedup): # but it is fine since stop indices are clamped internally source = Source(maxsize, start, count, dedup=dedup) matcher = LineMatcher(source.text) - # do not use 'match' with pytest.raises() since the diff - # output is hard to parse, but use == with lists instead - with pytest.raises(_pytest.outcomes.Failed, match='.*') as exc_info: - matcher.assert_no_lines(source.main, context=0) - actual = exc_info.value.msg - assert actual is not None + with pytest.raises(AssertionError) as exc_info: + matcher.assert_no_lines(source.main, context=0) - expect: list[str] = ['block pattern', ''] - expect.extend(util.indent_lines(source.main)) - expect.extend(['', 'found in', '']) - expect.extend(util.indent_lines(source.main, highlight=True)) - assert actual.splitlines() == expect + assert parse_excinfo(exc_info) == [ + 'block pattern', '', + *util.indent_lines(source.main, indent=4, highlight=False), + '', 'found in', '', + *util.indent_lines(source.main, indent=4, highlight=True) + ] @pytest.mark.parametrize( @@ -561,21 +440,17 @@ def test_assert_no_lines_debug( ): source = Source(maxsize, start, count, dedup=dedup) matcher = LineMatcher(source.text) - # do not use 'match' with pytest.raises() since the diff - # output is hard to parse, but use == with lists instead - with pytest.raises(_pytest.outcomes.Failed, match='.*') as exc_info: + with pytest.raises(AssertionError) as exc_info: matcher.assert_no_lines(source.main, context=context_size) - actual = exc_info.value.msg - assert actual is not None - - expect: list[str] = ['block pattern', ''] - 
expect.extend(util.indent_lines(source.main)) - expect.extend(['', 'found in', '']) - expect.extend(make_debug_context( - source.main, - source.peek_prev(context_size), omit_prev, - source.peek_next(context_size), omit_next, - context_size=context_size, indent=4, - )) - assert actual.splitlines() == expect + assert parse_excinfo(exc_info) == [ + 'block pattern', '', + *util.indent_lines(source.main, indent=4, highlight=False), + '', 'found in', '', + *make_debug_context( + source.main, + source.peek_prev(context_size), omit_prev, + source.peek_next(context_size), omit_next, + context_size=context_size, indent=4, + ) + ] diff --git a/tests/test_testing/test_matcher_buffer.py b/tests/test_testing/test_matcher_buffer.py new file mode 100644 index 00000000000..9f55d1e58f1 --- /dev/null +++ b/tests/test_testing/test_matcher_buffer.py @@ -0,0 +1,133 @@ +from __future__ import annotations + +import re +from typing import TYPE_CHECKING + +import pytest + +from sphinx.testing._matcher.buffer import Block, Line + +if TYPE_CHECKING: + from collections.abc import Sequence + from typing import Any + + from sphinx.testing._matcher.buffer import TextView + + +@pytest.mark.parametrize('cls', [Line, Block]) +def test_offset_value(cls: type[TextView[Any]]) -> None: + with pytest.raises(TypeError, match=re.escape('offset must be an integer, got: None')): + cls('', None) # type: ignore[arg-type] + + with pytest.raises(ValueError, match=re.escape('offset must be >= 0, got: -1')): + cls('', -1) + + +def test_line_constructor(): + empty = Line() + assert empty.buffer == '' + assert empty.offset == 0 + + + with pytest.raises(TypeError, match=re.escape('expecting a native string, got: 0')): + Line(0, 1) # type: ignore[arg-type] + + with pytest.raises(TypeError, match=re.escape('expecting a native string, got: %r' % '')): + Line(type('', (str,), {})(), 1) + + +def test_line_arithmetic(): + l1, l2 = Line('a', 1), Line('b', 1) + assert l1 + l2 == Line('ab', 1) + + match = re.escape('cannot 
concatenate lines with different offsets') + with pytest.raises(ValueError, match=match): + Line('a', 1) + Line('b', 2) + + assert Line('a', 1) * 3 == Line('aaa', 1) + with pytest.raises(TypeError): + Line() * object() + + +def test_line_comparison_operators(): + assert Line('a', 1) == 'a' + assert Line('a', 1) == ('a', 1) + assert Line('a', 1) == ['a', 1] + assert Line('a', 1) == Line('a', 1) + + assert Line('a', 2) != 'b' + assert Line('a', 2) != ('a', 1) + assert Line('a', 2) != ('b', 2) + assert Line('a', 2) != ['a', 1] + assert Line('a', 2) != ['b', 2] + assert Line('a', 2) != Line('a', 1) + assert Line('a', 2) != Line('b', 2) + + # order + assert Line('ab', 1) > 'a' + assert Line('ab', 1) > ('a', 1) + assert Line('ab', 1) > ['a', 1] + assert Line('ab', 1) > Line('a', 1) + + assert Line('a', 1) < 'ab' + assert Line('a', 1) < ('ab', 1) + assert Line('a', 1) < ['ab', 1] + assert Line('a', 1) < Line('ab', 1) + + assert Line('ab', 1) >= 'ab' + assert Line('ab', 1) >= ('ab', 1) + assert Line('ab', 1) >= ['ab', 1] + assert Line('ab', 1) >= Line('ab', 1) + + assert Line('ab', 1) <= 'ab' + assert Line('ab', 1) <= ('ab', 1) + assert Line('ab', 1) <= ['ab', 1] + assert Line('ab', 1) <= Line('ab', 1) + + +@pytest.mark.parametrize('bad_line', [1234, type('', (str,), {})()]) +def test_block_constructor(bad_line): + empty = Block() + assert empty.buffer == () + assert empty.offset == 0 + + match = re.escape(f'expecting a native string, got: {bad_line!r}') + with pytest.raises(TypeError, match=match): + Block([bad_line]) + + +@pytest.mark.parametrize( + ('lines', 'foreign', 'expect'), + [ + (['a', 'b', 'c'], 'd', ('a', 'b', 'c')), + (['a', 'b', 'c'], 'd', ('a', ('b', 2), Line('c', 3))), + ], +) +def test_block_comparison_operators( + lines: list[str], foreign: str, expect: Sequence[str | tuple[str, int] | Line] +) -> None: + assert Block(lines, 1) == expect + assert Block(lines, 1) == [expect, 1] + + assert Block(lines, 1) != [*expect, foreign] + assert Block(lines, 1) != 
[expect, 2] + + assert Block(lines, 1) <= expect + assert Block(lines, 1) <= [expect, 1] + + assert Block(lines[:2], 1) <= expect + assert Block(lines[:2], 1) <= [expect, 1] + + assert Block(lines[:2], 1) < expect + assert Block(lines[:2], 1) < [expect, 1] + + assert Block(lines, 1) >= expect + assert Block(lines, 1) >= [expect, 1] + + assert Block([*lines, foreign], 1) > expect + assert Block([*lines, foreign], 1) > [expect, 1] + + +def test_block_slice_context(): + assert Block(['a', 'b'], 1).context(delta=4, limit=5) == (slice(0, 1), slice(3, 5)) + assert Block(['a', 'b'], 3).context(delta=2, limit=9) == (slice(1, 3), slice(5, 7)) diff --git a/tests/test_testing/test_matcher_cleaner.py b/tests/test_testing/test_matcher_cleaner.py new file mode 100644 index 00000000000..d811369f4b4 --- /dev/null +++ b/tests/test_testing/test_matcher_cleaner.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from sphinx.testing._matcher import cleaner + + +def test_strip_function(): + assert cleaner.strip_chars('abaaa\n') == 'abaaa' + assert cleaner.strip_chars('abaaa\n', False) == 'abaaa\n' + assert cleaner.strip_chars('abaaa', 'a') == 'b' + assert cleaner.strip_chars('abaaa', 'ab') == '' + + +def test_strip_lines_function(): + assert list(cleaner.strip_lines(['aba\n', 'aba\n'])) == ['aba', 'aba'] + assert list(cleaner.strip_lines(['aba\n', 'aba\n'], False)) == ['aba\n', 'aba\n'] + assert list(cleaner.strip_lines(['aba', 'aba'], 'a')) == ['b', 'b'] + assert list(cleaner.strip_lines(['aba', 'aba'], 'ab')) == ['', ''] + + +def test_filterlines(): + src = ['a', 'a', '', 'a', 'b', 'c', 'a'] + assert list(cleaner.filterlines(src, empty=False, compress=True)) == ['a', 'b', 'c', 'a'] + assert list(cleaner.filterlines(src, empty=False, unique=True)) == ['a', 'b', 'c'] + + expect = ['a', '', 'a', 'b', 'c', 'a'] + assert list(cleaner.filterlines(src, empty=True, compress=True)) == expect + + assert list(cleaner.filterlines(src, empty=True, unique=True)) == ['a', '', 'b', 'c'] diff 
--git a/tests/test_testing/test_matcher_options.py b/tests/test_testing/test_matcher_options.py new file mode 100644 index 00000000000..130407e29dd --- /dev/null +++ b/tests/test_testing/test_matcher_options.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +from sphinx.testing._matcher.options import DEFAULT_OPTIONS, CompleteOptions, Options +from sphinx.testing.matcher import LineMatcher + +if TYPE_CHECKING: + from collections.abc import Mapping + + from sphinx.testing._matcher.options import OptionName + + +def test_options_class(): + # ensure that the classes are kept synchronized + missing_keys = Options.__annotations__.keys() - CompleteOptions.__annotations__ + assert not missing_keys, f'missing fields in proxy class: {", ".join(missing_keys)}' + + foreign_keys = CompleteOptions.__annotations__.keys() - Options.__annotations__ + assert not missing_keys, f'foreign fields in proxy class: {", ".join(foreign_keys)}' + + +@pytest.mark.parametrize('options', [DEFAULT_OPTIONS, LineMatcher('').options]) +def test_matcher_default_options(options: Mapping[str, object]) -> None: + """Check the synchronization of default options and classes in Sphinx.""" + processed = set() + + def check(option: OptionName, default: object) -> None: + assert option in options + assert options[option] == default + processed.add(option) + + check('color', False) + check('ctrl', True) + + check('strip', True) + check('stripline', False) + + check('keepends', False) + check('empty', True) + check('compress', False) + check('unique', False) + + check('delete', ()) + check('ignore', None) + + check('flavor', 'none') + + # check that there are no left over options + assert sorted(processed) == sorted(Options.__annotations__) From 4c3566af57c250adb61ae35d4ce453f89bcc0217 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 1 Apr 2024 13:52:28 +0200 Subject: [PATCH 
11/66] add explicit ``typing-extensions`` dependency --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 870081fc525..e3a97dc88a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,6 +79,7 @@ dynamic = ["version"] [project.optional-dependencies] docs = [ "sphinxcontrib-websupport", + "typing-extensions", ] lint = [ "flake8>=3.5.0", @@ -87,6 +88,7 @@ lint = [ "sphinx-lint", "types-docutils", "types-requests", + "typing-extensions", "pytest>=6.0", ] test = [ From bef6b177ecbc066c526a1ce612dd2c54a0b63c83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 1 Apr 2024 13:56:20 +0200 Subject: [PATCH 12/66] cleanup --- tests/test_testing/test_matcher_buffer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_testing/test_matcher_buffer.py b/tests/test_testing/test_matcher_buffer.py index 9f55d1e58f1..9354a62acd8 100644 --- a/tests/test_testing/test_matcher_buffer.py +++ b/tests/test_testing/test_matcher_buffer.py @@ -28,7 +28,6 @@ def test_line_constructor(): assert empty.buffer == '' assert empty.offset == 0 - with pytest.raises(TypeError, match=re.escape('expecting a native string, got: 0')): Line(0, 1) # type: ignore[arg-type] From bf8de84e3c2370b222b5786fe7c7d8e78c3860b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 1 Apr 2024 15:53:54 +0200 Subject: [PATCH 13/66] fixup --- sphinx/testing/_matcher/cleaner.py | 90 ++++++++++++++------- sphinx/testing/_matcher/engine.py | 34 ++++---- sphinx/testing/_matcher/util.py | 10 ++- tests/test_testing/test_matcher_cleaner.py | 91 ++++++++++++++++++++-- 4 files changed, 176 insertions(+), 49 deletions(-) diff --git a/sphinx/testing/_matcher/cleaner.py b/sphinx/testing/_matcher/cleaner.py index 575f98e8b4d..86bc09ebeb1 100644 --- a/sphinx/testing/_matcher/cleaner.py +++ b/sphinx/testing/_matcher/cleaner.py @@ 
-2,6 +2,7 @@ __all__ = () +import itertools from functools import reduce from itertools import filterfalse from typing import TYPE_CHECKING @@ -12,7 +13,8 @@ if TYPE_CHECKING: import re - from collections.abc import Iterable + from collections.abc import Iterable, Sequence + from typing import TypeVar from typing_extensions import Unpack @@ -24,6 +26,8 @@ StripChars, ) + _StrIterableT = TypeVar('_StrIterableT', bound=Iterable[str]) + def clean_text(text: str, /, **options: Unpack[Options]) -> Iterable[str]: """Clean a text.""" @@ -32,7 +36,7 @@ def clean_text(text: str, /, **options: Unpack[Options]) -> Iterable[str]: text = strip_ansi(text, ctrl=ctrl, color=color) text = strip_chars(text, get_option(options, 'strip')) - lines = splitlines(text, get_option(options, 'keepends')) + lines = text.splitlines(get_option(options, 'keepends')) return clean_lines(lines, **options) @@ -45,13 +49,13 @@ def clean_lines(lines: Iterable[str], /, **options: Unpack[Options]) -> Iterable empty = get_option(options, 'empty') compress = get_option(options, 'compress') unique = get_option(options, 'unique') - lines = filterlines(lines, empty=empty, compress=compress, unique=unique) + lines = filter_lines(lines, empty=empty, compress=compress, unique=unique) delete = get_option(options, 'delete') flavor = get_option(options, 'flavor') lines = prune(lines, delete, flavor=flavor) - return ignorelines(lines, get_option(options, 'ignore')) + return ignore_lines(lines, get_option(options, 'ignore')) def strip_ansi(text: str, /, ctrl: bool = False, color: bool = False) -> str: @@ -92,12 +96,7 @@ def strip_lines(lines: Iterable[str], chars: StripChars = True, /) -> Iterable[s raise ValueError(msg) -def splitlines(text: str, /, keepends: bool = False) -> Iterable[str]: - """Split *text* into lines.""" - return text.splitlines(keepends=keepends) - - -def filterlines( +def filter_lines( lines: Iterable[str], /, *, @@ -113,15 +112,13 @@ def filterlines( :param compress: If true, remove 
consecutive duplicated lines. :return: An iterable of filtered lines. - By convention, duplicates elimination is performed *after* empty lines - are removed. To reverse the behaviour, consider using:: + Since removing empty lines first allows serial duplicates to be eliminated + in the same iteration, duplicates elimination is performed *after* empty + lines are removed. To change the behaviour, consider using:: lines = filterlines(lines, compress=True) lines = filterlines(lines, empty=True) """ - # Removing empty lines first ensures that serial duplicates can - # be eliminated in one cycle. Inverting the order of operations - # is not possible since empty lines may 'hide' duplicated lines. if not empty: lines = filter(None, lines) @@ -135,7 +132,7 @@ def filterlines( return lines -def ignorelines(lines: Iterable[str], predicate: LinePredicate | None, /) -> Iterable[str]: +def ignore_lines(lines: Iterable[str], predicate: LinePredicate | None, /) -> Iterable[str]: """Ignore lines satisfying the *predicate*. :param lines: The lines to filter. @@ -146,13 +143,19 @@ def ignorelines(lines: Iterable[str], predicate: LinePredicate | None, /) -> Ite def prune( - lines: Iterable[str], delete: DeletePattern, /, flavor: Flavor = 'none' + lines: Iterable[str], + delete: DeletePattern, + /, + flavor: Flavor = 'none', + *, + trace: list[Sequence[tuple[str, Sequence[str]]]] | None = None, ) -> Iterable[str]: - r"""Remove substrings from *lines* satisfying some patterns. + r"""Remove substrings from a source satisfying some patterns. - :param lines: The lines to transform. + :param lines: The source to transform. :param delete: One or more prefixes to remove or substitution patterns. :param flavor: Indicate the flavor of prefix regular expressions. + :param trace: A buffer where intermediate results are stored. :return: An iterable of transformed lines. 
Usage:: @@ -162,8 +165,27 @@ def prune( lines = prune(['a123b', 'c123d'], re.compile(r'\d+')) assert list(lines) == ['ab', 'cd'] + + For debugging purposes, an empty list *trace* can be given. + When specified, *trace* is incrementally constructed as follows:: + + for i, line in enumerate(lines): + entry, res = [(line, frame := [])], line + for j, pattern in enumerate(patterns): + res = pattern.sub('', res) + frame.append(res) + + while res != line: + entry.append((res, frame := [])) + for j, pattern in enumerate(patterns): + res = pattern.sub('', res) + frame.append(res) + + trace.append(entry) + yield res """ - delete_patterns = engine.to_line_patterns(delete) + # keep the order in which patterns are evaluated and possible duplicates + delete_patterns = engine.to_line_patterns(delete, optimized=False) patterns = engine.translate(delete_patterns, flavor=flavor) # ensure that we are using the beginning of the string (this must # be done *after* the regular expression translation, since fnmatch
entry.append((ret, [])) + line, ret = ret, prune_debug(ret, entry[-1][1]) + trace.append(entry) + yield ret diff --git a/sphinx/testing/_matcher/engine.py b/sphinx/testing/_matcher/engine.py index 2c0c9be1935..8c76b1f0137 100644 --- a/sphinx/testing/_matcher/engine.py +++ b/sphinx/testing/_matcher/engine.py @@ -25,17 +25,28 @@ def _check_flavor(flavor: Flavor) -> None: # fmt: off @overload -def to_line_patterns(expect: _LinePatternT) -> tuple[_LinePatternT]: ... # NoQA: E704 -@overload -def to_line_patterns(expect: Iterable[LinePattern]) -> tuple[LinePattern, ...]: ... # NoQA: E704 +def to_line_patterns(expect: str, *, optimized: bool = True) -> tuple[str]: ... # NoQA: E704 +@overload # NoQA: E302 +def to_line_patterns( # NoQA: E704 + expect: re.Pattern[str], *, optimized: bool = True +) -> tuple[re.Pattern[str]]: ... +@overload # NoQA: E302 +def to_line_patterns( # NoQA: E704 + expect: Iterable[LinePattern], *, optimized: bool = True +) -> tuple[LinePattern, ...]: ... # fmt: on -def to_line_patterns(expect: LinePattern | Iterable[LinePattern]) -> Sequence[LinePattern]: # NoQA: E302 +def to_line_patterns( # NoQA: E302 + expect: LinePattern | Iterable[LinePattern], *, optimized: bool = True +) -> Sequence[LinePattern]: """Get a read-only sequence of line-matching patterns. :param expect: One or more patterns a line should match. + :param optimized: If true, patterns are sorted and duplicates are removed. :return: The possible line patterns. - .. note:: The order of *expect* is not retained and duplicates are removed. 
+ By convention, + + to_line_patterns("my pattern") == to_line_patterns(["my pattern"]) """ if isinstance(expect, (str, re.Pattern)): return (expect,) @@ -43,12 +54,9 @@ def to_line_patterns(expect: LinePattern | Iterable[LinePattern]) -> Sequence[Li def key(x: str | re.Pattern[str]) -> str: return x if isinstance(x, str) else x.pattern - # Do not make the patterns unique by their pattern string since - # string patterns compiled in different flavors might not have - # the same underlying pattern's string, e.g.:: - # - # re.compile('a*').pattern != fnmatch.translate('a*') - return tuple(sorted(set(expect), key=key)) + if optimized: + return tuple(sorted(set(expect), key=key)) + return tuple(expect) # fmt: off @@ -76,9 +84,9 @@ @overload def transform(fn: Callable[[str], str], x: str) -> str: ... # NoQA: E704 @overload -def transform(fn: Callable[..., str], x: re.Pattern[str]) -> re.Pattern[str]: ... # NoQA: E704 +def transform(fn: Callable[[str], str], x: re.Pattern[str]) -> re.Pattern[str]: ... # NoQA: E704 # fmt: on -def transform(fn: Callable[..., str], x: LinePattern) -> LinePattern: # NoQA: E302 +def transform(fn: Callable[[str], str], x: LinePattern) -> LinePattern: # NoQA: E302 """Transform regular expressions, leaving compiled patterns untouched.""" return fn(x) if isinstance(x, str) else x diff --git a/sphinx/testing/_matcher/util.py b/sphinx/testing/_matcher/util.py index 42d976a2a78..47dc14351e0 100644 --- a/sphinx/testing/_matcher/util.py +++ b/sphinx/testing/_matcher/util.py @@ -101,9 +101,13 @@ def make_prefix(indent: int, /, *, highlight: bool = False) -> str: # fmt: off @overload -def indent_source(text: str, /, *, sep: Never = ..., indent: int = ..., highlight: bool = ...) -> str: ... # NoQA: E501, E704 -@overload -def indent_source(lines: Iterable[str], /, *, sep: str = ..., indent: int = ..., highlight: bool = ...) -> str: ...
# NoQA: E501, E704 +def indent_source( # NoQA: E704 + text: str, /, *, sep: Never = ..., indent: int = ..., highlight: bool = ... +) -> str: ... +@overload # NoQA: E302 +def indent_source( # NoQA: E704 + lines: Iterable[str], /, *, sep: str = ..., indent: int = ..., highlight: bool = ... +) -> str: ... # fmt: on def indent_source( # NoQA: E302 src: Iterable[str], /, *, sep: str = '\n', indent: int = 4, highlight: bool = False diff --git a/tests/test_testing/test_matcher_cleaner.py b/tests/test_testing/test_matcher_cleaner.py index d811369f4b4..046996d3a43 100644 --- a/tests/test_testing/test_matcher_cleaner.py +++ b/tests/test_testing/test_matcher_cleaner.py @@ -1,28 +1,105 @@ from __future__ import annotations +import re +from typing import TYPE_CHECKING + +import pytest + from sphinx.testing._matcher import cleaner +if TYPE_CHECKING: + from collections.abc import Callable, Sequence -def test_strip_function(): + +def test_strip_chars(): assert cleaner.strip_chars('abaaa\n') == 'abaaa' assert cleaner.strip_chars('abaaa\n', False) == 'abaaa\n' assert cleaner.strip_chars('abaaa', 'a') == 'b' assert cleaner.strip_chars('abaaa', 'ab') == '' -def test_strip_lines_function(): +def test_strip_lines(): assert list(cleaner.strip_lines(['aba\n', 'aba\n'])) == ['aba', 'aba'] assert list(cleaner.strip_lines(['aba\n', 'aba\n'], False)) == ['aba\n', 'aba\n'] assert list(cleaner.strip_lines(['aba', 'aba'], 'a')) == ['b', 'b'] assert list(cleaner.strip_lines(['aba', 'aba'], 'ab')) == ['', ''] -def test_filterlines(): +def test_filter_lines(): src = ['a', 'a', '', 'a', 'b', 'c', 'a'] - assert list(cleaner.filterlines(src, empty=False, compress=True)) == ['a', 'b', 'c', 'a'] - assert list(cleaner.filterlines(src, empty=False, unique=True)) == ['a', 'b', 'c'] + assert list(cleaner.filter_lines(src, empty=False, compress=True)) == ['a', 'b', 'c', 'a'] + assert list(cleaner.filter_lines(src, empty=False, unique=True)) == ['a', 'b', 'c'] expect = ['a', '', 'a', 'b', 'c', 'a'] - assert 
list(cleaner.filterlines(src, empty=True, compress=True)) == expect + assert list(cleaner.filter_lines(src, empty=True, compress=True)) == expect + + assert list(cleaner.filter_lines(src, empty=True, unique=True)) == ['a', '', 'b', 'c'] + + +@pytest.fixture() +def prune_trace_object() -> Callable[[], list[Sequence[tuple[str, Sequence[str]]]]]: + return list + + +def test_prune_prefix(prune_trace_object): + trace = prune_trace_object() + lines = cleaner.prune(['1111a1', 'b1'], '1', flavor='none', trace=trace) + assert list(lines) == ['a1', 'b1'] + assert trace == [ + [ + ('1111a1', ['111a1']), + ('111a1', ['11a1']), + ('11a1', ['1a1']), + ('1a1', ['a1']), + ('a1', ['a1']), + ], + [('b1', ['b1'])], + ] + + trace = prune_trace_object() + lines = cleaner.prune(['1111a1', 'b1'], r'\d+', flavor='re', trace=trace) + assert list(lines) == ['a1', 'b1'] + assert trace == [ + [('1111a1', ['a1']), ('a1', ['a1'])], + [('b1', ['b1'])], + ] + + trace = prune_trace_object() + lines = cleaner.prune(['/a/b/c.txt', 'keep.py'], '*.txt', flavor='fnmatch', trace=trace) + assert list(lines) == ['', 'keep.py'] + assert trace == [ + [('/a/b/c.txt', ['']), ('', [''])], + [('keep.py', ['keep.py'])], + ] + + +def test_prune_groups(prune_trace_object): + lines = cleaner.prune(['a123b', 'c123d'], re.compile(r'\d+')) + assert list(lines) == ['ab', 'cd'] + + p1 = re.compile(r'\d\d') + p2 = re.compile(r'\n+') + + trace = prune_trace_object() + lines = cleaner.prune(['a 123\n456x7\n8\n b'], [p1, p2], trace=trace) + assert list(lines) == ['a x b'] + + assert len(trace) == 1 + assert len(trace[0]) == 3 + # elimination of double digits and new lines (in that order) + assert trace[0][0] == ('a 123\n456x7\n8\n b', ['a 3\n6x7\n8\n b', 'a 36x78 b']) + # new digits appeared so we re-eliminated them + assert trace[0][1] == ('a 36x78 b', ['a x b', 'a x b']) + # identity for both patterns + assert trace[0][2] == ('a x b', ['a x b', 'a x b']) + + trace = prune_trace_object() + lines = cleaner.prune(['a 
123\n456x7\n8\n b'], [p2, p1], trace=trace) + assert list(lines) == ['a x b'] - assert list(cleaner.filterlines(src, empty=True, unique=True)) == ['a', '', 'b', 'c'] + assert len(trace) == 1 + assert len(trace[0]) == 2 + # elimination of new lines and double digits (in that order) + assert trace[0][0] == ('a 123\n456x7\n8\n b', ['a 123456x78 b', 'a x b']) + # identity for both patterns + assert trace[0][1] == ('a x b', ['a x b', 'a x b']) From 9fd372ab0029e822b6ae9612e447b5ed5fe7e383 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 1 Apr 2024 17:25:08 +0200 Subject: [PATCH 14/66] Update documentation and make buffers more efficient. --- sphinx/testing/_matcher/buffer.py | 155 +++++++++++++--------- sphinx/testing/matcher.py | 17 +-- tests/test_testing/test_matcher.py | 33 +++-- tests/test_testing/test_matcher_buffer.py | 4 +- 4 files changed, 128 insertions(+), 81 deletions(-) diff --git a/sphinx/testing/_matcher/buffer.py b/sphinx/testing/_matcher/buffer.py index 049bc4a6d58..a372ebc4266 100644 --- a/sphinx/testing/_matcher/buffer.py +++ b/sphinx/testing/_matcher/buffer.py @@ -19,34 +19,39 @@ _T = TypeVar('_T', bound=Sequence[str]) -class TextView(Generic[_T], abc.ABC): +class ComparableView(Generic[_T], abc.ABC): + """A string or a sequence of strings implementing rich comparison. 
+ + :meta private: + """ + # add __weakref__ to allow the object being weak-referencable - __slots__ = ('_buffer', '_offset', '__weakref__') + __slots__ = ('__buffer', '__offset', '__weakref__') - def __init__(self, buffer: _T, offset: int = 0, /) -> None: - if not isinstance(offset, int): + def __init__(self, buffer: _T, offset: int = 0, /, *, _check: bool = True) -> None: + if _check and not isinstance(offset, int): msg = f'offset must be an integer, got: {offset!r}' raise TypeError(msg) - if offset < 0: + if _check and offset < 0: msg = f'offset must be >= 0, got: {offset!r}' raise ValueError(msg) - self._buffer = buffer - self._offset = offset + self.__buffer = buffer + self.__offset = offset @property def buffer(self) -> _T: """The internal (immutable) buffer.""" - return self._buffer + return self.__buffer @property def offset(self) -> int: """The index of this object in the original source.""" - return self._offset + return self.__offset def __copy__(self) -> Self: - return self.__class__(self.buffer, self.offset) + return self.__class__(self.buffer, self.offset, _check=False) def __bool__(self) -> bool: return bool(len(self)) @@ -79,44 +84,46 @@ def __gt__(self, other: object, /) -> bool: @final -class Line(TextView[str]): +class Line(ComparableView[str]): """A line found by :meth:`~sphinx.testing.matcher.LineMatcher.match`. A :class:`Line` can be compared to :class:`str`, :class:`Line` objects or - a pair (i.e., a two-length sequence) ``(line, offset)`` where *line* is a - native :class:`str` (not a subclass thereof) and *offset* is an integer. + a pair (i.e., a two-length sequence) ``(line, line_offset)`` where + + - *line* is a native :class:`str` (not a subclass thereof), and + - *line_offset* is an nonnegative integer. By convention, the comparison result (except for ``!=``) of :class:`Line` objects with distinct :attr:`offset` is always ``False``. Use :class:`str` objects instead if the offset is not relevant. 
""" - def __init__(self, line: str = '', offset: int = 0, /) -> None: + def __init__(self, line: str = '', offset: int = 0, /, *, _check: bool = True) -> None: """Construct a :class:`Line` object. The *line* must be a native :class:`str` object. """ - if type(line) is not str: + if _check and type(line) is not str: # force the line to be a true string and not another string-like msg = f'expecting a native string, got: {line!r}' raise TypeError(msg) - super().__init__(line, offset) + super().__init__(line, offset, _check=_check) @classmethod - def view(cls, index: int, line: str, /) -> Self: + def view(cls, index: int, line: object, /) -> Self: """Alternative constructor flipping the order of the arguments. This is typically useful with :func:`enumerate`, namely this makes:: from itertools import starmap - lines = list(starmap(Line.view, enumerate(lines)) + lines = list(starmap(Line.view, enumerate(src)) equivalent to:: - lines = [Line(line, index) for index, line in enumerate(lines)] + lines = [Line(str(line), index) for index, line in enumerate(src)] """ - return cls(line, index) + return cls(str(line), index, _check=True) # dunder methods @@ -128,41 +135,42 @@ def __repr__(self) -> str: return f'{self.__class__.__name__}({self!s}, offset={self.offset})' def __getitem__(self, index: int | slice, /) -> str: + """Return the *nth* character.""" return self.buffer[index] - def __add__(self, other: object, /) -> Line: + def __add__(self, other: object, /) -> Self: if isinstance(other, str): - return Line(str(self) + other, self.offset) + return self.__class__(str(self) + other, self.offset, _check=False) if isinstance(other, Line): if self.offset != other.offset: msg = 'cannot concatenate lines with different offsets' raise ValueError(msg) - return Line(str(self) + str(other), self.offset) + return self.__class__(str(self) + str(other), self.offset, _check=False) return NotImplemented - def __mul__(self, other: object, /) -> Line: + def __mul__(self, other: object, /) 
-> Self: if isinstance(other, int): - return Line(str(self) * other, self.offset) + return self.__class__(str(self) * other, self.offset, _check=False) return NotImplemented def __eq__(self, other: object, /) -> bool: other = self.__cast(other) - if isinstance(other, Line): - # check offsets before calling str() + if isinstance(other, self.__class__): + # check offsets before calling str() for efficiency return self.offset == other.offset and str(self) == str(other) return False def __lt__(self, other: object, /) -> bool: other = self.__cast(other) - if isinstance(other, Line): - # check offsets before calling str() + if isinstance(other, self.__class__): + # check offsets before calling str() for efficiency return self.offset == other.offset and str(self) < str(other) return NotImplemented def __gt__(self, other: object, /) -> bool: other = self.__cast(other) - if isinstance(other, Line): - # check offsets before calling str() + if isinstance(other, self.__class__): + # check offsets before calling str() for efficiency return self.offset == other.offset and str(self) > str(other) return NotImplemented @@ -198,44 +206,51 @@ def rindex(self, sub: str, start: int = 0, end: int | None = None, /) -> int: def __cast(self, other: object, /) -> Self | object: """Try to parse *object* as a :class:`Line`.""" - if isinstance(other, Line): + if isinstance(other, self.__class__): return other if isinstance(other, str): - return Line(other, self.offset) + return self.__class__(other, self.offset, _check=False) if isinstance(other, (list, tuple)) and len(other) == 2: # type checking is handled by the Line constructor - return Line(other[0], other[1]) + return self.__class__(other[0], other[1], _check=True) return other @final -class Block(TextView[tuple[str, ...]]): +class Block(ComparableView[tuple[str, ...]], Sequence[str]): """Block found by :meth:`~sphinx.testing.matcher.LineMatcher.find`. 
- A block can be compared to list of strings (e.g., ``['line1', 'line2']``), - a :class:`Block` object or a sequence of pairs ``(block, offset)`` (the - pair can be given as any two-length sequence) where: + A block is a sequence of lines comparable to :class:`Line`, generally a + string (the line content) or a pair ``(line, line_offset)``. In addition, + a block can be compared to pair ``(block_lines, block_offset)`` where: - - *block* -- a sequence (e.g., list, tuple, deque, ...) consisting - of :class:`str`, :class:`Line` or ``(line, line_offset)`` objects. + - *block_lines* is a sequence of lines-like objects, and + - *block_offset* is an integer (matched against :attr:`offset`). - Here, ``(line, line_offset)`` follows the same conventions - for comparing :class:`Line` objects. - - - *offset* -- an integer (matched against :attr:`offset`). + Whenever a pair ``(line, line_offset)`` or ``(block, block_offset)`` + is needed, it can be any two-element sequence (e.g., tuple or list). For instance,:: - assert Block(['a', 'b'], 2) == [Line('a', 2), Line('b', 3)] + assert Block(['a', 'b', 'c'], 2) == ['a', ('b', 3), Line('c', 4)] + + .. note:: + + By convention, ``block[i]`` or ``block[i:j]`` returns :class:`str` + or sequences of :class:`str`. Consider using :meth:`at` to get the + corresponding :class:`Line` or :class:`Block` values. 
""" - def __init__(self, buffer: Iterable[str] = (), offset: int = 0, /) -> None: + def __init__( + self, buffer: Iterable[str] = (), offset: int = 0, /, *, _check: bool = True + ) -> None: buffer = tuple(buffer) - for line in buffer: - if type(line) is not str: - err = f'expecting a native string, got: {line!r}' - raise TypeError(err) - super().__init__(buffer, offset) + if _check: + for line in buffer: + if type(line) is not str: + err = f'expecting a native string, got: {line!r}' + raise TypeError(err) + super().__init__(buffer, offset, _check=_check) @classmethod def view(cls, index: int, buffer: Iterable[str], /) -> Self: @@ -250,7 +265,7 @@ def view(cls, index: int, buffer: Iterable[str], /) -> Self: blocks = [Block(lines, index) for index, lines in enumerate(src)] """ - return cls(buffer, index) + return cls(buffer, index, _check=True) @property def length(self) -> int: @@ -285,7 +300,7 @@ def context(self, delta: int, limit: int) -> tuple[slice, slice]: block = Block(['4', '5', '6'], 3) before, after = block.context(2, 10) assert source[before] == ['2', '3'] - assert source[before] == ['7', '8'] + assert source[after] == ['7', '8'] """ block_stop = self.offset + self.length before_slice = slice(max(0, self.offset - delta), min(self.offset, limit)) @@ -295,18 +310,38 @@ def context(self, delta: int, limit: int) -> tuple[slice, slice]: def __repr__(self) -> str: return f'{self.__class__.__name__}({self.buffer!r}, offset={self.offset})' + # fmt: off + @overload + def at(self, index: int, /) -> Line: ... # NoQA: E704 + @overload + def at(self, index: slice, /) -> Self: ... 
# NoQA: E704 + # fmt: on + def at(self, index: int | slice, /) -> Line | Block: # NoQA: E301 + """Get a :class:`Line` or a contiguous sub-:class:`Block`.""" + if isinstance(index, slice): + # normalize negative and None slice fields + start, _, step = index.indices(self.length) + if step != 1: + msg = 'only contiguous regions can be extracted' + raise ValueError(msg) + return self.__class__(self.buffer[index], self.offset + start, _check=False) + + # normalize negative indices + start, _, _ = slice(index, -1).indices(self.length) + return Line(self.buffer[index], self.offset + start, _check=False) + # fmt: off @overload def __getitem__(self, index: int, /) -> str: ... # NoQA: E704 @overload - def __getitem__(self, index: slice, /) -> tuple[str, ...]: ... # NoQA: E704 + def __getitem__(self, index: slice, /) -> Sequence[str]: ... # NoQA: E704 # fmt: on - def __getitem__(self, index: int | slice, /) -> str | tuple[str, ...]: # NoQA: E301 - """Get a line or a subset of the block's lines.""" - return self.buffer[index] + def __getitem__(self, index: int | slice, /) -> str | Sequence[str]: # NoQA: E301 + """Get a line or a contiguous sub-block.""" + return self.at(index).buffer def __eq__(self, other: object, /) -> bool: - if isinstance(other, Block): + if isinstance(other, self.__class__): # more efficient to first check the offsets return (self.offset, self.buffer) == (other.offset, other.buffer) @@ -326,7 +361,7 @@ def __lt__(self, other: object, /) -> bool: if not self: return False - if isinstance(other, Block): + if isinstance(other, self.__class__): # more efficient to first check if the indices are valid before checking the lines if _can_be_strict_in(self.offset, self.length, other.offset, other.length): return self.buffer < other.buffer @@ -348,7 +383,7 @@ def __lt__(self, other: object, /) -> bool: return NotImplemented def __gt__(self, other: object, /) -> bool: - if isinstance(other, Block): + if isinstance(other, self.__class__): return other < self if 
isinstance(other, (list, tuple)): diff --git a/sphinx/testing/matcher.py b/sphinx/testing/matcher.py index 6a08786d479..a9dfd3d93b6 100644 --- a/sphinx/testing/matcher.py +++ b/sphinx/testing/matcher.py @@ -40,7 +40,7 @@ def __init__(self, content: str | StringIO, /, **options: Unpack[Options]) -> No # always complete the set of options for this object self._options: CompleteOptions = DEFAULT_OPTIONS | options # stack of cached cleaned lines (with a possible indirection) - self._stack: list[int | tuple[str, ...] | None] = [None] + self._stack: list[int | Block | None] = [None] @classmethod def from_lines( @@ -54,7 +54,7 @@ def from_lines( return cls(sep.join(lines), **options) def __iter__(self) -> Iterator[Line]: - """The cached lines as :class:`~sphinx.testing._matcher.Line` instances.""" + """An iterator on the cached lines.""" return starmap(Line.view, enumerate(self.lines())) @property @@ -94,7 +94,7 @@ def override(self, /, **options: Unpack[Options]) -> Generator[None, None, None] self._stack.pop() # pop the cached lines for this scope self._options = saved_options - def lines(self) -> tuple[str, ...]: + def lines(self) -> Block: """The content lines, cleaned up according to the current options. 
This method is efficient in the sense that the lines are computed @@ -106,7 +106,7 @@ def lines(self) -> tuple[str, ...]: if cached is None: # compute for the first time the value - cached = tuple(cleaner.clean_text(self.content, **self.options)) + cached = Block(cleaner.clean_text(self.content, **self.options)) # check if the value is the same as any of a previously cached value for addr, value in enumerate(stack): if value == cached: @@ -118,10 +118,10 @@ def lines(self) -> tuple[str, ...]: if isinstance(cached, int): value = self._stack[cached] - assert isinstance(value, tuple) + assert isinstance(value, Block) return value - assert isinstance(cached, tuple) + assert isinstance(cached, Block) return cached def find( @@ -183,7 +183,7 @@ def iterfind_blocks( for start, block in block_iterator: # check if the block matches the pattern line by line if all(pattern.match(line) for pattern, line in zip(compiled_patterns, block)): - yield Block(block, start) + yield Block(block, start, _check=False) # Consume the iterator so that the next block consists # of lines just after the block that was just yielded. 
# @@ -317,7 +317,8 @@ def _assert_not_found( for start, block in enumerate(util.windowed(lines, count)): if all(pattern.match(line) for pattern, line in zip(compiled_patterns, block)): pat = util.prettify_patterns(patterns, sort=pattern_type == 'line') - ctx = util.get_debug_context(lines, Block(block, start), context_size) + block_object = Block(block, start, _check=False) + ctx = util.get_debug_context(lines, block_object, context_size) logs = [f'{pattern_type} pattern', pat, 'found in', '\n'.join(ctx)] raise AssertionError('\n\n'.join(logs)) diff --git a/tests/test_testing/test_matcher.py b/tests/test_testing/test_matcher.py index fc825ce1edf..55947fc1ca1 100644 --- a/tests/test_testing/test_matcher.py +++ b/tests/test_testing/test_matcher.py @@ -102,17 +102,28 @@ def block_line(i: int) -> str: def make_debug_context( - block: list[str], + block: list[str], # highlighted block /, - view_prev: list[str], - omit_prev: int, - view_next: list[str], - omit_next: int, + view_prev: list[str], # context lines before the main block + omit_prev: int, # number of lines that were omitted before 'view_prev' + view_next: list[str], # context lines after the main block + omit_next: int, # number of lines that were omitted after 'view_next' *, - context_size: int, + context_size: int, # the original value of the 'context_size' parameter indent: int = 4, ) -> list[str]: - """Other API for :func:`sphinx.testing._matcher.util.get_debug_context`.""" + """Other API for :func:`sphinx.testing._matcher.util.get_debug_context`. + + The resulting lines are of the form:: + + - a line indicating that *omit_prev* lines were omitted, + - the block *view_prev*, + - the main *block* (highlighted), + - the block *view_next*, + - a line indicating that *omit_next* lines were omitted. + + If *context_size = 0*, the lines indicating omitted lines are not included. 
+ """ lines: list[str] = [] writelines = lines.extend writelines(util.omit_line(bool(context_size) * omit_prev)) @@ -125,6 +136,7 @@ def make_debug_context( def parse_excinfo(excinfo: ExceptionInfo[AssertionError]) -> list[str]: # see: https://github.com/pytest-dev/pytest/issues/12175 + assert excinfo.type is AssertionError assert excinfo.value is not None return str(excinfo.value).removeprefix('AssertionError: ').splitlines() @@ -135,7 +147,6 @@ def test_matcher_cache(): matcher = LineMatcher.from_lines(source, color=True, empty=True) stack = matcher._stack - assert len(stack) == 1 assert stack[0] is None @@ -370,7 +381,7 @@ def test_assert_lines(maxsize, start, count, dedup): ], ) def test_assert_lines_debug(lines, pattern, count, expect): - matcher = LineMatcher.from_lines(lines) + matcher = LineMatcher.from_lines(lines, flavor='none') if expect is None: matcher.assert_lines(pattern, count=count) @@ -395,7 +406,7 @@ def test_assert_no_lines(maxsize, start, count, dedup): # 'maxsize' might be smaller than start + (dedup + 1) * count # but it is fine since stop indices are clamped internally source = Source(maxsize, start, count, dedup=dedup) - matcher = LineMatcher(source.text) + matcher = LineMatcher(source.text, flavor='none') with pytest.raises(AssertionError) as exc_info: matcher.assert_no_lines(source.main, context=0) @@ -439,7 +450,7 @@ def test_assert_no_lines_debug( maxsize, start, count, dedup, omit_prev, omit_next, context_size ): source = Source(maxsize, start, count, dedup=dedup) - matcher = LineMatcher(source.text) + matcher = LineMatcher(source.text, flavor='none') with pytest.raises(AssertionError) as exc_info: matcher.assert_no_lines(source.main, context=context_size) diff --git a/tests/test_testing/test_matcher_buffer.py b/tests/test_testing/test_matcher_buffer.py index 9354a62acd8..0aeef43d446 100644 --- a/tests/test_testing/test_matcher_buffer.py +++ b/tests/test_testing/test_matcher_buffer.py @@ -11,11 +11,11 @@ from collections.abc import 
Sequence from typing import Any - from sphinx.testing._matcher.buffer import TextView + from sphinx.testing._matcher.buffer import ComparableView @pytest.mark.parametrize('cls', [Line, Block]) -def test_offset_value(cls: type[TextView[Any]]) -> None: +def test_offset_value(cls: type[ComparableView[Any]]) -> None: with pytest.raises(TypeError, match=re.escape('offset must be an integer, got: None')): cls('', None) # type: ignore[arg-type] From 523242189ec72244d03ea1a71670646aacbd5dba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 2 Apr 2024 13:32:40 +0200 Subject: [PATCH 15/66] fix various bugs --- sphinx/testing/_matcher/buffer.py | 438 +++++++++++++--------- sphinx/testing/_matcher/cleaner.py | 19 +- sphinx/testing/_matcher/engine.py | 18 +- sphinx/testing/_matcher/options.py | 19 +- sphinx/testing/_matcher/util.py | 4 +- sphinx/testing/matcher.py | 36 +- tests/test_testing/test_matcher.py | 15 +- tests/test_testing/test_matcher_buffer.py | 111 ++++-- 8 files changed, 409 insertions(+), 251 deletions(-) diff --git a/sphinx/testing/_matcher/buffer.py b/sphinx/testing/_matcher/buffer.py index a372ebc4266..f108f290429 100644 --- a/sphinx/testing/_matcher/buffer.py +++ b/sphinx/testing/_matcher/buffer.py @@ -3,25 +3,25 @@ __all__ = ('Line', 'Block') import abc -import sys +import itertools +import operator from collections.abc import Sequence -from itertools import starmap from typing import TYPE_CHECKING, Generic, TypeVar, final, overload -from sphinx.testing._matcher.util import windowed - if TYPE_CHECKING: from collections.abc import Iterable, Iterator - from typing import SupportsIndex from typing_extensions import Self _T = TypeVar('_T', bound=Sequence[str]) -class ComparableView(Generic[_T], abc.ABC): +class SourceView(Generic[_T], abc.ABC): """A string or a sequence of strings implementing rich comparison. 
+ Given an implicit *source* as a list of strings, a :class:`SourceView` + is a subset of that implicit *source* starting at some :attr:`offset`. + :meta private: """ @@ -29,13 +29,24 @@ class ComparableView(Generic[_T], abc.ABC): __slots__ = ('__buffer', '__offset', '__weakref__') def __init__(self, buffer: _T, offset: int = 0, /, *, _check: bool = True) -> None: - if _check and not isinstance(offset, int): - msg = f'offset must be an integer, got: {offset!r}' - raise TypeError(msg) + """Construct a :class:`SourceView`. - if _check and offset < 0: - msg = f'offset must be >= 0, got: {offset!r}' - raise ValueError(msg) + :param buffer: The view's content (a string or a list of strings). + :param offset: The view's offset with respect to the original source. + :param _check: An internal parameter used for validating inputs. + + The *_check* parameter is only meant for internal usage and strives + to speed-up the construction of :class:`SourceView` objects for which + their constructor arguments are known to be valid at call time. 
+ """ + if _check: + if not isinstance(offset, int): + msg = f'offset must be an integer, got: {offset!r}' + raise TypeError(msg) + + if offset < 0: + msg = f'offset must be >= 0, got: {offset!r}' + raise ValueError(msg) self.__buffer = buffer self.__offset = offset @@ -50,47 +61,78 @@ def offset(self) -> int: """The index of this object in the original source.""" return self.__offset + @property + def length(self) -> int: + """The number of items in this object.""" + return len(self) + + def pformat(self) -> str: + """A nice representation of this object.""" + return '{0.__class__.__name__}({0!r}, @={0.offset}, #={0.length})'.format(self) + + def __repr__(self) -> str: + return repr(self.buffer) + def __copy__(self) -> Self: return self.__class__(self.buffer, self.offset, _check=False) def __bool__(self) -> bool: + """Indicate whether this view is empty or not.""" return bool(len(self)) def __iter__(self) -> Iterator[str]: + """An iterator over the view's items.""" return iter(self.buffer) - def __reversed__(self) -> Iterator[str]: - return reversed(self.buffer) - def __len__(self) -> int: + """The number of "atomic" items in this view.""" return len(self.buffer) def __contains__(self, value: object, /) -> bool: + """Check that an "atomic" value is represented by this view.""" return value in self.buffer @abc.abstractmethod def __lt__(self, other: object, /) -> bool: - pass + """Check that this view is strictly contained in *other*. + + Subclasses implementing the :class:`SourceView` interface + should describe the expected types for *object*. + """ def __le__(self, other: object, /) -> bool: + """Check that this view is contained in *other*. + + By default, ``self == other`` is called before ``self < other``, but + subclasses should override this method for an efficient alternative. + """ return self == other or self < other def __ge__(self, other: object, /) -> bool: + """Check that *other* is contained by this view. 
+ + By default, ``self == other`` is called before ``self > other``, but + subclasses should override this method for an efficient alternative. + """ return self == other or self > other @abc.abstractmethod def __gt__(self, other: object, /) -> bool: - pass + """Check that this view strictly contains *other*. + + Subclasses implementing the :class:`SourceView` interface + should describe the expected types for *object*. + """ @final -class Line(ComparableView[str]): +class Line(SourceView[str]): """A line found by :meth:`~sphinx.testing.matcher.LineMatcher.match`. A :class:`Line` can be compared to :class:`str`, :class:`Line` objects or a pair (i.e., a two-length sequence) ``(line, line_offset)`` where - - *line* is a native :class:`str` (not a subclass thereof), and + - *line* is a :class:`str` object, and - *line_offset* is an nonnegative integer. By convention, the comparison result (except for ``!=``) of :class:`Line` @@ -98,20 +140,16 @@ class Line(ComparableView[str]): objects instead if the offset is not relevant. """ - def __init__(self, line: str = '', offset: int = 0, /, *, _check: bool = True) -> None: - """Construct a :class:`Line` object. - - The *line* must be a native :class:`str` object. - """ - if _check and type(line) is not str: - # force the line to be a true string and not another string-like - msg = f'expecting a native string, got: {line!r}' - raise TypeError(msg) + # NOTE(picnixz): this class could be extended to support arbitrary + # character's types, but it would not be possible to use the C API + # implementing the :class:`str` interface anymore. + def __init__(self, line: str = '', offset: int = 0, /, *, _check: bool = True) -> None: + """Construct a :class:`Line` object.""" super().__init__(line, offset, _check=_check) @classmethod - def view(cls, index: int, line: object, /) -> Self: + def view(cls, index: int, line: str, /, *, _check: bool = True) -> Self: """Alternative constructor flipping the order of the arguments. 
This is typically useful with :func:`enumerate`, namely this makes:: @@ -121,9 +159,13 @@ def view(cls, index: int, line: object, /) -> Self: equivalent to:: - lines = [Line(str(line), index) for index, line in enumerate(src)] + def cast(line: object) -> str: + return line if isinstance(line, str) else str(line) + + lines = [Line(cast(line), index) for index, line in enumerate(src)] """ - return cls(str(line), index, _check=True) + line = line if isinstance(line, str) else str(line) + return cls(line, index, _check=_check) # dunder methods @@ -131,93 +173,118 @@ def __str__(self) -> str: """The line as a string.""" return self.buffer - def __repr__(self) -> str: - return f'{self.__class__.__name__}({self!s}, offset={self.offset})' - def __getitem__(self, index: int | slice, /) -> str: - """Return the *nth* character.""" return self.buffer[index] - def __add__(self, other: object, /) -> Self: + def __eq__(self, other: object, /) -> bool: if isinstance(other, str): - return self.__class__(str(self) + other, self.offset, _check=False) - if isinstance(other, Line): - if self.offset != other.offset: - msg = 'cannot concatenate lines with different offsets' - raise ValueError(msg) - return self.__class__(str(self) + str(other), self.offset, _check=False) - return NotImplemented + return self.buffer == other - def __mul__(self, other: object, /) -> Self: - if isinstance(other, int): - return self.__class__(str(self) * other, self.offset, _check=False) - return NotImplemented + other = self.__parse_non_string(other) + if other is None: + return NotImplemented - def __eq__(self, other: object, /) -> bool: - other = self.__cast(other) - if isinstance(other, self.__class__): - # check offsets before calling str() for efficiency - return self.offset == other.offset and str(self) == str(other) - return False + # separately check offsets before the buffers for efficiency + return self.offset == other[1] and self.buffer == other[0] def __lt__(self, other: object, /) -> bool: - 
other = self.__cast(other) - if isinstance(other, self.__class__): - # check offsets before calling str() for efficiency - return self.offset == other.offset and str(self) < str(other) - return NotImplemented + if isinstance(other, str): + return self.buffer < other + + other = self.__parse_non_string(other) + if other is None: + return NotImplemented + + # separately check offsets before the buffers for efficiency + return self.offset == other[1] and self.buffer < other[0] def __gt__(self, other: object, /) -> bool: - other = self.__cast(other) - if isinstance(other, self.__class__): - # check offsets before calling str() for efficiency - return self.offset == other.offset and str(self) > str(other) - return NotImplemented + if isinstance(other, str): + return self.buffer > other + + other = self.__parse_non_string(other) + if other is None: + return NotImplemented - # exposed :class:`str` interface + # separately check offsets before the buffers for efficiency + return self.offset == other[1] and self.buffer > other[0] def startswith(self, prefix: str, start: int = 0, end: int | None = None, /) -> bool: - """See :meth:`str.startswith`.""" + """Test whether the line starts with the given *prefix*. + + :param prefix: A line prefix to test. + :param start: The test start position. + :param end: The test stop position. + """ return self.buffer.startswith(prefix, start, end) def endswith(self, suffix: str, start: int = 0, end: int | None = None, /) -> bool: - """See :meth:`str.endswith`.""" + """Test whether the line ends with the given *suffix*. + + :param suffix: A line suffix to test. + :param start: The test start position. + :param end: The test stop position. + """ return self.buffer.endswith(suffix, start, end) def count(self, sub: str, start: int = 0, end: int | None = None, /) -> int: - """See :meth:`str.count`.""" + """Count the number of non-overlapping occurrences of a substring. + + :param sub: A substring to locate. 
+ :param start: The test start position. + :param end: The test stop position. + """ return self.buffer.count(sub, start, end) - def find(self, sub: str, start: int = 0, end: int | None = None, /) -> int: - """See :meth:`str.find`.""" - return self.buffer.find(sub, start, end) + def index(self, sub: str, start: int = 0, end: int | None = None, /) -> int: + """Get the lowest index of the substring *sub* in ``self[start:end]``. - def rfind(self, sub: str, start: int = 0, end: int | None = None, /) -> int: - """See :meth:`str.rfind`.""" - return self.buffer.rfind(sub, start, end) + :raise ValueError: The substring is not found in ``self[start:end]``. - def index(self, sub: str, start: int = 0, end: int | None = None, /) -> int: - """See :meth:`str.index`.""" + .. seealso:: :meth:`find` + """ return self.buffer.index(sub, start, end) def rindex(self, sub: str, start: int = 0, end: int | None = None, /) -> int: - """See :meth:`str.rindex`.""" + """Get the highest index of the substring *sub* in ``self[start:end]``. + + :raise ValueError: The substring is not found in ``self[start:end]``. + + .. seealso:: :meth:`rfind` + """ return self.buffer.rindex(sub, start, end) - def __cast(self, other: object, /) -> Self | object: - """Try to parse *object* as a :class:`Line`.""" + def find(self, sub: str, start: int = 0, end: int | None = None, /) -> int: + """Get the lowest index of the substring *sub* in ``self[start:end]``. + + If the substring is not found, this returns ``-1``. + + .. seealso:: :meth:`index` + """ + return self.buffer.find(sub, start, end) + + def rfind(self, sub: str, start: int = 0, end: int | None = None, /) -> int: + """Get the highest index of the substring *sub* in ``self[start:end]``. + + If the substring is not found, this returns ``-1``. + + .. 
seealso:: :meth:`rindex` + """ + return self.buffer.rfind(sub, start, end) + + def __parse_non_string(self, other: object, /) -> tuple[str, int] | None: + """Try to parse *other* as a ``line`` or a ``(line, offset)`` pair.""" if isinstance(other, self.__class__): - return other - if isinstance(other, str): - return self.__class__(other, self.offset, _check=False) - if isinstance(other, (list, tuple)) and len(other) == 2: - # type checking is handled by the Line constructor - return self.__class__(other[0], other[1], _check=True) - return other + return other.buffer, other.offset + if isinstance(other, Sequence) and len(other) == 2: + buffer, offset = other + if isinstance(buffer, str) and isinstance(offset, int): + return buffer, offset + return None @final -class Block(ComparableView[tuple[str, ...]], Sequence[str]): +class Block(SourceView[tuple[str, ...]], Sequence[str]): """Block found by :meth:`~sphinx.testing.matcher.LineMatcher.find`. A block is a sequence of lines comparable to :class:`Line`, generally a @@ -244,16 +311,19 @@ class Block(ComparableView[tuple[str, ...]], Sequence[str]): def __init__( self, buffer: Iterable[str] = (), offset: int = 0, /, *, _check: bool = True ) -> None: + # It is more efficient to first consume everything and then + # iterate over the values for checks rather than to add the + # validated values one by one. buffer = tuple(buffer) if _check: for line in buffer: - if type(line) is not str: + if not isinstance(line, str): err = f'expecting a native string, got: {line!r}' raise TypeError(err) super().__init__(buffer, offset, _check=_check) @classmethod - def view(cls, index: int, buffer: Iterable[str], /) -> Self: + def view(cls, index: int, buffer: Iterable[str], /, *, _check: bool = True) -> Self: """Alternative constructor flipping the order of the arguments. 
This is typically useful with :func:`enumerate`, namely this makes:: @@ -265,12 +335,7 @@ def view(cls, index: int, buffer: Iterable[str], /) -> Self: blocks = [Block(lines, index) for index, lines in enumerate(src)] """ - return cls(buffer, index, _check=True) - - @property - def length(self) -> int: - """The number of lines in this block.""" - return len(self) + return cls(buffer, index, _check=_check) @property def window(self) -> slice: @@ -296,19 +361,20 @@ def context(self, delta: int, limit: int) -> tuple[slice, slice]: Example:: - source = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] - block = Block(['4', '5', '6'], 3) - before, after = block.context(2, 10) - assert source[before] == ['2', '3'] - assert source[after] == ['7', '8'] + source = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] + block = Block(['4', '5', '6'], 3) + before, after = block.context(2, 10) + assert source[before] == ['2', '3'] + assert source[after] == ['7', '8'] """ block_stop = self.offset + self.length before_slice = slice(max(0, self.offset - delta), min(self.offset, limit)) after_slice = slice(min(block_stop, limit), min(block_stop + delta, limit)) return before_slice, after_slice - def __repr__(self) -> str: - return f'{self.__class__.__name__}({self.buffer!r}, offset={self.offset})' + def count(self, line: object, /) -> int: + """Count the number of occurences of a *line*.""" + return self.buffer.count(line) # fmt: off @overload @@ -319,14 +385,12 @@ def at(self, index: slice, /) -> Self: ... 
# NoQA: E704 def at(self, index: int | slice, /) -> Line | Block: # NoQA: E301 """Get a :class:`Line` or a contiguous sub-:class:`Block`.""" if isinstance(index, slice): - # normalize negative and None slice fields - start, _, step = index.indices(self.length) - if step != 1: - msg = 'only contiguous regions can be extracted' - raise ValueError(msg) - return self.__class__(self.buffer[index], self.offset + start, _check=False) + # exception for invalid step is handled by __getitem__ + buffer = self[index] + offset = self.offset + index.indices(self.length)[0] + return self.__class__(buffer, offset, _check=False) - # normalize negative indices + # normalize negative index start, _, _ = slice(index, -1).indices(self.length) return Line(self.buffer[index], self.offset + start, _check=False) @@ -338,96 +402,130 @@ def __getitem__(self, index: slice, /) -> Sequence[str]: ... # NoQA: E704 # fmt: on def __getitem__(self, index: int | slice, /) -> str | Sequence[str]: # NoQA: E301 """Get a line or a contiguous sub-block.""" - return self.at(index).buffer + if isinstance(index, slice): + # normalize negative and None slice fields + _, _, step = index.indices(self.length) + if step != 1: + msg = 'only contiguous regions can be extracted' + raise ValueError(msg) + return self.buffer[index] def __eq__(self, other: object, /) -> bool: if isinstance(other, self.__class__): # more efficient to first check the offsets return (self.offset, self.buffer) == (other.offset, other.buffer) - if isinstance(other, (list, tuple)): - lines, offset = self.__cast(other) - # check offsets before computing len(lines) or len(self) - if (offset, len(lines)) != (self.offset, self.length): - return False + other = self.__parse_non_block(other) + if other is None: + return NotImplemented + + lines, offset = other + # check offsets before computing len(lines) or len(self) + if offset != -1 and offset != self.offset: + return False - expect = starmap(Line.view, enumerate(self, offset)) - # xref py310+: 
use strict=True - return all(starmap(Line.__eq__, zip(expect, lines))) + if len(lines) == self.length: + # match the lines one by one, possibly using a rich comparison + expect = self.__lines_iterator(0, self.length) + return all(map(operator.__eq__, expect, lines)) return False def __lt__(self, other: object, /) -> bool: - # nothing can be a strict subset of the empty block - if not self: - return False - if isinstance(other, self.__class__): # more efficient to first check if the indices are valid before checking the lines if _can_be_strict_in(self.offset, self.length, other.offset, other.length): return self.buffer < other.buffer return False - if isinstance(other, (list, tuple)): - lines, offset = self.__cast(other) - if not _can_be_strict_in(self.offset, self.length, offset, len(lines)): - return False + other = self.__parse_non_block(other) + if other is None: + return NotImplemented - expect = list(starmap(Line.view, enumerate(self, self.offset))) - for candidate in windowed(lines, self.length): - # xref py310+: use strict=True - if all(starmap(Line.__eq__, zip(expect, candidate))): - return True + lines, offset = other + max_length = len(lines) + if self.length >= max_length: + # By Dirichlet's box principle, *other* must have strictly more + # items than *self* for the latter to be strictly contained. 
return False - # other types are not supported for comparison - return NotImplemented + # convert this block into its lines so that we use a rich comparison + # with the items in *other* (we do not know their exact type) + actual = self.__lines(0, self.length) + + if offset != -1: + if _can_be_strict_in(self.offset, self.length, offset, max_length): + return actual < lines + return False + + # we want to find this block in the *other* block (at any place) + for start in range(max_length - self.length + 1): + region = itertools.islice(lines, start, start + self.length) + if all(map(operator.__eq__, actual, region)): + return True + return False def __gt__(self, other: object, /) -> bool: if isinstance(other, self.__class__): return other < self - if isinstance(other, (list, tuple)): - expecting, offset = self.__cast(other) - batchsize = len(expecting) - if not _can_be_strict_in(offset, batchsize, self.offset, self.length): - return False + other = self.__parse_non_block(other) + if other is None: + return NotImplemented - for batch in windowed(self, batchsize): - candidate = starmap(Line.view, enumerate(batch, offset)) - if all(actual == expect for actual, expect in zip(candidate, expecting)): - return True + # nothing can be a strict subset of the empty block (this check + # must be done *after* we decided whether *other* is correct) + if not self: return False - # other types are not supported for comparison - return NotImplemented - - # exposed :class:`tuple` interface - - def index( - self, - value: object, - start: SupportsIndex = 0, - stop: SupportsIndex = sys.maxsize, - /, - ) -> int: - """See :meth:`tuple.count`.""" - return self.buffer.index(value, start, stop) - - def count(self, value: object, /) -> int: - """See :meth:`tuple.count`.""" - return self.buffer.count(value) - - def __cast( - self, other: Sequence[object] | tuple[Sequence[object], int], / - ) -> tuple[Sequence[object], int]: - """Try to parse *object* as a pair ``(lines, block offset)``.""" + 
lines, other_offset = other + other_length = len(lines) + + if self.length <= other_length: + # By Dirichlet's box principle, *self* must have strictly more + # items than *other* for the latter to be strictly contained. + return False + + if other_offset != -1: + # we want to find *other* at a given offset + if _can_be_strict_in(other_offset, other_length, self.offset, self.length): + # dispatch to C implementation of list.__lt__ + actual = self.__lines(other_offset, other_length) + return actual > lines + + # we want to find *other* in this block (at any place) + for start in range(self.length - other_length + 1): + if self.__lines(start, other_length) > lines: + return True + return False + + # Do not annotate with list[Line] since otherwise mypy complains + # when comparing with a right-hand side that is a list of objects. + def __lines(self, start: int, count: int) -> list[object]: + """Same as :func:`__lines_iterator` but return a list instead.""" + return list(self.__lines_iterator(start, count)) + + def __lines_iterator(self, start: int, count: int) -> Iterator[Line]: + """Yield some lines in this block as :class:`Line` objects.""" + region = itertools.islice(self.buffer, start, start + count) + for index, line in enumerate(region, self.offset + start): + yield Line(line, index, _check=False) + + def __parse_non_block(self, other: object, /) -> tuple[list[object], int] | None: + """Try to parse *other* as a pair ``(block lines, block offset)``. + + For efficiency, do *not* call this method on :class:`Block` instances + since they can be handled separately more efficiently. 
+ """ + if not isinstance(other, Sequence): + return None + + # given as (lines, offset) with lines = sequence of line-like objects if len(other) == 2 and isinstance(other[0], Sequence) and isinstance(other[1], int): - # mypy does not know how to deduce that the lenght is 2 - if isinstance(other, str): - msg = f'expecting a sequence of lines, got: {other!r}' - raise ValueError(msg) - return other[0], other[1] - return other, self.offset + if isinstance(other[0], str): + return None + # mypy does not know how to deduce that the length is 2 + return list(other[0]), other[1] + return list(other), -1 def _can_be_strict_in(i1: int, l1: int, i2: int, l2: int) -> bool: diff --git a/sphinx/testing/_matcher/cleaner.py b/sphinx/testing/_matcher/cleaner.py index 86bc09ebeb1..a6b3f29853e 100644 --- a/sphinx/testing/_matcher/cleaner.py +++ b/sphinx/testing/_matcher/cleaner.py @@ -55,7 +55,10 @@ def clean_lines(lines: Iterable[str], /, **options: Unpack[Options]) -> Iterable flavor = get_option(options, 'flavor') lines = prune(lines, delete, flavor=flavor) - return ignore_lines(lines, get_option(options, 'ignore')) + ignore = get_option(options, 'ignore') + lines = ignore_lines(lines, ignore) + + return lines def strip_ansi(text: str, /, ctrl: bool = False, color: bool = False) -> str: @@ -76,24 +79,14 @@ def strip_chars(text: str, chars: StripChars = True, /) -> str: """Strip expected characters from *text*.""" if isinstance(chars, bool): return text.strip() if chars else text - - if isinstance(chars, str) or chars is None: - return text.strip(chars) - - msg = 'expecting a boolean, a string or None for %r, got: %r' % ('strip', chars) - raise ValueError(msg) + return text.strip(chars) def strip_lines(lines: Iterable[str], chars: StripChars = True, /) -> Iterable[str]: """Call :meth:`str.strip` to each line in *lines*.""" if isinstance(chars, bool): return map(str.strip, lines) if chars else lines - - if isinstance(chars, str) or chars is None: - return (line.strip(chars) for 
line in lines) - - msg = 'expecting a boolean, a string or None for %r, got: %r' % ('stripline', chars) - raise ValueError(msg) + return (line.strip(chars) for line in lines) def filter_lines( diff --git a/sphinx/testing/_matcher/engine.py b/sphinx/testing/_matcher/engine.py index 8c76b1f0137..19dd9b08711 100644 --- a/sphinx/testing/_matcher/engine.py +++ b/sphinx/testing/_matcher/engine.py @@ -25,18 +25,18 @@ def _check_flavor(flavor: Flavor) -> None: # fmt: off @overload -def to_line_patterns(expect: str, *, optimized: bool = True) -> tuple[str]: ... # NoQA: E704 +def to_line_patterns(expect: str, *, optimized: bool = False) -> tuple[str]: ... # NoQA: E704 @overload # NoQA: E302 def to_line_patterns( # NoQA: E704 - expect: re.Pattern[str], *, optimized: bool = True + expect: re.Pattern[str], *, optimized: bool = False ) -> tuple[re.Pattern[str]]: ... @overload # NoQA: E302 def to_line_patterns( # NoQA: E704 - expect: Iterable[LinePattern], *, optimized: bool = True + expect: Iterable[LinePattern], *, optimized: bool = False ) -> tuple[LinePattern, ...]: ... # fmt: on def to_line_patterns( # NoQA: E302 - expect: LinePattern | Iterable[LinePattern], *, optimized: bool = True + expect: LinePattern | Iterable[LinePattern], *, optimized: bool = False ) -> Sequence[LinePattern]: """Get a read-only sequence of line-matching patterns. @@ -55,7 +55,7 @@ def key(x: str | re.Pattern[str]) -> str: return x if isinstance(x, str) else x.pattern if optimized: - return sorted(set(expect), key=key) + return tuple(sorted(set(expect), key=key)) return tuple(expect) @@ -82,11 +82,11 @@ def to_block_pattern(expect: LinePattern | Iterable[LinePattern]) -> Sequence[Li # fmt: off @overload -def transform(fn: Callable[[str], str], x: str) -> str: ... # NoQA: E704 +def transform(fn: Callable[[str], str], x: str, /) -> str: ... # NoQA: E704 @overload -def transform(fn: Callable[[str], str], x: re.Pattern[str]) -> re.Pattern[str]: ... 
# NoQA: E704 +def transform(fn: Callable[[str], str], x: re.Pattern[str], /) -> re.Pattern[str]: ... # NoQA: E704 # fmt: on -def transform(fn: Callable[[str], str], x: LinePattern) -> LinePattern: # NoQA: E302 +def transform(fn: Callable[[str], str], x: LinePattern, /) -> LinePattern: # NoQA: E302 """Transform regular expressions, leaving compiled patterns untouched.""" return fn(x) if isinstance(x, str) else x @@ -99,7 +99,7 @@ def translate(patterns: Iterable[LinePattern], *, flavor: Flavor) -> Iterable[Li Usage:: - patterns = list(_translate(['a*', re.compile('b')], flavor='fnmatch')) + patterns = list(translate(['a*', re.compile('b')], flavor='fnmatch')) patterns == ['(?:a.*)\\Z', re.compile('b')] """ _check_flavor(flavor) diff --git a/sphinx/testing/_matcher/options.py b/sphinx/testing/_matcher/options.py index 5a02c408623..1cab24119d3 100644 --- a/sphinx/testing/_matcher/options.py +++ b/sphinx/testing/_matcher/options.py @@ -29,6 +29,7 @@ OptionName = Literal[FlagOption, StripOption, DeleteOption, FilteringOption, FlavorOption] DT = TypeVar('DT') + _OptionsView = Union['Options', 'CompleteOptions'] @final @@ -38,12 +39,14 @@ class Options(TypedDict, total=False): Some options directly act on the original string (e.g., :attr:`strip`), while others (e.g., :attr:`stripline`) act on the lines obtained after splitting the (transformed) original string. + + .. seealso:: :mod:`sphinx.testing._matcher.cleaner` """ color: bool """Indicate whether to keep the ANSI escape sequences for colors. - The default value is ``False``. + The default value is ``True``. """ ctrl: bool @@ -122,11 +125,6 @@ class Options(TypedDict, total=False): This transformation is applied at the end of the transformation chain, just before filtering the output lines are filtered with the :attr:`ignore` predicate. 
- - Example:: - - clean('abcdA\n1', delete='abcd') == ['A', '1'] - clean('1234A\nxyzt', delete=r'\d+', flavor='re') == ['A', 'xyzt'] """ ignore: LinePredicate | None @@ -179,9 +177,9 @@ class CompleteOptions(TypedDict): DEFAULT_OPTIONS: Final[CompleteOptions] = CompleteOptions( - color=False, + color=True, ctrl=True, - strip=True, + strip=False, stripline=False, keepends=False, empty=True, @@ -193,12 +191,7 @@ class CompleteOptions(TypedDict): ) """The default (read-only) options values.""" -if TYPE_CHECKING: - _OptionsView = Union[Options, CompleteOptions] - -# Disable the ruff formatter to minimize the number of empty lines. -# # When an option is added, add an overloaded definition # so that mypy can correctly deduce the option's type. # diff --git a/sphinx/testing/_matcher/util.py b/sphinx/testing/_matcher/util.py index 47dc14351e0..16f3803cb3a 100644 --- a/sphinx/testing/_matcher/util.py +++ b/sphinx/testing/_matcher/util.py @@ -40,7 +40,7 @@ def consume(iterator: Iterator[object], /, n: int | None = None) -> None: def unique_justseen(iterable: Iterable[_T], /) -> Iterator[_T]: - """Yields elements in order, ignoring serial duplicates. + """Yield elements in order, ignoring serial duplicates. Credits go to :func:`!more_itertools.recipes.unique_justseen`. """ @@ -48,7 +48,7 @@ def unique_justseen(iterable: Iterable[_T], /) -> Iterator[_T]: def unique_everseen(iterable: Iterable[_T], /) -> Iterator[_T]: - """Yields elements in order, ignoring duplicates. + """Yield elements in order, ignoring duplicates. Credits go to :func:`!more_itertools.recipes.unique_everseen`. 
""" diff --git a/sphinx/testing/matcher.py b/sphinx/testing/matcher.py index a9dfd3d93b6..92ead634233 100644 --- a/sphinx/testing/matcher.py +++ b/sphinx/testing/matcher.py @@ -30,7 +30,7 @@ class LineMatcher: __slots__ = ('_content', '_options', '_stack') - def __init__(self, content: str | StringIO, /, **options: Unpack[Options]) -> None: + def __init__(self, content: str | StringIO = '', /, **options: Unpack[Options]) -> None: """Construct a :class:`LineMatcher` for the given string content. :param content: The source string. @@ -42,16 +42,20 @@ def __init__(self, content: str | StringIO, /, **options: Unpack[Options]) -> No # stack of cached cleaned lines (with a possible indirection) self._stack: list[int | Block | None] = [None] + def feed(self, content: str | StringIO) -> None: + self._content = content if isinstance(content, str) else content.getvalue() + self._stack = [None] + @classmethod - def from_lines( - cls, lines: Iterable[str], sep: str = '\n', /, **options: Unpack[Options] - ) -> Self: + def from_lines(cls, lines: Iterable[str] = (), /, **options: Unpack[Options]) -> Self: """Construct a :class:`LineMatcher` object from a list of lines. This is typically useful when writing tests for :class:`LineMatcher` since writing the lines instead of a long string is usually cleaner. """ - return cls(sep.join(lines), **options) + keepends = get_option(options, 'keepends') + glue = '' if keepends else '\n' + return cls(glue.join(lines), **options) def __iter__(self) -> Iterator[Line]: """An iterator on the cached lines.""" @@ -138,7 +142,7 @@ def iterfind( When one or more patterns are given, the order of evaluation is the same as they are given (or arbitrary if they are given in a set). 
""" - patterns = engine.to_line_patterns(expect) + patterns = engine.to_line_patterns(expect, optimized=True) matchers = [pattern.match for pattern in self.__compile(patterns, flavor=flavor)] def predicate(line: Line) -> bool: @@ -170,9 +174,12 @@ def iterfind_blocks( objects as they could be interpreted as a line or a block pattern. """ - patterns = engine.to_block_pattern(expect) - lines = self.lines() + # early abort if there are no lines to match + if not lines: + return + + patterns = engine.to_block_pattern(expect) # early abort if there are more expected lines than actual ones if (width := len(patterns)) > len(lines): return @@ -207,7 +214,7 @@ def assert_match( :param count: If specified, the exact number of matching lines. :param flavor: Optional temporary flavor for string patterns. """ - patterns = engine.to_line_patterns(expect) + patterns = engine.to_line_patterns(expect, optimized=True) self._assert_found('line', patterns, count=count, flavor=flavor) def assert_no_match( @@ -224,7 +231,7 @@ def assert_no_match( :param context: Number of lines to print around a failing line. :param flavor: Optional temporary flavor for string patterns. 
""" - patterns = engine.to_line_patterns(expect) + patterns = engine.to_line_patterns(expect, optimized=True) self._assert_not_found('line', patterns, context_size=context, flavor=flavor) def assert_lines( @@ -309,12 +316,17 @@ def _assert_not_found( flavor: Flavor | None = None, ) -> None: lines = self.lines() - if (count := len(patterns)) > len(lines): + # early abort if there are no lines to match + if not lines: + return + + # early abort if there are more lines to match than available + if (window_size := len(patterns)) > len(lines): return compiled_patterns = self.__compile(patterns, flavor=flavor) - for start, block in enumerate(util.windowed(lines, count)): + for start, block in enumerate(util.windowed(lines, window_size)): if all(pattern.match(line) for pattern, line in zip(compiled_patterns, block)): pat = util.prettify_patterns(patterns, sort=pattern_type == 'line') block_object = Block(block, start, _check=False) diff --git a/tests/test_testing/test_matcher.py b/tests/test_testing/test_matcher.py index 55947fc1ca1..240e3ced7c2 100644 --- a/tests/test_testing/test_matcher.py +++ b/tests/test_testing/test_matcher.py @@ -144,7 +144,7 @@ def parse_excinfo(excinfo: ExceptionInfo[AssertionError]) -> list[str]: def test_matcher_cache(): source = [term.blue('hello'), '', 'world'] # keep colors and empty lines - matcher = LineMatcher.from_lines(source, color=True, empty=True) + matcher = LineMatcher.from_lines(source) stack = matcher._stack assert len(stack) == 1 @@ -222,6 +222,19 @@ def test_assert_match(): matcher.assert_match('.+', flavor='re') matcher.assert_match('[abcd]', flavor='fnmatch') + matcher = LineMatcher() + matcher.feed('') + with pytest.raises(AssertionError, match=r'(?s:.+not found in.+)'): + matcher.assert_match('.+', flavor='re') + + matcher.feed('') + with pytest.raises(AssertionError, match=r'(?s:.+not found in.+)'): + matcher.assert_match('.*', flavor='re') + + matcher = LineMatcher.from_lines(['\n']) + assert matcher.lines() == [''] + 
matcher.assert_match('.*', flavor='re') + @pytest.mark.parametrize( ('lines', 'pattern', 'flavor', 'expect'), diff --git a/tests/test_testing/test_matcher_buffer.py b/tests/test_testing/test_matcher_buffer.py index 0aeef43d446..522bab670e6 100644 --- a/tests/test_testing/test_matcher_buffer.py +++ b/tests/test_testing/test_matcher_buffer.py @@ -1,5 +1,6 @@ from __future__ import annotations +import operator import re from typing import TYPE_CHECKING @@ -11,11 +12,11 @@ from collections.abc import Sequence from typing import Any - from sphinx.testing._matcher.buffer import ComparableView + from sphinx.testing._matcher.buffer import SourceView @pytest.mark.parametrize('cls', [Line, Block]) -def test_offset_value(cls: type[ComparableView[Any]]) -> None: +def test_offset_value(cls: type[SourceView[Any]]) -> None: with pytest.raises(TypeError, match=re.escape('offset must be an integer, got: None')): cls('', None) # type: ignore[arg-type] @@ -23,31 +24,6 @@ def test_offset_value(cls: type[ComparableView[Any]]) -> None: cls('', -1) -def test_line_constructor(): - empty = Line() - assert empty.buffer == '' - assert empty.offset == 0 - - with pytest.raises(TypeError, match=re.escape('expecting a native string, got: 0')): - Line(0, 1) # type: ignore[arg-type] - - with pytest.raises(TypeError, match=re.escape('expecting a native string, got: %r' % '')): - Line(type('', (str,), {})(), 1) - - -def test_line_arithmetic(): - l1, l2 = Line('a', 1), Line('b', 1) - assert l1 + l2 == Line('ab', 1) - - match = re.escape('cannot concatenate lines with different offsets') - with pytest.raises(ValueError, match=match): - Line('a', 1) + Line('b', 2) - - assert Line('a', 1) * 3 == Line('aaa', 1) - with pytest.raises(TypeError): - Line() * object() - - def test_line_comparison_operators(): assert Line('a', 1) == 'a' assert Line('a', 1) == ('a', 1) @@ -84,15 +60,79 @@ def test_line_comparison_operators(): assert Line('ab', 1) <= Line('ab', 1) -@pytest.mark.parametrize('bad_line', [1234, 
type('', (str,), {})()]) -def test_block_constructor(bad_line): +def test_empty_line(): + assert Line() == '' + assert Line() == ['', 0] + + assert Line() != ['', 1] + assert Line() != ['a'] + assert Line() != ['a', 0] + assert Line() != object() + + assert Line() <= '' + assert Line() <= 'a' + assert Line() <= ['a', 0] + assert Line() <= Line('a', 0) + + assert Line() < 'a' + assert Line() < ['a', 0] + assert Line() < Line('a', 0) + + # do not simplify these expressions + assert not operator.__lt__(Line(), '') + assert not operator.__lt__(Line(), ['', 0]) + assert not operator.__lt__(Line(), Line()) + + assert not operator.__gt__(Line(), '') + assert not operator.__gt__(Line(), ['', 0]) + assert not operator.__gt__(Line(), Line()) + + +@pytest.mark.parametrize('operand', [[], [Line()], [Line(), 0], [[chr(1), chr(2)], 0]]) +def test_line_unsupported_operators(operand): + for dispatcher in [operator.__lt__, operator.__le__, operator.__ge__, operator.__gt__]: + pytest.raises(TypeError, dispatcher, Line(), operand) + + assert Line() != operand + + +def test_block_constructor(): empty = Block() assert empty.buffer == () assert empty.offset == 0 - match = re.escape(f'expecting a native string, got: {bad_line!r}') + match = re.escape('expecting a native string, got: 1234') with pytest.raises(TypeError, match=match): - Block([bad_line]) + Block([1234]) # type: ignore[list-item] + + +def test_empty_block(): + assert Block() == [] + assert Block() == [[], 0] + + assert Block() != [[], 1] + assert Block() != ['a'] + assert Block() != [['a'], 0] + assert Block() != object() + + assert Block() <= [] + assert Block() <= ['a'] + assert Block() <= [['a'], 0] + assert Block() <= [[Line('a', 0)], 0] + + assert Block() < ['a'] + assert Block() < [['a'], 0] + assert Block() < [[Line('a', 0)], 0] + + # do not simplify these expressions + assert not operator.__lt__(Block(), []) + assert not operator.__lt__(Block(), [[], 0]) + + assert not operator.__gt__(Block(), []) + assert not 
operator.__gt__(Block(), ['a']) + assert not operator.__gt__(Block(), [['a'], 0]) + assert not operator.__gt__(Block(), [[('a', 0)], 0]) + assert not operator.__gt__(Block(), [[Line('a', 0)], 0]) @pytest.mark.parametrize( @@ -100,6 +140,7 @@ def test_block_constructor(bad_line): [ (['a', 'b', 'c'], 'd', ('a', 'b', 'c')), (['a', 'b', 'c'], 'd', ('a', ('b', 2), Line('c', 3))), + (['a', 'b', 'c'], 'd', ('a', ['b', 2], Line('c', 3))), ], ) def test_block_comparison_operators( @@ -127,6 +168,14 @@ def test_block_comparison_operators( assert Block([*lines, foreign], 1) > [expect, 1] +@pytest.mark.parametrize('operand', [{1, 2, 3}]) +def test_block_unsupported_operators(operand): + for dispatcher in [operator.__lt__, operator.__le__, operator.__ge__, operator.__gt__]: + pytest.raises(TypeError, dispatcher, Block(), operand) + + assert Block() != operand + + def test_block_slice_context(): assert Block(['a', 'b'], 1).context(delta=4, limit=5) == (slice(0, 1), slice(3, 5)) assert Block(['a', 'b'], 3).context(delta=2, limit=9) == (slice(1, 3), slice(5, 7)) From 3695617364573da673046fae513d0a62bc39b0e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 2 Apr 2024 14:29:55 +0200 Subject: [PATCH 16/66] fixup --- sphinx/testing/matcher.py | 6 ++++++ tests/test_testing/test_matcher_options.py | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/sphinx/testing/matcher.py b/sphinx/testing/matcher.py index 92ead634233..f50f48efe92 100644 --- a/sphinx/testing/matcher.py +++ b/sphinx/testing/matcher.py @@ -52,6 +52,12 @@ def from_lines(cls, lines: Iterable[str] = (), /, **options: Unpack[Options]) -> This is typically useful when writing tests for :class:`LineMatcher` since writing the lines instead of a long string is usually cleaner. + + The lines are glued together according to whether line breaks, + which can be specified by the keyword argument *keepends*. 
+ + By default, the lines are assumed *not* to have line breaks (since + this is usually what is the most common). """ keepends = get_option(options, 'keepends') glue = '' if keepends else '\n' diff --git a/tests/test_testing/test_matcher_options.py b/tests/test_testing/test_matcher_options.py index 130407e29dd..2680b828e9e 100644 --- a/tests/test_testing/test_matcher_options.py +++ b/tests/test_testing/test_matcher_options.py @@ -32,10 +32,10 @@ def check(option: OptionName, default: object) -> None: assert options[option] == default processed.add(option) - check('color', False) + check('color', True) check('ctrl', True) - check('strip', True) + check('strip', False) check('stripline', False) check('keepends', False) From c8cb941097298a5a736a9488ff3e6d0b192b9a7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 2 Apr 2024 17:00:48 +0200 Subject: [PATCH 17/66] fix bugs --- sphinx/testing/_matcher/cleaner.py | 35 +++--- sphinx/testing/_matcher/engine.py | 62 +++++---- sphinx/testing/_matcher/options.py | 134 +++++++++++--------- sphinx/testing/_matcher/util.py | 9 +- sphinx/testing/matcher.py | 140 ++++++++++----------- tests/test_testing/test_matcher.py | 6 +- tests/test_testing/test_matcher_engine.py | 83 ++++++++++++ tests/test_testing/test_matcher_options.py | 23 ++-- 8 files changed, 304 insertions(+), 188 deletions(-) create mode 100644 tests/test_testing/test_matcher_engine.py diff --git a/sphinx/testing/_matcher/cleaner.py b/sphinx/testing/_matcher/cleaner.py index a6b3f29853e..36bd11097f5 100644 --- a/sphinx/testing/_matcher/cleaner.py +++ b/sphinx/testing/_matcher/cleaner.py @@ -8,7 +8,6 @@ from typing import TYPE_CHECKING from sphinx.testing._matcher import engine, util -from sphinx.testing._matcher.options import get_option from sphinx.util.console import strip_colors, strip_control_sequences if TYPE_CHECKING: @@ -30,32 +29,37 @@ def clean_text(text: str, /, **options: 
Unpack[Options]) -> Iterable[str]: - """Clean a text.""" - ctrl = get_option(options, 'ctrl') - color = get_option(options, 'color') + """Clean a text, returning an iterable of lines.""" + ctrl = options.get('ctrl', True) + color = options.get('color', True) text = strip_ansi(text, ctrl=ctrl, color=color) - text = strip_chars(text, get_option(options, 'strip')) - lines = text.splitlines(get_option(options, 'keepends')) + strip = options.get('strip', False) + text = strip_chars(text, strip) + + keepends = options.get('keepends', False) + lines = text.splitlines(keepends) + return clean_lines(lines, **options) def clean_lines(lines: Iterable[str], /, **options: Unpack[Options]) -> Iterable[str]: - """Clean a list of lines.""" - lines = strip_lines(lines, get_option(options, 'stripline')) + """Clean an iterable of lines.""" + stripline = options.get('stripline', False) + lines = strip_lines(lines, stripline) # Removing empty lines first ensures that serial duplicates can # be eliminated in one cycle. Inverting the order of operations # is not possible since empty lines may 'hide' duplicated lines. 
- empty = get_option(options, 'empty') - compress = get_option(options, 'compress') - unique = get_option(options, 'unique') + empty = options.get('empty', True) + compress = options.get('compress', False) + unique = options.get('unique', False) lines = filter_lines(lines, empty=empty, compress=compress, unique=unique) - delete = get_option(options, 'delete') - flavor = get_option(options, 'flavor') + delete = options.get('delete', ()) + flavor = options.get('flavor', 'none') lines = prune(lines, delete, flavor=flavor) - ignore = get_option(options, 'ignore') + ignore = options.get('ignore', None) lines = ignore_lines(lines, ignore) return lines @@ -177,8 +181,7 @@ def prune( trace.append(entry) yield res """ - # keep the order in which patterns are evaluated and possible duplicates - delete_patterns = engine.to_line_patterns(delete, optimized=False) + delete_patterns = engine.to_line_patterns(delete) patterns = engine.translate(delete_patterns, flavor=flavor) # ensure that we are using the beginning of the string (this must # be done *after* the regular expression translation, since fnmatch diff --git a/sphinx/testing/_matcher/engine.py b/sphinx/testing/_matcher/engine.py index 19dd9b08711..b29fa1894f3 100644 --- a/sphinx/testing/_matcher/engine.py +++ b/sphinx/testing/_matcher/engine.py @@ -4,10 +4,11 @@ import fnmatch import re +from collections.abc import Sequence, Set from typing import TYPE_CHECKING, overload if TYPE_CHECKING: - from collections.abc import Callable, Iterable, Sequence + from collections.abc import Callable, Iterable from typing import TypeVar from sphinx.testing._matcher.options import Flavor @@ -17,45 +18,49 @@ def _check_flavor(flavor: Flavor) -> None: - allowed = ('none', 'fnmatch', 're') + allowed: Sequence[Flavor] = ('none', 'fnmatch', 're') if flavor not in allowed: msg = f'unknown flavor: {flavor!r} (choose from {tuple(map(repr, allowed))})' raise ValueError(msg) +def _sort_pattern(s: str | re.Pattern[str]) -> tuple[str, int, int]: + 
if isinstance(s, str): + return (s, -1, -1) + return (s.pattern, s.flags, s.groups) + + # fmt: off @overload -def to_line_patterns(expect: str, *, optimized: bool = False) -> tuple[str]: ... # NoQA: E704 -@overload # NoQA: E302 -def to_line_patterns( # NoQA: E704 - expect: re.Pattern[str], *, optimized: bool = False -) -> tuple[re.Pattern[str]]: ... -@overload # NoQA: E302 -def to_line_patterns( # NoQA: E704 - expect: Iterable[LinePattern], *, optimized: bool = False -) -> tuple[LinePattern, ...]: ... +def to_line_patterns(expect: str) -> tuple[str]: ... # NoQA: E704 +@overload +def to_line_patterns(expect: re.Pattern[str]) -> tuple[re.Pattern[str]]: ... # NoQA: E704 +@overload +def to_line_patterns(expect: Iterable[LinePattern], /) -> tuple[LinePattern, ...]: ... # NoQA: E704 # fmt: on -def to_line_patterns( # NoQA: E302 - expect: LinePattern | Iterable[LinePattern], *, optimized: bool = False -) -> Sequence[LinePattern]: +def to_line_patterns(expect: LinePattern | Iterable[LinePattern]) -> Sequence[LinePattern]: # NoqA: E302 """Get a read-only sequence of line-matching patterns. :param expect: One or more patterns a line should match. - :param optimized: If true, patterns are sorted and duplicates are removed. :return: The possible line patterns. - By convention, + By convention,:: to_line_patterns("my pattern") == to_line_patterns(["my pattern"]) + + .. note:: + + The order in which the patterns are given is retained, except for + iterables that do not have an ordering (e.g., :class:`set`). For + such collections, patterns are ordered accroding to their string + representation, :class:`flags ` and capture groups. 
""" if isinstance(expect, (str, re.Pattern)): return (expect,) - def key(x: str | re.Pattern[str]) -> str: - return x if isinstance(x, str) else x.pattern + if isinstance(expect, Set): + return tuple(sorted(expect, key=_sort_pattern)) - if optimized: - return tuple(sorted(set(expect), key=key)) return tuple(expect) @@ -67,16 +72,25 @@ def to_block_pattern(expect: re.Pattern[str]) -> tuple[re.Pattern[str]]: ... # @overload def to_block_pattern(expect: Sequence[LinePattern]) -> Sequence[LinePattern]: ... # NoQA: E704 # fmt: on -def to_block_pattern(expect: LinePattern | Iterable[LinePattern]) -> Sequence[LinePattern]: # NoQA: E302 - """Get a read-only sequence for a s single block pattern. +def to_block_pattern(expect: LinePattern | Sequence[LinePattern]) -> Sequence[LinePattern]: # NoQA: E302 + r"""Get a read-only sequence for a s single block pattern. - :param expect: A single string, a single pattern or one or more patterns. + :param expect: A string, :class:`~re.Pattern` or a sequence thereof. :return: The line patterns of the block. + :raise TypeError: The type of *expect* is not supported. 
+ + When *expect* is a single string, it is split into lines to produce + the corresponding block pattern, e.g.:: + + to_block_pattern('line1\nline2') == ('line1', 'line2') """ if isinstance(expect, str): return tuple(expect.splitlines()) if isinstance(expect, re.Pattern): return (expect,) + if not isinstance(expect, Sequence): + msg = f'expecting a sequence of patterns, got: {expect!r}' + raise TypeError(msg) return tuple(expect) @@ -117,4 +131,4 @@ def compile(patterns: Iterable[LinePattern], *, flavor: Flavor) -> Sequence[re.P patterns = translate(patterns, flavor=flavor) # mypy does not like map + re.compile() although it is correct but # this is likely due to https://github.com/python/mypy/issues/11880 - return [re.compile(pattern) for pattern in patterns] + return tuple(re.compile(pattern) for pattern in patterns) diff --git a/sphinx/testing/_matcher/options.py b/sphinx/testing/_matcher/options.py index 1cab24119d3..684970d3bd7 100644 --- a/sphinx/testing/_matcher/options.py +++ b/sphinx/testing/_matcher/options.py @@ -2,11 +2,15 @@ __all__ = ('Options',) +import contextlib +from types import MappingProxyType from typing import TYPE_CHECKING, TypedDict, final, overload if TYPE_CHECKING: - from collections.abc import Callable, Sequence - from typing import Final, Literal, TypeVar, Union + from collections.abc import Callable, Generator, Mapping, Sequence + from typing import ClassVar, Literal, TypeVar, Union + + from typing_extensions import Unpack from sphinx.testing._matcher.util import LinePattern @@ -176,59 +180,77 @@ class CompleteOptions(TypedDict): flavor: Flavor -DEFAULT_OPTIONS: Final[CompleteOptions] = CompleteOptions( - color=True, - ctrl=True, - strip=False, - stripline=False, - keepends=False, - empty=True, - compress=False, - unique=False, - delete=(), - ignore=None, - flavor='none', -) -"""The default (read-only) options values.""" - - -# When an option is added, add an overloaded definition -# so that mypy can correctly deduce the option's 
type. -# -# fmt: off -# boolean-like options -@overload -def get_option(options: _OptionsView, name: FlagOption, /) -> bool: ... # NoQA: E704 -@overload -def get_option(options: _OptionsView, name: FlagOption, default: DT, /) -> bool | DT: ... # NoQA: E704 -# strip-like options -@overload -def get_option(options: _OptionsView, name: StripOption, /) -> StripChars: ... # NoQA: E704 -@overload -def get_option(options: _OptionsView, name: StripOption, default: DT, /) -> StripChars | DT: ... # NoQA: E501, E704 -# delete prefix/suffix option -@overload -def get_option(options: _OptionsView, name: DeleteOption, /) -> DeletePattern: ... # NoQA: E704 -@overload -def get_option(options: _OptionsView, name: DeleteOption, default: DT, /) -> DeletePattern | DT: ... # NoQA: E501, E704 -# filtering options -@overload -def get_option(options: _OptionsView, name: FilteringOption, /) -> LinePredicate | None: ... # NoQA: E704 -@overload -def get_option(options: _OptionsView, name: FilteringOption, default: DT, /) -> LinePredicate | None | DT: ... # NoQA: E501, E704 -# miscellaneous options -@overload -def get_option(options: _OptionsView, name: FlavorOption, /) -> Flavor: ... # NoQA: E704 -@overload -def get_option(options: _OptionsView, name: FlavorOption, default: DT, /) -> Flavor | DT: ... # NoQA: E704 -# fmt: on -def get_option(options: _OptionsView, name: OptionName, /, *default: DT) -> object | DT: # NoQA: E302 - """Get an option value or *default*. - - If *default* is not specified, an internal default value is returned. +class Configurable: + """Mixin supporting a known set of options.""" - :meta private: + __slots__ = ('_options',) + + default_options: ClassVar[CompleteOptions] = CompleteOptions( + color=True, + ctrl=True, + strip=False, + stripline=False, + keepends=False, + empty=True, + compress=False, + unique=False, + delete=(), + ignore=None, + flavor='none', + ) + """The default options to use. + + Subclasses should override this field for different default options. 
""" - if name in options: - return options[name] - return default[0] if default else DEFAULT_OPTIONS[name] + + def __init__(self, /, *args: object, **options: Unpack[Options]) -> None: + # always complete the set of options for this object + self._options = options + + @property + def options(self) -> Mapping[str, object]: # cannot use CompleteOptions :( + """A read-only view on the current mapping of options.""" + return MappingProxyType(self._options) + + @contextlib.contextmanager + def use(self, /, **options: Unpack[Options]) -> Generator[None, None, None]: + """Temporarily replace the set of options with *options*.""" + local_options = self.default_options | options + with self.override(**local_options): + yield + + @contextlib.contextmanager + def override(self, /, **options: Unpack[Options]) -> Generator[None, None, None]: + """Temporarily extend the set of options with *options*.""" + saved_options = self._options.copy() + self._options |= options + try: + yield + finally: + self._options = saved_options + + # When an option is added, add an overloaded definition + # so that mypy can correctly deduce the option's type. + # + # fmt: off + # boolean-like options + @overload + def get_option(self, name: FlagOption, /) -> bool: ... # NoQA: E704 + # strip-like options + @overload + def get_option(self, name: StripOption, /) -> StripChars: ... # NoQA: E704 + # delete prefix/suffix option + @overload + def get_option(self, name: DeleteOption, /) -> DeletePattern: ... # NoQA: E704 + # filtering options + @overload + def get_option(self, name: FilteringOption, /) -> LinePredicate | None: ... # NoQA: E704 + # miscellaneous options + @overload + def get_option(self, name: FlavorOption, /) -> Flavor: ... 
# NoQA: E704 + # fmt: on + def get_option(self, name: OptionName, /) -> object: # NoQA: E301 + """Get a known option value, or its default value.""" + if name in self._options: + return self._options[name] + return self.default_options[name] diff --git a/sphinx/testing/_matcher/util.py b/sphinx/testing/_matcher/util.py index 16f3803cb3a..5b65e2fbd80 100644 --- a/sphinx/testing/_matcher/util.py +++ b/sphinx/testing/_matcher/util.py @@ -59,11 +59,16 @@ def unique_everseen(iterable: Iterable[_T], /) -> Iterator[_T]: yield element -def windowed(iterable: Iterable[_T], n: int, /) -> Iterator[deque[_T]]: +def windowed(iterable: Iterable[_T], n: int, /) -> Iterator[Sequence[_T]]: """Return a sliding window of width *n* over the given iterable. - Credits go to :func:`!more_itertools.more.windowed`. + Credits go to :func:`!more_itertools.more.windowed` but slightly + differs in the sense that if *n* is *0*, then an empty iterator + is returned. """ + if n == 0: + return + iterator = iter(iterable) window = deque(itertools.islice(iterator, n), maxlen=n) if len(window) == n: diff --git a/sphinx/testing/matcher.py b/sphinx/testing/matcher.py index f50f48efe92..c7cfdda776c 100644 --- a/sphinx/testing/matcher.py +++ b/sphinx/testing/matcher.py @@ -3,13 +3,12 @@ __all__ = ('Options', 'LineMatcher') import contextlib -from itertools import starmap -from types import MappingProxyType -from typing import TYPE_CHECKING +import itertools +from typing import TYPE_CHECKING, cast from sphinx.testing._matcher import cleaner, engine, util from sphinx.testing._matcher.buffer import Block, Line -from sphinx.testing._matcher.options import DEFAULT_OPTIONS, Options, get_option +from sphinx.testing._matcher.options import Configurable, Options if TYPE_CHECKING: from collections.abc import Collection, Generator, Iterable, Iterator, Sequence @@ -19,32 +18,30 @@ from typing_extensions import Self, Unpack - from sphinx.testing._matcher.options import CompleteOptions, Flavor + from 
sphinx.testing._matcher.options import Flavor from sphinx.testing._matcher.util import LinePattern PatternType = Literal['line', 'block'] -class LineMatcher: +class LineMatcher(Configurable): """Helper object for matching output lines.""" - __slots__ = ('_content', '_options', '_stack') + __slots__ = ('_content', '_stack') - def __init__(self, content: str | StringIO = '', /, **options: Unpack[Options]) -> None: + # make sure to have an independent object + default_options = Configurable.default_options.copy() + + def __init__(self, content: str | StringIO, /, **options: Unpack[Options]) -> None: """Construct a :class:`LineMatcher` for the given string content. :param content: The source string. :param options: The matcher options. """ self._content = content if isinstance(content, str) else content.getvalue() - # always complete the set of options for this object - self._options: CompleteOptions = DEFAULT_OPTIONS | options # stack of cached cleaned lines (with a possible indirection) self._stack: list[int | Block | None] = [None] - - def feed(self, content: str | StringIO) -> None: - self._content = content if isinstance(content, str) else content.getvalue() - self._stack = [None] + super().__init__(**options) @classmethod def from_lines(cls, lines: Iterable[str] = (), /, **options: Unpack[Options]) -> Self: @@ -59,50 +56,27 @@ def from_lines(cls, lines: Iterable[str] = (), /, **options: Unpack[Options]) -> By default, the lines are assumed *not* to have line breaks (since this is usually what is the most common). 
""" - keepends = get_option(options, 'keepends') + # only compute the default options if needeed ('keepends' is a boolean) + keepends = options.get('keepends') or cls.default_options['keepends'] glue = '' if keepends else '\n' return cls(glue.join(lines), **options) def __iter__(self) -> Iterator[Line]: """An iterator on the cached lines.""" - return starmap(Line.view, enumerate(self.lines())) - - @property - def content(self) -> str: - """The raw content.""" - return self._content - - @property - def options(self) -> CompleteOptions: - """Return a *read-only* view on the (complete) set of options. - - The runtime type of this field is a :class:`!MappingProxyType` and - protects against *runtime* destructive operations (which would not - have been the case solely with a type annotation). - """ - return MappingProxyType(self._options) # type: ignore[return-value] - - @contextlib.contextmanager - def use(self, /, **options: Unpack[Options]) -> Generator[None, None, None]: - """Temporarily set the set of options for this object to *options*. - - If an option is not specified in *options*, its default value is used. 
- """ - local_options = DEFAULT_OPTIONS | options - with self.override(**local_options): - yield + # we do not use Line.view to avoid checking the type of each line + yield from (Line(s, i, _check=False) for i, s in enumerate(self.lines())) @contextlib.contextmanager def override(self, /, **options: Unpack[Options]) -> Generator[None, None, None]: - """Temporarily extend the set of options for this object using *options*.""" - saved_options = self._options.copy() - self._options |= options self._stack.append(None) # prepare the next cache entry - try: + with super().override(**options): yield - finally: - self._stack.pop() # pop the cached lines for this scope - self._options = saved_options + self._stack.pop() # pop the cached lines + + @property + def content(self) -> str: + """The raw content.""" + return self._content def lines(self) -> Block: """The content lines, cleaned up according to the current options. @@ -116,14 +90,27 @@ def lines(self) -> Block: if cached is None: # compute for the first time the value - cached = Block(cleaner.clean_text(self.content, **self.options)) + options = self.default_options | cast(Options, self.options) + # use the *same* type as a block's buffer to speed-up the Block's constructor + lines = tuple(cleaner.clean_text(self.content, **options)) # check if the value is the same as any of a previously cached value - for addr, value in enumerate(stack): - if value == cached: - stack[-1] = addr # indirection - return cached + for addr, value in enumerate(itertools.islice(stack, 0, len(stack) - 1)): + if isinstance(value, int): + cached = cast(Block, stack[value]) + assert isinstance(cached.buffer, tuple) + if cached.buffer == lines: + # compare only the lines (C interface) + stack[-1] = value # indirection + return cached + + if isinstance(value, Block): + assert isinstance(value.buffer, tuple) + if value.buffer == lines: + stack[-1] = addr # indirection + return value + # the value did not exist yet, so we store it at most once - 
stack[-1] = cached + stack[-1] = cached = Block(lines, _check=False) return cached if isinstance(cached, int): @@ -148,11 +135,13 @@ def iterfind( When one or more patterns are given, the order of evaluation is the same as they are given (or arbitrary if they are given in a set). """ - patterns = engine.to_line_patterns(expect, optimized=True) - matchers = [pattern.match for pattern in self.__compile(patterns, flavor=flavor)] + patterns = engine.to_line_patterns(expect) + compiled_patterns = set(self.__compile(patterns, flavor=flavor)) + matchers = {pattern.match for pattern in compiled_patterns} def predicate(line: Line) -> bool: - return any(matcher(line.buffer) for matcher in matchers) + text = line.buffer + return any(matcher(text) for matcher in matchers) yield from filter(predicate, self) @@ -180,21 +169,24 @@ def iterfind_blocks( objects as they could be interpreted as a line or a block pattern. """ - lines = self.lines() - # early abort if there are no lines to match - if not lines: + # in general, the patterns are smaller than the lines + # so we expect the following to be more efficient than + # cleaning up the whole text source + patterns = engine.to_block_pattern(expect) + if not patterns: # no pattern to locate return - patterns = engine.to_block_pattern(expect) - # early abort if there are more expected lines than actual ones - if (width := len(patterns)) > len(lines): + lines: Sequence[str] = self.lines() + if not lines: # no line to match return - compiled_patterns = self.__compile(patterns, flavor=flavor) + if (width := len(patterns)) > len(lines): # too many lines to match + return + compiled_patterns = self.__compile(patterns, flavor=flavor) block_iterator = enumerate(util.windowed(lines, width)) for start, block in block_iterator: - # check if the block matches the pattern line by line + # check if the block matches the patterns line by line if all(pattern.match(line) for pattern, line in zip(compiled_patterns, block)): yield Block(block, start, 
_check=False) # Consume the iterator so that the next block consists @@ -220,7 +212,7 @@ def assert_match( :param count: If specified, the exact number of matching lines. :param flavor: Optional temporary flavor for string patterns. """ - patterns = engine.to_line_patterns(expect, optimized=True) + patterns = engine.to_line_patterns(expect) self._assert_found('line', patterns, count=count, flavor=flavor) def assert_no_match( @@ -237,7 +229,7 @@ def assert_no_match( :param context: Number of lines to print around a failing line. :param flavor: Optional temporary flavor for string patterns. """ - patterns = engine.to_line_patterns(expect, optimized=True) + patterns = engine.to_line_patterns(expect) self._assert_not_found('line', patterns, context_size=context, flavor=flavor) def assert_lines( @@ -296,7 +288,7 @@ def _assert_found( if next(blocks, None): return - keepends = get_option(self.options, 'keepends') + keepends = self.get_option('keepends') ctx = util.highlight(self.lines(), keepends=keepends) pat = util.prettify_patterns(patterns, sort=pattern_type == 'line') logs = [f'{pattern_type} pattern', pat, 'not found in', ctx] @@ -306,7 +298,7 @@ def _assert_found( if (found := len(indices)) == count: return - keepends = get_option(self.options, 'keepends') + keepends = self.get_option('keepends') ctx = util.highlight(self.lines(), indices, keepends=keepends) pat = util.prettify_patterns(patterns, sort=pattern_type == 'line') noun = util.plural_form(pattern_type, count) @@ -321,9 +313,11 @@ def _assert_not_found( context_size: int, flavor: Flavor | None = None, ) -> None: - lines = self.lines() - # early abort if there are no lines to match - if not lines: + if not patterns: # no pattern to find + return + + lines: Sequence[str] = self.lines() + if not lines: # no lines to match return # early abort if there are more lines to match than available @@ -343,5 +337,5 @@ def _assert_not_found( def __compile( self, patterns: Iterable[LinePattern], *, flavor: Flavor | 
None ) -> Sequence[Pattern[str]]: - flavor = get_option(self.options, 'flavor') if flavor is None else flavor + flavor = self.get_option('flavor') if flavor is None else flavor return engine.compile(patterns, flavor=flavor) diff --git a/tests/test_testing/test_matcher.py b/tests/test_testing/test_matcher.py index 240e3ced7c2..83276a82e23 100644 --- a/tests/test_testing/test_matcher.py +++ b/tests/test_testing/test_matcher.py @@ -143,7 +143,6 @@ def parse_excinfo(excinfo: ExceptionInfo[AssertionError]) -> list[str]: def test_matcher_cache(): source = [term.blue('hello'), '', 'world'] - # keep colors and empty lines matcher = LineMatcher.from_lines(source) stack = matcher._stack @@ -222,12 +221,11 @@ def test_assert_match(): matcher.assert_match('.+', flavor='re') matcher.assert_match('[abcd]', flavor='fnmatch') - matcher = LineMatcher() - matcher.feed('') + matcher = LineMatcher('') with pytest.raises(AssertionError, match=r'(?s:.+not found in.+)'): matcher.assert_match('.+', flavor='re') - matcher.feed('') + matcher = LineMatcher('') with pytest.raises(AssertionError, match=r'(?s:.+not found in.+)'): matcher.assert_match('.*', flavor='re') diff --git a/tests/test_testing/test_matcher_engine.py b/tests/test_testing/test_matcher_engine.py new file mode 100644 index 00000000000..90b32d5a135 --- /dev/null +++ b/tests/test_testing/test_matcher_engine.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +import fnmatch +import random +import re + +import pytest + +from sphinx.testing._matcher import engine + + +def test_line_pattern(): + assert engine.to_line_patterns('a') == ('a',) + assert engine.to_line_patterns(['a', 'b']) == ('a', 'b') + + p = re.compile('b') + assert engine.to_line_patterns(p) == (p,) + assert engine.to_line_patterns(['a', p]) == ('a', p) + + # ensure build reproducibility + assert engine.to_line_patterns({'a', p}) == ('a', p) + + p1 = re.compile('a') + p2 = re.compile('a', re.MULTILINE) + p3 = re.compile('ab') + p4 = re.compile('ab', 
re.MULTILINE) + ps = (p1, p2, p3, p4) + + for _ in range(100): + random_patterns = list(ps) + random.shuffle(random_patterns) + patterns: set[str | re.Pattern[str]] = {*random_patterns, 'a'} + patterns.update(random_patterns) + assert engine.to_line_patterns(patterns) == ('a', p1, p2, p3, p4) + assert engine.to_line_patterns(frozenset(patterns)) == ('a', p1, p2, p3, p4) + + +def test_block_patterns(): + assert engine.to_block_pattern('a\nb\nc') == ('a', 'b', 'c') + + p = re.compile('a') + assert engine.to_block_pattern(p) == (p,) + + assert engine.to_block_pattern(['a', p]) == ('a', p) + + pytest.raises(TypeError, engine.to_block_pattern, {'a'}) + pytest.raises(TypeError, engine.to_block_pattern, {'a', p}) + + +def test_transform_expressions(): + fn = '^'.__add__ + assert engine.transform(fn, 'a') == '^a' + + p = re.compile('') + assert engine.transform(fn, p) is p + + +def test_translate_expressions(): + string, pattern = 'a*', re.compile('.*') + inputs = (string, pattern) + + expect = [re.escape(string), pattern] + assert list(engine.translate(inputs, flavor='none')) == expect + + expect = [string, pattern] + assert list(engine.translate(inputs, flavor='re')) == expect + + expect = [fnmatch.translate(string), pattern] + assert list(engine.translate(inputs, flavor='fnmatch')) == expect + + +def test_compile_patterns(): + string = 'a*' + compiled = re.compile('.*') + + expect = (re.compile(re.escape(string)), compiled) + assert engine.compile([string, compiled], flavor='none') == expect + + expect = (re.compile(fnmatch.translate(string)), compiled) + assert engine.compile([string, compiled], flavor='fnmatch') == expect + + expect = (re.compile(string), compiled) + assert engine.compile([string, compiled], flavor='re') == expect diff --git a/tests/test_testing/test_matcher_options.py b/tests/test_testing/test_matcher_options.py index 2680b828e9e..6baf677a2fa 100644 --- a/tests/test_testing/test_matcher_options.py +++ b/tests/test_testing/test_matcher_options.py @@ 
-2,34 +2,31 @@ from typing import TYPE_CHECKING -import pytest - -from sphinx.testing._matcher.options import DEFAULT_OPTIONS, CompleteOptions, Options -from sphinx.testing.matcher import LineMatcher +from sphinx.testing._matcher.options import CompleteOptions, Configurable, Options if TYPE_CHECKING: - from collections.abc import Mapping - from sphinx.testing._matcher.options import OptionName def test_options_class(): # ensure that the classes are kept synchronized missing_keys = Options.__annotations__.keys() - CompleteOptions.__annotations__ - assert not missing_keys, f'missing fields in proxy class: {", ".join(missing_keys)}' + assert not missing_keys, f'missing option(s): {", ".join(missing_keys)}' foreign_keys = CompleteOptions.__annotations__.keys() - Options.__annotations__ - assert not missing_keys, f'foreign fields in proxy class: {", ".join(foreign_keys)}' + assert not foreign_keys, f'unknown option(s): {", ".join(foreign_keys)}' -@pytest.mark.parametrize('options', [DEFAULT_OPTIONS, LineMatcher('').options]) -def test_matcher_default_options(options: Mapping[str, object]) -> None: +def test_matcher_default_options(): """Check the synchronization of default options and classes in Sphinx.""" + default_options = Configurable.default_options.copy() + processed = set() def check(option: OptionName, default: object) -> None: - assert option in options - assert options[option] == default + assert option not in processed + assert option in default_options + assert default_options[option] == default processed.add(option) check('color', True) @@ -48,5 +45,5 @@ def check(option: OptionName, default: object) -> None: check('flavor', 'none') - # check that there are no left over options + # check that there are no leftover options assert sorted(processed) == sorted(Options.__annotations__) From 83b5003481197f7c8cd11fe855458bf9f44a2d8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 2 Apr 
2024 17:53:14 +0200 Subject: [PATCH 18/66] fix bugs --- sphinx/testing/_matcher/cleaner.py | 22 ++++++---- sphinx/testing/_matcher/engine.py | 40 +++++++++++++----- sphinx/testing/matcher.py | 7 +++- tests/test_testing/test_matcher.py | 50 +++++++++++++++-------- tests/test_testing/test_matcher_engine.py | 4 +- 5 files changed, 84 insertions(+), 39 deletions(-) diff --git a/sphinx/testing/_matcher/cleaner.py b/sphinx/testing/_matcher/cleaner.py index 36bd11097f5..431429ff933 100644 --- a/sphinx/testing/_matcher/cleaner.py +++ b/sphinx/testing/_matcher/cleaner.py @@ -2,7 +2,9 @@ __all__ = () +import fnmatch import itertools +import re from functools import reduce from itertools import filterfalse from typing import TYPE_CHECKING @@ -11,7 +13,6 @@ from sphinx.util.console import strip_colors, strip_control_sequences if TYPE_CHECKING: - import re from collections.abc import Iterable, Sequence from typing import TypeVar @@ -182,12 +183,19 @@ def prune( yield res """ delete_patterns = engine.to_line_patterns(delete) - patterns = engine.translate(delete_patterns, flavor=flavor) - # ensure that we are using the beginning of the string (this must - # be done *after* the regular expression translation, since fnmatch - # patterns do not support 'start of the string' syntax) - patterns = (engine.transform(lambda p: rf'^{p}', p) for p in patterns) - compiled = engine.compile(patterns, flavor='re') + # Since fnmatch-style patterns do not support a meta-character for + # matching at the start of the string, we first translate patterns + # and then add an explicit '^' character in the regular expression. 
+ patterns = engine.translate( + delete_patterns, + flavor=flavor, + default_translate=re.escape, + fnmatch_translate=lambda prefix: fnmatch.translate(prefix).rstrip(r'\Z$'), + ) + # and now we add the '^' meta-character to ensure that we only match + # at the beginning of the string and not in the middle of the string + patterns = (engine.transform('^'.__add__, pattern) for pattern in patterns) + compiled = [re.compile(pattern) for pattern in patterns] def prune_redux(line: str, pattern: re.Pattern[str]) -> str: return pattern.sub('', line) diff --git a/sphinx/testing/_matcher/engine.py b/sphinx/testing/_matcher/engine.py index b29fa1894f3..56d098f6a14 100644 --- a/sphinx/testing/_matcher/engine.py +++ b/sphinx/testing/_matcher/engine.py @@ -105,30 +105,50 @@ def transform(fn: Callable[[str], str], x: LinePattern, /) -> LinePattern: # No return fn(x) if isinstance(x, str) else x -def translate(patterns: Iterable[LinePattern], *, flavor: Flavor) -> Iterable[LinePattern]: +def string_expression(line: str, /) -> str: + """A regular expression matching exactly *line*.""" + return rf'^(?s:{re.escape(line)})\Z' + + +def translate( + patterns: Iterable[LinePattern], + *, + flavor: Flavor, + default_translate: Callable[[str], str] = string_expression, + fnmatch_translate: Callable[[str], str] = fnmatch.translate, +) -> Iterable[LinePattern]: r"""Translate regular expressions in *patterns* according to *flavor*. :param patterns: An iterable of patterns to translate if needed. + :param flavor: The regex pattern to use. + :param default_translate: Translation function for ``'none'`` flavor. + :param fnmatch_translate: Translation function for ``'fnmatch'`` flavor. :return: An iterable of :class:`re`-style patterns. 
- - Usage:: - - patterns = list(translate(['a*', re.compile('b')], flavor='fnmatch')) - patterns == ['(?:a.*)\\Z', re.compile('b')] """ _check_flavor(flavor) if flavor == 'none': - return (transform(re.escape, pattern) for pattern in patterns) + return (transform(default_translate, pattern) for pattern in patterns) if flavor == 'fnmatch': - return (transform(fnmatch.translate, pattern) for pattern in patterns) + return (transform(fnmatch_translate, pattern) for pattern in patterns) return patterns -def compile(patterns: Iterable[LinePattern], *, flavor: Flavor) -> Sequence[re.Pattern[str]]: +def compile( + patterns: Iterable[LinePattern], + *, + flavor: Flavor, + default_translate: Callable[[str], str] = string_expression, + fnmatch_translate: Callable[[str], str] = fnmatch.translate, +) -> Sequence[re.Pattern[str]]: """Compile one or more patterns into :class:`~re.Pattern` objects.""" - patterns = translate(patterns, flavor=flavor) + patterns = translate( + patterns, + flavor=flavor, + default_translate=default_translate, + fnmatch_translate=fnmatch_translate, + ) # mypy does not like map + re.compile() although it is correct but # this is likely due to https://github.com/python/mypy/issues/11880 return tuple(re.compile(pattern) for pattern in patterns) diff --git a/sphinx/testing/matcher.py b/sphinx/testing/matcher.py index c7cfdda776c..b4777bc06c4 100644 --- a/sphinx/testing/matcher.py +++ b/sphinx/testing/matcher.py @@ -136,6 +136,9 @@ def iterfind( same as they are given (or arbitrary if they are given in a set). 
""" patterns = engine.to_line_patterns(expect) + if not patterns: # nothinig to match + return + compiled_patterns = set(self.__compile(patterns, flavor=flavor)) matchers = {pattern.match for pattern in compiled_patterns} @@ -280,7 +283,7 @@ def _assert_found( patterns: Sequence[LinePattern], *, count: int | None, - flavor: Flavor | None = None, + flavor: Flavor | None, ) -> None: blocks = self.iterfind_blocks(patterns, flavor=flavor) @@ -311,7 +314,7 @@ def _assert_not_found( patterns: Sequence[LinePattern], *, context_size: int, - flavor: Flavor | None = None, + flavor: Flavor | None, ) -> None: if not patterns: # no pattern to find return diff --git a/tests/test_testing/test_matcher.py b/tests/test_testing/test_matcher.py index 83276a82e23..4d283358c8c 100644 --- a/tests/test_testing/test_matcher.py +++ b/tests/test_testing/test_matcher.py @@ -12,7 +12,7 @@ from sphinx.testing.matcher import LineMatcher if TYPE_CHECKING: - from collections.abc import Sequence + from collections.abc import Collection, Sequence from _pytest._code import ExceptionInfo @@ -183,38 +183,52 @@ def test_matcher_cache(): assert stack[1] == ('hello', '', 'world') -def test_matcher_find(): - lines = ['hello', 'world', 'yay', '!', '!', '!'] - matcher = LineMatcher.from_lines(lines, flavor='none') - assert matcher.find({'hello', 'yay'}) == [('hello', 0), ('yay', 2)] - - -def test_matcher_find_blocks(): - lines = ['hello', 'world', 'yay', 'hello', 'world', '!', 'yay'] - matcher = LineMatcher.from_lines(lines) - assert matcher.find_blocks(['hello', 'world']) == [ - [('hello', 0), ('world', 1)], - [('hello', 3), ('world', 4)], - ] - - @pytest.mark.parametrize( ('lines', 'flavor', 'pattern', 'expect'), [ + ([], 'none', [], []), + (['a'], 'none', '', []), + (['a'], 'none', [], []), (['1', 'b', '3', 'a', '5', '!'], 'none', ('a', 'b'), [('b', 1), ('a', 3)]), (['blbl', 'yay', 'hihi', '^o^'], 'fnmatch', '*[ao]*', [('yay', 1), ('^o^', 3)]), (['111', 'hello', 'world', '222'], 're', r'\d+', [('111', 0), 
('222', 3)]), + (['hello', 'world', 'yay'], 'none', {'hello', 'yay'}, [('hello', 0), ('yay', 2)]), + (['hello', 'world', 'yay'], 'fnmatch', {'hello', 'y*y'}, [('hello', 0), ('yay', 2)]), + (['hello', 'world', 'yay'], 're', {'hello', r'^y\wy$'}, [('hello', 0), ('yay', 2)]), ], ) -def test_matcher_flavor( +def test_matcher_find( lines: list[str], flavor: Flavor, - pattern: Sequence[LinePattern], + pattern: Collection[LinePattern], expect: Sequence[tuple[str, int]], ) -> None: matcher = LineMatcher.from_lines(lines, flavor=flavor) assert matcher.find(pattern) == expect + matcher = LineMatcher.from_lines(lines, flavor='none') + assert matcher.find(pattern, flavor=flavor) == expect + + +def test_matcher_find_blocks(): + lines = ['hello', 'world', 'yay', 'hello', 'world', '!', 'yay'] + matcher = LineMatcher.from_lines(lines) + + assert matcher.find_blocks(['hello', 'world']) == [ + [('hello', 0), ('world', 1)], + [('hello', 3), ('world', 4)], + ] + + assert matcher.find_blocks(['hello', 'w[oO]rld'], flavor='fnmatch') == [ + [('hello', 0), ('world', 1)], + [('hello', 3), ('world', 4)], + ] + + assert matcher.find_blocks(['hello', r'^w[a-z]{2}\wd$'], flavor='re') == [ + [('hello', 0), ('world', 1)], + [('hello', 3), ('world', 4)], + ] + def test_assert_match(): matcher = LineMatcher.from_lines(['a', 'b', 'c', 'd']) diff --git a/tests/test_testing/test_matcher_engine.py b/tests/test_testing/test_matcher_engine.py index 90b32d5a135..bd72f6d71cd 100644 --- a/tests/test_testing/test_matcher_engine.py +++ b/tests/test_testing/test_matcher_engine.py @@ -59,7 +59,7 @@ def test_translate_expressions(): string, pattern = 'a*', re.compile('.*') inputs = (string, pattern) - expect = [re.escape(string), pattern] + expect = [rf'^(?s:{re.escape(string)})\Z', pattern] assert list(engine.translate(inputs, flavor='none')) == expect expect = [string, pattern] @@ -73,7 +73,7 @@ def test_compile_patterns(): string = 'a*' compiled = re.compile('.*') - expect = (re.compile(re.escape(string)), 
compiled) + expect = (re.compile(rf'^(?s:{re.escape(string)})\Z'), compiled) assert engine.compile([string, compiled], flavor='none') == expect expect = (re.compile(fnmatch.translate(string)), compiled) From 7edf27327f0efad786560779ca4252b76f08acd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 2 Apr 2024 18:42:34 +0200 Subject: [PATCH 19/66] simplify ANSI handling --- sphinx/testing/_matcher/cleaner.py | 43 +++++++--------------- sphinx/testing/_matcher/options.py | 32 +++++----------- tests/test_testing/test_matcher.py | 2 +- tests/test_testing/test_matcher_cleaner.py | 38 +++++++++---------- tests/test_testing/test_matcher_options.py | 5 +-- 5 files changed, 45 insertions(+), 75 deletions(-) diff --git a/sphinx/testing/_matcher/cleaner.py b/sphinx/testing/_matcher/cleaner.py index 431429ff933..2a67c9c1d35 100644 --- a/sphinx/testing/_matcher/cleaner.py +++ b/sphinx/testing/_matcher/cleaner.py @@ -10,7 +10,7 @@ from typing import TYPE_CHECKING from sphinx.testing._matcher import engine, util -from sphinx.util.console import strip_colors, strip_control_sequences +from sphinx.util.console import strip_escape_sequences if TYPE_CHECKING: from collections.abc import Iterable, Sequence @@ -31,9 +31,8 @@ def clean_text(text: str, /, **options: Unpack[Options]) -> Iterable[str]: """Clean a text, returning an iterable of lines.""" - ctrl = options.get('ctrl', True) - color = options.get('color', True) - text = strip_ansi(text, ctrl=ctrl, color=color) + if not options.get('ansi', True): + text = strip_escape_sequences(text) strip = options.get('strip', False) text = strip_chars(text, strip) @@ -48,17 +47,15 @@ def clean_lines(lines: Iterable[str], /, **options: Unpack[Options]) -> Iterable """Clean an iterable of lines.""" stripline = options.get('stripline', False) lines = strip_lines(lines, stripline) - # Removing empty lines first ensures that serial duplicates can - # be eliminated in one 
cycle. Inverting the order of operations - # is not possible since empty lines may 'hide' duplicated lines. - empty = options.get('empty', True) + + keep_empty = options.get('keep_empty', True) compress = options.get('compress', False) unique = options.get('unique', False) - lines = filter_lines(lines, empty=empty, compress=compress, unique=unique) + lines = filter_lines(lines, keep_empty=keep_empty, compress=compress, unique=unique) delete = options.get('delete', ()) flavor = options.get('flavor', 'none') - lines = prune(lines, delete, flavor=flavor) + lines = prune_lines(lines, delete, flavor=flavor) ignore = options.get('ignore', None) lines = ignore_lines(lines, ignore) @@ -66,20 +63,6 @@ def clean_lines(lines: Iterable[str], /, **options: Unpack[Options]) -> Iterable return lines -def strip_ansi(text: str, /, ctrl: bool = False, color: bool = False) -> str: - """Strip ANSI escape sequences. - - :param text: The text to clean. - :param ctrl: If true, remove non-color ANSI escape sequences. - :param color: If true, remove color ANSI escape sequences. - :return: The cleaned text. - """ - # non-color control sequences must be stripped before colors - text = text if ctrl else strip_control_sequences(text) - text = text if color else strip_colors(text) - return text - - def strip_chars(text: str, chars: StripChars = True, /) -> str: """Strip expected characters from *text*.""" if isinstance(chars, bool): @@ -98,14 +81,14 @@ def filter_lines( lines: Iterable[str], /, *, - empty: bool = False, + keep_empty: bool = True, compress: bool = False, unique: bool = False, ) -> Iterable[str]: """Filter the lines. :param lines: The lines to filter. - :param empty: If true, remove empty lines. + :param keep_empty: If true, keep empty lines. :param unique: If true, remove duplicated lines. :param compress: If true, remove consecutive duplicated lines. :return: An iterable of filtered lines. 
@@ -117,7 +100,7 @@ def filter_lines( lines = filterlines(lines, compress=True) lines = filterlines(lines, empty=True) """ - if not empty: + if not keep_empty: lines = filter(None, lines) if unique: @@ -140,7 +123,7 @@ def ignore_lines(lines: Iterable[str], predicate: LinePredicate | None, /) -> It return filterfalse(predicate, lines) if callable(predicate) else lines -def prune( +def prune_lines( lines: Iterable[str], delete: DeletePattern, /, @@ -158,10 +141,10 @@ def prune( Usage:: - lines = prune(['1111a', 'b'], r'\d+', flavor='re') + lines = prune_lines(['1111a', 'b'], r'\d+', flavor='re') assert list(lines) == ['a', 'b'] - lines = prune(['a123b', 'c123d'], re.compile(r'\d+')) + lines = prune_lines(['a123b', 'c123d'], re.compile(r'\d+')) assert list(lines) == ['ab', 'cd'] For debugging purposes, an empty list *trace* can be given diff --git a/sphinx/testing/_matcher/options.py b/sphinx/testing/_matcher/options.py index 684970d3bd7..e8e8d8b4f1b 100644 --- a/sphinx/testing/_matcher/options.py +++ b/sphinx/testing/_matcher/options.py @@ -14,7 +14,7 @@ from sphinx.testing._matcher.util import LinePattern - FlagOption = Literal['color', 'ctrl', 'keepends', 'empty', 'compress', 'unique'] + FlagOption = Literal['ansi', 'keepends', 'keep_empty', 'compress', 'unique'] StripOption = Literal['strip', 'stripline'] StripChars = Union[bool, str, None] @@ -47,14 +47,8 @@ class Options(TypedDict, total=False): .. seealso:: :mod:`sphinx.testing._matcher.cleaner` """ - color: bool - """Indicate whether to keep the ANSI escape sequences for colors. - - The default value is ``True``. - """ - - ctrl: bool - """Indicate whether to keep the non-color ANSI escape sequences. + ansi: bool + """Indicate whether to keep the ANSI escape sequences. The default value is ``True``. """ @@ -72,7 +66,7 @@ class Options(TypedDict, total=False): stripline: StripChars """Call :meth:`str.strip` on the lines obtained after splitting the source. 
- The allowed values for :attr:`strip` are: + The allowed values for :attr:`stripline` are: * ``True`` -- remove leading and trailing whitespaces. * ``False`` -- keep leading and trailing whitespaces (the default). @@ -85,8 +79,8 @@ class Options(TypedDict, total=False): The default value is ``False``. """ - empty: bool - """If true, keep empty lines in the output. + keep_empty: bool + """If false, eliminate empty lines in the output. The default value is ``True``. """ @@ -160,17 +154,12 @@ class CompleteOptions(TypedDict): :meta private: """ - # Whenever a new option in :class:`Options` is added, do not - # forget to add it here and in :data:`DEFAULT_OPTIONS`. - - color: bool - ctrl: bool - + ansi: bool strip: StripChars stripline: StripChars keepends: bool - empty: bool + keep_empty: bool compress: bool unique: bool @@ -186,12 +175,11 @@ class Configurable: __slots__ = ('_options',) default_options: ClassVar[CompleteOptions] = CompleteOptions( - color=True, - ctrl=True, + ansi=True, strip=False, stripline=False, keepends=False, - empty=True, + keep_empty=True, compress=False, unique=False, delete=(), diff --git a/tests/test_testing/test_matcher.py b/tests/test_testing/test_matcher.py index 4d283358c8c..db77d4f6074 100644 --- a/tests/test_testing/test_matcher.py +++ b/tests/test_testing/test_matcher.py @@ -173,7 +173,7 @@ def test_matcher_cache(): assert stack[0] is cached assert matcher.lines() is cached - with matcher.override(color=False): + with matcher.override(ansi=False): assert len(stack) == 2 assert stack[0] is cached assert stack[1] is None diff --git a/tests/test_testing/test_matcher_cleaner.py b/tests/test_testing/test_matcher_cleaner.py index 046996d3a43..337b1dec407 100644 --- a/tests/test_testing/test_matcher_cleaner.py +++ b/tests/test_testing/test_matcher_cleaner.py @@ -5,35 +5,35 @@ import pytest -from sphinx.testing._matcher import cleaner +from sphinx.testing._matcher.cleaner import filter_lines, prune_lines, strip_chars, strip_lines if 
TYPE_CHECKING: from collections.abc import Callable, Sequence def test_strip_chars(): - assert cleaner.strip_chars('abaaa\n') == 'abaaa' - assert cleaner.strip_chars('abaaa\n', False) == 'abaaa\n' - assert cleaner.strip_chars('abaaa', 'a') == 'b' - assert cleaner.strip_chars('abaaa', 'ab') == '' + assert strip_chars('abaaa\n') == 'abaaa' + assert strip_chars('abaaa\n', False) == 'abaaa\n' + assert strip_chars('abaaa', 'a') == 'b' + assert strip_chars('abaaa', 'ab') == '' def test_strip_lines(): - assert list(cleaner.strip_lines(['aba\n', 'aba\n'])) == ['aba', 'aba'] - assert list(cleaner.strip_lines(['aba\n', 'aba\n'], False)) == ['aba\n', 'aba\n'] - assert list(cleaner.strip_lines(['aba', 'aba'], 'a')) == ['b', 'b'] - assert list(cleaner.strip_lines(['aba', 'aba'], 'ab')) == ['', ''] + assert list(strip_lines(['aba\n', 'aba\n'])) == ['aba', 'aba'] + assert list(strip_lines(['aba\n', 'aba\n'], False)) == ['aba\n', 'aba\n'] + assert list(strip_lines(['aba', 'aba'], 'a')) == ['b', 'b'] + assert list(strip_lines(['aba', 'aba'], 'ab')) == ['', ''] def test_filter_lines(): src = ['a', 'a', '', 'a', 'b', 'c', 'a'] - assert list(cleaner.filter_lines(src, empty=False, compress=True)) == ['a', 'b', 'c', 'a'] - assert list(cleaner.filter_lines(src, empty=False, unique=True)) == ['a', 'b', 'c'] + assert list(filter_lines(src, keep_empty=False, compress=True)) == ['a', 'b', 'c', 'a'] + assert list(filter_lines(src, keep_empty=False, unique=True)) == ['a', 'b', 'c'] expect = ['a', '', 'a', 'b', 'c', 'a'] - assert list(cleaner.filter_lines(src, empty=True, compress=True)) == expect + assert list(filter_lines(src, keep_empty=True, compress=True)) == expect - assert list(cleaner.filter_lines(src, empty=True, unique=True)) == ['a', '', 'b', 'c'] + assert list(filter_lines(src, keep_empty=True, unique=True)) == ['a', '', 'b', 'c'] @pytest.fixture() @@ -43,7 +43,7 @@ def prune_trace_object() -> Callable[[], list[Sequence[tuple[str, Sequence[str]] def 
test_prune_prefix(prune_trace_object): trace = prune_trace_object() - lines = cleaner.prune(['1111a1', 'b1'], '1', flavor='none', trace=trace) + lines = prune_lines(['1111a1', 'b1'], '1', flavor='none', trace=trace) assert list(lines) == ['a1', 'b1'] assert trace == [ [ @@ -57,7 +57,7 @@ def test_prune_prefix(prune_trace_object): ] trace = prune_trace_object() - lines = cleaner.prune(['1111a1', 'b1'], r'\d+', flavor='re', trace=trace) + lines = prune_lines(['1111a1', 'b1'], r'\d+', flavor='re', trace=trace) assert list(lines) == ['a1', 'b1'] assert trace == [ [('1111a1', ['a1']), ('a1', ['a1'])], @@ -65,7 +65,7 @@ def test_prune_prefix(prune_trace_object): ] trace = prune_trace_object() - lines = cleaner.prune(['/a/b/c.txt', 'keep.py'], '*.txt', flavor='fnmatch', trace=trace) + lines = prune_lines(['/a/b/c.txt', 'keep.py'], '*.txt', flavor='fnmatch', trace=trace) assert list(lines) == ['', 'keep.py'] assert trace == [ [('/a/b/c.txt', ['']), ('', [''])], @@ -74,14 +74,14 @@ def test_prune_prefix(prune_trace_object): def test_prune_groups(prune_trace_object): - lines = cleaner.prune(['a123b', 'c123d'], re.compile(r'\d+')) + lines = prune_lines(['a123b', 'c123d'], re.compile(r'\d+')) assert list(lines) == ['ab', 'cd'] p1 = re.compile(r'\d\d') p2 = re.compile(r'\n+') trace = prune_trace_object() - lines = cleaner.prune(['a 123\n456x7\n8\n b'], [p1, p2], trace=trace) + lines = prune_lines(['a 123\n456x7\n8\n b'], [p1, p2], trace=trace) assert list(lines) == ['a x b'] assert len(trace) == 1 @@ -94,7 +94,7 @@ def test_prune_groups(prune_trace_object): assert trace[0][2] == ('a x b', ['a x b', 'a x b']) trace = prune_trace_object() - lines = cleaner.prune(['a 123\n456x7\n8\n b'], [p2, p1], trace=trace) + lines = prune_lines(['a 123\n456x7\n8\n b'], [p2, p1], trace=trace) assert list(lines) == ['a x b'] assert len(trace) == 1 diff --git a/tests/test_testing/test_matcher_options.py b/tests/test_testing/test_matcher_options.py index 6baf677a2fa..233b1e78a5b 100644 --- 
a/tests/test_testing/test_matcher_options.py +++ b/tests/test_testing/test_matcher_options.py @@ -29,14 +29,13 @@ def check(option: OptionName, default: object) -> None: assert default_options[option] == default processed.add(option) - check('color', True) - check('ctrl', True) + check('ansi', True) check('strip', False) check('stripline', False) check('keepends', False) - check('empty', True) + check('keep_empty', True) check('compress', False) check('unique', False) From dc37de5b94d8f91024161ecb375a62a7d2f32584 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 2 Apr 2024 19:27:00 +0200 Subject: [PATCH 20/66] remove complicated stuff --- sphinx/util/console.py | 53 +++----- tests/test_util/test_util_console.py | 173 ++++----------------------- 2 files changed, 34 insertions(+), 192 deletions(-) diff --git a/sphinx/util/console.py b/sphinx/util/console.py index 1e4ce4e810f..f4a748a0c97 100644 --- a/sphinx/util/console.py +++ b/sphinx/util/console.py @@ -45,36 +45,17 @@ def turquoise(text: str) -> str: ... 
# NoQA: E704 colorama = None _CSI: Final[str] = re.escape('\x1b[') # 'ESC [': Control Sequence Introducer -_OSC: Final[str] = re.escape('\x1b]') # 'ESC ]': Operating System Command -_BELL: Final[str] = re.escape('\x07') # bell command # ANSI escape sequences for colors _ansi_color_re: Final[re.Pattern[str]] = re.compile('\x1b.*?m') -# ANSI escape sequences supported by vt100 terminal (non-colors) -_ansi_other_re: Final[re.Pattern[str]] = re.compile( +# ANSI escape sequences +_ansi_re: Final[re.Pattern[str]] = re.compile( _CSI + r"""(?: - H # HOME - |\?\d+[hl] # enable/disable features (e.g., cursor, mouse, etc) - |[1-6] q # cursor shape (e.g., blink) (note the space before 'q') - |2?J # erase down (J) or clear screen (2J) - |\d*[ABCD] # cursor up/down/forward/backward - |\d+G # move to column - |(?:\d;)?\d+;\d+H # move to (x, y) + (\d\d;){0,2}\d\dm # ANSI color code |\dK # erase in line - ) | """ - + _OSC - + r"""(?: - \d;.+?\x07 # set window title - ) | """ - + _BELL, - re.VERBOSE | re.ASCII, -) - -# ANSI escape sequences -_ansi_re: Final[re.Pattern[str]] = re.compile( - ' | '.join((_ansi_color_re.pattern, _ansi_other_re.pattern)), + )""", re.VERBOSE | re.ASCII, ) @@ -153,20 +134,14 @@ def strip_colors(s: str) -> str: return _ansi_color_re.sub('', s) -def strip_control_sequences(text: str, /) -> str: - """Strip non-color escape sequences from *text*.""" - return _ansi_other_re.sub('', text) - - def strip_escape_sequences(text: str, /) -> str: - """Strip all control sequences from *text*.""" - # Remove control sequences first so that text of the form - # - # '\x1b[94m' + '\x1bA' + TEXT + '\x1b[0m' - # - # is cleaned to TEXT and not '' (otherwise '[94m\x1bAabc\x1b[0' - # is considered by :data:`_ansi_color_re` and removed altogther). - return strip_colors(strip_control_sequences(text)) + """Strip ANSI escape sequences from *text*. + + Note that this function only strips color sequences and ANSI erase in line + escape sequences. 
Other escape sequences are kept and any escape sequence + present in + """ + return _ansi_re.sub('', text) def create_color_func(name: str) -> None: @@ -199,9 +174,9 @@ def inner(text: str) -> str: ('lightgray', 'white'), ] -for i, (dark, light) in enumerate(_colors, 30): - codes[dark] = '\x1b[%im' % i - codes[light] = '\x1b[%im' % (i + 60) +for _i, (_dark, _light) in enumerate(_colors, 30): + codes[_dark] = '\x1b[%im' % _i + codes[_light] = '\x1b[%im' % (_i + 60) _orig_codes = codes.copy() diff --git a/tests/test_util/test_util_console.py b/tests/test_util/test_util_console.py index 4055d7d0235..5742556c198 100644 --- a/tests/test_util/test_util_console.py +++ b/tests/test_util/test_util_console.py @@ -1,169 +1,36 @@ from __future__ import annotations -import itertools +import string from typing import TYPE_CHECKING -import pytest - import sphinx.util.console as term -from sphinx.util.console import strip_colors, strip_control_sequences, strip_escape_sequences +from sphinx.util.console import strip_colors, strip_escape_sequences if TYPE_CHECKING: - from collections.abc import Sequence - from typing import Any, Final, TypeVar + from typing import TypeVar _T = TypeVar('_T') - Style = str - """An ANSI style (color or format) known by :mod:`sphinx.util.console`.""" - AnsiCode = str - """An ANSI escape sequence.""" - -ESC: Final[str] = '\x1b' -CSI: Final[str] = '\x1b[' -OSC: Final[str] = '\x1b]' -BELL: Final[str] = '\x07' - - -def osc_title(title: str) -> str: - """OSC string for changing the terminal title.""" - return f'{OSC}2;{title}{BELL}' - - -def insert_ansi(text: str, codes: Sequence[AnsiCode], *, reset: bool = False) -> str: - """Add ANSI escape sequences codes to *text*. - - If *reset* is True, the reset code is added at the end. - - :param text: The text to decorate. - :param codes: A list of ANSI esc. seq. to use deprived of their CSI prefix. - :param reset: Indicate whether to add the reset esc. seq. - :return: The decorated text. 
- """ - for code in codes: - text = f'{code}{text}' - if reset: - text = term.reset(text) - return text - - -def apply_style(text: str, codes: Sequence[AnsiCode | Style]) -> str: - """Apply one or more ANSI esc. seq. to *text*. - - Each item in *codes* can either be a color name (e.g., 'blue'), - a text decoration (e.g., 'blink') or an ANSI esc. seq. deprived - of its CSI prefix (e.g., '34m'). - """ - for code in codes: - if code in term.codes: - text = term.colorize(code, text) - else: - text = insert_ansi(text, [code]) - return text - - -def powerset( - elems: Sequence[_T], *, n: int | None = None, total: bool = True -) -> list[tuple[_T, ...]]: - r"""Generate the powerset over *seq*. - - :param elems: The elements to get the powerset over. - :param n: Optional maximum size of a subset. - :param total: If false, quotient the result by :math:`\mathfrak{S}_n`. - - Example: - ------- - - .. code-block:: python - - powerset([1, 2], total=True) - [(), (1,), (2,), (1, 2), (2, 1)] - - powerset([1, 2], total=False) - [(), (1,), (2,), (1, 2)] - """ - if n is None: - n = len(elems) - gen = itertools.permutations if total else itertools.combinations - return list(itertools.chain.from_iterable(gen(elems, i) for i in range(n + 1))) - - -@pytest.mark.parametrize('invariant', [ESC, CSI, OSC]) -def test_strip_invariants(invariant: str) -> None: - assert strip_colors(invariant) == invariant - assert strip_control_sequences(invariant) == invariant - assert strip_escape_sequences(invariant) == invariant - - -# some color/style codes to use (but not composed) -_STYLES: list[tuple[AnsiCode, ...]] = [ - *[(f'{CSI}{";".join(map(str, s))}m',) for s in [range(s) for s in range(4)]], - *powerset(['blue', 'bold']), -] -# some non-color ESC codes to use (will be composed) -_CNTRLS: list[tuple[AnsiCode, ...]] = powerset([f'{CSI}A', f'{CSI}0G', f'{CSI}1;20;128H']) - - -# For some reason that I (picnixz) do not understand, it is not possible to -# create a mark decorator using 
pytest.mark.parametrize.with_args(ids=...). -# -# As such, in order not to lose autocompletion from PyCharm, we will pass -# the custom id function to each call to `pytest.mark.parametrize`. -def _clean_id(value: Any) -> str: - if isinstance(value, str) and not value: - return '' - - if isinstance(value, (list, tuple)): - if not value: - return '()' - return '-'.join(map(_clean_id, value)) - - return repr(value) - - -@pytest.mark.parametrize('prefix', ['', 'raw'], ids=_clean_id) # non-formatted part -@pytest.mark.parametrize('source', ['', 'abc\ndef', BELL], ids=_clean_id) -@pytest.mark.parametrize('style', _STYLES, ids=_clean_id) -def test_strip_style(prefix: str, source: str, style: tuple[AnsiCode, ...]) -> None: - expect = prefix + source - pretty = prefix + apply_style(source, style) - assert strip_colors(pretty) == expect, (pretty, expect) - - -@pytest.mark.parametrize('prefix', ['', 'raw'], ids=_clean_id) # non-formatted part -@pytest.mark.parametrize('source', ['', 'abc\ndef'], ids=_clean_id) -@pytest.mark.parametrize('style', _STYLES, ids=_clean_id) -@pytest.mark.parametrize('cntrl', _CNTRLS, ids=_clean_id) -def test_strip_cntrl( - prefix: str, source: str, style: tuple[AnsiCode, ...], cntrl: tuple[AnsiCode, ...] 
-) -> None: - expect = pretty = prefix + apply_style(source, style) - # does nothing since there are only color sequences - assert strip_control_sequences(pretty) == expect, (pretty, expect) - - expect = prefix + source - pretty = prefix + insert_ansi(source, cntrl) - # all non-color codes are removed correctly - assert strip_control_sequences(pretty) == expect, (pretty, expect) +ERASE_IN_LINE = '\x1b[2K' -@pytest.mark.parametrize('prefix', ['', 'raw'], ids=_clean_id) # non-formatted part -@pytest.mark.parametrize('source', ['', 'abc\ndef'], ids=_clean_id) -@pytest.mark.parametrize('style', _STYLES, ids=_clean_id) -@pytest.mark.parametrize('cntrl', _CNTRLS, ids=_clean_id) -def test_strip_ansi( - prefix: str, source: str, style: tuple[AnsiCode, ...], cntrl: tuple[AnsiCode, ...] -) -> None: - expect = prefix + source +def test_strip_colors(): + s = 'hello world - ' + assert strip_colors(s) == s, s + assert strip_colors(term.blue(s)) == s + assert strip_colors(term.blue(s) + ERASE_IN_LINE) == s + ERASE_IN_LINE - with_style = prefix + apply_style(source, style) - assert strip_escape_sequences(with_style) == expect, (with_style, expect) + t = s + term.blue(s) + assert strip_colors(t + ERASE_IN_LINE) == s * 2 + ERASE_IN_LINE - with_cntrl = prefix + insert_ansi(source, cntrl) - assert strip_escape_sequences(with_cntrl) == expect, (with_cntrl, expect) + # this fails but this shouldn't :( + # assert strip_colors('a' + term.blue('b') + ERASE_IN_LINE + 'c' + term.blue('d')) == 'abcd' - composed = insert_ansi(with_style, cntrl) # add some cntrl sequences - assert strip_escape_sequences(composed) == expect, (composed, expect) - composed = apply_style(with_cntrl, style) # add some color sequences - assert strip_escape_sequences(composed) == expect, (composed, expect) +def test_strip_escape_sequences(): + s = string.printable + assert strip_escape_sequences(s) == s + assert strip_escape_sequences(term.blue(s)) == s + assert strip_escape_sequences(term.blue(s) + '\x1b[2K') == s + 
assert strip_escape_sequences(s + term.blue(s) + '\x1b[2K') == s * 2 + assert strip_escape_sequences(s + term.blue(s) + '\x1b[2K' + s) == s * 3 From 7ce24f35e8b90995957fc0216c1a306df2768b93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 2 Apr 2024 19:28:16 +0200 Subject: [PATCH 21/66] fixup --- sphinx/util/console.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sphinx/util/console.py b/sphinx/util/console.py index f4a748a0c97..5b91f7ccf4b 100644 --- a/sphinx/util/console.py +++ b/sphinx/util/console.py @@ -135,12 +135,7 @@ def strip_colors(s: str) -> str: def strip_escape_sequences(text: str, /) -> str: - """Strip ANSI escape sequences from *text*. - - Note that this function only strips color sequences and ANSI erase in line - escape sequences. Other escape sequences are kept and any escape sequence - present in - """ + """Strip ANSI escape sequences from *text*.""" return _ansi_re.sub('', text) From 846edef92eed4a7fb6693a230741893c49f65745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 2 Apr 2024 19:29:52 +0200 Subject: [PATCH 22/66] fixup --- tests/test_util/test_util_console.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/test_util/test_util_console.py b/tests/test_util/test_util_console.py index 5742556c198..63361c0b40d 100644 --- a/tests/test_util/test_util_console.py +++ b/tests/test_util/test_util_console.py @@ -1,6 +1,5 @@ from __future__ import annotations -import string from typing import TYPE_CHECKING import sphinx.util.console as term @@ -28,9 +27,11 @@ def test_strip_colors(): def test_strip_escape_sequences(): - s = string.printable - assert strip_escape_sequences(s) == s + s = 'hello world - ' + assert strip_escape_sequences(s) == s, s assert strip_escape_sequences(term.blue(s)) == s - assert strip_escape_sequences(term.blue(s) + 
'\x1b[2K') == s - assert strip_escape_sequences(s + term.blue(s) + '\x1b[2K') == s * 2 - assert strip_escape_sequences(s + term.blue(s) + '\x1b[2K' + s) == s * 3 + assert strip_escape_sequences(term.blue(s) + ERASE_IN_LINE) == s + + t = s + term.blue(s) + assert strip_escape_sequences(t + ERASE_IN_LINE) == s * 2 + assert strip_escape_sequences(t + ERASE_IN_LINE + t + ERASE_IN_LINE) == s * 4 From 136ac6699185d871657924d8d913bb2703ee3733 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 3 Apr 2024 12:39:55 +0200 Subject: [PATCH 23/66] fixup --- sphinx/util/console.py | 60 ++++++++++++++----- tests/test_util/test_util_console.py | 88 +++++++++++++++++++++------- 2 files changed, 112 insertions(+), 36 deletions(-) diff --git a/sphinx/util/console.py b/sphinx/util/console.py index 5b91f7ccf4b..1cbd7a858be 100644 --- a/sphinx/util/console.py +++ b/sphinx/util/console.py @@ -46,18 +46,23 @@ def turquoise(text: str) -> str: ... # NoQA: E704 _CSI: Final[str] = re.escape('\x1b[') # 'ESC [': Control Sequence Introducer -# ANSI escape sequences for colors -_ansi_color_re: Final[re.Pattern[str]] = re.compile('\x1b.*?m') +# Pattern matching ANSI control sequences containing colors. +_ansi_color_re: Final[re.Pattern[str]] = re.compile(r'\x1b\[(?:\d+;){0,2}\d*m') -# ANSI escape sequences _ansi_re: Final[re.Pattern[str]] = re.compile( _CSI - + r"""(?: - (\d\d;){0,2}\d\dm # ANSI color code - |\dK # erase in line + + r""" + (?: + (?:\d+;){0,2}\d*m # ANSI color code ('m' is equivalent to '0m') + | + [012]?K # ANSI Erase in Line ('K' is equivalent to '0K') )""", re.VERBOSE | re.ASCII, ) +"""Pattern matching ANSI CSI colors (SGR) and erase line (EL) sequences. + +See :func:`strip_escape_sequences` for details. 
+"""
 
 codes: dict[str, str] = {}
 
@@ -81,7 +86,7 @@ def term_width_line(text: str) -> str:
         return text + '\n'
     else:
         # codes are not displayed, this must be taken into account
-        return text.ljust(_tw + len(text) - len(_ansi_re.sub('', text))) + '\r'
+        return text.ljust(_tw + len(text) - len(strip_escape_sequences(text))) + '\r'
 
 
 def color_terminal() -> bool:
@@ -129,13 +134,38 @@ def escseq(name: str) -> str:
 
 
 def strip_colors(s: str) -> str:
-    """Strip all color escape sequences from *s*."""
-    # TODO: deprecate parameter *s* in favor of a positional-only parameter *text*
+    """Remove the ANSI color codes in a string *s*.
+
+    .. caution::
+
+       This function is not meant to be used in production and should only
+       be used for testing Sphinx's output messages.
+
+    .. seealso:: :func:`strip_control_sequences`
+    """
     return _ansi_color_re.sub('', s)
 
 
 def strip_escape_sequences(text: str, /) -> str:
-    """Strip ANSI escape sequences from *text*."""
+    r"""Remove the ANSI CSI colors and "erase in line" sequences.
+
+    Other `escape sequences `__ (e.g., VT100-specific functions) are not
+    supported and only control sequences *natively* known to Sphinx (i.e.,
+    colors declared in this module and "erase entire line" (``'\x1b[2K'``))
+    are eliminated by this function.
+
+    .. caution::
+
+       This function is not meant to be used in production and should only
+       be used for testing Sphinx's output messages that were not tampered
+       with by third-party extensions.
+
+    .. versionadded:: 7.3
+
+       This function is added as an *experimental* feature. 
+ + __ https://en.wikipedia.org/wiki/ANSI_escape_code + """ return _ansi_re.sub('', text) @@ -155,8 +185,8 @@ def inner(text: str) -> str: 'blink': '05m', } -for _name, _value in _attrs.items(): - codes[_name] = '\x1b[' + _value +for __name, __value in _attrs.items(): + codes[__name] = '\x1b[' + __value _colors = [ ('black', 'darkgray'), @@ -169,9 +199,9 @@ def inner(text: str) -> str: ('lightgray', 'white'), ] -for _i, (_dark, _light) in enumerate(_colors, 30): - codes[_dark] = '\x1b[%im' % _i - codes[_light] = '\x1b[%im' % (_i + 60) +for __i, (__dark, __light) in enumerate(_colors, 30): + codes[__dark] = '\x1b[%im' % __i + codes[__light] = '\x1b[%im' % (__i + 60) _orig_codes = codes.copy() diff --git a/tests/test_util/test_util_console.py b/tests/test_util/test_util_console.py index 63361c0b40d..bec4c91ace9 100644 --- a/tests/test_util/test_util_console.py +++ b/tests/test_util/test_util_console.py @@ -1,37 +1,83 @@ from __future__ import annotations +import itertools +import operator from typing import TYPE_CHECKING -import sphinx.util.console as term -from sphinx.util.console import strip_colors, strip_escape_sequences +import pytest + +from sphinx.util.console import blue, reset, strip_colors, strip_escape_sequences if TYPE_CHECKING: - from typing import TypeVar + from collections.abc import Callable, Sequence + from typing import Final, TypeVar _T = TypeVar('_T') -ERASE_IN_LINE = '\x1b[2K' +ERASE_IN_LINE: Final[str] = '\x1b[2K' +BELL_TEXT: Final[str] = '\x07 Hello world!' 
+ + +@pytest.mark.parametrize( + ('strip_function', 'ansi_base_blocks', 'text_base_blocks'), + [ + ( + strip_colors, + # double ERASE_IN_LINE so that the tested strings may have 2 of them + [BELL_TEXT, blue(BELL_TEXT), reset(BELL_TEXT), ERASE_IN_LINE, ERASE_IN_LINE], + # :func:`strip_colors` removes color codes but keep ERASE_IN_LINE + [BELL_TEXT, BELL_TEXT, BELL_TEXT, ERASE_IN_LINE, ERASE_IN_LINE], + ), + ( + strip_escape_sequences, + # double ERASE_IN_LINE so that the tested strings may have 2 of them + [BELL_TEXT, blue(BELL_TEXT), reset(BELL_TEXT), ERASE_IN_LINE, ERASE_IN_LINE], + # :func:`strip_escape_sequences` strips ANSI codes known by Sphinx + [BELL_TEXT, BELL_TEXT, BELL_TEXT, '', ''], + ), + ], + ids=[strip_colors.__name__, strip_escape_sequences.__name__], +) +def test_strip_ansi( + strip_function: Callable[[str], str], + ansi_base_blocks: Sequence[str], + text_base_blocks: Sequence[str], +) -> None: + assert callable(strip_function) + assert len(text_base_blocks) == len(ansi_base_blocks) + N = len(ansi_base_blocks) + + def next_ansi_blocks(choices: Sequence[str], n: int) -> Sequence[str]: + stream = itertools.cycle(choices) + return list(map(operator.itemgetter(0), zip(stream, range(n)))) + + for sigma in itertools.permutations(range(N), N): + ansi_blocks = list(map(ansi_base_blocks.__getitem__, sigma)) + text_blocks = list(map(text_base_blocks.__getitem__, sigma)) + for glue, n in itertools.product(['.', '\n', '\r\n'], range(4 * N)): + ansi_strings = next_ansi_blocks(ansi_blocks, n) + text_strings = next_ansi_blocks(text_blocks, n) + assert len(ansi_strings) == len(text_strings) == n -def test_strip_colors(): - s = 'hello world - ' - assert strip_colors(s) == s, s - assert strip_colors(term.blue(s)) == s - assert strip_colors(term.blue(s) + ERASE_IN_LINE) == s + ERASE_IN_LINE + ansi_string = glue.join(ansi_strings) + text_string = glue.join(text_strings) + assert strip_function(ansi_string) == text_string - t = s + term.blue(s) - assert strip_colors(t + 
ERASE_IN_LINE) == s * 2 + ERASE_IN_LINE - # this fails but this shouldn't :( - # assert strip_colors('a' + term.blue('b') + ERASE_IN_LINE + 'c' + term.blue('d')) == 'abcd' +def test_strip_ansi_short_forms(): + # In Sphinx, we always "normalize" the color codes so that they + # match "\x1b\[(\d\d;){0,2}(\d\d)m" but it might happen that + # some messages use '\x1b[0m' instead of ``reset(s)``, so we + # test whether this alternative form is supported or not. + for strip_function in [strip_colors, strip_escape_sequences]: + # \x1b[m and \x1b[0m are equivalent to \x1b[00m + assert strip_function('\x1b[m') == '' + assert strip_function('\x1b[0m') == '' -def test_strip_escape_sequences(): - s = 'hello world - ' - assert strip_escape_sequences(s) == s, s - assert strip_escape_sequences(term.blue(s)) == s - assert strip_escape_sequences(term.blue(s) + ERASE_IN_LINE) == s + # \x1b[1m is equivalent to \x1b[01m + assert strip_function('\x1b[1mbold\x1b[0m') == 'bold' - t = s + term.blue(s) - assert strip_escape_sequences(t + ERASE_IN_LINE) == s * 2 - assert strip_escape_sequences(t + ERASE_IN_LINE + t + ERASE_IN_LINE) == s * 4 + # \x1b[K is equivalent to \x1b[0K + assert strip_escape_sequences('\x1b[K') == '' From c3d4405533304f6190a8feae54cabd4aaf779554 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 3 Apr 2024 14:42:58 +0200 Subject: [PATCH 24/66] fix bug --- sphinx/testing/_matcher/buffer.py | 159 ++++++++++++++++------ sphinx/testing/_matcher/util.py | 6 +- tests/test_testing/test_matcher_buffer.py | 82 +++++++++++ 3 files changed, 202 insertions(+), 45 deletions(-) diff --git a/sphinx/testing/_matcher/buffer.py b/sphinx/testing/_matcher/buffer.py index f108f290429..91030cb261d 100644 --- a/sphinx/testing/_matcher/buffer.py +++ b/sphinx/testing/_matcher/buffer.py @@ -3,20 +3,30 @@ __all__ = ('Line', 'Block') import abc +import contextlib import itertools import operator +import re +import sys from 
collections.abc import Sequence
 from typing import TYPE_CHECKING, Generic, TypeVar, final, overload
 
+from sphinx.testing._matcher import util
+
 if TYPE_CHECKING:
-    from collections.abc import Iterable, Iterator
+    from collections.abc import Callable, Iterable, Iterator
+    from typing import Any, Union
 
     from typing_extensions import Self
 
+    from sphinx.testing._matcher.util import LinePattern
+
+    BlockLine = Union[object, LinePattern, Callable[[str], object]]
+
     _T = TypeVar('_T', bound=Sequence[str])
 
 
-class SourceView(Generic[_T], abc.ABC):
+class SourceView(Generic[_T], Sequence[str], abc.ABC):
     """A string or a sequence of strings implementing rich comparison.
 
     Given an implicit *source* as a list of strings, a :class:`SourceView`
@@ -70,6 +80,32 @@ def pformat(self) -> str:
         """A nice representation of this object."""
         return '{0.__class__.__name__}({0!r}, @={0.offset}, #={0.length})'.format(self)
 
+    @abc.abstractmethod
+    # The 'value' is 'Any' so that subclasses do not violate Liskov's substitution principle
+    def count(self, value: Any, /) -> int:
+        """Count the number of occurrences of matching item."""
+
+    # The 'value' is 'Any' so that subclasses do not violate Liskov's substitution principle
+    def index(self, value: Any, start: int = 0, stop: int = sys.maxsize, /) -> int:
+        """Return the lowest index of a matching item.
+
+        :raise ValueError: The value does not exist.
+
+        .. seealso:: :meth:`find`
+        """
+        index = self.find(value, start, stop)
+        if index == -1:
+            raise ValueError(value)
+        return index
+
+    @abc.abstractmethod
+    # The 'value' is 'Any' so that subclasses do not violate Liskov's substitution principle.
+    def find(self, value: Any, start: int = 0, stop: int = sys.maxsize, /) -> int:
+        """Return the lowest index of a matching item or *-1* on failure.
+
+        .. 
seealso:: :meth:`index` + """ + def __repr__(self) -> str: return repr(self.buffer) @@ -80,8 +116,9 @@ def __bool__(self) -> bool: """Indicate whether this view is empty or not.""" return bool(len(self)) + @final def __iter__(self) -> Iterator[str]: - """An iterator over the view's items.""" + """An iterator over the string items.""" return iter(self.buffer) def __len__(self) -> int: @@ -90,7 +127,7 @@ def __len__(self) -> int: def __contains__(self, value: object, /) -> bool: """Check that an "atomic" value is represented by this view.""" - return value in self.buffer + return value in self.buffer or self.find(value) != -1 @abc.abstractmethod def __lt__(self, other: object, /) -> bool: @@ -227,50 +264,39 @@ def endswith(self, suffix: str, start: int = 0, end: int | None = None, /) -> bo """ return self.buffer.endswith(suffix, start, end) - def count(self, sub: str, start: int = 0, end: int | None = None, /) -> int: - """Count the number of non-overlapping occurrences of a substring. + def count(self, sub: str | re.Pattern[str], /) -> int: + """Count the number of occurrences of a substring or pattern. - :param sub: A substring to locate. - :param start: The test start position. - :param end: The test stop position. + :raise TypeError: *sub* is not a string or a compiled pattern. """ - return self.buffer.count(sub, start, end) + if isinstance(sub, re.Pattern): + # avoid using value.findall() since we only want the length + # of the corresponding iterator (the following lines are more + # efficient from a memory perspective) + counter = itertools.count() + util.consume(zip(sub.finditer(self.buffer), counter)) + return next(counter) - def index(self, sub: str, start: int = 0, end: int | None = None, /) -> int: - """Get the lowest index of the substring *sub* in ``self[start:end]``. + return self.buffer.count(sub) # raise a TypeError if *sub* is not a string - :raise ValueError: The substring is not found in ``self[start:end]``. 
+ # explicitly add the method since its signature differs from :meth:`SourceView.index` + def index(self, sub: LinePattern, start: int = 0, stop: int = sys.maxsize, /) -> int: + """Find the lowest index of a substring. - .. seealso:: :meth:`find` + :raise TypeError: *sub* is not a string or a compiled pattern. """ - return self.buffer.index(sub, start, end) + return super().index(sub, start, stop) - def rindex(self, sub: str, start: int = 0, end: int | None = None, /) -> int: - """Get the highest index of the substring *sub* in ``self[start:end]``. + def find(self, sub: LinePattern, start: int = 0, stop: int = sys.maxsize, /) -> int: + """Find the lowest index of a substring or *-1* on failure. - :raise ValueError: The substring is not found in ``self[start:end]``. - - .. seealso:: :meth:`rfind` + :raise TypeError: *sub* is not a string or a compiled pattern. """ - return self.buffer.rindex(sub, start, end) - - def find(self, sub: str, start: int = 0, end: int | None = None, /) -> int: - """Get the lowest index of the substring *sub* in ``self[start:end]``. - - If the substring is not found, this returns ``-1``. - - .. seealso:: :meth:`index` - """ - return self.buffer.find(sub, start, end) - - def rfind(self, sub: str, start: int = 0, end: int | None = None, /) -> int: - """Get the highest index of the substring *sub* in ``self[start:end]``. - - If the substring is not found, this returns ``-1``. - - .. 
seealso:: :meth:`rindex` - """ - return self.buffer.rfind(sub, start, end) + if isinstance(sub, re.Pattern): + # use re.search() to find the pattern inside ``line[start:stop]`` + match = sub.search(self.buffer, start, stop) + return -1 if match is None else start + match.pos + return self.buffer.find(sub, start, stop) # raise a TypeError if *sub* is not a string def __parse_non_string(self, other: object, /) -> tuple[str, int] | None: """Try to parse *other* as a ``line`` or a ``(line, offset)`` pair.""" @@ -372,11 +398,58 @@ def context(self, delta: int, limit: int) -> tuple[slice, slice]: after_slice = slice(min(block_stop, limit), min(block_stop + delta, limit)) return before_slice, after_slice - def count(self, line: object, /) -> int: - """Count the number of occurences of a *line*.""" - return self.buffer.count(line) + def count(self, target: BlockLine, /) -> int: + """Count the number of occurrences of matching lines. + + For :class:`~re.Pattern` inputs, the following are equivalent:: + + block.count(target) + block.count(target.match) + """ + if isinstance(target, re.Pattern): + # Apply the pattern to the entire line unlike :class:`Line` + # objects that detect non-overlapping matching substrings. + return self.count(target.match) + + if callable(target): + counter = itertools.count() + util.consume(zip(filter(target, self.buffer), counter)) + return next(counter) + + return self.buffer.count(target) + + # explicitly add the method since its signature differs from :meth:`SourceView.index` + def index(self, target: BlockLine, start: int = 0, stop: int = sys.maxsize, /) -> int: + """Find the lowest index of a matching line. + + For :class:`~re.Pattern` inputs, the following are equivalent:: + + block.index(target, ...) + block.index(target.match, ...) + """ + return super().index(target, start, stop) + + def find(self, target: BlockLine, start: int = 0, stop: int = sys.maxsize, /) -> int: + """Find the lowest index of a matching line or *-1* on failure. 
+ + For :class:`~re.Pattern` inputs, the following are equivalent:: + + block.find(target, ...) + block.find(target.match, ...) + """ + if isinstance(target, re.Pattern): + return self.find(target.match, start, stop) + + if callable(target): + sliced = itertools.islice(self.buffer, start, stop) + return next(itertools.compress(itertools.count(start), map(target, sliced)), -1) + + with contextlib.suppress(ValueError): + return self.buffer.index(target, start, stop) + return -1 + + # fmt: off - # fmt: off @overload def at(self, index: int, /) -> Line: ... # NoQA: E704 @overload diff --git a/sphinx/testing/_matcher/util.py b/sphinx/testing/_matcher/util.py index 5b65e2fbd80..28c4217338b 100644 --- a/sphinx/testing/_matcher/util.py +++ b/sphinx/testing/_matcher/util.py @@ -30,12 +30,14 @@ def consume(iterator: Iterator[object], /, n: int | None = None) -> None: - """Consume *n* values from *iterator*.""" + """Consume *n* values from *iterator*. + + If *n* is not specified, this consumes the entire iterator.""" # use the C API to efficiently consume iterators if n is None: deque(iterator, maxlen=0) else: - n = max(n, 0) + assert n >= 0 next(itertools.islice(iterator, n, n), None) diff --git a/tests/test_testing/test_matcher_buffer.py b/tests/test_testing/test_matcher_buffer.py index 522bab670e6..29709e99055 100644 --- a/tests/test_testing/test_matcher_buffer.py +++ b/tests/test_testing/test_matcher_buffer.py @@ -96,6 +96,47 @@ def test_line_unsupported_operators(operand): assert Line() != operand +def test_line_count_substring(): + line = Line('abac') + assert line.count('no') == 0 + assert line.count('a') == 2 + + # 0 1 2 3 4 5 6 7 8 9 10 + line = Line(''.join(('a', 'b', '', 'b', 'b', '', 'a', 'c', '', 'c', 'c'))) + assert line.count(re.compile(r'^\Z')) == 0 + assert line.count(re.compile(r'a[bc]')) == 2 + + +def test_line_substring_index(): + line = Line('abac') + assert line.index('a') == 0 + assert line.index('a', 1) == 2 + pytest.raises(ValueError, line.index, 
'no') + pytest.raises(ValueError, line.index, 'c', 0, 2) + + # 0 1 2 3 4 5 6 7 8 9 10 + line = Line(''.join(('a', 'b', '', 'b', 'b', '', 'a', 'c', '', 'c', 'c'))) + assert line.index(re.compile(r'a\w')) == 0 + assert line.index(re.compile(r'a\w'), 3) == 6 + pytest.raises(ValueError, line.index, re.compile(r'^\Z')) + pytest.raises(ValueError, line.index, re.compile(r'c[cd]'), 0, 5) + + +def test_line_find_substring(): + line = Line('abac') + assert line.find('a') == 0 + assert line.find('a', 1) == 2 + assert line.find('no') == -1 + assert line.find('c', 0, 2) == -1 + + # 0 1 2 3 4 5 6 7 8 9 10 + line = Line(''.join(('a', 'b', '', 'b', 'b', '', 'a', 'c', '', 'c', 'c'))) + assert line.find(re.compile(r'^ab')) == 0 + assert line.find(re.compile(r'a\w'), 3) == 6 + assert line.find(re.compile(r'^\Z')) == -1 + assert line.find(re.compile(r'c[cd]'), 0, 5) == -1 + + def test_block_constructor(): empty = Block() assert empty.buffer == () @@ -179,3 +220,44 @@ def test_block_unsupported_operators(operand): def test_block_slice_context(): assert Block(['a', 'b'], 1).context(delta=4, limit=5) == (slice(0, 1), slice(3, 5)) assert Block(['a', 'b'], 3).context(delta=2, limit=9) == (slice(1, 3), slice(5, 7)) + + +def test_block_count_lines(): + block = Block(['a', 'b', 'a', 'c']) + assert block.count('no') == 0 + assert block.count('a') == 2 + + block = Block(['ab', 'bb', 'ac']) + # this also tests the predicate-based implementation + assert block.count(re.compile(r'^\Z')) == 0 + assert block.count(re.compile(r'a\w')) == 2 + + +def test_block_line_index(): + block = Block(['a', 'b', 'a', 'c']) + assert block.index('a') == 0 + assert block.index('a', 1) == 2 + pytest.raises(ValueError, block.index, 'no') + pytest.raises(ValueError, block.index, 'c', 0, 2) + + block = Block(['ab', 'bb', 'ac', 'cc']) + # this also tests the predicate-based implementation + assert block.index(re.compile(r'a\w')) == 0 + assert block.index(re.compile(r'a\w'), 1) == 2 + pytest.raises(ValueError, 
block.index, re.compile(r'^\Z')) + pytest.raises(ValueError, block.index, re.compile(r'c\w'), 0, 2) + + +def test_block_find_line(): + block = Block(['a', 'b', 'a', 'c']) + assert block.find('a') == 0 + assert block.find('a', 1) == 2 + assert block.find('no') == -1 + assert block.find('c', 0, 2) == -1 + + block = Block(['ab', 'bb', 'ac', 'cc']) + # this also tests the predicate-based implementation + assert block.find(re.compile(r'a\w')) == 0 + assert block.find(re.compile(r'a\w'), 1) == 2 + assert block.find(re.compile(r'^\Z')) == -1 + assert block.find(re.compile(r'c\W'), 0, 2) == -1 From c2d5c15df517778a035fb4618c166b19b42e4083 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 3 Apr 2024 17:02:13 +0200 Subject: [PATCH 25/66] improve coverage --- sphinx/testing/_matcher/buffer.py | 63 +++++-- sphinx/testing/_matcher/util.py | 3 +- tests/test_testing/test_matcher_buffer.py | 196 +++++++++++++++------- 3 files changed, 185 insertions(+), 77 deletions(-) diff --git a/sphinx/testing/_matcher/buffer.py b/sphinx/testing/_matcher/buffer.py index 91030cb261d..15da3f6932b 100644 --- a/sphinx/testing/_matcher/buffer.py +++ b/sphinx/testing/_matcher/buffer.py @@ -19,9 +19,11 @@ from typing_extensions import Self - from sphinx.testing._matcher.util import LinePattern + LineText = Union[str, re.Pattern[str]] + """A line's substring or a compiled substring pattern.""" - BlockLine = Union[object, LinePattern, Callable[[str], object]] + BlockMatch = Union[object, str, re.Pattern[str], Callable[[str], object]] + """A block's line, a compiled pattern or a predicate.""" _T = TypeVar('_T', bound=Sequence[str]) @@ -38,7 +40,7 @@ class SourceView(Generic[_T], Sequence[str], abc.ABC): # add __weakref__ to allow the object being weak-referencable __slots__ = ('__buffer', '__offset', '__weakref__') - def __init__(self, buffer: _T, offset: int = 0, /, *, _check: bool = True) -> None: + def __init__(self, buffer: _T, 
/, offset: int = 0, *, _check: bool = True) -> None: """Construct a :class:`SourceView`. :param buffer: The view's content (a string or a list of strings). @@ -50,6 +52,7 @@ def __init__(self, buffer: _T, offset: int = 0, /, *, _check: bool = True) -> No their constructor arguments are known to be valid at call time. """ if _check: + __tracebackhide__ = True if not isinstance(offset, int): msg = f'offset must be an integer, got: {offset!r}' raise TypeError(msg) @@ -95,6 +98,7 @@ def index(self, value: Any, start: int = 0, stop: int = sys.maxsize, /) -> int: """ index = self.find(value, start, stop) if index == -1: + __tracebackhide__ = True raise ValueError(value) return index @@ -181,7 +185,7 @@ class Line(SourceView[str]): # character's types, but it would not be possible to use the C API # implementing the :class:`str` interface anymore. - def __init__(self, line: str = '', offset: int = 0, /, *, _check: bool = True) -> None: + def __init__(self, line: str = '', /, offset: int = 0, *, _check: bool = True) -> None: """Construct a :class:`Line` object.""" super().__init__(line, offset, _check=_check) @@ -246,25 +250,27 @@ def __gt__(self, other: object, /) -> bool: # separately check offsets before the buffers for efficiency return self.offset == other[1] and self.buffer > other[0] - def startswith(self, prefix: str, start: int = 0, end: int | None = None, /) -> bool: + def startswith(self, prefix: str, start: int = 0, end: int = sys.maxsize, /) -> bool: """Test whether the line starts with the given *prefix*. :param prefix: A line prefix to test. :param start: The test start position. :param end: The test stop position. """ + __tracebackhide__ = True return self.buffer.startswith(prefix, start, end) - def endswith(self, suffix: str, start: int = 0, end: int | None = None, /) -> bool: + def endswith(self, suffix: str, start: int = 0, end: int = sys.maxsize, /) -> bool: """Test whether the line ends with the given *suffix*. :param suffix: A line suffix to test. 
:param start: The test start position. :param end: The test stop position. """ + __tracebackhide__ = True return self.buffer.endswith(suffix, start, end) - def count(self, sub: str | re.Pattern[str], /) -> int: + def count(self, sub: LineText, /) -> int: """Count the number of occurrences of a substring or pattern. :raise TypeError: *sub* is not a string or a compiled pattern. @@ -277,25 +283,36 @@ def count(self, sub: str | re.Pattern[str], /) -> int: util.consume(zip(sub.finditer(self.buffer), counter)) return next(counter) + __tracebackhide__ = True return self.buffer.count(sub) # raise a TypeError if *sub* is not a string # explicitly add the method since its signature differs from :meth:`SourceView.index` - def index(self, sub: LinePattern, start: int = 0, stop: int = sys.maxsize, /) -> int: + def index(self, sub: LineText, start: int = 0, stop: int = sys.maxsize, /) -> int: """Find the lowest index of a substring. :raise TypeError: *sub* is not a string or a compiled pattern. """ + __tracebackhide__ = True return super().index(sub, start, stop) - def find(self, sub: LinePattern, start: int = 0, stop: int = sys.maxsize, /) -> int: + def find(self, sub: LineText, start: int = 0, stop: int = sys.maxsize, /) -> int: """Find the lowest index of a substring or *-1* on failure. :raise TypeError: *sub* is not a string or a compiled pattern. """ if isinstance(sub, re.Pattern): - # use re.search() to find the pattern inside ``line[start:stop]`` - match = sub.search(self.buffer, start, stop) - return -1 if match is None else start + match.pos + # Do not use sub.search(buffer, start, end) since the '^' pattern + # character matches at the *real* beginning of *buffer* but *not* + # necessarily at the index where the search is to start. 
+ # + # Ref: https://docs.python.org/3/library/re.html#re.Pattern.search + if match := sub.search(self.buffer[start:stop]): + # normalize the start position + start_index, _, _ = slice(start, stop).indices(self.length) + return match.start() + start_index + return -1 + + __tracebackhide__ = True return self.buffer.find(sub, start, stop) # raise a TypeError if *sub* is not a string def __parse_non_string(self, other: object, /) -> tuple[str, int] | None: @@ -335,13 +352,14 @@ class Block(SourceView[tuple[str, ...]], Sequence[str]): """ def __init__( - self, buffer: Iterable[str] = (), offset: int = 0, /, *, _check: bool = True + self, buffer: Iterable[str] = (), /, offset: int = 0, *, _check: bool = True ) -> None: # It is more efficient to first consume everything and then # iterate over the values for checks rather than to add the # validated values one by one. buffer = tuple(buffer) if _check: + __tracebackhide__ = True for line in buffer: if not isinstance(line, str): err = f'expecting a native string, got: {line!r}' @@ -393,12 +411,19 @@ def context(self, delta: int, limit: int) -> tuple[slice, slice]: assert source[before] == ['2', '3'] assert source[after] == ['7', '8'] """ + assert delta >= 0, 'context size must be >= 0' + assert limit >= 0, 'source length must be >= 0' + + before_start, before_stop = max(0, self.offset - delta), min(self.offset, limit) + before_slice = slice(before_start, before_stop) + block_stop = self.offset + self.length - before_slice = slice(max(0, self.offset - delta), min(self.offset, limit)) - after_slice = slice(min(block_stop, limit), min(block_stop + delta, limit)) + after_start, after_stop = min(block_stop, limit), min(block_stop + delta, limit) + after_slice = slice(after_start, after_stop) + return before_slice, after_slice - def count(self, target: BlockLine, /) -> int: + def count(self, target: BlockMatch, /) -> int: """Count the number of occurrences of matching lines. 
For :class:`~re.Pattern` inputs, the following are equivalent:: @@ -419,7 +444,7 @@ def count(self, target: BlockLine, /) -> int: return self.buffer.count(target) # explicitly add the method since its signature differs from :meth:`SourceView.index` - def index(self, target: BlockLine, start: int = 0, stop: int = sys.maxsize, /) -> int: + def index(self, target: BlockMatch, start: int = 0, stop: int = sys.maxsize, /) -> int: """Find the lowest index of a matching line. For :class:`~re.Pattern` inputs, the following are equivalent:: @@ -429,7 +454,7 @@ def index(self, target: BlockLine, start: int = 0, stop: int = sys.maxsize, /) - """ return super().index(target, start, stop) - def find(self, target: BlockLine, start: int = 0, stop: int = sys.maxsize, /) -> int: + def find(self, target: BlockMatch, start: int = 0, stop: int = sys.maxsize, /) -> int: """Find the lowest index of a matching line or *-1* on failure. For :class:`~re.Pattern` inputs, the following are equivalent:: @@ -441,6 +466,7 @@ def find(self, target: BlockLine, start: int = 0, stop: int = sys.maxsize, /) -> return self.find(target.match, start, stop) if callable(target): + start, stop, _ = slice(start, stop).indices(self.length) sliced = itertools.islice(self.buffer, start, stop) return next(itertools.compress(itertools.count(start), map(target, sliced)), -1) @@ -479,6 +505,7 @@ def __getitem__(self, index: int | slice, /) -> str | Sequence[str]: # NoQA: E3 # normalize negative and None slice fields _, _, step = index.indices(self.length) if step != 1: + __tracebackhide__ = True msg = 'only contiguous regions can be extracted' raise ValueError(msg) return self.buffer[index] diff --git a/sphinx/testing/_matcher/util.py b/sphinx/testing/_matcher/util.py index 28c4217338b..a4b83bcd111 100644 --- a/sphinx/testing/_matcher/util.py +++ b/sphinx/testing/_matcher/util.py @@ -32,7 +32,8 @@ def consume(iterator: Iterator[object], /, n: int | None = None) -> None: """Consume *n* values from *iterator*. 
- If *n* is not specified, this consumes the entire iterator.""" + If *n* is not specified, this consumes the entire iterator. + """ # use the C API to efficiently consume iterators if n is None: deque(iterator, maxlen=0) diff --git a/tests/test_testing/test_matcher_buffer.py b/tests/test_testing/test_matcher_buffer.py index 29709e99055..2dea8f80959 100644 --- a/tests/test_testing/test_matcher_buffer.py +++ b/tests/test_testing/test_matcher_buffer.py @@ -1,5 +1,6 @@ from __future__ import annotations +import itertools import operator import re from typing import TYPE_CHECKING @@ -96,45 +97,102 @@ def test_line_unsupported_operators(operand): assert Line() != operand -def test_line_count_substring(): +def test_line_startswith(): line = Line('abac') - assert line.count('no') == 0 - assert line.count('a') == 2 + assert line.startswith('a') + assert line.startswith('ab') + assert not line.startswith('no') - # 0 1 2 3 4 5 6 7 8 9 10 - line = Line(''.join(('a', 'b', '', 'b', 'b', '', 'a', 'c', '', 'c', 'c'))) - assert line.count(re.compile(r'^\Z')) == 0 - assert line.count(re.compile(r'a[bc]')) == 2 + line = Line('ab bb c') + assert line.startswith(' ', 2) + assert line.startswith(' bb', 2) + assert not line.startswith('a', 2) -def test_line_substring_index(): - line = Line('abac') - assert line.index('a') == 0 - assert line.index('a', 1) == 2 - pytest.raises(ValueError, line.index, 'no') - pytest.raises(ValueError, line.index, 'c', 0, 2) +def test_line_endswith(): + line = Line('ab1ac') + assert line.endswith('c') + assert line.endswith('ac') + assert not line.endswith('no') + + line = Line('ab 4b 3c ') + assert line.endswith(' ', 2) + assert line.endswith('3c ', 2) + assert not line.endswith('b 3c ', 0, 4) - # 0 1 2 3 4 5 6 7 8 9 10 - line = Line(''.join(('a', 'b', '', 'b', 'b', '', 'a', 'c', '', 'c', 'c'))) - assert line.index(re.compile(r'a\w')) == 0 - assert line.index(re.compile(r'a\w'), 3) == 6 - pytest.raises(ValueError, line.index, re.compile(r'^\Z')) - 
pytest.raises(ValueError, line.index, re.compile(r'c[cd]'), 0, 5) +def test_line_type_errors(): + line = Line() + pytest.raises(TypeError, line.count, 2) + pytest.raises(TypeError, line.index, 2) + pytest.raises(TypeError, line.find, 2) -def test_line_find_substring(): + +def test_line_count_substrings(): line = Line('abac') - assert line.find('a') == 0 - assert line.find('a', 1) == 2 - assert line.find('no') == -1 - assert line.find('c', 0, 2) == -1 + assert line.count('no') == 0 + assert line.count('a') == 2 + + line = Line(''.join(('a', 'b', ' ', 'b', 'b', ' ', 'a', 'c', ' ', 'c', 'c'))) + assert line.count(re.compile(r'^\Z')) == 0 + assert line.count(re.compile(r'a[bc]')) == 2 + + +@pytest.mark.parametrize( + ('line', 'data'), + [ + ( + Line('abaz'), + [ + ('a', (), 0), + ('a', (1,), 2), + ('not_found', (), -1), + ('z', (0, 2), -1), # do not include last character + ], + ), + ( + # -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 + # 0 1 2 3 4 5 6 7 8 9 10 + Line(''.join(('a', 'b', ' ', 'b', 'b', ' ', 'x', 'c', ' ', 'c', 'c'))), + [ + (re.compile(r'a\w'), (), 0), + (re.compile(r'\bx'), (2,), 6), + *itertools.product( + [re.compile(r'\s+')], + [(3,), (-8,)], + [5], + ), + *itertools.product( + [re.compile(r'c ')], + [(6, 9), (6, -2), (-5, -2), (-5, 9)], # all equivalent to (6, 9) + [7], + ), + *itertools.product( + [re.compile(r'^bb')], + [(3, 8), (3, -3), (-8, -3), (-8, -3)], # all equivalent to (3, 8) + [3], + ), + (re.compile(r'^\Z'), (), -1), + *itertools.product( + [re.compile(r'c[cd]')], + [(0, 5), (-6, 5)], + [-1], + ), + ], + ), + ], +) +def test_line_find(line: Line, data: list[tuple[str, tuple[int, ...], int]]) -> None: + for target, args, expect in data: + actual = line.find(target, *args) - # 0 1 2 3 4 5 6 7 8 9 10 - line = Line(''.join(('a', 'b', '', 'b', 'b', '', 'a', 'c', '', 'c', 'c'))) - assert line.find(re.compile(r'^ab')) == 0 - assert line.find(re.compile(r'a\w'), 3) == 6 - assert line.find(re.compile(r'^\Z')) == -1 - assert line.find(re.compile(r'c[cd]'), 
0, 5) == -1 + if expect == -1: + assert actual == expect, (line.buffer, target, args) + with pytest.raises(ValueError, match=re.escape(str(target))): + line.index(target, *args) + else: + assert actual == expect, (line.buffer, target, args) + assert line.index(target, *args) == expect def test_block_constructor(): @@ -233,31 +291,53 @@ def test_block_count_lines(): assert block.count(re.compile(r'a\w')) == 2 -def test_block_line_index(): - block = Block(['a', 'b', 'a', 'c']) - assert block.index('a') == 0 - assert block.index('a', 1) == 2 - pytest.raises(ValueError, block.index, 'no') - pytest.raises(ValueError, block.index, 'c', 0, 2) - - block = Block(['ab', 'bb', 'ac', 'cc']) - # this also tests the predicate-based implementation - assert block.index(re.compile(r'a\w')) == 0 - assert block.index(re.compile(r'a\w'), 1) == 2 - pytest.raises(ValueError, block.index, re.compile(r'^\Z')) - pytest.raises(ValueError, block.index, re.compile(r'c\w'), 0, 2) - - -def test_block_find_line(): - block = Block(['a', 'b', 'a', 'c']) - assert block.find('a') == 0 - assert block.find('a', 1) == 2 - assert block.find('no') == -1 - assert block.find('c', 0, 2) == -1 - - block = Block(['ab', 'bb', 'ac', 'cc']) - # this also tests the predicate-based implementation - assert block.find(re.compile(r'a\w')) == 0 - assert block.find(re.compile(r'a\w'), 1) == 2 - assert block.find(re.compile(r'^\Z')) == -1 - assert block.find(re.compile(r'c\W'), 0, 2) == -1 +@pytest.mark.parametrize( + ('block', 'data'), + [ + ( + Block(['a', 'b', 'a', 'end']), + [ + ('a', (), 0), + ('a', (1,), 2), + ('not_found', (), -1), + ('end', (0, 2), -1), # do not include last line + ], + ), + ( + # -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 + # 0 1 2 3 4 5 6 7 8 9 10 + Block(('a0', 'b1', ' ', 'b3', 'b4', ' ', '6a', 'a7', ' ', 'cc', 'c?')), + [ + (re.compile(r'a\d'), (), 0), + (re.compile(r'a\d'), (1,), 7), + *itertools.product( + [re.compile(r'\d\w')], # '6a' + [(3, 9), (3, -2), (-8, 9), (-8, -2)], # all equivalent to 
(3, 9) + [6], + ), + *itertools.product( + [re.compile(r'^\s+')], + [(5, 8), (5, -3), (-6, 8), (-6, -3)], # all equivalent to (5, 8) + [5], + ), + (re.compile(r'^\Z'), (), -1), + *itertools.product( + [re.compile(r'c\?')], + [(0, 4), (-7, 9)], + [-1], + ), + ], + ), + ], +) +def test_block_find(block: Block, data: list[tuple[str, tuple[int, ...], int]]) -> None: + for target, args, expect in data: + actual = block.find(target, *args) + + if expect == -1: + assert actual == expect, (block.buffer, target, args) + with pytest.raises(ValueError, match=re.escape(str(target))): + block.index(target, *args) + else: + assert actual == expect, (block.buffer, target, args) + assert block.index(target, *args) == expect From d4fb7b9059fb21b7c1ccf60b9898562830a3fbd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 3 Apr 2024 17:02:44 +0200 Subject: [PATCH 26/66] fixup --- sphinx/testing/_matcher/buffer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sphinx/testing/_matcher/buffer.py b/sphinx/testing/_matcher/buffer.py index 15da3f6932b..49f795c0c64 100644 --- a/sphinx/testing/_matcher/buffer.py +++ b/sphinx/testing/_matcher/buffer.py @@ -474,8 +474,7 @@ def find(self, target: BlockMatch, start: int = 0, stop: int = sys.maxsize, /) - return self.buffer.index(target, start, stop) return -1 - # fmt: off - + # fmt: off @overload def at(self, index: int, /) -> Line: ... 
# NoQA: E704 @overload From 1250ac9a4ebc6be371880d5871c2e68b96c12f40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 3 Apr 2024 19:08:45 +0200 Subject: [PATCH 27/66] fixup --- sphinx/testing/_matcher/buffer.py | 185 +++++++++++----------- sphinx/testing/_matcher/cleaner.py | 1 - sphinx/testing/_matcher/engine.py | 2 + sphinx/testing/_matcher/options.py | 2 + sphinx/testing/_matcher/util.py | 25 +-- sphinx/testing/matcher.py | 14 +- tests/test_testing/test_matcher_buffer.py | 38 ++++- 7 files changed, 165 insertions(+), 102 deletions(-) diff --git a/sphinx/testing/_matcher/buffer.py b/sphinx/testing/_matcher/buffer.py index 49f795c0c64..efb2e5c8487 100644 --- a/sphinx/testing/_matcher/buffer.py +++ b/sphinx/testing/_matcher/buffer.py @@ -52,7 +52,6 @@ def __init__(self, buffer: _T, /, offset: int = 0, *, _check: bool = True) -> No their constructor arguments are known to be valid at call time. """ if _check: - __tracebackhide__ = True if not isinstance(offset, int): msg = f'offset must be an integer, got: {offset!r}' raise TypeError(msg) @@ -98,7 +97,6 @@ def index(self, value: Any, start: int = 0, stop: int = sys.maxsize, /) -> int: """ index = self.find(value, start, stop) if index == -1: - __tracebackhide__ = True raise ValueError(value) return index @@ -147,6 +145,7 @@ def __le__(self, other: object, /) -> bool: By default, ``self == other`` is called before ``self < other``, but subclasses should override this method for an efficient alternative. """ + __tracebackhide__ = False return self == other or self < other def __ge__(self, other: object, /) -> bool: @@ -155,6 +154,7 @@ def __ge__(self, other: object, /) -> bool: By default, ``self == other`` is called before ``self > other``, but subclasses should override this method for an efficient alternative. 
""" + __tracebackhide__ = False return self == other or self > other @abc.abstractmethod @@ -221,7 +221,7 @@ def __eq__(self, other: object, /) -> bool: if isinstance(other, str): return self.buffer == other - other = self.__parse_non_string(other) + other = _parse_non_string(other) if other is None: return NotImplemented @@ -229,10 +229,12 @@ def __eq__(self, other: object, /) -> bool: return self.offset == other[1] and self.buffer == other[0] def __lt__(self, other: object, /) -> bool: + __tracebackhide__ = False + if isinstance(other, str): return self.buffer < other - other = self.__parse_non_string(other) + other = _parse_non_string(other) if other is None: return NotImplemented @@ -240,10 +242,12 @@ def __lt__(self, other: object, /) -> bool: return self.offset == other[1] and self.buffer < other[0] def __gt__(self, other: object, /) -> bool: + __tracebackhide__ = False + if isinstance(other, str): return self.buffer > other - other = self.__parse_non_string(other) + other = _parse_non_string(other) if other is None: return NotImplemented @@ -257,7 +261,6 @@ def startswith(self, prefix: str, start: int = 0, end: int = sys.maxsize, /) -> :param start: The test start position. :param end: The test stop position. """ - __tracebackhide__ = True return self.buffer.startswith(prefix, start, end) def endswith(self, suffix: str, start: int = 0, end: int = sys.maxsize, /) -> bool: @@ -267,7 +270,6 @@ def endswith(self, suffix: str, start: int = 0, end: int = sys.maxsize, /) -> bo :param start: The test start position. :param end: The test stop position. 
""" - __tracebackhide__ = True return self.buffer.endswith(suffix, start, end) def count(self, sub: LineText, /) -> int: @@ -283,7 +285,6 @@ def count(self, sub: LineText, /) -> int: util.consume(zip(sub.finditer(self.buffer), counter)) return next(counter) - __tracebackhide__ = True return self.buffer.count(sub) # raise a TypeError if *sub* is not a string # explicitly add the method since its signature differs from :meth:`SourceView.index` @@ -292,7 +293,6 @@ def index(self, sub: LineText, start: int = 0, stop: int = sys.maxsize, /) -> in :raise TypeError: *sub* is not a string or a compiled pattern. """ - __tracebackhide__ = True return super().index(sub, start, stop) def find(self, sub: LineText, start: int = 0, stop: int = sys.maxsize, /) -> int: @@ -312,19 +312,8 @@ def find(self, sub: LineText, start: int = 0, stop: int = sys.maxsize, /) -> int return match.start() + start_index return -1 - __tracebackhide__ = True return self.buffer.find(sub, start, stop) # raise a TypeError if *sub* is not a string - def __parse_non_string(self, other: object, /) -> tuple[str, int] | None: - """Try to parse *other* as a ``line`` or a ``(line, offset)`` pair.""" - if isinstance(other, self.__class__): - return other.buffer, other.offset - if isinstance(other, Sequence) and len(other) == 2: - buffer, offset = other - if isinstance(buffer, str) and isinstance(offset, int): - return buffer, offset - return None - @final class Block(SourceView[tuple[str, ...]], Sequence[str]): @@ -351,6 +340,8 @@ class Block(SourceView[tuple[str, ...]], Sequence[str]): corresponding :class:`Line` or :class:`Block` values. """ + __slots__ = ('__cached_lines',) + def __init__( self, buffer: Iterable[str] = (), /, offset: int = 0, *, _check: bool = True ) -> None: @@ -359,12 +350,13 @@ def __init__( # validated values one by one. 
buffer = tuple(buffer) if _check: - __tracebackhide__ = True for line in buffer: if not isinstance(line, str): err = f'expecting a native string, got: {line!r}' raise TypeError(err) + super().__init__(buffer, offset, _check=_check) + self.__cached_lines: list[object] | None = None @classmethod def view(cls, index: int, buffer: Iterable[str], /, *, _check: bool = True) -> Self: @@ -386,7 +378,7 @@ def window(self) -> slice: """A slice representing this block in its source. If *source* is the original source this block is contained within, - then ``assert source[block.window] == block`` is always satisfied. + then ``assert source[block.window] == block`` is satisfied. Example:: @@ -504,7 +496,6 @@ def __getitem__(self, index: int | slice, /) -> str | Sequence[str]: # NoQA: E3 # normalize negative and None slice fields _, _, step = index.indices(self.length) if step != 1: - __tracebackhide__ = True msg = 'only contiguous regions can be extracted' raise ValueError(msg) return self.buffer[index] @@ -514,7 +505,7 @@ def __eq__(self, other: object, /) -> bool: # more efficient to first check the offsets return (self.offset, self.buffer) == (other.offset, other.buffer) - other = self.__parse_non_block(other) + other = _parse_non_block(other) if other is None: return NotImplemented @@ -530,78 +521,50 @@ def __eq__(self, other: object, /) -> bool: return False def __lt__(self, other: object, /) -> bool: - if isinstance(other, self.__class__): - # more efficient to first check if the indices are valid before checking the lines - if _can_be_strict_in(self.offset, self.length, other.offset, other.length): - return self.buffer < other.buffer - return False + __tracebackhide__ = False - other = self.__parse_non_block(other) + if isinstance(other, self.__class__): + # More efficient to first check if the indices are valid before + # checking the lines using tuple comparisons (both objects have + # compatible types at runtime). 
+ aligned = _can_be_strict_in(self.offset, self.length, other.offset, other.length) + return aligned and self.buffer < other.buffer + + print('other=', other, _parse_non_block(other)) + other = _parse_non_block(other) if other is None: return NotImplemented - lines, offset = other - max_length = len(lines) - if self.length >= max_length: - # By Dirichlet's box principle, *other* must have strictly more - # items than *self* for the latter to be strictly contained. - return False - - # convert this block into its lines so that we use a rich comparison - # with the items in *other* (we do not know their exact type) - actual = self.__lines(0, self.length) - - if offset != -1: - if _can_be_strict_in(self.offset, self.length, offset, max_length): - return actual < lines - return False - + lines, other_offset = other + if other_offset != -1: + aligned = _can_be_strict_in(self.offset, self.length, other_offset, len(lines)) + return aligned and self.__lines() < lines # we want to find this block in the *other* block (at any place) - for start in range(max_length - self.length + 1): - region = itertools.islice(lines, start, start + self.length) - if all(map(operator.__eq__, actual, region)): - return True - return False + return self.__lines() < lines def __gt__(self, other: object, /) -> bool: + __tracebackhide__ = False + if isinstance(other, self.__class__): return other < self - other = self.__parse_non_block(other) + other = _parse_non_block(other) if other is None: return NotImplemented - # nothing can be a strict subset of the empty block (this check - # must be done *after* we decided whether *other* is correct) - if not self: - return False - lines, other_offset = other - other_length = len(lines) - - if self.length <= other_length: - # By Dirichlet's box principle, *self* must have strictly more - # items than *other* for the latter to be strictly contained. 
- return False - if other_offset != -1: - # we want to find *other* at a given offset - if _can_be_strict_in(other_offset, other_length, self.offset, self.length): - # dispatch to C implementation of list.__lt__ - actual = self.__lines(other_offset, other_length) - return actual > lines - - # we want to find *other* in this block (at any place) - for start in range(self.length - other_length + 1): - if self.__lines(start, other_length) > lines: - return True - return False + aligned = _can_be_strict_in(other_offset, len(lines), self.offset, self.length) + return aligned and self.__lines() > lines + return self.__lines() > lines # Do not annotate with list[Line] since otherwise mypy complains # when comparing with a right-hand side that is a list of objects. - def __lines(self, start: int, count: int) -> list[object]: + def __lines(self) -> list[object]: """Same as :func:`__lines_iterator` but return a list instead.""" - return list(self.__lines_iterator(start, count)) + if self.__cached_lines is None: + self.__cached_lines = list(self.__lines_iterator(0, self.length)) + return self.__cached_lines def __lines_iterator(self, start: int, count: int) -> Iterator[Line]: """Yield some lines in this block as :class:`Line` objects.""" @@ -609,23 +572,67 @@ def __lines_iterator(self, start: int, count: int) -> Iterator[Line]: for index, line in enumerate(region, self.offset + start): yield Line(line, index, _check=False) - def __parse_non_block(self, other: object, /) -> tuple[list[object], int] | None: - """Try to parse *other* as a pair ``(block lines, block offset)``. - For efficiency, do *not* call this method on :class:`Block` instances - since they can be handled separately more efficiently. - """ - if not isinstance(other, Sequence): +def _parse_non_string(other: object, /) -> tuple[str, int] | None: + """Try to parse *other* as a ``line`` or a ``(line, offset)`` pair. 
+ + For efficiency, do *not* call this method on :class:`str` instances + since they will be handled separately more efficiently. + """ + if isinstance(other, Line): + return other.buffer, other.offset + if isinstance(other, Sequence) and len(other) == 2: + buffer, offset = other + if isinstance(buffer, str) and isinstance(offset, int): + return buffer, offset + return None + + +def _is_block_line_compatible(other: object, /) -> bool: + if isinstance(other, (str, Line)): + return True + + if isinstance(other, Sequence) and len(other) == 2: + buffer, offset = other + if isinstance(buffer, str) and isinstance(offset, int): + return True + + return False + + +def _parse_non_block(other: object, /) -> tuple[list[object], int] | None: + """Try to parse *other* as a pair ``(block lines, block offset)``. + + For efficiency, do *not* call this method on :class:`Block` instances + since they will be handled separately more efficiently. + """ + if not isinstance(other, Sequence): + return None + + if all(map(_is_block_line_compatible, other)): + # offset will never be given in this scenario + return list(other), -1 + + + if len(other) == 2: + lines, offset = other + if not isinstance(lines, Sequence) or not isinstance(offset, int): return None - # given as (lines, offset) with lines = sequence of line-like objects - if len(other) == 2 and isinstance(other[0], Sequence) and isinstance(other[1], int): - if isinstance(other[0], str): - return None - # mypy does not know how to deduce that the length is 2 - return list(other[0]), other[1] + if isinstance(lines, str): + # do not allow [line, offset] with single string 'line' + return None + + if not all(map(_is_block_line_compatible, lines)): + return None + + return list(lines), offset + + if all(map(_is_block_line_compatible, other)): return list(other), -1 + return None + def _can_be_strict_in(i1: int, l1: int, i2: int, l2: int) -> bool: """Check that a block can be strictly contained in another block. 
diff --git a/sphinx/testing/_matcher/cleaner.py b/sphinx/testing/_matcher/cleaner.py index 2a67c9c1d35..514397ef6cf 100644 --- a/sphinx/testing/_matcher/cleaner.py +++ b/sphinx/testing/_matcher/cleaner.py @@ -147,7 +147,6 @@ def prune_lines( lines = prune_lines(['a123b', 'c123d'], re.compile(r'\d+')) assert list(lines) == ['ab', 'cd'] - For debugging purposes, an empty list *trace* can be given When specified, *trace* is incrementally constructed as follows:: for i, line in enumerate(lines): diff --git a/sphinx/testing/_matcher/engine.py b/sphinx/testing/_matcher/engine.py index 56d098f6a14..b286126a912 100644 --- a/sphinx/testing/_matcher/engine.py +++ b/sphinx/testing/_matcher/engine.py @@ -20,6 +20,7 @@ def _check_flavor(flavor: Flavor) -> None: allowed: Sequence[Flavor] = ('none', 'fnmatch', 're') if flavor not in allowed: + __tracebackhide__ = True msg = f'unknown flavor: {flavor!r} (choose from {tuple(map(repr, allowed))})' raise ValueError(msg) @@ -89,6 +90,7 @@ def to_block_pattern(expect: LinePattern | Sequence[LinePattern]) -> Sequence[Li if isinstance(expect, re.Pattern): return (expect,) if not isinstance(expect, Sequence): + __tracebackhide__ = True msg = f'expecting a sequence of patterns, got: {expect!r}' raise TypeError(msg) return tuple(expect) diff --git a/sphinx/testing/_matcher/options.py b/sphinx/testing/_matcher/options.py index e8e8d8b4f1b..c3aa57fe339 100644 --- a/sphinx/testing/_matcher/options.py +++ b/sphinx/testing/_matcher/options.py @@ -241,4 +241,6 @@ def get_option(self, name: OptionName, /) -> object: # NoQA: E301 """Get a known option value, or its default value.""" if name in self._options: return self._options[name] + + __tracebackhide__ = True return self.default_options[name] diff --git a/sphinx/testing/_matcher/util.py b/sphinx/testing/_matcher/util.py index a4b83bcd111..4deac77e53a 100644 --- a/sphinx/testing/_matcher/util.py +++ b/sphinx/testing/_matcher/util.py @@ -30,22 +30,25 @@ def consume(iterator: Iterator[object], /, 
n: int | None = None) -> None: - """Consume *n* values from *iterator*. + """Advance the iterator *n*-steps ahead, or entirely if *n* is ``None``. - If *n* is not specified, this consumes the entire iterator. + Taken from `itertools recipes`__. + + __ https://docs.python.org/3/library/itertools.html#itertools-recipes """ # use the C API to efficiently consume iterators if n is None: deque(iterator, maxlen=0) else: - assert n >= 0 next(itertools.islice(iterator, n, n), None) def unique_justseen(iterable: Iterable[_T], /) -> Iterator[_T]: """Yield elements in order, ignoring serial duplicates. - Credits go to :func:`!more_itertools.recipes.unique_justseen`. + Taken from `itertools recipes`__. + + __ https://docs.python.org/3/library/itertools.html#itertools-recipes """ return map(next, map(itemgetter(1), itertools.groupby(iterable))) @@ -53,7 +56,9 @@ def unique_justseen(iterable: Iterable[_T], /) -> Iterator[_T]: def unique_everseen(iterable: Iterable[_T], /) -> Iterator[_T]: """Yield elements in order, ignoring duplicates. - Credits go to :func:`!more_itertools.recipes.unique_everseen`. + Taken from `itertools recipes`__. + + __ https://docs.python.org/3/library/itertools.html#itertools-recipes """ seen: set[_T] = set() mark, pred = seen.add, seen.__contains__ @@ -62,12 +67,14 @@ def unique_everseen(iterable: Iterable[_T], /) -> Iterator[_T]: yield element -def windowed(iterable: Iterable[_T], n: int, /) -> Iterator[Sequence[_T]]: +def strict_windowed(iterable: Iterable[_T], n: int, /) -> Iterator[Sequence[_T]]: """Return a sliding window of width *n* over the given iterable. - Credits go to :func:`!more_itertools.more.windowed` but slightly - differs in the sense that if *n* is *0*, then an empty iterator - is returned. + When *n* is *0*, the iterator does not yield anything. + + Adapted from `itertools recipes`__ for the case *n = 0*. 
+ + __ https://docs.python.org/3/library/itertools.html#itertools-recipes """ if n == 0: return diff --git a/sphinx/testing/matcher.py b/sphinx/testing/matcher.py index b4777bc06c4..4960bafb613 100644 --- a/sphinx/testing/matcher.py +++ b/sphinx/testing/matcher.py @@ -187,7 +187,7 @@ def iterfind_blocks( return compiled_patterns = self.__compile(patterns, flavor=flavor) - block_iterator = enumerate(util.windowed(lines, width)) + block_iterator = enumerate(util.strict_windowed(lines, width)) for start, block in block_iterator: # check if the block matches the patterns line by line if all(pattern.match(line) for pattern, line in zip(compiled_patterns, block)): @@ -215,6 +215,7 @@ def assert_match( :param count: If specified, the exact number of matching lines. :param flavor: Optional temporary flavor for string patterns. """ + __tracebackhide__ = True patterns = engine.to_line_patterns(expect) self._assert_found('line', patterns, count=count, flavor=flavor) @@ -232,6 +233,7 @@ def assert_no_match( :param context: Number of lines to print around a failing line. :param flavor: Optional temporary flavor for string patterns. """ + __tracebackhide__ = True patterns = engine.to_line_patterns(expect) self._assert_not_found('line', patterns, context_size=context, flavor=flavor) @@ -252,6 +254,7 @@ def assert_lines( When *expect* is a single string, it is split into lines, each of which corresponding to the pattern a block's line must satisfy. """ + __tracebackhide__ = True patterns = engine.to_block_pattern(expect) self._assert_found('block', patterns, count=count, flavor=flavor) @@ -274,6 +277,7 @@ def assert_no_lines( Use :data:`sys.maxsize` to show all capture lines. 
""" + __tracebackhide__ = True patterns = engine.to_block_pattern(expect) self._assert_not_found('block', patterns, context_size=context, flavor=flavor) @@ -295,6 +299,8 @@ def _assert_found( ctx = util.highlight(self.lines(), keepends=keepends) pat = util.prettify_patterns(patterns, sort=pattern_type == 'line') logs = [f'{pattern_type} pattern', pat, 'not found in', ctx] + + __tracebackhide__ = True raise AssertionError('\n\n'.join(logs)) indices = {block.offset: len(block) for block in blocks} @@ -306,6 +312,8 @@ def _assert_found( pat = util.prettify_patterns(patterns, sort=pattern_type == 'line') noun = util.plural_form(pattern_type, count) logs = [f'found {found} != {count} {noun} matching', pat, 'in', ctx] + + __tracebackhide__ = True raise AssertionError('\n\n'.join(logs)) def _assert_not_found( @@ -329,12 +337,14 @@ def _assert_not_found( compiled_patterns = self.__compile(patterns, flavor=flavor) - for start, block in enumerate(util.windowed(lines, window_size)): + for start, block in enumerate(util.strict_windowed(lines, window_size)): if all(pattern.match(line) for pattern, line in zip(compiled_patterns, block)): pat = util.prettify_patterns(patterns, sort=pattern_type == 'line') block_object = Block(block, start, _check=False) ctx = util.get_debug_context(lines, block_object, context_size) logs = [f'{pattern_type} pattern', pat, 'found in', '\n'.join(ctx)] + + __tracebackhide__ = True raise AssertionError('\n\n'.join(logs)) def __compile( diff --git a/tests/test_testing/test_matcher_buffer.py b/tests/test_testing/test_matcher_buffer.py index 2dea8f80959..21eeb9e7f61 100644 --- a/tests/test_testing/test_matcher_buffer.py +++ b/tests/test_testing/test_matcher_buffer.py @@ -1,5 +1,6 @@ from __future__ import annotations +import contextlib import itertools import operator import re @@ -266,8 +267,43 @@ def test_block_comparison_operators( assert Block([*lines, foreign], 1) > expect assert Block([*lines, foreign], 1) > [expect, 1] + assert Block([foreign, 
*lines, foreign], 1) > expect + assert Block([foreign, *lines, foreign], 1) > [expect, 1] -@pytest.mark.parametrize('operand', [{1, 2, 3}]) +@pytest.mark.parametrize( + 'operand', + [ + [], + [[], 0], + ['L1'], + [Line()], + ['L1', 'L2'], + ['L1', Line(), ], + ['L1', 'L2', 'L3'], + ['L1', 'L2', Line()], + [['L1'], 0], + [[Line()], 0], + [['L1', 'L2'], 0], + [['L1', Line()], 0], + ], +) +def test_block_supported_operators(operand): + with contextlib.nullcontext(): + for dispatcher in [operator.__lt__, operator.__le__, operator.__ge__, operator.__gt__]: + dispatcher(Block(), operand) + +@pytest.mark.parametrize( + 'operand', + [ + object(), # bad lines + ['L1', object(), 'L3'], # bad lines (no offset) + [['a', object()], 1], # bad lines (with offset) + ['L1', 1], # single line + offset not allowed + [[], object()], # no lines + bad offset + [['L1', 'L2'], object()], # ok lines + bad offset + [[object(), object()], object()], # bad lines + bad offset + ], +) def test_block_unsupported_operators(operand): for dispatcher in [operator.__lt__, operator.__le__, operator.__ge__, operator.__gt__]: pytest.raises(TypeError, dispatcher, Block(), operand) From 6cc70a6d57a2db78921a1f7b57ae5c1b5d204037 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 3 Apr 2024 19:16:04 +0200 Subject: [PATCH 28/66] fixup --- sphinx/testing/_matcher/buffer.py | 47 ++++++++++++++--------- tests/test_testing/test_matcher_buffer.py | 4 +- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/sphinx/testing/_matcher/buffer.py b/sphinx/testing/_matcher/buffer.py index efb2e5c8487..de406692448 100644 --- a/sphinx/testing/_matcher/buffer.py +++ b/sphinx/testing/_matcher/buffer.py @@ -39,6 +39,7 @@ class SourceView(Generic[_T], Sequence[str], abc.ABC): # add __weakref__ to allow the object being weak-referencable __slots__ = ('__buffer', '__offset', '__weakref__') + __tracebackhide__ = True def __init__(self, buffer: _T, 
/, offset: int = 0, *, _check: bool = True) -> None: """Construct a :class:`SourceView`. @@ -51,6 +52,7 @@ def __init__(self, buffer: _T, /, offset: int = 0, *, _check: bool = True) -> No to speed-up the construction of :class:`SourceView` objects for which their constructor arguments are known to be valid at call time. """ + __tracebackhide__ = self.__tracebackhide__ if _check: if not isinstance(offset, int): msg = f'offset must be an integer, got: {offset!r}' @@ -95,6 +97,7 @@ def index(self, value: Any, start: int = 0, stop: int = sys.maxsize, /) -> int: .. seealso:: :meth:`find` """ + __tracebackhide__ = self.__tracebackhide__ index = self.find(value, start, stop) if index == -1: raise ValueError(value) @@ -145,7 +148,7 @@ def __le__(self, other: object, /) -> bool: By default, ``self == other`` is called before ``self < other``, but subclasses should override this method for an efficient alternative. """ - __tracebackhide__ = False + __tracebackhide__ = self.__tracebackhide__ return self == other or self < other def __ge__(self, other: object, /) -> bool: @@ -154,7 +157,7 @@ def __ge__(self, other: object, /) -> bool: By default, ``self == other`` is called before ``self > other``, but subclasses should override this method for an efficient alternative. 
""" - __tracebackhide__ = False + __tracebackhide__ = self.__tracebackhide__ return self == other or self > other @abc.abstractmethod @@ -187,6 +190,7 @@ class Line(SourceView[str]): def __init__(self, line: str = '', /, offset: int = 0, *, _check: bool = True) -> None: """Construct a :class:`Line` object.""" + __tracebackhide__ = self.__tracebackhide__ super().__init__(line, offset, _check=_check) @classmethod @@ -205,6 +209,7 @@ def cast(line: object) -> str: lines = [Line(cast(line), index) for index, line in enumerate(src)] """ + __tracebackhide__ = cls.__tracebackhide__ line = line if isinstance(line, str) else str(line) return cls(line, index, _check=_check) @@ -215,6 +220,7 @@ def __str__(self) -> str: return self.buffer def __getitem__(self, index: int | slice, /) -> str: + __tracebackhide__ = self.__tracebackhide__ return self.buffer[index] def __eq__(self, other: object, /) -> bool: @@ -229,7 +235,7 @@ def __eq__(self, other: object, /) -> bool: return self.offset == other[1] and self.buffer == other[0] def __lt__(self, other: object, /) -> bool: - __tracebackhide__ = False + __tracebackhide__ = self.__tracebackhide__ if isinstance(other, str): return self.buffer < other @@ -242,7 +248,7 @@ def __lt__(self, other: object, /) -> bool: return self.offset == other[1] and self.buffer < other[0] def __gt__(self, other: object, /) -> bool: - __tracebackhide__ = False + __tracebackhide__ = self.__tracebackhide__ if isinstance(other, str): return self.buffer > other @@ -261,6 +267,7 @@ def startswith(self, prefix: str, start: int = 0, end: int = sys.maxsize, /) -> :param start: The test start position. :param end: The test stop position. """ + __tracebackhide__ = self.__tracebackhide__ return self.buffer.startswith(prefix, start, end) def endswith(self, suffix: str, start: int = 0, end: int = sys.maxsize, /) -> bool: @@ -270,6 +277,7 @@ def endswith(self, suffix: str, start: int = 0, end: int = sys.maxsize, /) -> bo :param start: The test start position. 
:param end: The test stop position. """ + __tracebackhide__ = self.__tracebackhide__ return self.buffer.endswith(suffix, start, end) def count(self, sub: LineText, /) -> int: @@ -285,6 +293,7 @@ def count(self, sub: LineText, /) -> int: util.consume(zip(sub.finditer(self.buffer), counter)) return next(counter) + __tracebackhide__ = self.__tracebackhide__ return self.buffer.count(sub) # raise a TypeError if *sub* is not a string # explicitly add the method since its signature differs from :meth:`SourceView.index` @@ -293,6 +302,7 @@ def index(self, sub: LineText, start: int = 0, stop: int = sys.maxsize, /) -> in :raise TypeError: *sub* is not a string or a compiled pattern. """ + __tracebackhide__ = self.__tracebackhide__ return super().index(sub, start, stop) def find(self, sub: LineText, start: int = 0, stop: int = sys.maxsize, /) -> int: @@ -312,6 +322,7 @@ def find(self, sub: LineText, start: int = 0, stop: int = sys.maxsize, /) -> int return match.start() + start_index return -1 + __tracebackhide__ = self.__tracebackhide__ return self.buffer.find(sub, start, stop) # raise a TypeError if *sub* is not a string @@ -345,6 +356,7 @@ class Block(SourceView[tuple[str, ...]], Sequence[str]): def __init__( self, buffer: Iterable[str] = (), /, offset: int = 0, *, _check: bool = True ) -> None: + __tracebackhide__ = self.__tracebackhide__ # It is more efficient to first consume everything and then # iterate over the values for checks rather than to add the # validated values one by one. @@ -371,6 +383,7 @@ def view(cls, index: int, buffer: Iterable[str], /, *, _check: bool = True) -> S blocks = [Block(lines, index) for index, lines in enumerate(src)] """ + __tracebackhide__ = cls.__tracebackhide__ return cls(buffer, index, _check=_check) @property @@ -444,6 +457,7 @@ def index(self, target: BlockMatch, start: int = 0, stop: int = sys.maxsize, /) block.index(target, ...) block.index(target.match, ...) 
""" + __tracebackhide__ = self.__tracebackhide__ return super().index(target, start, stop) def find(self, target: BlockMatch, start: int = 0, stop: int = sys.maxsize, /) -> int: @@ -492,6 +506,7 @@ def __getitem__(self, index: slice, /) -> Sequence[str]: ... # NoQA: E704 # fmt: on def __getitem__(self, index: int | slice, /) -> str | Sequence[str]: # NoQA: E301 """Get a line or a contiguous sub-block.""" + __tracebackhide__ = self.__tracebackhide__ if isinstance(index, slice): # normalize negative and None slice fields _, _, step = index.indices(self.length) @@ -501,6 +516,8 @@ def __getitem__(self, index: int | slice, /) -> str | Sequence[str]: # NoQA: E3 return self.buffer[index] def __eq__(self, other: object, /) -> bool: + __tracebackhide__ = self.__tracebackhide__ + if isinstance(other, self.__class__): # more efficient to first check the offsets return (self.offset, self.buffer) == (other.offset, other.buffer) @@ -516,12 +533,12 @@ def __eq__(self, other: object, /) -> bool: if len(lines) == self.length: # match the lines one by one, possibly using a rich comparison - expect = self.__lines_iterator(0, self.length) + expect = self.__lines_iterator() return all(map(operator.__eq__, expect, lines)) return False def __lt__(self, other: object, /) -> bool: - __tracebackhide__ = False + __tracebackhide__ = self.__tracebackhide__ if isinstance(other, self.__class__): # More efficient to first check if the indices are valid before @@ -530,7 +547,6 @@ def __lt__(self, other: object, /) -> bool: aligned = _can_be_strict_in(self.offset, self.length, other.offset, other.length) return aligned and self.buffer < other.buffer - print('other=', other, _parse_non_block(other)) other = _parse_non_block(other) if other is None: return NotImplemented @@ -543,7 +559,7 @@ def __lt__(self, other: object, /) -> bool: return self.__lines() < lines def __gt__(self, other: object, /) -> bool: - __tracebackhide__ = False + __tracebackhide__ = self.__tracebackhide__ if isinstance(other, 
self.__class__): return other < self @@ -561,15 +577,14 @@ def __gt__(self, other: object, /) -> bool: # Do not annotate with list[Line] since otherwise mypy complains # when comparing with a right-hand side that is a list of objects. def __lines(self) -> list[object]: - """Same as :func:`__lines_iterator` but return a list instead.""" + """This block as a list of :class:`Line` objects.""" if self.__cached_lines is None: - self.__cached_lines = list(self.__lines_iterator(0, self.length)) + self.__cached_lines = list(self.__lines_iterator()) return self.__cached_lines - def __lines_iterator(self, start: int, count: int) -> Iterator[Line]: - """Yield some lines in this block as :class:`Line` objects.""" - region = itertools.islice(self.buffer, start, start + count) - for index, line in enumerate(region, self.offset + start): + def __lines_iterator(self) -> Iterator[Line]: + """This block as a list of :class:`Line` objects.""" + for index, line in enumerate(self, self.offset): yield Line(line, index, _check=False) @@ -613,7 +628,6 @@ def _parse_non_block(other: object, /) -> tuple[list[object], int] | None: # offset will never be given in this scenario return list(other), -1 - if len(other) == 2: lines, offset = other if not isinstance(lines, Sequence) or not isinstance(offset, int): @@ -628,9 +642,6 @@ def _parse_non_block(other: object, /) -> tuple[list[object], int] | None: return list(lines), offset - if all(map(_is_block_line_compatible, other)): - return list(other), -1 - return None diff --git a/tests/test_testing/test_matcher_buffer.py b/tests/test_testing/test_matcher_buffer.py index 21eeb9e7f61..4b6a8e8c3dc 100644 --- a/tests/test_testing/test_matcher_buffer.py +++ b/tests/test_testing/test_matcher_buffer.py @@ -270,6 +270,7 @@ def test_block_comparison_operators( assert Block([foreign, *lines, foreign], 1) > expect assert Block([foreign, *lines, foreign], 1) > [expect, 1] + @pytest.mark.parametrize( 'operand', [ @@ -278,7 +279,7 @@ def 
test_block_comparison_operators( ['L1'], [Line()], ['L1', 'L2'], - ['L1', Line(), ], + ['L1', Line()], ['L1', 'L2', 'L3'], ['L1', 'L2', Line()], [['L1'], 0], @@ -292,6 +293,7 @@ def test_block_supported_operators(operand): for dispatcher in [operator.__lt__, operator.__le__, operator.__ge__, operator.__gt__]: dispatcher(Block(), operand) + @pytest.mark.parametrize( 'operand', [ From 70f232dccdc4aebe17d4683518ff1e0258a9a4ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 3 Apr 2024 19:36:13 +0200 Subject: [PATCH 29/66] explain ``__tracebackhide__`` flag --- sphinx/testing/_matcher/buffer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sphinx/testing/_matcher/buffer.py b/sphinx/testing/_matcher/buffer.py index de406692448..c7ad0bc5e33 100644 --- a/sphinx/testing/_matcher/buffer.py +++ b/sphinx/testing/_matcher/buffer.py @@ -37,9 +37,11 @@ class SourceView(Generic[_T], Sequence[str], abc.ABC): :meta private: """ + __tracebackhide__: bool = True + """A flag to hide the traceback frames in pytest output.""" + # add __weakref__ to allow the object being weak-referencable __slots__ = ('__buffer', '__offset', '__weakref__') - __tracebackhide__ = True def __init__(self, buffer: _T, /, offset: int = 0, *, _check: bool = True) -> None: """Construct a :class:`SourceView`. 
From 7e9fa1a0c0aab699a3ed0ee1652dbc3425ac939e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 3 Apr 2024 20:15:41 +0200 Subject: [PATCH 30/66] set default options --- sphinx/testing/_matcher/options.py | 28 ++++++++++++++++++++++++---- sphinx/testing/util.py | 10 ++++++++-- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/sphinx/testing/_matcher/options.py b/sphinx/testing/_matcher/options.py index c3aa57fe339..7a6f4740248 100644 --- a/sphinx/testing/_matcher/options.py +++ b/sphinx/testing/_matcher/options.py @@ -31,6 +31,7 @@ # For some reason, mypy does not like Union of Literal, # so we wrap the Literal types inside a bigger Literal. OptionName = Literal[FlagOption, StripOption, DeleteOption, FilteringOption, FlavorOption] + OptionValue = Union[bool, StripChars, DeletePattern, LinePredicate, Flavor] DT = TypeVar('DT') _OptionsView = Union['Options', 'CompleteOptions'] @@ -47,6 +48,9 @@ class Options(TypedDict, total=False): .. seealso:: :mod:`sphinx.testing._matcher.cleaner` """ + # only immutable fields should be used as options, otherwise undesired + # side-effects might occur when using a default option mutable value + ansi: bool """Indicate whether to keep the ANSI escape sequences. @@ -173,6 +177,7 @@ class Configurable: """Mixin supporting a known set of options.""" __slots__ = ('_options',) + __tracebackhide__: bool = True default_options: ClassVar[CompleteOptions] = CompleteOptions( ansi=True, @@ -186,13 +191,12 @@ class Configurable: ignore=None, flavor='none', ) - """The default options to use. + """The default options to use when an option is not specified. Subclasses should override this field for different default options. 
""" def __init__(self, /, *args: object, **options: Unpack[Options]) -> None: - # always complete the set of options for this object self._options = options @property @@ -239,8 +243,24 @@ def get_option(self, name: FlavorOption, /) -> Flavor: ... # NoQA: E704 # fmt: on def get_option(self, name: OptionName, /) -> object: # NoQA: E301 """Get a known option value, or its default value.""" + __tracebackhide__ = self.__tracebackhide__ if name in self._options: return self._options[name] - - __tracebackhide__ = True return self.default_options[name] + + # fmt: off + @overload + def set_option(self, name: FlagOption, value: bool, /) -> None: ... # NoQA: E704 + @overload + def set_option(self, name: StripOption, value: StripChars, /) -> None: ... # NoQA: E704 + @overload + def set_option(self, name: DeleteOption, value: DeletePattern, /) -> None: ... # NoQA: E704 + @overload + def set_option(self, name: FilteringOption, value: LinePredicate | None, /) -> None: ... # NoQA: E704 + @overload + def set_option(self, name: FlavorOption, value: Flavor, /) -> None: ... # NoQA: E704 + # fmt: on + def set_option(self, name: OptionName, value: OptionValue, /) -> None: # NoQA: E301 + """Set a persistent option value.""" + __tracebackhide__ = self.__tracebackhide__ + self._options[name] = value diff --git a/sphinx/testing/util.py b/sphinx/testing/util.py index 131bfe1a998..baf2e5125a8 100644 --- a/sphinx/testing/util.py +++ b/sphinx/testing/util.py @@ -80,6 +80,12 @@ def etree_parse(path: str | os.PathLike[str]) -> ElementTree: return xml_parse(path) +class _SphinxLineMatcher(LineMatcher): + default_options = LineMatcher.default_options.copy() + default_options['ansi'] = False + default_options['strip'] = True + + class SphinxTestApp(sphinx.application.Sphinx): """A subclass of :class:`~sphinx.application.Sphinx` for tests. 
@@ -196,11 +202,11 @@ def warning(self) -> StringIO: def stdout(self, /, **options: Unpack[Options]) -> LineMatcher: """Create a line matcher object for the status messages.""" - return LineMatcher(self.status, **options) + return _SphinxLineMatcher(self.status, **options) def stderr(self, /, **options: Unpack[Options]) -> LineMatcher: """Create a line matcher object for the warning messages.""" - return LineMatcher(self.warning, **options) + return _SphinxLineMatcher(self.warning, **options) def cleanup(self, doctrees: bool = False) -> None: sys.path[:] = self._saved_path From 594b0cd095ab58b32d5ce94cd0d9f3ffa8c3cfaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 3 Apr 2024 20:19:17 +0200 Subject: [PATCH 31/66] update docs + option names --- sphinx/testing/_matcher/cleaner.py | 2 +- sphinx/testing/_matcher/options.py | 14 +++++++------- sphinx/testing/util.py | 2 +- tests/test_testing/test_matcher.py | 2 +- tests/test_testing/test_matcher_options.py | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/sphinx/testing/_matcher/cleaner.py b/sphinx/testing/_matcher/cleaner.py index 514397ef6cf..8278340aecf 100644 --- a/sphinx/testing/_matcher/cleaner.py +++ b/sphinx/testing/_matcher/cleaner.py @@ -31,7 +31,7 @@ def clean_text(text: str, /, **options: Unpack[Options]) -> Iterable[str]: """Clean a text, returning an iterable of lines.""" - if not options.get('ansi', True): + if not options.get('keep_ansi', True): text = strip_escape_sequences(text) strip = options.get('strip', False) diff --git a/sphinx/testing/_matcher/options.py b/sphinx/testing/_matcher/options.py index 7a6f4740248..4b0c0b4b02b 100644 --- a/sphinx/testing/_matcher/options.py +++ b/sphinx/testing/_matcher/options.py @@ -14,7 +14,7 @@ from sphinx.testing._matcher.util import LinePattern - FlagOption = Literal['ansi', 'keepends', 'keep_empty', 'compress', 'unique'] + FlagOption = Literal['keep_ansi', 
'keepends', 'keep_empty', 'compress', 'unique'] StripOption = Literal['strip', 'stripline'] StripChars = Union[bool, str, None] @@ -51,7 +51,7 @@ class Options(TypedDict, total=False): # only immutable fields should be used as options, otherwise undesired # side-effects might occur when using a default option mutable value - ansi: bool + keep_ansi: bool """Indicate whether to keep the ANSI escape sequences. The default value is ``True``. @@ -62,8 +62,8 @@ class Options(TypedDict, total=False): The allowed values for :attr:`strip` are: - * ``True`` -- remove leading and trailing whitespaces (the default). - * ``False`` -- keep leading and trailing whitespaces. + * ``False`` -- keep leading and trailing whitespaces (the default). + * ``True`` -- remove leading and trailing whitespaces. * a string (*chars*) -- remove leading and trailing characters in *chars*. """ @@ -72,8 +72,8 @@ class Options(TypedDict, total=False): The allowed values for :attr:`stripline` are: - * ``True`` -- remove leading and trailing whitespaces. * ``False`` -- keep leading and trailing whitespaces (the default). + * ``True`` -- remove leading and trailing whitespaces. * a string (*chars*) -- remove leading and trailing characters in *chars*. 
""" @@ -158,7 +158,7 @@ class CompleteOptions(TypedDict): :meta private: """ - ansi: bool + keep_ansi: bool strip: StripChars stripline: StripChars @@ -180,7 +180,7 @@ class Configurable: __tracebackhide__: bool = True default_options: ClassVar[CompleteOptions] = CompleteOptions( - ansi=True, + keep_ansi=True, strip=False, stripline=False, keepends=False, diff --git a/sphinx/testing/util.py b/sphinx/testing/util.py index baf2e5125a8..1f5de684614 100644 --- a/sphinx/testing/util.py +++ b/sphinx/testing/util.py @@ -82,7 +82,7 @@ def etree_parse(path: str | os.PathLike[str]) -> ElementTree: class _SphinxLineMatcher(LineMatcher): default_options = LineMatcher.default_options.copy() - default_options['ansi'] = False + default_options['keep_ansi'] = False default_options['strip'] = True diff --git a/tests/test_testing/test_matcher.py b/tests/test_testing/test_matcher.py index db77d4f6074..4ddeb21fe30 100644 --- a/tests/test_testing/test_matcher.py +++ b/tests/test_testing/test_matcher.py @@ -173,7 +173,7 @@ def test_matcher_cache(): assert stack[0] is cached assert matcher.lines() is cached - with matcher.override(ansi=False): + with matcher.override(keep_ansi=False): assert len(stack) == 2 assert stack[0] is cached assert stack[1] is None diff --git a/tests/test_testing/test_matcher_options.py b/tests/test_testing/test_matcher_options.py index 233b1e78a5b..11978ebc8f0 100644 --- a/tests/test_testing/test_matcher_options.py +++ b/tests/test_testing/test_matcher_options.py @@ -29,7 +29,7 @@ def check(option: OptionName, default: object) -> None: assert default_options[option] == default processed.add(option) - check('ansi', True) + check('keep_ansi', True) check('strip', False) check('stripline', False) From c1707f0ed80120d4cc5dc4350808f9f16a074c68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 4 Apr 2024 12:41:33 +0200 Subject: [PATCH 32/66] cleanup --- sphinx/testing/_matcher/buffer.py | 8 
++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sphinx/testing/_matcher/buffer.py b/sphinx/testing/_matcher/buffer.py index c7ad0bc5e33..9a214ddd33c 100644 --- a/sphinx/testing/_matcher/buffer.py +++ b/sphinx/testing/_matcher/buffer.py @@ -82,10 +82,6 @@ def length(self) -> int: """The number of items in this object.""" return len(self) - def pformat(self) -> str: - """A nice representation of this object.""" - return '{0.__class__.__name__}({0!r}, @={0.offset}, #={0.length})'.format(self) - @abc.abstractmethod # The 'value' is 'Any' so that subclasses do not violate Liskov's substitution principle def count(self, value: Any, /) -> int: @@ -113,6 +109,10 @@ def find(self, value: Any, start: int = 0, stop: int = sys.maxsize, /) -> int: .. seealso:: :meth:`index` """ + def pformat(self) -> str: + """A nice representation of this object.""" + return f'{self.__class__.__name__}({self!r}, @={self.offset}, #={self.length})' + def __repr__(self) -> str: return repr(self.buffer) From 974d39be716732d19ca0f1dc9ecc3a39762e7b87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 4 Apr 2024 12:45:21 +0200 Subject: [PATCH 33/66] remove unused methods --- sphinx/testing/_matcher/buffer.py | 35 ------------------------------- 1 file changed, 35 deletions(-) diff --git a/sphinx/testing/_matcher/buffer.py b/sphinx/testing/_matcher/buffer.py index 9a214ddd33c..48819d0b29c 100644 --- a/sphinx/testing/_matcher/buffer.py +++ b/sphinx/testing/_matcher/buffer.py @@ -195,25 +195,6 @@ def __init__(self, line: str = '', /, offset: int = 0, *, _check: bool = True) - __tracebackhide__ = self.__tracebackhide__ super().__init__(line, offset, _check=_check) - @classmethod - def view(cls, index: int, line: str, /, *, _check: bool = True) -> Self: - """Alternative constructor flipping the order of the arguments. 
- - This is typically useful with :func:`enumerate`, namely this makes:: - - from itertools import starmap - lines = list(starmap(Line.view, enumerate(src)) - - equivalent to:: - - def cast(line: object) -> str: - return line if isinstance(line, str) else str(line) - - lines = [Line(cast(line), index) for index, line in enumerate(src)] - """ - __tracebackhide__ = cls.__tracebackhide__ - line = line if isinstance(line, str) else str(line) - return cls(line, index, _check=_check) # dunder methods @@ -372,22 +353,6 @@ def __init__( super().__init__(buffer, offset, _check=_check) self.__cached_lines: list[object] | None = None - @classmethod - def view(cls, index: int, buffer: Iterable[str], /, *, _check: bool = True) -> Self: - """Alternative constructor flipping the order of the arguments. - - This is typically useful with :func:`enumerate`, namely this makes:: - - from itertools import starmap - blocks = list(starmap(Block.view, enumerate(src)) - - equivalent to:: - - blocks = [Block(lines, index) for index, lines in enumerate(src)] - """ - __tracebackhide__ = cls.__tracebackhide__ - return cls(buffer, index, _check=_check) - @property def window(self) -> slice: """A slice representing this block in its source. 
From df7496ab1f8fd014ceb9bebfe0e658f22fc391ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 4 Apr 2024 13:14:37 +0200 Subject: [PATCH 34/66] add tests for options --- tests/test_testing/test_matcher_options.py | 26 +++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tests/test_testing/test_matcher_options.py b/tests/test_testing/test_matcher_options.py index 11978ebc8f0..46ebf68e30d 100644 --- a/tests/test_testing/test_matcher_options.py +++ b/tests/test_testing/test_matcher_options.py @@ -1,5 +1,6 @@ from __future__ import annotations +from types import MappingProxyType from typing import TYPE_CHECKING from sphinx.testing._matcher.options import CompleteOptions, Configurable, Options @@ -17,7 +18,7 @@ def test_options_class(): assert not foreign_keys, f'unknown option(s): {", ".join(foreign_keys)}' -def test_matcher_default_options(): +def test_default_options(): """Check the synchronization of default options and classes in Sphinx.""" default_options = Configurable.default_options.copy() @@ -46,3 +47,26 @@ def check(option: OptionName, default: object) -> None: # check that there are no leftover options assert sorted(processed) == sorted(Options.__annotations__) + + +def test_get_option(): + class Object(Configurable): + default_options = Configurable.default_options.copy() + default_options['keepends'] = True + + obj = Object() + assert isinstance(obj.options, MappingProxyType) + + assert 'keepends' not in obj.options + assert obj.get_option('keepends') is True + assert 'keepends' not in obj.options + + +def test_set_option(): + obj = Configurable() + + assert 'delete' not in obj.options + assert obj.get_option('delete') == () + obj.set_option('delete', 'abc') + assert 'delete' in obj.options + assert obj.get_option('delete') == 'abc' From 4574410d9ca4ed4b9f46fa6fcbc8c844e1ff5cc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= 
<10796600+picnixz@users.noreply.github.com> Date: Thu, 4 Apr 2024 13:48:24 +0200 Subject: [PATCH 35/66] cleanup --- sphinx/testing/_matcher/buffer.py | 119 ++++++++++++----------------- sphinx/testing/_matcher/engine.py | 5 +- sphinx/testing/_matcher/options.py | 3 - sphinx/testing/matcher.py | 40 ++++------ 4 files changed, 64 insertions(+), 103 deletions(-) diff --git a/sphinx/testing/_matcher/buffer.py b/sphinx/testing/_matcher/buffer.py index 48819d0b29c..d797361c740 100644 --- a/sphinx/testing/_matcher/buffer.py +++ b/sphinx/testing/_matcher/buffer.py @@ -5,7 +5,6 @@ import abc import contextlib import itertools -import operator import re import sys from collections.abc import Sequence @@ -37,9 +36,6 @@ class SourceView(Generic[_T], Sequence[str], abc.ABC): :meta private: """ - __tracebackhide__: bool = True - """A flag to hide the traceback frames in pytest output.""" - # add __weakref__ to allow the object being weak-referencable __slots__ = ('__buffer', '__offset', '__weakref__') @@ -54,7 +50,6 @@ def __init__(self, buffer: _T, /, offset: int = 0, *, _check: bool = True) -> No to speed-up the construction of :class:`SourceView` objects for which their constructor arguments are known to be valid at call time. """ - __tracebackhide__ = self.__tracebackhide__ if _check: if not isinstance(offset, int): msg = f'offset must be an integer, got: {offset!r}' @@ -95,7 +90,6 @@ def index(self, value: Any, start: int = 0, stop: int = sys.maxsize, /) -> int: .. seealso:: :meth:`find` """ - __tracebackhide__ = self.__tracebackhide__ index = self.find(value, start, stop) if index == -1: raise ValueError(value) @@ -150,7 +144,6 @@ def __le__(self, other: object, /) -> bool: By default, ``self == other`` is called before ``self < other``, but subclasses should override this method for an efficient alternative. 
""" - __tracebackhide__ = self.__tracebackhide__ return self == other or self < other def __ge__(self, other: object, /) -> bool: @@ -159,7 +152,6 @@ def __ge__(self, other: object, /) -> bool: By default, ``self == other`` is called before ``self > other``, but subclasses should override this method for an efficient alternative. """ - __tracebackhide__ = self.__tracebackhide__ return self == other or self > other @abc.abstractmethod @@ -171,9 +163,8 @@ def __gt__(self, other: object, /) -> bool: """ -@final class Line(SourceView[str]): - """A line found by :meth:`~sphinx.testing.matcher.LineMatcher.match`. + """A line found by :meth:`~sphinx.testing.matcher.LineMatcher.find`. A :class:`Line` can be compared to :class:`str`, :class:`Line` objects or a pair (i.e., a two-length sequence) ``(line, line_offset)`` where @@ -192,10 +183,8 @@ class Line(SourceView[str]): def __init__(self, line: str = '', /, offset: int = 0, *, _check: bool = True) -> None: """Construct a :class:`Line` object.""" - __tracebackhide__ = self.__tracebackhide__ super().__init__(line, offset, _check=_check) - # dunder methods def __str__(self) -> str: @@ -203,7 +192,6 @@ def __str__(self) -> str: return self.buffer def __getitem__(self, index: int | slice, /) -> str: - __tracebackhide__ = self.__tracebackhide__ return self.buffer[index] def __eq__(self, other: object, /) -> bool: @@ -218,8 +206,6 @@ def __eq__(self, other: object, /) -> bool: return self.offset == other[1] and self.buffer == other[0] def __lt__(self, other: object, /) -> bool: - __tracebackhide__ = self.__tracebackhide__ - if isinstance(other, str): return self.buffer < other @@ -231,8 +217,6 @@ def __lt__(self, other: object, /) -> bool: return self.offset == other[1] and self.buffer < other[0] def __gt__(self, other: object, /) -> bool: - __tracebackhide__ = self.__tracebackhide__ - if isinstance(other, str): return self.buffer > other @@ -250,7 +234,6 @@ def startswith(self, prefix: str, start: int = 0, end: int = 
sys.maxsize, /) -> :param start: The test start position. :param end: The test stop position. """ - __tracebackhide__ = self.__tracebackhide__ return self.buffer.startswith(prefix, start, end) def endswith(self, suffix: str, start: int = 0, end: int = sys.maxsize, /) -> bool: @@ -260,7 +243,6 @@ def endswith(self, suffix: str, start: int = 0, end: int = sys.maxsize, /) -> bo :param start: The test start position. :param end: The test stop position. """ - __tracebackhide__ = self.__tracebackhide__ return self.buffer.endswith(suffix, start, end) def count(self, sub: LineText, /) -> int: @@ -276,7 +258,6 @@ def count(self, sub: LineText, /) -> int: util.consume(zip(sub.finditer(self.buffer), counter)) return next(counter) - __tracebackhide__ = self.__tracebackhide__ return self.buffer.count(sub) # raise a TypeError if *sub* is not a string # explicitly add the method since its signature differs from :meth:`SourceView.index` @@ -285,7 +266,6 @@ def index(self, sub: LineText, start: int = 0, stop: int = sys.maxsize, /) -> in :raise TypeError: *sub* is not a string or a compiled pattern. """ - __tracebackhide__ = self.__tracebackhide__ return super().index(sub, start, stop) def find(self, sub: LineText, start: int = 0, stop: int = sys.maxsize, /) -> int: @@ -305,33 +285,38 @@ def find(self, sub: LineText, start: int = 0, stop: int = sys.maxsize, /) -> int return match.start() + start_index return -1 - __tracebackhide__ = self.__tracebackhide__ return self.buffer.find(sub, start, stop) # raise a TypeError if *sub* is not a string -@final class Block(SourceView[tuple[str, ...]], Sequence[str]): - """Block found by :meth:`~sphinx.testing.matcher.LineMatcher.find`. + """Block found by :meth:`~sphinx.testing.matcher.LineMatcher.find_blocks`. A block is a sequence of lines comparable to :class:`Line`, generally a - string (the line content) or a pair ``(line, line_offset)``. 
In addition, - a block can be compared to pair ``(block_lines, block_offset)`` where: + string (the line content) or a pair ``(line, line_offset)``. - - *block_lines* is a sequence of lines-like objects, and - - *block_offset* is an integer (matched against :attr:`offset`). + A block can also be compared to pair ``(block_lines, block_offset)`` where - Whenever a pair ``(line, line_offset)`` or ``(block, block_offset)`` - is needed, it can be any two-element sequence (e.g., tuple or list). + - *block_lines* is a sequence of line-like objects, and + - *block_offset* is an integer (matched against :attr:`offset`). - For instance,:: + Pairs ``(line, line_offset)`` or ``(block_lines, block_offset)`` can be any + non-string two-elements sequence (e.g., a tuple or a list), e.g:: - assert Block(['a', 'b', 'c'], 2) == ['a', ('b', 3), Line('c', 4)] + assert Block(['a', 'b', 'c', 'd'], 2) == [ + 'a', + ('b', 3), + ['c', 4], + Line('d', 5), + ] .. note:: - By convention, ``block[i]`` or ``block[i:j]`` returns :class:`str` - or sequences of :class:`str`. Consider using :meth:`at` to get the - corresponding :class:`Line` or :class:`Block` values. + By convention, ``block[i]`` and ``block[i:j]`` return :class:`str` + and tuples of :class:`str` respectively. Consider using :meth:`at` + to convert the output to :class:`Line` or :class:`Block` objects. + + Similarly, ``iter(block)`` returns an iterator on strings. Consider + using :meth:`lines_iterator` to iterate over :class:`Line` objects. """ __slots__ = ('__cached_lines',) @@ -339,7 +324,6 @@ class Block(SourceView[tuple[str, ...]], Sequence[str]): def __init__( self, buffer: Iterable[str] = (), /, offset: int = 0, *, _check: bool = True ) -> None: - __tracebackhide__ = self.__tracebackhide__ # It is more efficient to first consume everything and then # iterate over the values for checks rather than to add the # validated values one by one. 
@@ -351,7 +335,13 @@ def __init__( raise TypeError(err) super().__init__(buffer, offset, _check=_check) - self.__cached_lines: list[object] | None = None + self.__cached_lines: tuple[Line, ...] | None = None + """This block as a tuple of :class:`Line` objects. + + The rationale behind duplicating the buffer's data is to ease + comparison by relying on the C API for comparing lists which + dispatches to the :class:`Line` comparison operators. + """ @property def window(self) -> slice: @@ -424,7 +414,6 @@ def index(self, target: BlockMatch, start: int = 0, stop: int = sys.maxsize, /) block.index(target, ...) block.index(target.match, ...) """ - __tracebackhide__ = self.__tracebackhide__ return super().index(target, start, stop) def find(self, target: BlockMatch, start: int = 0, stop: int = sys.maxsize, /) -> int: @@ -447,6 +436,17 @@ def find(self, target: BlockMatch, start: int = 0, stop: int = sys.maxsize, /) - return self.buffer.index(target, start, stop) return -1 + def lines(self) -> tuple[Line, ...]: + """This block as a tuple of :class:`Line` objects.""" + if self.__cached_lines is None: + self.__cached_lines = tuple(self.lines_iterator()) + return self.__cached_lines + + def lines_iterator(self) -> Iterator[Line]: + """This block as a list of :class:`Line` objects.""" + for index, line in enumerate(self, self.offset): + yield Line(line, index, _check=False) + # fmt: off @overload def at(self, index: int, /) -> Line: ... # NoQA: E704 @@ -473,7 +473,6 @@ def __getitem__(self, index: slice, /) -> Sequence[str]: ... 
# NoQA: E704 # fmt: on def __getitem__(self, index: int | slice, /) -> str | Sequence[str]: # NoQA: E301 """Get a line or a contiguous sub-block.""" - __tracebackhide__ = self.__tracebackhide__ if isinstance(index, slice): # normalize negative and None slice fields _, _, step = index.indices(self.length) @@ -483,8 +482,6 @@ def __getitem__(self, index: int | slice, /) -> str | Sequence[str]: # NoQA: E3 return self.buffer[index] def __eq__(self, other: object, /) -> bool: - __tracebackhide__ = self.__tracebackhide__ - if isinstance(other, self.__class__): # more efficient to first check the offsets return (self.offset, self.buffer) == (other.offset, other.buffer) @@ -494,19 +491,14 @@ def __eq__(self, other: object, /) -> bool: return NotImplemented lines, offset = other - # check offsets before computing len(lines) or len(self) + # check offsets before computing len(lines) if offset != -1 and offset != self.offset: return False - if len(lines) == self.length: - # match the lines one by one, possibly using a rich comparison - expect = self.__lines_iterator() - return all(map(operator.__eq__, expect, lines)) - return False + # check the lengths before computing the cached lines if possible + return self.length == len(lines) and self.lines() == lines def __lt__(self, other: object, /) -> bool: - __tracebackhide__ = self.__tracebackhide__ - if isinstance(other, self.__class__): # More efficient to first check if the indices are valid before # checking the lines using tuple comparisons (both objects have @@ -521,13 +513,11 @@ def __lt__(self, other: object, /) -> bool: lines, other_offset = other if other_offset != -1: aligned = _can_be_strict_in(self.offset, self.length, other_offset, len(lines)) - return aligned and self.__lines() < lines + return aligned and self.lines() < lines # we want to find this block in the *other* block (at any place) - return self.__lines() < lines + return self.lines() < lines def __gt__(self, other: object, /) -> bool: - __tracebackhide__ 
= self.__tracebackhide__ - if isinstance(other, self.__class__): return other < self @@ -538,21 +528,12 @@ def __gt__(self, other: object, /) -> bool: lines, other_offset = other if other_offset != -1: aligned = _can_be_strict_in(other_offset, len(lines), self.offset, self.length) - return aligned and self.__lines() > lines - return self.__lines() > lines + return aligned and self.lines() > lines + return self.lines() > lines - # Do not annotate with list[Line] since otherwise mypy complains - # when comparing with a right-hand side that is a list of objects. - def __lines(self) -> list[object]: - """This block as a list of :class:`Line` objects.""" - if self.__cached_lines is None: - self.__cached_lines = list(self.__lines_iterator()) - return self.__cached_lines - def __lines_iterator(self) -> Iterator[Line]: - """This block as a list of :class:`Line` objects.""" - for index, line in enumerate(self, self.offset): - yield Line(line, index, _check=False) +# Those functions are private and are not included in :class:`Line` +# or :class:`Block` to minimize the size of the class dictionary. def _parse_non_string(other: object, /) -> tuple[str, int] | None: @@ -582,7 +563,7 @@ def _is_block_line_compatible(other: object, /) -> bool: return False -def _parse_non_block(other: object, /) -> tuple[list[object], int] | None: +def _parse_non_block(other: object, /) -> tuple[tuple[object, ...], int] | None: """Try to parse *other* as a pair ``(block lines, block offset)``. 
For efficiency, do *not* call this method on :class:`Block` instances @@ -593,7 +574,7 @@ def _parse_non_block(other: object, /) -> tuple[list[object], int] | None: if all(map(_is_block_line_compatible, other)): # offset will never be given in this scenario - return list(other), -1 + return tuple(other), -1 if len(other) == 2: lines, offset = other @@ -607,7 +588,7 @@ def _parse_non_block(other: object, /) -> tuple[list[object], int] | None: if not all(map(_is_block_line_compatible, lines)): return None - return list(lines), offset + return tuple(lines), offset return None diff --git a/sphinx/testing/_matcher/engine.py b/sphinx/testing/_matcher/engine.py index b286126a912..768d9cda992 100644 --- a/sphinx/testing/_matcher/engine.py +++ b/sphinx/testing/_matcher/engine.py @@ -16,12 +16,10 @@ _LinePatternT = TypeVar('_LinePatternT', str, re.Pattern[str]) - def _check_flavor(flavor: Flavor) -> None: allowed: Sequence[Flavor] = ('none', 'fnmatch', 're') if flavor not in allowed: - __tracebackhide__ = True - msg = f'unknown flavor: {flavor!r} (choose from {tuple(map(repr, allowed))})' + msg = f'unknown flavor: {flavor!r} (choose from: {allowed})' raise ValueError(msg) @@ -90,7 +88,6 @@ def to_block_pattern(expect: LinePattern | Sequence[LinePattern]) -> Sequence[Li if isinstance(expect, re.Pattern): return (expect,) if not isinstance(expect, Sequence): - __tracebackhide__ = True msg = f'expecting a sequence of patterns, got: {expect!r}' raise TypeError(msg) return tuple(expect) diff --git a/sphinx/testing/_matcher/options.py b/sphinx/testing/_matcher/options.py index 4b0c0b4b02b..dd90bc0a6af 100644 --- a/sphinx/testing/_matcher/options.py +++ b/sphinx/testing/_matcher/options.py @@ -177,7 +177,6 @@ class Configurable: """Mixin supporting a known set of options.""" __slots__ = ('_options',) - __tracebackhide__: bool = True default_options: ClassVar[CompleteOptions] = CompleteOptions( keep_ansi=True, @@ -243,7 +242,6 @@ def get_option(self, name: FlavorOption, /) -> 
Flavor: ... # NoQA: E704 # fmt: on def get_option(self, name: OptionName, /) -> object: # NoQA: E301 """Get a known option value, or its default value.""" - __tracebackhide__ = self.__tracebackhide__ if name in self._options: return self._options[name] return self.default_options[name] @@ -262,5 +260,4 @@ def set_option(self, name: FlavorOption, value: Flavor, /) -> None: ... # NoQA: # fmt: on def set_option(self, name: OptionName, value: OptionValue, /) -> None: # NoQA: E301 """Set a persistent option value.""" - __tracebackhide__ = self.__tracebackhide__ self._options[name] = value diff --git a/sphinx/testing/matcher.py b/sphinx/testing/matcher.py index 4960bafb613..604f206a1bc 100644 --- a/sphinx/testing/matcher.py +++ b/sphinx/testing/matcher.py @@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, cast from sphinx.testing._matcher import cleaner, engine, util -from sphinx.testing._matcher.buffer import Block, Line +from sphinx.testing._matcher.buffer import Block from sphinx.testing._matcher.options import Configurable, Options if TYPE_CHECKING: @@ -18,6 +18,7 @@ from typing_extensions import Self, Unpack + from sphinx.testing._matcher.buffer import Line from sphinx.testing._matcher.options import Flavor from sphinx.testing._matcher.util import LinePattern @@ -63,8 +64,7 @@ def from_lines(cls, lines: Iterable[str] = (), /, **options: Unpack[Options]) -> def __iter__(self) -> Iterator[Line]: """An iterator on the cached lines.""" - # we do not use Line.view to avoid checking the type of each line - yield from (Line(s, i, _check=False) for i, s in enumerate(self.lines())) + return self.lines().lines_iterator() @contextlib.contextmanager def override(self, /, **options: Unpack[Options]) -> Generator[None, None, None]: @@ -89,22 +89,19 @@ def lines(self) -> Block: cached = stack[-1] if cached is None: - # compute for the first time the value options = self.default_options | cast(Options, self.options) - # use the *same* type as a block's buffer to speed-up the Block's 
constructor + # compute for the first time the block's lines lines = tuple(cleaner.clean_text(self.content, **options)) # check if the value is the same as any of a previously cached value for addr, value in enumerate(itertools.islice(stack, 0, len(stack) - 1)): if isinstance(value, int): cached = cast(Block, stack[value]) - assert isinstance(cached.buffer, tuple) if cached.buffer == lines: - # compare only the lines (C interface) - stack[-1] = value # indirection + # compare only the lines as strings + stack[-1] = value # indirection near to beginning return cached if isinstance(value, Block): - assert isinstance(value.buffer, tuple) if value.buffer == lines: stack[-1] = addr # indirection return value @@ -118,7 +115,6 @@ def lines(self) -> Block: assert isinstance(value, Block) return value - assert isinstance(cached, Block) return cached def find( @@ -132,8 +128,8 @@ def iterfind( ) -> Iterator[Line]: """Yield the lines that match one (or more) of the given patterns. - When one or more patterns are given, the order of evaluation is the - same as they are given (or arbitrary if they are given in a set). + :param expect: One or more patterns to satisfy. + :param flavor: Optional temporary flavor for non-compiled patterns. """ patterns = engine.to_line_patterns(expect) if not patterns: # nothinig to match @@ -160,7 +156,7 @@ def iterfind_blocks( """Yield non-overlapping blocks matching the given line patterns. :param expect: The line patterns that a block must satisfy. - :param flavor: Optional temporary flavor for string patterns. + :param flavor: Optional temporary flavor for non-compiled patterns. :return: An iterator on the matching blocks. When *expect* is a single string, it is split into lines, each of @@ -213,9 +209,8 @@ def assert_match( :param expect: One or more patterns the lines must satisfy. :param count: If specified, the exact number of matching lines. - :param flavor: Optional temporary flavor for string patterns. 
+ :param flavor: Optional temporary flavor for non-compiled patterns. """ - __tracebackhide__ = True patterns = engine.to_line_patterns(expect) self._assert_found('line', patterns, count=count, flavor=flavor) @@ -231,9 +226,8 @@ def assert_no_match( :param expect: One or more patterns the lines must not satisfy. :param context: Number of lines to print around a failing line. - :param flavor: Optional temporary flavor for string patterns. + :param flavor: Optional temporary flavor for non-compiled patterns. """ - __tracebackhide__ = True patterns = engine.to_line_patterns(expect) self._assert_not_found('line', patterns, context_size=context, flavor=flavor) @@ -249,12 +243,11 @@ def assert_lines( :param expect: The line patterns that a block must satisfy. :param count: The number of blocks that should be found. - :param flavor: Optional temporary flavor for string patterns. + :param flavor: Optional temporary flavor for non-compiled patterns. When *expect* is a single string, it is split into lines, each of which corresponding to the pattern a block's line must satisfy. """ - __tracebackhide__ = True patterns = engine.to_block_pattern(expect) self._assert_found('block', patterns, count=count, flavor=flavor) @@ -270,14 +263,13 @@ def assert_no_lines( :param expect: The line patterns that a block must satisfy. :param context: Number of lines to print around a failing block. - :param flavor: Optional temporary flavor for string patterns. + :param flavor: Optional temporary flavor for non-compiled patterns. When *expect* is a single string, it is split into lines, each of which corresponding to the pattern a block's line must satisfy. Use :data:`sys.maxsize` to show all capture lines. 
""" - __tracebackhide__ = True patterns = engine.to_block_pattern(expect) self._assert_not_found('block', patterns, context_size=context, flavor=flavor) @@ -299,8 +291,6 @@ def _assert_found( ctx = util.highlight(self.lines(), keepends=keepends) pat = util.prettify_patterns(patterns, sort=pattern_type == 'line') logs = [f'{pattern_type} pattern', pat, 'not found in', ctx] - - __tracebackhide__ = True raise AssertionError('\n\n'.join(logs)) indices = {block.offset: len(block) for block in blocks} @@ -312,8 +302,6 @@ def _assert_found( pat = util.prettify_patterns(patterns, sort=pattern_type == 'line') noun = util.plural_form(pattern_type, count) logs = [f'found {found} != {count} {noun} matching', pat, 'in', ctx] - - __tracebackhide__ = True raise AssertionError('\n\n'.join(logs)) def _assert_not_found( @@ -343,8 +331,6 @@ def _assert_not_found( block_object = Block(block, start, _check=False) ctx = util.get_debug_context(lines, block_object, context_size) logs = [f'{pattern_type} pattern', pat, 'found in', '\n'.join(ctx)] - - __tracebackhide__ = True raise AssertionError('\n\n'.join(logs)) def __compile( From 3fec5a86e562cf8125e965b1be6dc86138bf48b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 4 Apr 2024 13:48:39 +0200 Subject: [PATCH 36/66] cleanup --- sphinx/testing/_matcher/engine.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sphinx/testing/_matcher/engine.py b/sphinx/testing/_matcher/engine.py index 768d9cda992..e40a3db7196 100644 --- a/sphinx/testing/_matcher/engine.py +++ b/sphinx/testing/_matcher/engine.py @@ -16,6 +16,7 @@ _LinePatternT = TypeVar('_LinePatternT', str, re.Pattern[str]) + def _check_flavor(flavor: Flavor) -> None: allowed: Sequence[Flavor] = ('none', 'fnmatch', 're') if flavor not in allowed: From e6ac4a95396a9aad40bac23fd6bb931a54925b27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: 
Thu, 4 Apr 2024 16:18:01 +0200 Subject: [PATCH 37/66] cleanup --- sphinx/testing/_matcher/buffer.py | 4 - sphinx/testing/_matcher/cleaner.py | 30 ++-- sphinx/testing/_matcher/engine.py | 6 - sphinx/testing/_matcher/options.py | 168 +++++++++++++++++---- sphinx/testing/_matcher/util.py | 4 +- sphinx/testing/matcher.py | 43 +++--- sphinx/testing/util.py | 6 +- tests/test_testing/test_matcher.py | 34 +++-- tests/test_testing/test_matcher_options.py | 64 ++++++-- 9 files changed, 249 insertions(+), 110 deletions(-) diff --git a/sphinx/testing/_matcher/buffer.py b/sphinx/testing/_matcher/buffer.py index d797361c740..87e6b317404 100644 --- a/sphinx/testing/_matcher/buffer.py +++ b/sphinx/testing/_matcher/buffer.py @@ -447,12 +447,10 @@ def lines_iterator(self) -> Iterator[Line]: for index, line in enumerate(self, self.offset): yield Line(line, index, _check=False) - # fmt: off @overload def at(self, index: int, /) -> Line: ... # NoQA: E704 @overload def at(self, index: slice, /) -> Self: ... # NoQA: E704 - # fmt: on def at(self, index: int | slice, /) -> Line | Block: # NoQA: E301 """Get a :class:`Line` or a contiguous sub-:class:`Block`.""" if isinstance(index, slice): @@ -465,12 +463,10 @@ def at(self, index: int | slice, /) -> Line | Block: # NoQA: E301 start, _, _ = slice(index, -1).indices(self.length) return Line(self.buffer[index], self.offset + start, _check=False) - # fmt: off @overload def __getitem__(self, index: int, /) -> str: ... # NoQA: E704 @overload def __getitem__(self, index: slice, /) -> Sequence[str]: ... 
# NoQA: E704 - # fmt: on def __getitem__(self, index: int | slice, /) -> str | Sequence[str]: # NoQA: E301 """Get a line or a contiguous sub-block.""" if isinstance(index, slice): diff --git a/sphinx/testing/_matcher/cleaner.py b/sphinx/testing/_matcher/cleaner.py index 8278340aecf..47a74d4fc66 100644 --- a/sphinx/testing/_matcher/cleaner.py +++ b/sphinx/testing/_matcher/cleaner.py @@ -10,6 +10,7 @@ from typing import TYPE_CHECKING from sphinx.testing._matcher import engine, util +from sphinx.testing._matcher.options import OptionsHolder from sphinx.util.console import strip_escape_sequences if TYPE_CHECKING: @@ -31,34 +32,31 @@ def clean_text(text: str, /, **options: Unpack[Options]) -> Iterable[str]: """Clean a text, returning an iterable of lines.""" - if not options.get('keep_ansi', True): - text = strip_escape_sequences(text) + config = OptionsHolder(**options) - strip = options.get('strip', False) - text = strip_chars(text, strip) + if not config.keep_ansi: + text = strip_escape_sequences(text) - keepends = options.get('keepends', False) - lines = text.splitlines(keepends) + text = strip_chars(text, config.strip) + lines = text.splitlines(config.keep_break) return clean_lines(lines, **options) def clean_lines(lines: Iterable[str], /, **options: Unpack[Options]) -> Iterable[str]: """Clean an iterable of lines.""" - stripline = options.get('stripline', False) - lines = strip_lines(lines, stripline) + config = OptionsHolder(**options) + + lines = strip_lines(lines, config.stripline) - keep_empty = options.get('keep_empty', True) - compress = options.get('compress', False) - unique = options.get('unique', False) + keep_empty, compress, unique = config.keep_empty, config.compress, config.unique lines = filter_lines(lines, keep_empty=keep_empty, compress=compress, unique=unique) - delete = options.get('delete', ()) - flavor = options.get('flavor', 'none') - lines = prune_lines(lines, delete, flavor=flavor) + deleter_objects, flavor = config.delete, config.flavor + 
lines = prune_lines(lines, deleter_objects, flavor=flavor) - ignore = options.get('ignore', None) - lines = ignore_lines(lines, ignore) + ignore_predicate = config.ignore + lines = ignore_lines(lines, ignore_predicate) return lines diff --git a/sphinx/testing/_matcher/engine.py b/sphinx/testing/_matcher/engine.py index e40a3db7196..2a8a624930d 100644 --- a/sphinx/testing/_matcher/engine.py +++ b/sphinx/testing/_matcher/engine.py @@ -30,14 +30,12 @@ def _sort_pattern(s: str | re.Pattern[str]) -> tuple[str, int, int]: return (s.pattern, s.flags, s.groups) -# fmt: off @overload def to_line_patterns(expect: str) -> tuple[str]: ... # NoQA: E704 @overload def to_line_patterns(expect: re.Pattern[str]) -> tuple[re.Pattern[str]]: ... # NoQA: E704 @overload def to_line_patterns(expect: Iterable[LinePattern], /) -> tuple[LinePattern, ...]: ... # NoQA: E704 -# fmt: on def to_line_patterns(expect: LinePattern | Iterable[LinePattern]) -> Sequence[LinePattern]: # NoqA: E302 """Get a read-only sequence of line-matching patterns. @@ -64,14 +62,12 @@ def to_line_patterns(expect: LinePattern | Iterable[LinePattern]) -> Sequence[Li return tuple(expect) -# fmt: off @overload def to_block_pattern(expect: str) -> tuple[str, ...]: ... # NoQA: E704 @overload def to_block_pattern(expect: re.Pattern[str]) -> tuple[re.Pattern[str]]: ... # NoQA: E704 @overload def to_block_pattern(expect: Sequence[LinePattern]) -> Sequence[LinePattern]: ... # NoQA: E704 -# fmt: on def to_block_pattern(expect: LinePattern | Sequence[LinePattern]) -> Sequence[LinePattern]: # NoQA: E302 r"""Get a read-only sequence for a s single block pattern. @@ -94,12 +90,10 @@ def to_block_pattern(expect: LinePattern | Sequence[LinePattern]) -> Sequence[Li return tuple(expect) -# fmt: off @overload def transform(fn: Callable[[str], str], x: str, /) -> str: ... # NoQA: E704 @overload def transform(fn: Callable[[str], str], x: re.Pattern[str], /) -> re.Pattern[str]: ... 
# NoQA: E704 -# fmt: on def transform(fn: Callable[[str], str], x: LinePattern, /) -> LinePattern: # NoQA: E302 """Transform regular expressions, leaving compiled patterns untouched.""" return fn(x) if isinstance(x, str) else x diff --git a/sphinx/testing/_matcher/options.py b/sphinx/testing/_matcher/options.py index dd90bc0a6af..d0bdcaab824 100644 --- a/sphinx/testing/_matcher/options.py +++ b/sphinx/testing/_matcher/options.py @@ -14,7 +14,7 @@ from sphinx.testing._matcher.util import LinePattern - FlagOption = Literal['keep_ansi', 'keepends', 'keep_empty', 'compress', 'unique'] + FlagOption = Literal['keep_ansi', 'keep_break', 'keep_empty', 'compress', 'unique'] StripOption = Literal['strip', 'stripline'] StripChars = Union[bool, str, None] @@ -22,7 +22,7 @@ DeleteOption = Literal['delete'] DeletePattern = Union[LinePattern, Sequence[LinePattern]] - FilteringOption = Literal['ignore'] + IgnoreOption = Literal['ignore'] LinePredicate = Callable[[str], object] FlavorOption = Literal['flavor'] @@ -30,8 +30,8 @@ # For some reason, mypy does not like Union of Literal, # so we wrap the Literal types inside a bigger Literal. - OptionName = Literal[FlagOption, StripOption, DeleteOption, FilteringOption, FlavorOption] - OptionValue = Union[bool, StripChars, DeletePattern, LinePredicate, Flavor] + OptionName = Literal[FlagOption, StripOption, DeleteOption, IgnoreOption, FlavorOption] + OptionValue = Union[bool, StripChars, DeletePattern, Union[LinePredicate, None], Flavor] DT = TypeVar('DT') _OptionsView = Union['Options', 'CompleteOptions'] @@ -77,7 +77,7 @@ class Options(TypedDict, total=False): * a string (*chars*) -- remove leading and trailing characters in *chars*. """ - keepends: bool + keep_break: bool """If true, keep line breaks in the output. The default value is ``False``. 
@@ -162,7 +162,7 @@ class CompleteOptions(TypedDict): strip: StripChars stripline: StripChars - keepends: bool + keep_break: bool keep_empty: bool compress: bool unique: bool @@ -173,16 +173,16 @@ class CompleteOptions(TypedDict): flavor: Flavor -class Configurable: +class OptionsHolder: """Mixin supporting a known set of options.""" - __slots__ = ('_options',) + __slots__ = ('__options',) default_options: ClassVar[CompleteOptions] = CompleteOptions( keep_ansi=True, strip=False, stripline=False, - keepends=False, + keep_break=False, keep_empty=True, compress=False, unique=False, @@ -195,13 +195,13 @@ class Configurable: Subclasses should override this field for different default options. """ - def __init__(self, /, *args: object, **options: Unpack[Options]) -> None: - self._options = options + def __init__(self, /, **options: Unpack[Options]) -> None: + self.__options = options @property def options(self) -> Mapping[str, object]: # cannot use CompleteOptions :( """A read-only view on the current mapping of options.""" - return MappingProxyType(self._options) + return MappingProxyType(self.__options) @contextlib.contextmanager def use(self, /, **options: Unpack[Options]) -> Generator[None, None, None]: @@ -213,51 +213,161 @@ def use(self, /, **options: Unpack[Options]) -> Generator[None, None, None]: @contextlib.contextmanager def override(self, /, **options: Unpack[Options]) -> Generator[None, None, None]: """Temporarily extend the set of options with *options*.""" - saved_options = self._options.copy() - self._options |= options + saved_options = self.__options.copy() + self.__options |= options try: yield finally: - self._options = saved_options + self.__options = saved_options # When an option is added, add an overloaded definition # so that mypy can correctly deduce the option's type. # - # fmt: off # boolean-like options @overload def get_option(self, name: FlagOption, /) -> bool: ... 
# NoQA: E704 + @overload + def get_option(self, name: FlagOption, default: bool, /) -> bool: ... # NoQA: E704 + @overload + def get_option(self, name: FlagOption, default: DT, /) -> bool | DT: ... # NoQA: E704 # strip-like options @overload def get_option(self, name: StripOption, /) -> StripChars: ... # NoQA: E704 + @overload + def get_option(self, name: StripOption, default: StripChars, /) -> StripChars: ... # NoQA: E704 + @overload + def get_option(self, name: StripOption, default: DT, /) -> StripChars | DT: ... # NoQA: E704 # delete prefix/suffix option @overload def get_option(self, name: DeleteOption, /) -> DeletePattern: ... # NoQA: E704 + @overload + def get_option(self, name: DeleteOption, default: DeletePattern, /) -> DeletePattern: ... # NoQA: E704 + @overload + def get_option(self, name: DeleteOption, default: DT, /) -> DeletePattern | DT: ... # NoQA: E704 # filtering options @overload - def get_option(self, name: FilteringOption, /) -> LinePredicate | None: ... # NoQA: E704 + def get_option(self, name: IgnoreOption, /) -> LinePredicate | None: ... # NoQA: E704 + @overload # NoQA: E301 + def get_option( # NoQA: E704 + self, name: IgnoreOption, default: LinePredicate | None, / + ) -> LinePredicate | None: ... + @overload # NoQA: E301 + def get_option( # NoQA: E704 + self, name: IgnoreOption, default: DT, / + ) -> LinePredicate | None | DT: ... # miscellaneous options @overload def get_option(self, name: FlavorOption, /) -> Flavor: ... # NoQA: E704 - # fmt: on - def get_option(self, name: OptionName, /) -> object: # NoQA: E301 - """Get a known option value, or its default value.""" - if name in self._options: - return self._options[name] - return self.default_options[name] - - # fmt: off @overload - def set_option(self, name: FlagOption, value: bool, /) -> None: ... # NoQA: E704 + def get_option(self, name: FlavorOption, default: Flavor, /) -> Flavor: ... 
# NoQA: E704 + @overload + def get_option(self, name: FlavorOption, default: DT, /) -> Flavor | DT: ... # NoQA: E704 + def get_option(self, name: OptionName, /, *default: object) -> object: # NoQA: E301 + """Get a known option value, or a default value.""" + if name in self.__options: + return self.__options[name] + return default[0] if default else self.default_options[name] + + @overload + def set_option(self, name: FlagOption, value: bool, /) -> None: ... # NoQA: E704 @overload def set_option(self, name: StripOption, value: StripChars, /) -> None: ... # NoQA: E704 @overload def set_option(self, name: DeleteOption, value: DeletePattern, /) -> None: ... # NoQA: E704 @overload - def set_option(self, name: FilteringOption, value: LinePredicate | None, /) -> None: ... # NoQA: E704 + def set_option(self, name: IgnoreOption, value: LinePredicate | None, /) -> None: ... # NoQA: E704 @overload def set_option(self, name: FlavorOption, value: Flavor, /) -> None: ... # NoQA: E704 - # fmt: on def set_option(self, name: OptionName, value: OptionValue, /) -> None: # NoQA: E301 """Set a persistent option value.""" - self._options[name] = value + self.__options[name] = value + + @property + def keep_ansi(self) -> bool: + """See :attr:`Options.keep_ansi`.""" + return self.get_option('keep_ansi') + + @keep_ansi.setter + def keep_ansi(self, value: bool) -> None: + self.set_option('keep_ansi', value) + + @property + def strip(self) -> StripChars: + """See :attr:`Options.strip`.""" + return self.get_option('strip') + + @strip.setter + def strip(self, value: StripChars) -> None: + self.set_option('strip', value) + + @property + def stripline(self) -> StripChars: + """See :attr:`Options.stripline`.""" + return self.get_option('stripline') + + @stripline.setter + def stripline(self, value: StripChars) -> None: + self.set_option('stripline', value) + + @property + def keep_break(self) -> bool: + """See :attr:`Options.keep_break`.""" + return self.get_option('keep_break') + + 
@keep_break.setter + def keep_break(self, value: bool) -> None: + self.set_option('keep_break', value) + + @property + def keep_empty(self) -> bool: + """See :attr:`Options.keep_empty`.""" + return self.get_option('keep_empty') + + @keep_empty.setter + def keep_empty(self, value: bool) -> None: + self.set_option('keep_empty', value) + + @property + def compress(self) -> bool: + """See :attr:`Options.compress`.""" + return self.get_option('compress') + + @compress.setter + def compress(self, value: bool) -> None: + self.set_option('compress', value) + + @property + def unique(self) -> bool: + """See :attr:`Options.unique`.""" + return self.get_option('unique') + + @unique.setter + def unique(self, value: bool) -> None: + self.set_option('unique', value) + + @property + def delete(self) -> DeletePattern: + """See :attr:`Options.delete`.""" + return self.get_option('delete') + + @delete.setter + def delete(self, value: DeletePattern) -> None: + self.set_option('delete', value) + + @property + def ignore(self) -> LinePredicate | None: + """See :attr:`Options.ignore`.""" + return self.get_option('ignore') + + @ignore.setter + def ignore(self, value: LinePredicate | None) -> None: + self.set_option('ignore', value) + + @property + def flavor(self) -> Flavor: + """See :attr:`Options.flavor`.""" + return self.get_option('flavor') + + @flavor.setter + def flavor(self, value: Flavor) -> None: + self.set_option('flavor', value) diff --git a/sphinx/testing/_matcher/util.py b/sphinx/testing/_matcher/util.py index 4deac77e53a..b8a448b67cb 100644 --- a/sphinx/testing/_matcher/util.py +++ b/sphinx/testing/_matcher/util.py @@ -114,7 +114,6 @@ def make_prefix(indent: int, /, *, highlight: bool = False) -> str: return f'>{prefix[1:]}' if highlight else prefix -# fmt: off @overload def indent_source( # NoQA: E704 text: str, /, *, sep: Never = ..., indent: int = ..., highlight: bool = ... 
@@ -122,8 +121,7 @@ def indent_source( # NoQA: E704 @overload # NoQA: E302 def indent_source( # NoQA: E704 lines: Iterable[str], /, *, sep: str = ..., indent: int = ..., highlight: bool = ... -) -> str: ... -# fmt: on +) -> str: ... def indent_source( # NoQA: E302 src: Iterable[str], /, *, sep: str = '\n', indent: int = 4, highlight: bool = False ) -> str: diff --git a/sphinx/testing/matcher.py b/sphinx/testing/matcher.py index 604f206a1bc..8776a575e98 100644 --- a/sphinx/testing/matcher.py +++ b/sphinx/testing/matcher.py @@ -8,30 +8,29 @@ from sphinx.testing._matcher import cleaner, engine, util from sphinx.testing._matcher.buffer import Block -from sphinx.testing._matcher.options import Configurable, Options +from sphinx.testing._matcher.options import Options, OptionsHolder if TYPE_CHECKING: from collections.abc import Collection, Generator, Iterable, Iterator, Sequence from io import StringIO from re import Pattern - from typing import Literal + from typing import ClassVar, Literal from typing_extensions import Self, Unpack from sphinx.testing._matcher.buffer import Line - from sphinx.testing._matcher.options import Flavor + from sphinx.testing._matcher.options import CompleteOptions, Flavor from sphinx.testing._matcher.util import LinePattern PatternType = Literal['line', 'block'] -class LineMatcher(Configurable): +class LineMatcher(OptionsHolder): """Helper object for matching output lines.""" - __slots__ = ('_content', '_stack') + __slots__ = ('__content', '__stack') - # make sure to have an independent object - default_options = Configurable.default_options.copy() + default_options: ClassVar[CompleteOptions] = OptionsHolder.default_options.copy() def __init__(self, content: str | StringIO, /, **options: Unpack[Options]) -> None: """Construct a :class:`LineMatcher` for the given string content. @@ -39,10 +38,10 @@ def __init__(self, content: str | StringIO, /, **options: Unpack[Options]) -> No :param content: The source string. 
:param options: The matcher options. """ - self._content = content if isinstance(content, str) else content.getvalue() - # stack of cached cleaned lines (with a possible indirection) - self._stack: list[int | Block | None] = [None] super().__init__(**options) + self.__content = content if isinstance(content, str) else content.getvalue() + # stack of cached cleaned lines (with a possible indirection) + self.__stack: list[int | Block | None] = [None] @classmethod def from_lines(cls, lines: Iterable[str] = (), /, **options: Unpack[Options]) -> Self: @@ -57,9 +56,8 @@ def from_lines(cls, lines: Iterable[str] = (), /, **options: Unpack[Options]) -> By default, the lines are assumed *not* to have line breaks (since this is usually what is the most common). """ - # only compute the default options if needeed ('keepends' is a boolean) - keepends = options.get('keepends') or cls.default_options['keepends'] - glue = '' if keepends else '\n' + keep_break = options.get('keep_break', cls.default_options['keep_break']) + glue = '' if keep_break else '\n' return cls(glue.join(lines), **options) def __iter__(self) -> Iterator[Line]: @@ -68,15 +66,16 @@ def __iter__(self) -> Iterator[Line]: @contextlib.contextmanager def override(self, /, **options: Unpack[Options]) -> Generator[None, None, None]: - self._stack.append(None) # prepare the next cache entry + """Temporarily extend the set of options with *options*.""" + self.__stack.append(None) # prepare the next cache entry with super().override(**options): yield - self._stack.pop() # pop the cached lines + self.__stack.pop() # pop the cached lines @property def content(self) -> str: """The raw content.""" - return self._content + return self.__content def lines(self) -> Block: """The content lines, cleaned up according to the current options. @@ -84,7 +83,7 @@ def lines(self) -> Block: This method is efficient in the sense that the lines are computed once per set of options and cached for subsequent calls. 
""" - stack = self._stack + stack = self.__stack assert stack, 'invalid stack state' cached = stack[-1] @@ -111,7 +110,7 @@ def lines(self) -> Block: return cached if isinstance(cached, int): - value = self._stack[cached] + value = self.__stack[cached] assert isinstance(value, Block) return value @@ -287,8 +286,7 @@ def _assert_found( if next(blocks, None): return - keepends = self.get_option('keepends') - ctx = util.highlight(self.lines(), keepends=keepends) + ctx = util.highlight(self.lines(), keepends=self.keep_break) pat = util.prettify_patterns(patterns, sort=pattern_type == 'line') logs = [f'{pattern_type} pattern', pat, 'not found in', ctx] raise AssertionError('\n\n'.join(logs)) @@ -297,8 +295,7 @@ def _assert_found( if (found := len(indices)) == count: return - keepends = self.get_option('keepends') - ctx = util.highlight(self.lines(), indices, keepends=keepends) + ctx = util.highlight(self.lines(), indices, keepends=self.keep_break) pat = util.prettify_patterns(patterns, sort=pattern_type == 'line') noun = util.plural_form(pattern_type, count) logs = [f'found {found} != {count} {noun} matching', pat, 'in', ctx] @@ -336,5 +333,5 @@ def _assert_not_found( def __compile( self, patterns: Iterable[LinePattern], *, flavor: Flavor | None ) -> Sequence[Pattern[str]]: - flavor = self.get_option('flavor') if flavor is None else flavor + flavor = self.flavor if flavor is None else flavor return engine.compile(patterns, flavor=flavor) diff --git a/sphinx/testing/util.py b/sphinx/testing/util.py index 1f5de684614..3afbece6cc8 100644 --- a/sphinx/testing/util.py +++ b/sphinx/testing/util.py @@ -25,13 +25,13 @@ if TYPE_CHECKING: from collections.abc import Mapping from pathlib import Path - from typing import Any + from typing import Any, ClassVar from xml.etree.ElementTree import ElementTree from docutils.nodes import Node from typing_extensions import Unpack - from sphinx.testing._matcher.options import Options + from sphinx.testing._matcher.options import 
CompleteOptions, Options def assert_node(node: Node, cls: Any = None, xpath: str = "", **kwargs: Any) -> None: @@ -81,7 +81,7 @@ def etree_parse(path: str | os.PathLike[str]) -> ElementTree: class _SphinxLineMatcher(LineMatcher): - default_options = LineMatcher.default_options.copy() + default_options: ClassVar[CompleteOptions] = LineMatcher.default_options.copy() default_options['keep_ansi'] = False default_options['strip'] = True diff --git a/tests/test_testing/test_matcher.py b/tests/test_testing/test_matcher.py index 4ddeb21fe30..1d955f21046 100644 --- a/tests/test_testing/test_matcher.py +++ b/tests/test_testing/test_matcher.py @@ -145,7 +145,8 @@ def test_matcher_cache(): source = [term.blue('hello'), '', 'world'] matcher = LineMatcher.from_lines(source) - stack = matcher._stack + stack_attribute = f'_{matcher.__class__.__name__.lstrip("_")}__stack' + stack = getattr(matcher, stack_attribute) assert len(stack) == 1 assert stack[0] is None @@ -418,14 +419,12 @@ def test_assert_lines_debug(lines, pattern, count, expect): assert parse_excinfo(exc_info) == expect -# fmt: off @pytest.mark.parametrize(('maxsize', 'start', 'count'), [ # combinations of integers (a, b, c) such that c >= 1 and a >= b + c (1, 0, 1), (2, 0, 1), (2, 0, 2), (2, 1, 1), (3, 0, 1), (3, 0, 2), (3, 0, 3), (3, 1, 1), (3, 1, 2), (3, 2, 1), -]) -# fmt: on +]) # fmt: skip @pytest.mark.parametrize('dedup', range(3)) def test_assert_no_lines(maxsize, start, count, dedup): # 'maxsize' might be smaller than start + (dedup + 1) * count @@ -437,10 +436,13 @@ def test_assert_no_lines(maxsize, start, count, dedup): matcher.assert_no_lines(source.main, context=0) assert parse_excinfo(exc_info) == [ - 'block pattern', '', + 'block pattern', + '', *util.indent_lines(source.main, indent=4, highlight=False), - '', 'found in', '', - *util.indent_lines(source.main, indent=4, highlight=True) + '', + 'found in', + '', + *util.indent_lines(source.main, indent=4, highlight=True), ] @@ -480,13 +482,19 @@ def 
test_assert_no_lines_debug( matcher.assert_no_lines(source.main, context=context_size) assert parse_excinfo(exc_info) == [ - 'block pattern', '', + 'block pattern', + '', *util.indent_lines(source.main, indent=4, highlight=False), - '', 'found in', '', + '', + 'found in', + '', *make_debug_context( source.main, - source.peek_prev(context_size), omit_prev, - source.peek_next(context_size), omit_next, - context_size=context_size, indent=4, - ) + source.peek_prev(context_size), + omit_prev, + source.peek_next(context_size), + omit_next, + context_size=context_size, + indent=4, + ), ] diff --git a/tests/test_testing/test_matcher_options.py b/tests/test_testing/test_matcher_options.py index 46ebf68e30d..f709128d7c1 100644 --- a/tests/test_testing/test_matcher_options.py +++ b/tests/test_testing/test_matcher_options.py @@ -3,13 +3,25 @@ from types import MappingProxyType from typing import TYPE_CHECKING -from sphinx.testing._matcher.options import CompleteOptions, Configurable, Options +import pytest + +from sphinx.testing._matcher.options import ( + CompleteOptions, + Options, + OptionsHolder, +) if TYPE_CHECKING: - from sphinx.testing._matcher.options import OptionName + from typing import ClassVar + + from sphinx.testing._matcher.options import ( + OptionName, + ) def test_options_class(): + assert len(Options.__annotations__) > 0, 'missing annotations' + # ensure that the classes are kept synchronized missing_keys = Options.__annotations__.keys() - CompleteOptions.__annotations__ assert not missing_keys, f'missing option(s): {", ".join(missing_keys)}' @@ -17,10 +29,17 @@ def test_options_class(): foreign_keys = CompleteOptions.__annotations__.keys() - Options.__annotations__ assert not foreign_keys, f'unknown option(s): {", ".join(foreign_keys)}' + for name in Options.__annotations__: + func = OptionsHolder.__dict__.get(name) + assert isinstance(func, property), f'missing property for option {name!r}' + assert func.fget is not None, f'missing getter for option 
{name!r}' + assert func.fset is not None, f'missing setter for option {name!r}' + assert func.fdel is None, f'extra deleter for option {name!r}' + def test_default_options(): """Check the synchronization of default options and classes in Sphinx.""" - default_options = Configurable.default_options.copy() + default_options = OptionsHolder.default_options.copy() processed = set() @@ -35,7 +54,7 @@ def check(option: OptionName, default: object) -> None: check('strip', False) check('stripline', False) - check('keepends', False) + check('keep_break', False) check('keep_empty', True) check('compress', False) check('unique', False) @@ -50,23 +69,42 @@ def check(option: OptionName, default: object) -> None: def test_get_option(): - class Object(Configurable): - default_options = Configurable.default_options.copy() - default_options['keepends'] = True + class Config(OptionsHolder): + default_options: ClassVar[CompleteOptions] = OptionsHolder.default_options.copy() + default_options['keep_break'] = True - obj = Object() + obj = Config() assert isinstance(obj.options, MappingProxyType) - assert 'keepends' not in obj.options - assert obj.get_option('keepends') is True - assert 'keepends' not in obj.options + assert 'keep_break' not in obj.options + assert obj.keep_break is True + assert obj.get_option('keep_break') is True + assert obj.get_option('keep_break', False) is False + + obj = Config(delete='abc') + assert obj.get_option('delete') == 'abc' + assert obj.get_option('delete', 'unused') == 'abc' def test_set_option(): - obj = Configurable() + obj = OptionsHolder() assert 'delete' not in obj.options - assert obj.get_option('delete') == () + assert obj.delete == () obj.set_option('delete', 'abc') + assert 'delete' in obj.options + assert obj.delete == 'abc' assert obj.get_option('delete') == 'abc' + assert obj.get_option('delete', 'unused') == 'abc' + + +@pytest.mark.parametrize('option', list(Options.__annotations__)) +def test_set_option_property_implementation(option: 
OptionName) -> None: + """Test that the implementation is correct and do not have typos.""" + obj, val = OptionsHolder(), object() # fresh sentinel for every option + # assert that the default value being returned is the correct one + assert obj.__class__.__dict__[option].fget(obj) is OptionsHolder.default_options[option] + obj.__class__.__dict__[option].fset(obj, val) + assert obj.get_option(option) is val + assert obj.__class__.__dict__[option].fget(obj) is val From dd2d267c8c182e3b03543bddeba75f2cc4d131bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 4 Apr 2024 16:19:09 +0200 Subject: [PATCH 38/66] cleanup --- tests/test_testing/test_matcher_options.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/test_testing/test_matcher_options.py b/tests/test_testing/test_matcher_options.py index f709128d7c1..21f06710822 100644 --- a/tests/test_testing/test_matcher_options.py +++ b/tests/test_testing/test_matcher_options.py @@ -5,18 +5,12 @@ import pytest -from sphinx.testing._matcher.options import ( - CompleteOptions, - Options, - OptionsHolder, -) +from sphinx.testing._matcher.options import CompleteOptions, Options, OptionsHolder if TYPE_CHECKING: from typing import ClassVar - from sphinx.testing._matcher.options import ( - OptionName, - ) + from sphinx.testing._matcher.options import OptionName def test_options_class(): From bc2070359173c1b357cc7ddff5826dbc5803a39e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 5 Apr 2024 18:10:05 +0200 Subject: [PATCH 39/66] fixup --- sphinx/testing/_matcher/__init__.py | 7 - sphinx/testing/_matcher/engine.py | 148 ---------- .../{matcher.py => matcher/__init__.py} | 185 +++++++----- .../cleaner.py => matcher/_cleaner.py} | 56 ++-- sphinx/testing/matcher/_engine.py | 169 +++++++++++ .../{_matcher/util.py => matcher/_util.py} | 28 +- 
.../testing/{_matcher => matcher}/buffer.py | 268 ++++++++++-------- .../testing/{_matcher => matcher}/options.py | 30 +- sphinx/testing/util.py | 2 +- tests/test_testing/test_matcher.py | 68 ++++- tests/test_testing/test_matcher_buffer.py | 6 +- tests/test_testing/test_matcher_cleaner.py | 6 +- tests/test_testing/test_matcher_engine.py | 31 +- tests/test_testing/test_matcher_options.py | 4 +- 14 files changed, 567 insertions(+), 441 deletions(-) delete mode 100644 sphinx/testing/_matcher/__init__.py delete mode 100644 sphinx/testing/_matcher/engine.py rename sphinx/testing/{matcher.py => matcher/__init__.py} (62%) rename sphinx/testing/{_matcher/cleaner.py => matcher/_cleaner.py} (80%) create mode 100644 sphinx/testing/matcher/_engine.py rename sphinx/testing/{_matcher/util.py => matcher/_util.py} (87%) rename sphinx/testing/{_matcher => matcher}/buffer.py (89%) rename sphinx/testing/{_matcher => matcher}/options.py (93%) diff --git a/sphinx/testing/_matcher/__init__.py b/sphinx/testing/_matcher/__init__.py deleted file mode 100644 index c7552fd3ebf..00000000000 --- a/sphinx/testing/_matcher/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -"""Private package for :class:`~sphinx.testing.matcher.LineMatcher`. - -Unless explicitly exported in ``__all__`` or stated otherwise at a module -level, any object provided in this package or any of its submodules is -considered an implementation detail and can be removed, changed, moved -without prior notice (even if its name is public). 
-""" diff --git a/sphinx/testing/_matcher/engine.py b/sphinx/testing/_matcher/engine.py deleted file mode 100644 index 2a8a624930d..00000000000 --- a/sphinx/testing/_matcher/engine.py +++ /dev/null @@ -1,148 +0,0 @@ -from __future__ import annotations - -__all__ = () - -import fnmatch -import re -from collections.abc import Sequence, Set -from typing import TYPE_CHECKING, overload - -if TYPE_CHECKING: - from collections.abc import Callable, Iterable - from typing import TypeVar - - from sphinx.testing._matcher.options import Flavor - from sphinx.testing._matcher.util import LinePattern - - _LinePatternT = TypeVar('_LinePatternT', str, re.Pattern[str]) - - -def _check_flavor(flavor: Flavor) -> None: - allowed: Sequence[Flavor] = ('none', 'fnmatch', 're') - if flavor not in allowed: - msg = f'unknown flavor: {flavor!r} (choose from: {allowed})' - raise ValueError(msg) - - -def _sort_pattern(s: str | re.Pattern[str]) -> tuple[str, int, int]: - if isinstance(s, str): - return (s, -1, -1) - return (s.pattern, s.flags, s.groups) - - -@overload -def to_line_patterns(expect: str) -> tuple[str]: ... # NoQA: E704 -@overload -def to_line_patterns(expect: re.Pattern[str]) -> tuple[re.Pattern[str]]: ... # NoQA: E704 -@overload -def to_line_patterns(expect: Iterable[LinePattern], /) -> tuple[LinePattern, ...]: ... # NoQA: E704 -def to_line_patterns(expect: LinePattern | Iterable[LinePattern]) -> Sequence[LinePattern]: # NoqA: E302 - """Get a read-only sequence of line-matching patterns. - - :param expect: One or more patterns a line should match. - :return: The possible line patterns. - - By convention,:: - - to_line_patterns("my pattern") == to_line_patterns(["my pattern"]) - - .. note:: - - The order in which the patterns are given is retained, except for - iterables that do not have an ordering (e.g., :class:`set`). For - such collections, patterns are ordered accroding to their string - representation, :class:`flags ` and capture groups. 
- """ - if isinstance(expect, (str, re.Pattern)): - return (expect,) - - if isinstance(expect, Set): - return tuple(sorted(expect, key=_sort_pattern)) - - return tuple(expect) - - -@overload -def to_block_pattern(expect: str) -> tuple[str, ...]: ... # NoQA: E704 -@overload -def to_block_pattern(expect: re.Pattern[str]) -> tuple[re.Pattern[str]]: ... # NoQA: E704 -@overload -def to_block_pattern(expect: Sequence[LinePattern]) -> Sequence[LinePattern]: ... # NoQA: E704 -def to_block_pattern(expect: LinePattern | Sequence[LinePattern]) -> Sequence[LinePattern]: # NoQA: E302 - r"""Get a read-only sequence for a s single block pattern. - - :param expect: A string, :class:`~re.Pattern` or a sequence thereof. - :return: The line patterns of the block. - :raise TypeError: The type of *expect* is not supported. - - When *expect* is a single string, it is split into lines to produce - the corresponding block pattern, e.g.:: - - to_block_pattern('line1\nline2') == ('line1', 'line2') - """ - if isinstance(expect, str): - return tuple(expect.splitlines()) - if isinstance(expect, re.Pattern): - return (expect,) - if not isinstance(expect, Sequence): - msg = f'expecting a sequence of patterns, got: {expect!r}' - raise TypeError(msg) - return tuple(expect) - - -@overload -def transform(fn: Callable[[str], str], x: str, /) -> str: ... # NoQA: E704 -@overload -def transform(fn: Callable[[str], str], x: re.Pattern[str], /) -> re.Pattern[str]: ... 
# NoQA: E704 -def transform(fn: Callable[[str], str], x: LinePattern, /) -> LinePattern: # NoQA: E302 - """Transform regular expressions, leaving compiled patterns untouched.""" - return fn(x) if isinstance(x, str) else x - - -def string_expression(line: str, /) -> str: - """A regular expression matching exactly *line*.""" - return rf'^(?s:{re.escape(line)})\Z' - - -def translate( - patterns: Iterable[LinePattern], - *, - flavor: Flavor, - default_translate: Callable[[str], str] = string_expression, - fnmatch_translate: Callable[[str], str] = fnmatch.translate, -) -> Iterable[LinePattern]: - r"""Translate regular expressions in *patterns* according to *flavor*. - - :param patterns: An iterable of patterns to translate if needed. - :param flavor: The regex pattern to use. - :param default_translate: Translation function for ``'none'`` flavor. - :param fnmatch_translate: Translation function for ``'fnmatch'`` flavor. - :return: An iterable of :class:`re`-style patterns. - """ - _check_flavor(flavor) - - if flavor == 'none': - return (transform(default_translate, pattern) for pattern in patterns) - if flavor == 'fnmatch': - return (transform(fnmatch_translate, pattern) for pattern in patterns) - - return patterns - - -def compile( - patterns: Iterable[LinePattern], - *, - flavor: Flavor, - default_translate: Callable[[str], str] = string_expression, - fnmatch_translate: Callable[[str], str] = fnmatch.translate, -) -> Sequence[re.Pattern[str]]: - """Compile one or more patterns into :class:`~re.Pattern` objects.""" - patterns = translate( - patterns, - flavor=flavor, - default_translate=default_translate, - fnmatch_translate=fnmatch_translate, - ) - # mypy does not like map + re.compile() although it is correct but - # this is likely due to https://github.com/python/mypy/issues/11880 - return tuple(re.compile(pattern) for pattern in patterns) diff --git a/sphinx/testing/matcher.py b/sphinx/testing/matcher/__init__.py similarity index 62% rename from 
sphinx/testing/matcher.py rename to sphinx/testing/matcher/__init__.py index 8776a575e98..829beb702ea 100644 --- a/sphinx/testing/matcher.py +++ b/sphinx/testing/matcher/__init__.py @@ -1,26 +1,29 @@ +"""Public module containing the matcher interface.""" + from __future__ import annotations -__all__ = ('Options', 'LineMatcher') +__all__ = ('LineMatcher',) import contextlib import itertools -from typing import TYPE_CHECKING, cast +import re +from typing import TYPE_CHECKING, cast, overload -from sphinx.testing._matcher import cleaner, engine, util -from sphinx.testing._matcher.buffer import Block -from sphinx.testing._matcher.options import Options, OptionsHolder +from sphinx.testing.matcher import _cleaner, _engine, _util +from sphinx.testing.matcher.buffer import Block +from sphinx.testing.matcher.options import Options, OptionsHolder if TYPE_CHECKING: - from collections.abc import Collection, Generator, Iterable, Iterator, Sequence + from collections.abc import Generator, Iterable, Iterator, Sequence, Set from io import StringIO from re import Pattern from typing import ClassVar, Literal from typing_extensions import Self, Unpack - from sphinx.testing._matcher.buffer import Line - from sphinx.testing._matcher.options import CompleteOptions, Flavor - from sphinx.testing._matcher.util import LinePattern + from sphinx.testing.matcher._util import BlockPattern, LinePattern + from sphinx.testing.matcher.buffer import Line + from sphinx.testing.matcher.options import CompleteOptions, Flavor PatternType = Literal['line', 'block'] @@ -90,7 +93,7 @@ def lines(self) -> Block: if cached is None: options = self.default_options | cast(Options, self.options) # compute for the first time the block's lines - lines = tuple(cleaner.clean_text(self.content, **options)) + lines = tuple(_cleaner.clean_text(self.content, **options)) # check if the value is the same as any of a previously cached value for addr, value in enumerate(itertools.islice(stack, 0, len(stack) - 1)): if 
isinstance(value, int): @@ -117,25 +120,34 @@ def lines(self) -> Block: return cached def find( - self, expect: LinePattern | Collection[LinePattern], /, *, flavor: Flavor | None = None + self, + patterns: LinePattern | Set[LinePattern] | Sequence[LinePattern], + /, + *, + flavor: Flavor | None = None, ) -> Sequence[Line]: """Same as :meth:`iterfind` but returns a sequence of lines.""" - return list(self.iterfind(expect, flavor=flavor)) + return list(self.iterfind(patterns, flavor=flavor)) def iterfind( - self, expect: LinePattern | Collection[LinePattern], /, *, flavor: Flavor | None = None + self, + patterns: LinePattern | Set[LinePattern] | Sequence[LinePattern], + /, + *, + flavor: Flavor | None = None, ) -> Iterator[Line]: """Yield the lines that match one (or more) of the given patterns. - :param expect: One or more patterns to satisfy. + :param patterns: The patterns deciding whether a line is selected. :param flavor: Optional temporary flavor for non-compiled patterns. """ - patterns = engine.to_line_patterns(expect) + patterns = _engine.to_line_patterns(patterns) if not patterns: # nothinig to match return compiled_patterns = set(self.__compile(patterns, flavor=flavor)) - matchers = {pattern.match for pattern in compiled_patterns} + # faster to iterate over a tuple rather than a set or a list + matchers = tuple(pattern.match for pattern in compiled_patterns) def predicate(line: Line) -> bool: text = line.buffer @@ -144,33 +156,32 @@ def predicate(line: Line) -> bool: yield from filter(predicate, self) def find_blocks( - self, expect: str | Sequence[LinePattern], /, *, flavor: Flavor | None = None + self, pattern: str | BlockPattern, /, *, flavor: Flavor | None = None ) -> Sequence[Block]: """Same as :meth:`iterfind_blocks` but returns a sequence of blocks.""" - return list(self.iterfind_blocks(expect, flavor=flavor)) + return list(self.iterfind_blocks(pattern, flavor=flavor)) def iterfind_blocks( - self, expect: str | Sequence[LinePattern], /, *, flavor: 
Flavor | None = None + self, patterns: str | BlockPattern, /, *, flavor: Flavor | None = None ) -> Iterator[Block]: """Yield non-overlapping blocks matching the given line patterns. - :param expect: The line patterns that a block must satisfy. + :param patterns: The line patterns that a block must satisfy. :param flavor: Optional temporary flavor for non-compiled patterns. :return: An iterator on the matching blocks. - When *expect* is a single string, it is split into lines, each of - which corresponding to the pattern a block's line must satisfy. + When *patterns* is a single string, it is split into lines, each + of which corresponding to the pattern a block's line must satisfy. .. note:: - This interface does not support single :class:`~re.Pattern` - objects as they could be interpreted as a line or a block - pattern. + Standalone :class:`~re.Pattern` objects are not supported + as they could be interpreted as a line or a block pattern. """ # in general, the patterns are smaller than the lines # so we expect the following to be more efficient than # cleaning up the whole text source - patterns = engine.to_block_pattern(expect) + patterns = _engine.to_block_pattern(patterns) if not patterns: # no pattern to locate return @@ -181,11 +192,12 @@ def iterfind_blocks( if (width := len(patterns)) > len(lines): # too many lines to match return + match_function = re.Pattern.match compiled_patterns = self.__compile(patterns, flavor=flavor) - block_iterator = enumerate(util.strict_windowed(lines, width)) + block_iterator = enumerate(_util.strict_windowed(lines, width)) for start, block in block_iterator: # check if the block matches the patterns line by line - if all(pattern.match(line) for pattern, line in zip(compiled_patterns, block)): + if all(map(match_function, compiled_patterns, block)): yield Block(block, start, _check=False) # Consume the iterator so that the next block consists # of lines just after the block that was just yielded. 
@@ -193,83 +205,89 @@ def iterfind_blocks( # Note that since the iterator yielded *block*, its # state is already on the "next" line, so we need to # advance by the block size - 1 only. - util.consume(block_iterator, width - 1) + _util.consume(block_iterator, width - 1) # assert methods def assert_match( self, - expect: LinePattern | Collection[LinePattern], + patterns: LinePattern | Set[LinePattern] | Sequence[LinePattern], /, count: int | None = None, flavor: Flavor | None = None, ) -> None: - """Assert that there exist one or more lines matching *pattern*. + """Assert that the number of matching lines for the given patterns. + + A matching line is a line that satisfies one or more patterns + given in *patterns*. - :param expect: One or more patterns the lines must satisfy. + :param patterns: The patterns deciding whether a line is counted. :param count: If specified, the exact number of matching lines. :param flavor: Optional temporary flavor for non-compiled patterns. """ - patterns = engine.to_line_patterns(expect) + patterns = _engine.to_line_patterns(patterns) self._assert_found('line', patterns, count=count, flavor=flavor) def assert_no_match( self, - expect: LinePattern | Collection[LinePattern], + patterns: LinePattern | Set[LinePattern] | Sequence[LinePattern], /, *, context: int = 3, flavor: Flavor | None = None, ) -> None: - """Assert that there are no lines matching *pattern*. + """Assert that there exist no matching line for the given patterns. - :param expect: One or more patterns the lines must not satisfy. + A matching line is a line that satisfies one or more patterns + given in *patterns*. + + :param patterns: The patterns deciding whether a line is counted. :param context: Number of lines to print around a failing line. :param flavor: Optional temporary flavor for non-compiled patterns. 
""" - patterns = engine.to_line_patterns(expect) + patterns = _engine.to_line_patterns(patterns) self._assert_not_found('line', patterns, context_size=context, flavor=flavor) - def assert_lines( + def assert_block( self, - expect: str | Sequence[LinePattern], + lines: str | BlockPattern, /, *, count: int | None = None, flavor: Flavor | None = None, ) -> None: - """Assert that there exist one or more blocks matching the *patterns*. + """Assert that the number of matching blocks for the given patterns. - :param expect: The line patterns that a block must satisfy. + :param lines: The line patterns that a block must satisfy. :param count: The number of blocks that should be found. :param flavor: Optional temporary flavor for non-compiled patterns. - When *expect* is a single string, it is split into lines, each - of which corresponding to the pattern a block's line must satisfy. + When *lines* is a single string, it is split into lines, each of + which corresponding to the pattern a block's line must satisfy. """ - patterns = engine.to_block_pattern(expect) + patterns = _engine.to_block_pattern(lines) self._assert_found('block', patterns, count=count, flavor=flavor) - def assert_no_lines( + def assert_no_block( self, - expect: str | Sequence[LinePattern], + lines: str | BlockPattern, /, *, context: int = 3, flavor: Flavor | None = None, ) -> None: - """Assert that no block matches the *patterns*. + """Assert that there exist no matching blocks for the given patterns. - :param expect: The line patterns that a block must satisfy. + :param lines: The line patterns that a block must satisfy. :param context: Number of lines to print around a failing block. :param flavor: Optional temporary flavor for non-compiled patterns. - When *expect* is a single string, it is split into lines, each + When *patterns* is a single string, it is split into lines, each of which corresponding to the pattern a block's line must satisfy. Use :data:`sys.maxsize` to show all capture lines. 
""" - patterns = engine.to_block_pattern(expect) + patterns = _engine.to_block_pattern(lines) self._assert_not_found('block', patterns, context_size=context, flavor=flavor) def _assert_found( @@ -280,24 +298,24 @@ def _assert_found( count: int | None, flavor: Flavor | None, ) -> None: - blocks = self.iterfind_blocks(patterns, flavor=flavor) + regions = self.__find(pattern_type, patterns, flavor=flavor) if count is None: - if next(blocks, None): + if next(regions, None) is not None: return - ctx = util.highlight(self.lines(), keepends=self.keep_break) - pat = util.prettify_patterns(patterns, sort=pattern_type == 'line') + ctx = _util.highlight(self.lines(), keepends=self.keep_break) + pat = _util.prettify_patterns(patterns, sort=pattern_type == 'line') logs = [f'{pattern_type} pattern', pat, 'not found in', ctx] raise AssertionError('\n\n'.join(logs)) - indices = {block.offset: len(block) for block in blocks} + indices = {region.offset: region.length for region in regions} if (found := len(indices)) == count: return - ctx = util.highlight(self.lines(), indices, keepends=self.keep_break) - pat = util.prettify_patterns(patterns, sort=pattern_type == 'line') - noun = util.plural_form(pattern_type, count) + ctx = _util.highlight(self.lines(), indices, keepends=self.keep_break) + pat = _util.prettify_patterns(patterns, sort=pattern_type == 'line') + noun = _util.plural_form(pattern_type, count) logs = [f'found {found} != {count} {noun} matching', pat, 'in', ctx] raise AssertionError('\n\n'.join(logs)) @@ -312,26 +330,45 @@ def _assert_not_found( if not patterns: # no pattern to find return - lines: Sequence[str] = self.lines() - if not lines: # no lines to match - return - - # early abort if there are more lines to match than available - if (window_size := len(patterns)) > len(lines): + values = self.__find(pattern_type, patterns, flavor=flavor) + found: Line | Block | None = next(values, None) + if found is None: return - compiled_patterns = self.__compile(patterns, 
flavor=flavor) - - for start, block in enumerate(util.strict_windowed(lines, window_size)): - if all(pattern.match(line) for pattern, line in zip(compiled_patterns, block)): - pat = util.prettify_patterns(patterns, sort=pattern_type == 'line') - block_object = Block(block, start, _check=False) - ctx = util.get_debug_context(lines, block_object, context_size) - logs = [f'{pattern_type} pattern', pat, 'found in', '\n'.join(ctx)] - raise AssertionError('\n\n'.join(logs)) + pat = _util.prettify_patterns(patterns, sort=pattern_type == 'line') + ctx = _util.diff(self.lines(), found, context_size) + logs = [f'{pattern_type} pattern', pat, 'found in', '\n'.join(ctx)] + raise AssertionError('\n\n'.join(logs)) def __compile( self, patterns: Iterable[LinePattern], *, flavor: Flavor | None ) -> Sequence[Pattern[str]]: flavor = self.flavor if flavor is None else flavor - return engine.compile(patterns, flavor=flavor) + return _engine.compile(patterns, flavor=flavor) + + @overload + def __find( # NoQA: E704 + self, + pattern_type: Literal['line'], + patterns: Sequence[LinePattern], + /, + flavor: Flavor | None, + ) -> Iterator[Line]: ... + @overload # NoQA: E301 + def __find( # NoQA: E704 + self, + pattern_type: Literal['block'], + patterns: Sequence[LinePattern], + /, + flavor: Flavor | None, + ) -> Iterator[Block]: ... 
+ def __find( # NoQA: E301 + self, + pattern_type: PatternType, + patterns: Sequence[LinePattern], + /, + flavor: Flavor | None, + ) -> Iterator[Line] | Iterator[Block]: + if pattern_type == 'line': + return self.iterfind(patterns, flavor=flavor) + return self.iterfind_blocks(patterns, flavor=flavor) diff --git a/sphinx/testing/_matcher/cleaner.py b/sphinx/testing/matcher/_cleaner.py similarity index 80% rename from sphinx/testing/_matcher/cleaner.py rename to sphinx/testing/matcher/_cleaner.py index 47a74d4fc66..a1368d2d5b7 100644 --- a/sphinx/testing/_matcher/cleaner.py +++ b/sphinx/testing/matcher/_cleaner.py @@ -1,3 +1,9 @@ +"""Private utility functions for :mod:`sphinx.testing.matcher`. + +All objects provided by this module are considered an implementation detail +and are not meant to be used by external libraries. +""" + from __future__ import annotations __all__ = () @@ -9,25 +15,17 @@ from itertools import filterfalse from typing import TYPE_CHECKING -from sphinx.testing._matcher import engine, util -from sphinx.testing._matcher.options import OptionsHolder +from sphinx.testing.matcher import _engine, _util +from sphinx.testing.matcher.options import OptionsHolder from sphinx.util.console import strip_escape_sequences if TYPE_CHECKING: - from collections.abc import Iterable, Sequence - from typing import TypeVar + from collections.abc import Iterable, MutableSequence, Sequence from typing_extensions import Unpack - from sphinx.testing._matcher.options import ( - DeletePattern, - Flavor, - LinePredicate, - Options, - StripChars, - ) - - _StrIterableT = TypeVar('_StrIterableT', bound=Iterable[str]) + from sphinx.testing.matcher._util import LinePredicate + from sphinx.testing.matcher.options import DeletePattern, Flavor, Options, StripChars def clean_text(text: str, /, **options: Unpack[Options]) -> Iterable[str]: @@ -53,7 +51,7 @@ def clean_lines(lines: Iterable[str], /, **options: Unpack[Options]) -> Iterable lines = filter_lines(lines, 
keep_empty=keep_empty, compress=compress, unique=unique) deleter_objects, flavor = config.delete, config.flavor - lines = prune_lines(lines, deleter_objects, flavor=flavor) + lines = prune_lines(lines, deleter_objects, flavor=flavor, trace=None) ignore_predicate = config.ignore lines = ignore_lines(lines, ignore_predicate) @@ -95,18 +93,18 @@ def filter_lines( in the same iteration, duplicates elimination is performed *after* empty lines are removed. To change the behaviour, consider using:: - lines = filterlines(lines, compress=True) - lines = filterlines(lines, empty=True) + lines = filter_lines(lines, compress=True) + lines = filter_lines(lines, empty=True) """ if not keep_empty: lines = filter(None, lines) if unique: # 'compress' has no effect when 'unique' is set - return util.unique_everseen(lines) + return _util.unique_everseen(lines) if compress: - return util.unique_justseen(lines) + return _util.unique_justseen(lines) return lines @@ -125,9 +123,9 @@ def prune_lines( lines: Iterable[str], delete: DeletePattern, /, - flavor: Flavor = 'none', *, - trace: list[Sequence[tuple[str, Sequence[str]]]] | None = None, + flavor: Flavor = 'none', + trace: MutableSequence[Sequence[tuple[str, Sequence[str]]]] | None = None, ) -> Iterable[str]: r"""Remove substrings from a source satisfying some patterns. @@ -162,20 +160,20 @@ def prune_lines( trace.append(entry) yield res """ - delete_patterns = engine.to_line_patterns(delete) + delete_patterns = _engine.to_line_patterns(delete) # Since fnmatch-style patterns do not support a meta-character for # matching at the start of the string, we first translate patterns - # and then add an explicit '^' character in the regular expression. - patterns = engine.translate( + # and then add an explicit '\A' character in the regular expression. 
+ patterns = _engine.translate( delete_patterns, flavor=flavor, - default_translate=re.escape, - fnmatch_translate=lambda prefix: fnmatch.translate(prefix).rstrip(r'\Z$'), + escape=re.escape, + str2fnmatch=lambda prefix: fnmatch.translate(prefix).rstrip(r'\Z$'), ) - # and now we add the '^' meta-character to ensure that we only match - # at the beginning of the string and not in the middle of the string - patterns = (engine.transform('^'.__add__, pattern) for pattern in patterns) - compiled = [re.compile(pattern) for pattern in patterns] + # Now, we add the '\A' meta-character to ensure that we only match + # at the beginning of the string and not in the middle of the string. + re_translate = r'\A'.__add__ + compiled = _engine.compile(patterns, flavor='re', str2regexpr=re_translate) def prune_redux(line: str, pattern: re.Pattern[str]) -> str: return pattern.sub('', line) diff --git a/sphinx/testing/matcher/_engine.py b/sphinx/testing/matcher/_engine.py new file mode 100644 index 00000000000..04f5a092523 --- /dev/null +++ b/sphinx/testing/matcher/_engine.py @@ -0,0 +1,169 @@ +"""Private utility functions for :mod:`sphinx.testing.matcher`. + +All objects provided by this module are considered an implementation detail +and are not meant to be used by external libraries. 
+""" + +from __future__ import annotations + +__all__ = () + +import fnmatch +import re +from collections.abc import Set +from typing import TYPE_CHECKING, overload + +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Sequence + + from sphinx.testing.matcher._util import BlockPattern, LinePattern, PatternLike + from sphinx.testing.matcher.options import Flavor + + +def _check_flavor(flavor: Flavor) -> None: + allowed: Sequence[Flavor] = ('none', 'fnmatch', 're') + if flavor not in allowed: + msg = f'unknown flavor: {flavor!r} (choose from: {allowed})' + raise ValueError(msg) + + +def _sort_pattern(s: PatternLike) -> tuple[str, int, int]: + if isinstance(s, str): + return (s, -1, -1) + return (s.pattern, s.flags, s.groups) + + +@overload +def to_line_patterns(line: str, /) -> tuple[str]: ... # NoQA: E704 +@overload +def to_line_patterns(pattern: re.Pattern[str], /) -> tuple[re.Pattern[str]]: ... # NoQA: E704 +@overload # NoqA: E302 +def to_line_patterns( # NoQA: E704 + patterns: Set[LinePattern] | Sequence[LinePattern], / +) -> tuple[LinePattern, ...]: ... +def to_line_patterns( # NoqA: E302 + patterns: LinePattern | Set[LinePattern] | Sequence[LinePattern], / +) -> Sequence[LinePattern]: + """Get a read-only sequence of line-matching patterns. + + :param patterns: One or more patterns a line should match (in its entirety). + :return: The possible line patterns. + + By convention,:: + + to_line_patterns("my pattern") == to_line_patterns(["my pattern"]) + + .. note:: + + If *expect* is a :class:`collections.abc.Set`-like object, the order + of the output sequence is an implementation detail but guaranteed to + be the same for the same inputs. Otherwise, the order of *expect* is + retained, in case this could make a difference. 
+ """ + if isinstance(patterns, (str, re.Pattern)): + return (patterns,) + if isinstance(patterns, Set): + return tuple(sorted(patterns, key=_sort_pattern)) + return tuple(patterns) + + +@overload +def to_block_pattern(pattern: str, /) -> tuple[str, ...]: ... # NoQA: E704 +@overload +def to_block_pattern(pattern: re.Pattern[str], /) -> tuple[re.Pattern[str]]: ... # NoQA: E704 +@overload +def to_block_pattern(patterns: BlockPattern, /) -> BlockPattern: ... # NoQA: E704 +def to_block_pattern(patterns: PatternLike | BlockPattern, /) -> BlockPattern: # NoQA: E302 + r"""Get a read-only sequence for a s single block pattern. + + :param patterns: A string, :class:`~re.Pattern` or a sequence thereof. + :return: The line patterns of the block. + + When *expect* is a single string, it is split into lines to produce + the corresponding block pattern, e.g.:: + + to_block_pattern('line1\nline2') == ('line1', 'line2') + """ + if isinstance(patterns, str): + return tuple(patterns.splitlines()) + if isinstance(patterns, re.Pattern): + return (patterns,) + return tuple(patterns) + + +@overload +def format_expression(fn: Callable[[str], str], x: str, /) -> str: ... # NoQA: E704 +@overload +def format_expression(fn: Callable[[str], str], x: re.Pattern[str], /) -> re.Pattern[str]: ... 
# NoQA: E704 +def format_expression(fn: Callable[[str], str], x: PatternLike, /) -> PatternLike: # NoQA: E302 + """Transform regular expressions, leaving compiled patterns untouched.""" + return fn(x) if isinstance(x, str) else x + + +def string_expression(line: str, /) -> str: + """A regular expression matching exactly *line*.""" + # use '\A' and '\Z' to match the beginning and end of the string + return rf'\A{re.escape(line)}\Z' + + +def translate( + patterns: Iterable[PatternLike], + *, + flavor: Flavor, + escape: Callable[[str], str] | None = string_expression, + str2regexpr: Callable[[str], str] | None = None, + str2fnmatch: Callable[[str], str] | None = fnmatch.translate, +) -> Iterable[PatternLike]: + r"""Translate regular expressions according to *flavor*. + + Non-compiled regular expressions are translated by the translation function + corresponding to the given *flavor* while compiled patterns are kept as is. + + :param patterns: An iterable of regular expressions to translate. + :param flavor: The translation flavor for non-compiled patterns. + :param escape: Translation function for ``'none'`` flavor. + :param str2regexpr: Translation function for ``'re'`` flavor. + :param str2fnmatch: Translation function for ``'fnmatch'`` flavor. + :return: An iterable of :class:`re`-style pattern-like objects. 
+ """ + _check_flavor(flavor) + + if flavor == 'none' and callable(translator := escape): + return (format_expression(translator, expr) for expr in patterns) + + if flavor == 're' and callable(translator := str2regexpr): + return (format_expression(translator, expr) for expr in patterns) + + if flavor == 'fnmatch' and callable(translator := str2fnmatch): + return (format_expression(translator, expr) for expr in patterns) + + return patterns + + +def compile( + patterns: Iterable[PatternLike], + *, + flavor: Flavor, + escape: Callable[[str], str] | None = string_expression, + str2regexpr: Callable[[str], str] | None = None, + str2fnmatch: Callable[[str], str] | None = fnmatch.translate, +) -> Sequence[re.Pattern[str]]: + """Compile one or more patterns into :class:`~re.Pattern` objects. + + :param patterns: An iterable of patterns to translate and compile. + :param flavor: The translation flavor for non-compiled patterns. + :param escape: Translation function for ``'none'`` flavor. + :param str2regexpr: Translation function for ``'re'`` flavor. + :param str2fnmatch: Translation function for ``'fnmatch'`` flavor. + :return: A sequence of compiled regular expressions. 
+ """ + patterns = translate( + patterns, + flavor=flavor, + escape=escape, + str2regexpr=str2regexpr, + str2fnmatch=str2fnmatch, + ) + # mypy does not like map + re.compile() although it is correct but + # this is likely due to https://github.com/python/mypy/issues/11880 + return tuple(re.compile(pattern) for pattern in patterns) diff --git a/sphinx/testing/_matcher/util.py b/sphinx/testing/matcher/_util.py similarity index 87% rename from sphinx/testing/_matcher/util.py rename to sphinx/testing/matcher/_util.py index b8a448b67cb..c9412e996c0 100644 --- a/sphinx/testing/_matcher/util.py +++ b/sphinx/testing/matcher/_util.py @@ -16,15 +16,21 @@ if TYPE_CHECKING: import re - from collections.abc import Iterable, Iterator, Mapping, Sequence + from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence from typing import TypeVar, Union from typing_extensions import Never - from sphinx.testing._matcher.buffer import Block + from sphinx.testing.matcher.buffer import Block, Line + PatternLike = Union[str, re.Pattern[str]] + """A regular expression (compiled or not).""" LinePattern = Union[str, re.Pattern[str]] - """A regular expression or a compiled pattern.""" + """A regular expression (compiled or not) for an entire line.""" + LinePredicate = Callable[[str], object] + """A predicate called on an entire line.""" + BlockPattern = Sequence[LinePattern] + """A sequence of regular expression (compiled or not) for a block.""" _T = TypeVar('_T') @@ -152,7 +158,7 @@ def indent_lines( def prettify_patterns( - patterns: Sequence[LinePattern], + patterns: Sequence[PatternLike], /, *, indent: int = 4, @@ -164,27 +170,27 @@ def prettify_patterns( return indent_source(source, indent=indent, highlight=False) -def get_debug_context( - source: Sequence[str], block: Block, /, context: int, *, indent: int = 4 +def diff( + source: Sequence[str], region: Line | Block, /, context: int, *, indent: int = 4 ) -> list[str]: """Get some context lines around *block* and highlight 
the *block*. :param source: The source containing the *block*. - :param block: A block to highlight. + :param region: A block to highlight. :param context: The number of lines to display around the block. :param indent: The number of indentation spaces. :return: A list of formatted lines. """ - assert block <= source, 'the block must be contained in the source' + assert region <= source, 'the block must be contained in the source' logs: list[str] = [] writelines = logs.extend has_context = int(context > 0) - before, after = block.context(context, limit := len(source)) - + before, after = region.context(context, limit := len(source)) writelines(omit_line(has_context * before.start)) writelines(indent_lines(source[before], indent=indent, highlight=False)) - writelines(indent_lines(block, indent=indent, highlight=True)) + # use region.window to ensure that single lines are wrapped in lists + writelines(indent_lines(source[region.window], indent=indent, highlight=True)) writelines(indent_lines(source[after], indent=indent, highlight=False)) writelines(omit_line(has_context * (limit - after.stop))) diff --git a/sphinx/testing/_matcher/buffer.py b/sphinx/testing/matcher/buffer.py similarity index 89% rename from sphinx/testing/_matcher/buffer.py rename to sphinx/testing/matcher/buffer.py index 87e6b317404..8a0eb88ac01 100644 --- a/sphinx/testing/_matcher/buffer.py +++ b/sphinx/testing/matcher/buffer.py @@ -1,3 +1,5 @@ +"""Interface for comparing strings or list of strings.""" + from __future__ import annotations __all__ = ('Line', 'Block') @@ -10,24 +12,28 @@ from collections.abc import Sequence from typing import TYPE_CHECKING, Generic, TypeVar, final, overload -from sphinx.testing._matcher import util +from sphinx.testing.matcher._util import consume as _consume if TYPE_CHECKING: - from collections.abc import Callable, Iterable, Iterator + from collections.abc import Iterable, Iterator from typing import Any, Union from typing_extensions import Self - LineText = 
Union[str, re.Pattern[str]] + from sphinx.testing.matcher._util import LinePattern, LinePredicate, PatternLike + + SubStringLike = PatternLike """A line's substring or a compiled substring pattern.""" - BlockMatch = Union[object, str, re.Pattern[str], Callable[[str], object]] + BlockLineLike = Union[object, LinePattern, LinePredicate] """A block's line, a compiled pattern or a predicate.""" +# We would like to have a covariant buffer type but Python does not +# support higher-kinded type, so we can only use an invariant type. _T = TypeVar('_T', bound=Sequence[str]) -class SourceView(Generic[_T], Sequence[str], abc.ABC): +class _Region(Generic[_T], Sequence[str], abc.ABC): """A string or a sequence of strings implementing rich comparison. Given an implicit *source* as a list of strings, a :class:`SourceView` @@ -77,6 +83,60 @@ def length(self) -> int: """The number of items in this object.""" return len(self) + @property + @abc.abstractmethod + def window(self) -> slice: + """A slice representing this region in its source. + + If *source* is the original source this region is contained within, + then ``assert [*source[region.window]] == [*region.lines()]`` holds. + + Examples:: + + source = ['L1', 'L2', 'L3'] + line = Line('L2', 1) + assert source[line.window] == ['L2'] + + source = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] + block = Block(['4', '5', '6'], 3) + assert source[block.window] == ['4', '5', '6'] + """ + + @abc.abstractmethod + def lines(self) -> tuple[Line, ...]: + """This region as a tuple of :class:`Line` objects.""" + + @abc.abstractmethod + def lines_iterator(self) -> Iterator[Line]: + """This region as an iterator of :class:`Line` objects.""" + + def context(self, delta: int, limit: int) -> tuple[slice, slice]: + """A slice object indicating a context around this region. + + :param delta: The number of context lines to show. + :param limit: The number of lines in the source the region belongs to. 
+ :return: The slices for the 'before' and 'after' lines. + + Example:: + + source = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] + block = Block(['4', '5', '6'], 3) + before, after = block.context(2, 10) + assert source[before] == ['2', '3'] + assert source[after] == ['7', '8'] + """ + assert delta >= 0, 'context size must be >= 0' + assert limit >= 0, 'source length must be >= 0' + + window = self.window + before_start, before_stop = max(0, window.start - delta), min(window.start, limit) + before_slice = slice(before_start, before_stop) + + after_start, after_stop = min(window.stop, limit), min(window.stop + delta, limit) + after_slice = slice(after_start, after_stop) + + return before_slice, after_slice + @abc.abstractmethod # The 'value' is 'Any' so that subclasses do not violate Liskov's substitution principle def count(self, value: Any, /) -> int: @@ -115,7 +175,7 @@ def __copy__(self) -> Self: def __bool__(self) -> bool: """Indicate whether this view is empty or not.""" - return bool(len(self)) + return bool(self.buffer) @final def __iter__(self) -> Iterator[str]: @@ -163,7 +223,7 @@ def __gt__(self, other: object, /) -> bool: """ -class Line(SourceView[str]): +class Line(_Region[str]): """A line found by :meth:`~sphinx.testing.matcher.LineMatcher.find`. A :class:`Line` can be compared to :class:`str`, :class:`Line` objects or @@ -185,6 +245,76 @@ def __init__(self, line: str = '', /, offset: int = 0, *, _check: bool = True) - """Construct a :class:`Line` object.""" super().__init__(line, offset, _check=_check) + @property + def window(self) -> slice: + return slice(self.offset, self.offset + 1) + + def lines(self) -> tuple[Line]: + return (self,) + + def lines_iterator(self) -> Iterator[Line]: + yield self + + def count(self, sub: SubStringLike, /) -> int: + """Count the number of occurrences of a substring or pattern. + + :raise TypeError: *sub* is not a string or a compiled pattern. 
+ """ + if isinstance(sub, re.Pattern): + # avoid using value.findall() since we only want the length + # of the corresponding iterator (the following lines are more + # efficient from a memory perspective) + counter = itertools.count() + _consume(zip(sub.finditer(self.buffer), counter)) + return next(counter) + + return self.buffer.count(sub) # raise a TypeError if *sub* is not a string + + # explicitly add the method since its signature differs from :meth:`SourceView.index` + def index(self, sub: SubStringLike, start: int = 0, stop: int = sys.maxsize, /) -> int: + """Find the lowest index of a substring. + + :raise TypeError: *sub* is not a string or a compiled pattern. + """ + return super().index(sub, start, stop) + + def find(self, sub: SubStringLike, start: int = 0, stop: int = sys.maxsize, /) -> int: + """Find the lowest index of a substring or *-1* on failure. + + :raise TypeError: *sub* is not a string or a compiled pattern. + """ + if isinstance(sub, re.Pattern): + # Do not use sub.search(buffer, start, end) since the '^' pattern + # character matches at the *real* beginning of *buffer* but *not* + # necessarily at the index where the search is to start. + # + # Ref: https://docs.python.org/3/library/re.html#re.Pattern.search + if match := sub.search(self.buffer[start:stop]): + # normalize the start position + start_index, _, _ = slice(start, stop).indices(self.length) + return match.start() + start_index + return -1 + + return self.buffer.find(sub, start, stop) # raise a TypeError if *sub* is not a string + + def startswith(self, prefix: str, start: int = 0, end: int = sys.maxsize, /) -> bool: + """Test whether the line starts with the given *prefix*. + + :param prefix: A line prefix to test. + :param start: The test start position. + :param end: The test stop position. 
+ """ + return self.buffer.startswith(prefix, start, end) + + def endswith(self, suffix: str, start: int = 0, end: int = sys.maxsize, /) -> bool: + """Test whether the line ends with the given *suffix*. + + :param suffix: A line suffix to test. + :param start: The test start position. + :param end: The test stop position. + """ + return self.buffer.endswith(suffix, start, end) + # dunder methods def __str__(self) -> str: @@ -227,68 +357,8 @@ def __gt__(self, other: object, /) -> bool: # separately check offsets before the buffers for efficiency return self.offset == other[1] and self.buffer > other[0] - def startswith(self, prefix: str, start: int = 0, end: int = sys.maxsize, /) -> bool: - """Test whether the line starts with the given *prefix*. - - :param prefix: A line prefix to test. - :param start: The test start position. - :param end: The test stop position. - """ - return self.buffer.startswith(prefix, start, end) - - def endswith(self, suffix: str, start: int = 0, end: int = sys.maxsize, /) -> bool: - """Test whether the line ends with the given *suffix*. - - :param suffix: A line suffix to test. - :param start: The test start position. - :param end: The test stop position. - """ - return self.buffer.endswith(suffix, start, end) - - def count(self, sub: LineText, /) -> int: - """Count the number of occurrences of a substring or pattern. - - :raise TypeError: *sub* is not a string or a compiled pattern. 
- """ - if isinstance(sub, re.Pattern): - # avoid using value.findall() since we only want the length - # of the corresponding iterator (the following lines are more - # efficient from a memory perspective) - counter = itertools.count() - util.consume(zip(sub.finditer(self.buffer), counter)) - return next(counter) - - return self.buffer.count(sub) # raise a TypeError if *sub* is not a string - - # explicitly add the method since its signature differs from :meth:`SourceView.index` - def index(self, sub: LineText, start: int = 0, stop: int = sys.maxsize, /) -> int: - """Find the lowest index of a substring. - - :raise TypeError: *sub* is not a string or a compiled pattern. - """ - return super().index(sub, start, stop) - - def find(self, sub: LineText, start: int = 0, stop: int = sys.maxsize, /) -> int: - """Find the lowest index of a substring or *-1* on failure. - - :raise TypeError: *sub* is not a string or a compiled pattern. - """ - if isinstance(sub, re.Pattern): - # Do not use sub.search(buffer, start, end) since the '^' pattern - # character matches at the *real* beginning of *buffer* but *not* - # necessarily at the index where the search is to start. - # - # Ref: https://docs.python.org/3/library/re.html#re.Pattern.search - if match := sub.search(self.buffer[start:stop]): - # normalize the start position - start_index, _, _ = slice(start, stop).indices(self.length) - return match.start() + start_index - return -1 - - return self.buffer.find(sub, start, stop) # raise a TypeError if *sub* is not a string - -class Block(SourceView[tuple[str, ...]], Sequence[str]): +class Block(_Region[tuple[str, ...]]): """Block found by :meth:`~sphinx.testing.matcher.LineMatcher.find_blocks`. A block is a sequence of lines comparable to :class:`Line`, generally a @@ -345,47 +415,18 @@ def __init__( @property def window(self) -> slice: - """A slice representing this block in its source. 
- - If *source* is the original source this block is contained within, - then ``assert source[block.window] == block`` is satisfied. - - Example:: - - source = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] - block = Block(['4', '5', '6'], 3) - assert source[block.window] == ['4', '5', '6'] - """ return slice(self.offset, self.offset + self.length) - def context(self, delta: int, limit: int) -> tuple[slice, slice]: - """A slice object indicating a context around this block. - - :param delta: The number of context lines to show. - :param limit: The number of lines in the source the block belongs to. - :return: The slices for the 'before' and 'after' lines. - - Example:: - - source = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] - block = Block(['4', '5', '6'], 3) - before, after = block.context(2, 10) - assert source[before] == ['2', '3'] - assert source[after] == ['7', '8'] - """ - assert delta >= 0, 'context size must be >= 0' - assert limit >= 0, 'source length must be >= 0' - - before_start, before_stop = max(0, self.offset - delta), min(self.offset, limit) - before_slice = slice(before_start, before_stop) - - block_stop = self.offset + self.length - after_start, after_stop = min(block_stop, limit), min(block_stop + delta, limit) - after_slice = slice(after_start, after_stop) + def lines(self) -> tuple[Line, ...]: + if self.__cached_lines is None: + self.__cached_lines = tuple(self.lines_iterator()) + return self.__cached_lines - return before_slice, after_slice + def lines_iterator(self) -> Iterator[Line]: + for index, line in enumerate(self, self.offset): + yield Line(line, index, _check=False) - def count(self, target: BlockMatch, /) -> int: + def count(self, target: BlockLineLike, /) -> int: """Count the number of occurrences of matching lines. 
For :class:`~re.Pattern` inputs, the following are equivalent:: @@ -400,13 +441,13 @@ def count(self, target: BlockMatch, /) -> int: if callable(target): counter = itertools.count() - util.consume(zip(filter(target, self.buffer), counter)) + _consume(zip(filter(target, self.buffer), counter)) return next(counter) return self.buffer.count(target) # explicitly add the method since its signature differs from :meth:`SourceView.index` - def index(self, target: BlockMatch, start: int = 0, stop: int = sys.maxsize, /) -> int: + def index(self, target: BlockLineLike, start: int = 0, stop: int = sys.maxsize, /) -> int: """Find the lowest index of a matching line. For :class:`~re.Pattern` inputs, the following are equivalent:: @@ -416,7 +457,7 @@ def index(self, target: BlockMatch, start: int = 0, stop: int = sys.maxsize, /) """ return super().index(target, start, stop) - def find(self, target: BlockMatch, start: int = 0, stop: int = sys.maxsize, /) -> int: + def find(self, target: BlockLineLike, start: int = 0, stop: int = sys.maxsize, /) -> int: """Find the lowest index of a matching line or *-1* on failure. For :class:`~re.Pattern` inputs, the following are equivalent:: @@ -436,17 +477,6 @@ def find(self, target: BlockMatch, start: int = 0, stop: int = sys.maxsize, /) - return self.buffer.index(target, start, stop) return -1 - def lines(self) -> tuple[Line, ...]: - """This block as a tuple of :class:`Line` objects.""" - if self.__cached_lines is None: - self.__cached_lines = tuple(self.lines_iterator()) - return self.__cached_lines - - def lines_iterator(self) -> Iterator[Line]: - """This block as a list of :class:`Line` objects.""" - for index, line in enumerate(self, self.offset): - yield Line(line, index, _check=False) - @overload def at(self, index: int, /) -> Line: ... 
# NoQA: E704 @overload diff --git a/sphinx/testing/_matcher/options.py b/sphinx/testing/matcher/options.py similarity index 93% rename from sphinx/testing/_matcher/options.py rename to sphinx/testing/matcher/options.py index d0bdcaab824..4b77ffab24f 100644 --- a/sphinx/testing/_matcher/options.py +++ b/sphinx/testing/matcher/options.py @@ -1,40 +1,43 @@ +"""Module for the :class:`~sphinx.testing.matcher.LineMatcher` options.""" + from __future__ import annotations -__all__ = ('Options',) +__all__ = ('Options', 'CompleteOptions', 'OptionsHolder') import contextlib from types import MappingProxyType from typing import TYPE_CHECKING, TypedDict, final, overload if TYPE_CHECKING: - from collections.abc import Callable, Generator, Mapping, Sequence + from collections.abc import Generator, Mapping, Sequence from typing import ClassVar, Literal, TypeVar, Union from typing_extensions import Unpack - from sphinx.testing._matcher.util import LinePattern + from sphinx.testing.matcher._util import LinePattern, LinePredicate FlagOption = Literal['keep_ansi', 'keep_break', 'keep_empty', 'compress', 'unique'] StripOption = Literal['strip', 'stripline'] StripChars = Union[bool, str, None] + """Allowed values for :attr:`Options.strip` and :attr:`Options.stripline`.""" DeleteOption = Literal['delete'] DeletePattern = Union[LinePattern, Sequence[LinePattern]] + """A prefix or a compiled prefix pattern.""" IgnoreOption = Literal['ignore'] - LinePredicate = Callable[[str], object] FlavorOption = Literal['flavor'] Flavor = Literal['re', 'fnmatch', 'none'] + """Allowed values for :attr:`Options.flavor`.""" # For some reason, mypy does not like Union of Literal, # so we wrap the Literal types inside a bigger Literal. 
- OptionName = Literal[FlagOption, StripOption, DeleteOption, IgnoreOption, FlavorOption] OptionValue = Union[bool, StripChars, DeletePattern, Union[LinePredicate, None], Flavor] + OptionName = Literal[FlagOption, StripOption, DeleteOption, IgnoreOption, FlavorOption] DT = TypeVar('DT') - _OptionsView = Union['Options', 'CompleteOptions'] @final @@ -58,7 +61,7 @@ class Options(TypedDict, total=False): """ strip: StripChars - """Call :meth:`str.strip` on the original source. + """Describe the characters to strip from the source. The allowed values for :attr:`strip` are: @@ -68,7 +71,7 @@ class Options(TypedDict, total=False): """ stripline: StripChars - """Call :meth:`str.strip` on the lines obtained after splitting the source. + """Describe the characters to strip from each source's line. The allowed values for :attr:`stripline` are: @@ -78,13 +81,13 @@ class Options(TypedDict, total=False): """ keep_break: bool - """If true, keep line breaks in the output. + """Indicate whether to keep line breaks at the end of each line. - The default value is ``False``. + The default value is ``False`` (to mirror :meth:`str.splitlines`). """ keep_empty: bool - """If false, eliminate empty lines in the output. + """Indicate whether to keep empty lines in the output. The default value is ``True``. """ @@ -153,10 +156,7 @@ class Options(TypedDict, total=False): @final class CompleteOptions(TypedDict): - """Same as :class:`Options` but as a total dictionary. 
- - :meta private: - """ + """Same as :class:`Options` but as a total dictionary.""" keep_ansi: bool strip: StripChars diff --git a/sphinx/testing/util.py b/sphinx/testing/util.py index 3afbece6cc8..3bfa7ef83f6 100644 --- a/sphinx/testing/util.py +++ b/sphinx/testing/util.py @@ -31,7 +31,7 @@ from docutils.nodes import Node from typing_extensions import Unpack - from sphinx.testing._matcher.options import CompleteOptions, Options + from sphinx.testing.matcher.options import CompleteOptions, Options def assert_node(node: Node, cls: Any = None, xpath: str = "", **kwargs: Any) -> None: diff --git a/tests/test_testing/test_matcher.py b/tests/test_testing/test_matcher.py index 1d955f21046..23bc019e0b6 100644 --- a/tests/test_testing/test_matcher.py +++ b/tests/test_testing/test_matcher.py @@ -7,8 +7,8 @@ import pytest +import sphinx.testing.matcher._util as util import sphinx.util.console as term -from sphinx.testing._matcher import util from sphinx.testing.matcher import LineMatcher if TYPE_CHECKING: @@ -16,8 +16,8 @@ from _pytest._code import ExceptionInfo - from sphinx.testing._matcher.options import Flavor - from sphinx.testing._matcher.util import LinePattern + from sphinx.testing.matcher._util import LinePattern + from sphinx.testing.matcher.options import Flavor @dataclasses.dataclass @@ -315,7 +315,7 @@ def test_assert_no_match(): def test_assert_no_match_debug(lines, pattern, flavor, context, expect): matcher = LineMatcher.from_lines(lines) - with pytest.raises(AssertionError, match='.*') as exc_info: + with pytest.raises(AssertionError) as exc_info: matcher.assert_no_match(pattern, context=context, flavor=flavor) assert parse_excinfo(exc_info) == expect @@ -323,21 +323,21 @@ def test_assert_no_match_debug(lines, pattern, flavor, context, expect): @pytest.mark.parametrize('dedup', range(3)) @pytest.mark.parametrize(('maxsize', 'start', 'count'), [(10, 3, 4)]) -def test_assert_lines(maxsize, start, count, dedup): +def test_assert_block_coverage(maxsize, start, 
count, dedup): # 'maxsize' might be smaller than start + (dedup + 1) * count # but it is fine since stop indices are clamped internally source = Source(maxsize, start, count, dedup=dedup) matcher = LineMatcher(source.text) # the main block is matched exactly once - matcher.assert_lines(source.main, count=1, flavor='none') + matcher.assert_block(source.main, count=1, flavor='none') assert source.base * source.ncopy == source.main - matcher.assert_lines(source.base, count=source.ncopy, flavor='none') + matcher.assert_block(source.base, count=source.ncopy, flavor='none') for subidx in range(1, count + 1): # check that the sub-blocks are matched correctly subblock = [Source.block_line(start + i) for i in range(subidx)] - matcher.assert_lines(subblock, count=source.ncopy, flavor='none') + matcher.assert_block(subblock, count=source.ncopy, flavor='none') @pytest.mark.parametrize( @@ -406,15 +406,15 @@ def test_assert_lines(maxsize, start, count, dedup): ), ], ) -def test_assert_lines_debug(lines, pattern, count, expect): +def test_assert_block_debug(lines, pattern, count, expect): matcher = LineMatcher.from_lines(lines, flavor='none') if expect is None: - matcher.assert_lines(pattern, count=count) + matcher.assert_block(pattern, count=count) return with pytest.raises(AssertionError, match='.*') as exc_info: - matcher.assert_lines(pattern, count=count) + matcher.assert_block(pattern, count=count) assert parse_excinfo(exc_info) == expect @@ -426,14 +426,14 @@ def test_assert_lines_debug(lines, pattern, count, expect): (3, 0, 1), (3, 0, 2), (3, 0, 3), (3, 1, 1), (3, 1, 2), (3, 2, 1), ]) # fmt: skip @pytest.mark.parametrize('dedup', range(3)) -def test_assert_no_lines(maxsize, start, count, dedup): +def test_assert_no_block_coverage(maxsize, start, count, dedup): # 'maxsize' might be smaller than start + (dedup + 1) * count # but it is fine since stop indices are clamped internally source = Source(maxsize, start, count, dedup=dedup) matcher = LineMatcher(source.text, 
flavor='none') with pytest.raises(AssertionError) as exc_info: - matcher.assert_no_lines(source.main, context=0) + matcher.assert_no_block(source.main, context=0) assert parse_excinfo(exc_info) == [ 'block pattern', @@ -446,6 +446,44 @@ def test_assert_no_lines(maxsize, start, count, dedup): ] +@pytest.mark.parametrize( + ('lines', 'pattern', 'flavor', 'context', 'expect'), + [ + ( + ['a', 'b', '11X', '22Y', '33Z', 'c', 'd', 'e', 'f'], + [r'\d{2}X', r'\d*\w+', r'^33Z$'], + 're', + 2, + [ + 'block pattern', + '', + r' \d{2}X', + r' \d*\w+', + r' ^33Z$', + '', + 'found in', + '', + ' a', + ' b', + '> 11X', + '> 22Y', + '> 33Z', + ' c', + ' d', + '... (omitted 2 lines) ...', + ], + ), + ], +) +def test_assert_no_block_debug(lines, pattern, flavor, context, expect): + matcher = LineMatcher.from_lines(lines) + + with pytest.raises(AssertionError) as exc_info: + matcher.assert_no_block(pattern, context=context, flavor=flavor) + + assert parse_excinfo(exc_info) == expect + + @pytest.mark.parametrize( ('maxsize', 'start', 'count', 'dedup', 'omit_prev', 'omit_next', 'context_size'), [ @@ -473,13 +511,13 @@ def test_assert_no_lines(maxsize, start, count, dedup): (20, 8, 2, 4, 5, 0, 3), ], ) -def test_assert_no_lines_debug( +def test_assert_no_block_debug_coverage( maxsize, start, count, dedup, omit_prev, omit_next, context_size ): source = Source(maxsize, start, count, dedup=dedup) matcher = LineMatcher(source.text, flavor='none') with pytest.raises(AssertionError) as exc_info: - matcher.assert_no_lines(source.main, context=context_size) + matcher.assert_no_block(source.main, context=context_size) assert parse_excinfo(exc_info) == [ 'block pattern', diff --git a/tests/test_testing/test_matcher_buffer.py b/tests/test_testing/test_matcher_buffer.py index 4b6a8e8c3dc..c502344937e 100644 --- a/tests/test_testing/test_matcher_buffer.py +++ b/tests/test_testing/test_matcher_buffer.py @@ -8,17 +8,17 @@ import pytest -from sphinx.testing._matcher.buffer import Block, Line +from 
sphinx.testing.matcher.buffer import Block, Line if TYPE_CHECKING: from collections.abc import Sequence from typing import Any - from sphinx.testing._matcher.buffer import SourceView + from sphinx.testing.matcher.buffer import _Region @pytest.mark.parametrize('cls', [Line, Block]) -def test_offset_value(cls: type[SourceView[Any]]) -> None: +def test_offset_value(cls: type[_Region[Any]]) -> None: with pytest.raises(TypeError, match=re.escape('offset must be an integer, got: None')): cls('', None) # type: ignore[arg-type] diff --git a/tests/test_testing/test_matcher_cleaner.py b/tests/test_testing/test_matcher_cleaner.py index 337b1dec407..738109b7b1a 100644 --- a/tests/test_testing/test_matcher_cleaner.py +++ b/tests/test_testing/test_matcher_cleaner.py @@ -5,7 +5,7 @@ import pytest -from sphinx.testing._matcher.cleaner import filter_lines, prune_lines, strip_chars, strip_lines +from sphinx.testing.matcher._cleaner import filter_lines, prune_lines, strip_chars, strip_lines if TYPE_CHECKING: from collections.abc import Callable, Sequence @@ -38,6 +38,10 @@ def test_filter_lines(): @pytest.fixture() def prune_trace_object() -> Callable[[], list[Sequence[tuple[str, Sequence[str]]]]]: + """A fixture returning a factory for a typed trace object. + + Without this fixture, trace objects need to be explicitly typed for mypy. 
+ """ return list diff --git a/tests/test_testing/test_matcher_engine.py b/tests/test_testing/test_matcher_engine.py index bd72f6d71cd..6a936358854 100644 --- a/tests/test_testing/test_matcher_engine.py +++ b/tests/test_testing/test_matcher_engine.py @@ -4,9 +4,7 @@ import random import re -import pytest - -from sphinx.testing._matcher import engine +from sphinx.testing.matcher import _engine as engine def test_line_pattern(): @@ -40,40 +38,41 @@ def test_block_patterns(): p = re.compile('a') assert engine.to_block_pattern(p) == (p,) - assert engine.to_block_pattern(['a', p]) == ('a', p) - pytest.raises(TypeError, engine.to_block_pattern, {'a'}) - pytest.raises(TypeError, engine.to_block_pattern, {'a', p}) - -def test_transform_expressions(): - fn = '^'.__add__ - assert engine.transform(fn, 'a') == '^a' +def test_format_expression(): + assert engine.format_expression(str.upper, 'a') == 'A' p = re.compile('') - assert engine.transform(fn, p) is p + assert engine.format_expression(str.upper, p) is p def test_translate_expressions(): string, pattern = 'a*', re.compile('.*') inputs = (string, pattern) - expect = [rf'^(?s:{re.escape(string)})\Z', pattern] - assert list(engine.translate(inputs, flavor='none')) == expect + expect = [engine.string_expression(string), pattern] + assert [*engine.translate(inputs, flavor='none')] == expect + expect = [string.upper(), pattern] + assert [*engine.translate(inputs, flavor='none', escape=str.upper)] == expect expect = [string, pattern] - assert list(engine.translate(inputs, flavor='re')) == expect + assert [*engine.translate(inputs, flavor='re')] == expect + expect = [string.upper(), pattern] + assert [*engine.translate(inputs, flavor='re', str2regexpr=str.upper)] == expect expect = [fnmatch.translate(string), pattern] - assert list(engine.translate(inputs, flavor='fnmatch')) == expect + assert [*engine.translate(inputs, flavor='fnmatch')] == expect + expect = [string.upper(), pattern] + assert [*engine.translate(inputs, 
flavor='fnmatch', str2fnmatch=str.upper)] == expect def test_compile_patterns(): string = 'a*' compiled = re.compile('.*') - expect = (re.compile(rf'^(?s:{re.escape(string)})\Z'), compiled) + expect = (re.compile(engine.string_expression(string)), compiled) assert engine.compile([string, compiled], flavor='none') == expect expect = (re.compile(fnmatch.translate(string)), compiled) diff --git a/tests/test_testing/test_matcher_options.py b/tests/test_testing/test_matcher_options.py index 21f06710822..d793000aa70 100644 --- a/tests/test_testing/test_matcher_options.py +++ b/tests/test_testing/test_matcher_options.py @@ -5,12 +5,12 @@ import pytest -from sphinx.testing._matcher.options import CompleteOptions, Options, OptionsHolder +from sphinx.testing.matcher.options import CompleteOptions, Options, OptionsHolder if TYPE_CHECKING: from typing import ClassVar - from sphinx.testing._matcher.options import OptionName + from sphinx.testing.matcher.options import OptionName def test_options_class(): From a543021140e421658af13a50cb7cb9085a662571 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 5 Apr 2024 18:10:49 +0200 Subject: [PATCH 40/66] fixup --- tests/test_testing/test_matcher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_testing/test_matcher.py b/tests/test_testing/test_matcher.py index 23bc019e0b6..4f06f5dbf88 100644 --- a/tests/test_testing/test_matcher.py +++ b/tests/test_testing/test_matcher.py @@ -12,7 +12,7 @@ from sphinx.testing.matcher import LineMatcher if TYPE_CHECKING: - from collections.abc import Collection, Sequence + from collections.abc import Sequence, Set from _pytest._code import ExceptionInfo @@ -201,7 +201,7 @@ def test_matcher_cache(): def test_matcher_find( lines: list[str], flavor: Flavor, - pattern: Collection[LinePattern], + pattern: LinePattern | Set[LinePattern] | Sequence[LinePattern], expect: Sequence[tuple[str, int]], ) -> 
None: matcher = LineMatcher.from_lines(lines, flavor=flavor) From fb95a6e8b971c8fd191ea09cd098c9ce6f5213f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 5 Apr 2024 18:37:58 +0200 Subject: [PATCH 41/66] improve coverage --- tests/test_testing/test_matcher_buffer.py | 196 ++++++++++++++-------- 1 file changed, 126 insertions(+), 70 deletions(-) diff --git a/tests/test_testing/test_matcher_buffer.py b/tests/test_testing/test_matcher_buffer.py index c502344937e..794a9e5d98a 100644 --- a/tests/test_testing/test_matcher_buffer.py +++ b/tests/test_testing/test_matcher_buffer.py @@ -26,76 +26,18 @@ def test_offset_value(cls: type[_Region[Any]]) -> None: cls('', -1) -def test_line_comparison_operators(): - assert Line('a', 1) == 'a' - assert Line('a', 1) == ('a', 1) - assert Line('a', 1) == ['a', 1] - assert Line('a', 1) == Line('a', 1) - - assert Line('a', 2) != 'b' - assert Line('a', 2) != ('a', 1) - assert Line('a', 2) != ('b', 2) - assert Line('a', 2) != ['a', 1] - assert Line('a', 2) != ['b', 2] - assert Line('a', 2) != Line('a', 1) - assert Line('a', 2) != Line('b', 2) - - # order - assert Line('ab', 1) > 'a' - assert Line('ab', 1) > ('a', 1) - assert Line('ab', 1) > ['a', 1] - assert Line('ab', 1) > Line('a', 1) - - assert Line('a', 1) < 'ab' - assert Line('a', 1) < ('ab', 1) - assert Line('a', 1) < ['ab', 1] - assert Line('a', 1) < Line('ab', 1) - - assert Line('ab', 1) >= 'ab' - assert Line('ab', 1) >= ('ab', 1) - assert Line('ab', 1) >= ['ab', 1] - assert Line('ab', 1) >= Line('ab', 1) - - assert Line('ab', 1) <= 'ab' - assert Line('ab', 1) <= ('ab', 1) - assert Line('ab', 1) <= ['ab', 1] - assert Line('ab', 1) <= Line('ab', 1) - - -def test_empty_line(): - assert Line() == '' - assert Line() == ['', 0] - - assert Line() != ['', 1] - assert Line() != ['a'] - assert Line() != ['a', 0] - assert Line() != object() - - assert Line() <= '' - assert Line() <= 'a' - assert Line() <= ['a', 0] - 
assert Line() <= Line('a', 0) - - assert Line() < 'a' - assert Line() < ['a', 0] - assert Line() < Line('a', 0) - - # do not simplify these expressions - assert not operator.__lt__(Line(), '') - assert not operator.__lt__(Line(), ['', 0]) - assert not operator.__lt__(Line(), Line()) - - assert not operator.__gt__(Line(), '') - assert not operator.__gt__(Line(), ['', 0]) - assert not operator.__gt__(Line(), Line()) +def test_line_region_window(): + for n in range(3): + # the empty line is still a line in the source + assert Line('', n).window == slice(n, n + 1) + line = Line('', 1) + assert ['L1', '', 'L3', 'L4', 'L4'][line.window] == [''] -@pytest.mark.parametrize('operand', [[], [Line()], [Line(), 0], [[chr(1), chr(2)], 0]]) -def test_line_unsupported_operators(operand): - for dispatcher in [operator.__lt__, operator.__le__, operator.__ge__, operator.__gt__]: - pytest.raises(TypeError, dispatcher, Line(), operand) - assert Line() != operand +def test_line_slice_context(): + assert Line('L2', 1).context(delta=4, limit=5) == (slice(0, 1), slice(2, 5)) + assert Line('L2', 3).context(delta=2, limit=9) == (slice(1, 3), slice(4, 6)) def test_line_startswith(): @@ -196,6 +138,106 @@ def test_line_find(line: Line, data: list[tuple[str, tuple[int, ...], int]]) -> assert line.index(target, *args) == expect +def test_empty_line_operators(): + assert Line() == '' + assert Line() == ['', 0] + + assert Line() != ['', 1] + assert Line() != ['a'] + assert Line() != ['a', 0] + assert Line() != object() + + assert Line() <= '' + assert Line() <= 'a' + assert Line() <= ['a', 0] + assert Line() <= Line('a', 0) + + assert Line() < 'a' + assert Line() < ['a', 0] + assert Line() < Line('a', 0) + + # do not simplify these expressions + assert not operator.__lt__(Line(), '') + assert not operator.__lt__(Line(), ['', 0]) + assert not operator.__lt__(Line(), Line()) + + assert not operator.__gt__(Line(), '') + assert not operator.__gt__(Line(), ['', 0]) + assert not operator.__gt__(Line(), 
Line()) + + +def test_non_empty_line_operators(): + assert Line('a', 1) == 'a' + assert Line('a', 1) == ('a', 1) + assert Line('a', 1) == ['a', 1] + assert Line('a', 1) == Line('a', 1) + + assert Line('a', 2) != 'b' + assert Line('a', 2) != ('a', 1) + assert Line('a', 2) != ('b', 2) + assert Line('a', 2) != ['a', 1] + assert Line('a', 2) != ['b', 2] + assert Line('a', 2) != Line('a', 1) + assert Line('a', 2) != Line('b', 2) + + # order + assert Line('ab', 1) > 'a' + assert Line('ab', 1) > ('a', 1) + assert Line('ab', 1) > ['a', 1] + assert Line('ab', 1) > Line('a', 1) + + assert Line('a', 1) < 'ab' + assert Line('a', 1) < ('ab', 1) + assert Line('a', 1) < ['ab', 1] + assert Line('a', 1) < Line('ab', 1) + + assert Line('ab', 1) >= 'ab' + assert Line('ab', 1) >= ('ab', 1) + assert Line('ab', 1) >= ['ab', 1] + assert Line('ab', 1) >= Line('ab', 1) + + assert Line('ab', 1) <= 'ab' + assert Line('ab', 1) <= ('ab', 1) + assert Line('ab', 1) <= ['ab', 1] + assert Line('ab', 1) <= Line('ab', 1) + + +@pytest.mark.parametrize( + 'operand', + [ + '', + '12', # 2-element sequence + 'abcdef', + ['L1', 0], + ('L1', 1), + Line(), + ], +) +def test_line_supported_operators(operand): + with contextlib.nullcontext(): + for dispatcher in [operator.__lt__, operator.__le__, operator.__ge__, operator.__gt__]: + dispatcher(Line(), operand) + + +@pytest.mark.parametrize( + 'operand', + [ + [], + [Line()], + [Line(), 0], + [chr(1)], + [chr(1), chr(2)], + [chr(1), chr(2), chr(3)], + [[chr(1), chr(2)], 0], + ], +) +def test_line_unsupported_operators(operand): + for dispatcher in [operator.__lt__, operator.__le__, operator.__ge__, operator.__gt__]: + pytest.raises(TypeError, dispatcher, Line(), operand) + + assert Line() != operand + + def test_block_constructor(): empty = Block() assert empty.buffer == () @@ -206,7 +248,7 @@ def test_block_constructor(): Block([1234]) # type: ignore[list-item] -def test_empty_block(): +def test_empty_block_operators(): assert Block() == [] assert Block() == 
[[], 0] @@ -243,7 +285,7 @@ def test_empty_block(): (['a', 'b', 'c'], 'd', ('a', ['b', 2], Line('c', 3))), ], ) -def test_block_comparison_operators( +def test_non_empty_block_operators( lines: list[str], foreign: str, expect: Sequence[str | tuple[str, int] | Line] ) -> None: assert Block(lines, 1) == expect @@ -278,7 +320,9 @@ def test_block_comparison_operators( [[], 0], ['L1'], [Line()], - ['L1', 'L2'], + ['AA', 'AA'], # outer: 2 items, inner: 2 items + ['AAA', 'AAA'], # outer: 2 items, inner: 3 items + ['AA', ('AA', 1)], # first line, second line + offset ['L1', Line()], ['L1', 'L2', 'L3'], ['L1', 'L2', Line()], @@ -300,7 +344,10 @@ def test_block_supported_operators(operand): object(), # bad lines ['L1', object(), 'L3'], # bad lines (no offset) [['a', object()], 1], # bad lines (with offset) + [1, 'L1'], # two-elements bad inputs ['L1', 1], # single line + offset not allowed + ['AA', (1, 1)], # outer: 2 items, inner: 2 items + ['AA', ('AA', '102')], [[], object()], # no lines + bad offset [['L1', 'L2'], object()], # ok lines + bad offset [[object(), object()], object()], # bad lines + bad offset @@ -313,6 +360,15 @@ def test_block_unsupported_operators(operand): assert Block() != operand +def test_block_region_window(): + for n in range(3): + assert Block([], n).window == slice(n, n) + + block = Block(['B', 'C', 'D'], 1) + assert block.window == slice(1, 4) + assert ['A', 'B', 'C', 'D', 'E'][block.window] == ['B', 'C', 'D'] + + def test_block_slice_context(): assert Block(['a', 'b'], 1).context(delta=4, limit=5) == (slice(0, 1), slice(3, 5)) assert Block(['a', 'b'], 3).context(delta=2, limit=9) == (slice(1, 3), slice(5, 7)) From c9c6888b477da12351043a6f4be7c56058ede4d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 5 Apr 2024 19:16:41 +0200 Subject: [PATCH 42/66] Update doc --- sphinx/testing/matcher/_util.py | 4 +- sphinx/testing/matcher/buffer.py | 109 +++++++++------------- 
tests/test_testing/test_matcher_buffer.py | 18 ++-- 3 files changed, 53 insertions(+), 78 deletions(-) diff --git a/sphinx/testing/matcher/_util.py b/sphinx/testing/matcher/_util.py index c9412e996c0..8a552784e34 100644 --- a/sphinx/testing/matcher/_util.py +++ b/sphinx/testing/matcher/_util.py @@ -189,8 +189,8 @@ def diff( before, after = region.context(context, limit := len(source)) writelines(omit_line(has_context * before.start)) writelines(indent_lines(source[before], indent=indent, highlight=False)) - # use region.window to ensure that single lines are wrapped in lists - writelines(indent_lines(source[region.window], indent=indent, highlight=True)) + # use region.span to ensure that single lines are wrapped in lists + writelines(indent_lines(source[region.span], indent=indent, highlight=True)) writelines(indent_lines(source[after], indent=indent, highlight=False)) writelines(omit_line(has_context * (limit - after.stop))) diff --git a/sphinx/testing/matcher/buffer.py b/sphinx/testing/matcher/buffer.py index 8a0eb88ac01..4a69da35be4 100644 --- a/sphinx/testing/matcher/buffer.py +++ b/sphinx/testing/matcher/buffer.py @@ -33,27 +33,25 @@ _T = TypeVar('_T', bound=Sequence[str]) -class _Region(Generic[_T], Sequence[str], abc.ABC): +class Region(Generic[_T], Sequence[str], abc.ABC): """A string or a sequence of strings implementing rich comparison. - Given an implicit *source* as a list of strings, a :class:`SourceView` - is a subset of that implicit *source* starting at some :attr:`offset`. - - :meta private: + Given an implicit *source* as a list of strings, a :class:`Region` is + of that of that implicit *source* starting at some :attr:`offset`. """ # add __weakref__ to allow the object being weak-referencable __slots__ = ('__buffer', '__offset', '__weakref__') def __init__(self, buffer: _T, /, offset: int = 0, *, _check: bool = True) -> None: - """Construct a :class:`SourceView`. + """Construct a :class:`Region` object. 
- :param buffer: The view's content (a string or a list of strings). - :param offset: The view's offset with respect to the original source. + :param buffer: The region's content (a string or a list of strings). + :param offset: The region's offset with respect to the original source. :param _check: An internal parameter used for validating inputs. The *_check* parameter is only meant for internal usage and strives - to speed-up the construction of :class:`SourceView` objects for which + to speed-up the construction of :class:`Region` objects for which their constructor arguments are known to be valid at call time. """ if _check: @@ -75,41 +73,30 @@ def buffer(self) -> _T: @property def offset(self) -> int: - """The index of this object in the original source.""" + """The index of this region in the original source.""" return self.__offset @property def length(self) -> int: - """The number of items in this object.""" + """The number of "atomic" items in this region.""" return len(self) @property @abc.abstractmethod - def window(self) -> slice: + def span(self) -> slice: """A slice representing this region in its source. - If *source* is the original source this region is contained within, - then ``assert [*source[region.window]] == [*region.lines()]`` holds. - Examples:: source = ['L1', 'L2', 'L3'] line = Line('L2', 1) - assert source[line.window] == ['L2'] + assert source[line.span] == ['L2'] source = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] block = Block(['4', '5', '6'], 3) - assert source[block.window] == ['4', '5', '6'] + assert source[block.span] == ['4', '5', '6'] """ - @abc.abstractmethod - def lines(self) -> tuple[Line, ...]: - """This region as a tuple of :class:`Line` objects.""" - - @abc.abstractmethod - def lines_iterator(self) -> Iterator[Line]: - """This region as an iterator of :class:`Line` objects.""" - def context(self, delta: int, limit: int) -> tuple[slice, slice]: """A slice object indicating a context around this region. 
@@ -128,11 +115,11 @@ def context(self, delta: int, limit: int) -> tuple[slice, slice]: assert delta >= 0, 'context size must be >= 0' assert limit >= 0, 'source length must be >= 0' - window = self.window - before_start, before_stop = max(0, window.start - delta), min(window.start, limit) + span = self.span + before_start, before_stop = max(0, span.start - delta), min(span.start, limit) before_slice = slice(before_start, before_stop) - after_start, after_stop = min(window.stop, limit), min(window.stop + delta, limit) + after_start, after_stop = min(span.stop, limit), min(span.stop + delta, limit) after_slice = slice(after_start, after_stop) return before_slice, after_slice @@ -164,7 +151,7 @@ def find(self, value: Any, start: int = 0, stop: int = sys.maxsize, /) -> int: """ def pformat(self) -> str: - """A nice representation of this object.""" + """A nice representation of this region.""" return f'{self.__class__.__name__}({self!r}, @={self.offset}, #={self.length})' def __repr__(self) -> str: @@ -174,7 +161,7 @@ def __copy__(self) -> Self: return self.__class__(self.buffer, self.offset, _check=False) def __bool__(self) -> bool: - """Indicate whether this view is empty or not.""" + """Indicate whether this region is empty or not.""" return bool(self.buffer) @final @@ -183,23 +170,19 @@ def __iter__(self) -> Iterator[str]: return iter(self.buffer) def __len__(self) -> int: - """The number of "atomic" items in this view.""" + """The number of "atomic" items in this region.""" return len(self.buffer) def __contains__(self, value: object, /) -> bool: - """Check that an "atomic" value is represented by this view.""" + """Check that an "atomic" value is represented by this region.""" return value in self.buffer or self.find(value) != -1 @abc.abstractmethod def __lt__(self, other: object, /) -> bool: - """Check that this view is strictly contained in *other*. - - Subclasses implementing the :class:`SourceView` interface - should describe the expected types for *object*. 
- """ + """Check that this region is strictly contained in *other*.""" def __le__(self, other: object, /) -> bool: - """Check that this view is contained in *other*. + """Check that this region is contained in *other*. By default, ``self == other`` is called before ``self < other``, but subclasses should override this method for an efficient alternative. @@ -207,7 +190,7 @@ def __le__(self, other: object, /) -> bool: return self == other or self < other def __ge__(self, other: object, /) -> bool: - """Check that *other* is contained by this view. + """Check that *other* is contained by this region. By default, ``self == other`` is called before ``self > other``, but subclasses should override this method for an efficient alternative. @@ -216,20 +199,16 @@ def __ge__(self, other: object, /) -> bool: @abc.abstractmethod def __gt__(self, other: object, /) -> bool: - """Check that this view strictly contains *other*. + """Check that this region strictly contains *other*.""" - Subclasses implementing the :class:`SourceView` interface - should describe the expected types for *object*. - """ - -class Line(_Region[str]): +class Line(Region[str]): """A line found by :meth:`~sphinx.testing.matcher.LineMatcher.find`. A :class:`Line` can be compared to :class:`str`, :class:`Line` objects or - a pair (i.e., a two-length sequence) ``(line, line_offset)`` where + a pair (i.e., a two-length sequence) ``(line_content, line_offset)`` where - - *line* is a :class:`str` object, and + - *line_content* is a :class:`str`, and - *line_offset* is an nonnegative integer. 
By convention, the comparison result (except for ``!=``) of :class:`Line` @@ -246,15 +225,9 @@ def __init__(self, line: str = '', /, offset: int = 0, *, _check: bool = True) - super().__init__(line, offset, _check=_check) @property - def window(self) -> slice: + def span(self) -> slice: return slice(self.offset, self.offset + 1) - def lines(self) -> tuple[Line]: - return (self,) - - def lines_iterator(self) -> Iterator[Line]: - yield self - def count(self, sub: SubStringLike, /) -> int: """Count the number of occurrences of a substring or pattern. @@ -270,7 +243,7 @@ def count(self, sub: SubStringLike, /) -> int: return self.buffer.count(sub) # raise a TypeError if *sub* is not a string - # explicitly add the method since its signature differs from :meth:`SourceView.index` + # explicitly add the method since its signature differs from :meth:`_Region.index` def index(self, sub: SubStringLike, start: int = 0, stop: int = sys.maxsize, /) -> int: """Find the lowest index of a substring. @@ -358,7 +331,7 @@ def __gt__(self, other: object, /) -> bool: return self.offset == other[1] and self.buffer > other[0] -class Block(_Region[tuple[str, ...]]): +class Block(Region[tuple[str, ...]]): """Block found by :meth:`~sphinx.testing.matcher.LineMatcher.find_blocks`. A block is a sequence of lines comparable to :class:`Line`, generally a @@ -414,18 +387,9 @@ def __init__( """ @property - def window(self) -> slice: + def span(self) -> slice: return slice(self.offset, self.offset + self.length) - def lines(self) -> tuple[Line, ...]: - if self.__cached_lines is None: - self.__cached_lines = tuple(self.lines_iterator()) - return self.__cached_lines - - def lines_iterator(self) -> Iterator[Line]: - for index, line in enumerate(self, self.offset): - yield Line(line, index, _check=False) - def count(self, target: BlockLineLike, /) -> int: """Count the number of occurrences of matching lines. 
@@ -446,7 +410,7 @@ def count(self, target: BlockLineLike, /) -> int: return self.buffer.count(target) - # explicitly add the method since its signature differs from :meth:`SourceView.index` + # explicitly add the method since its signature differs from :meth:`_Region.index` def index(self, target: BlockLineLike, start: int = 0, stop: int = sys.maxsize, /) -> int: """Find the lowest index of a matching line. @@ -477,6 +441,17 @@ def find(self, target: BlockLineLike, start: int = 0, stop: int = sys.maxsize, / return self.buffer.index(target, start, stop) return -1 + def lines(self) -> tuple[Line, ...]: + """This region as a tuple of :class:`Line` objects.""" + if self.__cached_lines is None: + self.__cached_lines = tuple(self.lines_iterator()) + return self.__cached_lines + + def lines_iterator(self) -> Iterator[Line]: + """This region as an iterator of :class:`Line` objects.""" + for index, line in enumerate(self, self.offset): + yield Line(line, index, _check=False) + @overload def at(self, index: int, /) -> Line: ... 
# NoQA: E704 @overload diff --git a/tests/test_testing/test_matcher_buffer.py b/tests/test_testing/test_matcher_buffer.py index 794a9e5d98a..cf5908f3381 100644 --- a/tests/test_testing/test_matcher_buffer.py +++ b/tests/test_testing/test_matcher_buffer.py @@ -14,11 +14,11 @@ from collections.abc import Sequence from typing import Any - from sphinx.testing.matcher.buffer import _Region + from sphinx.testing.matcher.buffer import Region @pytest.mark.parametrize('cls', [Line, Block]) -def test_offset_value(cls: type[_Region[Any]]) -> None: +def test_offset_value(cls: type[Region[Any]]) -> None: with pytest.raises(TypeError, match=re.escape('offset must be an integer, got: None')): cls('', None) # type: ignore[arg-type] @@ -26,13 +26,13 @@ def test_offset_value(cls: type[_Region[Any]]) -> None: cls('', -1) -def test_line_region_window(): +def test_line_region_span(): for n in range(3): # the empty line is still a line in the source - assert Line('', n).window == slice(n, n + 1) + assert Line('', n).span == slice(n, n + 1) line = Line('', 1) - assert ['L1', '', 'L3', 'L4', 'L4'][line.window] == [''] + assert ['L1', '', 'L3', 'L4', 'L4'][line.span] == [''] def test_line_slice_context(): @@ -360,13 +360,13 @@ def test_block_unsupported_operators(operand): assert Block() != operand -def test_block_region_window(): +def test_block_region_span(): for n in range(3): - assert Block([], n).window == slice(n, n) + assert Block([], n).span == slice(n, n) block = Block(['B', 'C', 'D'], 1) - assert block.window == slice(1, 4) - assert ['A', 'B', 'C', 'D', 'E'][block.window] == ['B', 'C', 'D'] + assert block.span == slice(1, 4) + assert ['A', 'B', 'C', 'D', 'E'][block.span] == ['B', 'C', 'D'] def test_block_slice_context(): From bb5cb117f24067128479ff7fd35d26fa3cbb5b4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 5 Apr 2024 19:20:12 +0200 Subject: [PATCH 43/66] Update doc --- 
sphinx/testing/matcher/buffer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sphinx/testing/matcher/buffer.py b/sphinx/testing/matcher/buffer.py index 4a69da35be4..6ff6094566a 100644 --- a/sphinx/testing/matcher/buffer.py +++ b/sphinx/testing/matcher/buffer.py @@ -334,10 +334,10 @@ def __gt__(self, other: object, /) -> bool: class Block(Region[tuple[str, ...]]): """Block found by :meth:`~sphinx.testing.matcher.LineMatcher.find_blocks`. - A block is a sequence of lines comparable to :class:`Line`, generally a - string (the line content) or a pair ``(line, line_offset)``. + A block is a *sequence* of lines comparable to :class:`Line` objects, + usually given as :class:`str` objects or ``(line, line_offset)`` pairs. - A block can also be compared to pair ``(block_lines, block_offset)`` where + A block can be compared to pairs ``(block_lines, block_offset)`` where - *block_lines* is a sequence of line-like objects, and - *block_offset* is an integer (matched against :attr:`offset`). From 173076d8c9cca2b483dbc60eb3f58ff3a4e0a7cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 5 Apr 2024 19:20:24 +0200 Subject: [PATCH 44/66] Update doc --- sphinx/testing/matcher/buffer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sphinx/testing/matcher/buffer.py b/sphinx/testing/matcher/buffer.py index 6ff6094566a..9fa1a7725bf 100644 --- a/sphinx/testing/matcher/buffer.py +++ b/sphinx/testing/matcher/buffer.py @@ -342,8 +342,8 @@ class Block(Region[tuple[str, ...]]): - *block_lines* is a sequence of line-like objects, and - *block_offset* is an integer (matched against :attr:`offset`). 
- Pairs ``(line, line_offset)`` or ``(block_lines, block_offset)`` can be any - non-string two-elements sequence (e.g., a tuple or a list), e.g:: + Pairs ``(line, line_offset)`` or ``(block_lines, block_offset)`` can be + any non-string two-elements sequence (e.g., a tuple or a list), e.g:: assert Block(['a', 'b', 'c', 'd'], 2) == [ 'a', From e1a412f8a728e7b7ac3089c60959347ea17d4c07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 5 Apr 2024 19:36:50 +0200 Subject: [PATCH 45/66] fix ref --- tests/test_testing/test_matcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_testing/test_matcher.py b/tests/test_testing/test_matcher.py index 4f06f5dbf88..7cb2690b4a3 100644 --- a/tests/test_testing/test_matcher.py +++ b/tests/test_testing/test_matcher.py @@ -112,7 +112,7 @@ def make_debug_context( context_size: int, # the original value of the 'context_size' parameter indent: int = 4, ) -> list[str]: - """Other API for :func:`sphinx.testing._matcher.util.get_debug_context`. + """Other API for :func:`sphinx.testing.matcher._util.diff`. The resulting lines are of the form:: From c6b97fbcb99b5f288c9c3664ae3e2a28a0b5e0eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 5 Apr 2024 19:38:45 +0200 Subject: [PATCH 46/66] fix typos --- sphinx/testing/matcher/_engine.py | 2 +- sphinx/testing/matcher/options.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sphinx/testing/matcher/_engine.py b/sphinx/testing/matcher/_engine.py index 04f5a092523..38246472bfa 100644 --- a/sphinx/testing/matcher/_engine.py +++ b/sphinx/testing/matcher/_engine.py @@ -55,7 +55,7 @@ def to_line_patterns( # NoqA: E302 .. 
note:: - If *expect* is a :class:`collections.abc.Set`-like object, the order + If *expect* is a :class:`~collections.abc.Set`-like object, the order of the output sequence is an implementation detail but guaranteed to be the same for the same inputs. Otherwise, the order of *expect* is retained, in case this could make a difference. diff --git a/sphinx/testing/matcher/options.py b/sphinx/testing/matcher/options.py index 4b77ffab24f..a7c8cc41b5d 100644 --- a/sphinx/testing/matcher/options.py +++ b/sphinx/testing/matcher/options.py @@ -48,7 +48,7 @@ class Options(TypedDict, total=False): while others (e.g., :attr:`stripline`) act on the lines obtained after splitting the (transformed) original string. - .. seealso:: :mod:`sphinx.testing._matcher.cleaner` + .. seealso:: :mod:`sphinx.testing.matcher._cleaner` """ # only immutable fields should be used as options, otherwise undesired From fd84ef32ee6d696b3a49cb8b22389e71af0b7fc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 6 Apr 2024 15:18:05 +0200 Subject: [PATCH 47/66] cleanup --- sphinx/testing/matcher/__init__.py | 6 +- sphinx/testing/matcher/_cleaner.py | 201 --------------------- sphinx/testing/matcher/_engine.py | 38 ++-- sphinx/testing/matcher/_util.py | 17 +- sphinx/testing/matcher/cleaner.py | 161 +++++++++++++++++ sphinx/testing/matcher/options.py | 39 ++-- tests/test_testing/test_matcher_cleaner.py | 146 +++++++-------- tests/test_testing/test_matcher_engine.py | 55 +++--- 8 files changed, 314 insertions(+), 349 deletions(-) delete mode 100644 sphinx/testing/matcher/_cleaner.py create mode 100644 sphinx/testing/matcher/cleaner.py diff --git a/sphinx/testing/matcher/__init__.py b/sphinx/testing/matcher/__init__.py index 829beb702ea..0b71101b7da 100644 --- a/sphinx/testing/matcher/__init__.py +++ b/sphinx/testing/matcher/__init__.py @@ -9,7 +9,7 @@ import re from typing import TYPE_CHECKING, cast, overload -from 
sphinx.testing.matcher import _cleaner, _engine, _util +from sphinx.testing.matcher import cleaner, _engine, _util from sphinx.testing.matcher.buffer import Block from sphinx.testing.matcher.options import Options, OptionsHolder @@ -93,7 +93,7 @@ def lines(self) -> Block: if cached is None: options = self.default_options | cast(Options, self.options) # compute for the first time the block's lines - lines = tuple(_cleaner.clean_text(self.content, **options)) + lines = tuple(cleaner.clean(self.content, **options)) # check if the value is the same as any of a previously cached value for addr, value in enumerate(itertools.islice(stack, 0, len(stack) - 1)): if isinstance(value, int): @@ -336,7 +336,7 @@ def _assert_not_found( return pat = _util.prettify_patterns(patterns, sort=pattern_type == 'line') - ctx = _util.diff(self.lines(), found, context_size) + ctx = _util.get_context_lines(self.lines(), found, context_size) logs = [f'{pattern_type} pattern', pat, 'found in', '\n'.join(ctx)] raise AssertionError('\n\n'.join(logs)) diff --git a/sphinx/testing/matcher/_cleaner.py b/sphinx/testing/matcher/_cleaner.py deleted file mode 100644 index a1368d2d5b7..00000000000 --- a/sphinx/testing/matcher/_cleaner.py +++ /dev/null @@ -1,201 +0,0 @@ -"""Private utility functions for :mod:`sphinx.testing.matcher`. - -All objects provided by this module are considered an implementation detail -and are not meant to be used by external libraries. 
-""" - -from __future__ import annotations - -__all__ = () - -import fnmatch -import itertools -import re -from functools import reduce -from itertools import filterfalse -from typing import TYPE_CHECKING - -from sphinx.testing.matcher import _engine, _util -from sphinx.testing.matcher.options import OptionsHolder -from sphinx.util.console import strip_escape_sequences - -if TYPE_CHECKING: - from collections.abc import Iterable, MutableSequence, Sequence - - from typing_extensions import Unpack - - from sphinx.testing.matcher._util import LinePredicate - from sphinx.testing.matcher.options import DeletePattern, Flavor, Options, StripChars - - -def clean_text(text: str, /, **options: Unpack[Options]) -> Iterable[str]: - """Clean a text, returning an iterable of lines.""" - config = OptionsHolder(**options) - - if not config.keep_ansi: - text = strip_escape_sequences(text) - - text = strip_chars(text, config.strip) - lines = text.splitlines(config.keep_break) - - return clean_lines(lines, **options) - - -def clean_lines(lines: Iterable[str], /, **options: Unpack[Options]) -> Iterable[str]: - """Clean an iterable of lines.""" - config = OptionsHolder(**options) - - lines = strip_lines(lines, config.stripline) - - keep_empty, compress, unique = config.keep_empty, config.compress, config.unique - lines = filter_lines(lines, keep_empty=keep_empty, compress=compress, unique=unique) - - deleter_objects, flavor = config.delete, config.flavor - lines = prune_lines(lines, deleter_objects, flavor=flavor, trace=None) - - ignore_predicate = config.ignore - lines = ignore_lines(lines, ignore_predicate) - - return lines - - -def strip_chars(text: str, chars: StripChars = True, /) -> str: - """Strip expected characters from *text*.""" - if isinstance(chars, bool): - return text.strip() if chars else text - return text.strip(chars) - - -def strip_lines(lines: Iterable[str], chars: StripChars = True, /) -> Iterable[str]: - """Call :meth:`str.strip` to each line in *lines*.""" - if 
isinstance(chars, bool): - return map(str.strip, lines) if chars else lines - return (line.strip(chars) for line in lines) - - -def filter_lines( - lines: Iterable[str], - /, - *, - keep_empty: bool = True, - compress: bool = False, - unique: bool = False, -) -> Iterable[str]: - """Filter the lines. - - :param lines: The lines to filter. - :param keep_empty: If true, keep empty lines. - :param unique: If true, remove duplicated lines. - :param compress: If true, remove consecutive duplicated lines. - :return: An iterable of filtered lines. - - Since removing empty lines first allows serial duplicates to be eliminated - in the same iteration, duplicates elimination is performed *after* empty - lines are removed. To change the behaviour, consider using:: - - lines = filter_lines(lines, compress=True) - lines = filter_lines(lines, empty=True) - """ - if not keep_empty: - lines = filter(None, lines) - - if unique: - # 'compress' has no effect when 'unique' is set - return _util.unique_everseen(lines) - - if compress: - return _util.unique_justseen(lines) - - return lines - - -def ignore_lines(lines: Iterable[str], predicate: LinePredicate | None, /) -> Iterable[str]: - """Ignore lines satisfying the *predicate*. - - :param lines: The lines to filter. - :param predicate: An optional predicate. - :return: An iterable of filtered lines. - """ - return filterfalse(predicate, lines) if callable(predicate) else lines - - -def prune_lines( - lines: Iterable[str], - delete: DeletePattern, - /, - *, - flavor: Flavor = 'none', - trace: MutableSequence[Sequence[tuple[str, Sequence[str]]]] | None = None, -) -> Iterable[str]: - r"""Remove substrings from a source satisfying some patterns. - - :param lines: The source to transform. - :param delete: One or more prefixes to remove or substitution patterns. - :param flavor: Indicate the flavor of prefix regular expressions. - :param trace: A buffer where intermediate results are stored. - :return: An iterable of transformed lines. 
- - Usage:: - - lines = prune_lines(['1111a', 'b'], r'\d+', flavor='re') - assert list(lines) == ['a', 'b'] - - lines = prune_lines(['a123b', 'c123d'], re.compile(r'\d+')) - assert list(lines) == ['ab', 'cd'] - - When specified, *trace* is incrementally constructed as follows:: - - for i, line in enumerate(lines): - entry, res = [(line, frame := [])], line - for j, pattern in enumerate(patterns): - res = patterns.sub('', res) - frame.append(res) - - while res != line: - entry.append((res, frame := [])) - for j, pattern in enumerate(patterns): - res = patterns.sub('', res) - frame.append(res) - - trace.append(entry) - yield res - """ - delete_patterns = _engine.to_line_patterns(delete) - # Since fnmatch-style patterns do not support a meta-character for - # matching at the start of the string, we first translate patterns - # and then add an explicit '\A' character in the regular expression. - patterns = _engine.translate( - delete_patterns, - flavor=flavor, - escape=re.escape, - str2fnmatch=lambda prefix: fnmatch.translate(prefix).rstrip(r'\Z$'), - ) - # Now, we add the '\A' meta-character to ensure that we only match - # at the beginning of the string and not in the middle of the string. 
- re_translate = r'\A'.__add__ - compiled = _engine.compile(patterns, flavor='re', str2regexpr=re_translate) - - def prune_redux(line: str, pattern: re.Pattern[str]) -> str: - return pattern.sub('', line) - - def prune_debug(line: str, frame: list[str]) -> str: - results = itertools.accumulate(compiled, prune_redux, initial=line) - frame.extend(itertools.islice(results, 1, None)) # skip the first element - assert frame - return frame[-1] - - if trace is None: - for line in lines: - ret = reduce(prune_redux, compiled, line) - while line != ret: - line, ret = ret, reduce(prune_redux, compiled, ret) - yield ret - else: - for line in lines: - entry: list[tuple[str, list[str]]] = [(line, [])] - ret = prune_debug(line, entry[-1][1]) - while line != ret: - entry.append((ret, [])) - line, ret = ret, prune_debug(ret, entry[-1][1]) - trace.append(entry) - yield ret diff --git a/sphinx/testing/matcher/_engine.py b/sphinx/testing/matcher/_engine.py index 38246472bfa..993b0ec667e 100644 --- a/sphinx/testing/matcher/_engine.py +++ b/sphinx/testing/matcher/_engine.py @@ -1,7 +1,6 @@ -"""Private utility functions for :mod:`sphinx.testing.matcher`. +"""Private regular expressions utilities for :mod:`sphinx.testing.matcher`. -All objects provided by this module are considered an implementation detail -and are not meant to be used by external libraries. +All objects provided by this module are considered an implementation detail. """ from __future__ import annotations @@ -43,7 +42,7 @@ def to_line_patterns( # NoQA: E704 ) -> tuple[LinePattern, ...]: ... def to_line_patterns( # NoqA: E302 patterns: LinePattern | Set[LinePattern] | Sequence[LinePattern], / -) -> Sequence[LinePattern]: +) -> tuple[LinePattern, ...]: """Get a read-only sequence of line-matching patterns. :param patterns: One or more patterns a line should match (in its entirety). 
@@ -106,13 +105,18 @@ def string_expression(line: str, /) -> str: return rf'\A{re.escape(line)}\Z' +def fnmatch_prefix(prefix: str) -> str: + """A regular expression matching a :mod:`fnmatch`-style prefix.""" + return fnmatch.translate(prefix).rstrip(r'\Z$') + + def translate( patterns: Iterable[PatternLike], *, flavor: Flavor, escape: Callable[[str], str] | None = string_expression, - str2regexpr: Callable[[str], str] | None = None, - str2fnmatch: Callable[[str], str] | None = fnmatch.translate, + regular_translate: Callable[[str], str] | None = None, + fnmatch_translate: Callable[[str], str] | None = fnmatch.translate, ) -> Iterable[PatternLike]: r"""Translate regular expressions according to *flavor*. @@ -122,8 +126,8 @@ def translate( :param patterns: An iterable of regular expressions to translate. :param flavor: The translation flavor for non-compiled patterns. :param escape: Translation function for ``'none'`` flavor. - :param str2regexpr: Translation function for ``'re'`` flavor. - :param str2fnmatch: Translation function for ``'fnmatch'`` flavor. + :param regular_translate: Translation function for ``'re'`` flavor. + :param fnmatch_translate: Translation function for ``'fnmatch'`` flavor. :return: An iterable of :class:`re`-style pattern-like objects. 
""" _check_flavor(flavor) @@ -131,10 +135,10 @@ def translate( if flavor == 'none' and callable(translator := escape): return (format_expression(translator, expr) for expr in patterns) - if flavor == 're' and callable(translator := str2regexpr): + if flavor == 're' and callable(translator := regular_translate): return (format_expression(translator, expr) for expr in patterns) - if flavor == 'fnmatch' and callable(translator := str2fnmatch): + if flavor == 'fnmatch' and callable(translator := fnmatch_translate): return (format_expression(translator, expr) for expr in patterns) return patterns @@ -145,24 +149,24 @@ def compile( *, flavor: Flavor, escape: Callable[[str], str] | None = string_expression, - str2regexpr: Callable[[str], str] | None = None, - str2fnmatch: Callable[[str], str] | None = fnmatch.translate, -) -> Sequence[re.Pattern[str]]: + regular_translate: Callable[[str], str] | None = None, + fnmatch_translate: Callable[[str], str] | None = fnmatch.translate, +) -> tuple[re.Pattern[str], ...]: """Compile one or more patterns into :class:`~re.Pattern` objects. :param patterns: An iterable of patterns to translate and compile. :param flavor: The translation flavor for non-compiled patterns. :param escape: Translation function for ``'none'`` flavor. - :param str2regexpr: Translation function for ``'re'`` flavor. - :param str2fnmatch: Translation function for ``'fnmatch'`` flavor. + :param regular_translate: Translation function for ``'re'`` flavor. + :param fnmatch_translate: Translation function for ``'fnmatch'`` flavor. :return: A sequence of compiled regular expressions. 
""" patterns = translate( patterns, flavor=flavor, escape=escape, - str2regexpr=str2regexpr, - str2fnmatch=str2fnmatch, + regular_translate=regular_translate, + fnmatch_translate=fnmatch_translate, ) # mypy does not like map + re.compile() although it is correct but # this is likely due to https://github.com/python/mypy/issues/11880 diff --git a/sphinx/testing/matcher/_util.py b/sphinx/testing/matcher/_util.py index 8a552784e34..efaedb9185d 100644 --- a/sphinx/testing/matcher/_util.py +++ b/sphinx/testing/matcher/_util.py @@ -1,7 +1,6 @@ """Private utility functions for :mod:`sphinx.testing.matcher`. -All objects provided by this module are considered an implementation detail -and are not meant to be used by external libraries. +All objects provided by this module are considered an implementation detail. """ from __future__ import annotations @@ -30,7 +29,11 @@ LinePredicate = Callable[[str], object] """A predicate called on an entire line.""" BlockPattern = Sequence[LinePattern] - """A sequence of regular expression (compiled or not) for a block.""" + """A sequence of regular expressions (compiled or not) for a block. + + For instance, ``['a', re.compile('b*')]`` matches blocks + with the line ``'a'`` followed by a line matching ``'b*'``. + """ _T = TypeVar('_T') @@ -170,18 +173,18 @@ def prettify_patterns( return indent_source(source, indent=indent, highlight=False) -def diff( +def get_context_lines( source: Sequence[str], region: Line | Block, /, context: int, *, indent: int = 4 ) -> list[str]: - """Get some context lines around *block* and highlight the *block*. + """Get some context lines around *block* and highlight the *region*. :param source: The source containing the *block*. - :param region: A block to highlight. + :param region: A region to highlight (a line or a block). :param context: The number of lines to display around the block. :param indent: The number of indentation spaces. :return: A list of formatted lines. 
""" - assert region <= source, 'the block must be contained in the source' + assert region <= source, 'the region must be contained in the source' logs: list[str] = [] writelines = logs.extend diff --git a/sphinx/testing/matcher/cleaner.py b/sphinx/testing/matcher/cleaner.py new file mode 100644 index 00000000000..f8da4ed3d03 --- /dev/null +++ b/sphinx/testing/matcher/cleaner.py @@ -0,0 +1,161 @@ +"""Public cleaning functions for :mod:`sphinx.testing.matcher`.""" + +from __future__ import annotations + +__all__ = () + +import itertools +from functools import reduce +from typing import TYPE_CHECKING + +from sphinx.testing.matcher import _engine, _util +from sphinx.testing.matcher.options import OptionsHolder +from sphinx.util.console import strip_escape_sequences + +if TYPE_CHECKING: + import re + from collections.abc import Iterable + + from typing_extensions import Unpack + + from sphinx.testing.matcher.options import DeletePattern, Options, StripChars + + +def clean(text: str, /, **options: Unpack[Options]) -> Iterable[str]: + """Clean a text, returning an iterable of lines. + + :param text: The text to clean. + :param options: The cleaning options. + :return: The list of cleaned lines. 
+ """ + config = OptionsHolder(**options) + + # clean the text as a string + if not config.keep_ansi: + text = strip_escape_sequences(text) + text = strip_chars(text, config.strip) + + lines: Iterable[str] = text.splitlines(config.keep_break) + lines = strip_lines(lines, config.stripline) + + keep_empty, compress, unique = config.keep_empty, config.compress, config.unique + lines = filter_lines(lines, keep_empty=keep_empty, compress=compress, unique=unique) + lines = prune_lines(lines, config.delete) + + if callable(ignore_predicate := config.ignore): + lines = itertools.filterfalse(ignore_predicate, lines) + + return lines + + +def strip_chars(text: str, chars: StripChars = True, /) -> str: + """Strip expected characters from *text*.""" + if isinstance(chars, bool): + return text.strip() if chars else text + return text.strip(chars) + + +def strip_lines(lines: Iterable[str], chars: StripChars = True, /) -> Iterable[str]: + """Call :meth:`str.strip` to each line in *lines*.""" + if isinstance(chars, bool): + return map(str.strip, lines) if chars else lines + return (line.strip(chars) for line in lines) + + +def filter_lines( + lines: Iterable[str], + /, + *, + keep_empty: bool = True, + compress: bool = False, + unique: bool = False, +) -> Iterable[str]: + """Filter the lines. + + :param lines: The lines to filter. + :param keep_empty: If true, keep empty lines. + :param unique: If true, remove duplicated lines. + :param compress: If true, remove consecutive duplicated lines. + :return: An iterable of filtered lines. + + Since removing empty lines first allows serial duplicates to be eliminated + in the same iteration, duplicates elimination is performed *after* empty + lines are removed. 
To change the behaviour, consider using:: + + lines = filter_lines(lines, compress=True) + lines = filter_lines(lines, empty=True) + """ + if not keep_empty: + lines = filter(None, lines) + + if unique: + # 'compress' has no effect when 'unique' is set + return _util.unique_everseen(lines) + + if compress: + return _util.unique_justseen(lines) + + return lines + + +def prune_lines( + lines: Iterable[str], + patterns: DeletePattern, + /, + *, + trace: list[list[tuple[str, list[str]]]] | None = None, +) -> Iterable[str]: + r"""Remove substrings from a source satisfying some patterns. + + :param lines: The source to transform. + :param patterns: One or more substring patterns to delete. + :param trace: A buffer where intermediate results are stored. + :return: An iterable of transformed lines. + + Usage:: + + lines = prune_lines(['1111a', 'b1'], r'^\d+') + assert list(lines) == ['a', 'b1'] + + When specified, the *trace* contains the line's reduction chains, e.g., if + the line is ``'ABC#123'`` and ``patterns = (r'^[A-Z]', r'\d$')``, then the + corresponding reduction chain is:: + + [ + ('ABC#123', ['BC#123', 'BC#12']), + ('BC#12', ['C#12', 'C#1']), + ('C#1', ['#1', '#']), + ] + + In the above example, the final value is given by ``'#'`` which can also + be accessed by ``trace[i][-1][-1][-1]``.
+ """ + patterns = _engine.to_line_patterns(patterns) + compiled = _engine.compile(patterns, flavor='re') + + def prune_redux(line: str, pattern: re.Pattern[str]) -> str: + return pattern.sub('', line) + + def prune_debug(line: str, accumulator: list[str]) -> str: + values = itertools.accumulate(compiled, prune_redux, initial=line) + accumulator.extend(itertools.islice(values, 1, None)) # skip initial value + return accumulator[-1] # a reduced value + + if trace is None: + for line in lines: + ret = reduce(prune_redux, compiled, line) + while line != ret: + line, ret = ret, reduce(prune_redux, compiled, ret) + yield ret + else: + for line in lines: + entry: list[tuple[str, list[str]]] = [(line, [])] + ret = None + ret = prune_debug(line, entry[-1][1]) + while line != ret: + frame: tuple[str, list[str]] = (ret, []) + line, ret = ret, prune_debug(ret, frame[1]) + if ret != line: + entry.append(frame) + trace.append(entry) + yield ret diff --git a/sphinx/testing/matcher/options.py b/sphinx/testing/matcher/options.py index a7c8cc41b5d..fea903b368e 100644 --- a/sphinx/testing/matcher/options.py +++ b/sphinx/testing/matcher/options.py @@ -14,7 +14,7 @@ from typing_extensions import Unpack - from sphinx.testing.matcher._util import LinePattern, LinePredicate + from sphinx.testing.matcher._util import LinePredicate, PatternLike FlagOption = Literal['keep_ansi', 'keep_break', 'keep_empty', 'compress', 'unique'] @@ -23,8 +23,8 @@ """Allowed values for :attr:`Options.strip` and :attr:`Options.stripline`.""" DeleteOption = Literal['delete'] - DeletePattern = Union[LinePattern, Sequence[LinePattern]] - """A prefix or a compiled prefix pattern.""" + DeletePattern = Union[PatternLike, Sequence[PatternLike]] + """One or more patterns to delete.""" IgnoreOption = Literal['ignore'] @@ -70,6 +70,12 @@ class Options(TypedDict, total=False): * a string (*chars*) -- remove leading and trailing characters in *chars*. 
""" + keep_break: bool + """Indicate whether to keep line breaks at the end of each line. + + The default value is ``False`` (to mirror :meth:`str.splitlines`). + """ + stripline: StripChars """Describe the characters to strip from each source's line. @@ -80,12 +86,6 @@ class Options(TypedDict, total=False): * a string (*chars*) -- remove leading and trailing characters in *chars*. """ - keep_break: bool - """Indicate whether to keep line breaks at the end of each line. - - The default value is ``False`` (to mirror :meth:`str.splitlines`). - """ - keep_empty: bool """Indicate whether to keep empty lines in the output. @@ -113,23 +113,16 @@ class Options(TypedDict, total=False): """ delete: DeletePattern - r"""Prefixes or patterns to remove from the output lines. + r"""Regular expressions for substrings to delete from the output lines. - The transformation is described for one or more :class:`str` - or :class:`~re.Pattern` objects as follows: - - - Compile :class:`str` pattern into :class:`~re.Pattern` according - to the pattern :attr:`flavor` and remove prefixes matching those - patterns from the output lines. - - Replace substrings in the output lines matching one or more - patterns directly given as :class:`~re.Pattern` objects. - - The process is repeated until no output lines starts by any - of the given strings or matches any of the given patterns. + The output lines are pruned from their matching substrings (checked + using :func:`re.match`) until the output lines are stabilized. This transformation is applied at the end of the transformation chain, just before filtering the output lines are filtered with the :attr:`ignore` predicate. + + See :func:`sphinx.testing.matcher.cleaner.prune_lines` for an example. """ ignore: LinePredicate | None @@ -149,8 +142,8 @@ class Options(TypedDict, total=False): * ``'fnmatch'`` -- match lines using :mod:`fnmatch`-style patterns. * ``'re'`` -- match lines using :mod:`re`-style patterns. 
- This option only affects non-compiled patterns (i.e., those given - as :class:`str` and not :class:`~re.Pattern` objects). + This option only affects non-compiled patterns. Unless stated otherwise, + matching is performed on compiled patterns by :func:`~re.Pattern.match`. """ diff --git a/tests/test_testing/test_matcher_cleaner.py b/tests/test_testing/test_matcher_cleaner.py index 738109b7b1a..d3c9c1d3ad6 100644 --- a/tests/test_testing/test_matcher_cleaner.py +++ b/tests/test_testing/test_matcher_cleaner.py @@ -5,10 +5,12 @@ import pytest -from sphinx.testing.matcher._cleaner import filter_lines, prune_lines, strip_chars, strip_lines +from sphinx.testing.matcher.cleaner import filter_lines, prune_lines, strip_chars, strip_lines if TYPE_CHECKING: - from collections.abc import Callable, Sequence + from collections.abc import Sequence + + from sphinx.testing.matcher._util import PatternLike def test_strip_chars(): @@ -36,74 +38,72 @@ def test_filter_lines(): assert list(filter_lines(src, keep_empty=True, unique=True)) == ['a', '', 'b', 'c'] -@pytest.fixture() -def prune_trace_object() -> Callable[[], list[Sequence[tuple[str, Sequence[str]]]]]: - """A fixture returning a factory for a typed trace object. - - Without this fixture, trace objects need to be explicitly typed for mypy.
- """ - return list - - -def test_prune_prefix(prune_trace_object): - trace = prune_trace_object() - lines = prune_lines(['1111a1', 'b1'], '1', flavor='none', trace=trace) - assert list(lines) == ['a1', 'b1'] - assert trace == [ - [ - ('1111a1', ['111a1']), - ('111a1', ['11a1']), - ('11a1', ['1a1']), - ('1a1', ['a1']), - ('a1', ['a1']), - ], - [('b1', ['b1'])], - ] - - trace = prune_trace_object() - lines = prune_lines(['1111a1', 'b1'], r'\d+', flavor='re', trace=trace) - assert list(lines) == ['a1', 'b1'] - assert trace == [ - [('1111a1', ['a1']), ('a1', ['a1'])], - [('b1', ['b1'])], - ] - - trace = prune_trace_object() - lines = prune_lines(['/a/b/c.txt', 'keep.py'], '*.txt', flavor='fnmatch', trace=trace) - assert list(lines) == ['', 'keep.py'] - assert trace == [ - [('/a/b/c.txt', ['']), ('', [''])], - [('keep.py', ['keep.py'])], - ] - - -def test_prune_groups(prune_trace_object): - lines = prune_lines(['a123b', 'c123d'], re.compile(r'\d+')) - assert list(lines) == ['ab', 'cd'] - - p1 = re.compile(r'\d\d') - p2 = re.compile(r'\n+') - - trace = prune_trace_object() - lines = prune_lines(['a 123\n456x7\n8\n b'], [p1, p2], trace=trace) - assert list(lines) == ['a x b'] - - assert len(trace) == 1 - assert len(trace[0]) == 3 - # elimination of double digits and new lines (in that order) - assert trace[0][0] == ('a 123\n456x7\n8\n b', ['a 3\n6x7\n8\n b', 'a 36x78 b']) - # new digits appeared so we re-eliminated them - assert trace[0][1] == ('a 36x78 b', ['a x b', 'a x b']) - # identity for both patterns - assert trace[0][2] == ('a x b', ['a x b', 'a x b']) - - trace = prune_trace_object() - lines = prune_lines(['a 123\n456x7\n8\n b'], [p2, p1], trace=trace) - assert list(lines) == ['a x b'] - - assert len(trace) == 1 - assert len(trace[0]) == 2 - # elimination of new lines and double digits (in that order) - assert trace[0][0] == ('a 123\n456x7\n8\n b', ['a 123456x78 b', 'a x b']) - # identity for both patterns - assert trace[0][1] == ('a x b', ['a x b', 'a x b']) 
+@pytest.mark.parametrize( + ('lines', 'patterns', 'expect', 'trace'), + [ + ( + ['88D79F0A2', '###'], + r'\d+', + ['DFA', '###'], + [ + [('88D79F0A2', ['DFA'])], + [('###', ['###'])], + ], + ), + ( + ['11a1', 'b1'], + '^1', + ['a1', 'b1'], + [ + [('11a1', ['1a1']), ('1a1', ['a1'])], + [('b1', ['b1'])], + ], + ), + ( + ['ABC#123'], + [r'^[A-Z]', r'\d$'], + ['#'], + [ + [ + ('ABC#123', ['BC#123', 'BC#12']), + ('BC#12', ['C#12', 'C#1']), + ('C#1', ['#1', '#']), + ], + ], + ), + ( + ['a 123\n456x7\n8\n b'], + [re.compile(r'\d\d'), re.compile(r'\n+')], + ['a x b'], + [ + [ + # elimination of double digits and new lines (in that order) + ('a 123\n456x7\n8\n b', ['a 3\n6x7\n8\n b', 'a 36x78 b']), + # new digits appeared so we re-eliminated them + ('a 36x78 b', ['a x b', 'a x b']), + ] + ], + ), + ( + ['a 123\n456x7\n8\n b'], + [re.compile(r'\n+'), re.compile(r'\d\d')], + ['a x b'], + [ + [ + # elimination of new lines and double digits (in that order) + ('a 123\n456x7\n8\n b', ['a 123456x78 b', 'a x b']), + ] + ], + ), + ], +) +def test_prune_lines( + lines: Sequence[str], + patterns: PatternLike | Sequence[PatternLike], + expect: Sequence[str], + trace: list[list[tuple[str, list[str]]]], +) -> None: + actual_trace: list[list[tuple[str, list[str]]]] = [] + actual = prune_lines(lines, patterns, trace=actual_trace) + assert list(actual) == list(expect) + assert actual_trace == list(trace) diff --git a/tests/test_testing/test_matcher_engine.py b/tests/test_testing/test_matcher_engine.py index 6a936358854..f2f1dbdbd50 100644 --- a/tests/test_testing/test_matcher_engine.py +++ b/tests/test_testing/test_matcher_engine.py @@ -1,9 +1,10 @@ from __future__ import annotations -import fnmatch import random import re +import pytest + from sphinx.testing.matcher import _engine as engine @@ -48,35 +49,39 @@ def test_format_expression(): assert engine.format_expression(str.upper, p) is p -def test_translate_expressions(): - string, pattern = 'a*', re.compile('.*') - inputs = (string, 
pattern) +@pytest.mark.parametrize(('string', 'expect'), [('foo.bar', r'\Afoo\.bar\Z')]) +def test_string_expression(string, expect): + assert engine.string_expression(string) == expect + pattern = re.compile(engine.string_expression(string)) + for func in (pattern.match, pattern.search, pattern.fullmatch): + assert func(string) is not None + assert func(string + '.') is None + assert func('.' + string) is None - expect = [engine.string_expression(string), pattern] - assert [*engine.translate(inputs, flavor='none')] == expect - expect = [string.upper(), pattern] - assert [*engine.translate(inputs, flavor='none', escape=str.upper)] == expect - expect = [string, pattern] - assert [*engine.translate(inputs, flavor='re')] == expect - expect = [string.upper(), pattern] - assert [*engine.translate(inputs, flavor='re', str2regexpr=str.upper)] == expect +def test_translate_expressions(): + string, compiled = 'a*', re.compile('.*') + patterns = (string, compiled) - expect = [fnmatch.translate(string), pattern] - assert [*engine.translate(inputs, flavor='fnmatch')] == expect - expect = [string.upper(), pattern] - assert [*engine.translate(inputs, flavor='fnmatch', str2fnmatch=str.upper)] == expect + assert [*engine.translate(patterns, flavor='none')] == [r'\Aa\*\Z', compiled] + assert [*engine.translate(patterns, flavor='re')] == [string, compiled] + assert [*engine.translate(patterns, flavor='fnmatch')] == [r'(?s:a.*)\Z', compiled] + expect, func = [string.upper(), compiled], str.upper + assert [*engine.translate(patterns, flavor='none', escape=func)] == expect + assert [*engine.translate(patterns, flavor='re', regular_translate=func)] == expect + assert [*engine.translate(patterns, flavor='fnmatch', fnmatch_translate=func)] == expect -def test_compile_patterns(): - string = 'a*' - compiled = re.compile('.*') - expect = (re.compile(engine.string_expression(string)), compiled) - assert engine.compile([string, compiled], flavor='none') == expect +def test_compile_patterns(): 
+ string, compiled = 'a*', re.compile('.*') + patterns = (string, compiled) - expect = (re.compile(fnmatch.translate(string)), compiled) - assert engine.compile([string, compiled], flavor='fnmatch') == expect + assert engine.compile(patterns, flavor='none') == (re.compile(r'\Aa\*\Z'), compiled) + assert engine.compile(patterns, flavor='re') == (re.compile(string), compiled) + assert engine.compile(patterns, flavor='fnmatch') == (re.compile(r'(?s:a.*)\Z'), compiled) - expect = (re.compile(string), compiled) - assert engine.compile([string, compiled], flavor='re') == expect + expect = (re.compile('A*'), compiled) + assert engine.compile(patterns, flavor='none', escape=str.upper) == expect + assert engine.compile(patterns, flavor='re', regular_translate=str.upper) == expect + assert engine.compile(patterns, flavor='fnmatch', fnmatch_translate=str.upper) == expect From a533d1485a47c4191e430e7177e025ba32f979ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 6 Apr 2024 16:18:36 +0200 Subject: [PATCH 48/66] micro optimizations --- sphinx/testing/matcher/__init__.py | 224 +++++++++++++---------------- sphinx/testing/matcher/_engine.py | 8 +- sphinx/testing/matcher/_util.py | 26 ++-- sphinx/testing/matcher/buffer.py | 6 +- tests/test_testing/test_matcher.py | 16 +-- 5 files changed, 124 insertions(+), 156 deletions(-) diff --git a/sphinx/testing/matcher/__init__.py b/sphinx/testing/matcher/__init__.py index 0b71101b7da..11b38126ac8 100644 --- a/sphinx/testing/matcher/__init__.py +++ b/sphinx/testing/matcher/__init__.py @@ -5,27 +5,25 @@ __all__ = ('LineMatcher',) import contextlib -import itertools import re -from typing import TYPE_CHECKING, cast, overload +from typing import TYPE_CHECKING, cast -from sphinx.testing.matcher import cleaner, _engine, _util +from sphinx.testing.matcher import _engine, _util, cleaner from sphinx.testing.matcher.buffer import Block from sphinx.testing.matcher.options 
import Options, OptionsHolder if TYPE_CHECKING: from collections.abc import Generator, Iterable, Iterator, Sequence, Set from io import StringIO - from re import Pattern - from typing import ClassVar, Literal + from typing import Any, ClassVar, Literal from typing_extensions import Self, Unpack - from sphinx.testing.matcher._util import BlockPattern, LinePattern - from sphinx.testing.matcher.buffer import Line + from sphinx.testing.matcher._util import BlockPattern, LinePattern, PatternLike, Patterns + from sphinx.testing.matcher.buffer import Line, Region from sphinx.testing.matcher.options import CompleteOptions, Flavor - PatternType = Literal['line', 'block'] + _RegionType = Literal['line', 'block'] class LineMatcher(OptionsHolder): @@ -90,33 +88,31 @@ def lines(self) -> Block: assert stack, 'invalid stack state' cached = stack[-1] - if cached is None: - options = self.default_options | cast(Options, self.options) - # compute for the first time the block's lines - lines = tuple(cleaner.clean(self.content, **options)) - # check if the value is the same as any of a previously cached value - for addr, value in enumerate(itertools.islice(stack, 0, len(stack) - 1)): - if isinstance(value, int): - cached = cast(Block, stack[value]) - if cached.buffer == lines: - # compare only the lines as strings - stack[-1] = value # indirection near to beginning - return cached - - if isinstance(value, Block): - if value.buffer == lines: - stack[-1] = addr # indirection - return value - - # the value did not exist yet, so we store it at most once - stack[-1] = cached = Block(lines, _check=False) + if cached is not None: + if isinstance(cached, int): + return cast(Block, self.__stack[cached]) return cached - if isinstance(cached, int): - value = self.__stack[cached] - assert isinstance(value, Block) - return value - + options = self.default_options | cast(Options, self.options) + # compute for the first time the block's lines + lines = tuple(cleaner.clean(self.content, **options)) + 
# check if the value is the same as any of a previously cached value + # but do not use slices to avoid a copy of the stack + for addr, value in zip(range(len(stack) - 1), stack): + if isinstance(value, int): + cached = cast(Block, stack[value]) + if cached.buffer == lines: + # compare only the lines as strings + stack[-1] = value # indirection near to beginning + return cached + + if isinstance(value, Block): + if value.buffer == lines: + stack[-1] = addr # indirection + return value + + # the value did not exist yet, so we store it at most once + stack[-1] = cached = Block(lines, _check=False) return cached def find( @@ -127,7 +123,8 @@ def find( flavor: Flavor | None = None, ) -> Sequence[Line]: """Same as :meth:`iterfind` but returns a sequence of lines.""" - return list(self.iterfind(patterns, flavor=flavor)) + # use tuple to preserve immutability + return tuple(self.iterfind(patterns, flavor=flavor)) def iterfind( self, @@ -140,6 +137,11 @@ def iterfind( :param patterns: The patterns deciding whether a line is selected. :param flavor: Optional temporary flavor for non-compiled patterns. + + By convention, the following are equivalent:: + + matcher.iterfind('line to find', ...) + matcher.iterfind(['line to find'], ...) """ patterns = _engine.to_line_patterns(patterns) if not patterns: # nothinig to match @@ -159,24 +161,21 @@ def find_blocks( self, pattern: str | BlockPattern, /, *, flavor: Flavor | None = None ) -> Sequence[Block]: """Same as :meth:`iterfind_blocks` but returns a sequence of blocks.""" - return list(self.iterfind_blocks(pattern, flavor=flavor)) + return tuple(self.iterfind_blocks(pattern, flavor=flavor)) def iterfind_blocks( self, patterns: str | BlockPattern, /, *, flavor: Flavor | None = None ) -> Iterator[Block]: - """Yield non-overlapping blocks matching the given line patterns. + r"""Yield non-overlapping blocks matching the given line patterns. :param patterns: The line patterns that a block must satisfy. 
:param flavor: Optional temporary flavor for non-compiled patterns. :return: An iterator on the matching blocks. - When *patterns* is a single string, it is split into lines, each - of which corresponding to the pattern a block's line must satisfy. - - .. note:: + By convention, the following are equivalent:: - Standalone :class:`~re.Pattern` objects are not supported - as they could be interpreted as a line or a block pattern. + matcher.iterfind_blocks('line1\nline2', ...) + matcher.iterfind_blocks(['line1', 'line2'], ...) """ # in general, the patterns are smaller than the lines # so we expect the following to be more efficient than @@ -213,20 +212,24 @@ def assert_match( self, patterns: LinePattern | Set[LinePattern] | Sequence[LinePattern], /, + *, count: int | None = None, flavor: Flavor | None = None, ) -> None: """Assert that the number of matching lines for the given patterns. - A matching line is a line that satisfies one or more patterns - given in *patterns*. - :param patterns: The patterns deciding whether a line is counted. :param count: If specified, the exact number of matching lines. :param flavor: Optional temporary flavor for non-compiled patterns. + + By convention, the following are equivalent:: + + matcher.assert_match('line to find', ...) + matcher.assert_match(['line to find'], ...) """ patterns = _engine.to_line_patterns(patterns) - self._assert_found('line', patterns, count=count, flavor=flavor) + lines = self.iterfind(patterns, flavor=flavor) + self.__assert_found('line', lines, patterns, count, flavor) def assert_no_match( self, @@ -238,75 +241,80 @@ def assert_no_match( ) -> None: """Assert that there exist no matching line for the given patterns. - A matching line is a line that satisfies one or more patterns - given in *patterns*. - :param patterns: The patterns deciding whether a line is counted. :param context: Number of lines to print around a failing line. :param flavor: Optional temporary flavor for non-compiled patterns. 
+ + By convention, the following are equivalent:: + + matcher.assert_no_match('some bad line', ...) + matcher.assert_no_match(['some bad line'], ...) """ - patterns = _engine.to_line_patterns(patterns) - self._assert_not_found('line', patterns, context_size=context, flavor=flavor) + if patterns := _engine.to_line_patterns(patterns): + lines = self.iterfind(patterns, flavor=flavor) + self.__assert_not_found('line', lines, patterns, context, flavor) def assert_block( self, - lines: str | BlockPattern, + pattern: str | BlockPattern, /, *, count: int | None = None, flavor: Flavor | None = None, ) -> None: - """Assert that the number of matching blocks for the given patterns. + r"""Assert that the number of matching blocks for the given patterns. - :param lines: The line patterns that a block must satisfy. + :param pattern: The line patterns that a block must satisfy. :param count: The number of blocks that should be found. :param flavor: Optional temporary flavor for non-compiled patterns. - When *lines* is a single string, it is split into lines, each of - which corresponding to the pattern a block's line must satisfy. + By convention, the following are equivalent:: + + matcher.assert_block('line1\nline2', ...) + matcher.assert_block(['line1', 'line2'], ...) """ - patterns = _engine.to_block_pattern(lines) - self._assert_found('block', patterns, count=count, flavor=flavor) + patterns = _engine.to_block_pattern(pattern) + blocks = self.iterfind_blocks(patterns, flavor=flavor) + self.__assert_found('block', blocks, patterns, count, flavor) def assert_no_block( self, - lines: str | BlockPattern, + pattern: str | BlockPattern, /, *, context: int = 3, flavor: Flavor | None = None, ) -> None: - """Assert that there exist no matching blocks for the given patterns. + r"""Assert that there exist no matching blocks for the given patterns. - :param lines: The line patterns that a block must satisfy. + :param pattern: The line patterns that a block must satisfy. 
:param context: Number of lines to print around a failing block. :param flavor: Optional temporary flavor for non-compiled patterns. - When *patterns* is a single string, it is split into lines, each - of which corresponding to the pattern a block's line must satisfy. + By convention, the following are equivalent:: - Use :data:`sys.maxsize` to show all capture lines. + matcher.assert_no_block('line1\nline2', ...) + matcher.assert_no_block(['line1', 'line2'], ...) """ - patterns = _engine.to_block_pattern(lines) - self._assert_not_found('block', patterns, context_size=context, flavor=flavor) + if patterns := _engine.to_block_pattern(pattern): + blocks = self.iterfind_blocks(patterns, flavor=flavor) + self.__assert_not_found('block', blocks, patterns, context, flavor) - def _assert_found( + def __assert_found( self, - pattern_type: PatternType, - patterns: Sequence[LinePattern], - *, - count: int | None, - flavor: Flavor | None, + typ: _RegionType, # the region's type + regions: Iterator[Region[Any]], # the regions that were found + patterns: Iterable[PatternLike], # the patterns that were used (debug only) + count: int | None, # the expected number of regions + flavor: Flavor | None, # the flavor that was used to compile the patterns ) -> None: - regions = self.__find(pattern_type, patterns, flavor=flavor) - if count is None: if next(regions, None) is not None: return ctx = _util.highlight(self.lines(), keepends=self.keep_break) - pat = _util.prettify_patterns(patterns, sort=pattern_type == 'line') - logs = [f'{pattern_type} pattern', pat, 'not found in', ctx] + pat = self.__pformat_patterns(typ, patterns) + logs = [f'{typ} pattern', pat, 'not found in', ctx] raise AssertionError('\n\n'.join(logs)) indices = {region.offset: region.length for region in regions} @@ -314,61 +322,33 @@ def _assert_found( return ctx = _util.highlight(self.lines(), indices, keepends=self.keep_break) - pat = _util.prettify_patterns(patterns, sort=pattern_type == 'line') - noun = 
_util.plural_form(pattern_type, count) + pat = self.__pformat_patterns(typ, patterns) + noun = _util.plural_form(typ, count) logs = [f'found {found} != {count} {noun} matching', pat, 'in', ctx] raise AssertionError('\n\n'.join(logs)) - def _assert_not_found( + def __assert_not_found( self, - pattern_type: PatternType, - patterns: Sequence[LinePattern], - *, - context_size: int, + typ: _RegionType, + regions: Iterator[Region[Any]], + patterns: Sequence[PatternLike], + context: int, flavor: Flavor | None, ) -> None: - if not patterns: # no pattern to find - return - - values = self.__find(pattern_type, patterns, flavor=flavor) - found: Line | Block | None = next(values, None) - if found is None: + if (region := next(regions, None)) is None: return - pat = _util.prettify_patterns(patterns, sort=pattern_type == 'line') - ctx = _util.get_context_lines(self.lines(), found, context_size) - logs = [f'{pattern_type} pattern', pat, 'found in', '\n'.join(ctx)] + pat = self.__pformat_patterns(typ, patterns) + ctx = _util.get_context_lines(self.lines(), region, context) + logs = [f'{typ} pattern', pat, 'found in', '\n'.join(ctx)] raise AssertionError('\n\n'.join(logs)) - def __compile( - self, patterns: Iterable[LinePattern], *, flavor: Flavor | None - ) -> Sequence[Pattern[str]]: + def __pformat_patterns(self, typ: _RegionType, patterns: Iterable[PatternLike]) -> str: + """Prettify the *patterns* as a string to print.""" + lines = (p if isinstance(p, str) else p.pattern for p in patterns) + source = sorted(lines) if typ == 'line' else lines + return _util.indent_source(source, highlight=False) + + def __compile(self, patterns: Iterable[PatternLike], flavor: Flavor | None) -> Patterns: flavor = self.flavor if flavor is None else flavor return _engine.compile(patterns, flavor=flavor) - - @overload - def __find( # NoQA: E704 - self, - pattern_type: Literal['line'], - patterns: Sequence[LinePattern], - /, - flavor: Flavor | None, - ) -> Iterator[Line]: ... 
- @overload # NoQA: E301 - def __find( # NoQA: E704 - self, - pattern_type: Literal['block'], - patterns: Sequence[LinePattern], - /, - flavor: Flavor | None, - ) -> Iterator[Block]: ... - def __find( # NoQA: E301 - self, - pattern_type: PatternType, - patterns: Sequence[LinePattern], - /, - flavor: Flavor | None, - ) -> Iterator[Line] | Iterator[Block]: - if pattern_type == 'line': - return self.iterfind(patterns, flavor=flavor) - return self.iterfind_blocks(patterns, flavor=flavor) diff --git a/sphinx/testing/matcher/_engine.py b/sphinx/testing/matcher/_engine.py index 993b0ec667e..915c2a5d837 100644 --- a/sphinx/testing/matcher/_engine.py +++ b/sphinx/testing/matcher/_engine.py @@ -15,7 +15,7 @@ if TYPE_CHECKING: from collections.abc import Callable, Iterable, Sequence - from sphinx.testing.matcher._util import BlockPattern, LinePattern, PatternLike + from sphinx.testing.matcher._util import BlockPattern, LinePattern, PatternLike, Patterns from sphinx.testing.matcher.options import Flavor @@ -71,8 +71,8 @@ def to_block_pattern(pattern: str, /) -> tuple[str, ...]: ... # NoQA: E704 @overload def to_block_pattern(pattern: re.Pattern[str], /) -> tuple[re.Pattern[str]]: ... # NoQA: E704 @overload -def to_block_pattern(patterns: BlockPattern, /) -> BlockPattern: ... # NoQA: E704 -def to_block_pattern(patterns: PatternLike | BlockPattern, /) -> BlockPattern: # NoQA: E302 +def to_block_pattern(patterns: BlockPattern, /) -> tuple[LinePattern, ...]: ... # NoQA: E704 +def to_block_pattern(patterns: PatternLike | BlockPattern, /) -> tuple[LinePattern, ...]: # NoQA: E302 r"""Get a read-only sequence for a s single block pattern. :param patterns: A string, :class:`~re.Pattern` or a sequence thereof. 
@@ -151,7 +151,7 @@ def compile( escape: Callable[[str], str] | None = string_expression, regular_translate: Callable[[str], str] | None = None, fnmatch_translate: Callable[[str], str] | None = fnmatch.translate, -) -> tuple[re.Pattern[str], ...]: +) -> Patterns: """Compile one or more patterns into :class:`~re.Pattern` objects. :param patterns: An iterable of patterns to translate and compile. diff --git a/sphinx/testing/matcher/_util.py b/sphinx/testing/matcher/_util.py index efaedb9185d..f9092fc9c69 100644 --- a/sphinx/testing/matcher/_util.py +++ b/sphinx/testing/matcher/_util.py @@ -16,11 +16,11 @@ if TYPE_CHECKING: import re from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence - from typing import TypeVar, Union + from typing import Any, TypeVar, Union from typing_extensions import Never - from sphinx.testing.matcher.buffer import Block, Line + from sphinx.testing.matcher.buffer import Region PatternLike = Union[str, re.Pattern[str]] """A regular expression (compiled or not).""" @@ -30,11 +30,14 @@ """A predicate called on an entire line.""" BlockPattern = Sequence[LinePattern] """A sequence of regular expressions (compiled or not) for a block. - - For instance, ``['a', re.compile('b*')]`` matches blocks + + For instance, ``['a', re.compile('b*')]`` matches blocks with the line ``'a'`` followed by a line matching ``'b*'``. """ + Patterns = tuple[re.Pattern[str], ...] 
+ """Sequence of compiled patterns to use.""" + _T = TypeVar('_T') @@ -160,21 +163,8 @@ def indent_lines( return [prefix + line for line in lines] -def prettify_patterns( - patterns: Sequence[PatternLike], - /, - *, - indent: int = 4, - sort: bool = False, -) -> str: - """Prettify the *patterns* as a string to print.""" - lines = (p if isinstance(p, str) else p.pattern for p in patterns) - source = sorted(lines) if sort else lines - return indent_source(source, indent=indent, highlight=False) - - def get_context_lines( - source: Sequence[str], region: Line | Block, /, context: int, *, indent: int = 4 + source: Sequence[str], region: Region[Any], /, context: int, *, indent: int = 4 ) -> list[str]: """Get some context lines around *block* and highlight the *region*. diff --git a/sphinx/testing/matcher/buffer.py b/sphinx/testing/matcher/buffer.py index 9fa1a7725bf..a58227df36c 100644 --- a/sphinx/testing/matcher/buffer.py +++ b/sphinx/testing/matcher/buffer.py @@ -243,7 +243,7 @@ def count(self, sub: SubStringLike, /) -> int: return self.buffer.count(sub) # raise a TypeError if *sub* is not a string - # explicitly add the method since its signature differs from :meth:`_Region.index` + # explicitly add the method since its signature differs from :meth:`Region.index` def index(self, sub: SubStringLike, start: int = 0, stop: int = sys.maxsize, /) -> int: """Find the lowest index of a substring. 
@@ -288,8 +288,6 @@ def endswith(self, suffix: str, start: int = 0, end: int = sys.maxsize, /) -> bo """ return self.buffer.endswith(suffix, start, end) - # dunder methods - def __str__(self) -> str: """The line as a string.""" return self.buffer @@ -410,7 +408,7 @@ def count(self, target: BlockLineLike, /) -> int: return self.buffer.count(target) - # explicitly add the method since its signature differs from :meth:`_Region.index` + # explicitly add the method since its signature differs from :meth:`Region.index` def index(self, target: BlockLineLike, start: int = 0, stop: int = sys.maxsize, /) -> int: """Find the lowest index of a matching line. diff --git a/tests/test_testing/test_matcher.py b/tests/test_testing/test_matcher.py index 7cb2690b4a3..6be805f810d 100644 --- a/tests/test_testing/test_matcher.py +++ b/tests/test_testing/test_matcher.py @@ -205,30 +205,30 @@ def test_matcher_find( expect: Sequence[tuple[str, int]], ) -> None: matcher = LineMatcher.from_lines(lines, flavor=flavor) - assert matcher.find(pattern) == expect + assert matcher.find(pattern) == tuple(expect) matcher = LineMatcher.from_lines(lines, flavor='none') - assert matcher.find(pattern, flavor=flavor) == expect + assert matcher.find(pattern, flavor=flavor) == tuple(expect) def test_matcher_find_blocks(): lines = ['hello', 'world', 'yay', 'hello', 'world', '!', 'yay'] matcher = LineMatcher.from_lines(lines) - assert matcher.find_blocks(['hello', 'world']) == [ + assert matcher.find_blocks(['hello', 'world']) == ( [('hello', 0), ('world', 1)], [('hello', 3), ('world', 4)], - ] + ) - assert matcher.find_blocks(['hello', 'w[oO]rld'], flavor='fnmatch') == [ + assert matcher.find_blocks(['hello', 'w[oO]rld'], flavor='fnmatch') == ( [('hello', 0), ('world', 1)], [('hello', 3), ('world', 4)], - ] + ) - assert matcher.find_blocks(['hello', r'^w[a-z]{2}\wd$'], flavor='re') == [ + assert matcher.find_blocks(['hello', r'^w[a-z]{2}\wd$'], flavor='re') == ( [('hello', 0), ('world', 1)], [('hello', 3), 
('world', 4)], - ] + ) def test_assert_match(): From d759057f7feafcc3037b9eb83867e401ca9cd450 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 6 Apr 2024 20:12:53 +0200 Subject: [PATCH 49/66] [search] fix an undefined local variable (#12235) --- sphinx/themes/basic/static/searchtools.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sphinx/themes/basic/static/searchtools.js b/sphinx/themes/basic/static/searchtools.js index 3f82b2a9dc1..92da3f8b22c 100644 --- a/sphinx/themes/basic/static/searchtools.js +++ b/sphinx/themes/basic/static/searchtools.js @@ -402,8 +402,8 @@ const Search = { }, query: (query) => { - const searchParameters = Search._parseQuery(query); - const results = Search._performSearch(...searchParameters); + const [searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms] = Search._parseQuery(query); + const results = Search._performSearch(searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms); // for debugging //Search.lastresults = results.slice(); // a copy From 1cbec3139a384e3388c8620e8a84cfc2dc42d30a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 7 Apr 2024 11:43:24 +0200 Subject: [PATCH 50/66] doc and implementation details improvements --- sphinx/testing/matcher/__init__.py | 31 +++-- sphinx/testing/matcher/buffer.py | 55 ++++---- sphinx/testing/matcher/cleaner.py | 140 +++++++++++++-------- sphinx/testing/matcher/options.py | 96 +++++++++----- tests/test_testing/test_matcher_cleaner.py | 38 +++--- tests/test_testing/test_matcher_options.py | 57 ++++++--- 6 files changed, 263 insertions(+), 154 deletions(-) diff --git a/sphinx/testing/matcher/__init__.py b/sphinx/testing/matcher/__init__.py index 11b38126ac8..e105ab4344b 100644 --- a/sphinx/testing/matcher/__init__.py +++ b/sphinx/testing/matcher/__init__.py @@ -36,7 +36,7 @@ class 
LineMatcher(OptionsHolder): def __init__(self, content: str | StringIO, /, **options: Unpack[Options]) -> None: """Construct a :class:`LineMatcher` for the given string content. - :param content: The source string. + :param content: The source string or stream. :param options: The matcher options. """ super().__init__(**options) @@ -46,16 +46,21 @@ def __init__(self, content: str | StringIO, /, **options: Unpack[Options]) -> No @classmethod def from_lines(cls, lines: Iterable[str] = (), /, **options: Unpack[Options]) -> Self: - """Construct a :class:`LineMatcher` object from a list of lines. + r"""Construct a :class:`LineMatcher` object from a list of lines. This is typically useful when writing tests for :class:`LineMatcher` since writing the lines instead of a long string is usually cleaner. - The lines are glued together according to whether line breaks, - which can be specified by the keyword argument *keepends*. + The lines are glued together according to *keep_break* (the default + value is specified by :attr:`default_options`), e.g.,:: - By default, the lines are assumed *not* to have line breaks (since - this is usually what is the most common). 
+ text = 'foo\nbar' + + lines = text.splitlines() + LineMatcher.from_lines(lines) == LineMatcher(text) + + lines = text.splitlines(True) + LineMatcher.from_lines(lines, keep_break=True) == LineMatcher(text) """ keep_break = options.get('keep_break', cls.default_options['keep_break']) glue = '' if keep_break else '\n' @@ -93,9 +98,7 @@ def lines(self) -> Block: return cast(Block, self.__stack[cached]) return cached - options = self.default_options | cast(Options, self.options) - # compute for the first time the block's lines - lines = tuple(cleaner.clean(self.content, **options)) + lines = self.__get_clean_lines() # check if the value is the same as any of a previously cached value # but do not use slices to avoid a copy of the stack for addr, value in zip(range(len(stack) - 1), stack): @@ -121,7 +124,7 @@ def find( /, *, flavor: Flavor | None = None, - ) -> Sequence[Line]: + ) -> tuple[Line, ...]: """Same as :meth:`iterfind` but returns a sequence of lines.""" # use tuple to preserve immutability return tuple(self.iterfind(patterns, flavor=flavor)) @@ -159,7 +162,7 @@ def predicate(line: Line) -> bool: def find_blocks( self, pattern: str | BlockPattern, /, *, flavor: Flavor | None = None - ) -> Sequence[Block]: + ) -> tuple[Block, ...]: """Same as :meth:`iterfind_blocks` but returns a sequence of blocks.""" return tuple(self.iterfind_blocks(pattern, flavor=flavor)) @@ -352,3 +355,9 @@ def __pformat_patterns(self, typ: _RegionType, patterns: Iterable[PatternLike]) def __compile(self, patterns: Iterable[PatternLike], flavor: Flavor | None) -> Patterns: flavor = self.flavor if flavor is None else flavor return _engine.compile(patterns, flavor=flavor) + + def __get_clean_lines(self) -> tuple[str, ...]: + # use a complete set of options so that the default + # that were chosen by cleaner.clean() are ignored + options = cast(Options, self.complete_options) + return tuple(cleaner.clean(self.content, **options)) diff --git a/sphinx/testing/matcher/buffer.py 
b/sphinx/testing/matcher/buffer.py index a58227df36c..e6c6341f2e5 100644 --- a/sphinx/testing/matcher/buffer.py +++ b/sphinx/testing/matcher/buffer.py @@ -10,7 +10,7 @@ import re import sys from collections.abc import Sequence -from typing import TYPE_CHECKING, Generic, TypeVar, final, overload +from typing import TYPE_CHECKING, Generic, TypeVar, overload from sphinx.testing.matcher._util import consume as _consume @@ -164,7 +164,6 @@ def __bool__(self) -> bool: """Indicate whether this region is empty or not.""" return bool(self.buffer) - @final def __iter__(self) -> Iterator[str]: """An iterator over the string items.""" return iter(self.buffer) @@ -234,14 +233,15 @@ def count(self, sub: SubStringLike, /) -> int: :raise TypeError: *sub* is not a string or a compiled pattern. """ if isinstance(sub, re.Pattern): - # avoid using value.findall() since we only want the length - # of the corresponding iterator (the following lines are more - # efficient from a memory perspective) + # avoid using sub.findall() since we only want the length + # of the corresponding iterator (the following lines are + # more efficient from a memory perspective) counter = itertools.count() _consume(zip(sub.finditer(self.buffer), counter)) return next(counter) - return self.buffer.count(sub) # raise a TypeError if *sub* is not a string + # buffer.count() raises a TypeError if *sub* is not a string + return self.buffer.count(sub) # explicitly add the method since its signature differs from :meth:`Region.index` def index(self, sub: SubStringLike, start: int = 0, stop: int = sys.maxsize, /) -> int: @@ -268,7 +268,8 @@ def find(self, sub: SubStringLike, start: int = 0, stop: int = sys.maxsize, /) - return match.start() + start_index return -1 - return self.buffer.find(sub, start, stop) # raise a TypeError if *sub* is not a string + # buffer.find() raises a TypeError if *sub* is not a string + return self.buffer.find(sub, start, stop) def startswith(self, prefix: str, start: int = 0, end: int = 
sys.maxsize, /) -> bool: """Test whether the line starts with the given *prefix*. @@ -340,8 +341,8 @@ class Block(Region[tuple[str, ...]]): - *block_lines* is a sequence of line-like objects, and - *block_offset* is an integer (matched against :attr:`offset`). - Pairs ``(line, line_offset)`` or ``(block_lines, block_offset)`` can be - any non-string two-elements sequence (e.g., a tuple or a list), e.g:: + Pairs ``(line, line_offset)`` or ``(block_lines, block_offset)`` are + to be given as any two-elements sequences (tuple, list, deque, ...):: assert Block(['a', 'b', 'c', 'd'], 2) == [ 'a', @@ -350,14 +351,12 @@ class Block(Region[tuple[str, ...]]): Line('d', 5), ] - .. note:: + By convention, ``block[i]`` and ``block[i:j]`` return :class:`str` + and tuples of :class:`str` respectively. Consider using :meth:`at` + to convert the output to :class:`Line` or :class:`Block` objects. - By convention, ``block[i]`` and ``block[i:j]`` return :class:`str` - and tuples of :class:`str` respectively. Consider using :meth:`at` - to convert the output to :class:`Line` or :class:`Block` objects. - - Similarly, ``iter(block)`` returns an iterator on strings. Consider - using :meth:`lines_iterator` to iterate over :class:`Line` objects. + Similarly, ``iter(block)`` returns an iterator on strings. Consider + using :meth:`lines_iterator` to iterate over :class:`Line` objects. """ __slots__ = ('__cached_lines',) @@ -380,7 +379,7 @@ def __init__( """This block as a tuple of :class:`Line` objects. The rationale behind duplicating the buffer's data is to ease - comparison by relying on the C API for comparing lists which + comparison by relying on the C API for comparing tuples which dispatches to the :class:`Line` comparison operators. """ @@ -469,8 +468,8 @@ def at(self, index: int | slice, /) -> Line | Block: # NoQA: E301 @overload def __getitem__(self, index: int, /) -> str: ... # NoQA: E704 @overload - def __getitem__(self, index: slice, /) -> Sequence[str]: ... 
# NoQA: E704 - def __getitem__(self, index: int | slice, /) -> str | Sequence[str]: # NoQA: E301 + def __getitem__(self, index: slice, /) -> tuple[str, ...]: ... # NoQA: E704 + def __getitem__(self, index: int | slice, /) -> str | tuple[str, ...]: # NoQA: E301 """Get a line or a contiguous sub-block.""" if isinstance(index, slice): # normalize negative and None slice fields @@ -536,11 +535,12 @@ def __gt__(self, other: object, /) -> bool: def _parse_non_string(other: object, /) -> tuple[str, int] | None: - """Try to parse *other* as a ``line`` or a ``(line, offset)`` pair. + """Try to parse *other* as a ``(line_content, line_offset)`` pair. - For efficiency, do *not* call this method on :class:`str` instances - since they will be handled separately more efficiently. + Do **NOT** call this method on :class:`str` instances since they are + handled separately and more efficiently by :class:`Line`'s operators. """ + assert not isinstance(other, str) if isinstance(other, Line): return other.buffer, other.offset if isinstance(other, Sequence) and len(other) == 2: @@ -550,7 +550,7 @@ def _parse_non_string(other: object, /) -> tuple[str, int] | None: return None -def _is_block_line_compatible(other: object, /) -> bool: +def _is_block_line_like(other: object, /) -> bool: if isinstance(other, (str, Line)): return True @@ -565,13 +565,14 @@ def _is_block_line_compatible(other: object, /) -> bool: def _parse_non_block(other: object, /) -> tuple[tuple[object, ...], int] | None: """Try to parse *other* as a pair ``(block lines, block offset)``. - For efficiency, do *not* call this method on :class:`Block` instances - since they will be handled separately more efficiently. + Do **NOT** call this method on :class:`Block` instances since they are + handled separately and more efficiently by :class:`Block`'s operators. 
""" + assert not isinstance(other, Block) if not isinstance(other, Sequence): return None - if all(map(_is_block_line_compatible, other)): + if all(map(_is_block_line_like, other)): # offset will never be given in this scenario return tuple(other), -1 @@ -584,7 +585,7 @@ def _parse_non_block(other: object, /) -> tuple[tuple[object, ...], int] | None: # do not allow [line, offset] with single string 'line' return None - if not all(map(_is_block_line_compatible, lines)): + if not all(map(_is_block_line_like, lines)): return None return tuple(lines), offset diff --git a/sphinx/testing/matcher/cleaner.py b/sphinx/testing/matcher/cleaner.py index f8da4ed3d03..c62a42ffde2 100644 --- a/sphinx/testing/matcher/cleaner.py +++ b/sphinx/testing/matcher/cleaner.py @@ -18,15 +18,19 @@ from typing_extensions import Unpack - from sphinx.testing.matcher.options import DeletePattern, Options, StripChars + from sphinx.testing.matcher._util import Patterns + from sphinx.testing.matcher.options import Options, PrunePattern, StripChars + + Trace = list[list[tuple[str, list[str]]]] def clean(text: str, /, **options: Unpack[Options]) -> Iterable[str]: """Clean a text, returning an iterable of lines. :param text: The text to clean. - :param options: The cleaning options. - :return: The list of cleaned lines. + :return: An iterable of cleaned lines. + + See :class:`~.options.Options` for the meaning of each supported option. """ config = OptionsHolder(**options) @@ -49,14 +53,20 @@ def clean(text: str, /, **options: Unpack[Options]) -> Iterable[str]: def strip_chars(text: str, chars: StripChars = True, /) -> str: - """Strip expected characters from *text*.""" + """Return a copy of *text* with leading and trailing characters removed. + + See :attr:`~.options.Options.strip` for the meaning of *chars*. 
+ """ if isinstance(chars, bool): return text.strip() if chars else text return text.strip(chars) def strip_lines(lines: Iterable[str], chars: StripChars = True, /) -> Iterable[str]: - """Call :meth:`str.strip` to each line in *lines*.""" + """Same as :func:`strip_chars` but applied to each line in *lines*. + + See :attr:`~.options.Options.stripline` for the meaning of *chars*. + """ if isinstance(chars, bool): return map(str.strip, lines) if chars else lines return (line.strip(chars) for line in lines) @@ -73,17 +83,26 @@ def filter_lines( """Filter the lines. :param lines: The lines to filter. - :param keep_empty: If true, keep empty lines. - :param unique: If true, remove duplicated lines. + :param keep_empty: If true, keep empty lines in the output. :param compress: If true, remove consecutive duplicated lines. + :param unique: If true, remove duplicated lines. :return: An iterable of filtered lines. Since removing empty lines first allows serial duplicates to be eliminated in the same iteration, duplicates elimination is performed *after* empty - lines are removed. To change the behaviour, consider using:: + lines are removed. Consider comparing:: + + >>> lines = ['a', '', 'a', '', 'a'] + >>> list(filter_lines(lines, keep_empty=False, compress=True)) + ['a'] + + together with:: - lines = filter_lines(lines, compress=True) - lines = filter_lines(lines, empty=True) + >>> lines = ['a', '', 'a', '', 'a'] + >>> filtered = filter_lines(lines, compress=True) + >>> filtered = filter_lines(filtered, keep_empty=False) + >>> list(filtered) + ['a', 'a', 'a'] """ if not keep_empty: lines = filter(None, lines) @@ -99,63 +118,76 @@ def filter_lines( def prune_lines( - lines: Iterable[str], - patterns: DeletePattern, - /, - *, - trace: list[list[tuple[str, list[str]]]] | None = None, + lines: Iterable[str], patterns: PrunePattern, /, *, trace: Trace | None = None ) -> Iterable[str]: - r"""Remove substrings from a source satisfying some patterns. 
+ r"""Eliminate substrings in each line. :param lines: The source to transform. :param patterns: One or more substring patterns to delete. :param trace: A buffer where intermediate results are stored. :return: An iterable of transformed lines. - Usage:: + Example:: - lines = prune_lines(['1111a', 'b1'], r'^\d+') - assert list(lines) == ['a', 'b1'] + >>> lines = prune_lines(['1111a', 'b1'], r'^\d+') + >>> list(lines) + ['a', 'b1'] - When specified, the *trace* contains the line's reduction chains, e.g., if - the line is ``'ABC#123'`` amd ``patterns = (r'^[A-Z]', r'\d$')``, then the - corresponding reduction chain is:: + When specified, the *trace* contains the line's reduction chains, e.g.:: - [ - ('ABC#123', ['BC#123', 'BC#12']), - ('BC#12', ['C#12', 'C#1']), - ('C#1', ['#1', '#']), - ] - - In the above example, the final value is given by ``'#'`` which can also - be accessed by ``trace[i][-1][-1][-1]``. + >>> trace = [] + >>> list(prune_lines(['ABC#123'], [r'^[A-Z]', r'\d$'], trace=trace)) + ['#'] + >>> trace # doctest: +NORMALIZE_WHITESPACE + [[('ABC#123', ['BC#123', 'BC#12']), + ('BC#12', ['C#12', 'C#1']), + ('C#1', ['#1', '#'])]] """ patterns = _engine.to_line_patterns(patterns) compiled = _engine.compile(patterns, flavor='re') + if trace is None: + return _prune(lines, compiled) + return _prune_debug(lines, compiled, trace) - def prune_redux(line: str, pattern: re.Pattern[str]) -> str: - return pattern.sub('', line) - def prune_debug(line: str, accumulator: list[str]) -> str: - values = itertools.accumulate(compiled, prune_redux, initial=line) - accumulator.extend(itertools.islice(values, 1, None)) # skip initial value - return accumulator[-1] # a reduced value +def _prune_pattern(line: str, pattern: re.Pattern[str]) -> str: + return pattern.sub('', line) - if trace is None: - for line in lines: - ret = reduce(prune_redux, compiled, line) - while line != ret: - line, ret = ret, reduce(prune_redux, compiled, ret) - yield ret - else: - for line in lines: - 
entry: list[tuple[str, list[str]]] = [(line, [])] - ret = None - ret = prune_debug(line, entry[-1][1]) - while line != ret: - frame: tuple[str, list[str]] = (ret, []) - line, ret = ret, prune_debug(ret, frame[1]) - if ret != line: - entry.append(frame) - trace.append(entry) - yield ret + +def _prune(lines: Iterable[str], compiled: Patterns) -> Iterable[str]: + def apply(line: str) -> str: + return reduce(_prune_pattern, compiled, line) + + def prune(line: str) -> str: + text = apply(line) + while text != line: + line, text = text, apply(text) + return text + + return map(prune, lines) + + +def _prune_debug(lines: Iterable[str], compiled: Patterns, trace: Trace) -> Iterable[str]: + def apply(line: str) -> tuple[str, list[str]]: + values = itertools.accumulate(compiled, _prune_pattern, initial=line) + states = list(itertools.islice(values, 1, None)) # skip initial value + return states[-1], states + + def prune(line: str) -> str: + text, states = apply(line) + # first reduction is always logged + trace_item: list[tuple[str, list[str]]] = [(line, states)] + + while text != line: + line, (text, states) = text, apply(text) + trace_item.append((line, states)) + + if len(trace_item) >= 2: + # the while-loop was executed at least once and + # the last appended item represents the identity + trace_item.pop() + + trace.append(trace_item) + return text + + return map(prune, lines) diff --git a/sphinx/testing/matcher/options.py b/sphinx/testing/matcher/options.py index fea903b368e..a4b357dc3fd 100644 --- a/sphinx/testing/matcher/options.py +++ b/sphinx/testing/matcher/options.py @@ -10,7 +10,7 @@ if TYPE_CHECKING: from collections.abc import Generator, Mapping, Sequence - from typing import ClassVar, Literal, TypeVar, Union + from typing import Any, ClassVar, Literal, TypeVar, Union from typing_extensions import Unpack @@ -22,9 +22,9 @@ StripChars = Union[bool, str, None] """Allowed values for :attr:`Options.strip` and :attr:`Options.stripline`.""" - DeleteOption = 
Literal['delete'] - DeletePattern = Union[PatternLike, Sequence[PatternLike]] - """One or more patterns to delete.""" + PruneOption = Literal['delete'] + PrunePattern = Union[PatternLike, Sequence[PatternLike]] + """One or more patterns to prune.""" IgnoreOption = Literal['ignore'] @@ -34,8 +34,8 @@ # For some reason, mypy does not like Union of Literal, # so we wrap the Literal types inside a bigger Literal. - OptionValue = Union[bool, StripChars, DeletePattern, Union[LinePredicate, None], Flavor] - OptionName = Literal[FlagOption, StripOption, DeleteOption, IgnoreOption, FlavorOption] + OptionValue = Union[bool, StripChars, PrunePattern, Union[LinePredicate, None], Flavor] + OptionName = Literal[FlagOption, StripOption, PruneOption, IgnoreOption, FlavorOption] DT = TypeVar('DT') @@ -112,8 +112,8 @@ class Options(TypedDict, total=False): after empty and duplicated consecutive lines might have been eliminated. """ - delete: DeletePattern - r"""Regular expressions for substrings to delete from the output lines. + delete: PrunePattern + r"""Regular expressions for substrings to prune from the output lines. The output lines are pruned from their matching substrings (checked using :func:`re.match`) until the output lines are stabilized. @@ -160,14 +160,29 @@ class CompleteOptions(TypedDict): compress: bool unique: bool - delete: DeletePattern + delete: PrunePattern ignore: LinePredicate | None flavor: Flavor class OptionsHolder: - """Mixin supporting a known set of options.""" + """Mixin supporting a known set of options. + + An :class:`OptionsHolder` object stores a set of partial options, + overriding the default values specified by :attr:`default_options`. + + At runtime, only the options given at construction time, explicitly + set via :meth:`set_option` or the corresponding property are stored + by this object. 
+ + As such, :attr:`options` and :attr:`complete_options` return a proxy + on :class:`Options` and :class:`CompleteOptions` respectively, e.g.:: + + obj = OptionsHolder(strip=True) + assert obj.options == {'strip': True} + assert obj.complete_options == dict(obj.default_options, strip=True) + """ __slots__ = ('__options',) @@ -183,31 +198,44 @@ class OptionsHolder: ignore=None, flavor='none', ) - """The default options to use when an option is not specified. + """The supported options specifications and their default values. Subclasses should override this field for different default options. """ def __init__(self, /, **options: Unpack[Options]) -> None: + """Construct an :class:`OptionsHolder` object.""" self.__options = options @property - def options(self) -> Mapping[str, object]: # cannot use CompleteOptions :( - """A read-only view on the current mapping of options.""" + def options(self) -> Mapping[str, object]: + """A read-only view of the *current* mapping of options. + + It can be regarded as a proxy on a :class:`Options` dictionary. + """ return MappingProxyType(self.__options) + @property + def complete_options(self) -> Mapping[str, object]: + """A read-only view of the *complete* mapping of options. + + It can be regarded as a proxy on a :class:`CompleteOptions` dictionary. 
+ """ + return MappingProxyType(self.default_options | self.__options) + @contextlib.contextmanager - def use(self, /, **options: Unpack[Options]) -> Generator[None, None, None]: + def set_options(self, /, **options: Unpack[Options]) -> Generator[None, None, None]: """Temporarily replace the set of options with *options*.""" - local_options = self.default_options | options - with self.override(**local_options): - yield + return self.__set_options(options) @contextlib.contextmanager def override(self, /, **options: Unpack[Options]) -> Generator[None, None, None]: """Temporarily extend the set of options with *options*.""" + return self.__set_options(self.__options | options) + + def __set_options(self, options: Options) -> Generator[None, None, None]: saved_options = self.__options.copy() - self.__options |= options + self.__options = options try: yield finally: @@ -230,13 +258,13 @@ def get_option(self, name: StripOption, /) -> StripChars: ... # NoQA: E704 def get_option(self, name: StripOption, default: StripChars, /) -> StripChars: ... # NoQA: E704 @overload def get_option(self, name: StripOption, default: DT, /) -> StripChars | DT: ... # NoQA: E704 - # delete prefix/suffix option + # pruning option @overload - def get_option(self, name: DeleteOption, /) -> DeletePattern: ... # NoQA: E704 + def get_option(self, name: PruneOption, /) -> PrunePattern: ... # NoQA: E704 @overload - def get_option(self, name: DeleteOption, default: DeletePattern, /) -> DeletePattern: ... # NoQA: E704 + def get_option(self, name: PruneOption, default: PrunePattern, /) -> PrunePattern: ... # NoQA: E704 @overload - def get_option(self, name: DeleteOption, default: DT, /) -> DeletePattern | DT: ... # NoQA: E704 + def get_option(self, name: PruneOption, default: DT, /) -> PrunePattern | DT: ... # NoQA: E704 # filtering options @overload def get_option(self, name: IgnoreOption, /) -> LinePredicate | None: ... 
# NoQA: E704 @@ -256,7 +284,14 @@ def get_option(self, name: FlavorOption, default: Flavor, /) -> Flavor: ... # N @overload def get_option(self, name: FlavorOption, default: DT, /) -> Flavor | DT: ... # NoQA: E704 def get_option(self, name: OptionName, /, *default: object) -> object: # NoQA: E301 - """Get a known option value, or a default value.""" + """Get an option value, or a default value. + + :param name: An option name specified in :attr:`default_options`. + :return: An option value. + + When *default* is specified and *name* is not explicitly set, it is + returned instead of the default specified in :attr:`default_options`. + """ if name in self.__options: return self.__options[name] return default[0] if default else self.default_options[name] @@ -266,13 +301,18 @@ def set_option(self, name: FlagOption, value: bool, /) -> None: ... # NoQA: E70 @overload def set_option(self, name: StripOption, value: StripChars, /) -> None: ... # NoQA: E704 @overload - def set_option(self, name: DeleteOption, value: DeletePattern, /) -> None: ... # NoQA: E704 + def set_option(self, name: PruneOption, value: PrunePattern, /) -> None: ... # NoQA: E704 @overload def set_option(self, name: IgnoreOption, value: LinePredicate | None, /) -> None: ... # NoQA: E704 @overload def set_option(self, name: FlavorOption, value: Flavor, /) -> None: ... # NoQA: E704 - def set_option(self, name: OptionName, value: OptionValue, /) -> None: # NoQA: E301 - """Set a persistent option value.""" + def set_option(self, name: OptionName, value: Any, /) -> None: # NoQA: E301 + """Set a persistent option value. + + The *name* should be an option for which a default value is specified + in :attr:`default_options`, but this is not enforced at runtime; thus, + the consistency of this object's state is left to the user. 
+ """ self.__options[name] = value @property @@ -339,12 +379,12 @@ def unique(self, value: bool) -> None: self.set_option('unique', value) @property - def delete(self) -> DeletePattern: + def delete(self) -> PrunePattern: """See :attr:`Options.delete`.""" return self.get_option('delete') @delete.setter - def delete(self, value: DeletePattern) -> None: + def delete(self, value: PrunePattern) -> None: self.set_option('delete', value) @property diff --git a/tests/test_testing/test_matcher_cleaner.py b/tests/test_testing/test_matcher_cleaner.py index d3c9c1d3ad6..09322ddd378 100644 --- a/tests/test_testing/test_matcher_cleaner.py +++ b/tests/test_testing/test_matcher_cleaner.py @@ -5,37 +5,43 @@ import pytest -from sphinx.testing.matcher.cleaner import filter_lines, prune_lines, strip_chars, strip_lines +from sphinx.testing.matcher import cleaner if TYPE_CHECKING: from collections.abc import Sequence from sphinx.testing.matcher._util import PatternLike + from sphinx.testing.matcher.cleaner import Trace def test_strip_chars(): - assert strip_chars('abaaa\n') == 'abaaa' - assert strip_chars('abaaa\n', False) == 'abaaa\n' - assert strip_chars('abaaa', 'a') == 'b' - assert strip_chars('abaaa', 'ab') == '' + assert cleaner.strip_chars('abaaa\n') == 'abaaa' + assert cleaner.strip_chars('abaaa\n', False) == 'abaaa\n' + assert cleaner.strip_chars('abaaa', 'a') == 'b' + assert cleaner.strip_chars('abaaa', 'ab') == '' def test_strip_lines(): - assert list(strip_lines(['aba\n', 'aba\n'])) == ['aba', 'aba'] - assert list(strip_lines(['aba\n', 'aba\n'], False)) == ['aba\n', 'aba\n'] - assert list(strip_lines(['aba', 'aba'], 'a')) == ['b', 'b'] - assert list(strip_lines(['aba', 'aba'], 'ab')) == ['', ''] + assert list(cleaner.strip_lines(['aba\n', 'aba\n'])) == ['aba', 'aba'] + assert list(cleaner.strip_lines(['aba\n', 'aba\n'], False)) == ['aba\n', 'aba\n'] + assert list(cleaner.strip_lines(['aba', 'aba'], 'a')) == ['b', 'b'] + assert list(cleaner.strip_lines(['aba', 'aba'], 'ab')) 
== ['', ''] def test_filter_lines(): src = ['a', 'a', '', 'a', 'b', 'c', 'a'] - assert list(filter_lines(src, keep_empty=False, compress=True)) == ['a', 'b', 'c', 'a'] - assert list(filter_lines(src, keep_empty=False, unique=True)) == ['a', 'b', 'c'] + + expect = ['a', 'b', 'c', 'a'] + assert list(cleaner.filter_lines(src, keep_empty=False, compress=True)) == expect + + expect = ['a', 'b', 'c'] + assert list(cleaner.filter_lines(src, keep_empty=False, unique=True)) == expect expect = ['a', '', 'a', 'b', 'c', 'a'] - assert list(filter_lines(src, keep_empty=True, compress=True)) == expect + assert list(cleaner.filter_lines(src, keep_empty=True, compress=True)) == expect - assert list(filter_lines(src, keep_empty=True, unique=True)) == ['a', '', 'b', 'c'] + expect = ['a', '', 'b', 'c'] + assert list(cleaner.filter_lines(src, keep_empty=True, unique=True)) == expect @pytest.mark.parametrize( @@ -101,9 +107,9 @@ def test_prune_lines( lines: Sequence[str], patterns: PatternLike | Sequence[PatternLike], expect: Sequence[str], - trace: list[list[tuple[str, list[str]]]], + trace: Trace, ) -> None: - actual_trace: list[list[tuple[str, list[str]]]] = [] - actual = prune_lines(lines, patterns, trace=actual_trace) + actual_trace: Trace = [] + actual = cleaner.prune_lines(lines, patterns, trace=actual_trace) assert list(actual) == list(expect) assert actual_trace == list(trace) diff --git a/tests/test_testing/test_matcher_options.py b/tests/test_testing/test_matcher_options.py index d793000aa70..5bbfb48d55e 100644 --- a/tests/test_testing/test_matcher_options.py +++ b/tests/test_testing/test_matcher_options.py @@ -23,13 +23,6 @@ def test_options_class(): foreign_keys = CompleteOptions.__annotations__.keys() - Options.__annotations__ assert not foreign_keys, f'unknown option(s): {", ".join(foreign_keys)}' - for name in Options.__annotations__: - func = OptionsHolder.__dict__.get(name) - assert isinstance(func, property), f'missing property for option {name!r}' - assert func.fget 
is not None, f'missing getter for option {name!r}' - assert func.fset is not None, f'missing setter for option {name!r}' - assert func.fdel is None, f'extra deleter for option {name!r}' - def test_default_options(): """Check the synchronization of default options and classes in Sphinx.""" @@ -62,15 +55,22 @@ def check(option: OptionName, default: object) -> None: assert sorted(processed) == sorted(Options.__annotations__) -def test_get_option(): +def test_options_holder(): + obj = OptionsHolder() + assert isinstance(obj.options, MappingProxyType) + assert isinstance(obj.complete_options, MappingProxyType) + + obj = OptionsHolder() + assert 'keep_break' not in obj.options + assert 'keep_break' in obj.complete_options + + +def test_get_options(): class Config(OptionsHolder): default_options: ClassVar[CompleteOptions] = OptionsHolder.default_options.copy() default_options['keep_break'] = True obj = Config() - assert isinstance(obj.options, MappingProxyType) - - assert 'keep_break' not in obj.options assert obj.keep_break is True assert obj.get_option('keep_break') is True assert obj.get_option('keep_break', False) is False @@ -93,12 +93,33 @@ def test_set_option(): assert obj.get_option('delete', 'unused') == 'abc' -@pytest.mark.parametrize('option', list(Options.__annotations__)) -def test_set_option_property_implementation(option: OptionName) -> None: +@pytest.mark.parametrize('option_name', list(Options.__annotations__)) +def test_property_implementation(option_name: OptionName) -> None: """Test that the implementation is correct and do not have typos.""" - obj, val = OptionsHolder(), object() # fresh sentinel for every option + obj = OptionsHolder() + + descriptor = obj.__class__.__dict__.get(option_name) + assert isinstance(descriptor, property) + + # make sure that the docstring is correct + assert descriptor.__doc__ == f'See :attr:`Options.{option_name}`.' 
+ + assert descriptor.fget is not None + assert descriptor.fget.__doc__ == descriptor.__doc__ + assert descriptor.fget.__name__ == option_name + + assert descriptor.fset is not None + assert descriptor.fset.__doc__ in (None, '') + assert descriptor.fset.__name__ == option_name + + assert descriptor.fdel is None # no deleter + # assert that the default value being returned is the correct one - assert obj.__class__.__dict__[option].fget(obj) is OptionsHolder.default_options[option] - obj.__class__.__dict__[option].fset(obj, val) - assert obj.get_option(option) is val - assert obj.__class__.__dict__[option].fget(obj) is val + default_value = obj.__class__.default_options[option_name] + assert descriptor.fget(obj) is default_value + assert obj.get_option(option_name) is default_value + + # assert that the setter is correctly implemented + descriptor.fset(obj, val := object()) + assert descriptor.fget(obj) is val + assert obj.get_option(option_name) is val From 0eb66393bcea54444c6973f8ee3f5455b1f9e7e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 8 Apr 2024 11:20:58 +0200 Subject: [PATCH 51/66] cleanup --- sphinx/testing/matcher/cleaner.py | 6 +++--- tests/test_testing/test_matcher_cleaner.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sphinx/testing/matcher/cleaner.py b/sphinx/testing/matcher/cleaner.py index c62a42ffde2..3adef63673c 100644 --- a/sphinx/testing/matcher/cleaner.py +++ b/sphinx/testing/matcher/cleaner.py @@ -21,7 +21,7 @@ from sphinx.testing.matcher._util import Patterns from sphinx.testing.matcher.options import Options, PrunePattern, StripChars - Trace = list[list[tuple[str, list[str]]]] + TraceInfo = list[list[tuple[str, list[str]]]] def clean(text: str, /, **options: Unpack[Options]) -> Iterable[str]: @@ -118,7 +118,7 @@ def filter_lines( def prune_lines( - lines: Iterable[str], patterns: PrunePattern, /, *, trace: Trace | None = None + lines: 
Iterable[str], patterns: PrunePattern, /, *, trace: TraceInfo | None = None ) -> Iterable[str]: r"""Eliminate substrings in each line. @@ -167,7 +167,7 @@ def prune(line: str) -> str: return map(prune, lines) -def _prune_debug(lines: Iterable[str], compiled: Patterns, trace: Trace) -> Iterable[str]: +def _prune_debug(lines: Iterable[str], compiled: Patterns, trace: TraceInfo) -> Iterable[str]: def apply(line: str) -> tuple[str, list[str]]: values = itertools.accumulate(compiled, _prune_pattern, initial=line) states = list(itertools.islice(values, 1, None)) # skip initial value diff --git a/tests/test_testing/test_matcher_cleaner.py b/tests/test_testing/test_matcher_cleaner.py index 09322ddd378..1115717d791 100644 --- a/tests/test_testing/test_matcher_cleaner.py +++ b/tests/test_testing/test_matcher_cleaner.py @@ -11,7 +11,7 @@ from collections.abc import Sequence from sphinx.testing.matcher._util import PatternLike - from sphinx.testing.matcher.cleaner import Trace + from sphinx.testing.matcher.cleaner import TraceInfo def test_strip_chars(): @@ -107,9 +107,9 @@ def test_prune_lines( lines: Sequence[str], patterns: PatternLike | Sequence[PatternLike], expect: Sequence[str], - trace: Trace, + trace: TraceInfo, ) -> None: - actual_trace: Trace = [] + actual_trace: TraceInfo = [] actual = cleaner.prune_lines(lines, patterns, trace=actual_trace) assert list(actual) == list(expect) assert actual_trace == list(trace) From ec2dd0f73cb4497d6e1af2a88a4749d5007d6c23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 8 Apr 2024 17:07:44 +0200 Subject: [PATCH 52/66] cleanup --- doc/conf.py | 27 ++- doc/development/index.rst | 1 + doc/development/testing/index.rst | 15 ++ doc/development/testing/matcher.rst | 23 ++ doc/development/testing/plugin.rst | 19 ++ sphinx/testing/matcher/_codes.py | 66 ++++++ sphinx/testing/matcher/_util.py | 39 ++-- sphinx/testing/matcher/buffer.py | 90 ++++---- 
sphinx/testing/matcher/cleaner.py | 88 +++---- sphinx/testing/matcher/options.py | 253 +++++++++++++-------- tests/test_testing/test_matcher_cleaner.py | 23 +- tests/test_testing/test_matcher_options.py | 29 +-- 12 files changed, 428 insertions(+), 245 deletions(-) create mode 100644 doc/development/testing/index.rst create mode 100644 doc/development/testing/matcher.rst create mode 100644 doc/development/testing/plugin.rst create mode 100644 sphinx/testing/matcher/_codes.py diff --git a/doc/conf.py b/doc/conf.py index b1b5d873a5e..b42c43848f8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,11 +1,16 @@ # Sphinx documentation build configuration file +from __future__ import annotations import os import re import time +from typing import TYPE_CHECKING import sphinx +if TYPE_CHECKING: + from sphinx.application import Sphinx + os.environ['SPHINX_AUTODOC_RELOAD_MODULES'] = '1' extensions = [ @@ -159,7 +164,7 @@ ] intersphinx_mapping = { - 'python': ('https://docs.python.org/3/', None), + 'python': ('https://docs.python.org/3/', ('_static/python.inv', None)), 'requests': ('https://requests.readthedocs.io/en/latest/', None), 'readthedocs': ('https://docs.readthedocs.io/en/stable', None), } @@ -208,6 +213,13 @@ ('py:class', 'sphinx.theming.Theme'), ('py:class', 'sphinxcontrib.websupport.errors.DocumentNotFoundError'), ('py:class', 'sphinxcontrib.websupport.errors.UserNotAuthorizedError'), + # stdlib + ('py:class', '_io.StringIO'), + ('py:class', 'typing_extensions.Self'), + ('py:class', 'typing_extensions.Unpack'), + # type variables + ('py:class', 'sphinx.testing.matcher.buffer.T'), + ('py:class', 'sphinx.testing.matcher.options.DT'), ('py:exc', 'docutils.nodes.SkipNode'), ('py:exc', 'sphinx.environment.NoUri'), ('py:func', 'setup'), @@ -274,9 +286,10 @@ def linkify(match): source[0] = source[0].replace('.. 
include:: ../CHANGES.rst', linkified_changelog) -def setup(app): +def setup(app: Sphinx) -> None: from sphinx.ext.autodoc import cut_lines from sphinx.util.docfields import GroupedField + from sphinx.roles import code_role app.connect('autodoc-process-docstring', cut_lines(4, what=['module'])) app.connect('source-read', linkify_issues_in_changelog) @@ -290,3 +303,13 @@ def setup(app): app.add_object_type( 'event', 'event', 'pair: %s; event', parse_event, doc_field_types=[fdesc] ) + + def pycode_role(name, rawtext, text, lineno, inliner, options=None, content=()): + options = (options or {}) | {'language': 'python'} + return code_role(name, rawtext, text, lineno, inliner, options, content) + + def pyrepr_role(name, rawtext, text, lineno, inliner, options=None, content=()): + return pycode_role(name, rawtext, repr(text), lineno, inliner, options, content) + + app.add_role('py3', pycode_role) + app.add_role('py3repr', pyrepr_role) diff --git a/doc/development/index.rst b/doc/development/index.rst index 55a31a0c134..25ead1e6945 100644 --- a/doc/development/index.rst +++ b/doc/development/index.rst @@ -15,6 +15,7 @@ the extension interface see :doc:`/extdev/index`. overview tutorials/index builders + testing/index .. toctree:: :caption: Theming diff --git a/doc/development/testing/index.rst b/doc/development/testing/index.rst new file mode 100644 index 00000000000..9e416e16827 --- /dev/null +++ b/doc/development/testing/index.rst @@ -0,0 +1,15 @@ +======= +Testing +======= + +The :mod:`!sphinx.testing` module provides utility classes, functions, fixtures +and markers for testing with `pytest`_. Refer to the following sections to get +started with testing integration. + +.. toctree:: + :maxdepth: 1 + + plugin + matcher + +.. 
_pytest: https://docs.pytest.org/en/latest/ diff --git a/doc/development/testing/matcher.rst b/doc/development/testing/matcher.rst new file mode 100644 index 00000000000..8959ed59f28 --- /dev/null +++ b/doc/development/testing/matcher.rst @@ -0,0 +1,23 @@ +Testing the Sphinx output +========================= + +.. automodule:: sphinx.testing.matcher + :members: + :member-order: bysource + +.. automodule:: sphinx.testing.matcher.options + :members: + :member-order: bysource + +.. automodule:: sphinx.testing.matcher.buffer + :members: + :member-order: bysource + +Utility functions +----------------- + +.. automodule:: sphinx.testing.matcher.cleaner + :members: + :member-order: bysource + :ignore-module-all: + diff --git a/doc/development/testing/plugin.rst b/doc/development/testing/plugin.rst new file mode 100644 index 00000000000..7d41a6a5125 --- /dev/null +++ b/doc/development/testing/plugin.rst @@ -0,0 +1,19 @@ +The Sphinx testing plugin +========================= + +The testing plugin can be enabled by adding following line in ``conftest.py``: + +.. code-block:: python + :caption: conftest.py + + pytest_plugins = ['sphinx.testing.fixtures'] + +This rest of the section is dedicated to documenting the testing features but +the reader is assumed to have some prior knowledge on `pytest`_. + +.. warning:: + + This topic is incomplete and some features are not yet documented. + +.. 
_pytest: https://docs.pytest.org/en/latest/ + diff --git a/sphinx/testing/matcher/_codes.py b/sphinx/testing/matcher/_codes.py new file mode 100644 index 00000000000..806cd3b7243 --- /dev/null +++ b/sphinx/testing/matcher/_codes.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +import functools +import itertools +from typing import TYPE_CHECKING, TypedDict, final + +from sphinx.testing.matcher._util import unique_everseen, unique_justseen + +if TYPE_CHECKING: + from collections.abc import Callable, Iterable + + from sphinx.testing.matcher.options import OpCode, OptionsHolder + + DispatcherFunc = Callable[[Iterable[str]], Iterable[str]] + + +@final +class DispatcherMap(TypedDict): + # Whenever a new operation code is supported, do not forget to + # update :func:`get_dispatcher_map` and :func.`get_active_opcodes`. + strip: DispatcherFunc + check: DispatcherFunc + compress: DispatcherFunc + unique: DispatcherFunc + prune: DispatcherFunc + filter: DispatcherFunc + + +def get_dispatcher_map( + options: OptionsHolder, + # here, we pass the functions so that we do not need to import them + strip_lines: DispatcherFunc, + prune_lines: DispatcherFunc, +) -> DispatcherMap: + return { + 'strip': strip_lines, + 'check': functools.partial(filter, None), + 'compress': unique_justseen, + 'unique': unique_everseen, + 'prune': prune_lines, + 'filter': functools.partial(itertools.filterfalse, options.ignore), + } + + +def get_active_opcodes(options: OptionsHolder) -> Iterable[OpCode]: + disable: set[OpCode] = set() + + if options.strip_line is False: + disable.add('strip') + + if options.keep_empty: + disable.add('check') + + if not options.compress: + disable.add('compress') + + if not options.unique: + disable.add('unique') + + if not isinstance(prune_patterns := options.prune, str) and not prune_patterns: + disable.add('prune') + + if not callable(options.ignore): + disable.add('filter') + + return itertools.filterfalse(disable.__contains__, options.ops) diff --git 
a/sphinx/testing/matcher/_util.py b/sphinx/testing/matcher/_util.py index f9092fc9c69..1f6d41e99a4 100644 --- a/sphinx/testing/matcher/_util.py +++ b/sphinx/testing/matcher/_util.py @@ -8,37 +8,38 @@ __all__ = () import itertools +import re import textwrap from collections import deque +from collections.abc import Callable, Sequence from operator import itemgetter -from typing import TYPE_CHECKING, overload +from typing import TYPE_CHECKING, Union, overload if TYPE_CHECKING: - import re - from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence - from typing import Any, TypeVar, Union + from collections.abc import Iterable, Iterator, Mapping + from typing import Any, TypeVar from typing_extensions import Never from sphinx.testing.matcher.buffer import Region - PatternLike = Union[str, re.Pattern[str]] - """A regular expression (compiled or not).""" - LinePattern = Union[str, re.Pattern[str]] - """A regular expression (compiled or not) for an entire line.""" - LinePredicate = Callable[[str], object] - """A predicate called on an entire line.""" - BlockPattern = Sequence[LinePattern] - """A sequence of regular expressions (compiled or not) for a block. - - For instance, ``['a', re.compile('b*')]`` matches blocks - with the line ``'a'`` followed by a line matching ``'b*'``. - """ + _T = TypeVar('_T') - Patterns = tuple[re.Pattern[str], ...] - """Sequence of compiled patterns to use.""" +PatternLike = Union[str, re.Pattern[str]] +"""A regular expression (compiled or not).""" +LinePattern = Union[str, re.Pattern[str]] +"""A regular expression (compiled or not) for an entire line.""" +LinePredicate = Callable[[str], object] +"""A predicate called on an entire line.""" +BlockPattern = Sequence[LinePattern] +"""A sequence of regular expressions (compiled or not) for a block. + +For instance, ``['a', re.compile('b*')]`` matches blocks +with the line ``'a'`` followed by a line matching ``'b*'``. 
+""" - _T = TypeVar('_T') +Patterns = tuple[re.Pattern[str], ...] +"""Sequence of compiled patterns to use.""" def consume(iterator: Iterator[object], /, n: int | None = None) -> None: diff --git a/sphinx/testing/matcher/buffer.py b/sphinx/testing/matcher/buffer.py index e6c6341f2e5..c4da57472c7 100644 --- a/sphinx/testing/matcher/buffer.py +++ b/sphinx/testing/matcher/buffer.py @@ -2,13 +2,12 @@ from __future__ import annotations -__all__ = ('Line', 'Block') +__all__ = ('Region', 'Line', 'Block') import abc import contextlib import itertools import re -import sys from collections.abc import Sequence from typing import TYPE_CHECKING, Generic, TypeVar, overload @@ -30,10 +29,10 @@ # We would like to have a covariant buffer type but Python does not # support higher-kinded type, so we can only use an invariant type. -_T = TypeVar('_T', bound=Sequence[str]) +T = TypeVar('T', bound=Sequence[str]) -class Region(Generic[_T], Sequence[str], abc.ABC): +class Region(Generic[T], Sequence[str], abc.ABC): """A string or a sequence of strings implementing rich comparison. Given an implicit *source* as a list of strings, a :class:`Region` is @@ -43,7 +42,7 @@ class Region(Generic[_T], Sequence[str], abc.ABC): # add __weakref__ to allow the object being weak-referencable __slots__ = ('__buffer', '__offset', '__weakref__') - def __init__(self, buffer: _T, /, offset: int = 0, *, _check: bool = True) -> None: + def __init__(self, buffer: T, /, offset: int = 0, *, _check: bool = True) -> None: """Construct a :class:`Region` object. :param buffer: The region's content (a string or a list of strings). 
@@ -67,7 +66,7 @@ def __init__(self, buffer: _T, /, offset: int = 0, *, _check: bool = True) -> No self.__offset = offset @property - def buffer(self) -> _T: + def buffer(self) -> T: """The internal (immutable) buffer.""" return self.__buffer @@ -84,18 +83,7 @@ def length(self) -> int: @property @abc.abstractmethod def span(self) -> slice: - """A slice representing this region in its source. - - Examples:: - - source = ['L1', 'L2', 'L3'] - line = Line('L2', 1) - assert source[line.span] == ['L2'] - - source = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] - block = Block(['4', '5', '6'], 3) - assert source[block.span] == ['4', '5', '6'] - """ + """A slice representing this region in its source.""" def context(self, delta: int, limit: int) -> tuple[slice, slice]: """A slice object indicating a context around this region. @@ -130,12 +118,10 @@ def count(self, value: Any, /) -> int: """Count the number of occurences of matching item.""" # The 'value' is 'Any' so that subclasses do not violate Liskov's substitution principle - def index(self, value: Any, start: int = 0, stop: int = sys.maxsize, /) -> int: + def index(self, value: Any, start: int = 0, stop: int | None = None, /) -> int: """Return the lowest index of a matching item. :raise ValueError: The value does not exist. - - .. seealso:: :meth:`find` """ index = self.find(value, start, stop) if index == -1: @@ -144,11 +130,8 @@ def index(self, value: Any, start: int = 0, stop: int = sys.maxsize, /) -> int: @abc.abstractmethod # The 'value' is 'Any' so that subclasses do not violate Liskov's substitution principle. - def find(self, value: Any, start: int = 0, stop: int = sys.maxsize, /) -> int: - """Return the lowest index of a matching item or *-1* on failure. - - .. 
seealso:: :meth:`index` - """ + def find(self, value: Any, start: int = 0, stop: int | None = None, /) -> int: + """Return the lowest index of a matching item or *-1* on failure.""" def pformat(self) -> str: """A nice representation of this region.""" @@ -202,17 +185,14 @@ def __gt__(self, other: object, /) -> bool: class Line(Region[str]): - """A line found by :meth:`~sphinx.testing.matcher.LineMatcher.find`. - - A :class:`Line` can be compared to :class:`str`, :class:`Line` objects or - a pair (i.e., a two-length sequence) ``(line_content, line_offset)`` where + """A line found by :meth:`.LineMatcher.find`. - - *line_content* is a :class:`str`, and - - *line_offset* is an nonnegative integer. + A :class:`Line` can be compared to: - By convention, the comparison result (except for ``!=``) of :class:`Line` - objects with distinct :attr:`offset` is always ``False``. Use :class:`str` - objects instead if the offset is not relevant. + - a :class:`str`, in which case the :attr:`text <.buffer>` is compared, + - a pair ``(line_content, line_offset)`` where *line_content* is a string + and *line_offset* is an nonnegative integer, or another :class:`Line`, + in which case both the offset and the content must match. """ # NOTE(picnixz): this class could be extended to support arbitrary @@ -225,6 +205,14 @@ def __init__(self, line: str = '', /, offset: int = 0, *, _check: bool = True) - @property def span(self) -> slice: + """A slice representing this line in its source. 
+ + Example:: + + source = ['L1', 'L2', 'L3'] + line = Line('L2', 1) + assert source[line.span] == ['L2'] + """ return slice(self.offset, self.offset + 1) def count(self, sub: SubStringLike, /) -> int: @@ -244,14 +232,14 @@ def count(self, sub: SubStringLike, /) -> int: return self.buffer.count(sub) # explicitly add the method since its signature differs from :meth:`Region.index` - def index(self, sub: SubStringLike, start: int = 0, stop: int = sys.maxsize, /) -> int: + def index(self, sub: SubStringLike, start: int = 0, stop: int | None = None, /) -> int: """Find the lowest index of a substring. :raise TypeError: *sub* is not a string or a compiled pattern. """ return super().index(sub, start, stop) - def find(self, sub: SubStringLike, start: int = 0, stop: int = sys.maxsize, /) -> int: + def find(self, sub: SubStringLike, start: int = 0, stop: int | None = None, /) -> int: """Find the lowest index of a substring or *-1* on failure. :raise TypeError: *sub* is not a string or a compiled pattern. @@ -271,7 +259,7 @@ def find(self, sub: SubStringLike, start: int = 0, stop: int = sys.maxsize, /) - # buffer.find() raises a TypeError if *sub* is not a string return self.buffer.find(sub, start, stop) - def startswith(self, prefix: str, start: int = 0, end: int = sys.maxsize, /) -> bool: + def startswith(self, prefix: str, start: int = 0, end: int | None = None, /) -> bool: """Test whether the line starts with the given *prefix*. :param prefix: A line prefix to test. @@ -280,7 +268,7 @@ def startswith(self, prefix: str, start: int = 0, end: int = sys.maxsize, /) -> """ return self.buffer.startswith(prefix, start, end) - def endswith(self, suffix: str, start: int = 0, end: int = sys.maxsize, /) -> bool: + def endswith(self, suffix: str, start: int = 0, end: int | None = None, /) -> bool: """Test whether the line ends with the given *suffix*. :param suffix: A line suffix to test. 
@@ -331,7 +319,7 @@ def __gt__(self, other: object, /) -> bool: class Block(Region[tuple[str, ...]]): - """Block found by :meth:`~sphinx.testing.matcher.LineMatcher.find_blocks`. + """Block found by :meth:`.LineMatcher.find_blocks`. A block is a *sequence* of lines comparable to :class:`Line` objects, usually given as :class:`str` objects or ``(line, line_offset)`` pairs. @@ -339,10 +327,10 @@ class Block(Region[tuple[str, ...]]): A block can be compared to pairs ``(block_lines, block_offset)`` where - *block_lines* is a sequence of line-like objects, and - - *block_offset* is an integer (matched against :attr:`offset`). + - *block_offset* is an integer (matched against :attr:`.offset`). - Pairs ``(line, line_offset)`` or ``(block_lines, block_offset)`` are - to be given as any two-elements sequences (tuple, list, deque, ...):: + Pairs ``(line, line_offset)`` or ``(block_lines, block_offset)`` are to + be given as any two-elements sequences (tuple, list, deque, ...), e.g.:: assert Block(['a', 'b', 'c', 'd'], 2) == [ 'a', @@ -385,6 +373,14 @@ def __init__( @property def span(self) -> slice: + """A slice representing this block in its source. + + Example:: + + source = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] + block = Block(['4', '5', '6'], 3) + assert source[block.span] == ['4', '5', '6'] + """ return slice(self.offset, self.offset + self.length) def count(self, target: BlockLineLike, /) -> int: @@ -408,7 +404,7 @@ def count(self, target: BlockLineLike, /) -> int: return self.buffer.count(target) # explicitly add the method since its signature differs from :meth:`Region.index` - def index(self, target: BlockLineLike, start: int = 0, stop: int = sys.maxsize, /) -> int: + def index(self, target: BlockLineLike, start: int = 0, stop: int | None = None, /) -> int: """Find the lowest index of a matching line. 
For :class:`~re.Pattern` inputs, the following are equivalent:: @@ -418,7 +414,7 @@ def index(self, target: BlockLineLike, start: int = 0, stop: int = sys.maxsize, """ return super().index(target, start, stop) - def find(self, target: BlockLineLike, start: int = 0, stop: int = sys.maxsize, /) -> int: + def find(self, target: BlockLineLike, start: int = 0, stop: int | None = None, /) -> int: """Find the lowest index of a matching line or *-1* on failure. For :class:`~re.Pattern` inputs, the following are equivalent:: @@ -435,6 +431,8 @@ def find(self, target: BlockLineLike, start: int = 0, stop: int = sys.maxsize, / return next(itertools.compress(itertools.count(start), map(target, sliced)), -1) with contextlib.suppress(ValueError): + if stop is None: + return self.buffer.index(target, start) return self.buffer.index(target, start, stop) return -1 @@ -454,7 +452,7 @@ def at(self, index: int, /) -> Line: ... # NoQA: E704 @overload def at(self, index: slice, /) -> Self: ... # NoQA: E704 def at(self, index: int | slice, /) -> Line | Block: # NoQA: E301 - """Get a :class:`Line` or a contiguous sub-:class:`Block`.""" + """Get a :class:`Line` or a contiguous region as a :class:`Block`.""" if isinstance(index, slice): # exception for invalid step is handled by __getitem__ buffer = self[index] diff --git a/sphinx/testing/matcher/cleaner.py b/sphinx/testing/matcher/cleaner.py index 3adef63673c..922065f8a5c 100644 --- a/sphinx/testing/matcher/cleaner.py +++ b/sphinx/testing/matcher/cleaner.py @@ -5,10 +5,10 @@ __all__ = () import itertools -from functools import reduce +from functools import partial, reduce from typing import TYPE_CHECKING -from sphinx.testing.matcher import _engine, _util +from sphinx.testing.matcher import _codes, _engine from sphinx.testing.matcher.options import OptionsHolder from sphinx.util.console import strip_escape_sequences @@ -24,6 +24,16 @@ TraceInfo = list[list[tuple[str, list[str]]]] +# we do not want to expose a non-positional-only public 
interface +def _strip_lines_aux(chars: StripChars, lines: Iterable[str]) -> Iterable[str]: + return strip_lines(lines, chars) + + +# we do not want to expose a non-positional-only public interface +def _prune_lines_aux(patterns: PrunePattern, lines: Iterable[str]) -> Iterable[str]: + return prune_lines(lines, patterns, trace=None) + + def clean(text: str, /, **options: Unpack[Options]) -> Iterable[str]: """Clean a text, returning an iterable of lines. @@ -32,23 +42,22 @@ def clean(text: str, /, **options: Unpack[Options]) -> Iterable[str]: See :class:`~.options.Options` for the meaning of each supported option. """ - config = OptionsHolder(**options) + args = OptionsHolder(**options) # clean the text as a string - if not config.keep_ansi: + if not args.keep_ansi: text = strip_escape_sequences(text) - text = strip_chars(text, config.strip) - - lines: Iterable[str] = text.splitlines(config.keep_break) - lines = strip_lines(lines, config.stripline) - - keep_empty, compress, unique = config.keep_empty, config.compress, config.unique - lines = filter_lines(lines, keep_empty=keep_empty, compress=compress, unique=unique) - lines = prune_lines(lines, config.delete) - - if callable(ignore_predicate := config.ignore): - lines = itertools.filterfalse(ignore_predicate, lines) - + text = strip_chars(text, args.strip) + # obtain the lines + lines: Iterable[str] = text.splitlines(args.keep_break) + # process the lines according to the operation codes sequence + stripfn = partial(_strip_lines_aux, args.strip_line) + prunefn = partial(_prune_lines_aux, args.prune) + dispatchers = _codes.get_dispatcher_map(args, strip_lines=stripfn, prune_lines=prunefn) + for opcode in _codes.get_active_opcodes(args): + if (fn := dispatchers.get(opcode)) is None: + raise ValueError('unknown operation code: %r' % opcode) + lines = fn(lines) return lines @@ -65,58 +74,13 @@ def strip_chars(text: str, chars: StripChars = True, /) -> str: def strip_lines(lines: Iterable[str], chars: StripChars = True, 
/) -> Iterable[str]: """Same as :func:`strip_chars` but applied to each line in *lines*. - See :attr:`~.options.Options.stripline` for the meaning of *chars*. + See :attr:`~.options.Options.strip_line` for the meaning of *chars*. """ if isinstance(chars, bool): return map(str.strip, lines) if chars else lines return (line.strip(chars) for line in lines) -def filter_lines( - lines: Iterable[str], - /, - *, - keep_empty: bool = True, - compress: bool = False, - unique: bool = False, -) -> Iterable[str]: - """Filter the lines. - - :param lines: The lines to filter. - :param keep_empty: If true, keep empty lines in the output. - :param compress: If true, remove consecutive duplicated lines. - :param unique: If true, remove duplicated lines. - :return: An iterable of filtered lines. - - Since removing empty lines first allows serial duplicates to be eliminated - in the same iteration, duplicates elimination is performed *after* empty - lines are removed. Consider comparing:: - - >>> lines = ['a', '', 'a', '', 'a'] - >>> list(filter_lines(lines, keep_empty=False, compress=True)) - ['a'] - - together with:: - - >>> lines = ['a', '', 'a', '', 'a'] - >>> filtered = filter_lines(lines, compress=True) - >>> filtered = filter_lines(filtered, keep_empty=False) - >>> list(filtered) - ['a', 'a', 'a'] - """ - if not keep_empty: - lines = filter(None, lines) - - if unique: - # 'compress' has no effect when 'unique' is set - return _util.unique_everseen(lines) - - if compress: - return _util.unique_justseen(lines) - - return lines - - def prune_lines( lines: Iterable[str], patterns: PrunePattern, /, *, trace: TraceInfo | None = None ) -> Iterable[str]: diff --git a/sphinx/testing/matcher/options.py b/sphinx/testing/matcher/options.py index a4b357dc3fd..4b573fcefbe 100644 --- a/sphinx/testing/matcher/options.py +++ b/sphinx/testing/matcher/options.py @@ -5,39 +5,47 @@ __all__ = ('Options', 'CompleteOptions', 'OptionsHolder') import contextlib +from collections.abc import Sequence 
from types import MappingProxyType -from typing import TYPE_CHECKING, TypedDict, final, overload +from typing import TYPE_CHECKING, Literal, TypedDict, TypeVar, Union, final, overload + +from sphinx.testing.matcher._util import LinePredicate, PatternLike if TYPE_CHECKING: - from collections.abc import Generator, Mapping, Sequence - from typing import Any, ClassVar, Literal, TypeVar, Union + from collections.abc import Generator, Mapping + from typing import Any, ClassVar from typing_extensions import Unpack - from sphinx.testing.matcher._util import LinePredicate, PatternLike +_FLAG = Literal['keep_ansi', 'keep_break', 'keep_empty', 'compress', 'unique'] - FlagOption = Literal['keep_ansi', 'keep_break', 'keep_empty', 'compress', 'unique'] +_STRIP = Literal['strip', 'strip_line'] +StripChars = Union[bool, str, None] +"""Allowed values for :attr:`Options.strip` and :attr:`Options.strip_line`.""" - StripOption = Literal['strip', 'stripline'] - StripChars = Union[bool, str, None] - """Allowed values for :attr:`Options.strip` and :attr:`Options.stripline`.""" +_PRUNE = Literal['prune'] +PrunePattern = Union[PatternLike, Sequence[PatternLike]] +"""One or more (non-empty) patterns to prune.""" - PruneOption = Literal['delete'] - PrunePattern = Union[PatternLike, Sequence[PatternLike]] - """One or more patterns to prune.""" +_IGNORE = Literal['ignore'] +IgnorePredicate = Union[LinePredicate, None] - IgnoreOption = Literal['ignore'] +_OPCODES = Literal['ops'] +# must be kept in sync with :mod:`sphinx.testing.matcher._codes` +OpCode = Literal['strip', 'check', 'compress', 'unique', 'prune', 'filter'] +"""Known operation codes (see :attr:`Options.ops`).""" +OpCodes = Sequence[OpCode] - FlavorOption = Literal['flavor'] - Flavor = Literal['re', 'fnmatch', 'none'] - """Allowed values for :attr:`Options.flavor`.""" +_FLAVOR = Literal['flavor'] +Flavor = Literal['re', 'fnmatch', 'none'] +"""Allowed values for :attr:`Options.flavor`.""" - # For some reason, mypy does not like Union 
of Literal, - # so we wrap the Literal types inside a bigger Literal. - OptionValue = Union[bool, StripChars, PrunePattern, Union[LinePredicate, None], Flavor] - OptionName = Literal[FlagOption, StripOption, PruneOption, IgnoreOption, FlavorOption] +# For some reason, mypy does not like Union of Literal, +# so we wrap the Literal types inside a bigger Literal. +OptionValue = Union[bool, StripChars, PrunePattern, IgnorePredicate, OpCodes, Flavor] +OptionName = Literal[_FLAG, _STRIP, _PRUNE, _IGNORE, _OPCODES, _FLAVOR] - DT = TypeVar('DT') +DT = TypeVar('DT') @final @@ -45,10 +53,10 @@ class Options(TypedDict, total=False): """Options for a :class:`~sphinx.testing.matcher.LineMatcher` object. Some options directly act on the original string (e.g., :attr:`strip`), - while others (e.g., :attr:`stripline`) act on the lines obtained after + while others (e.g., :attr:`strip_line`) act on the lines obtained after splitting the (transformed) original string. - .. seealso:: :mod:`sphinx.testing.matcher._cleaner` + .. seealso:: :mod:`sphinx.testing.matcher.cleaner` """ # only immutable fields should be used as options, otherwise undesired @@ -57,7 +65,7 @@ class Options(TypedDict, total=False): keep_ansi: bool """Indicate whether to keep the ANSI escape sequences. - The default value is ``True``. + The default value is :py3:`True`. """ strip: StripChars @@ -65,72 +73,105 @@ class Options(TypedDict, total=False): The allowed values for :attr:`strip` are: - * ``False`` -- keep leading and trailing whitespaces (the default). - * ``True`` -- remove leading and trailing whitespaces. + * :py3:`False` -- keep leading and trailing whitespaces (the default). + * :py3:`True` or :py3:`None` -- remove leading and trailing whitespaces. * a string (*chars*) -- remove leading and trailing characters in *chars*. """ keep_break: bool """Indicate whether to keep line breaks at the end of each line. - The default value is ``False`` (to mirror :meth:`str.splitlines`). 
+ The default value is :py3:`False` (to mirror :meth:`str.splitlines`). """ - stripline: StripChars + strip_line: StripChars """Describe the characters to strip from each source's line. - The allowed values for :attr:`stripline` are: + The allowed values for :attr:`strip_line` are: - * ``False`` -- keep leading and trailing whitespaces (the default). - * ``True`` -- remove leading and trailing whitespaces. + * :py3:`False` -- keep leading and trailing whitespaces (the default). + * :py3:`True` or :py3:`None` -- remove leading and trailing whitespaces. * a string (*chars*) -- remove leading and trailing characters in *chars*. """ keep_empty: bool """Indicate whether to keep empty lines in the output. - The default value is ``True``. + The default value is :py3:`True`. """ compress: bool """Eliminate duplicated consecutive lines in the output. - The default value is ``False``. - - For instance, ``['a', 'b', 'b', 'c'] -> ['a', 'b', 'c']``. - - Note that if :attr:`empty` is ``False``, empty lines are removed *before* - the duplicated lines, i.e., ``['a', 'b', '', 'b'] -> ['a', 'b']``. + The default value is :py3:`False`. """ unique: bool """Eliminate multiple occurrences of lines in the output. - The default value is ``False``. - - This option is only applied at the very end of the transformation chain, - after empty and duplicated consecutive lines might have been eliminated. + The default value is :py3:`False`. """ - delete: PrunePattern + prune: PrunePattern r"""Regular expressions for substrings to prune from the output lines. The output lines are pruned from their matching substrings (checked using :func:`re.match`) until the output lines are stabilized. - This transformation is applied at the end of the transformation - chain, just before filtering the output lines are filtered with - the :attr:`ignore` predicate. - See :func:`sphinx.testing.matcher.cleaner.prune_lines` for an example. 
""" - ignore: LinePredicate | None + ignore: IgnorePredicate """A predicate for filtering the output lines. Lines that satisfy this predicate are not included in the output. - The default is ``None``, meaning that all lines are included. + The default is :py3:`None`, meaning that all lines are included. + """ + + ops: OpCodes + """A sequence of *opcode* representing the line operations. + + The following table describes the allowed *opcode*. + + .. default-role:: py3repr + + +------------+--------------------+---------------------------------------+ + | Op. Code | Option | Description | + +============+====================+=======================================+ + | `strip` | :attr:`strip_line` | Strip leading and trailing characters | + +------------+--------------------+---------------------------------------+ + | `check` | :attr:`keep_empty` | Remove empty lines | + +------------+--------------------+---------------------------------------+ + | `compress` | :attr:`compress` | Remove consecutive duplicated lines | + +------------+--------------------+---------------------------------------+ + | `unique` | :attr:`unique` | Remove duplicated lines | + +------------+--------------------+---------------------------------------+ + | `prune` | :attr:`prune` | Remove matching substrings | + +------------+--------------------+---------------------------------------+ + | `filter` | :attr:`ignore` | Ignore matching lines | + +------------+--------------------+---------------------------------------+ + + .. default-role:: + + The default value:: + + ('strip', 'check', 'compress', 'unique', 'prune', 'filter') + + .. rubric:: Example + + Let :py3:`lines = ['a', '', 'a', '', 'a']` and:: + + options = Options(strip_line=True, keep_empty=False, compress=True) + + By default, the lines are transformed into :py3:`['a']` since empty lines + are removed before serial duplicates. 
On the other hand, assume that:: + + options = Options(strip_line=True, keep_empty=False, compress=True, + ops=('strip', 'compress', 'check')) + + Here, the empty lines will be removed *after* the serial duplicates, + and therefore the lines are trasnformed into :py3:`['a', 'a', 'a']`. """ flavor: Flavor @@ -138,12 +179,12 @@ class Options(TypedDict, total=False): The allowed values for :attr:`flavor` are: - * ``'none'`` -- match lines using string equality (the default). - * ``'fnmatch'`` -- match lines using :mod:`fnmatch`-style patterns. - * ``'re'`` -- match lines using :mod:`re`-style patterns. + * :py3:`'none'` -- match lines using string equality (the default). + * :py3:`'fnmatch'` -- match lines using :mod:`fnmatch`-style patterns. + * :py3:`'re'` -- match lines using :mod:`re`-style patterns. This option only affects non-compiled patterns. Unless stated otheriwse, - matching is performed on compiled patterns by :func:`~re.Pattern.match`. + matching is performed on compiled patterns by :meth:`re.Pattern.match`. """ @@ -153,16 +194,17 @@ class CompleteOptions(TypedDict): keep_ansi: bool strip: StripChars - stripline: StripChars + strip_line: StripChars keep_break: bool keep_empty: bool compress: bool unique: bool - delete: PrunePattern - ignore: LinePredicate | None + prune: PrunePattern + ignore: IgnorePredicate + ops: OpCodes flavor: Flavor @@ -189,13 +231,14 @@ class OptionsHolder: default_options: ClassVar[CompleteOptions] = CompleteOptions( keep_ansi=True, strip=False, - stripline=False, + strip_line=False, keep_break=False, keep_empty=True, compress=False, unique=False, - delete=(), + prune=(), ignore=None, + ops=('strip', 'check', 'compress', 'unique', 'prune', 'filter'), flavor='none', ) """The supported options specifications and their default values. @@ -211,7 +254,7 @@ def __init__(self, /, **options: Unpack[Options]) -> None: def options(self) -> Mapping[str, object]: """A read-only view of the *current* mapping of options. 
- It can be regarded as a proxy on a :class:`Options` dictionary. + It can be regarded as a proxy on an :class:`Options` dictionary. """ return MappingProxyType(self.__options) @@ -246,66 +289,71 @@ def __set_options(self, options: Options) -> Generator[None, None, None]: # # boolean-like options @overload - def get_option(self, name: FlagOption, /) -> bool: ... # NoQA: E704 + def get_option(self, name: _FLAG, /) -> bool: ... # NoQA: E704 @overload - def get_option(self, name: FlagOption, default: bool, /) -> bool: ... # NoQA: E704 + def get_option(self, name: _FLAG, default: bool, /) -> bool: ... # NoQA: E704 @overload - def get_option(self, name: FlagOption, default: DT, /) -> bool | DT: ... # NoQA: E704 + def get_option(self, name: _FLAG, default: DT, /) -> bool | DT: ... # NoQA: E704 # strip-like options @overload - def get_option(self, name: StripOption, /) -> StripChars: ... # NoQA: E704 + def get_option(self, name: _STRIP, /) -> StripChars: ... # NoQA: E704 @overload - def get_option(self, name: StripOption, default: StripChars, /) -> StripChars: ... # NoQA: E704 + def get_option(self, name: _STRIP, default: StripChars, /) -> StripChars: ... # NoQA: E704 @overload - def get_option(self, name: StripOption, default: DT, /) -> StripChars | DT: ... # NoQA: E704 + def get_option(self, name: _STRIP, default: DT, /) -> StripChars | DT: ... # NoQA: E704 # pruning option @overload - def get_option(self, name: PruneOption, /) -> PrunePattern: ... # NoQA: E704 + def get_option(self, name: _PRUNE, /) -> PrunePattern: ... # NoQA: E704 @overload - def get_option(self, name: PruneOption, default: PrunePattern, /) -> PrunePattern: ... # NoQA: E704 + def get_option(self, name: _PRUNE, default: PrunePattern, /) -> PrunePattern: ... # NoQA: E704 @overload - def get_option(self, name: PruneOption, default: DT, /) -> PrunePattern | DT: ... # NoQA: E704 + def get_option(self, name: _PRUNE, default: DT, /) -> PrunePattern | DT: ... 
# NoQA: E704 # filtering options @overload - def get_option(self, name: IgnoreOption, /) -> LinePredicate | None: ... # NoQA: E704 - @overload # NoQA: E301 - def get_option( # NoQA: E704 - self, name: IgnoreOption, default: LinePredicate | None, / - ) -> LinePredicate | None: ... - @overload # NoQA: E301 - def get_option( # NoQA: E704 - self, name: IgnoreOption, default: DT, / - ) -> LinePredicate | None | DT: ... + def get_option(self, name: _IGNORE, /) -> IgnorePredicate: ... # NoQA: E704 + @overload + def get_option(self, name: _IGNORE, default: IgnorePredicate, /) -> IgnorePredicate: ... # NoQA: E704 + @overload + def get_option(self, name: _IGNORE, default: DT, /) -> IgnorePredicate | DT: ... # NoQA: E704 # miscellaneous options @overload - def get_option(self, name: FlavorOption, /) -> Flavor: ... # NoQA: E704 + def get_option(self, name: _OPCODES, /) -> OpCodes: ... # NoQA: E704 @overload - def get_option(self, name: FlavorOption, default: Flavor, /) -> Flavor: ... # NoQA: E704 + def get_option(self, name: _OPCODES, default: OpCodes, /) -> OpCodes: ... # NoQA: E704 @overload - def get_option(self, name: FlavorOption, default: DT, /) -> Flavor | DT: ... # NoQA: E704 + def get_option(self, name: _OPCODES, default: DT, /) -> OpCodes | DT: ... # NoQA: E704 + @overload + def get_option(self, name: _FLAVOR, /) -> Flavor: ... # NoQA: E704 + @overload + def get_option(self, name: _FLAVOR, default: Flavor, /) -> Flavor: ... # NoQA: E704 + @overload + def get_option(self, name: _FLAVOR, default: DT, /) -> Flavor | DT: ... # NoQA: E704 def get_option(self, name: OptionName, /, *default: object) -> object: # NoQA: E301 """Get an option value, or a default value. :param name: An option name specified in :attr:`default_options`. :return: An option value. - When *default* is specified and *name* is not explicitly set, it is - returned instead of the default specified in :attr:`default_options`. 
+ When *default* is specified and *name* is not explicitly stored by + this object, that *default* is returned instead of the default value + specified in :attr:`default_options`. """ if name in self.__options: return self.__options[name] return default[0] if default else self.default_options[name] @overload - def set_option(self, name: FlagOption, value: bool, /) -> None: ... # NoQA: E704 + def set_option(self, name: _FLAG, value: bool, /) -> None: ... # NoQA: E704 + @overload + def set_option(self, name: _STRIP, value: StripChars, /) -> None: ... # NoQA: E704 @overload - def set_option(self, name: StripOption, value: StripChars, /) -> None: ... # NoQA: E704 + def set_option(self, name: _PRUNE, value: PrunePattern, /) -> None: ... # NoQA: E704 @overload - def set_option(self, name: PruneOption, value: PrunePattern, /) -> None: ... # NoQA: E704 + def set_option(self, name: _IGNORE, value: LinePredicate | None, /) -> None: ... # NoQA: E704 @overload - def set_option(self, name: IgnoreOption, value: LinePredicate | None, /) -> None: ... # NoQA: E704 + def set_option(self, name: _OPCODES, value: OpCodes, /) -> None: ... # NoQA: E704 @overload - def set_option(self, name: FlavorOption, value: Flavor, /) -> None: ... # NoQA: E704 + def set_option(self, name: _FLAVOR, value: Flavor, /) -> None: ... # NoQA: E704 def set_option(self, name: OptionName, value: Any, /) -> None: # NoQA: E301 """Set a persistent option value. 
@@ -334,13 +382,13 @@ def strip(self, value: StripChars) -> None: self.set_option('strip', value) @property - def stripline(self) -> StripChars: - """See :attr:`Options.stripline`.""" - return self.get_option('stripline') + def strip_line(self) -> StripChars: + """See :attr:`Options.strip_line`.""" + return self.get_option('strip_line') - @stripline.setter - def stripline(self, value: StripChars) -> None: - self.set_option('stripline', value) + @strip_line.setter + def strip_line(self, value: StripChars) -> None: + self.set_option('strip_line', value) @property def keep_break(self) -> bool: @@ -379,13 +427,13 @@ def unique(self, value: bool) -> None: self.set_option('unique', value) @property - def delete(self) -> PrunePattern: - """See :attr:`Options.delete`.""" - return self.get_option('delete') + def prune(self) -> PrunePattern: + """See :attr:`Options.prune`.""" + return self.get_option('prune') - @delete.setter - def delete(self, value: PrunePattern) -> None: - self.set_option('delete', value) + @prune.setter + def prune(self, value: PrunePattern) -> None: + self.set_option('prune', value) @property def ignore(self) -> LinePredicate | None: @@ -396,6 +444,15 @@ def ignore(self) -> LinePredicate | None: def ignore(self, value: LinePredicate | None) -> None: self.set_option('ignore', value) + @property + def ops(self) -> Sequence[OpCode]: + """See :attr:`Options.ops`.""" + return self.get_option('ops') + + @ops.setter + def ops(self, value: OpCodes) -> None: + self.set_option('ops', value) + @property def flavor(self) -> Flavor: """See :attr:`Options.flavor`.""" diff --git a/tests/test_testing/test_matcher_cleaner.py b/tests/test_testing/test_matcher_cleaner.py index 1115717d791..c484b46eaf3 100644 --- a/tests/test_testing/test_matcher_cleaner.py +++ b/tests/test_testing/test_matcher_cleaner.py @@ -6,6 +6,7 @@ import pytest from sphinx.testing.matcher import cleaner +from sphinx.testing.matcher.options import Options if TYPE_CHECKING: from collections.abc import 
Sequence @@ -29,19 +30,19 @@ def test_strip_lines(): def test_filter_lines(): - src = ['a', 'a', '', 'a', 'b', 'c', 'a'] + src = '\n'.join(['a', 'a', '', 'a', 'b', 'c', 'a']) expect = ['a', 'b', 'c', 'a'] - assert list(cleaner.filter_lines(src, keep_empty=False, compress=True)) == expect + assert list(cleaner.clean(src, keep_empty=False, compress=True)) == expect expect = ['a', 'b', 'c'] - assert list(cleaner.filter_lines(src, keep_empty=False, unique=True)) == expect + assert list(cleaner.clean(src, keep_empty=False, unique=True)) == expect expect = ['a', '', 'a', 'b', 'c', 'a'] - assert list(cleaner.filter_lines(src, keep_empty=True, compress=True)) == expect + assert list(cleaner.clean(src, keep_empty=True, compress=True)) == expect expect = ['a', '', 'b', 'c'] - assert list(cleaner.filter_lines(src, keep_empty=True, unique=True)) == expect + assert list(cleaner.clean(src, keep_empty=True, unique=True)) == expect @pytest.mark.parametrize( @@ -113,3 +114,15 @@ def test_prune_lines( actual = cleaner.prune_lines(lines, patterns, trace=actual_trace) assert list(actual) == list(expect) assert actual_trace == list(trace) + + +def test_opcodes(): + options = Options(strip_line=True, keep_empty=False, compress=True) + + src = '\n'.join(['a', '', 'a', '', 'a']) + # empty lines removed before duplicates + assert list(cleaner.clean(src, **options)) == ['a'] + + # empty lines removed after duplicates + ops = ('strip', 'compress', 'check') + assert list(cleaner.clean(src, **options, ops=ops)) == ['a', 'a', 'a'] diff --git a/tests/test_testing/test_matcher_options.py b/tests/test_testing/test_matcher_options.py index 5bbfb48d55e..79cf0819e4f 100644 --- a/tests/test_testing/test_matcher_options.py +++ b/tests/test_testing/test_matcher_options.py @@ -39,16 +39,17 @@ def check(option: OptionName, default: object) -> None: check('keep_ansi', True) check('strip', False) - check('stripline', False) + check('strip_line', False) check('keep_break', False) check('keep_empty', True) 
check('compress', False) check('unique', False) - check('delete', ()) + check('prune', ()) check('ignore', None) + check('ops', ('strip', 'check', 'compress', 'unique', 'prune', 'filter')) check('flavor', 'none') # check that there are no leftover options @@ -75,22 +76,22 @@ class Config(OptionsHolder): assert obj.get_option('keep_break') is True assert obj.get_option('keep_break', False) is False - obj = Config(delete='abc') - assert obj.get_option('delete') == 'abc' - assert obj.get_option('delete', 'unused') == 'abc' + obj = Config(prune='abc') + assert obj.get_option('prune') == 'abc' + assert obj.get_option('prune', 'unused') == 'abc' def test_set_option(): obj = OptionsHolder() - assert 'delete' not in obj.options - assert obj.delete == () - obj.set_option('delete', 'abc') + assert 'prune' not in obj.options + assert obj.prune == () + obj.set_option('prune', 'abc') - assert 'delete' in obj.options - assert obj.delete == 'abc' - assert obj.get_option('delete') == 'abc' - assert obj.get_option('delete', 'unused') == 'abc' + assert 'prune' in obj.options + assert obj.prune == 'abc' + assert obj.get_option('prune') == 'abc' + assert obj.get_option('prune', 'unused') == 'abc' @pytest.mark.parametrize('option_name', list(Options.__annotations__)) @@ -102,7 +103,9 @@ def test_property_implementation(option_name: OptionName) -> None: assert isinstance(descriptor, property) # make sure that the docstring is correct - assert descriptor.__doc__ == f'See :attr:`Options.{option_name}`.' 
+ docstring = descriptor.__doc__ + assert docstring is not None + assert docstring.startswith(f'See :attr:`Options.{option_name}`.') assert descriptor.fget is not None assert descriptor.fget.__doc__ == descriptor.__doc__ From 5752a51046ea4e72861cd2913dd208e17c911c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 10 Apr 2024 09:40:35 +0200 Subject: [PATCH 53/66] revert --- doc/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index cc4ee30a4ee..56efa95a174 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -164,7 +164,7 @@ ] intersphinx_mapping = { - 'python': ('https://docs.python.org/3/', ('_static/python.inv', None)), + 'python': ('https://docs.python.org/3/', None), 'requests': ('https://requests.readthedocs.io/en/latest/', None), 'readthedocs': ('https://docs.readthedocs.io/en/stable', None), } From 0b0b5a17da227cb66ed91d72e4dffb850f75a148 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 10 Apr 2024 09:45:52 +0200 Subject: [PATCH 54/66] fixup --- doc/conf.py | 2 +- tests/test_testing/test_matcher_cleaner.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 56efa95a174..f840f76143d 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -288,8 +288,8 @@ def linkify(match): def setup(app: Sphinx) -> None: from sphinx.ext.autodoc import cut_lines - from sphinx.util.docfields import GroupedField from sphinx.roles import code_role + from sphinx.util.docfields import GroupedField app.connect('autodoc-process-docstring', cut_lines(4, what=['module'])) app.connect('source-read', linkify_issues_in_changelog) diff --git a/tests/test_testing/test_matcher_cleaner.py b/tests/test_testing/test_matcher_cleaner.py index c484b46eaf3..4c89caa3720 100644 --- a/tests/test_testing/test_matcher_cleaner.py +++ 
b/tests/test_testing/test_matcher_cleaner.py @@ -124,5 +124,5 @@ def test_opcodes(): assert list(cleaner.clean(src, **options)) == ['a'] # empty lines removed after duplicates - ops = ('strip', 'compress', 'check') - assert list(cleaner.clean(src, **options, ops=ops)) == ['a', 'a', 'a'] + options_with_opcodes = options | {'ops': ('strip', 'compress', 'check')} + assert list(cleaner.clean(src, **options_with_opcodes)) == ['a', 'a', 'a'] From b85e80ab725108d42a7fb04990171a2a2386331f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 19 Apr 2024 09:55:07 +0200 Subject: [PATCH 55/66] cleanup --- sphinx/testing/matcher/__init__.py | 12 ++++++---- sphinx/testing/matcher/options.py | 28 +++++++++++----------- tests/test_testing/test_matcher_options.py | 6 ++--- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/sphinx/testing/matcher/__init__.py b/sphinx/testing/matcher/__init__.py index e105ab4344b..3f128c474e6 100644 --- a/sphinx/testing/matcher/__init__.py +++ b/sphinx/testing/matcher/__init__.py @@ -13,7 +13,7 @@ from sphinx.testing.matcher.options import Options, OptionsHolder if TYPE_CHECKING: - from collections.abc import Generator, Iterable, Iterator, Sequence, Set + from collections.abc import Iterable, Iterator, Sequence, Set from io import StringIO from typing import Any, ClassVar, Literal @@ -71,12 +71,14 @@ def __iter__(self) -> Iterator[Line]: return self.lines().lines_iterator() @contextlib.contextmanager - def override(self, /, **options: Unpack[Options]) -> Generator[None, None, None]: + def override(self, /, **options: Unpack[Options]) -> Iterator[None]: """Temporarily extend the set of options with *options*.""" self.__stack.append(None) # prepare the next cache entry - with super().override(**options): - yield - self.__stack.pop() # pop the cached lines + try: + with super().override(**options): + yield + finally: + self.__stack.pop() # pop the cached lines @property 
def content(self) -> str: diff --git a/sphinx/testing/matcher/options.py b/sphinx/testing/matcher/options.py index 4b573fcefbe..e0d918c337f 100644 --- a/sphinx/testing/matcher/options.py +++ b/sphinx/testing/matcher/options.py @@ -7,16 +7,18 @@ import contextlib from collections.abc import Sequence from types import MappingProxyType -from typing import TYPE_CHECKING, Literal, TypedDict, TypeVar, Union, final, overload +from typing import TYPE_CHECKING, Literal, TypedDict, Union, final, overload from sphinx.testing.matcher._util import LinePredicate, PatternLike if TYPE_CHECKING: - from collections.abc import Generator, Mapping - from typing import Any, ClassVar + from collections.abc import Iterator, Mapping + from typing import Any, ClassVar, TypeVar from typing_extensions import Unpack + DT = TypeVar('DT') + _FLAG = Literal['keep_ansi', 'keep_break', 'keep_empty', 'compress', 'unique'] _STRIP = Literal['strip', 'strip_line'] @@ -40,12 +42,10 @@ Flavor = Literal['re', 'fnmatch', 'none'] """Allowed values for :attr:`Options.flavor`.""" -# For some reason, mypy does not like Union of Literal, -# so we wrap the Literal types inside a bigger Literal. -OptionValue = Union[bool, StripChars, PrunePattern, IgnorePredicate, OpCodes, Flavor] -OptionName = Literal[_FLAG, _STRIP, _PRUNE, _IGNORE, _OPCODES, _FLAVOR] - -DT = TypeVar('DT') +# For some reason, mypy does not like Union of Literal when used as keys +# of a TypedDict (see: https://github.com/python/mypy/issues/16818), so +# we instead use a Literal of those (which is equivalent). 
+_OPTION = Literal[_FLAG, _STRIP, _PRUNE, _IGNORE, _OPCODES, _FLAVOR] @final @@ -267,16 +267,16 @@ def complete_options(self) -> Mapping[str, object]: return MappingProxyType(self.default_options | self.__options) @contextlib.contextmanager - def set_options(self, /, **options: Unpack[Options]) -> Generator[None, None, None]: + def set_options(self, /, **options: Unpack[Options]) -> Iterator[None]: """Temporarily replace the set of options with *options*.""" return self.__set_options(options) @contextlib.contextmanager - def override(self, /, **options: Unpack[Options]) -> Generator[None, None, None]: + def override(self, /, **options: Unpack[Options]) -> Iterator[None]: """Temporarily extend the set of options with *options*.""" return self.__set_options(self.__options | options) - def __set_options(self, options: Options) -> Generator[None, None, None]: + def __set_options(self, options: Options) -> Iterator[None]: saved_options = self.__options.copy() self.__options = options try: @@ -328,7 +328,7 @@ def get_option(self, name: _FLAVOR, /) -> Flavor: ... # NoQA: E704 def get_option(self, name: _FLAVOR, default: Flavor, /) -> Flavor: ... # NoQA: E704 @overload def get_option(self, name: _FLAVOR, default: DT, /) -> Flavor | DT: ... # NoQA: E704 - def get_option(self, name: OptionName, /, *default: object) -> object: # NoQA: E301 + def get_option(self, name: _OPTION, /, *default: object) -> object: # NoQA: E301 """Get an option value, or a default value. :param name: An option name specified in :attr:`default_options`. @@ -354,7 +354,7 @@ def set_option(self, name: _IGNORE, value: LinePredicate | None, /) -> None: ... def set_option(self, name: _OPCODES, value: OpCodes, /) -> None: ... # NoQA: E704 @overload def set_option(self, name: _FLAVOR, value: Flavor, /) -> None: ... 
# NoQA: E704 - def set_option(self, name: OptionName, value: Any, /) -> None: # NoQA: E301 + def set_option(self, name: _OPTION, value: Any, /) -> None: # NoQA: E301 """Set a persistent option value. The *name* should be an option for which a default value is specified diff --git a/tests/test_testing/test_matcher_options.py b/tests/test_testing/test_matcher_options.py index 79cf0819e4f..30c8bd67a76 100644 --- a/tests/test_testing/test_matcher_options.py +++ b/tests/test_testing/test_matcher_options.py @@ -10,7 +10,7 @@ if TYPE_CHECKING: from typing import ClassVar - from sphinx.testing.matcher.options import OptionName + from sphinx.testing.matcher.options import _OPTION def test_options_class(): @@ -30,7 +30,7 @@ def test_default_options(): processed = set() - def check(option: OptionName, default: object) -> None: + def check(option: _OPTION, default: object) -> None: assert option not in processed assert option in default_options assert default_options[option] == default @@ -95,7 +95,7 @@ def test_set_option(): @pytest.mark.parametrize('option_name', list(Options.__annotations__)) -def test_property_implementation(option_name: OptionName) -> None: +def test_property_implementation(option_name: _OPTION) -> None: """Test that the implementation is correct and do not have typos.""" obj = OptionsHolder() From 488343f7336c0616b9f46752cf68b12d1345ff0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 28 Apr 2024 01:05:58 +0200 Subject: [PATCH 56/66] Update test_matcher_buffer.py --- tests/test_testing/test_matcher_buffer.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_testing/test_matcher_buffer.py b/tests/test_testing/test_matcher_buffer.py index cf5908f3381..64a2b0c1ffd 100644 --- a/tests/test_testing/test_matcher_buffer.py +++ b/tests/test_testing/test_matcher_buffer.py @@ -76,7 +76,7 @@ def test_line_count_substrings(): assert 
line.count('no') == 0 assert line.count('a') == 2 - line = Line(''.join(('a', 'b', ' ', 'b', 'b', ' ', 'a', 'c', ' ', 'c', 'c'))) + line = Line('ab bb ac cc') assert line.count(re.compile(r'^\Z')) == 0 assert line.count(re.compile(r'a[bc]')) == 2 @@ -96,7 +96,7 @@ def test_line_count_substrings(): ( # -11 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 # 0 1 2 3 4 5 6 7 8 9 10 - Line(''.join(('a', 'b', ' ', 'b', 'b', ' ', 'x', 'c', ' ', 'c', 'c'))), + Line(''.join(('a', 'b', ' ', 'b', 'b', ' ', 'x', 'c', ' ', 'c', 'c'))), # NoQA: FLY002 [ (re.compile(r'a\w'), (), 0), (re.compile(r'\bx'), (2,), 6), @@ -157,13 +157,13 @@ def test_empty_line_operators(): assert Line() < Line('a', 0) # do not simplify these expressions - assert not operator.__lt__(Line(), '') - assert not operator.__lt__(Line(), ['', 0]) - assert not operator.__lt__(Line(), Line()) + assert not operator.__lt__(Line(), '') # NoQA: PLC2801 + assert not operator.__lt__(Line(), ['', 0]) # NoQA: PLC2801 + assert not operator.__lt__(Line(), Line()) # NoQA: PLC2801 - assert not operator.__gt__(Line(), '') - assert not operator.__gt__(Line(), ['', 0]) - assert not operator.__gt__(Line(), Line()) + assert not operator.__gt__(Line(), '') # NoQA: PLC2801 + assert not operator.__gt__(Line(), ['', 0]) # NoQA: PLC2801 + assert not operator.__gt__(Line(), Line()) # NoQA: PLC2801 def test_non_empty_line_operators(): @@ -267,8 +267,8 @@ def test_empty_block_operators(): assert Block() < [[Line('a', 0)], 0] # do not simplify these expressions - assert not operator.__lt__(Block(), []) - assert not operator.__lt__(Block(), [[], 0]) + assert not operator.__lt__(Block(), []) # NoQA: PLC2801 + assert not operator.__lt__(Block(), [[], 0]) # NoQA: PLC2801 assert not operator.__gt__(Block(), []) assert not operator.__gt__(Block(), ['a']) From b3967c4fe4623948c04ade8fc8aed9c2f6403f34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 28 Apr 2024 01:08:31 +0200 Subject: 
[PATCH 57/66] Update test_matcher_cleaner.py --- tests/test_testing/test_matcher_cleaner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_testing/test_matcher_cleaner.py b/tests/test_testing/test_matcher_cleaner.py index 4c89caa3720..06750274ba3 100644 --- a/tests/test_testing/test_matcher_cleaner.py +++ b/tests/test_testing/test_matcher_cleaner.py @@ -30,7 +30,7 @@ def test_strip_lines(): def test_filter_lines(): - src = '\n'.join(['a', 'a', '', 'a', 'b', 'c', 'a']) + src = '\n'.join(['a', 'a', '', 'a', 'b', 'c', 'a']) # NoQA: FLY002 expect = ['a', 'b', 'c', 'a'] assert list(cleaner.clean(src, keep_empty=False, compress=True)) == expect @@ -119,7 +119,7 @@ def test_prune_lines( def test_opcodes(): options = Options(strip_line=True, keep_empty=False, compress=True) - src = '\n'.join(['a', '', 'a', '', 'a']) + src = '\n'.join(['a', '', 'a', '', 'a']) # NoQA: FLY002 # empty lines removed before duplicates assert list(cleaner.clean(src, **options)) == ['a'] From dbeba1610cdc895926ce2b49eefa19fbfc5d7533 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 28 Apr 2024 01:09:40 +0200 Subject: [PATCH 58/66] Update test_matcher_buffer.py --- tests/test_testing/test_matcher_buffer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_testing/test_matcher_buffer.py b/tests/test_testing/test_matcher_buffer.py index 64a2b0c1ffd..fab03b01eda 100644 --- a/tests/test_testing/test_matcher_buffer.py +++ b/tests/test_testing/test_matcher_buffer.py @@ -270,11 +270,11 @@ def test_empty_block_operators(): assert not operator.__lt__(Block(), []) # NoQA: PLC2801 assert not operator.__lt__(Block(), [[], 0]) # NoQA: PLC2801 - assert not operator.__gt__(Block(), []) - assert not operator.__gt__(Block(), ['a']) - assert not operator.__gt__(Block(), [['a'], 0]) - assert not operator.__gt__(Block(), [[('a', 0)], 0]) - assert not operator.__gt__(Block(), 
[[Line('a', 0)], 0]) + assert not operator.__gt__(Block(), []) # NoQA: PLC2801 + assert not operator.__gt__(Block(), ['a']) # NoQA: PLC2801 + assert not operator.__gt__(Block(), [['a'], 0]) # NoQA: PLC2801 + assert not operator.__gt__(Block(), [[('a', 0)], 0]) # NoQA: PLC2801 + assert not operator.__gt__(Block(), [[Line('a', 0)], 0]) # NoQA: PLC2801 @pytest.mark.parametrize( From 963c4ceeea71bc8068e1903cd06e468e0dc2de1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 28 Apr 2024 01:35:26 +0200 Subject: [PATCH 59/66] Update __init__.py --- sphinx/testing/matcher/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sphinx/testing/matcher/__init__.py b/sphinx/testing/matcher/__init__.py index 3f128c474e6..4ef33cade36 100644 --- a/sphinx/testing/matcher/__init__.py +++ b/sphinx/testing/matcher/__init__.py @@ -152,9 +152,11 @@ def iterfind( if not patterns: # nothinig to match return - compiled_patterns = set(self.__compile(patterns, flavor=flavor)) + compiled_patterns = self.__compile(patterns, flavor=flavor) + # remove duplicated patterns but retain order + unique_compiled_patterns = _util.unique_everseen(compiled_patterns) # faster to iterate over a tuple rather than a set or a list - matchers = tuple(pattern.match for pattern in compiled_patterns) + matchers = tuple(pattern.match for pattern in unique_compiled_patterns) def predicate(line: Line) -> bool: text = line.buffer From 810ca5ad39444cb3cc0f505f9857bf4e19e67251 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 28 Apr 2024 14:19:44 +0200 Subject: [PATCH 60/66] improve python-code roles --- doc/conf.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index f840f76143d..9fb3f6033e0 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -242,7 +242,6 @@ ('std:confval', 'globaltoc_maxdepth'), } - 
# -- Extension interface ------------------------------------------------------- from sphinx import addnodes # NoQA: E402 @@ -309,7 +308,9 @@ def pycode_role(name, rawtext, text, lineno, inliner, options=None, content=()): return code_role(name, rawtext, text, lineno, inliner, options, content) def pyrepr_role(name, rawtext, text, lineno, inliner, options=None, content=()): - return pycode_role(name, rawtext, repr(text), lineno, inliner, options, content) + # restore backslashes instead of null bytes + text = repr(text).replace(r'\x00', '\\') + return pycode_role(name, rawtext, text, lineno, inliner, options, content) app.add_role('py3', pycode_role) - app.add_role('py3repr', pyrepr_role) + app.add_role('py3r', pyrepr_role) From 843e0c7fa1ddc0ecb2c4ee22ffa2073e75829242 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 28 Apr 2024 14:20:32 +0200 Subject: [PATCH 61/66] simplify op-codes logic --- sphinx/testing/matcher/_cleaner.py | 86 ++++++++++++++++++++++++++++++ sphinx/testing/matcher/cleaner.py | 40 +++++--------- 2 files changed, 100 insertions(+), 26 deletions(-) create mode 100644 sphinx/testing/matcher/_cleaner.py diff --git a/sphinx/testing/matcher/_cleaner.py b/sphinx/testing/matcher/_cleaner.py new file mode 100644 index 00000000000..1f6d58df279 --- /dev/null +++ b/sphinx/testing/matcher/_cleaner.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +from functools import partial +from itertools import filterfalse +from typing import TYPE_CHECKING, TypedDict, final + +from sphinx.testing.matcher._util import unique_everseen, unique_justseen + +if TYPE_CHECKING: + from collections.abc import Callable, Iterable + + from typing_extensions import TypeAlias + + from sphinx.testing.matcher.options import OpCode, OptionsHolder, PrunePattern, StripChars + + DispatcherFunc: TypeAlias = Callable[[Iterable[str]], Iterable[str]] + + +@final +class HandlerMap(TypedDict): + # Whenever a new 
operation code is supported, do not forget to + # update :func:`get_dispatcher_map` and :func.`get_active_opcodes`. + strip: DispatcherFunc + check: DispatcherFunc + compress: DispatcherFunc + unique: DispatcherFunc + prune: DispatcherFunc + filter: DispatcherFunc + + +def get_active_opcodes(options: OptionsHolder) -> Iterable[OpCode]: + """Get the iterable of operation's codes to execute.""" + disable: set[OpCode] = set() + + if options.strip_line is False: + disable.add('strip') + + if options.keep_empty: + disable.add('check') + + if not options.compress: + disable.add('compress') + + if not options.unique: + disable.add('unique') + + if not isinstance(prune_patterns := options.prune, str) and not prune_patterns: + disable.add('prune') + + if not callable(options.ignore): + disable.add('filter') + + return filterfalse(disable.__contains__, options.ops) + + +def make_handlers(args: OptionsHolder) -> HandlerMap: + return { + 'strip': partial(_strip_lines_aux, args.strip_line), + 'check': partial(filter, None), + 'compress': unique_justseen, + 'unique': unique_everseen, + 'prune': partial(_prune_lines_aux, args.prune), + 'filter': partial(filterfalse, args.ignore), + } + + +# we do not want to expose a non-positional-only public interface +# and we want to be able to have a pickable right partialization +# in case future multi-processing is added +def _strip_lines_aux(chars: StripChars, lines: Iterable[str]) -> Iterable[str]: + # local import to break circular imports (but the module should already + # be loaded since `get_handlers` is expected to be called from there) + from .cleaner import strip_lines + + return strip_lines(lines, chars) + + +# we do not want to expose a non-positional-only public interface +# and we want to be able to have a pickable right partialization +# in case future multi-processing is added +def _prune_lines_aux(patterns: PrunePattern, lines: Iterable[str]) -> Iterable[str]: + # local import to break circular imports (but the module 
should already + # be loaded since `get_handlers` is expected to be called from there) + from .cleaner import prune_lines + + return prune_lines(lines, patterns, trace=None) diff --git a/sphinx/testing/matcher/cleaner.py b/sphinx/testing/matcher/cleaner.py index 922065f8a5c..f15b0b4bc01 100644 --- a/sphinx/testing/matcher/cleaner.py +++ b/sphinx/testing/matcher/cleaner.py @@ -4,34 +4,24 @@ __all__ = () -import itertools -from functools import partial, reduce +from functools import reduce +from itertools import accumulate, islice from typing import TYPE_CHECKING -from sphinx.testing.matcher import _codes, _engine +from sphinx.testing.matcher import _cleaner, _engine from sphinx.testing.matcher.options import OptionsHolder from sphinx.util.console import strip_escape_sequences if TYPE_CHECKING: - import re from collections.abc import Iterable + from re import Pattern - from typing_extensions import Unpack + from typing_extensions import TypeAlias, Unpack from sphinx.testing.matcher._util import Patterns from sphinx.testing.matcher.options import Options, PrunePattern, StripChars - TraceInfo = list[list[tuple[str, list[str]]]] - - -# we do not want to expose a non-positional-only public interface -def _strip_lines_aux(chars: StripChars, lines: Iterable[str]) -> Iterable[str]: - return strip_lines(lines, chars) - - -# we do not want to expose a non-positional-only public interface -def _prune_lines_aux(patterns: PrunePattern, lines: Iterable[str]) -> Iterable[str]: - return prune_lines(lines, patterns, trace=None) + TraceInfo: TypeAlias = list[list[tuple[str, list[str]]]] def clean(text: str, /, **options: Unpack[Options]) -> Iterable[str]: @@ -50,14 +40,12 @@ def clean(text: str, /, **options: Unpack[Options]) -> Iterable[str]: text = strip_chars(text, args.strip) # obtain the lines lines: Iterable[str] = text.splitlines(args.keep_break) - # process the lines according to the operation codes sequence - stripfn = partial(_strip_lines_aux, args.strip_line) - prunefn = 
partial(_prune_lines_aux, args.prune) - dispatchers = _codes.get_dispatcher_map(args, strip_lines=stripfn, prune_lines=prunefn) - for opcode in _codes.get_active_opcodes(args): - if (fn := dispatchers.get(opcode)) is None: + # process the lines according to the operation codes sequence$ + handlers = _cleaner.make_handlers(args) + for opcode in _cleaner.get_active_opcodes(args): + if (handler := handlers.get(opcode)) is None: raise ValueError('unknown operation code: %r' % opcode) - lines = fn(lines) + lines = handler(lines) return lines @@ -114,7 +102,7 @@ def prune_lines( return _prune_debug(lines, compiled, trace) -def _prune_pattern(line: str, pattern: re.Pattern[str]) -> str: +def _prune_pattern(line: str, pattern: Pattern[str]) -> str: return pattern.sub('', line) @@ -133,8 +121,8 @@ def prune(line: str) -> str: def _prune_debug(lines: Iterable[str], compiled: Patterns, trace: TraceInfo) -> Iterable[str]: def apply(line: str) -> tuple[str, list[str]]: - values = itertools.accumulate(compiled, _prune_pattern, initial=line) - states = list(itertools.islice(values, 1, None)) # skip initial value + values = accumulate(compiled, _prune_pattern, initial=line) + states = list(islice(values, 1, None)) # skip initial value return states[-1], states def prune(line: str) -> str: From 35673f0070d4c8bbb62df9ab6832ab0b3c97c192 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 28 Apr 2024 14:22:17 +0200 Subject: [PATCH 62/66] use type aliases --- sphinx/testing/matcher/_util.py | 12 ++++++------ sphinx/testing/matcher/buffer.py | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sphinx/testing/matcher/_util.py b/sphinx/testing/matcher/_util.py index 1f6d41e99a4..5e39698c04e 100644 --- a/sphinx/testing/matcher/_util.py +++ b/sphinx/testing/matcher/_util.py @@ -19,26 +19,26 @@ from collections.abc import Iterable, Iterator, Mapping from typing import Any, TypeVar - from 
typing_extensions import Never + from typing_extensions import Never, TypeAlias from sphinx.testing.matcher.buffer import Region _T = TypeVar('_T') -PatternLike = Union[str, re.Pattern[str]] +PatternLike: TypeAlias = Union[str, re.Pattern[str]] """A regular expression (compiled or not).""" -LinePattern = Union[str, re.Pattern[str]] +LinePattern: TypeAlias = Union[str, re.Pattern[str]] """A regular expression (compiled or not) for an entire line.""" -LinePredicate = Callable[[str], object] +LinePredicate: TypeAlias = Callable[[str], object] """A predicate called on an entire line.""" -BlockPattern = Sequence[LinePattern] +BlockPattern: TypeAlias = Sequence[LinePattern] """A sequence of regular expressions (compiled or not) for a block. For instance, ``['a', re.compile('b*')]`` matches blocks with the line ``'a'`` followed by a line matching ``'b*'``. """ -Patterns = tuple[re.Pattern[str], ...] +Patterns: TypeAlias = tuple[re.Pattern[str], ...] """Sequence of compiled patterns to use.""" diff --git a/sphinx/testing/matcher/buffer.py b/sphinx/testing/matcher/buffer.py index c4da57472c7..9e15b2892c0 100644 --- a/sphinx/testing/matcher/buffer.py +++ b/sphinx/testing/matcher/buffer.py @@ -17,14 +17,14 @@ from collections.abc import Iterable, Iterator from typing import Any, Union - from typing_extensions import Self + from typing_extensions import Self, TypeAlias from sphinx.testing.matcher._util import LinePattern, LinePredicate, PatternLike - SubStringLike = PatternLike + SubStringLike: TypeAlias = PatternLike """A line's substring or a compiled substring pattern.""" - BlockLineLike = Union[object, LinePattern, LinePredicate] + BlockLineLike: TypeAlias = Union[object, LinePattern, LinePredicate] """A block's line, a compiled pattern or a predicate.""" # We would like to have a covariant buffer type but Python does not From 6c50e61a277b5c8fb911fb57d821b549733e09eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= 
<10796600+picnixz@users.noreply.github.com> Date: Sun, 28 Apr 2024 14:22:46 +0200 Subject: [PATCH 63/66] changed 'none' flavor to 'literal' --- sphinx/testing/matcher/_engine.py | 38 ++++++++++++++++----------- sphinx/testing/matcher/options.py | 43 +++++++++++++++++-------------- 2 files changed, 46 insertions(+), 35 deletions(-) diff --git a/sphinx/testing/matcher/_engine.py b/sphinx/testing/matcher/_engine.py index 915c2a5d837..7f4ddc96402 100644 --- a/sphinx/testing/matcher/_engine.py +++ b/sphinx/testing/matcher/_engine.py @@ -20,7 +20,7 @@ def _check_flavor(flavor: Flavor) -> None: - allowed: Sequence[Flavor] = ('none', 'fnmatch', 're') + allowed: Sequence[Flavor] = ('literal', 'fnmatch', 're') if flavor not in allowed: msg = f'unknown flavor: {flavor!r} (choose from: {allowed})' raise ValueError(msg) @@ -59,6 +59,14 @@ def to_line_patterns( # NoqA: E302 be the same for the same inputs. Otherwise, the order of *expect* is retained, in case this could make a difference. """ + # This function is usually called *twice* in ``assert_*``-like routines + # and thus, we expect the inputs to mainly be strings or tuples. + # + # Nevertheless, tuples could appear more frequently than strings since + # the inputs could arise from variadic functions and thus we check for + # tuples first. + if isinstance(patterns, tuple): + return patterns if isinstance(patterns, (str, re.Pattern)): return (patterns,) if isinstance(patterns, Set): @@ -83,6 +91,9 @@ def to_block_pattern(patterns: PatternLike | BlockPattern, /) -> tuple[LinePatte to_block_pattern('line1\nline2') == ('line1', 'line2') """ + # See `to_line_patterns` for the `if` statements evaluation order. 
+ if isinstance(patterns, tuple): + return patterns if isinstance(patterns, str): return tuple(patterns.splitlines()) if isinstance(patterns, re.Pattern): @@ -105,11 +116,6 @@ def string_expression(line: str, /) -> str: return rf'\A{re.escape(line)}\Z' -def fnmatch_prefix(prefix: str) -> str: - """A regular expression matching a :mod:`fnmatch`-style prefix.""" - return fnmatch.translate(prefix).rstrip(r'\Z$') - - def translate( patterns: Iterable[PatternLike], *, @@ -125,14 +131,14 @@ def translate( :param patterns: An iterable of regular expressions to translate. :param flavor: The translation flavor for non-compiled patterns. - :param escape: Translation function for ``'none'`` flavor. - :param regular_translate: Translation function for ``'re'`` flavor. - :param fnmatch_translate: Translation function for ``'fnmatch'`` flavor. + :param escape: Translation function for :py3r:`literal` flavor. + :param regular_translate: Translation function for :py3r:`re` flavor. + :param fnmatch_translate: Translation function for :py3r:`fnmatch` flavor. :return: An iterable of :class:`re`-style pattern-like objects. """ _check_flavor(flavor) - if flavor == 'none' and callable(translator := escape): + if flavor == 'literal' and callable(translator := escape): return (format_expression(translator, expr) for expr in patterns) if flavor == 're' and callable(translator := regular_translate): @@ -156,9 +162,9 @@ def compile( :param patterns: An iterable of patterns to translate and compile. :param flavor: The translation flavor for non-compiled patterns. - :param escape: Translation function for ``'none'`` flavor. - :param regular_translate: Translation function for ``'re'`` flavor. - :param fnmatch_translate: Translation function for ``'fnmatch'`` flavor. + :param escape: Translation function for :py3r:`literal` flavor. + :param regular_translate: Translation function for :py3r:`re` flavor. + :param fnmatch_translate: Translation function for :py3r:`fnmatch` flavor. 
:return: A sequence of compiled regular expressions. """ patterns = translate( @@ -168,6 +174,8 @@ def compile( regular_translate=regular_translate, fnmatch_translate=fnmatch_translate, ) - # mypy does not like map + re.compile() although it is correct but - # this is likely due to https://github.com/python/mypy/issues/11880 + + # mypy does not like map + re.compile() although it is correct + # + # xref: https://github.com/python/mypy/issues/11880 return tuple(re.compile(pattern) for pattern in patterns) diff --git a/sphinx/testing/matcher/options.py b/sphinx/testing/matcher/options.py index e0d918c337f..ee666d1194a 100644 --- a/sphinx/testing/matcher/options.py +++ b/sphinx/testing/matcher/options.py @@ -15,37 +15,39 @@ from collections.abc import Iterator, Mapping from typing import Any, ClassVar, TypeVar - from typing_extensions import Unpack + from typing_extensions import TypeAlias, Unpack DT = TypeVar('DT') -_FLAG = Literal['keep_ansi', 'keep_break', 'keep_empty', 'compress', 'unique'] +_FLAG: TypeAlias = Literal['keep_ansi', 'keep_break', 'keep_empty', 'compress', 'unique'] -_STRIP = Literal['strip', 'strip_line'] -StripChars = Union[bool, str, None] +_STRIP: TypeAlias = Literal['strip', 'strip_line'] +StripChars: TypeAlias = Union[bool, str, None] """Allowed values for :attr:`Options.strip` and :attr:`Options.strip_line`.""" -_PRUNE = Literal['prune'] -PrunePattern = Union[PatternLike, Sequence[PatternLike]] +_PRUNE: TypeAlias = Literal['prune'] +PrunePattern: TypeAlias = Union[PatternLike, Sequence[PatternLike]] """One or more (non-empty) patterns to prune.""" -_IGNORE = Literal['ignore'] -IgnorePredicate = Union[LinePredicate, None] +_IGNORE: TypeAlias = Literal['ignore'] +IgnorePredicate: TypeAlias = Union[LinePredicate, None] _OPCODES = Literal['ops'] # must be kept in sync with :mod:`sphinx.testing.matcher._codes` -OpCode = Literal['strip', 'check', 'compress', 'unique', 'prune', 'filter'] +# and must be present at runtime for testing the 
synchronization +OpCode: TypeAlias = Literal['strip', 'check', 'compress', 'unique', 'prune', 'filter'] """Known operation codes (see :attr:`Options.ops`).""" -OpCodes = Sequence[OpCode] +OpCodes: TypeAlias = Sequence[OpCode] -_FLAVOR = Literal['flavor'] -Flavor = Literal['re', 'fnmatch', 'none'] +_FLAVOR: TypeAlias = Literal['flavor'] +# When Python 3.11 becomes the minimal version, change this for a string enumeration. +Flavor: TypeAlias = Literal['literal', 'fnmatch', 're'] """Allowed values for :attr:`Options.flavor`.""" # For some reason, mypy does not like Union of Literal when used as keys # of a TypedDict (see: https://github.com/python/mypy/issues/16818), so # we instead use a Literal of those (which is equivalent). -_OPTION = Literal[_FLAG, _STRIP, _PRUNE, _IGNORE, _OPCODES, _FLAVOR] +_OPTION: TypeAlias = Literal[_FLAG, _STRIP, _PRUNE, _IGNORE, _OPCODES, _FLAVOR] @final @@ -134,7 +136,7 @@ class Options(TypedDict, total=False): The following table describes the allowed *opcode*. - .. default-role:: py3repr + .. default-role:: py3r +------------+--------------------+---------------------------------------+ | Op. Code | Option | Description | @@ -160,8 +162,9 @@ class Options(TypedDict, total=False): .. rubric:: Example - Let :py3:`lines = ['a', '', 'a', '', 'a']` and:: + Consider the following setup:: + lines = ['a', '', 'a', '', 'a'] options = Options(strip_line=True, keep_empty=False, compress=True) By default, the lines are transformed into :py3:`['a']` since empty lines @@ -179,11 +182,11 @@ class Options(TypedDict, total=False): The allowed values for :attr:`flavor` are: - * :py3:`'none'` -- match lines using string equality (the default). - * :py3:`'fnmatch'` -- match lines using :mod:`fnmatch`-style patterns. - * :py3:`'re'` -- match lines using :mod:`re`-style patterns. + * :py3r:`literal` -- match lines using string equality (the default). + * :py3r:`fnmatch` -- match lines using :mod:`fnmatch`-style patterns. 
+ * :py3r:`re` -- match lines using :mod:`re`-style patterns. - This option only affects non-compiled patterns. Unless stated otheriwse, + This option only affects non-compiled patterns. Unless stated otherwise, matching is performed on compiled patterns by :meth:`re.Pattern.match`. """ @@ -239,7 +242,7 @@ class OptionsHolder: prune=(), ignore=None, ops=('strip', 'check', 'compress', 'unique', 'prune', 'filter'), - flavor='none', + flavor='literal', ) """The supported options specifications and their default values. From acdc53f7cccd27f3b5b3d2faf50e88f478b70127 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 28 Apr 2024 14:23:29 +0200 Subject: [PATCH 64/66] Update API according to comments --- sphinx/testing/matcher/__init__.py | 395 +++++++++++++++++++++++------ 1 file changed, 315 insertions(+), 80 deletions(-) diff --git a/sphinx/testing/matcher/__init__.py b/sphinx/testing/matcher/__init__.py index 4ef33cade36..84e17945f85 100644 --- a/sphinx/testing/matcher/__init__.py +++ b/sphinx/testing/matcher/__init__.py @@ -6,28 +6,79 @@ import contextlib import re -from typing import TYPE_CHECKING, cast +from collections.abc import Sequence, Set +from typing import TYPE_CHECKING, Union, cast, final from sphinx.testing.matcher import _engine, _util, cleaner +from sphinx.testing.matcher._util import BlockPattern, LinePattern from sphinx.testing.matcher.buffer import Block from sphinx.testing.matcher.options import Options, OptionsHolder if TYPE_CHECKING: - from collections.abc import Iterable, Iterator, Sequence, Set + from collections.abc import Iterable, Iterator from io import StringIO from typing import Any, ClassVar, Literal - from typing_extensions import Self, Unpack + from typing_extensions import Self, TypeAlias, Unpack - from sphinx.testing.matcher._util import BlockPattern, LinePattern, PatternLike, Patterns + from sphinx.testing.matcher._util import PatternLike, Patterns from 
sphinx.testing.matcher.buffer import Line, Region from sphinx.testing.matcher.options import CompleteOptions, Flavor - _RegionType = Literal['line', 'block'] + _RegionType: TypeAlias = Literal['line', 'block'] + +LineSet: TypeAlias = Union[LinePattern, Set[LinePattern], Sequence[LinePattern]] +"""One or more valid lines to find. + +Non-compiled patterns are compiled according to the matcher's flavor. +""" + +BlockLike: TypeAlias = Union[str, BlockPattern] +"""A pattern for a block to find. + +A non-compiled pattern is compiled according to :class:`LineMatcher`'s flavor, +or the flavor of methods such as :meth:`LineMatcher.assert_block_literal`. +""" class LineMatcher(OptionsHolder): - """Helper object for matching output lines.""" + r"""Helper object for matching output lines. + + Matching output lines is achieved by matching against compiled regular + expressions, i.e., :class:`~re.Pattern` objects, e.g.,: + + >>> matcher = LineMatcher.from_lines(('Line 1', 'Line 2.0', r'Line \d+')) + >>> matcher.find(re.compile(r'Line \d+')) + ('Line 1', 'Line 2.0') + + The interface also supports non-compiled :class:`str` expressions, which + are interpreted according to the matcher :attr:`~.OptionsHolder.flavor`. + + The default flavor is :py3r:`literal`, meaning that such expressions are + escaped via :func:`re.escape` before being compiled into *exact match* + patterns, i.e., literal strings are assumed to span the entire line, + and thus are prefixed with :py3r:`\A` and :py3r:`\Z` meta-characters: + + >>> matcher.find('Line 1') + ('Line 1',) + >>> matcher.find('Line 2') + () + >>> matcher.find('Line 2.0') + ('Line 2.0',) + >>> matcher.find(r'Line \d+') + ('Line \\d+',) + + A useful flavor is :py3r:`re` which compiles :class:`str` expressions into + patterns *without* escaping them first or adding meta-characters. 
+ + >>> matcher.find(r'Line \w+', flavor='re') + ('Line 1', 'Line 2.0') + + For some users, it might also be useful to support :mod:`fnmatch`-style + patterns described by the :py3r:`fnmatch` flavor and where strings are + translated into :mod:`fnmatch` patterns via :func:`fnmatch.translate` + but allowed to be + """ __slots__ = ('__content', '__stack') @@ -51,16 +102,23 @@ def from_lines(cls, lines: Iterable[str] = (), /, **options: Unpack[Options]) -> This is typically useful when writing tests for :class:`LineMatcher` since writing the lines instead of a long string is usually cleaner. - The lines are glued together according to *keep_break* (the default - value is specified by :attr:`default_options`), e.g.,:: - - text = 'foo\nbar' - - lines = text.splitlines() - LineMatcher.from_lines(lines) == LineMatcher(text) - - lines = text.splitlines(True) - LineMatcher.from_lines(lines, keep_break=True) == LineMatcher(text) + The lines are glued together depending on the :py3r:`keep_break` + option, whose default value is given by :attr:`default_options`: + + >>> text = 'foo\nbar' + >>> m1 = LineMatcher(text) + >>> m2 = LineMatcher.from_lines(text.splitlines()) + >>> m2.lines() == m1.lines() + True + >>> m2.lines() + ('foo', 'bar') + >>> + >>> m1 = LineMatcher(text, keep_break=True) + >>> m2 = LineMatcher.from_lines(text.splitlines(True), keep_break=True) + >>> m1.lines() == m2.lines() + True + >>> m1.lines() + ('foo\n', 'bar') """ keep_break = options.get('keep_break', cls.default_options['keep_break']) glue = '' if keep_break else '\n' @@ -120,23 +178,8 @@ def lines(self) -> Block: stack[-1] = cached = Block(lines, _check=False) return cached - def find( - self, - patterns: LinePattern | Set[LinePattern] | Sequence[LinePattern], - /, - *, - flavor: Flavor | None = None, - ) -> tuple[Line, ...]: - """Same as :meth:`iterfind` but returns a sequence of lines.""" - # use tuple to preserve immutability - return tuple(self.iterfind(patterns, flavor=flavor)) - def iterfind( 
- self, - patterns: LinePattern | Set[LinePattern] | Sequence[LinePattern], - /, - *, - flavor: Flavor | None = None, + self, patterns: LineSet, /, *, flavor: Flavor | None = None ) -> Iterator[Line]: """Yield the lines that match one (or more) of the given patterns. @@ -147,7 +190,24 @@ def iterfind( matcher.iterfind('line to find', ...) matcher.iterfind(['line to find'], ...) + + For simple usages, consider using the following flavor-binding aliases: + + .. default-role:: py3r + + +-----------+---------------------------+ + | Flavor | Alias | + +===========+===========================+ + | `literal` | :meth:`iterfind_literal` | + +-----------+---------------------------+ + | `re` | :meth:`iterfind_matching` | + +-----------+---------------------------+ + + .. default-role:: + + .. seealso:: :attr:`Options.flavor ` """ + # make sure that the patterns are correctly normalized patterns = _engine.to_line_patterns(patterns) if not patterns: # nothinig to match return @@ -164,14 +224,33 @@ def predicate(line: Line) -> bool: yield from filter(predicate, self) - def find_blocks( - self, pattern: str | BlockPattern, /, *, flavor: Flavor | None = None - ) -> tuple[Block, ...]: - """Same as :meth:`iterfind_blocks` but returns a sequence of blocks.""" - return tuple(self.iterfind_blocks(pattern, flavor=flavor)) + @final + def iterfind_literal(self, lines: LineSet, /) -> Iterator[Line]: + """Partialization of :meth:`iterfind` for the :py3r:`literal` flavor.""" + return self.iterfind(lines, flavor='literal') + + @final + def iterfind_matching(self, lines: LineSet) -> Iterator[Line]: + """Partialization of :meth:`iterfind` for the :py3r:`re` flavor.""" + return self.iterfind(lines, flavor='re') + + def find(self, patterns: LineSet, /, *, flavor: Flavor | None = None) -> tuple[Line, ...]: + """Same as :meth:`iterfind` but returns a sequence of lines.""" + # use tuple to preserve immutability + return tuple(self.iterfind(patterns, flavor=flavor)) + + @final + def 
find_literal(self, lines: LineSet, /) -> tuple[Line, ...]: + """Partialization of :meth:`find` for the :py3r:`literal` flavor.""" + return self.find(lines, flavor='literal') + + @final + def find_matching(self, lines: LineSet) -> tuple[Line, ...]: + """Partialization of :meth:`find` for the :py3r:`re` flavor.""" + return self.find(lines, flavor='re') def iterfind_blocks( - self, patterns: str | BlockPattern, /, *, flavor: Flavor | None = None + self, patterns: BlockLike, /, *, flavor: Flavor | None = None ) -> Iterator[Block]: r"""Yield non-overlapping blocks matching the given line patterns. @@ -183,10 +262,26 @@ def iterfind_blocks( matcher.iterfind_blocks('line1\nline2', ...) matcher.iterfind_blocks(['line1', 'line2'], ...) + + For simple usages, consider using the following flavor-binding aliases: + + .. default-role:: py3r + + +-----------+----------------------------------+ + | Flavor | Alias | + +===========+==================================+ + | `literal` | :meth:`iterfind_literal_blocks` | + +-----------+----------------------------------+ + | `re` | :meth:`iterfind_matching_blocks` | + +-----------+----------------------------------+ + + .. default-role:: + + .. seealso:: :attr:`Options.flavor ` """ - # in general, the patterns are smaller than the lines - # so we expect the following to be more efficient than - # cleaning up the whole text source + # The number of patterns is usually smaller than the expected lines + # and thus it is more efficient to normalize and count the number of + # patterns rather than cleaning up the entire text source. 
patterns = _engine.to_block_pattern(patterns) if not patterns: # no pattern to locate return @@ -195,35 +290,74 @@ def iterfind_blocks( if not lines: # no line to match return - if (width := len(patterns)) > len(lines): # too many lines to match + if (blocksize := len(patterns)) > len(lines): # too many lines to match return match_function = re.Pattern.match - compiled_patterns = self.__compile(patterns, flavor=flavor) - block_iterator = enumerate(_util.strict_windowed(lines, width)) + compiled_block = self.__compile(patterns, flavor=flavor) + block_iterator = enumerate(_util.strict_windowed(lines, blocksize)) for start, block in block_iterator: # check if the block matches the patterns line by line - if all(map(match_function, compiled_patterns, block)): + if all(map(match_function, compiled_block, block)): yield Block(block, start, _check=False) # Consume the iterator so that the next block consists # of lines just after the block that was just yielded. # # Note that since the iterator yielded *block*, its # state is already on the "next" line, so we need to - # advance by the block size - 1 only. - _util.consume(block_iterator, width - 1) + # advance the iterator by *blocksize - 1* steps. + _util.consume(block_iterator, blocksize - 1) + + @final + def iterfind_literal_blocks(self, block: BlockLike, /) -> Iterator[Block]: + """Partialization of :meth:`iterfind_blocks` for the :py3r:`literal` flavor.""" + return self.iterfind_blocks(block, flavor='literal') + + @final + def iterfind_matching_blocks(self, block: BlockLike) -> Iterator[Block]: + """Partialization of :meth:`iterfind_blocks` for the :py3r:`re` flavor.""" + return self.iterfind_blocks(block, flavor='re') + + def find_blocks( + self, pattern: BlockLike, /, *, flavor: Flavor | None = None + ) -> tuple[Block, ...]: + """Same as :meth:`iterfind_blocks` but returns a sequence of blocks. + + For simple usages, consider using the following flavor-binding aliases: + + .. 
default-role:: py3r + + +-----------+------------------------------+ + | Flavor | Alias | + +===========+==============================+ + | `literal` | :meth:`find_literal_blocks` | + +-----------+------------------------------+ + | `re` | :meth:`find_matching_blocks` | + +-----------+------------------------------+ + + .. default-role:: + + .. seealso:: :attr:`Options.flavor ` + """ + # use tuple to preserve immutability + return tuple(self.iterfind_blocks(pattern, flavor=flavor)) + + @final + def find_literal_blocks(self, block: BlockLike, /) -> tuple[Block, ...]: + """Partialization of :meth:`find_blocks` for the :py3r:`literal` flavor.""" + return self.find_blocks(block, flavor='literal') + + @final + def find_matching_blocks(self, block: BlockLike) -> tuple[Block, ...]: + """Partialization of :meth:`find_blocks` for the :py3r:`re` flavor.""" + return self.find_blocks(block, flavor='re') # assert methods - def assert_match( - self, - patterns: LinePattern | Set[LinePattern] | Sequence[LinePattern], - /, - *, - count: int | None = None, - flavor: Flavor | None = None, + def assert_any_of( + self, patterns: LineSet, /, *, count: int | None = None, flavor: Flavor | None = None ) -> None: - """Assert that the number of matching lines for the given patterns. + """Assert the number of matching lines for the given patterns. :param patterns: The patterns deciding whether a line is counted. :param count: If specified, the exact number of matching lines. @@ -231,20 +365,44 @@ def assert_match( By convention, the following are equivalent:: - matcher.assert_match('line to find', ...) - matcher.assert_match(['line to find'], ...) + matcher.assert_any_of('line to find', ...) + matcher.assert_any_of(['line to find'], ...) + + For simple usages, consider using the following flavor-binding aliases: + + .. 
default-role:: py3r + + +-----------+----------------------------+ + | Flavor | Alias | + +===========+============================+ + | `literal` | :meth:`assert_any_literal` | + +-----------+----------------------------+ + | `re` | :meth:`assert_any_match` | + +-----------+----------------------------+ + + .. default-role:: + + .. seealso:: :attr:`Options.flavor ` """ + # Normalize the patterns now so that we can have a nice debugging, + # even if `to_line_patterns` is called in `iterfind` (it is a no-op + # the second time). patterns = _engine.to_line_patterns(patterns) lines = self.iterfind(patterns, flavor=flavor) self.__assert_found('line', lines, patterns, count, flavor) - def assert_no_match( - self, - patterns: LinePattern | Set[LinePattern] | Sequence[LinePattern], - /, - *, - context: int = 3, - flavor: Flavor | None = None, + @final + def assert_any_literal(self, lines: LineSet, /, *, count: int | None = None) -> None: + """Partialization of :meth:`assert_any_of` for the :py3r:`literal` flavor.""" + return self.assert_any_of(lines, count=count, flavor='literal') + + @final + def assert_any_match(self, lines: LineSet, /, *, count: int | None = None) -> None: + """Partialization of :meth:`assert_any_of` for the :py3r:`re` flavor.""" + return self.assert_any_of(lines, count=count, flavor='re') + + def assert_none_of( + self, patterns: LineSet, /, *, context: int = 3, flavor: Flavor | None = None ) -> None: """Assert that there exist no matching line for the given patterns. @@ -254,20 +412,44 @@ def assert_no_match( By convention, the following are equivalent:: - matcher.assert_no_match('some bad line', ...) - matcher.assert_no_match(['some bad line'], ...) + matcher.assert_none_of('some bad line', ...) + matcher.assert_none_of(['some bad line'], ...) + + For simple usages, consider using the following flavor-binding aliases: + + .. 
default-role:: py3r + + +-----------+---------------------------+ + | Flavor | Alias | + +===========+===========================+ + | `literal` | :meth:`assert_no_literal` | + +-----------+---------------------------+ + | `re` | :meth:`assert_no_match` | + +-----------+---------------------------+ + + .. default-role:: + + .. seealso:: :attr:`Options.flavor ` """ + # Normalize the patterns now so that we can have a nice debugging, + # even if `to_line_patterns` is called in `iterfind` (it is a no-op + # the second time). if patterns := _engine.to_line_patterns(patterns): lines = self.iterfind(patterns, flavor=flavor) self.__assert_not_found('line', lines, patterns, context, flavor) + @final + def assert_no_literal(self, lines: LineSet, /, *, context: int = 3) -> None: + """Partialization of :meth:`assert_no_match` for the :py3r:`literal` flavor.""" + return self.assert_none_of(lines, context=context, flavor='literal') + + @final + def assert_no_match(self, lines: LineSet, /, *, context: int = 3) -> None: + """Partialization of :meth:`assert_no_match` for the :py3r:`re` flavor.""" + return self.assert_none_of(lines, context=context, flavor='re') + def assert_block( - self, - pattern: str | BlockPattern, - /, - *, - count: int | None = None, - flavor: Flavor | None = None, + self, pattern: BlockLike, /, *, count: int | None = None, flavor: Flavor | None = None ) -> None: r"""Assert that the number of matching blocks for the given patterns. @@ -279,18 +461,42 @@ def assert_block( matcher.assert_block('line1\nline2', ...) matcher.assert_block(['line1', 'line2'], ...) + + For simple usages, consider using the following flavor-binding aliases: + + .. 
default-role:: py3r + + +-----------+-------------------------------+ + | Flavor | Alias | + +===========+===============================+ + | `literal` | :meth:`assert_literal_block` | + +-----------+-------------------------------+ + | `re` | :meth:`assert_matching_block` | + +-----------+-------------------------------+ + + .. default-role:: + + .. seealso:: :attr:`Options.flavor ` """ + # Normalize the patterns now so that we can have a nice debugging, + # even if `to_block_pattern` is called in `iterfind` (it is a no-op + # the second time). patterns = _engine.to_block_pattern(pattern) blocks = self.iterfind_blocks(patterns, flavor=flavor) self.__assert_found('block', blocks, patterns, count, flavor) + @final + def assert_literal_block(self, block: BlockLike, /, *, count: int | None = None) -> None: + """Partialization of :meth:`assert_block` for the :py3r:`literal` flavor.""" + return self.assert_block(block, count=count, flavor='literal') + + @final + def assert_matching_block(self, block: BlockLike, /, *, count: int | None = None) -> None: + """Partialization of :meth:`assert_block` for the :py3r:`re` flavor.""" + return self.assert_block(block, count=count, flavor='re') + def assert_no_block( - self, - pattern: str | BlockPattern, - /, - *, - context: int = 3, - flavor: Flavor | None = None, + self, pattern: str | BlockPattern, /, *, context: int = 3, flavor: Flavor | None = None ) -> None: r"""Assert that there exist no matching blocks for the given patterns. @@ -302,11 +508,42 @@ def assert_no_block( matcher.assert_no_block('line1\nline2', ...) matcher.assert_no_block(['line1', 'line2'], ...) + + For simple usages, consider using the following flavor-binding aliases: + + .. 
default-role:: py3r + + +-----------+----------------------------------+ + | Flavor | Alias | + +===========+==================================+ + | `literal` | :meth:`assert_no_literal_block` | + +-----------+----------------------------------+ + | `re` | :meth:`assert_no_matching_block` | + +-----------+----------------------------------+ + + .. default-role:: + + .. seealso:: :attr:`Options.flavor ` """ + # Normalize the patterns now so that we can have a nice debugging, + # even if `to_block_pattern` is called in `iterfind` (it is a no-op + # the second time). if patterns := _engine.to_block_pattern(pattern): blocks = self.iterfind_blocks(patterns, flavor=flavor) self.__assert_not_found('block', blocks, patterns, context, flavor) + @final + def assert_no_literal_block(self, block: BlockLike, /, *, context: int = 3) -> None: + """Partialization of :meth:`assert_no_block` for the :py3r:`literal` flavor.""" + return self.assert_no_block(block, context=context, flavor='literal') + + @final + def assert_no_matching_block(self, block: BlockLike, /, *, context: int = 3) -> None: + """Partialization of :meth:`assert_no_block` for the :py3r:`re` flavor.""" + return self.assert_no_block(block, context=context, flavor='re') + + # private + def __assert_found( self, typ: _RegionType, # the region's type @@ -361,7 +598,5 @@ def __compile(self, patterns: Iterable[PatternLike], flavor: Flavor | None) -> P return _engine.compile(patterns, flavor=flavor) def __get_clean_lines(self) -> tuple[str, ...]: - # use a complete set of options so that the default - # that were chosen by cleaner.clean() are ignored options = cast(Options, self.complete_options) return tuple(cleaner.clean(self.content, **options)) From 35fe86bfcde3901ca1cdddbbe2527d01eeae01b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 28 Apr 2024 14:23:38 +0200 Subject: [PATCH 65/66] update tests --- tests/test_testing/test_matcher.py | 42 
+++++++++++----------- tests/test_testing/test_matcher_cleaner.py | 21 ++++++++++- tests/test_testing/test_matcher_engine.py | 16 ++++----- tests/test_testing/test_matcher_options.py | 5 +-- 4 files changed, 52 insertions(+), 32 deletions(-) diff --git a/tests/test_testing/test_matcher.py b/tests/test_testing/test_matcher.py index 6be805f810d..9cc624aa724 100644 --- a/tests/test_testing/test_matcher.py +++ b/tests/test_testing/test_matcher.py @@ -187,13 +187,13 @@ def test_matcher_cache(): @pytest.mark.parametrize( ('lines', 'flavor', 'pattern', 'expect'), [ - ([], 'none', [], []), - (['a'], 'none', '', []), - (['a'], 'none', [], []), - (['1', 'b', '3', 'a', '5', '!'], 'none', ('a', 'b'), [('b', 1), ('a', 3)]), + ([], 'literal', [], []), + (['a'], 'literal', '', []), + (['a'], 'literal', [], []), + (['1', 'b', '3', 'a', '5', '!'], 'literal', ('a', 'b'), [('b', 1), ('a', 3)]), (['blbl', 'yay', 'hihi', '^o^'], 'fnmatch', '*[ao]*', [('yay', 1), ('^o^', 3)]), (['111', 'hello', 'world', '222'], 're', r'\d+', [('111', 0), ('222', 3)]), - (['hello', 'world', 'yay'], 'none', {'hello', 'yay'}, [('hello', 0), ('yay', 2)]), + (['hello', 'world', 'yay'], 'literal', {'hello', 'yay'}, [('hello', 0), ('yay', 2)]), (['hello', 'world', 'yay'], 'fnmatch', {'hello', 'y*y'}, [('hello', 0), ('yay', 2)]), (['hello', 'world', 'yay'], 're', {'hello', r'^y\wy$'}, [('hello', 0), ('yay', 2)]), ], @@ -207,7 +207,7 @@ def test_matcher_find( matcher = LineMatcher.from_lines(lines, flavor=flavor) assert matcher.find(pattern) == tuple(expect) - matcher = LineMatcher.from_lines(lines, flavor='none') + matcher = LineMatcher.from_lines(lines, flavor='literal') assert matcher.find(pattern, flavor=flavor) == tuple(expect) @@ -233,20 +233,20 @@ def test_matcher_find_blocks(): def test_assert_match(): matcher = LineMatcher.from_lines(['a', 'b', 'c', 'd']) - matcher.assert_match('.+', flavor='re') - matcher.assert_match('[abcd]', flavor='fnmatch') + matcher.assert_any_of('.+', flavor='re') + 
matcher.assert_any_of('[abcd]', flavor='fnmatch') matcher = LineMatcher('') with pytest.raises(AssertionError, match=r'(?s:.+not found in.+)'): - matcher.assert_match('.+', flavor='re') + matcher.assert_any_of('.+', flavor='re') matcher = LineMatcher('') with pytest.raises(AssertionError, match=r'(?s:.+not found in.+)'): - matcher.assert_match('.*', flavor='re') + matcher.assert_any_of('.*', flavor='re') matcher = LineMatcher.from_lines(['\n']) assert matcher.lines() == [''] - matcher.assert_match('.*', flavor='re') + matcher.assert_any_of('.*', flavor='re') @pytest.mark.parametrize( @@ -276,15 +276,15 @@ def test_assert_match_debug(lines, pattern, flavor, expect): matcher = LineMatcher.from_lines(lines) with pytest.raises(AssertionError) as exc_info: - matcher.assert_match(pattern, flavor=flavor) + matcher.assert_any_of(pattern, flavor=flavor) assert parse_excinfo(exc_info) == expect def test_assert_no_match(): matcher = LineMatcher.from_lines(['a', 'b', 'c', 'd']) - matcher.assert_no_match(r'\d+', flavor='re') - matcher.assert_no_match('[1-9]', flavor='fnmatch') + matcher.assert_none_of(r'\d+', flavor='re') + matcher.assert_none_of('[1-9]', flavor='fnmatch') @pytest.mark.parametrize( @@ -316,7 +316,7 @@ def test_assert_no_match_debug(lines, pattern, flavor, context, expect): matcher = LineMatcher.from_lines(lines) with pytest.raises(AssertionError) as exc_info: - matcher.assert_no_match(pattern, context=context, flavor=flavor) + matcher.assert_none_of(pattern, context=context, flavor=flavor) assert parse_excinfo(exc_info) == expect @@ -330,14 +330,14 @@ def test_assert_block_coverage(maxsize, start, count, dedup): matcher = LineMatcher(source.text) # the main block is matched exactly once - matcher.assert_block(source.main, count=1, flavor='none') + matcher.assert_block(source.main, count=1, flavor='literal') assert source.base * source.ncopy == source.main - matcher.assert_block(source.base, count=source.ncopy, flavor='none') + matcher.assert_block(source.base, 
count=source.ncopy, flavor='literal') for subidx in range(1, count + 1): # check that the sub-blocks are matched correctly subblock = [Source.block_line(start + i) for i in range(subidx)] - matcher.assert_block(subblock, count=source.ncopy, flavor='none') + matcher.assert_block(subblock, count=source.ncopy, flavor='literal') @pytest.mark.parametrize( @@ -407,7 +407,7 @@ def test_assert_block_coverage(maxsize, start, count, dedup): ], ) def test_assert_block_debug(lines, pattern, count, expect): - matcher = LineMatcher.from_lines(lines, flavor='none') + matcher = LineMatcher.from_lines(lines, flavor='literal') if expect is None: matcher.assert_block(pattern, count=count) @@ -430,7 +430,7 @@ def test_assert_no_block_coverage(maxsize, start, count, dedup): # 'maxsize' might be smaller than start + (dedup + 1) * count # but it is fine since stop indices are clamped internally source = Source(maxsize, start, count, dedup=dedup) - matcher = LineMatcher(source.text, flavor='none') + matcher = LineMatcher(source.text, flavor='literal') with pytest.raises(AssertionError) as exc_info: matcher.assert_no_block(source.main, context=0) @@ -515,7 +515,7 @@ def test_assert_no_block_debug_coverage( maxsize, start, count, dedup, omit_prev, omit_next, context_size ): source = Source(maxsize, start, count, dedup=dedup) - matcher = LineMatcher(source.text, flavor='none') + matcher = LineMatcher(source.text, flavor='literal') with pytest.raises(AssertionError) as exc_info: matcher.assert_no_block(source.main, context=context_size) diff --git a/tests/test_testing/test_matcher_cleaner.py b/tests/test_testing/test_matcher_cleaner.py index 06750274ba3..1a6d4ffa3c9 100644 --- a/tests/test_testing/test_matcher_cleaner.py +++ b/tests/test_testing/test_matcher_cleaner.py @@ -2,11 +2,13 @@ import re from typing import TYPE_CHECKING +from unittest import mock import pytest from sphinx.testing.matcher import cleaner -from sphinx.testing.matcher.options import Options +from 
sphinx.testing.matcher._cleaner import HandlerMap, make_handlers +from sphinx.testing.matcher.options import OpCode, Options if TYPE_CHECKING: from collections.abc import Sequence @@ -15,6 +17,23 @@ from sphinx.testing.matcher.cleaner import TraceInfo +def test_implementation_details(): + # expected and supported operation codes + expect = sorted(getattr(OpCode, '__args__', [])) + qualname = f'{HandlerMap.__module__}.{HandlerMap.__name__}' + assert expect, f'{qualname}: invalid literal type: {OpCode}' + + # ensure that the typed dictionary is synchronized + actual = sorted(HandlerMap.__annotations__.keys()) + qualname = f'{HandlerMap.__module__}.{HandlerMap.__name__}' + assert actual == expect, f'invalid operation codes in: {qualname!r}' + + handlers = make_handlers(mock.Mock()) + assert isinstance(handlers, dict) + actual = sorted(handlers.keys()) + assert actual == expect, 'invalid factory function' + + def test_strip_chars(): assert cleaner.strip_chars('abaaa\n') == 'abaaa' assert cleaner.strip_chars('abaaa\n', False) == 'abaaa\n' diff --git a/tests/test_testing/test_matcher_engine.py b/tests/test_testing/test_matcher_engine.py index f2f1dbdbd50..d1f76a2cddb 100644 --- a/tests/test_testing/test_matcher_engine.py +++ b/tests/test_testing/test_matcher_engine.py @@ -63,25 +63,25 @@ def test_translate_expressions(): string, compiled = 'a*', re.compile('.*') patterns = (string, compiled) - assert [*engine.translate(patterns, flavor='none')] == [r'\Aa\*\Z', compiled] - assert [*engine.translate(patterns, flavor='re')] == [string, compiled] + assert [*engine.translate(patterns, flavor='literal')] == [r'\Aa\*\Z', compiled] assert [*engine.translate(patterns, flavor='fnmatch')] == [r'(?s:a.*)\Z', compiled] + assert [*engine.translate(patterns, flavor='re')] == [string, compiled] expect, func = [string.upper(), compiled], str.upper - assert [*engine.translate(patterns, flavor='none', escape=func)] == expect - assert [*engine.translate(patterns, flavor='re', 
regular_translate=func)] == expect + assert [*engine.translate(patterns, flavor='literal', escape=func)] == expect assert [*engine.translate(patterns, flavor='fnmatch', fnmatch_translate=func)] == expect + assert [*engine.translate(patterns, flavor='re', regular_translate=func)] == expect def test_compile_patterns(): string, compiled = 'a*', re.compile('.*') patterns = (string, compiled) - assert engine.compile(patterns, flavor='none') == (re.compile(r'\Aa\*\Z'), compiled) - assert engine.compile(patterns, flavor='re') == (re.compile(string), compiled) + assert engine.compile(patterns, flavor='literal') == (re.compile(r'\Aa\*\Z'), compiled) assert engine.compile(patterns, flavor='fnmatch') == (re.compile(r'(?s:a.*)\Z'), compiled) + assert engine.compile(patterns, flavor='re') == (re.compile(string), compiled) expect = (re.compile('A*'), compiled) - assert engine.compile(patterns, flavor='none', escape=str.upper) == expect - assert engine.compile(patterns, flavor='re', regular_translate=str.upper) == expect + assert engine.compile(patterns, flavor='literal', escape=str.upper) == expect assert engine.compile(patterns, flavor='fnmatch', fnmatch_translate=str.upper) == expect + assert engine.compile(patterns, flavor='re', regular_translate=str.upper) == expect diff --git a/tests/test_testing/test_matcher_options.py b/tests/test_testing/test_matcher_options.py index 30c8bd67a76..0a322604ee3 100644 --- a/tests/test_testing/test_matcher_options.py +++ b/tests/test_testing/test_matcher_options.py @@ -13,7 +13,8 @@ from sphinx.testing.matcher.options import _OPTION -def test_options_class(): +def test_options_type_implementation_details(): + """Test total and non-total synchronized typed dictionaries.""" assert len(Options.__annotations__) > 0, 'missing annotations' # ensure that the classes are kept synchronized @@ -50,7 +51,7 @@ def check(option: _OPTION, default: object) -> None: check('ignore', None) check('ops', ('strip', 'check', 'compress', 'unique', 'prune', 
'filter')) - check('flavor', 'none') + check('flavor', 'literal') # check that there are no leftover options assert sorted(processed) == sorted(Options.__annotations__) From 466bcc3eca9c698842fb71912d4bc96e9483d0ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 28 Apr 2024 14:23:45 +0200 Subject: [PATCH 66/66] remove unused code --- sphinx/testing/matcher/_codes.py | 66 -------------------------------- 1 file changed, 66 deletions(-) delete mode 100644 sphinx/testing/matcher/_codes.py diff --git a/sphinx/testing/matcher/_codes.py b/sphinx/testing/matcher/_codes.py deleted file mode 100644 index 806cd3b7243..00000000000 --- a/sphinx/testing/matcher/_codes.py +++ /dev/null @@ -1,66 +0,0 @@ -from __future__ import annotations - -import functools -import itertools -from typing import TYPE_CHECKING, TypedDict, final - -from sphinx.testing.matcher._util import unique_everseen, unique_justseen - -if TYPE_CHECKING: - from collections.abc import Callable, Iterable - - from sphinx.testing.matcher.options import OpCode, OptionsHolder - - DispatcherFunc = Callable[[Iterable[str]], Iterable[str]] - - -@final -class DispatcherMap(TypedDict): - # Whenever a new operation code is supported, do not forget to - # update :func:`get_dispatcher_map` and :func.`get_active_opcodes`. 
- strip: DispatcherFunc - check: DispatcherFunc - compress: DispatcherFunc - unique: DispatcherFunc - prune: DispatcherFunc - filter: DispatcherFunc - - -def get_dispatcher_map( - options: OptionsHolder, - # here, we pass the functions so that we do not need to import them - strip_lines: DispatcherFunc, - prune_lines: DispatcherFunc, -) -> DispatcherMap: - return { - 'strip': strip_lines, - 'check': functools.partial(filter, None), - 'compress': unique_justseen, - 'unique': unique_everseen, - 'prune': prune_lines, - 'filter': functools.partial(itertools.filterfalse, options.ignore), - } - - -def get_active_opcodes(options: OptionsHolder) -> Iterable[OpCode]: - disable: set[OpCode] = set() - - if options.strip_line is False: - disable.add('strip') - - if options.keep_empty: - disable.add('check') - - if not options.compress: - disable.add('compress') - - if not options.unique: - disable.add('unique') - - if not isinstance(prune_patterns := options.prune, str) and not prune_patterns: - disable.add('prune') - - if not callable(options.ignore): - disable.add('filter') - - return itertools.filterfalse(disable.__contains__, options.ops)