Skip to content

Commit

Permalink
[console] enhance detection and elimination of known ANSI escape sequ…
Browse files Browse the repository at this point in the history
…ences (#12216)

This PR improves the logic for detecting and eliminating ANSI color codes and other escape sequences introduced by Sphinx. ANSI escape sequences that are not natively known to Sphinx are not eliminated (e.g., VT100-specific functions).
  • Loading branch information
picnixz committed Apr 4, 2024
1 parent df3cde6 commit f7a1397
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 18 deletions.
4 changes: 2 additions & 2 deletions sphinx/util/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import TYPE_CHECKING

from sphinx.util.console import _strip_escape_sequences
from sphinx.util.console import strip_escape_sequences

if TYPE_CHECKING:
from typing import Protocol
Expand All @@ -25,7 +25,7 @@ def __init__(

def write(self, text: str, /) -> None:
self.stream_term.write(text)
self.stream_file.write(_strip_escape_sequences(text))
self.stream_file.write(strip_escape_sequences(text))

def flush(self) -> None:
if hasattr(self.stream_term, 'flush'):
Expand Down
62 changes: 48 additions & 14 deletions sphinx/util/console.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,25 @@ def turquoise(text: str) -> str: ... # NoQA: E704
except ImportError:
colorama = None

_CSI: Final[str] = re.escape('\x1b[') # 'ESC [': Control Sequence Introducer

_CSI = re.escape('\x1b[') # 'ESC [': Control Sequence Introducer
_ansi_re: re.Pattern[str] = re.compile(
# Pattern matching ANSI control sequences containing colors.
_ansi_color_re: Final[re.Pattern[str]] = re.compile(r'\x1b\[(?:\d+;){0,2}\d*m')

_ansi_re: Final[re.Pattern[str]] = re.compile(
_CSI
+ r"""
(
(\d\d;){0,2}\d\dm # ANSI colour code
(?:
(?:\d+;){0,2}\d*m # ANSI color code ('m' is equivalent to '0m')
|
\dK # ANSI Erase in Line
[012]?K # ANSI Erase in Line ('K' is equivalent to '0K')
)""",
re.VERBOSE | re.ASCII,
)
_ansi_color_re: Final[re.Pattern[str]] = re.compile('\x1b.*?m')
"""Pattern matching ANSI CSI colors (SGR) and erase line (EL) sequences.
See :func:`strip_escape_sequences` for details.
"""

codes: dict[str, str] = {}

Expand All @@ -80,7 +86,7 @@ def term_width_line(text: str) -> str:
return text + '\n'
else:
# codes are not displayed, this must be taken into account
return text.ljust(_tw + len(text) - len(_ansi_re.sub('', text))) + '\r'
return text.ljust(_tw + len(text) - len(strip_escape_sequences(text))) + '\r'


def color_terminal() -> bool:
Expand Down Expand Up @@ -128,11 +134,39 @@ def escseq(name: str) -> str:


def strip_colors(s: str) -> str:
    """Return *s* with every ANSI color code removed.

    Only the sequences matched by ``_ansi_color_re`` are dropped; the rest
    of *s* is returned unchanged.

    .. caution::

       This function is not meant to be used in production and should only
       be used for testing Sphinx's output messages.

    .. seealso:: :func:`strip_escape_sequences`
    """
    without_colors = _ansi_color_re.sub('', s)
    return without_colors


def _strip_escape_sequences(s: str) -> str:
return _ansi_re.sub('', s)
def strip_escape_sequences(text: str, /) -> str:
    r"""Strip the ANSI CSI color and "erase in line" sequences from *text*.

    Only the control sequences *natively* known to Sphinx are removed, i.e.,
    the colors declared in this module and "erase entire line"
    (``'\x1b[2K'``).  Any other `escape sequence`__ (e.g., VT100-specific
    functions) is not supported and is left in place.

    .. caution::

       This function is not meant to be used in production and should only
       be used for testing Sphinx's output messages that were not tampered
       with by third-party extensions.

    .. versionadded:: 7.3

       This function is added as an *experimental* feature.

    __ https://en.wikipedia.org/wiki/ANSI_escape_code
    """
    cleaned = _ansi_re.sub('', text)
    return cleaned


def create_color_func(name: str) -> None:
Expand All @@ -151,8 +185,8 @@ def inner(text: str) -> str:
'blink': '05m',
}

for _name, _value in _attrs.items():
codes[_name] = '\x1b[' + _value
for __name, __value in _attrs.items():
codes[__name] = '\x1b[' + __value

_colors = [
('black', 'darkgray'),
Expand All @@ -165,9 +199,9 @@ def inner(text: str) -> str:
('lightgray', 'white'),
]

for i, (dark, light) in enumerate(_colors, 30):
codes[dark] = '\x1b[%im' % i
codes[light] = '\x1b[%im' % (i + 60)
for __i, (__dark, __light) in enumerate(_colors, 30):
codes[__dark] = '\x1b[%im' % __i
codes[__light] = '\x1b[%im' % (__i + 60)

_orig_codes = codes.copy()

Expand Down
4 changes: 2 additions & 2 deletions sphinx/util/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import TYPE_CHECKING

from sphinx.errors import SphinxParallelError
from sphinx.util.console import _strip_escape_sequences
from sphinx.util.console import strip_escape_sequences

if TYPE_CHECKING:
from sphinx.application import Sphinx
Expand All @@ -31,7 +31,7 @@ def save_traceback(app: Sphinx | None, exc: BaseException) -> str:
last_msgs = exts_list = ''
else:
extensions = app.extensions.values()
last_msgs = '\n'.join(f'# {_strip_escape_sequences(s).strip()}'
last_msgs = '\n'.join(f'# {strip_escape_sequences(s).strip()}'
for s in app.messagelog)
exts_list = '\n'.join(f'# {ext.name} ({ext.version})' for ext in extensions
if ext.version != 'builtin')
Expand Down
90 changes: 90 additions & 0 deletions tests/test_util/test_util_console.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from __future__ import annotations

import itertools
import operator
from typing import TYPE_CHECKING

import pytest

from sphinx.util.console import blue, reset, strip_colors, strip_escape_sequences

if TYPE_CHECKING:
from collections.abc import Callable, Sequence
from typing import Final, TypeVar

_T = TypeVar('_T')

# 'ESC [ 2 A': the tests below expect neither strip function to remove it.
CURSOR_UP: Final[str] = '\x1b[2A' # ignored ANSI code
# 'ESC [ 2 K': the tests below expect strip_escape_sequences() to remove it
# and strip_colors() to keep it.
ERASE_LINE: Final[str] = '\x1b[2K' # supported ANSI code
# Sample payload; it starts with '\x07' (ASCII BEL), which is a control
# character but not an escape sequence, so it is expected to be preserved.
TEXT: Final[str] = '\x07 Hello world!'


@pytest.mark.parametrize(
    ('strip_function', 'ansi_base_blocks', 'text_base_blocks'),
    [
        (
            strip_colors,
            # ERASE_LINE appears twice so that a tested string may contain two
            [TEXT, blue(TEXT), reset(TEXT), ERASE_LINE, ERASE_LINE, CURSOR_UP],
            # color codes are removed; ERASE_LINE and CURSOR_UP are retained
            [TEXT, TEXT, TEXT, ERASE_LINE, ERASE_LINE, CURSOR_UP],
        ),
        (
            strip_escape_sequences,
            # ERASE_LINE appears twice so that a tested string may contain two
            [TEXT, blue(TEXT), reset(TEXT), ERASE_LINE, ERASE_LINE, CURSOR_UP],
            # colors and ERASE_LINE are removed; CURSOR_UP is retained
            [TEXT, TEXT, TEXT, '', '', CURSOR_UP],
        ),
    ],
    ids=[strip_colors.__name__, strip_escape_sequences.__name__],
)
def test_strip_ansi(
    strip_function: Callable[[str], str],
    ansi_base_blocks: Sequence[str],
    text_base_blocks: Sequence[str],
) -> None:
    """Check *strip_function* on many joined arrangements of the base blocks.

    ``ansi_base_blocks[i]`` is an input fragment and ``text_base_blocks[i]``
    is what that fragment must become once stripped.  Every permutation of
    the fragments is cycled to several lengths, joined with several
    separators, and the stripped result is compared with the expected text.
    """
    assert callable(strip_function)
    assert len(text_base_blocks) == len(ansi_base_blocks)
    N = len(ansi_base_blocks)

    def next_ansi_blocks(choices: Sequence[str], n: int) -> Sequence[str]:
        # Take the first *n* words of the endless repetition of *choices*,
        # e.g. ``next_ansi_blocks(['a', 'b'], 3) == ['a', 'b', 'a']``.
        first = operator.itemgetter(0)
        endless = itertools.cycle(choices)
        return [first(pair) for pair in zip(endless, range(n))]

    # walk through every ordering of the N base blocks
    for sigma in itertools.permutations(range(N), N):
        # reorder the blocks carrying ANSI codes ...
        ansi_blocks = [ansi_base_blocks[index] for index in sigma]
        # ... and reorder the expected (stripped) blocks the same way
        text_blocks = [text_base_blocks[index] for index in sigma]

        for glue in ('.', '\n', '\r\n'):
            for n in range(4 * N):
                ansi_strings = next_ansi_blocks(ansi_blocks, n)
                text_strings = next_ansi_blocks(text_blocks, n)
                assert len(ansi_strings) == len(text_strings) == n

                ansi_string = glue.join(ansi_strings)
                text_string = glue.join(text_strings)
                assert strip_function(ansi_string) == text_string


def test_strip_ansi_short_forms() -> None:
    """Check that abbreviated forms of the known sequences are stripped too."""
    # In Sphinx, we always "normalize" the color codes so that they
    # match "\x1b\[(\d\d;){0,2}(\d\d)m" but it might happen that
    # some messages use '\x1b[0m' instead of ``reset(s)``, so we
    # test whether this alternative form is supported or not.
    short_color_forms = (
        # \x1b[m and \x1b[0m are equivalent to \x1b[00m
        ('\x1b[m', ''),
        ('\x1b[0m', ''),
        # \x1b[1m is equivalent to \x1b[01m
        ('\x1b[1mbold\x1b[0m', 'bold'),
    )

    for strip_function in (strip_colors, strip_escape_sequences):
        for raw, expected in short_color_forms:
            assert strip_function(raw) == expected

    # \x1b[K is equivalent to \x1b[0K
    assert strip_escape_sequences('\x1b[K') == ''

0 comments on commit f7a1397

Please sign in to comment.