diff --git a/CHANGELOG.md b/CHANGELOG.md index f3819a781..f3e81f724 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Workaround for edge case of object from Faiss with no `__class__` https://github.com/Textualize/rich/issues/1838 +- Add `Syntax.guess_lexer`, add support for more lexers (e.g. Django templates etc.) https://github.com/Textualize/rich/pull/1869 ### Added diff --git a/rich/syntax.py b/rich/syntax.py index 759351907..e912daaa1 100644 --- a/rich/syntax.py +++ b/rich/syntax.py @@ -1,9 +1,9 @@ import os.path import platform -from rich.containers import Lines import textwrap from abc import ABC, abstractmethod -from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type, Union +from os import PathLike +from typing import Any, AnyStr, Dict, Iterable, List, Optional, Set, Tuple, Type, Union from pygments.lexer import Lexer from pygments.lexers import get_lexer_by_name, guess_lexer_for_filename @@ -23,6 +23,8 @@ ) from pygments.util import ClassNotFound +from rich.containers import Lines + from ._loop import loop_first from .color import Color, blend_rgb from .console import Console, ConsoleOptions, JustifyMethod, RenderResult @@ -200,7 +202,8 @@ class Syntax(JupyterMixin): dedent (bool, optional): Enable stripping of initial whitespace. Defaults to False. line_numbers (bool, optional): Enable rendering of line numbers. Defaults to False. start_line (int, optional): Starting number for line numbers. Defaults to 1. - line_range (Tuple[int, int], optional): If given should be a tuple of the start and end line to render. + line_range (Tuple[int | None, int | None], optional): If given should be a tuple of the start and end line to render. + A value of None in the tuple indicates the range is open in that direction. highlight_lines (Set[int]): A set of line numbers to highlight. 
code_width: Width of code to render (not including line numbers), or ``None`` to use all available width. tab_size (int, optional): Size of tabs. Defaults to 4. @@ -233,7 +236,7 @@ def __init__( dedent: bool = False, line_numbers: bool = False, start_line: int = 1, - line_range: Optional[Tuple[int, int]] = None, + line_range: Optional[Tuple[Optional[int], Optional[int]]] = None, highlight_lines: Optional[Set[int]] = None, code_width: Optional[int] = None, tab_size: int = 4, @@ -299,22 +302,7 @@ def from_path( with open(path, "rt", encoding=encoding) as code_file: code = code_file.read() - lexer = None - lexer_name = "default" - try: - _, ext = os.path.splitext(path) - if ext: - extension = ext.lstrip(".").lower() - lexer = get_lexer_by_name(extension) - lexer_name = lexer.name - except ClassNotFound: - pass - - if lexer is None: - try: - lexer_name = guess_lexer_for_filename(path, code).name - except ClassNotFound: - pass + lexer_name = cls.guess_lexer(path, code=code) return cls( code, @@ -332,6 +320,48 @@ def from_path( indent_guides=indent_guides, ) + @classmethod + def guess_lexer(cls, path: str, code: Optional[str] = None) -> str: + """Guess the alias of the Pygments lexer to use based on a path and an optional string of code. + If code is supplied, it will use a combination of the code and the filename to determine the + best lexer to use. For example, if the file is ``index.html`` and the file contains Django + templating syntax, then "html+django" will be returned. If the file is ``index.html``, and no + templating language is used, the "html" lexer will be used. If no string of code + is supplied, the lexer will be chosen based on the file extension. + + Args: + path (str): The path to the file containing the code you wish to know the lexer for. + code (str, optional): Optional string of code. When supplied, it is used together with the + filename to pick the lexer; the file extension alone is the fallback if that finds no lexer. 
+ + Returns: + str: The name of the Pygments lexer that best matches the supplied path/code. + """ + lexer: Optional[Lexer] = None + lexer_name = "default" + if code: + try: + lexer = guess_lexer_for_filename(path, code) + except ClassNotFound: + pass + + if not lexer: + try: + _, ext = os.path.splitext(path) + if ext: + extension = ext.lstrip(".").lower() + lexer = get_lexer_by_name(extension) + except ClassNotFound: + pass + + if lexer: + if lexer.aliases: + lexer_name = lexer.aliases[0] + else: + lexer_name = lexer.name + + return lexer_name + def _get_base_style(self) -> Style: """Get the base style.""" default_style = self._theme.get_background_style() + self.background_style @@ -369,7 +399,9 @@ def lexer(self) -> Optional[Lexer]: return None def highlight( - self, code: str, line_range: Optional[Tuple[int, int]] = None + self, + code: str, + line_range: Optional[Tuple[Optional[int], Optional[int]]] = None, ) -> Text: """Highlight code and return a Text instance. @@ -417,7 +449,7 @@ def tokens_to_spans() -> Iterable[Tuple[str, Optional[Style]]]: """Convert tokens to spans.""" tokens = iter(line_tokenize()) line_no = 0 - _line_start = line_start - 1 + _line_start = line_start - 1 if line_start else 0 # Skip over tokens until line start while line_no < _line_start: @@ -430,7 +462,7 @@ def tokens_to_spans() -> Iterable[Tuple[str, Optional[Style]]]: yield (token, _get_theme_style(token_type)) if token.endswith("\n"): line_no += 1 - if line_no >= line_end: + if line_end and line_no >= line_end: break text.append_tokens(tokens_to_spans()) @@ -513,11 +545,6 @@ def __rich_console__( else self.code_width ) - line_offset = 0 - if self.line_range: - start_line, end_line = self.line_range - line_offset = max(0, start_line - 1) - ends_on_nl = self.code.endswith("\n") code = self.code if ends_on_nl else self.code + "\n" code = textwrap.dedent(code) if self.dedent else code @@ -559,6 +586,10 @@ def __rich_console__( yield from syntax_line return + start_line, end_line = 
self.line_range or (None, None) + line_offset = 0 + if start_line: + line_offset = max(0, start_line - 1) lines: Union[List[Text], Lines] = text.split("\n", allow_blank=ends_on_nl) if self.line_range: lines = lines[line_offset:end_line] diff --git a/tests/test_syntax.py b/tests/test_syntax.py index 834285ac1..a8ac05b6b 100644 --- a/tests/test_syntax.py +++ b/tests/test_syntax.py @@ -1,17 +1,17 @@ # coding=utf-8 +import os import sys -import os, tempfile +import tempfile import pytest -from .render import render +from pygments.lexers import PythonLexer from rich.panel import Panel from rich.style import Style -from rich.syntax import Syntax, ANSISyntaxTheme, PygmentsSyntaxTheme, Color, Console - -from pygments.lexers import PythonLexer +from rich.syntax import ANSISyntaxTheme, Color, Console, PygmentsSyntaxTheme, Syntax +from .render import render CODE = '''\ def loop_first_last(values: Iterable[T]) -> Iterable[Tuple[bool, bool, T]]: @@ -266,6 +266,14 @@ def test_from_file_unknown_lexer(): os.remove(path) +def test_syntax_guess_lexer(): + assert Syntax.guess_lexer("banana.py") == "python" + assert Syntax.guess_lexer("banana.py", "import this") == "python" + assert Syntax.guess_lexer("banana.html", "hello") == "html" + assert Syntax.guess_lexer("banana.html", "<%= @foo %>") == "rhtml" + assert Syntax.guess_lexer("banana.html", "{{something|filter:3}}") == "html+django" + + if __name__ == "__main__": syntax = Panel.fit( Syntax(