Textualize · willmcgugan · Jan 25, 2022 · Jan 25, 2022 · Jan 25, 2022 · Jan 25, 2022
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 
 - Workaround for edge case of object from Faiss with no `__class__` https://github.com/Textualize/rich/issues/1838
+- Add `Syntax.guess_lexer`, add support for more lexers (e.g. Django templates) https://github.com/Textualize/rich/pull/1869
 
 
 ### Added

diff --git a/rich/syntax.py b/rich/syntax.py
@@ -1,9 +1,9 @@
 import os.path
 import platform
-from rich.containers import Lines
 import textwrap
 from abc import ABC, abstractmethod
-from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type, Union
+from os import PathLike
+from typing import Any, AnyStr, Dict, Iterable, List, Optional, Set, Tuple, Type, Union
 
 from pygments.lexer import Lexer
 from pygments.lexers import get_lexer_by_name, guess_lexer_for_filename
@@ -23,6 +23,8 @@
 )
 from pygments.util import ClassNotFound
 
+from rich.containers import Lines
+
 from ._loop import loop_first
 from .color import Color, blend_rgb
 from .console import Console, ConsoleOptions, JustifyMethod, RenderResult
@@ -200,7 +202,8 @@ class Syntax(JupyterMixin):
         dedent (bool, optional): Enable stripping of initial whitespace. Defaults to False.
         line_numbers (bool, optional): Enable rendering of line numbers. Defaults to False.
         start_line (int, optional): Starting number for line numbers. Defaults to 1.
-        line_range (Tuple[int, int], optional): If given should be a tuple of the start and end line to render.
+        line_range (Tuple[int | None, int | None], optional): If given should be a tuple of the start and end line to render.
+            A value of None in the tuple indicates the range is open in that direction.
         highlight_lines (Set[int]): A set of line numbers to highlight.
         code_width: Width of code to render (not including line numbers), or ``None`` to use all available width.
         tab_size (int, optional): Size of tabs. Defaults to 4.
@@ -233,7 +236,7 @@ def __init__(
         dedent: bool = False,
         line_numbers: bool = False,
         start_line: int = 1,
-        line_range: Optional[Tuple[int, int]] = None,
+        line_range: Optional[Tuple[Optional[int], Optional[int]]] = None,
         highlight_lines: Optional[Set[int]] = None,
         code_width: Optional[int] = None,
         tab_size: int = 4,
@@ -299,22 +302,7 @@ def from_path(
         with open(path, "rt", encoding=encoding) as code_file:
             code = code_file.read()
 
-        lexer = None
-        lexer_name = "default"
-        try:
-            _, ext = os.path.splitext(path)
-            if ext:
-                extension = ext.lstrip(".").lower()
-                lexer = get_lexer_by_name(extension)
-                lexer_name = lexer.name
-        except ClassNotFound:
-            pass
-
-        if lexer is None:
-            try:
-                lexer_name = guess_lexer_for_filename(path, code).name
-            except ClassNotFound:
-                pass
+        lexer_name = cls.guess_lexer(path, code=code)
 
         return cls(
             code,
@@ -332,6 +320,48 @@ def from_path(
             indent_guides=indent_guides,
         )
 
+    @classmethod
+    def guess_lexer(cls, path: str, code: Optional[str] = None) -> str:
+        """Guess the alias of the Pygments lexer to use based on a path and an optional string of code.
+        If code is supplied, it will use a combination of the code and the filename to determine the
+        best lexer to use. For example, if the file is ``index.html`` and the file contains Django
+        templating syntax, then "html+django" will be returned. If the file is ``index.html``, and no
+        templating language is used, the "html" lexer will be used. If no string of code
+        is supplied, the lexer will be chosen based on the file extension.
+
+        Args:
+             path (str): The path to the file containing the code you wish to know the lexer for.
+             code (str, optional): Optional string of code that, when supplied, is used together with
+                the path to guess the lexer; if omitted or empty, only the file extension is considered.
+
+        Returns:
+            str: The first alias (or the name, if it has no aliases) of the best-matching Pygments lexer, or "default" if none is found.
+        """
+        lexer: Optional[Lexer] = None
+        lexer_name = "default"
+        if code:
+            try:
+                lexer = guess_lexer_for_filename(path, code)
+            except ClassNotFound:
+                pass
+
+        if not lexer:
+            try:
+                _, ext = os.path.splitext(path)
+                if ext:
+                    extension = ext.lstrip(".").lower()
+                    lexer = get_lexer_by_name(extension)
+            except ClassNotFound:
+                pass
+
+        if lexer:
+            if lexer.aliases:
+                lexer_name = lexer.aliases[0]
+            else:
+                lexer_name = lexer.name
+
+        return lexer_name
+
     def _get_base_style(self) -> Style:
         """Get the base style."""
         default_style = self._theme.get_background_style() + self.background_style
@@ -369,7 +399,9 @@ def lexer(self) -> Optional[Lexer]:
             return None
 
     def highlight(
-        self, code: str, line_range: Optional[Tuple[int, int]] = None
+        self,
+        code: str,
+        line_range: Optional[Tuple[Optional[int], Optional[int]]] = None,
     ) -> Text:
         """Highlight code and return a Text instance.
 
@@ -417,7 +449,7 @@ def tokens_to_spans() -> Iterable[Tuple[str, Optional[Style]]]:
                     """Convert tokens to spans."""
                     tokens = iter(line_tokenize())
                     line_no = 0
-                    _line_start = line_start - 1
+                    _line_start = line_start - 1 if line_start else 0
 
                     # Skip over tokens until line start
                     while line_no < _line_start:
@@ -430,7 +462,7 @@ def tokens_to_spans() -> Iterable[Tuple[str, Optional[Style]]]:
                         yield (token, _get_theme_style(token_type))
                         if token.endswith("\n"):
                             line_no += 1
-                            if line_no >= line_end:
+                            if line_end and line_no >= line_end:
                                 break
 
                 text.append_tokens(tokens_to_spans())
@@ -513,11 +545,6 @@ def __rich_console__(
             else self.code_width
         )
 
-        line_offset = 0
-        if self.line_range:
-            start_line, end_line = self.line_range
-            line_offset = max(0, start_line - 1)
-
         ends_on_nl = self.code.endswith("\n")
         code = self.code if ends_on_nl else self.code + "\n"
         code = textwrap.dedent(code) if self.dedent else code
@@ -559,6 +586,10 @@ def __rich_console__(
                     yield from syntax_line
             return
 
+        start_line, end_line = self.line_range or (None, None)
+        line_offset = 0
+        if start_line:
+            line_offset = max(0, start_line - 1)
         lines: Union[List[Text], Lines] = text.split("\n", allow_blank=ends_on_nl)
         if self.line_range:
             lines = lines[line_offset:end_line]

diff --git a/tests/test_syntax.py b/tests/test_syntax.py
@@ -1,17 +1,17 @@
 # coding=utf-8
 
+import os
 import sys
-import os, tempfile
+import tempfile
 
 import pytest
-from .render import render
+from pygments.lexers import PythonLexer
 
 from rich.panel import Panel
 from rich.style import Style
-from rich.syntax import Syntax, ANSISyntaxTheme, PygmentsSyntaxTheme, Color, Console
-
-from pygments.lexers import PythonLexer
+from rich.syntax import ANSISyntaxTheme, Color, Console, PygmentsSyntaxTheme, Syntax
 
+from .render import render
 
 CODE = '''\
 def loop_first_last(values: Iterable[T]) -> Iterable[Tuple[bool, bool, T]]:
@@ -266,6 +266,14 @@ def test_from_file_unknown_lexer():
         os.remove(path)
 
 
+def test_syntax_guess_lexer():
+    assert Syntax.guess_lexer("banana.py") == "python"
+    assert Syntax.guess_lexer("banana.py", "import this") == "python"
+    assert Syntax.guess_lexer("banana.html", "<a href='#'>hello</a>") == "html"
+    assert Syntax.guess_lexer("banana.html", "<%= @foo %>") == "rhtml"
+    assert Syntax.guess_lexer("banana.html", "{{something|filter:3}}") == "html+django"
+
+
 if __name__ == "__main__":
     syntax = Panel.fit(
         Syntax(