Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Syntax.guess_lexer, add support for more lexers (e.g. Django templates etc.) #1869

Merged
merged 6 commits into from Jan 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

- Workaround for edge case of object from Faiss with no `__class__` https://github.com/Textualize/rich/issues/1838
- Add `Syntax.guess_lexer`, add support for more lexers (e.g. Django templates etc.) https://github.com/Textualize/rich/pull/1869


### Added
Expand Down
87 changes: 59 additions & 28 deletions rich/syntax.py
@@ -1,9 +1,9 @@
import os.path
import platform
from rich.containers import Lines
import textwrap
from abc import ABC, abstractmethod
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type, Union
from os import PathLike
from typing import Any, AnyStr, Dict, Iterable, List, Optional, Set, Tuple, Type, Union

from pygments.lexer import Lexer
from pygments.lexers import get_lexer_by_name, guess_lexer_for_filename
Expand All @@ -23,6 +23,8 @@
)
from pygments.util import ClassNotFound

from rich.containers import Lines

from ._loop import loop_first
from .color import Color, blend_rgb
from .console import Console, ConsoleOptions, JustifyMethod, RenderResult
Expand Down Expand Up @@ -200,7 +202,8 @@ class Syntax(JupyterMixin):
dedent (bool, optional): Enable stripping of initial whitespace. Defaults to False.
line_numbers (bool, optional): Enable rendering of line numbers. Defaults to False.
start_line (int, optional): Starting number for line numbers. Defaults to 1.
line_range (Tuple[int, int], optional): If given should be a tuple of the start and end line to render.
line_range (Tuple[int | None, int | None], optional): If given should be a tuple of the start and end line to render.
A value of None in the tuple indicates the range is open in that direction.
highlight_lines (Set[int]): A set of line numbers to highlight.
code_width: Width of code to render (not including line numbers), or ``None`` to use all available width.
tab_size (int, optional): Size of tabs. Defaults to 4.
Expand Down Expand Up @@ -233,7 +236,7 @@ def __init__(
dedent: bool = False,
line_numbers: bool = False,
start_line: int = 1,
line_range: Optional[Tuple[int, int]] = None,
line_range: Optional[Tuple[Optional[int], Optional[int]]] = None,
highlight_lines: Optional[Set[int]] = None,
code_width: Optional[int] = None,
tab_size: int = 4,
Expand Down Expand Up @@ -299,22 +302,7 @@ def from_path(
with open(path, "rt", encoding=encoding) as code_file:
code = code_file.read()

lexer = None
lexer_name = "default"
try:
_, ext = os.path.splitext(path)
if ext:
extension = ext.lstrip(".").lower()
lexer = get_lexer_by_name(extension)
lexer_name = lexer.name
except ClassNotFound:
pass

if lexer is None:
try:
lexer_name = guess_lexer_for_filename(path, code).name
except ClassNotFound:
pass
lexer_name = cls.guess_lexer(path, code=code)

return cls(
code,
Expand All @@ -332,6 +320,48 @@ def from_path(
indent_guides=indent_guides,
)

@classmethod
def guess_lexer(cls, path: str, code: Optional[str] = None) -> str:
    """Pick the Pygments lexer alias that best fits a file path and, optionally, its contents.

    Detection happens in two stages:

    1. If a non-empty ``code`` string is given, Pygments is asked to guess from the
       filename and the code together. For example ``index.html`` containing Django
       templating syntax yields "html+django", whereas plain markup yields "html".
    2. If that stage is skipped or finds nothing, the file extension (lower-cased,
       without the leading dot) is looked up as a lexer alias.

    Args:
        path (str): The path to the file containing the code you wish to know the lexer for.
        code (str, optional): Contents of the file. When supplied (and non-empty) it is
            consulted first, together with the filename.

    Returns:
        str: The first alias of the matching lexer, or the lexer's name if it declares
            no aliases, or "default" when no lexer could be determined.
    """
    found: Optional[Lexer] = None

    # Stage 1: content-aware guess. An empty string is treated the same as no code.
    if code:
        try:
            found = guess_lexer_for_filename(path, code)
        except ClassNotFound:
            pass

    # Stage 2: fall back to treating the file extension as a lexer alias.
    if not found:
        suffix = os.path.splitext(path)[1]
        if suffix:
            try:
                found = get_lexer_by_name(suffix.lstrip(".").lower())
            except ClassNotFound:
                pass

    if not found:
        return "default"

    # Aliases are preferred over the human-readable name.
    return found.aliases[0] if found.aliases else found.name

def _get_base_style(self) -> Style:
"""Get the base style."""
default_style = self._theme.get_background_style() + self.background_style
Expand Down Expand Up @@ -369,7 +399,9 @@ def lexer(self) -> Optional[Lexer]:
return None

def highlight(
self, code: str, line_range: Optional[Tuple[int, int]] = None
self,
code: str,
line_range: Optional[Tuple[Optional[int], Optional[int]]] = None,
) -> Text:
"""Highlight code and return a Text instance.

Expand Down Expand Up @@ -417,7 +449,7 @@ def tokens_to_spans() -> Iterable[Tuple[str, Optional[Style]]]:
"""Convert tokens to spans."""
tokens = iter(line_tokenize())
line_no = 0
_line_start = line_start - 1
_line_start = line_start - 1 if line_start else 0

# Skip over tokens until line start
while line_no < _line_start:
Expand All @@ -430,7 +462,7 @@ def tokens_to_spans() -> Iterable[Tuple[str, Optional[Style]]]:
yield (token, _get_theme_style(token_type))
if token.endswith("\n"):
line_no += 1
if line_no >= line_end:
if line_end and line_no >= line_end:
break

text.append_tokens(tokens_to_spans())
Expand Down Expand Up @@ -513,11 +545,6 @@ def __rich_console__(
else self.code_width
)

line_offset = 0
if self.line_range:
start_line, end_line = self.line_range
line_offset = max(0, start_line - 1)

ends_on_nl = self.code.endswith("\n")
code = self.code if ends_on_nl else self.code + "\n"
code = textwrap.dedent(code) if self.dedent else code
Expand Down Expand Up @@ -559,6 +586,10 @@ def __rich_console__(
yield from syntax_line
return

start_line, end_line = self.line_range or (None, None)
line_offset = 0
if start_line:
line_offset = max(0, start_line - 1)
lines: Union[List[Text], Lines] = text.split("\n", allow_blank=ends_on_nl)
if self.line_range:
lines = lines[line_offset:end_line]
Expand Down
18 changes: 13 additions & 5 deletions tests/test_syntax.py
@@ -1,17 +1,17 @@
# coding=utf-8

import os
import sys
import os, tempfile
import tempfile

import pytest
from .render import render
from pygments.lexers import PythonLexer

from rich.panel import Panel
from rich.style import Style
from rich.syntax import Syntax, ANSISyntaxTheme, PygmentsSyntaxTheme, Color, Console

from pygments.lexers import PythonLexer
from rich.syntax import ANSISyntaxTheme, Color, Console, PygmentsSyntaxTheme, Syntax

from .render import render

CODE = '''\
def loop_first_last(values: Iterable[T]) -> Iterable[Tuple[bool, bool, T]]:
Expand Down Expand Up @@ -266,6 +266,14 @@ def test_from_file_unknown_lexer():
os.remove(path)


def test_syntax_guess_lexer():
    """Check that Syntax.guess_lexer maps a path (and optional code) to the expected alias."""
    # Each entry is (positional arguments for guess_lexer, expected lexer alias).
    expectations = [
        (("banana.py",), "python"),
        (("banana.py", "import this"), "python"),
        (("banana.html", "<a href='#'>hello</a>"), "html"),
        (("banana.html", "<%= @foo %>"), "rhtml"),
        (("banana.html", "{{something|filter:3}}"), "html+django"),
    ]
    for arguments, alias in expectations:
        assert Syntax.guess_lexer(*arguments) == alias


if __name__ == "__main__":
syntax = Panel.fit(
Syntax(
Expand Down