From 739028eb930298ce714107d6dab930dc23bacb66 Mon Sep 17 00:00:00 2001 From: Forest0923 Date: Wed, 24 Aug 2022 13:54:52 +0900 Subject: [PATCH] Add pygments2chroma_xml.py This script automatically generates a lexer in xml format from Pygments. --- _tools/pygments2chroma_xml.py | 191 ++++++++++++++++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 _tools/pygments2chroma_xml.py diff --git a/_tools/pygments2chroma_xml.py b/_tools/pygments2chroma_xml.py new file mode 100644 index 000000000..a824fee64 --- /dev/null +++ b/_tools/pygments2chroma_xml.py @@ -0,0 +1,191 @@ +import functools +import importlib +import json +import os +import re +import sys +import types +import html + +import pystache +from pygments import lexer as pygments_lexer +from pygments.token import _TokenType + +TEMPLATE = r''' + + + {{name}} + {{#aliases}} + {{alias}} + {{/aliases}} + {{#filenames}} + {{filename}} + {{/filenames}} + {{#mimetypes}} + {{mimetype}} + {{/mimetypes}} + {{#re_ignorecase}} + true + {{/re_ignorecase}} + {{#re_dotall}} + true + {{/re_dotall}} + {{#re_not_multiline}} + true + {{/re_not_multiline}} + + + {{#tokens}} + + {{#rules}} + {{{.}}} + {{/rules}} + + {{/tokens}} + + +''' + + +def xml_regex(s): + return xml_string(s) + +def xml_string(s): + s = html.escape(s) + return '"' + s + '"' + + +def to_camel_case(snake_str): + components = snake_str.split('_') + return ''.join(x.title() for x in components) + + +def warning(message): + print('warning: ' + message, file=sys.stderr) + + +def resolve_emitter(emitter): + if isinstance(emitter, types.FunctionType): + if repr(emitter).startswith('' % '" state="'.join(rule[2]) + else: + raise ValueError('unsupported modifier %r' % (rule[2],)) + out.append('{}{}'.format(regex, emitter, modifier)) + elif isinstance(rule, pygments_lexer.include): + out.append(''.format(rule)) + elif isinstance(rule, pygments_lexer.default): + process_state_action(rule.state) + out.append('{}'.format(''.join(process_state_action(rule.state)))) + else: + raise ValueError('unsupported rule %r' % (rule,)) + return out + + +class TemplateView(object): + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) + + def re_not_multiline(self): + return not (self.regex_flags & re.MULTILINE) + + def re_dotall(self): + return self.regex_flags & re.DOTALL + + def re_ignorecase(self): + return self.regex_flags & re.IGNORECASE + + +def main(): + package_name, symbol_name = sys.argv[1].rsplit(sep=".", maxsplit=1) + + package = importlib.import_module(package_name) + + lexer_cls = getattr(package, symbol_name) + + assert issubclass(lexer_cls, pygments_lexer.RegexLexer), 'can only translate from RegexLexer' + + print(pystache.render(TEMPLATE, TemplateView( + name=lexer_cls.name, + regex_flags=lexer_cls.flags, + aliases=[{'alias': alias} for alias in lexer_cls.aliases], + filenames=[{'filename': filename} for filename in lexer_cls.filenames], + mimetypes=[{'mimetype': mimetype} for mimetype in lexer_cls.mimetypes], + tokens=[{'state': state, 'rules': translate_rules(rules)} for (state, rules) in lexer_cls.get_tokendefs().items()], + ))) + + +if __name__ == '__main__': + main()