-
Notifications
You must be signed in to change notification settings - Fork 631
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add World of Warcraft TOC file lexer (#2244)
Also fix a broken link and decode as UTF8 in count_token_references.py.
- Loading branch information
Showing
12 changed files
with
771 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
""" | ||
pygments.lexers.wowtoc | ||
~~~~~~~~~~~~~~~~~~~~~~ | ||
Lexer for World of Warcraft TOC files, which describe game addon metadata. | ||
:copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS. | ||
:license: BSD, see LICENSE for details. | ||
""" | ||
import re | ||
|
||
from pygments.lexer import RegexLexer, bygroups | ||
from pygments.token import Comment, Name, Text, Punctuation, String, Keyword | ||
|
||
__all__ = ["WoWTocLexer"] | ||
|
||
def _create_tag_line_pattern(inner_pattern, ignore_case=False):
    """Build the regex source for one ``## Tag: value`` line of a TOC file.

    The resulting pattern is anchored with ``^``/``$`` and is laid out as:

    * group 1: the ``##`` marker
    * group 2: spaces after the marker
    * group 3: the tag name -- contributed by *inner_pattern*, which is
      expected to contain exactly one capturing group so that the numbering
      of the following groups holds
    * group 4: spaces before the colon
    * group 5: the ``:`` separator
    * group 6: spaces after the colon
    * group 7: the value (non-greedy)
    * group 8: trailing spaces

    :param inner_pattern: regex source matching the tag name.
    :param ignore_case: when true, the pattern is prefixed with the inline
        ``(?i)`` flag so the whole line matches case-insensitively.
    :return: the assembled regex source as a string.
    """
    flag_prefix = r"(?i)" if ignore_case else r""
    pieces = (
        flag_prefix,
        r"^(##)( *)",                # groups 1, 2
        inner_pattern,               # group 3
        r"( *)(:)( *)(.*?)( *)$",    # groups 4, 5, 6, 7, 8
    )
    return "".join(pieces)
|
||
|
||
def _create_tag_line_token(inner_pattern, inner_token, ignore_case=False):
    """Build a ``(pattern, callback)`` lexer rule for one kind of tag line.

    Every tag line has the same layout; only the regex for the tag name and
    the token assigned to it differ between kinds of tags, so this helper
    acts as a template for the rule.

    :param inner_pattern: regex source matching the tag name (group 3 of the
        assembled pattern).
    :param inner_token: token type emitted for the tag name.
    :param ignore_case: forwarded to ``_create_tag_line_pattern``.
    :return: a 2-tuple of the regex source and a ``bygroups`` callback.
    """
    line_pattern = _create_tag_line_pattern(inner_pattern, ignore_case=ignore_case)

    # One token per capturing group of the tag line pattern, in group order.
    group_tokens = (
        Keyword.Declaration,  # "##" marker
        Text.Whitespace,
        inner_token,          # tag name
        Text.Whitespace,
        Punctuation,          # ":" separator
        Text.Whitespace,
        String,               # tag value
        Text.Whitespace,
    )
    return line_pattern, bygroups(*group_tokens)
|
||
|
||
class WoWTocLexer(RegexLexer):
    """
    Lexer for World of Warcraft TOC files.

    .. versionadded:: 2.13
    """

    name = "World of Warcraft TOC"
    aliases = ["wowtoc"]
    filenames = ["*.toc"]

    # The tag rules are listed from most specific to least specific; the last
    # tag rule accepts any tag name, so it has to come after the others.
    tokens = {
        "root": [
            # official localized tags, Notes and Title
            # (normal part is insensitive, locale part is sensitive)
            _create_tag_line_token(
                r"((?:[nN][oO][tT][eE][sS]|[tT][iI][tT][lL][eE])-(?:ptBR|zhCN|enCN|frFR|deDE|itIT|esMX|ptPT|koKR|ruRU|esES|zhTW|enTW|enGB|enUS))",
                Name.Builtin,
            ),
            # other official tags
            _create_tag_line_token(
                r"(Interface|Title|Notes|RequiredDeps|Dep[^: ]*|OptionalDeps|LoadOnDemand|LoadWith|LoadManagers|SavedVariablesPerCharacter|SavedVariables|DefaultState|Secure|Author|Version)",
                Name.Builtin,
                ignore_case=True,
            ),
            # user-defined tags
            _create_tag_line_token(
                r"(X-[^: ]*)",
                Name.Variable,
                ignore_case=True,
            ),
            # non-conforming tags, but still valid
            _create_tag_line_token(
                r"([^: ]*)",
                Name.Other,
            ),

            # Comments
            (r"^#.*$", Comment),

            # Addon Files
            (r"^.+$", Name),
        ]
    }

    def analyse_text(text):
        """Score how likely *text* is a World of Warcraft TOC file.

        The score is the sum of:

        * 0.8 if an ``## Interface: <digits>`` tag line is found anywhere in
          the text and its value starts with at least five digits,
        * 0.1 if the text mentions ``.lua`` (case-insensitive),
        * 0.05 if the text mentions ``.xml`` (case-insensitive).

        :param text: the text to analyse.
        :return: ``0`` when no marker is present, otherwise the summed score.
        """
        # at time of writing, this file suffix conflicts with one of Tex's in
        # markup.py. Tex's analyse_text() appears to be definitive (binary) and does
        # not share any likeness to WoW TOCs, which means we won't have to compete
        # with it by arbitrary increments in score.

        result = 0

        # while not required, an almost certain marker of WoW TOC's is the interface tag
        # if this tag is omitted, players will need to opt-in to loading the addon with
        # an options change ("Load out of date addons"). the value is also standardized:
        # `<major><minor><patch>`, with minor and patch being two-digit zero-padded.
        interface_pattern = _create_tag_line_pattern(r"(Interface)", ignore_case=True)
        # The tag line pattern is anchored with ^ and $, so it must be searched in
        # multi-line mode; otherwise it could only match when the whole text is a
        # single Interface line and the tag would be missed in any real TOC file.
        match = re.search(interface_pattern, text, re.MULTILINE)
        if match and re.match(r"(\d+)(\d{2})(\d{2})", match.group(7)):
            result += 0.8

        casefolded = text.casefold()
        # Lua file listing is good marker too, but probably conflicts with many other
        # lexers
        if ".lua" in casefolded:
            result += 0.1
        # ditto for XML files, but they're less used in WoW TOCs
        if ".xml" in casefolded:
            result += 0.05

        return result
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# | ||
#a | ||
# a comment | ||
# a comment with a # in it | ||
## no comma, thus a comment | ||
## has space: and is thus, a comment |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
a | ||
Foo.lua | ||
Spaces allowed.lua |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
## TiTlE-ptBR: value | ||
## TiTlE-zhCN: value | ||
## TiTlE-enCN: value | ||
## TiTlE-frFR: value | ||
## TiTlE-deDE: value | ||
## TiTlE-itIT: value | ||
## TiTlE-esMX: value | ||
## TiTlE-ptPT: value | ||
## TiTlE-koKR: value | ||
## TiTlE-ruRU: value | ||
## TiTlE-esES: value | ||
## TiTlE-zhTW: value | ||
## TiTlE-enTW: value | ||
## TiTlE-enGB: value | ||
## TiTlE-enUS: value | ||
## NoTeS-ptBR: value | ||
## NoTeS-zhCN: value | ||
## NoTeS-enCN: value | ||
## NoTeS-frFR: value | ||
## NoTeS-deDE: value | ||
## NoTeS-itIT: value | ||
## NoTeS-esMX: value | ||
## NoTeS-ptPT: value | ||
## NoTeS-koKR: value | ||
## NoTeS-ruRU: value | ||
## NoTeS-esES: value | ||
## NoTeS-zhTW: value | ||
## NoTeS-enTW: value | ||
## NoTeS-enGB: value | ||
## NoTeS-enUS: value | ||
## Interface: value | ||
## interface: value | ||
## Title: value | ||
## title: value | ||
## Notes: value | ||
## notes: value | ||
## RequiredDeps: value | ||
## requireddeps: value | ||
## Dependencies: value | ||
## dependencies: value | ||
## OptionalDeps: value | ||
## optionaldeps: value | ||
## LoadOnDemand: value | ||
## loadondemand: value | ||
## LoadWith: value | ||
## loadwith: value | ||
## LoadManagers: value | ||
## loadmanagers: value | ||
## SavedVariablesPerCharacter: value | ||
## savedvariablespercharacter: value | ||
## SavedVariables: value | ||
## savedvariables: value | ||
## DefaultState: value | ||
## defaultstate: value | ||
## Secure: value | ||
## secure: value | ||
## Author: value | ||
## author: value | ||
## Version: value | ||
## version: value | ||
## Dep: value | ||
## dep: value | ||
## DepSomething: value |
Oops, something went wrong.