Skip to content

Commit

Permalink
Add lexer for ArangoDB Query Language (AQL) (#794)
Browse files Browse the repository at this point in the history
* Add ArangoDB Query Language (AQL) lexer
  • Loading branch information
Simran-B committed Jun 29, 2023
1 parent e422a62 commit e8acfc2
Show file tree
Hide file tree
Showing 17 changed files with 705 additions and 2 deletions.
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -37,7 +37,7 @@ translators for Pygments lexers and styles.

Prefix | Language
:----: | --------
A | ABAP, ABNF, ActionScript, ActionScript 3, Ada, Angular2, ANTLR, ApacheConf, APL, AppleScript, Arduino, Awk
A | ABAP, ABNF, ActionScript, ActionScript 3, Ada, Angular2, ANTLR, ApacheConf, APL, AppleScript, ArangoDB AQL, Arduino, Awk
B | Ballerina, Bash, Batchfile, BibTeX, Bicep, BlitzBasic, BNF, Brainfuck, BQN
C | C, C#, C++, Caddyfile, Caddyfile Directives, Cap'n Proto, Cassandra CQL, Ceylon, CFEngine3, cfstatement, ChaiScript, Chapel, Cheetah, Clojure, CMake, COBOL, CoffeeScript, Common Lisp, Coq, Crystal, CSS, Cython
D | D, Dart, Diff, Django/Jinja, Docker, DTD, Dylan
Expand Down
2 changes: 1 addition & 1 deletion lexers/README.md
Expand Up @@ -5,7 +5,7 @@ All lexers in Chroma should now be defined in XML unless they require custom cod
## Lexer tests

The tests in this directory feed a known input `testdata/<name>.actual` into the parser for `<name>` and check
that its output matches `<name>.exported`.
that its output matches `<name>.expected`.

It is also possible to perform several tests on the same parser `<name>`, by placing known inputs `*.actual` into a
directory `testdata/<name>/`.
Expand Down
175 changes: 175 additions & 0 deletions lexers/embedded/aql.xml
@@ -0,0 +1,175 @@
<lexer>
  <config>
    <name>ArangoDB AQL</name>
    <alias>aql</alias>
    <filename>*.aql</filename>
    <mime_type>text/x-aql</mime_type>
    <!-- dot_all lets "." cross newlines; the single-line comment rule below
         still stops at the first "\n" because ".*?" is non-greedy -->
    <dot_all>true</dot_all>
    <ensure_nl>true</ensure_nl>
    <!-- AQL keywords and function names are case-insensitive; the one
         case-sensitive exception (CURRENT/NEW/OLD) opts out with (?-i) -->
    <case_insensitive>true</case_insensitive>
  </config>
  <rules>
    <!-- Shared prelude: whitespace, // line comments, and /* block comments.
         Included at the top of "root" so comments win over all other rules. -->
    <state name="comments-and-whitespace">
      <rule pattern="\s+">
        <token type="Text"/>
      </rule>
      <rule pattern="//.*?\n">
        <token type="CommentSingle"/>
      </rule>
      <rule pattern="/\*">
        <token type="CommentMultiline"/>
        <push state="multiline-comment"/>
      </rule>
    </state>
    <!-- Body of a /* ... */ comment. Block comments do not nest: the first
         "*/" pops back out; a lone "*" is consumed so "**" cannot stall. -->
    <state name="multiline-comment">
      <rule pattern="[^*]+">
        <token type="CommentMultiline"/>
      </rule>
      <rule pattern="\*/">
        <token type="CommentMultiline"/>
        <pop depth="1"/>
      </rule>
      <rule pattern="\*">
        <token type="CommentMultiline"/>
      </rule>
    </state>
    <!-- "..." string body; backslash escapes (including \") are eaten first -->
    <state name="double-quote">
      <rule pattern="\\.">
        <token type="LiteralStringDouble"/>
      </rule>
      <rule pattern="[^&quot;\\]+">
        <token type="LiteralStringDouble"/>
      </rule>
      <rule pattern="&quot;">
        <token type="LiteralStringDouble"/>
        <pop depth="1"/>
      </rule>
    </state>
    <!-- '...' string body, same escape handling as double-quote -->
    <state name="single-quote">
      <rule pattern="\\.">
        <token type="LiteralStringSingle"/>
      </rule>
      <rule pattern="[^'\\]+">
        <token type="LiteralStringSingle"/>
      </rule>
      <rule pattern="'">
        <token type="LiteralStringSingle"/>
        <pop depth="1"/>
      </rule>
    </state>
    <!-- `...` quoted identifier (may span lines and contain escapes) -->
    <state name="backtick">
      <rule pattern="\\.">
        <token type="Name"/>
      </rule>
      <rule pattern="[^`\\]+">
        <token type="Name"/>
      </rule>
      <rule pattern="`">
        <token type="Name"/>
        <pop depth="1"/>
      </rule>
    </state>
    <!-- ´...´ quoted identifier: AQL also accepts U+00B4 ACUTE ACCENT
         as an identifier quote character -->
    <state name="forwardtick">
      <rule pattern="\\.">
        <token type="Name"/>
      </rule>
      <rule pattern="[^´\\]+">
        <token type="Name"/>
      </rule>
      <rule pattern="´">
        <token type="Name"/>
        <pop depth="1"/>
      </rule>
    </state>
    <!-- Bare identifiers: optional leading "$" or one-plus "_", then a letter
         run; or an identifier quoted with ` or ´. A bare "$" or "_" followed
         by a digit deliberately fails to match (lexed as Error by Chroma). -->
    <state name="identifier">
      <rule pattern="(?:\$?|_+)[a-z]+[_a-z0-9]*">
        <token type="Name"/>
      </rule>
      <rule pattern="`">
        <token type="Name"/>
        <push state="backtick"/>
      </rule>
      <rule pattern="´">
        <token type="Name"/>
        <push state="forwardtick"/>
      </rule>
    </state>
    <state name="root">
      <rule>
        <include state="comments-and-whitespace"/>
      </rule>
      <!-- Numeric literals: binary and hex first, then integers (the negative
           lookahead defers "1.5" / "1e3" to the float rule), then floats -->
      <rule pattern="0b[01]+">
        <token type="LiteralNumberBin"/>
      </rule>
      <rule pattern="0x[0-9a-f]+">
        <token type="LiteralNumberHex"/>
      </rule>
      <rule pattern="(?:0|[1-9][0-9]*)(?![\.e])">
        <token type="LiteralNumberInteger"/>
      </rule>
      <rule pattern="(?:(?:0|[1-9][0-9]*)(?:\.[0-9]+)?|\.[0-9]+)(?:e[\-\+]?[0-9]+)?">
        <token type="LiteralNumberFloat"/>
      </rule>
      <!-- Bind parameters: @@name is a collection bind parameter, @name a
           value bind parameter; @@ must be tried before @ -->
      <rule pattern="@@(?:_+[a-z0-9]+[a-z0-9_]*|[a-z0-9][a-z0-9_]*)">
        <token type="NameVariableGlobal"/>
      </rule>
      <rule pattern="@(?:_+[a-z0-9]+[a-z0-9_]*|[a-z0-9][a-z0-9_]*)">
        <token type="NameVariable"/>
      </rule>
      <rule pattern="=~|!~|[=!&lt;&gt;]=?|[%?:/*+-]|\.\.|&amp;&amp;|\|\|">
        <token type="Operator"/>
      </rule>
      <rule pattern="[.,(){}\[\]]">
        <token type="Punctuation"/>
      </rule>
      <!-- Namespaced user-defined functions, e.g. GROUP::FUNC(...); the
           lookahead requires a call so plain ns::name is not a function -->
      <rule pattern="[a-zA-Z0-9][a-zA-Z0-9_]*(?:::[a-zA-Z0-9_]+)+(?=\s*\()">
        <token type="NameFunction"/>
      </rule>
      <!-- "WITH COUNT INTO": COUNT is only a keyword inside this phrase -->
      <rule pattern="(WITH)(\s+)(COUNT)(\s+)(INTO)\b">
        <bygroups>
          <token type="KeywordReserved"/>
          <token type="Text"/>
          <token type="KeywordPseudo"/>
          <token type="Text"/>
          <token type="KeywordReserved"/>
        </bygroups>
      </rule>
      <!-- Contextual (non-reserved) keywords -->
      <rule pattern="(?:KEEP|PRUNE|SEARCH|TO)\b">
        <token type="KeywordPseudo"/>
      </rule>
      <!-- OPTIONS is only a keyword when directly followed by an object -->
      <rule pattern="OPTIONS(?=\s*\{)">
        <token type="KeywordPseudo"/>
      </rule>
      <rule pattern="(?:AGGREGATE|ALL|ALL_SHORTEST_PATHS|AND|ANY|ASC|AT LEAST|COLLECT|DESC|DISTINCT|FILTER|FOR|GRAPH|IN|INBOUND|INSERT|INTO|K_PATHS|K_SHORTEST_PATHS|LIKE|LIMIT|NONE|NOT|OR|OUTBOUND|REMOVE|REPLACE|RETURN|SHORTEST_PATH|SORT|UPDATE|UPSERT|WITH|WINDOW)\b">
        <token type="KeywordReserved"/>
      </rule>
      <rule pattern="LET\b">
        <token type="KeywordDeclaration"/>
      </rule>
      <rule pattern="(?:true|false|null)\b">
        <token type="KeywordConstant"/>
      </rule>
      <!-- (?-i): the pseudo-variables CURRENT/NEW/OLD are case-SENSITIVE -->
      <rule pattern="(?-i)(?:CURRENT|NEW|OLD)\b">
        <token type="NameBuiltinPseudo"/>
      </rule>
      <!-- Built-in functions, longest-prefix alternatives first where names
           overlap (e.g. count_distinct before count is not needed here since
           the call lookahead disambiguates, but variance_population precedes
           variance, stddev_sample precedes stddev, etc.) -->
      <rule pattern="(?:to_bool|to_number|to_string|to_array|to_list|is_null|is_bool|is_number|is_string|is_array|is_list|is_object|is_document|is_datestring|typename|json_stringify|json_parse|concat|concat_separator|char_length|lower|upper|substring|substring_bytes|left|right|trim|reverse|contains|log|log2|log10|exp|exp2|sin|cos|tan|asin|acos|atan|atan2|radians|degrees|pi|regex_test|regex_replace|like|floor|ceil|round|abs|rand|sqrt|pow|length|count|min|max|average|avg|sum|product|median|variance_population|variance_sample|variance|percentile|bit_and|bit_or|bit_xor|bit_negate|bit_test|bit_popcount|bit_shift_left|bit_shift_right|bit_construct|bit_deconstruct|bit_to_string|bit_from_string|first|last|unique|outersection|interleave|in_range|jaccard|matches|merge|merge_recursive|has|attributes|keys|values|unset|unset_recursive|keep|keep_recursive|near|within|within_rectangle|is_in_polygon|distance|fulltext|stddev_sample|stddev_population|stddev|slice|nth|position|contains_array|translate|zip|call|apply|push|append|pop|shift|unshift|remove_value|remove_values|remove_nth|replace_nth|date_now|date_timestamp|date_iso8601|date_dayofweek|date_year|date_month|date_day|date_hour|date_minute|date_second|date_millisecond|date_dayofyear|date_isoweek|date_isoweekyear|date_leapyear|date_quarter|date_days_in_month|date_trunc|date_round|date_add|date_subtract|date_diff|date_compare|date_format|date_utctolocal|date_localtoutc|date_timezone|date_timezones|fail|passthru|v8|sleep|schema_get|schema_validate|shard_id|version|noopt|noeval|not_null|first_list|first_document|parse_identifier|current_user|current_database|collection_count|pregel_result|collections|document|decode_rev|range|union|union_distinct|minus|intersection|flatten|is_same_collection|check_document|ltrim|rtrim|find_first|find_last|split|substitute|ipv4_to_number|ipv4_from_number|is_ipv4|md5|sha1|sha256|sha512|crc32|fnv64|hash|random_token|to_base64|to_hex|encode_uri_component|soundex|assert|warn|is_key|sorted|sorted_unique|count_distinct|count_unique|levenshtein_distance|levenshtein_match|regex_matches|regex_split|ngram_match|ngram_similarity|ngram_positional_similarity|uuid|tokens|exists|starts_with|phrase|min_match|bm25|tfidf|boost|analyzer|offset_info|value|cosine_similarity|decay_exp|decay_gauss|decay_linear|l1_distance|l2_distance|minhash|minhash_count|minhash_error|minhash_match|geo_point|geo_multipoint|geo_polygon|geo_multipolygon|geo_linestring|geo_multilinestring|geo_contains|geo_intersects|geo_equals|geo_distance|geo_area|geo_in_range)(?=\s*\()">
        <token type="NameFunction"/>
      </rule>
      <rule pattern="&quot;">
        <token type="LiteralStringDouble"/>
        <push state="double-quote"/>
      </rule>
      <rule pattern="'">
        <token type="LiteralStringSingle"/>
        <push state="single-quote"/>
      </rule>
      <!-- not part of the language but useful for highlighting query explain outputs -->
      <rule pattern="#\d+\b">
        <token type="NameLabel"/>
      </rule>
      <rule>
        <include state="identifier"/>
      </rule>
    </state>
  </rules>
</lexer>
5 changes: 5 additions & 0 deletions lexers/testdata/aql/comments.actual
@@ -0,0 +1,5 @@
code//comment
code/*comment*/
/* comment RETURN 42 "'` *
multiline */code
white space
13 changes: 13 additions & 0 deletions lexers/testdata/aql/comments.expected
@@ -0,0 +1,13 @@
[
{"type":"Name","value":"code"},
{"type":"CommentSingle","value":"//comment\n"},
{"type":"Name","value":"code"},
{"type":"CommentMultiline","value":"/*comment*/"},
{"type":"Text","value":"\n"},
{"type":"CommentMultiline","value":"/* comment RETURN 42 \"'` *\nmultiline */"},
{"type":"Name","value":"code"},
{"type":"Text","value":"\n"},
{"type":"Name","value":"white"},
{"type":"Text","value":" \t"},
{"type":"Name","value":"space"}
]
14 changes: 14 additions & 0 deletions lexers/testdata/aql/functions.actual
@@ -0,0 +1,14 @@
RAND()
rand ()
Collections()
COUNT_DISTINCT()
COUNT()
not_null()
REMOVE_VALUE()
group::func()
GROUP_57::F9_()

0::0()
1SUM()
_G::A()
_aql::avg()
48 changes: 48 additions & 0 deletions lexers/testdata/aql/functions.expected
@@ -0,0 +1,48 @@
[
{"type":"NameFunction","value":"RAND"},
{"type":"Punctuation","value":"()"},
{"type":"Text","value":"\n"},
{"type":"NameFunction","value":"rand"},
{"type":"Text","value":" "},
{"type":"Punctuation","value":"()"},
{"type":"Text","value":"\n"},
{"type":"NameFunction","value":"Collections"},
{"type":"Punctuation","value":"()"},
{"type":"Text","value":"\n"},
{"type":"NameFunction","value":"COUNT_DISTINCT"},
{"type":"Punctuation","value":"()"},
{"type":"Text","value":"\n"},
{"type":"NameFunction","value":"COUNT"},
{"type":"Punctuation","value":"()"},
{"type":"Text","value":"\n"},
{"type":"NameFunction","value":"not_null"},
{"type":"Punctuation","value":"()"},
{"type":"Text","value":"\n"},
{"type":"NameFunction","value":"REMOVE_VALUE"},
{"type":"Punctuation","value":"()"},
{"type":"Text","value":"\n"},
{"type":"NameFunction","value":"group::func"},
{"type":"Punctuation","value":"()"},
{"type":"Text","value":"\n"},
{"type":"NameFunction","value":"GROUP_57::F9_"},
{"type":"Punctuation","value":"()"},
{"type":"Text","value":"\n\n"},
{"type":"LiteralNumberInteger","value":"0"},
{"type":"Operator","value":"::"},
{"type":"LiteralNumberInteger","value":"0"},
{"type":"Punctuation","value":"()"},
{"type":"Text","value":"\n"},
{"type":"LiteralNumberInteger","value":"1"},
{"type":"NameFunction","value":"SUM"},
{"type":"Punctuation","value":"()"},
{"type":"Text","value":"\n"},
{"type":"Name","value":"_G"},
{"type":"Operator","value":"::"},
{"type":"Name","value":"A"},
{"type":"Punctuation","value":"()"},
{"type":"Text","value":"\n"},
{"type":"Name","value":"_aql"},
{"type":"Operator","value":"::"},
{"type":"NameFunction","value":"avg"},
{"type":"Punctuation","value":"()"}
]
19 changes: 19 additions & 0 deletions lexers/testdata/aql/identifiers.actual
@@ -0,0 +1,19 @@
i
doc
Vertex
n036
$X
__foo__
`FILTER`
`@12 3!`
´&tab FOR äöü´
`multi\`
\\\`
line`
´multi\´
\\\´
line´

$6
$_Y
_8
32 changes: 32 additions & 0 deletions lexers/testdata/aql/identifiers.expected
@@ -0,0 +1,32 @@
[
{"type":"Name","value":"i"},
{"type":"Text","value":"\n"},
{"type":"Name","value":"doc"},
{"type":"Text","value":"\n"},
{"type":"Name","value":"Vertex"},
{"type":"Text","value":"\n"},
{"type":"Name","value":"n036"},
{"type":"Text","value":"\n"},
{"type":"Name","value":"$X"},
{"type":"Text","value":"\n"},
{"type":"Name","value":"__foo__"},
{"type":"Text","value":"\n"},
{"type":"Name","value":"`FILTER`"},
{"type":"Text","value":"\n"},
{"type":"Name","value":"`@12 3!`"},
{"type":"Text","value":"\n"},
{"type":"Name","value":"´\u0026tab\tFOR äöü´"},
{"type":"Text","value":"\n"},
{"type":"Name","value":"`multi\\`\n\\\\\\`\nline`"},
{"type":"Text","value":"\n"},
{"type":"Name","value":"´multi\\´\n\\\\\\´\nline´"},
{"type":"Text","value":"\n\n"},
{"type":"Error","value":"$"},
{"type":"LiteralNumberInteger","value":"6"},
{"type":"Text","value":"\n"},
{"type":"Error","value":"$"},
{"type":"Name","value":"_Y"},
{"type":"Text","value":"\n"},
{"type":"Error","value":"_"},
{"type":"LiteralNumberInteger","value":"8"}
]
31 changes: 31 additions & 0 deletions lexers/testdata/aql/keywords.actual
@@ -0,0 +1,31 @@
LET
AT LEAST
NULL
nULL
true
false
TRUE
FALSE
True
fAlSe
WITH COUNT INTO
KEEP
prune
SEARCH
to
OPTIONS {}
options{}
CURRENT
NEW
OLD

LEAST
AT
CAT LEAST
KNULL
falsey
COUNT
OPTIONS
current
New
old

0 comments on commit e8acfc2

Please sign in to comment.