Skip to content

Commit

Permalink
Add lexer for BQN (#691)
Browse files Browse the repository at this point in the history
Based on the Vim syntax highlighting by Marshall Lochbaum [0], with number
parsing from the PrismJS highlighter by Christopher Rodriguez [1].

The lexed types are selected to be as descriptive as possible while
retaining similar groupings to the Vim highlighter, so the highlighted
code looks familiar to BQN users.

[0]: https://github.com/mlochbaum/BQN/blob/a436a71a083f410fd58d86e8d228470c68c24b91/editors/vim/syntax/bqn.vim
[1]: PrismJS/prism@859f99a
  • Loading branch information
dancek committed Nov 8, 2022
1 parent 71d4be6 commit 5644f41
Show file tree
Hide file tree
Showing 4 changed files with 373 additions and 1 deletion.
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -37,7 +37,7 @@ translators for Pygments lexers and styles.
Prefix | Language
:----: | --------
A | ABAP, ABNF, ActionScript, ActionScript 3, Ada, Angular2, ANTLR, ApacheConf, APL, AppleScript, Arduino, Awk
B | Ballerina, Base Makefile, Bash, Batchfile, BibTeX, Bicep, BlitzBasic, BNF, Brainfuck
B | Ballerina, Base Makefile, Bash, Batchfile, BibTeX, Bicep, BlitzBasic, BNF, Brainfuck, BQN
C | C, C#, C++, Caddyfile, Caddyfile Directives, Cap'n Proto, Cassandra CQL, Ceylon, CFEngine3, cfstatement, ChaiScript, Chapel, Cheetah, Clojure, CMake, COBOL, CoffeeScript, Common Lisp, Coq, Crystal, CSS, Cython
D | D, Dart, Diff, Django/Jinja, Docker, DTD, Dylan
E | EBNF, Elixir, Elm, EmacsLisp, Erlang
Expand Down
83 changes: 83 additions & 0 deletions lexers/embedded/bqn.xml
@@ -0,0 +1,83 @@
<!--
  Lexer definition for BQN source files (*.bqn).

  All rules live in the single "root" state and are tried top to bottom, so
  the order of rules matters wherever two patterns can match at the same
  position (see the note on the underscore-prefixed name rules below).
-->
<lexer>
  <config>
    <name>BQN</name>
    <alias>bqn</alias>
    <filename>*.bqn</filename>
  </config>
  <rules>
    <state name="root">
      <rule pattern="\s+">
        <token type="TextWhitespace"/>
      </rule>
      <!-- Shebang: only recognised at the very start of the input (\A). -->
      <rule pattern="\A#!.+$">
        <token type="CommentPreproc"/>
      </rule>
      <!-- Line comment: from # to the end of the line. -->
      <rule pattern="#.*$">
        <token type="CommentSingle"/>
      </rule>
      <!-- Double-quoted string; a doubled quote inside the string is accepted
           as part of its contents. -->
      <rule pattern="&quot;(?:[^&quot;]|&quot;&quot;)*&quot;">
        <token type="LiteralString"/>
      </rule>
      <!-- Braces. -->
      <rule pattern="[{}]">
        <token type="KeywordPseudo"/>
      </rule>
      <!-- List and array brackets, and the strand ligature. -->
      <rule pattern="[⟨⟩\[\]‿]">
        <token type="KeywordPseudo"/>
      </rule>
      <rule pattern="[()]">
        <token type="Punctuation"/>
      </rule>
      <rule pattern="[:;?]">
        <token type="Punctuation"/>
      </rule>
      <!-- Separators. -->
      <rule pattern="[⋄,]">
        <token type="KeywordPseudo"/>
      </rule>
      <!-- Arrow glyphs. -->
      <rule pattern="[←⇐↩→]">
        <token type="Text"/>
      </rule>
      <!-- Character literal: exactly one character between single quotes. -->
      <rule pattern="'.'">
        <token type="LiteralStringChar"/>
      </rule>
      <!-- Single-glyph modifiers written after their operand. -->
      <rule pattern="[˙˜˘¨⌜⁼´˝`]">
        <token type="Operator"/>
      </rule>
      <!-- Single-glyph modifiers written between two operands. -->
      <rule pattern="[∘○⊸⟜⌾⊘◶⎉⚇⍟⎊]">
        <token type="OperatorWord"/>
      </rule>
      <!-- Primitive function glyphs and the upper-case double-struck
           special names. -->
      <rule pattern="[𝔽𝔾𝕎𝕏𝕊+\-×÷⋆√⌊⌈|¬∧∨&lt;&gt;≠=≤≥≡≢⊣⊢⥊∾≍⋈↑↓↕«»⌽⍉/⍋⍒⊏⊑⊐⊒∊⍷⊔!⍕⍎]">
        <token type="NameFunction"/>
      </rule>
      <!-- Lower-case double-struck special names. -->
      <rule pattern="[𝕗𝕘𝕨𝕩𝕤]">
        <token type="Name"/>
      </rule>
      <rule pattern="·">
        <token type="NameConstant"/>
      </rule>
      <rule pattern="@">
        <token type="LiteralStringChar"/>
      </rule>
      <!-- Numbers in exponent notation with a mandatory exponent part. -->
      <rule pattern="\d+(?:\.\d+)?[eE]¯?\d+">
        <token type="LiteralNumber"/>
      </rule>
      <!-- General numeric literal: optional ¯/∞/π prefix, a decimal number
           or one of ¯ ∞ π, and an optional component introduced by j. -->
      <rule pattern="[¯∞π]?(?:\d*\.?\b\d+(?:e[+¯]?\d+|E[+¯]?\d+)?|¯|∞|π)(?:j¯?(?:(?:\d+(?:\.\d+)?|\.\d+)(?:e[+¯]?\d+|E[+¯]?\d+)?|¯|∞|π))?">
        <token type="LiteralNumber"/>
      </rule>
      <!-- Identifiers are classified by spelling; an optional leading •
           is accepted on every form. Lower-case initial: Name. -->
      <rule pattern="(•?[a-z][A-Z_a-z0-9π∞¯]*|𝕣)">
        <token type="Name"/>
      </rule>
      <!-- Upper-case initial: NameFunction. -->
      <rule pattern="•?[A-Z][A-Z_a-z0-9π∞¯]*">
        <token type="NameFunction"/>
      </rule>
      <!-- Leading AND trailing underscore: OperatorWord.
           This rule must come before the leading-underscore-only rule:
           that rule's character class contains "_", so it would otherwise
           consume the trailing underscore and this rule could never match.
           The negative lookahead makes sure the closing underscore really
           ends the identifier, so a name such as _a_b is not split and is
           left for the next rule. -->
      <rule pattern="(•?_[A-Za-z][A-Z_a-z0-9π∞¯]*_(?![A-Z_a-z0-9π∞¯])|_𝕣_)">
        <token type="OperatorWord"/>
      </rule>
      <!-- Leading underscore only: Operator. -->
      <rule pattern="(•?_[A-Za-z][A-Z_a-z0-9π∞¯]*|_𝕣)">
        <token type="Operator"/>
      </rule>
      <!-- A lone dot that was not consumed as part of a number. -->
      <rule pattern="\.">
        <token type="Text"/>
      </rule>
    </state>
  </rules>
</lexer>
31 changes: 31 additions & 0 deletions lexers/testdata/bqn.actual
@@ -0,0 +1,31 @@
#! /usr/bin/env bqn

# From BQN documentation / quick start:
# https://mlochbaum.github.io/BQN/doc/quick.html

# Case conversion utilities
case ← {
diff ← -´ "Aa"
Lower ⇐ -⟜diff
Upper ⇐ Lower⁼
}

hw ← <˘ 2‿∘ ⥊ "helloworld"
hw case.Upper⌾(⊑¨)↩
•Out hw ↩ ∾ ⥊⍉ [hw, ", "‿"!"] # Hello, World!

# Split at spaces and repeated characters
Split ← {
!1==𝕩 ⋄ (!2=•Type)¨𝕩
Proc ← {
· 𝕊 ' ': spl⇐1 ; # Space: break and delete it
prev Fn cur: ⟨spl,str⟩⇐
spl←0 ⋄ str←⟨cur⟩ # Include and don't break...
{ prev=cur ? spl+↩1 ; @ } # except at equal characters
}
GV‿GS ← {𝕏¨}¨ ⟨ {⟨s⇐str⟩:s;""}
{𝕩.spl} ⟩
r ← Proc{»𝔽¨⊢} 𝕩
(∾¨ GV ⊔˜ ·+`GS) r
}
•Show Split hw # ⟨ "Hel" "lo," "World!" ⟩
258 changes: 258 additions & 0 deletions lexers/testdata/bqn.expected
@@ -0,0 +1,258 @@
[
{"type":"CommentPreproc","value":"#! /usr/bin/env bqn"},
{"type":"TextWhitespace","value":"\n\n"},
{"type":"CommentSingle","value":"# From BQN documentation / quick start:"},
{"type":"TextWhitespace","value":"\n"},
{"type":"CommentSingle","value":"# https://mlochbaum.github.io/BQN/doc/quick.html"},
{"type":"TextWhitespace","value":"\n\n"},
{"type":"CommentSingle","value":"# Case conversion utilities"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Name","value":"case"},
{"type":"TextWhitespace","value":" "},
{"type":"Text","value":"←"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordPseudo","value":"{"},
{"type":"TextWhitespace","value":"\n "},
{"type":"Name","value":"diff"},
{"type":"TextWhitespace","value":" "},
{"type":"Text","value":"←"},
{"type":"TextWhitespace","value":" "},
{"type":"NameFunction","value":"-"},
{"type":"Operator","value":"´"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralString","value":"\"Aa\""},
{"type":"TextWhitespace","value":"\n "},
{"type":"NameFunction","value":"Lower"},
{"type":"TextWhitespace","value":" "},
{"type":"Text","value":"⇐"},
{"type":"TextWhitespace","value":" "},
{"type":"NameFunction","value":"-"},
{"type":"OperatorWord","value":"⟜"},
{"type":"Name","value":"diff"},
{"type":"TextWhitespace","value":"\n "},
{"type":"NameFunction","value":"Upper"},
{"type":"TextWhitespace","value":" "},
{"type":"Text","value":"⇐"},
{"type":"TextWhitespace","value":" "},
{"type":"NameFunction","value":"Lower"},
{"type":"Operator","value":"⁼"},
{"type":"TextWhitespace","value":"\n"},
{"type":"KeywordPseudo","value":"}"},
{"type":"TextWhitespace","value":"\n\n"},
{"type":"Name","value":"hw"},
{"type":"TextWhitespace","value":" "},
{"type":"Text","value":"←"},
{"type":"TextWhitespace","value":" "},
{"type":"NameFunction","value":"\u003c"},
{"type":"Operator","value":"˘"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralNumber","value":"2"},
{"type":"KeywordPseudo","value":"‿"},
{"type":"OperatorWord","value":"∘"},
{"type":"TextWhitespace","value":" "},
{"type":"NameFunction","value":"⥊"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralString","value":"\"helloworld\""},
{"type":"TextWhitespace","value":"\n"},
{"type":"Name","value":"hw"},
{"type":"TextWhitespace","value":" "},
{"type":"Name","value":"case"},
{"type":"Text","value":"."},
{"type":"NameFunction","value":"Upper"},
{"type":"OperatorWord","value":"⌾"},
{"type":"Punctuation","value":"("},
{"type":"NameFunction","value":"⊑"},
{"type":"Operator","value":"¨"},
{"type":"Punctuation","value":")"},
{"type":"Text","value":"↩"},
{"type":"TextWhitespace","value":"\n"},
{"type":"NameFunction","value":"•Out"},
{"type":"TextWhitespace","value":" "},
{"type":"Name","value":"hw"},
{"type":"TextWhitespace","value":" "},
{"type":"Text","value":"↩"},
{"type":"TextWhitespace","value":" "},
{"type":"NameFunction","value":"∾"},
{"type":"TextWhitespace","value":" "},
{"type":"NameFunction","value":"⥊⍉"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordPseudo","value":"["},
{"type":"Name","value":"hw"},
{"type":"KeywordPseudo","value":","},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralString","value":"\", \""},
{"type":"KeywordPseudo","value":"‿"},
{"type":"LiteralString","value":"\"!\""},
{"type":"KeywordPseudo","value":"]"},
{"type":"TextWhitespace","value":" "},
{"type":"CommentSingle","value":"# Hello, World!"},
{"type":"TextWhitespace","value":"\n\n"},
{"type":"CommentSingle","value":"# Split at spaces and repeated characters"},
{"type":"TextWhitespace","value":"\n"},
{"type":"NameFunction","value":"Split"},
{"type":"TextWhitespace","value":" "},
{"type":"Text","value":"←"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordPseudo","value":"{"},
{"type":"TextWhitespace","value":"\n "},
{"type":"NameFunction","value":"!"},
{"type":"LiteralNumber","value":"1"},
{"type":"NameFunction","value":"=="},
{"type":"Name","value":"𝕩"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordPseudo","value":"⋄"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"("},
{"type":"NameFunction","value":"!"},
{"type":"LiteralNumber","value":"2"},
{"type":"NameFunction","value":"=•Type"},
{"type":"Punctuation","value":")"},
{"type":"Operator","value":"¨"},
{"type":"Name","value":"𝕩"},
{"type":"TextWhitespace","value":"\n "},
{"type":"NameFunction","value":"Proc"},
{"type":"TextWhitespace","value":" "},
{"type":"Text","value":"←"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordPseudo","value":"{"},
{"type":"TextWhitespace","value":"\n "},
{"type":"NameConstant","value":"·"},
{"type":"TextWhitespace","value":" "},
{"type":"NameFunction","value":"𝕊"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralStringChar","value":"' '"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"Name","value":"spl"},
{"type":"Text","value":"⇐"},
{"type":"LiteralNumber","value":"1"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":";"},
{"type":"TextWhitespace","value":" "},
{"type":"CommentSingle","value":"# Space: break and delete it"},
{"type":"TextWhitespace","value":"\n "},
{"type":"Name","value":"prev"},
{"type":"TextWhitespace","value":" "},
{"type":"NameFunction","value":"Fn"},
{"type":"TextWhitespace","value":" "},
{"type":"Name","value":"cur"},
{"type":"Punctuation","value":":"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordPseudo","value":"⟨"},
{"type":"Name","value":"spl"},
{"type":"KeywordPseudo","value":","},
{"type":"Name","value":"str"},
{"type":"KeywordPseudo","value":"⟩"},
{"type":"Text","value":"⇐"},
{"type":"TextWhitespace","value":"\n "},
{"type":"Name","value":"spl"},
{"type":"Text","value":"←"},
{"type":"LiteralNumber","value":"0"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordPseudo","value":"⋄"},
{"type":"TextWhitespace","value":" "},
{"type":"Name","value":"str"},
{"type":"Text","value":"←"},
{"type":"KeywordPseudo","value":"⟨"},
{"type":"Name","value":"cur"},
{"type":"KeywordPseudo","value":"⟩"},
{"type":"TextWhitespace","value":" "},
{"type":"CommentSingle","value":"# Include and don't break..."},
{"type":"TextWhitespace","value":"\n "},
{"type":"KeywordPseudo","value":"{"},
{"type":"TextWhitespace","value":" "},
{"type":"Name","value":"prev"},
{"type":"NameFunction","value":"="},
{"type":"Name","value":"cur"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"?"},
{"type":"TextWhitespace","value":" "},
{"type":"Name","value":"spl"},
{"type":"NameFunction","value":"+"},
{"type":"Text","value":"↩"},
{"type":"LiteralNumber","value":"1"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":";"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralStringChar","value":"@"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordPseudo","value":"}"},
{"type":"TextWhitespace","value":" "},
{"type":"CommentSingle","value":"# except at equal characters"},
{"type":"TextWhitespace","value":"\n "},
{"type":"KeywordPseudo","value":"}"},
{"type":"TextWhitespace","value":"\n "},
{"type":"NameFunction","value":"GV"},
{"type":"KeywordPseudo","value":"‿"},
{"type":"NameFunction","value":"GS"},
{"type":"TextWhitespace","value":" "},
{"type":"Text","value":"←"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordPseudo","value":"{"},
{"type":"NameFunction","value":"𝕏"},
{"type":"Operator","value":"¨"},
{"type":"KeywordPseudo","value":"}"},
{"type":"Operator","value":"¨"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordPseudo","value":"⟨"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordPseudo","value":"{⟨"},
{"type":"Name","value":"s"},
{"type":"Text","value":"⇐"},
{"type":"Name","value":"str"},
{"type":"KeywordPseudo","value":"⟩"},
{"type":"Punctuation","value":":"},
{"type":"Name","value":"s"},
{"type":"Punctuation","value":";"},
{"type":"LiteralString","value":"\"\""},
{"type":"KeywordPseudo","value":"}"},
{"type":"TextWhitespace","value":"\n "},
{"type":"KeywordPseudo","value":"{"},
{"type":"Name","value":"𝕩"},
{"type":"Text","value":"."},
{"type":"Name","value":"spl"},
{"type":"KeywordPseudo","value":"}"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordPseudo","value":"⟩"},
{"type":"TextWhitespace","value":"\n "},
{"type":"Name","value":"r"},
{"type":"TextWhitespace","value":" "},
{"type":"Text","value":"←"},
{"type":"TextWhitespace","value":" "},
{"type":"NameFunction","value":"Proc"},
{"type":"KeywordPseudo","value":"{"},
{"type":"NameFunction","value":"»𝔽"},
{"type":"Operator","value":"¨"},
{"type":"NameFunction","value":"⊢"},
{"type":"KeywordPseudo","value":"}"},
{"type":"TextWhitespace","value":" "},
{"type":"Name","value":"𝕩"},
{"type":"TextWhitespace","value":"\n "},
{"type":"Punctuation","value":"("},
{"type":"NameFunction","value":"∾"},
{"type":"Operator","value":"¨"},
{"type":"TextWhitespace","value":" "},
{"type":"NameFunction","value":"GV"},
{"type":"TextWhitespace","value":" "},
{"type":"NameFunction","value":"⊔"},
{"type":"Operator","value":"˜"},
{"type":"TextWhitespace","value":" "},
{"type":"NameConstant","value":"·"},
{"type":"NameFunction","value":"+"},
{"type":"Operator","value":"`"},
{"type":"NameFunction","value":"GS"},
{"type":"Punctuation","value":")"},
{"type":"TextWhitespace","value":" "},
{"type":"Name","value":"r"},
{"type":"TextWhitespace","value":"\n"},
{"type":"KeywordPseudo","value":"}"},
{"type":"TextWhitespace","value":"\n"},
{"type":"NameFunction","value":"•Show"},
{"type":"TextWhitespace","value":" "},
{"type":"NameFunction","value":"Split"},
{"type":"TextWhitespace","value":" "},
{"type":"Name","value":"hw"},
{"type":"TextWhitespace","value":" "},
{"type":"CommentSingle","value":"# ⟨ \"Hel\" \"lo,\" \"World!\" ⟩"},
{"type":"TextWhitespace","value":"\n"}
]

0 comments on commit 5644f41

Please sign in to comment.