Skip to content

Commit

Permalink
OCaml: Improved tokenization (#3269)
Browse files Browse the repository at this point in the history
  • Loading branch information
RunDevelopment committed Dec 18, 2021
1 parent ffd8343 commit 7bcc5da
Show file tree
Hide file tree
Showing 8 changed files with 124 additions and 52 deletions.
43 changes: 30 additions & 13 deletions components/prism-ocaml.js
@@ -1,41 +1,58 @@
// https://ocaml.org/manual/lex.html

// Prism grammar for OCaml: each key is a token name and each value is the
// regex, pattern object, or list of those, that matches that token.
//
// NOTE(review): this listing appears to be a commit diff shown without +/-
// markers, so removed and added lines sit next to each other (e.g. two
// 'comment' entries, two `pattern:` lines, two `alias:` lines). Presumably the
// earlier line of each pair is the pre-commit version and the later one is its
// replacement - TODO confirm against the real file before relying on either.
Prism.languages.ocaml = {
'comment': /\(\*[\s\S]*?\*\)/,
// Matches `(*` lazily up to the first `*)`, so nested comments are not tracked.
'comment': {
pattern: /\(\*[\s\S]*?\*\)/,
greedy: true
},
// A single-quoted character: either one plain character, or a backslash
// followed by any one character or by 1-3 hex digits with an optional
// `o`/`x` prefix (case-insensitive).
'char': {
pattern: /'(?:[^\\\r\n']|\\(?:.|[ox]?[0-9a-f]{1,3}))'/i,
greedy: true
},
'string': [
{
// Double-quoted string. The second pattern additionally lets a backslash
// escape a line break (`\r\n` or any single character, including `\n`),
// which allows a string to continue on the next line.
pattern: /"(?:\\.|[^\\\r\n"])*"/,
pattern: /"(?:\\(?:[\s\S]|\r\n)|[^\\\r\n"])*"/,
greedy: true
},
{
// The first pattern matches a single escaped or plain character wrapped in
// `'` or backticks. The second matches `{id|...|id}`, where `id` is a
// possibly empty run of lowercase letters/underscores that must be
// repeated before the closing brace (backreference `\1`).
pattern: /(['`])(?:\\(?:\d+|x[\da-f]+|.)|(?!\1)[^\\\r\n])\1/i,
pattern: /\{([a-z_]*)\|[\s\S]*?\|\1\}/,
greedy: true
}
],
'number': /\b(?:0x[\da-f][\da-f_]+|(?:0[bo])?\d[\d_]*(?:\.[\d_]*)?(?:e[+-]?[\d_]+)?)/i,
// In the list form below, the trailing `\b` / `(?!\w)` keeps a literal from
// matching when it is immediately followed by another word character.
'number': [
// binary and octal
/\b(?:0b[01][01_]*|0o[0-7][0-7_]*)\b/i,
// hexadecimal
/\b0x[a-f0-9][a-f0-9_]*(?:\.[a-f0-9_]*)?(?:p[+-]?\d[\d_]*)?(?!\w)/i,
// decimal
/\b\d[\d_]*(?:\.[\d_]*)?(?:e[+-]?\d[\d_]*)?(?!\w)/i,
],
// `#name`, where the `#` is not directly preceded by a word character.
'directive': {
pattern: /\B#\w+/,
alias: 'important'
alias: 'property'
},
// `~name`, where the `~` is not directly preceded by a word character.
'label': {
pattern: /\B~\w+/,
alias: 'function'
alias: 'property'
},
// `'name`: an apostrophe followed by word characters, with no closing quote.
'type-variable': {
pattern: /\B'\w+/,
alias: 'function'
},
// A backtick followed by word characters.
'variant': {
pattern: /`\w+/,
alias: 'variable'
},
// An identifier starting with an uppercase ASCII letter; `\w+` requires at
// least one more word character, so a lone capital letter does not match.
'module': {
pattern: /\b[A-Z]\w+/,
alias: 'variable'
alias: 'symbol'
},
// For the list of keywords and operators,
// see: http://caml.inria.fr/pub/docs/manual-ocaml/lex.html#sec84
'keyword': /\b(?:as|assert|begin|class|constraint|do|done|downto|else|end|exception|external|for|fun|function|functor|if|in|include|inherit|initializer|lazy|let|match|method|module|mutable|new|nonrec|object|of|open|private|rec|sig|struct|then|to|try|type|val|value|virtual|when|where|while|with)\b/,
'boolean': /\b(?:false|true)\b/,

// Two-character bracket forms `[<`, `[>`, `[|`, `>]`, `|]`, `{<`, `>}`.
// Declared before 'operator', whose pattern would otherwise be able to match
// the `<`, `>` and `|` inside them.
'operator-like-punctuation': {
pattern: /\[[<>|]|[>|]\]|\{<|>\}/,
alias: 'punctuation'
},
// Custom operators are allowed
// (an operator start character followed by any run of operator characters);
// the word alternatives at the end cover the keyword-style operators.
'operator': /:=|[=<>@^|&+\-*\/$%!?~][!$%&*+\-.\/:<=>?@^|~]*|\b(?:and|asr|land|lor|lsl|lsr|lxor|mod|or)\b/,
'punctuation': /[(){}\[\].,:;]|\b_\b/
'operator': /\.[.~]|:[=>]|[=<>@^|&+\-*\/$%!?~][!$%&*+\-.\/:<=>?@^|~]*|\b(?:and|asr|land|lor|lsl|lsr|lxor|mod|or)\b/,
'punctuation': /;;|::|[(){}\[\].,:;#]|\b_\b/
};
2 changes: 1 addition & 1 deletion components/prism-ocaml.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions tests/languages/ocaml/char_feature.test
@@ -0,0 +1,15 @@
'a'
'\n'
'\''
'\xA9'
'\169'

----------------------------------------------------

[
["char", "'a'"],
["char", "'\\n'"],
["char", "'\\''"],
["char", "'\\xA9'"],
["char", "'\\169'"]
]
15 changes: 0 additions & 15 deletions tests/languages/ocaml/module_feature.test

This file was deleted.

12 changes: 10 additions & 2 deletions tests/languages/ocaml/number_feature.test
Expand Up @@ -5,9 +5,13 @@
0b1010_1111
42_000
3.14_15_9
3.141_592_653_589_793_12
1e-5
3.2e8
6.1E-7
2.22044604925031308e-16
0.4e+12_415
0x1p-52

----------------------------------------------------

Expand All @@ -19,11 +23,15 @@
["number", "0b1010_1111"],
["number", "42_000"],
["number", "3.14_15_9"],
["number", "3.141_592_653_589_793_12"],
["number", "1e-5"],
["number", "3.2e8"],
["number", "6.1E-7"],
["number", "0.4e+12_415"]
["number", "2.22044604925031308e-16"],
["number", "0.4e+12_415"],
["number", "0x1p-52"]
]

----------------------------------------------------

Checks for numbers.
Checks for numbers.
35 changes: 28 additions & 7 deletions tests/languages/ocaml/operator_feature.test
Expand Up @@ -2,11 +2,12 @@ and asr land
lor lsl lsr
lxor mod or

:=
:= :>
= < > @
^ | & ~
^ | & ~ .~
+ - * /
$ % ! ?
..

~=~

Expand All @@ -18,14 +19,34 @@ $ % ! ?
["operator", "lxor"], ["operator", "mod"], ["operator", "or"],

["operator", ":="],
["operator", "="], ["operator", "<"], ["operator", ">"], ["operator", "@"],
["operator", "^"], ["operator", "|"], ["operator", "&"], ["operator", "~"],
["operator", "+"], ["operator", "-"], ["operator", "*"], ["operator", "/"],
["operator", "$"], ["operator", "%"], ["operator", "!"], ["operator", "?"],
["operator", ":>"],

["operator", "="],
["operator", "<"],
["operator", ">"],
["operator", "@"],

["operator", "^"],
["operator", "|"],
["operator", "&"],
["operator", "~"],
["operator", ".~"],

["operator", "+"],
["operator", "-"],
["operator", "*"],
["operator", "/"],

["operator", "$"],
["operator", "%"],
["operator", "!"],
["operator", "?"],

["operator", ".."],

["operator", "~=~"]
]

----------------------------------------------------

Checks for operators.
Checks for operators.
22 changes: 21 additions & 1 deletion tests/languages/ocaml/punctuation_feature.test
@@ -1,6 +1,12 @@
( ) { } [ ]
. , : ;
_
:: ;;

[< [> [| {<
>] >} |]

#

----------------------------------------------------

Expand All @@ -17,5 +23,19 @@ _
["punctuation", ":"],
["punctuation", ";"],

["punctuation", "_"]
["punctuation", "_"],

["punctuation", "::"],
["punctuation", ";;"],

["operator-like-punctuation", "[<"],
["operator-like-punctuation", "[>"],
["operator-like-punctuation", "[|"],
["operator-like-punctuation", "{<"],

["operator-like-punctuation", ">]"],
["operator-like-punctuation", ">}"],
["operator-like-punctuation", "|]"],

["punctuation", "#"]
]
32 changes: 19 additions & 13 deletions tests/languages/ocaml/string_feature.test
@@ -1,25 +1,31 @@
""
"Fo\"obar"
'\''
'\123'
'\xf4'
`\``
`\123`
`\xf4`
"Call me Ishmael. Some years ago — never mind how long \
precisely — having little or no money in my purse, and \
nothing particular to interest me on shore, I thought I\
\ would sail about a little and see the watery part of t\
he world."

{|This is a quoted string, here, neither \ nor " are special characters|}
{|"Hello, World!"|}
{|"\\"|}
{delimiter|the end of this|}quoted string is here|delimiter}
{ext|hello {|world|}|ext}

----------------------------------------------------

[
["string", "\"\""],
["string", "\"Fo\\\"obar\""],
["string", "'\\''"],
["string", "'\\123'"],
["string", "'\\xf4'"],
["string", "`\\``"],
["string", "`\\123`"],
["string", "`\\xf4`"]
["string", "\"Call me Ishmael. Some years ago — never mind how long \\\r\nprecisely — having little or no money in my purse, and \\\r\nnothing particular to interest me on shore, I thought I\\\r\n\\ would sail about a little and see the watery part of t\\\r\nhe world.\""],

["string", "{|This is a quoted string, here, neither \\ nor \" are special characters|}"],
["string", "{|\"Hello, World!\"|}"],
["string", "{|\"\\\\\"|}"],
["string", "{delimiter|the end of this|}quoted string is here|delimiter}"],
["string", "{ext|hello {|world|}|ext}"]
]

----------------------------------------------------

Checks for strings.
Checks for strings.

0 comments on commit 7bcc5da

Please sign in to comment.