Skip to content

Commit 7bcc5da

Browse files
authored Dec 18, 2021
OCaml: Improved tokenization (#3269)
1 parent ffd8343 commit 7bcc5da

8 files changed

+124
-52
lines changed
 

‎components/prism-ocaml.js

+30 -13
Original file line number | Diff line number | Diff line change
@@ -1,41 +1,58 @@
1+
// https://ocaml.org/manual/lex.html
2+
13
Prism.languages.ocaml = {
2-
'comment': /\(\*[\s\S]*?\*\)/,
4+
'comment': {
5+
pattern: /\(\*[\s\S]*?\*\)/,
6+
greedy: true
7+
},
8+
'char': {
9+
pattern: /'(?:[^\\\r\n']|\\(?:.|[ox]?[0-9a-f]{1,3}))'/i,
10+
greedy: true
11+
},
312
'string': [
413
{
5-
pattern: /"(?:\\.|[^\\\r\n"])*"/,
14+
pattern: /"(?:\\(?:[\s\S]|\r\n)|[^\\\r\n"])*"/,
615
greedy: true
716
},
817
{
9-
pattern: /(['`])(?:\\(?:\d+|x[\da-f]+|.)|(?!\1)[^\\\r\n])\1/i,
18+
pattern: /\{([a-z_]*)\|[\s\S]*?\|\1\}/,
1019
greedy: true
1120
}
1221
],
13-
'number': /\b(?:0x[\da-f][\da-f_]+|(?:0[bo])?\d[\d_]*(?:\.[\d_]*)?(?:e[+-]?[\d_]+)?)/i,
22+
'number': [
23+
// binary and octal
24+
/\b(?:0b[01][01_]*|0o[0-7][0-7_]*)\b/i,
25+
// hexadecimal
26+
/\b0x[a-f0-9][a-f0-9_]*(?:\.[a-f0-9_]*)?(?:p[+-]?\d[\d_]*)?(?!\w)/i,
27+
// decimal
28+
/\b\d[\d_]*(?:\.[\d_]*)?(?:e[+-]?\d[\d_]*)?(?!\w)/i,
29+
],
1430
'directive': {
1531
pattern: /\B#\w+/,
16-
alias: 'important'
32+
alias: 'property'
1733
},
1834
'label': {
1935
pattern: /\B~\w+/,
20-
alias: 'function'
36+
alias: 'property'
2137
},
2238
'type-variable': {
2339
pattern: /\B'\w+/,
2440
alias: 'function'
2541
},
2642
'variant': {
2743
pattern: /`\w+/,
28-
alias: 'variable'
29-
},
30-
'module': {
31-
pattern: /\b[A-Z]\w+/,
32-
alias: 'variable'
44+
alias: 'symbol'
3345
},
3446
// For the list of keywords and operators,
3547
// see: http://caml.inria.fr/pub/docs/manual-ocaml/lex.html#sec84
3648
'keyword': /\b(?:as|assert|begin|class|constraint|do|done|downto|else|end|exception|external|for|fun|function|functor|if|in|include|inherit|initializer|lazy|let|match|method|module|mutable|new|nonrec|object|of|open|private|rec|sig|struct|then|to|try|type|val|value|virtual|when|where|while|with)\b/,
3749
'boolean': /\b(?:false|true)\b/,
50+
51+
'operator-like-punctuation': {
52+
pattern: /\[[<>|]|[>|]\]|\{<|>\}/,
53+
alias: 'punctuation'
54+
},
3855
// Custom operators are allowed
39-
'operator': /:=|[=<>@^|&+\-*\/$%!?~][!$%&*+\-.\/:<=>?@^|~]*|\b(?:and|asr|land|lor|lsl|lsr|lxor|mod|or)\b/,
40-
'punctuation': /[(){}\[\].,:;]|\b_\b/
56+
'operator': /\.[.~]|:[=>]|[=<>@^|&+\-*\/$%!?~][!$%&*+\-.\/:<=>?@^|~]*|\b(?:and|asr|land|lor|lsl|lsr|lxor|mod|or)\b/,
57+
'punctuation': /;;|::|[(){}\[\].,:;#]|\b_\b/
4158
};

‎components/prism-ocaml.min.js

+1 -1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
+15
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,15 @@
1+
'a'
2+
'\n'
3+
'\''
4+
'\xA9'
5+
'\169'
6+
7+
----------------------------------------------------
8+
9+
[
10+
["char", "'a'"],
11+
["char", "'\\n'"],
12+
["char", "'\\''"],
13+
["char", "'\\xA9'"],
14+
["char", "'\\169'"]
15+
]

‎tests/languages/ocaml/module_feature.test

-15
This file was deleted.

‎tests/languages/ocaml/number_feature.test

+10 -2
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,13 @@
55
0b1010_1111
66
42_000
77
3.14_15_9
8+
3.141_592_653_589_793_12
9+
1e-5
810
3.2e8
911
6.1E-7
12+
2.22044604925031308e-16
1013
0.4e+12_415
14+
0x1p-52
1115

1216
----------------------------------------------------
1317

@@ -19,11 +23,15 @@
1923
["number", "0b1010_1111"],
2024
["number", "42_000"],
2125
["number", "3.14_15_9"],
26+
["number", "3.141_592_653_589_793_12"],
27+
["number", "1e-5"],
2228
["number", "3.2e8"],
2329
["number", "6.1E-7"],
24-
["number", "0.4e+12_415"]
30+
["number", "2.22044604925031308e-16"],
31+
["number", "0.4e+12_415"],
32+
["number", "0x1p-52"]
2533
]
2634

2735
----------------------------------------------------
2836

29-
Checks for numbers.
37+
Checks for numbers.

‎tests/languages/ocaml/operator_feature.test

+28 -7
Original file line number | Diff line number | Diff line change
@@ -2,11 +2,12 @@ and asr land
22
lor lsl lsr
33
lxor mod or
44

5-
:=
5+
:= :>
66
= < > @
7-
^ | & ~
7+
^ | & ~ .~
88
+ - * /
99
$ % ! ?
10+
..
1011

1112
~=~
1213

@@ -18,14 +19,34 @@ $ % ! ?
1819
["operator", "lxor"], ["operator", "mod"], ["operator", "or"],
1920

2021
["operator", ":="],
21-
["operator", "="], ["operator", "<"], ["operator", ">"], ["operator", "@"],
22-
["operator", "^"], ["operator", "|"], ["operator", "&"], ["operator", "~"],
23-
["operator", "+"], ["operator", "-"], ["operator", "*"], ["operator", "/"],
24-
["operator", "$"], ["operator", "%"], ["operator", "!"], ["operator", "?"],
22+
["operator", ":>"],
23+
24+
["operator", "="],
25+
["operator", "<"],
26+
["operator", ">"],
27+
["operator", "@"],
28+
29+
["operator", "^"],
30+
["operator", "|"],
31+
["operator", "&"],
32+
["operator", "~"],
33+
["operator", ".~"],
34+
35+
["operator", "+"],
36+
["operator", "-"],
37+
["operator", "*"],
38+
["operator", "/"],
39+
40+
["operator", "$"],
41+
["operator", "%"],
42+
["operator", "!"],
43+
["operator", "?"],
44+
45+
["operator", ".."],
2546

2647
["operator", "~=~"]
2748
]
2849

2950
----------------------------------------------------
3051

31-
Checks for operators.
52+
Checks for operators.

‎tests/languages/ocaml/punctuation_feature.test

+21 -1
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,12 @@
11
( ) { } [ ]
22
. , : ;
33
_
4+
:: ;;
5+
6+
[< [> [| {<
7+
>] >} |]
8+
9+
#
410

511
----------------------------------------------------
612

@@ -17,5 +23,19 @@ _
1723
["punctuation", ":"],
1824
["punctuation", ";"],
1925

20-
["punctuation", "_"]
26+
["punctuation", "_"],
27+
28+
["punctuation", "::"],
29+
["punctuation", ";;"],
30+
31+
["operator-like-punctuation", "[<"],
32+
["operator-like-punctuation", "[>"],
33+
["operator-like-punctuation", "[|"],
34+
["operator-like-punctuation", "{<"],
35+
36+
["operator-like-punctuation", ">]"],
37+
["operator-like-punctuation", ">}"],
38+
["operator-like-punctuation", "|]"],
39+
40+
["punctuation", "#"]
2141
]
+19 -13
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,31 @@
11
""
22
"Fo\"obar"
3-
'\''
4-
'\123'
5-
'\xf4'
6-
`\``
7-
`\123`
8-
`\xf4`
3+
"Call me Ishmael. Some years ago — never mind how long \
4+
precisely — having little or no money in my purse, and \
5+
nothing particular to interest me on shore, I thought I\
6+
\ would sail about a little and see the watery part of t\
7+
he world."
8+
9+
{|This is a quoted string, here, neither \ nor " are special characters|}
10+
{|"Hello, World!"|}
11+
{|"\\"|}
12+
{delimiter|the end of this|}quoted string is here|delimiter}
13+
{ext|hello {|world|}|ext}
914

1015
----------------------------------------------------
1116

1217
[
1318
["string", "\"\""],
1419
["string", "\"Fo\\\"obar\""],
15-
["string", "'\\''"],
16-
["string", "'\\123'"],
17-
["string", "'\\xf4'"],
18-
["string", "`\\``"],
19-
["string", "`\\123`"],
20-
["string", "`\\xf4`"]
20+
["string", "\"Call me Ishmael. Some years ago — never mind how long \\\r\nprecisely — having little or no money in my purse, and \\\r\nnothing particular to interest me on shore, I thought I\\\r\n\\ would sail about a little and see the watery part of t\\\r\nhe world.\""],
21+
22+
["string", "{|This is a quoted string, here, neither \\ nor \" are special characters|}"],
23+
["string", "{|\"Hello, World!\"|}"],
24+
["string", "{|\"\\\\\"|}"],
25+
["string", "{delimiter|the end of this|}quoted string is here|delimiter}"],
26+
["string", "{ext|hello {|world|}|ext}"]
2127
]
2228

2329
----------------------------------------------------
2430

25-
Checks for strings.
31+
Checks for strings.

0 commit comments

Comments
 (0)
Please sign in to comment.