Skip to content

Commit f22ea9f

Browse files
authored Dec 10, 2021
Perl: Improved tokenization (#3241)
1 parent a3905c0 commit f22ea9f

File tree

5 files changed

+181
-206
lines changed

5 files changed

+181
-206
lines changed
 

‎components/prism-perl.js

+153-188
Original file line numberDiff line numberDiff line change
@@ -1,191 +1,156 @@
1-
Prism.languages.perl = {
2-
'comment': [
3-
{
4-
// POD
5-
pattern: /(^\s*)=\w[\s\S]*?=cut.*/m,
1+
(function (Prism) {
2+
3+
// Regex source (a string, via `.source`) for one bracket-delimited section:
// (...), {...}, [...] or <...>. Inside the delimiters, any character other than
// that bracket pair or a backslash is accepted, as is a backslash followed by
// any character (including a newline). Nested brackets of the same kind are
// not matched. The string is spliced into the RegExp(...) patterns built for
// the `string` and `regex` tokens below, so it is written as a single
// non-capturing group and adds no numbered capture groups of its own.
var brackets = /(?:\((?:[^()\\]|\\[\s\S])*\)|\{(?:[^{}\\]|\\[\s\S])*\}|\[(?:[^[\]\\]|\\[\s\S])*\]|<(?:[^<>\\]|\\[\s\S])*>)/.source;
4+
5+
Prism.languages.perl = {
6+
'comment': [
7+
{
8+
// POD
9+
pattern: /(^\s*)=\w[\s\S]*?=cut.*/m,
10+
lookbehind: true,
11+
greedy: true
12+
},
13+
{
14+
pattern: /(^|[^\\$])#.*/,
15+
lookbehind: true,
16+
greedy: true
17+
}
18+
],
19+
// TODO Could be nice to handle Heredoc too.
20+
'string': [
21+
{
22+
pattern: RegExp(
23+
/\b(?:q|qq|qw|qx)(?![a-zA-Z0-9])\s*/.source +
24+
'(?:' +
25+
[
26+
// q/.../
27+
/([^a-zA-Z0-9\s{(\[<])(?:(?!\1)[^\\]|\\[\s\S])*\1/.source,
28+
29+
// q a...a
30+
// eslint-disable-next-line regexp/strict
31+
/([a-zA-Z0-9])(?:(?!\2)[^\\]|\\[\s\S])*\2/.source,
32+
33+
// q(...)
34+
// q{...}
35+
// q[...]
36+
// q<...>
37+
brackets,
38+
].join('|') +
39+
')'
40+
),
41+
greedy: true
42+
},
43+
44+
// "...", `...`
45+
{
46+
pattern: /("|`)(?:(?!\1)[^\\]|\\[\s\S])*\1/,
47+
greedy: true
48+
},
49+
50+
// '...'
51+
// FIXME Multi-line single-quoted strings are not supported as they would break variables containing '
52+
{
53+
pattern: /'(?:[^'\\\r\n]|\\.)*'/,
54+
greedy: true
55+
}
56+
],
57+
'regex': [
58+
{
59+
pattern: RegExp(
60+
/\b(?:m|qr)(?![a-zA-Z0-9])\s*/.source +
61+
'(?:' +
62+
[
63+
// m/.../
64+
/([^a-zA-Z0-9\s{(\[<])(?:(?!\1)[^\\]|\\[\s\S])*\1/.source,
65+
66+
// m a...a
67+
// eslint-disable-next-line regexp/strict
68+
/([a-zA-Z0-9])(?:(?!\2)[^\\]|\\[\s\S])*\2/.source,
69+
70+
// m(...)
71+
// m{...}
72+
// m[...]
73+
// m<...>
74+
brackets,
75+
].join('|') +
76+
')' +
77+
/[msixpodualngc]*/.source
78+
),
79+
greedy: true
80+
},
81+
82+
// The lookbehinds prevent -s from breaking
83+
{
84+
pattern: RegExp(
85+
/(^|[^-])\b(?:s|tr|y)(?![a-zA-Z0-9])\s*/.source +
86+
'(?:' +
87+
[
88+
// s/.../.../
89+
// eslint-disable-next-line regexp/strict
90+
/([^a-zA-Z0-9\s{(\[<])(?:(?!\2)[^\\]|\\[\s\S])*\2(?:(?!\2)[^\\]|\\[\s\S])*\2/.source,
91+
92+
// s a...a...a
93+
// eslint-disable-next-line regexp/strict
94+
/([a-zA-Z0-9])(?:(?!\3)[^\\]|\\[\s\S])*\3(?:(?!\3)[^\\]|\\[\s\S])*\3/.source,
95+
96+
// s(...)(...)
97+
// s{...}{...}
98+
// s[...][...]
99+
// s<...><...>
100+
// s(...)[...]
101+
brackets + /\s*/.source + brackets,
102+
].join('|') +
103+
')' +
104+
/[msixpodualngcer]*/.source
105+
),
106+
lookbehind: true,
107+
greedy: true
108+
},
109+
110+
// /.../
111+
// The look-ahead tries to prevent two divisions on
112+
// the same line from being highlighted as regex.
113+
// This does not support multi-line regex.
114+
{
115+
pattern: /\/(?:[^\/\\\r\n]|\\.)*\/[msixpodualngc]*(?=\s*(?:$|[\r\n,.;})&|\-+*~<>!?^]|(?:and|cmp|eq|ge|gt|le|lt|ne|not|or|x|xor)\b))/,
116+
greedy: true
117+
}
118+
],
119+
120+
// FIXME Not sure about the handling of ::, ', and #
121+
'variable': [
122+
// ${^POSTMATCH}
123+
/[&*$@%]\{\^[A-Z]+\}/,
124+
// $^V
125+
/[&*$@%]\^[A-Z_]/,
126+
// ${...}
127+
/[&*$@%]#?(?=\{)/,
128+
// $foo
129+
/[&*$@%]#?(?:(?:::)*'?(?!\d)[\w$]+(?![\w$]))+(?:::)*/,
130+
// $1
131+
/[&*$@%]\d+/,
132+
// $_, @_, %!
133+
// The negative lookahead prevents from breaking the %= operator
134+
/(?!%=)[$@%][!"#$%&'()*+,\-.\/:;<=>?@[\\\]^_`{|}~]/
135+
],
136+
'filehandle': {
137+
// <>, <FOO>, _
138+
pattern: /<(?![<=])\S*?>|\b_\b/,
139+
alias: 'symbol'
140+
},
141+
'v-string': {
142+
// v1.2, 1.2.3
143+
pattern: /v\d+(?:\.\d+)*|\d+(?:\.\d+){2,}/,
144+
alias: 'string'
145+
},
146+
'function': {
147+
pattern: /(\bsub[ \t]+)\w+/,
6148
lookbehind: true
7149
},
8-
{
9-
pattern: /(^|[^\\$])#.*/,
10-
lookbehind: true
11-
}
12-
],
13-
// TODO Could be nice to handle Heredoc too.
14-
'string': [
15-
// q/.../
16-
{
17-
pattern: /\b(?:q|qq|qw|qx)\s*([^a-zA-Z0-9\s{(\[<])(?:(?!\1)[^\\]|\\[\s\S])*\1/,
18-
greedy: true
19-
},
20-
21-
// q a...a
22-
{
23-
pattern: /\b(?:q|qq|qw|qx)\s+([a-zA-Z0-9])(?:(?!\1)[^\\]|\\[\s\S])*\1/,
24-
greedy: true
25-
},
26-
27-
// q(...)
28-
{
29-
pattern: /\b(?:q|qq|qw|qx)\s*\((?:[^()\\]|\\[\s\S])*\)/,
30-
greedy: true
31-
},
32-
33-
// q{...}
34-
{
35-
pattern: /\b(?:q|qq|qw|qx)\s*\{(?:[^{}\\]|\\[\s\S])*\}/,
36-
greedy: true
37-
},
38-
39-
// q[...]
40-
{
41-
pattern: /\b(?:q|qq|qw|qx)\s*\[(?:[^[\]\\]|\\[\s\S])*\]/,
42-
greedy: true
43-
},
44-
45-
// q<...>
46-
{
47-
pattern: /\b(?:q|qq|qw|qx)\s*<(?:[^<>\\]|\\[\s\S])*>/,
48-
greedy: true
49-
},
50-
51-
// "...", `...`
52-
{
53-
pattern: /("|`)(?:(?!\1)[^\\]|\\[\s\S])*\1/,
54-
greedy: true
55-
},
56-
57-
// '...'
58-
// FIXME Multi-line single-quoted strings are not supported as they would break variables containing '
59-
{
60-
pattern: /'(?:[^'\\\r\n]|\\.)*'/,
61-
greedy: true
62-
}
63-
],
64-
'regex': [
65-
// m/.../
66-
{
67-
pattern: /\b(?:m|qr)\s*([^a-zA-Z0-9\s{(\[<])(?:(?!\1)[^\\]|\\[\s\S])*\1[msixpodualngc]*/,
68-
greedy: true
69-
},
70-
71-
// m a...a
72-
{
73-
pattern: /\b(?:m|qr)\s+([a-zA-Z0-9])(?:(?!\1)[^\\]|\\[\s\S])*\1[msixpodualngc]*/,
74-
greedy: true
75-
},
76-
77-
// m(...)
78-
{
79-
pattern: /\b(?:m|qr)\s*\((?:[^()\\]|\\[\s\S])*\)[msixpodualngc]*/,
80-
greedy: true
81-
},
82-
83-
// m{...}
84-
{
85-
pattern: /\b(?:m|qr)\s*\{(?:[^{}\\]|\\[\s\S])*\}[msixpodualngc]*/,
86-
greedy: true
87-
},
88-
89-
// m[...]
90-
{
91-
pattern: /\b(?:m|qr)\s*\[(?:[^[\]\\]|\\[\s\S])*\][msixpodualngc]*/,
92-
greedy: true
93-
},
94-
95-
// m<...>
96-
{
97-
pattern: /\b(?:m|qr)\s*<(?:[^<>\\]|\\[\s\S])*>[msixpodualngc]*/,
98-
greedy: true
99-
},
100-
101-
// The lookbehinds prevent -s from breaking
102-
// FIXME We don't handle change of separator like s(...)[...]
103-
// s/.../.../
104-
{
105-
pattern: /(^|[^-]\b)(?:s|tr|y)\s*([^a-zA-Z0-9\s{(\[<])(?:(?!\2)[^\\]|\\[\s\S])*\2(?:(?!\2)[^\\]|\\[\s\S])*\2[msixpodualngcer]*/,
106-
lookbehind: true,
107-
greedy: true
108-
},
109-
110-
// s a...a...a
111-
{
112-
pattern: /(^|[^-]\b)(?:s|tr|y)\s+([a-zA-Z0-9])(?:(?!\2)[^\\]|\\[\s\S])*\2(?:(?!\2)[^\\]|\\[\s\S])*\2[msixpodualngcer]*/,
113-
lookbehind: true,
114-
greedy: true
115-
},
116-
117-
// s(...)(...)
118-
{
119-
pattern: /(^|[^-]\b)(?:s|tr|y)\s*\((?:[^()\\]|\\[\s\S])*\)\s*\((?:[^()\\]|\\[\s\S])*\)[msixpodualngcer]*/,
120-
lookbehind: true,
121-
greedy: true
122-
},
123-
124-
// s{...}{...}
125-
{
126-
pattern: /(^|[^-]\b)(?:s|tr|y)\s*\{(?:[^{}\\]|\\[\s\S])*\}\s*\{(?:[^{}\\]|\\[\s\S])*\}[msixpodualngcer]*/,
127-
lookbehind: true,
128-
greedy: true
129-
},
130-
131-
// s[...][...]
132-
{
133-
pattern: /(^|[^-]\b)(?:s|tr|y)\s*\[(?:[^[\]\\]|\\[\s\S])*\]\s*\[(?:[^[\]\\]|\\[\s\S])*\][msixpodualngcer]*/,
134-
lookbehind: true,
135-
greedy: true
136-
},
137-
138-
// s<...><...>
139-
{
140-
pattern: /(^|[^-]\b)(?:s|tr|y)\s*<(?:[^<>\\]|\\[\s\S])*>\s*<(?:[^<>\\]|\\[\s\S])*>[msixpodualngcer]*/,
141-
lookbehind: true,
142-
greedy: true
143-
},
144-
145-
// /.../
146-
// The look-ahead tries to prevent two divisions on
147-
// the same line from being highlighted as regex.
148-
// This does not support multi-line regex.
149-
{
150-
pattern: /\/(?:[^\/\\\r\n]|\\.)*\/[msixpodualngc]*(?=\s*(?:$|[\r\n,.;})&|\-+*~<>!?^]|(?:and|cmp|eq|ge|gt|le|lt|ne|not|or|x|xor)\b))/,
151-
greedy: true
152-
}
153-
],
150+
'keyword': /\b(?:any|break|continue|default|delete|die|do|else|elsif|eval|for|foreach|given|goto|if|last|local|my|next|our|package|print|redo|require|return|say|state|sub|switch|undef|unless|until|use|when|while)\b/,
151+
'number': /\b(?:0x[\dA-Fa-f](?:_?[\dA-Fa-f])*|0b[01](?:_?[01])*|(?:(?:\d(?:_?\d)*)?\.)?\d(?:_?\d)*(?:[Ee][+-]?\d+)?)\b/,
152+
'operator': /-[rwxoRWXOezsfdlpSbctugkTBMAC]\b|\+[+=]?|-[-=>]?|\*\*?=?|\/\/?=?|=[=~>]?|~[~=]?|\|\|?=?|&&?=?|<(?:=>?|<=?)?|>>?=?|![~=]?|[%^]=?|\.(?:=|\.\.?)?|[\\?]|\bx(?:=|\b)|\b(?:and|cmp|eq|ge|gt|le|lt|ne|not|or|xor)\b/,
153+
'punctuation': /[{}[\];(),:]/
154+
};
154155

155-
// FIXME Not sure about the handling of ::, ', and #
156-
'variable': [
157-
// ${^POSTMATCH}
158-
/[&*$@%]\{\^[A-Z]+\}/,
159-
// $^V
160-
/[&*$@%]\^[A-Z_]/,
161-
// ${...}
162-
/[&*$@%]#?(?=\{)/,
163-
// $foo
164-
/[&*$@%]#?(?:(?:::)*'?(?!\d)[\w$]+(?![\w$]))+(?:::)*/,
165-
// $1
166-
/[&*$@%]\d+/,
167-
// $_, @_, %!
168-
// The negative lookahead prevents from breaking the %= operator
169-
/(?!%=)[$@%][!"#$%&'()*+,\-.\/:;<=>?@[\\\]^_`{|}~]/
170-
],
171-
'filehandle': {
172-
// <>, <FOO>, _
173-
pattern: /<(?![<=])\S*>|\b_\b/,
174-
alias: 'symbol'
175-
},
176-
'vstring': {
177-
// v1.2, 1.2.3
178-
pattern: /v\d+(?:\.\d+)*|\d+(?:\.\d+){2,}/,
179-
alias: 'string'
180-
},
181-
'function': {
182-
pattern: /sub \w+/i,
183-
inside: {
184-
keyword: /sub/
185-
}
186-
},
187-
'keyword': /\b(?:any|break|continue|default|delete|die|do|else|elsif|eval|for|foreach|given|goto|if|last|local|my|next|our|package|print|redo|require|return|say|state|sub|switch|undef|unless|until|use|when|while)\b/,
188-
'number': /\b(?:0x[\dA-Fa-f](?:_?[\dA-Fa-f])*|0b[01](?:_?[01])*|(?:(?:\d(?:_?\d)*)?\.)?\d(?:_?\d)*(?:[Ee][+-]?\d+)?)\b/,
189-
'operator': /-[rwxoRWXOezsfdlpSbctugkTBMAC]\b|\+[+=]?|-[-=>]?|\*\*?=?|\/\/?=?|=[=~>]?|~[~=]?|\|\|?=?|&&?=?|<(?:=>?|<=?)?|>>?=?|![~=]?|[%^]=?|\.(?:=|\.\.?)?|[\\?]|\bx(?:=|\b)|\b(?:and|cmp|eq|ge|gt|le|lt|ne|not|or|xor)\b/,
190-
'punctuation': /[{}[\];(),:]/
191-
};
156+
}(Prism));

‎components/prism-perl.min.js

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎tests/languages/perl/function_feature.test

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@ sub Foo_Bar42
44
----------------------------------------------------
55

66
[
7-
["function", [["keyword", "sub"], " foo"]],
8-
["function", [["keyword", "sub"], " Foo_Bar42"]]
7+
["keyword", "sub"], ["function", "foo"],
8+
["keyword", "sub"], ["function", "Foo_Bar42"]
99
]
1010

1111
----------------------------------------------------
1212

13-
Checks for functions.
13+
Checks for functions.

‎tests/languages/perl/regex_feature.test

+11-1
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,12 @@ s<foo
6464
bar><foo
6565
baz>
6666

67+
tr()<>c
68+
y{foo\<bar}[ba\>a]
69+
s<foo
70+
bar>(foo
71+
baz)
72+
6773
//
6874
/foo/gsx
6975
/foo\/bar/n
@@ -119,11 +125,15 @@ baz>
119125
["regex", "y<foo\\<bar><ba\\>a>"],
120126
["regex", "s<foo\r\nbar><foo\r\nbaz>"],
121127

128+
["regex", "tr()<>c"],
129+
["regex", "y{foo\\<bar}[ba\\>a]"],
130+
["regex", "s<foo\r\nbar>(foo\r\nbaz)"],
131+
122132
["regex", "//"],
123133
["regex", "/foo/gsx"],
124134
["regex", "/foo\\/bar/n"]
125135
]
126136

127137
----------------------------------------------------
128138

129-
Checks for regex and regex quote-like operators.
139+
Checks for regex and regex quote-like operators.

0 commit comments

Comments
 (0)