Skip to content

Commit

Permalink
JS: Added support for new regex syntax (#3399)
Browse files Browse the repository at this point in the history
  • Loading branch information
RunDevelopment committed Mar 21, 2022
1 parent 8e648da commit ca78cde
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 5 deletions.
20 changes: 18 additions & 2 deletions components/prism-javascript.js
Expand Up @@ -53,8 +53,24 @@ Prism.languages.javascript['class-name'][0].pattern = /(\b(?:class|extends|imple

Prism.languages.insertBefore('javascript', 'keyword', {
'regex': {
// eslint-disable-next-line regexp/no-dupe-characters-character-class
pattern: /((?:^|[^$\w\xA0-\uFFFF."'\])\s]|\b(?:return|yield))\s*)\/(?:\[(?:[^\]\\\r\n]|\\.)*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}(?=(?:\s|\/\*(?:[^*]|\*(?!\/))*\*\/)*(?:$|[\r\n,.;:})\]]|\/\/))/,
pattern: RegExp(
// lookbehind
// eslint-disable-next-line regexp/no-dupe-characters-character-class
/((?:^|[^$\w\xA0-\uFFFF."'\])\s]|\b(?:return|yield))\s*)/.source +
// Regex pattern:
// There are 2 regex patterns here. The RegExp set notation proposal added support for nested character
// classes if the `v` flag is present. Unfortunately, nested CCs are both context-free (arbitrary nesting
// depth cannot be matched by a regular expression) and incompatible with the old syntax, so we have to
// define 2 different regex patterns.
/\//.source +
'(?:' +
/(?:\[(?:[^\]\\\r\n]|\\.)*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}/.source +
'|' +
// `v` flag syntax. This supports 3 levels of nested character classes.
/(?:\[(?:[^[\]\\\r\n]|\\.|\[(?:[^[\]\\\r\n]|\\.|\[(?:[^[\]\\\r\n]|\\.)*\])*\])*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}v[dgimyus]{0,7}/.source +
')' +
// lookahead
/(?=(?:\s|\/\*(?:[^*]|\*(?!\/))*\*\/)*(?:$|[\r\n,.;:})\]]|\/\/))/.source
),
lookbehind: true,
greedy: true,
inside: {
Expand Down
2 changes: 1 addition & 1 deletion components/prism-javascript.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 18 additions & 2 deletions prism.js
Expand Up @@ -1623,8 +1623,24 @@ Prism.languages.javascript['class-name'][0].pattern = /(\b(?:class|extends|imple

Prism.languages.insertBefore('javascript', 'keyword', {
'regex': {
// eslint-disable-next-line regexp/no-dupe-characters-character-class
pattern: /((?:^|[^$\w\xA0-\uFFFF."'\])\s]|\b(?:return|yield))\s*)\/(?:\[(?:[^\]\\\r\n]|\\.)*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}(?=(?:\s|\/\*(?:[^*]|\*(?!\/))*\*\/)*(?:$|[\r\n,.;:})\]]|\/\/))/,
pattern: RegExp(
// lookbehind
// eslint-disable-next-line regexp/no-dupe-characters-character-class
/((?:^|[^$\w\xA0-\uFFFF."'\])\s]|\b(?:return|yield))\s*)/.source +
// Regex pattern:
// There are 2 regex patterns here. The RegExp set notation proposal added support for nested character
// classes if the `v` flag is present. Unfortunately, nested CCs are both context-free (arbitrary nesting
// depth cannot be matched by a regular expression) and incompatible with the old syntax, so we have to
// define 2 different regex patterns.
/\//.source +
'(?:' +
/(?:\[(?:[^\]\\\r\n]|\\.)*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}/.source +
'|' +
// `v` flag syntax. This supports 3 levels of nested character classes.
/(?:\[(?:[^[\]\\\r\n]|\\.|\[(?:[^[\]\\\r\n]|\\.|\[(?:[^[\]\\\r\n]|\\.)*\])*\])*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}v[dgimyus]{0,7}/.source +
')' +
// lookahead
/(?=(?:\s|\/\*(?:[^*]|\*(?!\/))*\*\/)*(?:$|[\r\n,.;:})\]]|\/\/))/.source
),
lookbehind: true,
greedy: true,
inside: {
Expand Down
41 changes: 41 additions & 0 deletions tests/languages/javascript/regex_feature.test
Expand Up @@ -9,6 +9,12 @@
/ '1' '2' '3' '4' '5' /
[/foo/]

// RegExp set notation
;/[A--B]/v;
/[A--[0-9]]/v;
/[\p{Decimal_Number}--[0-9]]/v;
/[\p{Script=Khmer}&&[\p{Letter}\p{Mark}\p{Number}]]/v;

let a = /regex/m // comment
let b = condition ? /regex/ : /another one/
return /regex/;
Expand Down Expand Up @@ -88,6 +94,41 @@ yield /regex/;
]],
["punctuation", "]"],

["comment", "// RegExp set notation"],

["punctuation", ";"],
["regex", [
["regex-delimiter", "/"],
["regex-source", "[A--B]"],
["regex-delimiter", "/"],
["regex-flags", "v"]
]],
["punctuation", ";"],

["regex", [
["regex-delimiter", "/"],
["regex-source", "[A--[0-9]]"],
["regex-delimiter", "/"],
["regex-flags", "v"]
]],
["punctuation", ";"],

["regex", [
["regex-delimiter", "/"],
["regex-source", "[\\p{Decimal_Number}--[0-9]]"],
["regex-delimiter", "/"],
["regex-flags", "v"]
]],
["punctuation", ";"],

["regex", [
["regex-delimiter", "/"],
["regex-source", "[\\p{Script=Khmer}&&[\\p{Letter}\\p{Mark}\\p{Number}]]"],
["regex-delimiter", "/"],
["regex-flags", "v"]
]],
["punctuation", ";"],

["keyword", "let"],
" a ",
["operator", "="],
Expand Down

0 comments on commit ca78cde

Please sign in to comment.