From 8a72830ab4b08d8d21ea4fc75fa27ba1ea872da2 Mon Sep 17 00:00:00 2001 From: Michael Schmidt Date: Thu, 7 May 2020 22:44:16 +0200 Subject: [PATCH] Regex: Added aliases and minor improvements (#2325) This adds a lot of aliases to the regex tokens, so themes can apply their styles. It also makes a few improvements. See the PR for more details. --- components/prism-regex.js | 80 +++++++++++++------ components/prism-regex.min.js | 2 +- examples/prism-regex.html | 46 +++++++++++ tests/examples-test.js | 4 +- tests/languages/regex/charset_feature.test | 8 ++ tests/languages/regex/quantifier_feature.test | 22 ++++- 6 files changed, 132 insertions(+), 30 deletions(-) create mode 100644 examples/prism-regex.html diff --git a/components/prism-regex.js b/components/prism-regex.js index ec94277045..7b4d144035 100644 --- a/components/prism-regex.js +++ b/components/prism-regex.js @@ -4,8 +4,15 @@ pattern: /\\[\\(){}[\]^$+*?|.]/, alias: 'escape' }; - var escape = /\\(?:x[\da-fA-F]{2}|u[\da-fA-F]{4}|u\{[\da-fA-F]+\}|c[a-zA-Z]|0[0-7]{0,2}|[123][0-7]{2}|.)/ - var charClass = /\\[wsd]|\.|\\p{[^{}]+}/i + var escape = /\\(?:x[\da-fA-F]{2}|u[\da-fA-F]{4}|u\{[\da-fA-F]+\}|c[a-zA-Z]|0[0-7]{0,2}|[123][0-7]{2}|.)/; + var charClass = { + pattern: /\.|\\[wsd]|\\p{[^{}]+}/i, + alias: 'class-name' + }; + var charClassWithoutDot = { + pattern: /\\[wsd]|\\p{[^{}]+}/i, + alias: 'class-name' + }; var rangeChar = '(?:[^\\\\-]|' + escape.source + ')'; var range = RegExp(rangeChar + '-' + rangeChar); @@ -17,16 +24,6 @@ alias: 'variable' }; - var backreference = [ - /\\(?![123][0-7]{2})[1-9]/, // a backreference which is not an octal escape - { - pattern: /\\k<[^<>']+>/, - inside: { - 'group-name': groupName - } - } - ]; - Prism.languages.regex = { 'charset': { pattern: /((?:^|[^\\])(?:\\\\)*)\[(?:[^\\\]]|\\[\s\S])*\]/, @@ -35,25 +32,47 @@ 'charset-negation': { pattern: /(^\[)\^/, lookbehind: true, + alias: 'operator' + }, + 'charset-punctuation': { + pattern: /^\[|\]$/, + alias: 'punctuation' }, - 
'charset-punctuation': /^\[|\]$/, 'range': { pattern: range, inside: { 'escape': escape, - 'range-punctuation': /-/ + 'range-punctuation': { + pattern: /-/, + alias: 'operator' + } } }, 'special-escape': specialEscape, - 'charclass': charClass, - 'backreference': backreference, + 'charclass': charClassWithoutDot, 'escape': escape } }, 'special-escape': specialEscape, 'charclass': charClass, - 'backreference': backreference, - 'anchor': /[$^]|\\[ABbGZz]/, + 'backreference': [ + { + // a backreference which is not an octal escape + pattern: /\\(?![123][0-7]{2})[1-9]/, + alias: 'keyword' + }, + { + pattern: /\\k<[^<>']+>/, + alias: 'keyword', + inside: { + 'group-name': groupName + } + } + ], + 'anchor': { + pattern: /[$^]|\\[ABbGZz]/, + alias: 'function' + }, 'escape': escape, 'group': [ { @@ -62,14 +81,24 @@ // (), (?), (?'name'), (?>), (?:), (?=), (?!), (?<=), (?']+>|'[^<>']+'|[>:]|']+(?=[>']$)/,lookbehind:!0,alias:"variable"},c=[/\\(?![123][0-7]{2})[1-9]/,{pattern:/\\k<[^<>']+>/,inside:{"group-name":t}}];n.languages.regex={charset:{pattern:/((?:^|[^\\])(?:\\\\)*)\[(?:[^\\\]]|\\[\s\S])*\]/,lookbehind:!0,inside:{"charset-negation":{pattern:/(^\[)\^/,lookbehind:!0},"charset-punctuation":/^\[|\]$/,range:{pattern:s,inside:{escape:a,"range-punctuation":/-/}},"special-escape":e,charclass:r,backreference:c,escape:a}},"special-escape":e,charclass:r,backreference:c,anchor:/[$^]|\\[ABbGZz]/,escape:a,group:[{pattern:/\((?:\?(?:<[^<>']+>|'[^<>']+'|[>:]|']+(?=[>']$)/,lookbehind:!0,alias:"variable"};n.languages.regex={charset:{pattern:/((?:^|[^\\])(?:\\\\)*)\[(?:[^\\\]]|\\[\s\S])*\]/,lookbehind:!0,inside:{"charset-negation":{pattern:/(^\[)\^/,lookbehind:!0,alias:"operator"},"charset-punctuation":{pattern:/^\[|\]$/,alias:"punctuation"},range:{pattern:i,inside:{escape:e,"range-punctuation":{pattern:/-/,alias:"operator"}}},"special-escape":a,charclass:{pattern:/\\[wsd]|\\p{[^{}]+}/i,alias:"class-name"},escape:e}},"special-escape":a,charclass:{pattern:/\.|\\[wsd]|\\p{[^{}]+}/i,alias:
"class-name"},backreference:[{pattern:/\\(?![123][0-7]{2})[1-9]/,alias:"keyword"},{pattern:/\\k<[^<>']+>/,alias:"keyword",inside:{"group-name":r}}],anchor:{pattern:/[$^]|\\[ABbGZz]/,alias:"function"},escape:e,group:[{pattern:/\((?:\?(?:<[^<>']+>|'[^<>']+'|[>:]|The regex language can be used for inline regex snippets like (?<number>\d+)[-_ ]\k<number> but it mainly adds itself to other languages such as:

+ +

JavaScript

+
Prism.languages.markup = {
+	'comment': /<!--[\s\S]*?-->/,
+	'prolog': /<\?[\s\S]+?\?>/,
+	'doctype': {
+		pattern: /<!DOCTYPE(?:[^>"'[\]]|"[^"]*"|'[^']*')+(?:\[(?:[^<"'\]]|"[^"]*"|'[^']*'|<(?!!--)|<!--(?:[^-]|-(?!->))*-->)*\]\s*)?>/i,
+		greedy: true
+	},
+	'cdata': /<!\[CDATA\[[\s\S]*?]]>/i,
+	'tag': {
+		pattern: /<\/?(?!\d)[^\s>\/=$<%]+(?:\s(?:\s*[^\s>\/=]+(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s'">=]+(?=[\s>]))|(?=[\s/>])))+)?\s*\/?>/i,
+		greedy: true,
+		inside: {
+			'tag': {
+				pattern: /^<\/?[^\s>\/]+/i,
+				inside: {
+					'punctuation': /^<\/?/,
+					'namespace': /^[^\s>\/:]+:/
+				}
+			},
+			'attr-value': {
+				pattern: /=\s*(?:"[^"]*"|'[^']*'|[^\s'">=]+)/i,
+				inside: {
+					'punctuation': [
+						/^=/,
+						{
+							pattern: /^(\s*)["']|["']$/,
+							lookbehind: true
+						}
+					]
+				}
+			},
+			'punctuation': /\/?>/,
+			'attr-name': {
+				pattern: /[^\s>\/]+/,
+				inside: {
+					'namespace': /^[^\s>\/:]+:/
+				}
+			}
+
+		}
+	},
+	'entity': /&#?[\da-z]{1,8};/i
+};
diff --git a/tests/examples-test.js b/tests/examples-test.js index b45c684917..0a4f3e4164 100644 --- a/tests/examples-test.js +++ b/tests/examples-test.js @@ -13,9 +13,7 @@ describe('Examples', function () { 'markup-templating', 't4-templating', // this does alter some languages but it's mainly a library - 'javadoclike', - // Regex doesn't have any classes supported by our themes and mainly extends other languages - 'regex' + 'javadoclike' ]); const validFiles = new Set(); diff --git a/tests/languages/regex/charset_feature.test b/tests/languages/regex/charset_feature.test index 0a970da1a9..317216e2de 100644 --- a/tests/languages/regex/charset_feature.test +++ b/tests/languages/regex/charset_feature.test @@ -2,6 +2,7 @@ [^] [foo] [\]\b] +[.^$\1] ---------------------------------------------------- @@ -28,6 +29,13 @@ ["special-escape", "\\]"], ["escape", "\\b"], ["charset-punctuation", "]"] + ]], + + ["charset", [ + ["charset-punctuation", "["], + ".^$", + ["escape", "\\1"], + ["charset-punctuation", "]"] ]] ] diff --git a/tests/languages/regex/quantifier_feature.test b/tests/languages/regex/quantifier_feature.test index 87246d65bd..bd4e9bf999 100644 --- a/tests/languages/regex/quantifier_feature.test +++ b/tests/languages/regex/quantifier_feature.test @@ -1,6 +1,12 @@ * + ? {2} {2,} {0,1} +*? +? ?? +{2}? {2,}? {0,1}? + +*+ ++ ?+ +{2}+ {2,}+ {0,1}+ + ---------------------------------------------------- [ @@ -9,7 +15,21 @@ ["quantifier", "?"], ["quantifier", "{2}"], ["quantifier", "{2,}"], - ["quantifier", "{0,1}"] + ["quantifier", "{0,1}"], + + ["quantifier", "*?"], + ["quantifier", "+?"], + ["quantifier", "??"], + ["quantifier", "{2}?"], + ["quantifier", "{2,}?"], + ["quantifier", "{0,1}?"], + + ["quantifier", "*+"], + ["quantifier", "++"], + ["quantifier", "?+"], + ["quantifier", "{2}+"], + ["quantifier", "{2,}+"], + ["quantifier", "{0,1}+"] ] ----------------------------------------------------