From d04d166df80f8cbe79e723c602f30416fef21d76 Mon Sep 17 00:00:00 2001 From: Michael Schmidt Date: Sun, 5 Dec 2021 14:54:45 +0100 Subject: [PATCH] Structured text: Improved tokenization (#3213) --- components/prism-iecst.js | 24 +++++---- components/prism-iecst.min.js | 2 +- .../{symbol.test => address_feature.test} | 8 +-- .../{number.test => number_feature.test} | 0 tests/languages/iecst/operator_feature.test | 54 +++++++++++++++++++ .../languages/iecst/punctuation_feature.test | 15 ++++++ 6 files changed, 87 insertions(+), 16 deletions(-) rename tests/languages/iecst/{symbol.test => address_feature.test} (75%) rename tests/languages/iecst/{number.test => number_feature.test} (100%) create mode 100644 tests/languages/iecst/operator_feature.test create mode 100644 tests/languages/iecst/punctuation_feature.test diff --git a/components/prism-iecst.js b/components/prism-iecst.js index 4456ad31c3..3e0fb97fca 100644 --- a/components/prism-iecst.js +++ b/components/prism-iecst.js @@ -3,6 +3,7 @@ Prism.languages.iecst = { { pattern: /(^|[^\\])(?:\/\*[\s\S]*?(?:\*\/|$)|\(\*[\s\S]*?(?:\*\)|$)|\{[\s\S]*?(?:\}|$))/, lookbehind: true, + greedy: true, }, { pattern: /(^|[^\\:])\/\/.*/, @@ -14,17 +15,18 @@ Prism.languages.iecst = { pattern: /(["'])(?:\\(?:\r\n|[\s\S])|(?!\1)[^\\\r\n])*\1/, greedy: true, }, - 'class-name': /\b(?:END_)?(?:PROGRAM|CONFIGURATION|INTERFACE|FUNCTION_BLOCK|FUNCTION|ACTION|TRANSITION|TYPE|STRUCT|(?:INITIAL_)?STEP|NAMESPACE|LIBRARY|CHANNEL|FOLDER|RESOURCE|VAR_(?:ACCESS|CONFIG|EXTERNAL|GLOBAL|INPUT|IN_OUT|PUTPUT|TEMP)|VAR|METHOD|PROPERTY)\b/i, - 'keyword': /\b(?:AT|BY|(?:END_)?(?:CASE|FOR|IF|REPEAT|WHILE)|CONSTANT|CONTINUE|DO|ELSE|ELSIF|EXIT|EXTENDS|FROM|GET|GOTO|IMPLEMENTS|JMP|NON_RETAIN|PRIVATE|PROTECTED|PUBLIC|RETAIN|RETURN|SET|TASK|THEN|TO|UNTIL|USING|WITH|__CATCH|__ENDTRY|__FINALLY|__TRY)\b/, - 'variable': /\b(?:ANY|ARRAY|AT|BOOL|BYTE|U?(?:D|L|S)?INT|(?:D|L)?WORD|DATE(?:_AND_TIME)?|DT|L?REAL|POINTER|STRING|TIME(?:_OF_DAY)?|TOD)\b/, - 'symbol': /%[IQM][XBWDL][\d.]*|%[IQ][\d.]*/, + 'keyword': [ + /\b(?:END_)?(?:PROGRAM|CONFIGURATION|INTERFACE|FUNCTION_BLOCK|FUNCTION|ACTION|TRANSITION|TYPE|STRUCT|(?:INITIAL_)?STEP|NAMESPACE|LIBRARY|CHANNEL|FOLDER|RESOURCE|VAR_(?:ACCESS|CONFIG|EXTERNAL|GLOBAL|INPUT|IN_OUT|OUTPUT|TEMP)|VAR|METHOD|PROPERTY)\b/i, + /\b(?:AT|BY|(?:END_)?(?:CASE|FOR|IF|REPEAT|WHILE)|CONSTANT|CONTINUE|DO|ELSE|ELSIF|EXIT|EXTENDS|FROM|GET|GOTO|IMPLEMENTS|JMP|NON_RETAIN|OF|PRIVATE|PROTECTED|PUBLIC|RETAIN|RETURN|SET|TASK|THEN|TO|UNTIL|USING|WITH|__CATCH|__ENDTRY|__FINALLY|__TRY)\b/ + ], + 'class-name': /\b(?:ANY|ARRAY|BOOL|BYTE|U?(?:D|L|S)?INT|(?:D|L)?WORD|DATE(?:_AND_TIME)?|DT|L?REAL|POINTER|STRING|TIME(?:_OF_DAY)?|TOD)\b/, + 'address': { + pattern: /%[IQM][XBWDL][\d.]*|%[IQ][\d.]*/, + alias: 'symbol' + }, 'number': /\b(?:16#[\da-f]+|2#[01_]+|0x[\da-f]+)\b|\b(?:D|DT|T|TOD)#[\d_shmd:]*|\b[A-Z]*#[\d.,_]*|(?:\b\d+(?:\.\d*)?|\B\.\d+)(?:e[+-]?\d+)?/i, 'boolean': /\b(?:FALSE|NULL|TRUE)\b/, - 'function': /\w+(?=\()/, - 'operator': /(?:S?R?:?=>?|&&?|\*\*?|<=?|>=?|[-:^/+])|\b(?:AND|EQ|GE|GT|LE|LT|MOD|NE|NOT|OR|XOR)\b/, - 'punctuation': /[();]/, - 'type': { - 'pattern': /#/, - 'alias': 'selector', - }, + 'operator': /S?R?:?=>?|&&?|\*\*?|<[=>]?|>=?|[-:^/+#]|\b(?:AND|EQ|EXPT|GE|GT|LE|LT|MOD|NE|NOT|OR|XOR)\b/, + 'function': /\b[a-z_]\w*(?=\s*\()/i, + 'punctuation': /[()[\].,;]/, }; diff --git a/components/prism-iecst.min.js b/components/prism-iecst.min.js index 97757be8da..270815bcec 100644 --- a/components/prism-iecst.min.js +++ b/components/prism-iecst.min.js @@ -1 +1 @@ -Prism.languages.iecst={comment:[{pattern:/(^|[^\\])(?:\/\*[\s\S]*?(?:\*\/|$)|\(\*[\s\S]*?(?:\*\)|$)|\{[\s\S]*?(?:\}|$))/,lookbehind:!0},{pattern:/(^|[^\\:])\/\/.*/,lookbehind:!0,greedy:!0}],string:{pattern:/(["'])(?:\\(?:\r\n|[\s\S])|(?!\1)[^\\\r\n])*\1/,greedy:!0},"class-name":/\b(?:END_)?(?:PROGRAM|CONFIGURATION|INTERFACE|FUNCTION_BLOCK|FUNCTION|ACTION|TRANSITION|TYPE|STRUCT|(?:INITIAL_)?STEP|NAMESPACE|LIBRARY|CHANNEL|FOLDER|RESOURCE|VAR_(?:ACCESS|CONFIG|EXTERNAL|GLOBAL|INPUT|IN_OUT|PUTPUT|TEMP)|VAR|METHOD|PROPERTY)\b/i,keyword:/\b(?:AT|BY|(?:END_)?(?:CASE|FOR|IF|REPEAT|WHILE)|CONSTANT|CONTINUE|DO|ELSE|ELSIF|EXIT|EXTENDS|FROM|GET|GOTO|IMPLEMENTS|JMP|NON_RETAIN|PRIVATE|PROTECTED|PUBLIC|RETAIN|RETURN|SET|TASK|THEN|TO|UNTIL|USING|WITH|__CATCH|__ENDTRY|__FINALLY|__TRY)\b/,variable:/\b(?:ANY|ARRAY|AT|BOOL|BYTE|U?(?:D|L|S)?INT|(?:D|L)?WORD|DATE(?:_AND_TIME)?|DT|L?REAL|POINTER|STRING|TIME(?:_OF_DAY)?|TOD)\b/,symbol:/%[IQM][XBWDL][\d.]*|%[IQ][\d.]*/,number:/\b(?:16#[\da-f]+|2#[01_]+|0x[\da-f]+)\b|\b(?:D|DT|T|TOD)#[\d_shmd:]*|\b[A-Z]*#[\d.,_]*|(?:\b\d+(?:\.\d*)?|\B\.\d+)(?:e[+-]?\d+)?/i,boolean:/\b(?:FALSE|NULL|TRUE)\b/,function:/\w+(?=\()/,operator:/(?:S?R?:?=>?|&&?|\*\*?|<=?|>=?|[-:^/+])|\b(?:AND|EQ|GE|GT|LE|LT|MOD|NE|NOT|OR|XOR)\b/,punctuation:/[();]/,type:{pattern:/#/,alias:"selector"}}; \ No newline at end of file +Prism.languages.iecst={comment:[{pattern:/(^|[^\\])(?:\/\*[\s\S]*?(?:\*\/|$)|\(\*[\s\S]*?(?:\*\)|$)|\{[\s\S]*?(?:\}|$))/,lookbehind:!0,greedy:!0},{pattern:/(^|[^\\:])\/\/.*/,lookbehind:!0,greedy:!0}],string:{pattern:/(["'])(?:\\(?:\r\n|[\s\S])|(?!\1)[^\\\r\n])*\1/,greedy:!0},keyword:[/\b(?:END_)?(?:PROGRAM|CONFIGURATION|INTERFACE|FUNCTION_BLOCK|FUNCTION|ACTION|TRANSITION|TYPE|STRUCT|(?:INITIAL_)?STEP|NAMESPACE|LIBRARY|CHANNEL|FOLDER|RESOURCE|VAR_(?:ACCESS|CONFIG|EXTERNAL|GLOBAL|INPUT|IN_OUT|OUTPUT|TEMP)|VAR|METHOD|PROPERTY)\b/i,/\b(?:AT|BY|(?:END_)?(?:CASE|FOR|IF|REPEAT|WHILE)|CONSTANT|CONTINUE|DO|ELSE|ELSIF|EXIT|EXTENDS|FROM|GET|GOTO|IMPLEMENTS|JMP|NON_RETAIN|OF|PRIVATE|PROTECTED|PUBLIC|RETAIN|RETURN|SET|TASK|THEN|TO|UNTIL|USING|WITH|__CATCH|__ENDTRY|__FINALLY|__TRY)\b/],"class-name":/\b(?:ANY|ARRAY|BOOL|BYTE|U?(?:D|L|S)?INT|(?:D|L)?WORD|DATE(?:_AND_TIME)?|DT|L?REAL|POINTER|STRING|TIME(?:_OF_DAY)?|TOD)\b/,address:{pattern:/%[IQM][XBWDL][\d.]*|%[IQ][\d.]*/,alias:"symbol"},number:/\b(?:16#[\da-f]+|2#[01_]+|0x[\da-f]+)\b|\b(?:D|DT|T|TOD)#[\d_shmd:]*|\b[A-Z]*#[\d.,_]*|(?:\b\d+(?:\.\d*)?|\B\.\d+)(?:e[+-]?\d+)?/i,boolean:/\b(?:FALSE|NULL|TRUE)\b/,operator:/S?R?:?=>?|&&?|\*\*?|<[=>]?|>=?|[-:^/+#]|\b(?:AND|EQ|EXPT|GE|GT|LE|LT|MOD|NE|NOT|OR|XOR)\b/,function:/\b[a-z_]\w*(?=\s*\()/i,punctuation:/[()[\].,;]/}; \ No newline at end of file diff --git a/tests/languages/iecst/symbol.test b/tests/languages/iecst/address_feature.test similarity index 75% rename from tests/languages/iecst/symbol.test rename to tests/languages/iecst/address_feature.test index a55153dced..e556407b22 100644 --- a/tests/languages/iecst/symbol.test +++ b/tests/languages/iecst/address_feature.test @@ -5,18 +5,18 @@ END_VAR ---------------------------------------------------- [ - ["class-name", "VAR"], + ["keyword", "VAR"], "\r\n varname ", ["keyword", "AT"], - ["symbol", "%QX1.0.0"], + ["address", "%QX1.0.0"], ["operator", ":"], - ["variable", "BOOL"], + ["class-name", "BOOL"], ["operator", ":="], ["boolean", "TRUE"], ["punctuation", ";"], - ["class-name", "END_VAR"] + ["keyword", "END_VAR"] ] ---------------------------------------------------- diff --git a/tests/languages/iecst/number.test b/tests/languages/iecst/number_feature.test similarity index 100% rename from tests/languages/iecst/number.test rename to tests/languages/iecst/number_feature.test diff --git a/tests/languages/iecst/operator_feature.test b/tests/languages/iecst/operator_feature.test new file mode 100644 index 0000000000..68b5b74e1d --- /dev/null +++ b/tests/languages/iecst/operator_feature.test @@ -0,0 +1,54 @@ += <> < <= > >= ++ - * / ** ^ & && + +: := +# + +AND +EQ +EXPT +GE +GT +LE +LT +MOD +NE +NOT +OR +XOR + +---------------------------------------------------- + +[ + ["operator", "="], + ["operator", "<>"], + ["operator", "<"], + ["operator", "<="], + ["operator", ">"], + ["operator", ">="], + + ["operator", "+"], + ["operator", "-"], + ["operator", "*"], + ["operator", "/"], + ["operator", "**"], + ["operator", "^"], + ["operator", "&"], + ["operator", "&&"], + + ["operator", ":"], ["operator", ":="], + ["operator", "#"], + + ["operator", "AND"], + ["operator", "EQ"], + ["operator", "EXPT"], + ["operator", "GE"], + ["operator", "GT"], + ["operator", "LE"], + ["operator", "LT"], + ["operator", "MOD"], + ["operator", "NE"], + ["operator", "NOT"], + ["operator", "OR"], + ["operator", "XOR"] +] diff --git a/tests/languages/iecst/punctuation_feature.test b/tests/languages/iecst/punctuation_feature.test new file mode 100644 index 0000000000..b1400d0cae --- /dev/null +++ b/tests/languages/iecst/punctuation_feature.test @@ -0,0 +1,15 @@ +( ) [ ] +, ; . + +---------------------------------------------------- + +[ + ["punctuation", "("], + ["punctuation", ")"], + ["punctuation", "["], + ["punctuation", "]"], + + ["punctuation", ","], + ["punctuation", ";"], + ["punctuation", "."] +]