From 8d0b74b521c02284119c8825badf92a588715635 Mon Sep 17 00:00:00 2001 From: Michael Schmidt Date: Sun, 12 Sep 2021 19:52:23 +0200 Subject: [PATCH] Clojure: Improved tokenization (#3056) --- components/prism-clojure.js | 30 +++++++++++++++---- components/prism-clojure.min.js | 2 +- tests/languages/clojure/function_feature.test | 13 ++++++++ tests/languages/clojure/number_feature.test | 27 +++++++++++++++++ .../clojure/operator_and_punctuation.test | 20 ------------- tests/languages/clojure/operator_feature.test | 11 +++++++ .../clojure/punctuation_feature.test | 15 ++++++++++ tests/languages/clojure/symbol_feature.test | 11 +++++++ 8 files changed, 102 insertions(+), 27 deletions(-) create mode 100644 tests/languages/clojure/function_feature.test create mode 100644 tests/languages/clojure/number_feature.test delete mode 100644 tests/languages/clojure/operator_and_punctuation.test create mode 100644 tests/languages/clojure/operator_feature.test create mode 100644 tests/languages/clojure/punctuation_feature.test create mode 100644 tests/languages/clojure/symbol_feature.test diff --git a/components/prism-clojure.js b/components/prism-clojure.js index 9c16d32ffe..7077e78a02 100644 --- a/components/prism-clojure.js +++ b/components/prism-clojure.js @@ -1,16 +1,34 @@ // Copied from https://github.com/jeluard/prism-clojure Prism.languages.clojure = { - 'comment': /;.*/, - 'string': { - pattern: /"(?:[^"\\]|\\.)*"/, + 'comment': { + pattern: /;.*/, greedy: true }, - 'operator': /(?:::|[:|'])\b[a-z][\w*+!?-]*\b/i, //used for symbols and keywords + 'string': [ + { + pattern: /"(?:[^"\\]|\\.)*"/, + greedy: true + }, + // characters + /\\\w+/ + ], + 'symbol': { + pattern: /(^|[\s()\[\]{},])::?[\w*+!?'<>=/.-]+/, + lookbehind: true + }, 'keyword': { - pattern: /([^\w+*'?-])(?:def|if|do|let|\.\.|quote|var|->>|->|fn|loop|recur|throw|try|monitor-enter|\.|new|set!|def-|defn|defn-|defmacro|defmulti|defmethod|defstruct|defonce|declare|definline|definterface|defprotocol|==|defrecord|>=|deftype|<=|defproject|ns|\*|\+|-|\/|<|=|>|accessor|agent|agent-errors|aget|alength|all-ns|alter|and|append-child|apply|array-map|aset|aset-boolean|aset-byte|aset-char|aset-double|aset-float|aset-int|aset-long|aset-short|assert|assoc|await|await-for|bean|binding|bit-and|bit-not|bit-or|bit-shift-left|bit-shift-right|bit-xor|boolean|branch\?|butlast|byte|cast|char|children|class|clear-agent-errors|comment|commute|comp|comparator|complement|concat|conj|cons|constantly|cond|if-not|construct-proxy|contains\?|count|create-ns|create-struct|cycle|dec|deref|difference|disj|dissoc|distinct|doall|doc|dorun|doseq|dosync|dotimes|doto|double|down|drop|drop-while|edit|end\?|ensure|eval|every\?|false\?|ffirst|file-seq|filter|find|find-doc|find-ns|find-var|first|float|flush|for|fnseq|frest|gensym|get-proxy-class|get|hash-map|hash-set|identical\?|identity|if-let|import|in-ns|inc|index|insert-child|insert-left|insert-right|inspect-table|inspect-tree|instance\?|int|interleave|intersection|into|into-array|iterate|join|key|keys|keyword|keyword\?|last|lazy-cat|lazy-cons|left|lefts|line-seq|list\*|list|load|load-file|locking|long|macroexpand|macroexpand-1|make-array|make-node|map|map-invert|map\?|mapcat|max|max-key|memfn|merge|merge-with|meta|min|min-key|name|namespace|neg\?|newline|next|nil\?|node|not|not-any\?|not-every\?|not=|ns-imports|ns-interns|ns-map|ns-name|ns-publics|ns-refers|ns-resolve|ns-unmap|nth|nthrest|or|parse|partial|path|peek|pop|pos\?|pr|pr-str|print|print-str|println|println-str|prn|prn-str|project|proxy|proxy-mappings|quot|rand|rand-int|range|re-find|re-groups|re-matcher|re-matches|re-pattern|re-seq|read|read-line|reduce|ref|ref-set|refer|rem|remove|remove-method|remove-ns|rename|rename-keys|repeat|replace|replicate|resolve|rest|resultset-seq|reverse|rfirst|right|rights|root|rrest|rseq|second|select|select-keys|send|send-off|seq|seq-zip|seq\?|set|short|slurp|some|sort|sort-by|sorted-map|sorted-map-by|sorted-set|special-symbol\?|split-at|split-with|str|string\?|struct|struct-map|subs|subvec|symbol|symbol\?|sync|take|take-nth|take-while|test|time|to-array|to-array-2d|tree-seq|true\?|union|up|update-proxy|val|vals|var-get|var-set|var\?|vector|vector-zip|vector\?|when|when-first|when-let|when-not|with-local-vars|with-meta|with-open|with-out-str|xml-seq|xml-zip|zero\?|zipmap|zipper)(?=[^\w+*'?-])/, + pattern: /(\()(?:-|->|->>|\.|\.\.|\*|\/|\+|<|<=|=|==|>|>=|accessor|agent|agent-errors|aget|alength|all-ns|alter|and|append-child|apply|array-map|aset|aset-boolean|aset-byte|aset-char|aset-double|aset-float|aset-int|aset-long|aset-short|assert|assoc|await|await-for|bean|binding|bit-and|bit-not|bit-or|bit-shift-left|bit-shift-right|bit-xor|boolean|branch\?|butlast|byte|cast|char|children|class|clear-agent-errors|comment|commute|comp|comparator|complement|concat|cond|conj|cons|constantly|construct-proxy|contains\?|count|create-ns|create-struct|cycle|dec|declare|def|def-|definline|definterface|defmacro|defmethod|defmulti|defn|defn-|defonce|defproject|defprotocol|defrecord|defstruct|deftype|deref|difference|disj|dissoc|distinct|do|doall|doc|dorun|doseq|dosync|dotimes|doto|double|down|drop|drop-while|edit|end\?|ensure|eval|every\?|false\?|ffirst|file-seq|filter|find|find-doc|find-ns|find-var|first|float|flush|fn|fnseq|for|frest|gensym|get|get-proxy-class|hash-map|hash-set|identical\?|identity|if|if-let|if-not|import|in-ns|inc|index|insert-child|insert-left|insert-right|inspect-table|inspect-tree|instance\?|int|interleave|intersection|into|into-array|iterate|join|key|keys|keyword|keyword\?|last|lazy-cat|lazy-cons|left|lefts|let|line-seq|list|list\*|load|load-file|locking|long|loop|macroexpand|macroexpand-1|make-array|make-node|map|map-invert|map\?|mapcat|max|max-key|memfn|merge|merge-with|meta|min|min-key|monitor-enter|name|namespace|neg\?|new|newline|next|nil\?|node|not|not-any\?|not-every\?|not=|ns|ns-imports|ns-interns|ns-map|ns-name|ns-publics|ns-refers|ns-resolve|ns-unmap|nth|nthrest|or|parse|partial|path|peek|pop|pos\?|pr|pr-str|print|print-str|println|println-str|prn|prn-str|project|proxy|proxy-mappings|quot|quote|rand|rand-int|range|re-find|re-groups|re-matcher|re-matches|re-pattern|re-seq|read|read-line|recur|reduce|ref|ref-set|refer|rem|remove|remove-method|remove-ns|rename|rename-keys|repeat|replace|replicate|resolve|rest|resultset-seq|reverse|rfirst|right|rights|root|rrest|rseq|second|select|select-keys|send|send-off|seq|seq-zip|seq\?|set|set!|short|slurp|some|sort|sort-by|sorted-map|sorted-map-by|sorted-set|special-symbol\?|split-at|split-with|str|string\?|struct|struct-map|subs|subvec|symbol|symbol\?|sync|take|take-nth|take-while|test|throw|time|to-array|to-array-2d|tree-seq|true\?|try|union|up|update-proxy|val|vals|var|var-get|var-set|var\?|vector|vector-zip|vector\?|when|when-first|when-let|when-not|with-local-vars|with-meta|with-open|with-out-str|xml-seq|xml-zip|zero\?|zipmap|zipper)(?=[\s)]|$)/, lookbehind: true }, 'boolean': /\b(?:true|false|nil)\b/, - 'number': /\b[\da-f]+\b/i, + 'number': { + pattern: /(^|[^\w$@])(?:\d+(?:[/.]\d+)?(?:e[+-]?\d+)?|0x[a-f0-9]+|[1-9]\d?r[a-z0-9]+)[lmn]?(?![\w$@])/i, + lookbehind: true + }, + 'function': { + pattern: /((?:^|[^'])\()[\w*+!?'<>=/.-]+(?=[\s)]|$)/, + lookbehind: true + }, + 'operator': /[#@^`~]/, 'punctuation': /[{}\[\](),]/ }; diff --git a/components/prism-clojure.min.js b/components/prism-clojure.min.js index 666e2b0df9..2437df95e0 100644 --- a/components/prism-clojure.min.js +++ b/components/prism-clojure.min.js @@ -1 +1 @@ -Prism.languages.clojure={comment:/;.*/,string:{pattern:/"(?:[^"\\]|\\.)*"/,greedy:!0},operator:/(?:::|[:|'])\b[a-z][\w*+!?-]*\b/i,keyword:{pattern:/([^\w+*'?-])(?:def|if|do|let|\.\.|quote|var|->>|->|fn|loop|recur|throw|try|monitor-enter|\.|new|set!|def-|defn|defn-|defmacro|defmulti|defmethod|defstruct|defonce|declare|definline|definterface|defprotocol|==|defrecord|>=|deftype|<=|defproject|ns|\*|\+|-|\/|<|=|>|accessor|agent|agent-errors|aget|alength|all-ns|alter|and|append-child|apply|array-map|aset|aset-boolean|aset-byte|aset-char|aset-double|aset-float|aset-int|aset-long|aset-short|assert|assoc|await|await-for|bean|binding|bit-and|bit-not|bit-or|bit-shift-left|bit-shift-right|bit-xor|boolean|branch\?|butlast|byte|cast|char|children|class|clear-agent-errors|comment|commute|comp|comparator|complement|concat|conj|cons|constantly|cond|if-not|construct-proxy|contains\?|count|create-ns|create-struct|cycle|dec|deref|difference|disj|dissoc|distinct|doall|doc|dorun|doseq|dosync|dotimes|doto|double|down|drop|drop-while|edit|end\?|ensure|eval|every\?|false\?|ffirst|file-seq|filter|find|find-doc|find-ns|find-var|first|float|flush|for|fnseq|frest|gensym|get-proxy-class|get|hash-map|hash-set|identical\?|identity|if-let|import|in-ns|inc|index|insert-child|insert-left|insert-right|inspect-table|inspect-tree|instance\?|int|interleave|intersection|into|into-array|iterate|join|key|keys|keyword|keyword\?|last|lazy-cat|lazy-cons|left|lefts|line-seq|list\*|list|load|load-file|locking|long|macroexpand|macroexpand-1|make-array|make-node|map|map-invert|map\?|mapcat|max|max-key|memfn|merge|merge-with|meta|min|min-key|name|namespace|neg\?|newline|next|nil\?|node|not|not-any\?|not-every\?|not=|ns-imports|ns-interns|ns-map|ns-name|ns-publics|ns-refers|ns-resolve|ns-unmap|nth|nthrest|or|parse|partial|path|peek|pop|pos\?|pr|pr-str|print|print-str|println|println-str|prn|prn-str|project|proxy|proxy-mappings|quot|rand|rand-int|range|re-find|re-groups|re-matcher|re-matches|re-pattern|re-seq|read|read-line|reduce|ref|ref-set|refer|rem|remove|remove-method|remove-ns|rename|rename-keys|repeat|replace|replicate|resolve|rest|resultset-seq|reverse|rfirst|right|rights|root|rrest|rseq|second|select|select-keys|send|send-off|seq|seq-zip|seq\?|set|short|slurp|some|sort|sort-by|sorted-map|sorted-map-by|sorted-set|special-symbol\?|split-at|split-with|str|string\?|struct|struct-map|subs|subvec|symbol|symbol\?|sync|take|take-nth|take-while|test|time|to-array|to-array-2d|tree-seq|true\?|union|up|update-proxy|val|vals|var-get|var-set|var\?|vector|vector-zip|vector\?|when|when-first|when-let|when-not|with-local-vars|with-meta|with-open|with-out-str|xml-seq|xml-zip|zero\?|zipmap|zipper)(?=[^\w+*'?-])/,lookbehind:!0},boolean:/\b(?:true|false|nil)\b/,number:/\b[\da-f]+\b/i,punctuation:/[{}\[\](),]/}; \ No newline at end of file +Prism.languages.clojure={comment:{pattern:/;.*/,greedy:!0},string:[{pattern:/"(?:[^"\\]|\\.)*"/,greedy:!0},/\\\w+/],symbol:{pattern:/(^|[\s()\[\]{},])::?[\w*+!?'<>=/.-]+/,lookbehind:!0},keyword:{pattern:/(\()(?:-|->|->>|\.|\.\.|\*|\/|\+|<|<=|=|==|>|>=|accessor|agent|agent-errors|aget|alength|all-ns|alter|and|append-child|apply|array-map|aset|aset-boolean|aset-byte|aset-char|aset-double|aset-float|aset-int|aset-long|aset-short|assert|assoc|await|await-for|bean|binding|bit-and|bit-not|bit-or|bit-shift-left|bit-shift-right|bit-xor|boolean|branch\?|butlast|byte|cast|char|children|class|clear-agent-errors|comment|commute|comp|comparator|complement|concat|cond|conj|cons|constantly|construct-proxy|contains\?|count|create-ns|create-struct|cycle|dec|declare|def|def-|definline|definterface|defmacro|defmethod|defmulti|defn|defn-|defonce|defproject|defprotocol|defrecord|defstruct|deftype|deref|difference|disj|dissoc|distinct|do|doall|doc|dorun|doseq|dosync|dotimes|doto|double|down|drop|drop-while|edit|end\?|ensure|eval|every\?|false\?|ffirst|file-seq|filter|find|find-doc|find-ns|find-var|first|float|flush|fn|fnseq|for|frest|gensym|get|get-proxy-class|hash-map|hash-set|identical\?|identity|if|if-let|if-not|import|in-ns|inc|index|insert-child|insert-left|insert-right|inspect-table|inspect-tree|instance\?|int|interleave|intersection|into|into-array|iterate|join|key|keys|keyword|keyword\?|last|lazy-cat|lazy-cons|left|lefts|let|line-seq|list|list\*|load|load-file|locking|long|loop|macroexpand|macroexpand-1|make-array|make-node|map|map-invert|map\?|mapcat|max|max-key|memfn|merge|merge-with|meta|min|min-key|monitor-enter|name|namespace|neg\?|new|newline|next|nil\?|node|not|not-any\?|not-every\?|not=|ns|ns-imports|ns-interns|ns-map|ns-name|ns-publics|ns-refers|ns-resolve|ns-unmap|nth|nthrest|or|parse|partial|path|peek|pop|pos\?|pr|pr-str|print|print-str|println|println-str|prn|prn-str|project|proxy|proxy-mappings|quot|quote|rand|rand-int|range|re-find|re-groups|re-matcher|re-matches|re-pattern|re-seq|read|read-line|recur|reduce|ref|ref-set|refer|rem|remove|remove-method|remove-ns|rename|rename-keys|repeat|replace|replicate|resolve|rest|resultset-seq|reverse|rfirst|right|rights|root|rrest|rseq|second|select|select-keys|send|send-off|seq|seq-zip|seq\?|set|set!|short|slurp|some|sort|sort-by|sorted-map|sorted-map-by|sorted-set|special-symbol\?|split-at|split-with|str|string\?|struct|struct-map|subs|subvec|symbol|symbol\?|sync|take|take-nth|take-while|test|throw|time|to-array|to-array-2d|tree-seq|true\?|try|union|up|update-proxy|val|vals|var|var-get|var-set|var\?|vector|vector-zip|vector\?|when|when-first|when-let|when-not|with-local-vars|with-meta|with-open|with-out-str|xml-seq|xml-zip|zero\?|zipmap|zipper)(?=[\s)]|$)/,lookbehind:!0},boolean:/\b(?:true|false|nil)\b/,number:{pattern:/(^|[^\w$@])(?:\d+(?:[/.]\d+)?(?:e[+-]?\d+)?|0x[a-f0-9]+|[1-9]\d?r[a-z0-9]+)[lmn]?(?![\w$@])/i,lookbehind:!0},function:{pattern:/((?:^|[^'])\()[\w*+!?'<>=/.-]+(?=[\s)]|$)/,lookbehind:!0},operator:/[#@^`~]/,punctuation:/[{}\[\](),]/}; \ No newline at end of file diff --git a/tests/languages/clojure/function_feature.test b/tests/languages/clojure/function_feature.test new file mode 100644 index 0000000000..28fd4a5f83 --- /dev/null +++ b/tests/languages/clojure/function_feature.test @@ -0,0 +1,13 @@ +(foo args) + +; not a function +'(a b c) + +---------------------------------------------------- + +[ + ["punctuation", "("], ["function", "foo"], " args", ["punctuation", ")"], + + ["comment", "; not a function"], + "\r\n'", ["punctuation", "("], "a b c", ["punctuation", ")"] +] diff --git a/tests/languages/clojure/number_feature.test b/tests/languages/clojure/number_feature.test new file mode 100644 index 0000000000..7ca0213914 --- /dev/null +++ b/tests/languages/clojure/number_feature.test @@ -0,0 +1,27 @@ +123 +01234 +0xFFF +2r0101011 +8r52 +36r16 +1.0 +1M +2/3 +0.6666666666666666 +36786883868216818816N + +---------------------------------------------------- + +[ + ["number", "123"], + ["number", "01234"], + ["number", "0xFFF"], + ["number", "2r0101011"], + ["number", "8r52"], + ["number", "36r16"], + ["number", "1.0"], + ["number", "1M"], + ["number", "2/3"], + ["number", "0.6666666666666666"], + ["number", "36786883868216818816N"] +] diff --git a/tests/languages/clojure/operator_and_punctuation.test b/tests/languages/clojure/operator_and_punctuation.test deleted file mode 100644 index e90acb631d..0000000000 --- a/tests/languages/clojure/operator_and_punctuation.test +++ /dev/null @@ -1,20 +0,0 @@ -(::example [x y] (:kebab-case x y)) - ----------------------------------------------------- - -[ - ["punctuation", "("], - ["operator", "::example"], - ["punctuation", "["], - "x y", - ["punctuation", "]"], - ["punctuation", "("], - ["operator", ":kebab-case"], - " x y", - ["punctuation", ")"], - ["punctuation", ")"] -] - ----------------------------------------------------- - -Checks for operators and punctuation. \ No newline at end of file diff --git a/tests/languages/clojure/operator_feature.test b/tests/languages/clojure/operator_feature.test new file mode 100644 index 0000000000..7c57567c97 --- /dev/null +++ b/tests/languages/clojure/operator_feature.test @@ -0,0 +1,11 @@ +# @ ^ ` ~ + +---------------------------------------------------- + +[ + ["operator", "#"], + ["operator", "@"], + ["operator", "^"], + ["operator", "`"], + ["operator", "~"] +] diff --git a/tests/languages/clojure/punctuation_feature.test b/tests/languages/clojure/punctuation_feature.test new file mode 100644 index 0000000000..ea9bbc2188 --- /dev/null +++ b/tests/languages/clojure/punctuation_feature.test @@ -0,0 +1,15 @@ +{ } [ ] ( ) +, + +---------------------------------------------------- + +[ + ["punctuation", "{"], + ["punctuation", "}"], + ["punctuation", "["], + ["punctuation", "]"], + ["punctuation", "("], + ["punctuation", ")"], + + ["punctuation", ","] +] diff --git a/tests/languages/clojure/symbol_feature.test b/tests/languages/clojure/symbol_feature.test new file mode 100644 index 0000000000..e43df4fd9c --- /dev/null +++ b/tests/languages/clojure/symbol_feature.test @@ -0,0 +1,11 @@ +:foo +:foo/bar-baz +::foo + +---------------------------------------------------- + +[ + ["symbol", ":foo"], + ["symbol", ":foo/bar-baz"], + ["symbol", "::foo"] +]