Skip to content

Commit

Permalink
Ruby: Improved tokenization (#3193)
Browse files (browse the repository at this point in the history)
  • Loading branch information
RunDevelopment committed Nov 22, 2021
1 parent 6add768 commit 86028ad
Show file tree
Hide file tree
Showing 23 changed files with 1,109 additions and 525 deletions.
2 changes: 1 addition & 1 deletion components/prism-crystal.js
Expand Up @@ -11,7 +11,7 @@
number: /\b(?:0b[01_]*[01]|0o[0-7_]*[0-7]|0x[\da-fA-F_]*[\da-fA-F]|(?:\d(?:[\d_]*\d)?)(?:\.[\d_]*\d)?(?:[eE][+-]?[\d_]*\d)?)(?:_(?:[uif](?:8|16|32|64))?)?\b/
});

Prism.languages.insertBefore('crystal', 'string', {
Prism.languages.insertBefore('crystal', 'string-literal', {
attribute: {
pattern: /@\[.+?\]/,
alias: 'attr-name',
Expand Down
2 changes: 1 addition & 1 deletion components/prism-crystal.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

217 changes: 134 additions & 83 deletions components/prism-ruby.js
Expand Up @@ -6,133 +6,184 @@
*/
(function (Prism) {
Prism.languages.ruby = Prism.languages.extend('clike', {
'comment': [
/#.*/,
{
pattern: /^=begin\s[\s\S]*?^=end/m,
greedy: true
}
],
'comment': {
pattern: /#.*|^=begin\s[\s\S]*?^=end/m,
greedy: true
},
'class-name': {
pattern: /(\b(?:class)\s+|\bcatch\s+\()[\w.\\]+/i,
pattern: /(\b(?:class|module)\s+|\bcatch\s+\()[\w.\\]+|\b[A-Z_]\w*(?=\s*\.\s*new\b)/,
lookbehind: true,
inside: {
'punctuation': /[.\\]/
}
},
'keyword': /\b(?:BEGIN|END|alias|and|begin|break|case|class|def|define_method|defined|do|each|else|elsif|end|ensure|extend|for|if|in|include|module|new|next|nil|not|or|prepend|private|protected|public|raise|redo|require|rescue|retry|return|self|super|then|throw|undef|unless|until|when|while|yield)\b/
'keyword': /\b(?:BEGIN|END|alias|and|begin|break|case|class|def|define_method|defined|do|each|else|elsif|end|ensure|extend|for|if|in|include|module|new|next|nil|not|or|prepend|private|protected|public|raise|redo|require|rescue|retry|return|self|super|then|throw|undef|unless|until|when|while|yield)\b/,
'operator': /\.{2,3}|&\.|===|<?=>|[!=]?~|(?:&&|\|\||<<|>>|\*\*|[+\-*/%<>!^&|=])=?|[?:]/,
'punctuation': /[(){}[\].,;]/,
});

// Give `::` its own `double-colon` token, aliased to `punctuation`, and
// register it ahead of `operator` (presumably so the operator pattern does
// not claim each `:` separately; confirm against Prism's matching order).
var doubleColonToken = {
	pattern: /::/,
	alias: 'punctuation'
};
Prism.languages.insertBefore('ruby', 'operator', { 'double-colon': doubleColonToken });

var interpolation = {
pattern: /#\{[^}]+\}/,
pattern: /((?:^|[^\\])(?:\\{2})*)#\{(?:[^{}]|\{[^{}]*\})*\}/,
lookbehind: true,
inside: {
'content': {
pattern: /^(#\{)[\s\S]+(?=\}$)/,
lookbehind: true,
inside: Prism.languages.ruby
},
'delimiter': {
pattern: /^#\{|\}$/,
alias: 'tag'
},
rest: Prism.languages.ruby
alias: 'punctuation'
}
}
};

// Remove the `function` token from the Ruby grammar (presumably inherited
// from `clike` through the extend() call; confirm).
delete Prism.languages.ruby.function;

// Regex source (a single non-capturing group) for the delimited body of a
// percent literal, i.e. everything that follows a prefix such as `%r` or `%x`.
// Backslash escapes are skipped over in every alternative.
//
// NOTE: the first alternative uses the backreference `\1`, so the delimiter
// capture must be the first capturing group of the final RegExp this source
// is embedded in.
var percentExpression = (function () {
	// Same delimiter character on both ends (anything except alphanumerics,
	// whitespace, `{`, `(`, `[`, `<` and `=`).
	var sameDelimiter = /([^a-zA-Z0-9\s{(\[<=])(?:(?!\1)[^\\]|\\[\s\S])*\1/.source;
	// Bracket pairs; each allows one level of nested, balanced brackets.
	var parentheses = /\((?:[^()\\]|\\[\s\S]|\((?:[^()\\]|\\[\s\S])*\))*\)/.source;
	var braces = /\{(?:[^{}\\]|\\[\s\S]|\{(?:[^{}\\]|\\[\s\S])*\})*\}/.source;
	var squareBrackets = /\[(?:[^\[\]\\]|\\[\s\S]|\[(?:[^\[\]\\]|\\[\s\S])*\])*\]/.source;
	var angleBrackets = /<(?:[^<>\\]|\\[\s\S]|<(?:[^<>\\]|\\[\s\S])*>)*>/.source;

	var alternatives = [sameDelimiter, parentheses, braces, squareBrackets, angleBrackets];
	return '(?:' + alternatives.join('|') + ')';
}());

// Regex source for the name part of a symbol. One of:
//   - a double-quoted string on a single line (backslash escapes allowed),
//   - an identifier (ASCII word, or a run of non-ASCII non-space characters)
//     optionally ending in `?` or `!`,
//   - `$` followed by any single character.
var symbolName = (function () {
	var quotedName = /"(?:\\.|[^"\\\r\n])*"/.source;
	var identifierName = /(?:\b[a-zA-Z_]\w*|[^\s\0-\x7F]+)[?!]?/.source;
	var globalName = /\$./.source;

	return '(?:' + [quotedName, identifierName, globalName].join('|') + ')';
}());

Prism.languages.insertBefore('ruby', 'keyword', {
'regex': [
'regex-literal': [
{
pattern: RegExp(/%r/.source + '(?:' + [
/([^a-zA-Z0-9\s{(\[<])(?:(?!\1)[^\\]|\\[\s\S])*\1/.source,
/\((?:[^()\\]|\\[\s\S])*\)/.source,
// Here we need to specifically allow interpolation
/\{(?:[^#{}\\]|#(?:\{[^}]+\})?|\\[\s\S])*\}/.source,
/\[(?:[^\[\]\\]|\\[\s\S])*\]/.source,
/<(?:[^<>\\]|\\[\s\S])*>/.source
].join('|') + ')' + /[egimnosux]{0,6}/.source),
pattern: RegExp(/%r/.source + percentExpression + /[egimnosux]{0,6}/.source),
greedy: true,
inside: {
'interpolation': interpolation
'interpolation': interpolation,
'regex': /[\s\S]+/
}
},
{
pattern: /(^|[^/])\/(?!\/)(?:\[[^\r\n\]]+\]|\\.|[^[/\\\r\n])+\/[egimnosux]{0,6}(?=\s*(?:$|[\r\n,.;})#]))/,
lookbehind: true,
greedy: true,
inside: {
'interpolation': interpolation
'interpolation': interpolation,
'regex': /[\s\S]+/
}
}
],
'variable': /[@$]+[a-zA-Z_]\w*(?:[?!]|\b)/,
'symbol': {
pattern: /(^|[^:]):[a-zA-Z_]\w*(?:[?!]|\b)/,
lookbehind: true
},
'symbol': [
{
pattern: RegExp(/(^|[^:]):/.source + symbolName),
lookbehind: true,
greedy: true
},
{
pattern: RegExp(/([\r\n{(,][ \t]*)/.source + symbolName + /(?=:(?!:))/.source),
lookbehind: true,
greedy: true
},
],
'method-definition': {
pattern: /(\bdef\s+)[\w.]+/,
pattern: /(\bdef\s+)\w+(?:\s*\.\s*\w+)?/,
lookbehind: true,
inside: {
'function': /\w+$/,
rest: Prism.languages.ruby
'function': /\b\w+$/,
'keyword': /^self\b/,
'class-name': /^\w+/,
'punctuation': /\./
}
}
});

// Register two tokens ahead of `number`:
//   - `builtin`: a fixed list of class names (Array, Hash, String, ...),
//   - `constant`: any word starting with an uppercase letter, optionally
//     ending in `?` or `!`.
var builtinClassNames = /\b(?:Array|Bignum|Binding|Class|Continuation|Dir|Exception|FalseClass|File|Fixnum|Float|Hash|IO|Integer|MatchData|Method|Module|NilClass|Numeric|Object|Proc|Range|Regexp|Stat|String|Struct|Symbol|TMS|Thread|ThreadGroup|Time|TrueClass)\b/;
var capitalizedWord = /\b[A-Z]\w*(?:[?!]|\b)/;
Prism.languages.insertBefore('ruby', 'number', {
	'builtin': builtinClassNames,
	'constant': capitalizedWord
});

Prism.languages.ruby.string = [
{
pattern: RegExp(/%[qQiIwWxs]?/.source + '(?:' + [
/([^a-zA-Z0-9\s{(\[<])(?:(?!\1)[^\\]|\\[\s\S])*\1/.source,
/\((?:[^()\\]|\\[\s\S])*\)/.source,
// Here we need to specifically allow interpolation
/\{(?:[^#{}\\]|#(?:\{[^}]+\})?|\\[\s\S])*\}/.source,
/\[(?:[^\[\]\\]|\\[\s\S])*\]/.source,
/<(?:[^<>\\]|\\[\s\S])*>/.source
].join('|') + ')'),
greedy: true,
inside: {
'interpolation': interpolation
}
},
{
pattern: /("|')(?:#\{[^}]+\}|#(?!\{)|\\(?:\r\n|[\s\S])|(?!\1)[^\\#\r\n])*\1/,
greedy: true,
inside: {
'interpolation': interpolation
Prism.languages.insertBefore('ruby', 'string', {
'string-literal': [
{
pattern: RegExp(/%[qQiIwWs]?/.source + percentExpression),
greedy: true,
inside: {
'interpolation': interpolation,
'string': /[\s\S]+/
}
},
{
pattern: /("|')(?:#\{[^}]+\}|#(?!\{)|\\(?:\r\n|[\s\S])|(?!\1)[^\\#\r\n])*\1/,
greedy: true,
inside: {
'interpolation': interpolation,
'string': /[\s\S]+/
}
},
{
pattern: /<<[-~]?([a-z_]\w*)[\r\n](?:.*[\r\n])*?[\t ]*\1/i,
alias: 'heredoc-string',
greedy: true,
inside: {
'delimiter': {
pattern: /^<<[-~]?[a-z_]\w*|\b[a-z_]\w*$/i,
inside: {
'symbol': /\b\w+/,
'punctuation': /^<<[-~]?/
}
},
'interpolation': interpolation,
'string': /[\s\S]+/
}
},
{
pattern: /<<[-~]?'([a-z_]\w*)'[\r\n](?:.*[\r\n])*?[\t ]*\1/i,
alias: 'heredoc-string',
greedy: true,
inside: {
'delimiter': {
pattern: /^<<[-~]?'[a-z_]\w*'|\b[a-z_]\w*$/i,
inside: {
'symbol': /\b\w+/,
'punctuation': /^<<[-~]?'|'$/,
}
},
'string': /[\s\S]+/
}
}
},
{
pattern: /<<[-~]?([a-z_]\w*)[\r\n](?:.*[\r\n])*?[\t ]*\1/i,
alias: 'heredoc-string',
greedy: true,
inside: {
'delimiter': {
pattern: /^<<[-~]?[a-z_]\w*|[a-z_]\w*$/i,
alias: 'symbol',
inside: {
'punctuation': /^<<[-~]?/
],
'command-literal': [
{
pattern: RegExp(/%x/.source + percentExpression),
greedy: true,
inside: {
'interpolation': interpolation,
'command': {
pattern: /[\s\S]+/,
alias: 'string'
}
},
'interpolation': interpolation
}
},
{
pattern: /<<[-~]?'([a-z_]\w*)'[\r\n](?:.*[\r\n])*?[\t ]*\1/i,
alias: 'heredoc-string',
greedy: true,
inside: {
'delimiter': {
pattern: /^<<[-~]?'[a-z_]\w*'|[a-z_]\w*$/i,
alias: 'symbol',
inside: {
'punctuation': /^<<[-~]?'|'$/,
}
},
{
pattern: /`(?:#\{[^}]+\}|#(?!\{)|\\(?:\r\n|[\s\S])|[^\\`#\r\n])*`/,
greedy: true,
inside: {
'interpolation': interpolation,
'command': {
pattern: /[\s\S]+/,
alias: 'string'
}
}
}
}
];
]
});

// Remove the top-level `string` token from the Ruby grammar; the
// `string-literal` and `command-literal` tokens registered before it carry
// their own nested `string` tokens.
delete Prism.languages.ruby.string;

// Register two tokens ahead of `number`:
//   - `builtin`: a fixed list of class names (Array, Hash, String, ...),
//   - `constant`: a word made only of uppercase letters, digits and
//     underscores (starting with an uppercase letter), optionally ending
//     in `?` or `!`.
var rubyBuiltinPattern = /\b(?:Array|Bignum|Binding|Class|Continuation|Dir|Exception|FalseClass|File|Fixnum|Float|Hash|IO|Integer|MatchData|Method|Module|NilClass|Numeric|Object|Proc|Range|Regexp|Stat|String|Struct|Symbol|TMS|Thread|ThreadGroup|Time|TrueClass)\b/;
var rubyConstantPattern = /\b[A-Z][A-Z0-9_]*(?:[?!]|\b)/;
Prism.languages.insertBefore('ruby', 'number', {
	'builtin': rubyBuiltinPattern,
	'constant': rubyConstantPattern
});

// Expose the grammar under the short name `rb` as well; both names refer to
// the same grammar object.
Prism.languages.rb = Prism.languages.ruby;
}(Prism));
2 changes: 1 addition & 1 deletion components/prism-ruby.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 39 additions & 2 deletions tests/languages/crystal/attribute_feature.test
@@ -1,17 +1,54 @@
@[AlwaysInline]
@[CallConvention("X86_StdCall")]
@[MyAnnotation(key: "value", value: 123)]
@[MyAnnotation("foo", 123, false)]

----------------------------------------------------

[
["attribute", [
["delimiter", "@["],
["constant", "AlwaysInline"],
"AlwaysInline",
["delimiter", "]"]
]],
["attribute", [
["delimiter", "@["],
["constant", "CallConvention"], ["punctuation", "("], ["string", [ "\"X86_StdCall\"" ]], ["punctuation", ")"],
"CallConvention",
["punctuation", "("],
["string-literal", [
["string", "\"X86_StdCall\""]
]],
["punctuation", ")"],
["delimiter", "]"]
]],
["attribute", [
["delimiter", "@["],
"MyAnnotation",
["punctuation", "("],
["symbol", "key"],
["operator", ":"],
["string-literal", [
["string", "\"value\""]
]],
["punctuation", ","],
["symbol", "value"],
["operator", ":"],
["number", "123"],
["punctuation", ")"],
["delimiter", "]"]
]],
["attribute", [
["delimiter", "@["],
"MyAnnotation",
["punctuation", "("],
["string-literal", [
["string", "\"foo\""]
]],
["punctuation", ","],
["number", "123"],
["punctuation", ","],
["boolean", "false"],
["punctuation", ")"],
["delimiter", "]"]
]]
]
Expand Down

0 comments on commit 86028ad

Please sign in to comment.