Skip to content

Commit

Permalink
Added support for ICU message format (#2745)
Browse files Browse the repository at this point in the history
  • Loading branch information
RunDevelopment committed Mar 5, 2021
1 parent d85e30d commit bf4e7ba
Show file tree
Hide file tree
Showing 11 changed files with 866 additions and 2 deletions.
2 changes: 1 addition & 1 deletion components.js

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions components.json
Expand Up @@ -528,6 +528,10 @@
"title": "Icon",
"owner": "Golmote"
},
"icu-message-format": {
"title": "ICU Message Format",
"owner": "RunDevelopment"
},
"idris": {
"title": "Idris",
"alias": "idr",
Expand Down
148 changes: 148 additions & 0 deletions components/prism-icu-message-format.js
@@ -0,0 +1,148 @@
// https://unicode-org.github.io/icu/userguide/format_parse/messages/
// https://unicode-org.github.io/icu-docs/apidoc/released/icu4j/com/ibm/icu/text/MessageFormat.html

(function (Prism) {

/**
 * Builds a regex source with bounded self-nesting by repeatedly substituting
 * the `<SELF>` placeholder in `source` with a copy of the pattern itself.
 *
 * Once the depth budget is used up, `<SELF>` is replaced with `[]`, an empty
 * character class that can never match, which ends the recursion.
 *
 * @param {string} source Regex source text that may contain `<SELF>` placeholders.
 * @param {number} level How many times the pattern may still be expanded into itself.
 * @returns {string} The expanded regex source without any `<SELF>` placeholders.
 */
function nested(source, level) {
	// Depth budget exhausted: hand back a pattern that matches nothing.
	if (level <= 0) {
		return /[]/.source;
	}

	// Every placeholder receives the same text, so compute it on first use only.
	// It stays lazy so that a source without `<SELF>` never recurses at all.
	var expansion = null;

	// A replacer function (not a string) keeps `$` sequences in the expansion literal.
	return source.replace(/<SELF>/g, function () {
		if (expansion === null) {
			expansion = nested(source, level - 1);
		}
		return expansion;
	});
}

// Quoted text: an apostrophe immediately followed by one of `{`, `}`, `:`, `=` or `,`,
// then any run of non-apostrophe characters or doubled apostrophes (`''`),
// closed by an apostrophe that is not itself followed by another apostrophe.
var stringPattern = /'[{}:=,](?:[^']|'')*'(?!')/;

// A doubled apostrophe (`''`), highlighted with the `operator` alias.
var escape = {
pattern: /''/,
greedy: true,
alias: 'operator'
};
// Token definition for quoted text; doubled apostrophes inside it are
// tokenized as `escape`.
var string = {
pattern: stringPattern,
greedy: true,
inside: {
'escape': escape
}
};

// Regex source for a brace-delimited argument `{ ... }`. Between the braces it accepts:
//   - any character other than `{`, `}` and `'`,
//   - an apostrophe that is not followed by `{`, `}`, `,` or `'`,
//   - a doubled apostrophe (`''`),
//   - <STR>: quoted text (replaced with the source of `stringPattern`),
//   - <SELF>: a nested argument (expanded by `nested` with a depth budget of 8).
// The <STR> replacement uses a function so the inserted source is taken literally.
var argumentSource = nested(
/\{(?:[^{}']|'(?![{},'])|''|<STR>|<SELF>)*\}/.source
.replace(/<STR>/g, function () { return stringPattern.source; }),
8
);

// Token definition for a brace-delimited sub-message, shared by the
// `plural-style` and `select-style` tokens of the grammar below.
var nestedMessage = {
pattern: RegExp(argumentSource),
inside: {
// Everything after the opening `{` (excluded via the lookbehind group)
// up to, but not including, the closing `}`.
'message': {
pattern: /^(\{)[\s\S]+(?=\}$)/,
lookbehind: true,
inside: null // see below
},
// Matches any single character; presumably only the enclosing braces are
// left over once `message` has claimed the interior (verify against Prism's
// tokenization order).
'message-delimiter': {
pattern: /./,
alias: 'punctuation'
}
}
};

// Grammar for ICU message patterns. Top-level tokens are `argument`
// (a brace-delimited placeholder, possibly nested), `escape` (a doubled
// apostrophe) and `string` (apostrophe-quoted text).
Prism.languages['icu-message-format'] = {
'argument': {
pattern: RegExp(argumentSource),
greedy: true,
inside: {
// Everything after the opening `{` (excluded via the lookbehind group)
// up to, but not including, the closing `}`.
'content': {
pattern: /^(\{)[\s\S]+(?=\}$)/,
lookbehind: true,
inside: {
// Leading run of characters, after optional whitespace, that contains
// no brace, colon, equals sign, comma or whitespace.
'argument-name': {
pattern: /^(\s*)[^{}:=,\s]+/,
lookbehind: true
},
// The style text following `, choice ,` (the prefix is excluded via the
// lookbehind group), from its first to its last non-whitespace character.
'choice-style': {
// https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/classicu_1_1ChoiceFormat.html#details
pattern: /^(\s*,\s*choice\s*,\s*)\S(?:[\s\S]*\S)?/,
lookbehind: true,
inside: {
'punctuation': /\|/,
// Optional sign, then a decimal number or U+221E (infinity), then one of
// the comparison characters `<`, `#` or U+2264 (less-than-or-equal).
'range': {
pattern: /^(\s*)[+-]?(?:\d+(?:\.\d*)?|\u221e)\s*[<#\u2264]/,
lookbehind: true,
inside: {
'operator': /[<#\u2264]/,
'number': /\S+/
}
},
// Assigned at the end of this file to the full grammar, so that nested
// arguments, escapes and strings inside a choice are highlighted too.
rest: null // see below
}
},
// The style text following `, plural ,` or `, selectordinal ,`.
'plural-style': {
// https://unicode-org.github.io/icu-docs/apidoc/released/icu4j/com/ibm/icu/text/PluralFormat.html#:~:text=Patterns%20and%20Their%20Interpretation
pattern: /^(\s*,\s*(?:plural|selectordinal)\s*,\s*)\S(?:[\s\S]*\S)?/,
lookbehind: true,
inside: {
// `offset:` followed by digits, only at the very start of the style text.
'offset': /^offset:\s*\d+/,
'nested-message': nestedMessage,
// Either an explicit value (`=` followed by digits) or a bare word;
// the listed words are additionally marked as keywords.
'selector': {
pattern: /=\d+|[^{}:=,\s]+/,
inside: {
'keyword': /^(?:zero|one|two|few|many|other)$/
}
}
}
},
// The style text following `, select ,`.
'select-style': {
// https://unicode-org.github.io/icu-docs/apidoc/released/icu4j/com/ibm/icu/text/SelectFormat.html#:~:text=Patterns%20and%20Their%20Interpretation
pattern: /^(\s*,\s*select\s*,\s*)\S(?:[\s\S]*\S)?/,
lookbehind: true,
inside: {
'nested-message': nestedMessage,
// A bare word; only `other` is additionally marked as a keyword.
'selector': {
pattern: /[^{}:=,\s]+/,
inside: {
'keyword': /^other$/
}
}
}
},
// The names of the complex argument types handled by the style tokens above.
'keyword': /\b(?:choice|plural|select|selectordinal)\b/,
// The remaining argument type names, highlighted like keywords via the alias.
'arg-type': {
pattern: /\b(?:number|date|time|spellout|ordinal|duration)\b/,
alias: 'keyword'
},
// `::` followed by a run of non-delimiter characters, directly after a comma.
'arg-skeleton': {
pattern: /(,\s*)::[^{}:=,\s]+/,
lookbehind: true
},
// One of the listed style names after a comma, with only whitespace
// allowed between it and the end of the content.
'arg-style': {
pattern: /(,\s*)(?:short|medium|long|full|integer|currency|percent)(?=\s*$)/,
lookbehind: true
},
// Any other style text after a comma, running to the end of the content:
// plain characters, apostrophe-quoted runs and brace pairs (nesting is
// expanded by `nested` with a depth budget of 8). Highlighted as a string.
'arg-style-text': {
pattern: RegExp(/(^\s*,\s*(?=\S))/.source + nested(/(?:[^{}']|'[^']*'|\{(?:<SELF>)?\})+/.source, 8) + '$'),
lookbehind: true,
alias: 'string'
},
'punctuation': /,/
}
},
// Matches any single character; presumably only the enclosing braces are
// left over once `content` has claimed the interior (verify against Prism's
// tokenization order).
'argument-delimiter': {
pattern: /./,
alias: 'operator'
}
}
},
'escape': escape,
'string': string
};

// Fill in the recursive references that were left as `null` in the object
// literals above, now that the grammar object exists: the interior of a nested
// message and the `rest` of a choice style both point at the full grammar.
nestedMessage.inside.message.inside = Prism.languages['icu-message-format'];
Prism.languages['icu-message-format'].argument.inside.content.inside['choice-style'].inside.rest = Prism.languages['icu-message-format'];

}(Prism));
1 change: 1 addition & 0 deletions components/prism-icu-message-format.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions examples/prism-icu-message-format.html
@@ -0,0 +1,22 @@
<h2>Full example</h2>
<pre><code>https://unicode-org.github.io/icu/userguide/format_parse/messages/

{gender_of_host, select,
female {
{num_guests, plural, offset:1
=0 {{host} does not give a party.}
=1 {{host} invites {guest} to her party.}
=2 {{host} invites {guest} and one other person to her party.}
other {{host} invites {guest} and # other people to her party.}}}
male {
{num_guests, plural, offset:1
=0 {{host} does not give a party.}
=1 {{host} invites {guest} to his party.}
=2 {{host} invites {guest} and one other person to his party.}
other {{host} invites {guest} and # other people to his party.}}}
other {
{num_guests, plural, offset:1
=0 {{host} does not give a party.}
=1 {{host} invites {guest} to their party.}
=2 {{host} invites {guest} and one other person to their party.}
other {{host} invites {guest} and # other people to their party.}}}}</code></pre>
1 change: 1 addition & 0 deletions plugins/show-language/prism-show-language.js
Expand Up @@ -89,6 +89,7 @@
"hpkp": "HTTP Public-Key-Pins",
"hsts": "HTTP Strict-Transport-Security",
"ichigojam": "IchigoJam",
"icu-message-format": "ICU Message Format",
"idr": "Idris",
"ignore": ".ignore",
"gitignore": ".gitignore",
Expand Down
2 changes: 1 addition & 1 deletion plugins/show-language/prism-show-language.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

51 changes: 51 additions & 0 deletions tests/languages/icu-message-format/arg-skeleton_feature.test
@@ -0,0 +1,51 @@
At {1,time,::jmm} on {1,date,::dMMMM}, there was {2} on planet {3,number,integer}.

----------------------------------------------------

[
"At ",
["argument", [
["argument-delimiter", "{"],
["content", [
["argument-name", "1"],
["punctuation", ","],
["arg-type", "time"],
["punctuation", ","],
["arg-skeleton", "::jmm"]
]],
["argument-delimiter", "}"]
]],
" on ",
["argument", [
["argument-delimiter", "{"],
["content", [
["argument-name", "1"],
["punctuation", ","],
["arg-type", "date"],
["punctuation", ","],
["arg-skeleton", "::dMMMM"]
]],
["argument-delimiter", "}"]
]],
", there was ",
["argument", [
["argument-delimiter", "{"],
["content", [
["argument-name", "2"]
]],
["argument-delimiter", "}"]
]],
" on planet ",
["argument", [
["argument-delimiter", "{"],
["content", [
["argument-name", "3"],
["punctuation", ","],
["arg-type", "number"],
["punctuation", ","],
["arg-style", "integer"]
]],
["argument-delimiter", "}"]
]],
"."
]
105 changes: 105 additions & 0 deletions tests/languages/icu-message-format/choice-style_feature.test
@@ -0,0 +1,105 @@
The disk {1} contains {0,choice,0#no files|1#one file|1<{0,number,integer} files}

{3, choice, -1#is negative| 0#is zero or fraction | 1#is one |1.0<is 1+ |2#is two |2<is more than 2.}

----------------------------------------------------

[
"The disk ",
["argument", [
["argument-delimiter", "{"],
["content", [
["argument-name", "1"]
]],
["argument-delimiter", "}"]
]],
" contains ",
["argument", [
["argument-delimiter", "{"],
["content", [
["argument-name", "0"],
["punctuation", ","],
["keyword", "choice"],
["punctuation", ","],
["choice-style", [
["range", [
["number", "0"],
["operator", "#"]
]],
"no files",
["punctuation", "|"],
["range", [
["number", "1"],
["operator", "#"]
]],
"one file",
["punctuation", "|"],
["range", [
["number", "1"],
["operator", "<"]
]],
["argument", [
["argument-delimiter", "{"],
["content", [
["argument-name", "0"],
["punctuation", ","],
["arg-type", "number"],
["punctuation", ","],
["arg-style", "integer"]
]],
["argument-delimiter", "}"]
]],
" files"
]]
]],
["argument-delimiter", "}"]
]],

["argument", [
["argument-delimiter", "{"],
["content", [
["argument-name", "3"],
["punctuation", ","],
["keyword", "choice"],
["punctuation", ","],
["choice-style", [
["range", [
["number", "-1"],
["operator", "#"]
]],
"is negative",
["punctuation", "|"],
["range", [
["number", "0"],
["operator", "#"]
]],
"is zero or fraction ",
["punctuation", "|"],
["range", [
["number", "1"],
["operator", "#"]
]],
"is one ",
["punctuation", "|"],
["range", [
["number", "1.0"],
["operator", "<"]
]],
"is 1+ ",
["punctuation", "|"],
["range", [
["number", "2"],
["operator", "#"]
]],
"is two ",
["punctuation", "|"],
["range", [
["number", "2"],
["operator", "<"]
]],
"is more than 2."
]]
]],
["argument-delimiter", "}"]
]]
]

0 comments on commit bf4e7ba

Please sign in to comment.