/
constants.evaluate.js
53 lines (46 loc) · 1.45 KB
/
constants.evaluate.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
"use strict";
const cjkRegex = require("cjk-regex");
const regexpUtil = require("regexp-util");
const unicodeRegex = require("unicode-regex");
// String form of a charset built in two steps: the base charset returned by
// `cjk-regex`, unioned with a `unicode-regex` charset constrained by the
// Script_Extensions and General_Category values listed below.
// NOTE(review): presumably unicode-regex intersects the two properties
// (any listed script AND any listed category) — confirm against its docs.
const cjkBaseCharset = cjkRegex();
const cjkExtendedCharset = unicodeRegex({
  Script_Extensions: ["Han", "Katakana", "Hiragana", "Hangul", "Bopomofo"],
  General_Category: [
    "Other_Letter",
    "Letter_Number",
    "Other_Symbol",
    "Modifier_Letter"
  ]
});
const cjkPattern = cjkBaseCharset.union(cjkExtendedCharset).toString();
// String form of a Hangul charset: the charset selected by Script "Hangul"
// unioned with the charset selected by Script_Extensions "Hangul".
const hangulScriptCharset = unicodeRegex({ Script: ["Hangul"] });
const hangulScriptExtensionsCharset = unicodeRegex({
  Script_Extensions: ["Hangul"]
});
const kPattern = hangulScriptCharset
  .union(hangulScriptExtensionsCharset)
  .toString();
// The 32 ASCII punctuation characters, per the CommonMark definition:
// http://spec.commonmark.org/0.25/#ascii-punctuation-character
// prettier-ignore
const asciiPunctuationCharacters = [
  "!", '"', "#", "$", "%", "&", "'", "(", ")", "*",
  "+", ",", "-", ".", "/", ":", ";", "<", "=", ">",
  "?", "@", "[", "\\", "]", "^", "_", "`", "{", "|",
  "}", "~"
];
// Each character is passed to `charset` as its own argument.
const asciiPunctuationCharset = regexpUtil.charset(
  ...asciiPunctuationCharacters
);
// Punctuation characters, per the CommonMark definition:
// http://spec.commonmark.org/0.25/#punctuation-character
// General_Category value names are documented at:
// http://unicode.org/Public/5.1.0/ucd/UCD.html#General_Category_Values
const punctuationGeneralCategories = [
  "Connector_Punctuation", // Pc
  "Dash_Punctuation", // Pd
  "Close_Punctuation", // Pe
  "Final_Punctuation", // Pf
  "Initial_Punctuation", // Pi
  "Other_Punctuation", // Po
  "Open_Punctuation" // Ps
];
const unicodePunctuationCharset = unicodeRegex({
  General_Category: punctuationGeneralCategories
});
// Combine the Unicode punctuation categories with the ASCII punctuation set.
const punctuationCharset = unicodePunctuationCharset.union(
  asciiPunctuationCharset
);
const punctuationPattern = punctuationCharset.toString();
// Only the stringified patterns are exported; the charset objects stay local.
module.exports = { cjkPattern, kPattern, punctuationPattern };