-
-
Notifications
You must be signed in to change notification settings - Fork 232
/
unicode.ts
77 lines (69 loc) · 2.47 KB
/
unicode.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
/**
 * The last two code points (U+xxFFFE and U+xxFFFF) of each of the 17 Unicode
 * planes, i.e. 0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, ... 0x10FFFE, 0x10FFFF.
 * Unicode designates these as noncharacters.
 */
const UNDEFINED_CODE_POINTS = new Set(
    // One base value per plane (0x00000, 0x10000, ... 0x100000), each
    // contributing its two trailing code points in ascending order.
    Array.from({ length: 17 }, (_, plane) => plane * 0x10000).flatMap((planeBase) => [
        planeBase + 0xfffe,
        planeBase + 0xffff,
    ]),
);
/** U+FFFD REPLACEMENT CHARACTER, as a one-character string. */
export const REPLACEMENT_CHARACTER = '\uFFFD';
/**
 * Named numeric code point constants.
 *
 * `EOF` is -1, a value outside the Unicode code point range; every other
 * entry is the code point of the character its name describes.
 */
export const CODE_POINTS = {
    EOF: -1,

    // Control characters and whitespace
    NULL: 0x00,
    TABULATION: 0x09,
    CARRIAGE_RETURN: 0x0d,
    LINE_FEED: 0x0a,
    FORM_FEED: 0x0c,
    SPACE: 0x20,

    // ASCII punctuation and digits
    EXCLAMATION_MARK: 0x21, // !
    QUOTATION_MARK: 0x22, // "
    NUMBER_SIGN: 0x23, // #
    AMPERSAND: 0x26, // &
    APOSTROPHE: 0x27, // '
    HYPHEN_MINUS: 0x2d, // -
    SOLIDUS: 0x2f, // /
    DIGIT_0: 0x30,
    DIGIT_9: 0x39,
    SEMICOLON: 0x3b, // ;
    LESS_THAN_SIGN: 0x3c, // <
    EQUALS_SIGN: 0x3d, // =
    GREATER_THAN_SIGN: 0x3e, // >
    QUESTION_MARK: 0x3f, // ?

    // Upper-case letter boundaries
    LATIN_CAPITAL_A: 0x41,
    LATIN_CAPITAL_F: 0x46,
    LATIN_CAPITAL_X: 0x58,
    LATIN_CAPITAL_Z: 0x5a,

    RIGHT_SQUARE_BRACKET: 0x5d, // ]
    GRAVE_ACCENT: 0x60, // `

    // Lower-case letter boundaries
    LATIN_SMALL_A: 0x61,
    LATIN_SMALL_F: 0x66,
    LATIN_SMALL_X: 0x78,
    LATIN_SMALL_Z: 0x7a,

    REPLACEMENT_CHARACTER: 0xfffd,
};
/** Converts an ASCII string into an array of its UTF-16 code units. */
const toCodeUnits = (text: string) => new Uint16Array(Array.from(text, (ch) => ch.charCodeAt(0)));

/**
 * Fixed character sequences, each stored as a `Uint16Array` of code units.
 * All sequences are plain ASCII, so one code unit corresponds to one character.
 */
export const CODE_POINT_SEQUENCES = {
    DASH_DASH_STRING: toCodeUnits('--'),
    DOCTYPE_STRING: toCodeUnits('DOCTYPE'),
    CDATA_START_STRING: toCodeUnits('[CDATA['),
    SCRIPT_STRING: toCodeUnits('script'),
    PUBLIC_STRING: toCodeUnits('PUBLIC'),
    SYSTEM_STRING: toCodeUnits('SYSTEM'),
};
//Surrogates
/**
 * Tells whether `cp` lies in the UTF-16 surrogate range U+D800–U+DFFF
 * (both bounds inclusive).
 *
 * @param cp Numeric code point or code unit to classify.
 * @returns `true` when `cp` is within the surrogate range, `false` otherwise.
 */
export function isSurrogate(cp: number) {
    const firstSurrogate = 0xd800;
    const lastSurrogate = 0xdfff;

    return firstSurrogate <= cp && cp <= lastSurrogate;
}
/**
 * Tells whether `cp` lies in the range U+DC00–U+DFFF (inclusive), which is
 * the low (trailing) half of the surrogate range.
 *
 * Despite the name, only this single value is inspected; nothing is checked
 * about a preceding high surrogate.
 *
 * @param cp Numeric code unit to classify.
 * @returns `true` when `cp` is within U+DC00–U+DFFF, `false` otherwise.
 */
export function isSurrogatePair(cp: number) {
    const firstLowSurrogate = 0xdc00;
    const lastLowSurrogate = 0xdfff;

    return firstLowSurrogate <= cp && cp <= lastLowSurrogate;
}
/**
 * Combines the two halves of a UTF-16 surrogate pair into one code point.
 *
 * No validation is performed: the result is only meaningful when `cp1` is in
 * U+D800–U+DBFF and `cp2` is in U+DC00–U+DFFF.
 *
 * @param cp1 High (leading) surrogate.
 * @param cp2 Low (trailing) surrogate.
 * @returns The combined code point.
 */
export function getSurrogatePairCodePoint(cp1: number, cp2: number) {
    // Each high surrogate step covers a block of 0x400 code points.
    const highContribution = (cp1 - 0xd800) * 0x400;

    // 0x2400 is 0x10000 - 0xdc00: it adds the supplementary-plane base and
    // removes the low surrogate's 0xdc00 bias in a single constant.
    return highContribution + 0x2400 + cp2;
}
/**
 * Tells whether `cp` is a control code point, NOT counting NULL or ASCII
 * whitespace.
 *
 * Matches U+0001–U+001F except tab (U+0009), line feed (U+000A), form feed
 * (U+000C) and carriage return (U+000D), plus everything in U+007F–U+009F.
 *
 * @param cp Numeric code point to classify.
 * @returns `true` for a matching control code point, `false` otherwise.
 */
export function isControlCodePoint(cp: number) {
    // DELETE and the C1 control range.
    if (cp >= 0x7f && cp <= 0x9f) {
        return true;
    }

    // Outside the C0 range (NULL excluded): not a control. SPACE (0x20)
    // already falls outside this range, so it needs no separate check.
    const inC0Range = cp >= 0x01 && cp <= 0x1f;
    if (!inC0Range) {
        return false;
    }

    // Within C0, the ASCII whitespace characters are exempt.
    switch (cp) {
        case 0x09:
        case 0x0a:
        case 0x0c:
        case 0x0d: {
            return false;
        }
        default: {
            return true;
        }
    }
}
/**
 * Tells whether `cp` is one of the code points this module treats as
 * undefined: anything in U+FDD0–U+FDEF (inclusive), or any member of
 * `UNDEFINED_CODE_POINTS`.
 *
 * @param cp Numeric code point to classify.
 * @returns `true` when `cp` is in either group, `false` otherwise.
 */
export function isUndefinedCodePoint(cp: number) {
    // Contiguous range: a cheap comparison, tried before the set lookup.
    if (cp >= 0xfdd0 && cp <= 0xfdef) {
        return true;
    }

    return UNDEFINED_CODE_POINTS.has(cp);
}