forked from nodejs/node
-
Notifications
You must be signed in to change notification settings - Fork 0
/
brace-expressions.js
148 lines (148 loc) · 5.5 KB
/
brace-expressions.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
// translate the various posix character classes into unicode properties
// this works across all unicode locales
// { <posix class>: [<translation>, /u flag required, negated]
const posixClasses = {
'[:alnum:]': ['\\p{L}\\p{Nl}\\p{Nd}', true],
'[:alpha:]': ['\\p{L}\\p{Nl}', true],
'[:ascii:]': ['\\x' + '00-\\x' + '7f', false],
'[:blank:]': ['\\p{Zs}\\t', true],
'[:cntrl:]': ['\\p{Cc}', true],
'[:digit:]': ['\\p{Nd}', true],
'[:graph:]': ['\\p{Z}\\p{C}', true, true],
'[:lower:]': ['\\p{Ll}', true],
'[:print:]': ['\\p{C}', true],
'[:punct:]': ['\\p{P}', true],
'[:space:]': ['\\p{Z}\\t\\r\\n\\v\\f', true],
'[:upper:]': ['\\p{Lu}', true],
'[:word:]': ['\\p{L}\\p{Nl}\\p{Nd}\\p{Pc}', true],
'[:xdigit:]': ['A-Fa-f0-9', false],
};
// only need to escape a few things inside of brace expressions
// escapes: [ \ ] -
const braceEscape = (s) => s.replace(/[[\]\\-]/g, '\\$&');
// escape all regexp magic characters
const regexpEscape = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
// everything has already been escaped, we just have to join
const rangesToString = (ranges) => ranges.join('');
// takes a glob string at a posix brace expression, and returns
// an equivalent regular expression source, and boolean indicating
// whether the /u flag needs to be applied, and the number of chars
// consumed to parse the character class.
// This also removes out of order ranges, and returns ($.) if the
// entire class just no good.
export const parseClass = (glob, position) => {
const pos = position;
/* c8 ignore start */
if (glob.charAt(pos) !== '[') {
throw new Error('not in a brace expression');
}
/* c8 ignore stop */
const ranges = [];
const negs = [];
let i = pos + 1;
let sawStart = false;
let uflag = false;
let escaping = false;
let negate = false;
let endPos = pos;
let rangeStart = '';
WHILE: while (i < glob.length) {
const c = glob.charAt(i);
if ((c === '!' || c === '^') && i === pos + 1) {
negate = true;
i++;
continue;
}
if (c === ']' && sawStart && !escaping) {
endPos = i + 1;
break;
}
sawStart = true;
if (c === '\\') {
if (!escaping) {
escaping = true;
i++;
continue;
}
// escaped \ char, fall through and treat like normal char
}
if (c === '[' && !escaping) {
// either a posix class, a collation equivalent, or just a [
for (const [cls, [unip, u, neg]] of Object.entries(posixClasses)) {
if (glob.startsWith(cls, i)) {
// invalid, [a-[] is fine, but not [a-[:alpha]]
if (rangeStart) {
return ['$.', false, glob.length - pos, true];
}
i += cls.length;
if (neg)
negs.push(unip);
else
ranges.push(unip);
uflag = uflag || u;
continue WHILE;
}
}
}
// now it's just a normal character, effectively
escaping = false;
if (rangeStart) {
// throw this range away if it's not valid, but others
// can still match.
if (c > rangeStart) {
ranges.push(braceEscape(rangeStart) + '-' + braceEscape(c));
}
else if (c === rangeStart) {
ranges.push(braceEscape(c));
}
rangeStart = '';
i++;
continue;
}
// now might be the start of a range.
// can be either c-d or c-] or c<more...>] or c] at this point
if (glob.startsWith('-]', i + 1)) {
ranges.push(braceEscape(c + '-'));
i += 2;
continue;
}
if (glob.startsWith('-', i + 1)) {
rangeStart = c;
i += 2;
continue;
}
// not the start of a range, just a single character
ranges.push(braceEscape(c));
i++;
}
if (endPos < i) {
// didn't see the end of the class, not a valid class,
// but might still be valid as a literal match.
return ['', false, 0, false];
}
// if we got no ranges and no negates, then we have a range that
// cannot possibly match anything, and that poisons the whole glob
if (!ranges.length && !negs.length) {
return ['$.', false, glob.length - pos, true];
}
// if we got one positive range, and it's a single character, then that's
// not actually a magic pattern, it's just that one literal character.
// we should not treat that as "magic", we should just return the literal
// character. [_] is a perfectly valid way to escape glob magic chars.
if (negs.length === 0 &&
ranges.length === 1 &&
/^\\?.$/.test(ranges[0]) &&
!negate) {
const r = ranges[0].length === 2 ? ranges[0].slice(-1) : ranges[0];
return [regexpEscape(r), false, endPos - pos, false];
}
const sranges = '[' + (negate ? '^' : '') + rangesToString(ranges) + ']';
const snegs = '[' + (negate ? '' : '^') + rangesToString(negs) + ']';
const comb = ranges.length && negs.length
? '(' + sranges + '|' + snegs + ')'
: ranges.length
? sranges
: snegs;
return [comb, uflag, endPos - pos, true];
};
//# sourceMappingURL=brace-expressions.js.map