Skip to content

Commit 1e6fcf3

Browse files
authoredAug 4, 2023
Feat: Add support for Unicode boundaries (#5265)
Add support for Unicode boundaries and detection methods. Introduced Unicode boundaries support in text search. Added `supportsLookbehind` and `supportsUnicodeFlag` methods in `lang` for feature detection. Implemented fallback to ASCII boundaries when the browser does not support look-behinds. Implemented fallback to the old behaviour (without Unicode support) in rare edge cases for backward compatibility.
1 parent b196806 commit 1e6fcf3

File tree

3 files changed

+124
-28
lines changed

3 files changed

+124
-28
lines changed
 

‎src/lib/lang.js

+18
Original file line numberDiff line numberDiff line change
@@ -189,3 +189,21 @@ exports.delayedCall = function(fcn, defaultTimeout) {
189189

190190
return _self;
191191
};
192+
193+
exports.supportsLookbehind = function () {
194+
try {
195+
new RegExp('(?<=.)');
196+
} catch (e) {
197+
return false;
198+
}
199+
return true;
200+
};
201+
202+
exports.supportsUnicodeFlag = function () {
203+
try {
204+
new RegExp('^.$', 'u');
205+
} catch (error) {
206+
return false;
207+
}
208+
return true;
209+
};

‎src/search.js

+72-28
Original file line numberDiff line numberDiff line change
@@ -10,27 +10,34 @@ var Range = require("./range").Range;
1010
class Search {
1111
/**
1212
* Creates a new `Search` object. The following search options are available:
13-
*
14-
* - `needle`: The string or regular expression you're looking for
15-
* - `backwards`: Whether to search backwards from where cursor currently is. Defaults to `false`.
16-
* - `wrap`: Whether to wrap the search back to the beginning when it hits the end. Defaults to `false`.
17-
* - `caseSensitive`: Whether the search ought to be case-sensitive. Defaults to `false`.
18-
* - `wholeWord`: Whether the search matches only on whole words. Defaults to `false`.
19-
* - `range`: The [[Range]] to search within. Set this to `null` for the whole document
20-
* - `regExp`: Whether the search is a regular expression or not. Defaults to `false`.
21-
* - `start`: The starting [[Range]] or cursor position to begin the search
22-
* - `skipCurrent`: Whether or not to include the current line in the search. Default to `false`.
23-
*
13+
* @typedef SearchOptions
14+
*
15+
* @property {string|RegExp} [needle] - The string or regular expression you're looking for
16+
* @property {boolean} [backwards] - Whether to search backwards from where cursor currently is
17+
* @property {boolean} [wrap] - Whether to wrap the search back to the beginning when it hits the end
18+
* @property {boolean} [caseSensitive] - Whether the search ought to be case-sensitive
19+
* @property {boolean} [wholeWord] - Whether the search matches only on whole words
20+
* @property {Range|null} [range] - The [[Range]] to search within. Set this to `null` for the whole document
21+
* @property {boolean} [regExp] - Whether the search is a regular expression or not
22+
* @property {Range|Position} [start] - The starting [[Range]] or cursor position to begin the search
23+
* @property {boolean} [skipCurrent] - Whether or not to include the current line in the search
24+
* @property {boolean} [$isMultiLine] - true, if needle has \n or \r\n
25+
* @property {boolean} [preserveCase]
26+
* @property {boolean} [preventScroll]
27+
* @property {boolean} [$supportsUnicodeFlag] - internal property, determine if browser supports unicode flag
28+
* @property {any} [re]
2429
**/
30+
2531
constructor() {
32+
/**
33+
* @type {SearchOptions}
34+
*/
2635
this.$options = {};
2736
}
2837

2938
/**
3039
* Sets the search options via the `options` parameter.
31-
* @param {Object} options An object containing all the new search properties
32-
*
33-
*
40+
* @param {SearchOptions} options An object containing all the new search properties
3441
* @returns {Search}
3542
* @chainable
3643
**/
@@ -41,27 +48,26 @@ class Search {
4148

4249
/**
4350
* [Returns an object containing all the search options.]{: #Search.getOptions}
44-
* @returns {Object}
51+
* @returns {SearchOptions}
4552
**/
4653
getOptions() {
4754
return lang.copyObject(this.$options);
4855
}
4956

5057
/**
5158
* Sets the search options via the `options` parameter.
52-
* @param {Object} options object containing all the search propertie
59+
* @param {SearchOptions} options object containing all the search properties
5360
* @related Search.set
5461
**/
5562
setOptions(options) {
5663
this.$options = options;
5764
}
65+
5866
/**
5967
* Searches for `options.needle`. If found, this method returns the [[Range `Range`]] where the text first occurs. If `options.backwards` is `true`, the search goes backwards in the session.
6068
* @param {EditSession} session The session to search with
61-
*
62-
*
63-
* @returns {Range}
64-
**/
69+
* @returns {Range|boolean}
70+
**/
6571
find(session) {
6672
var options = this.$options;
6773
var iterator = this.$matchIterator(session, options);
@@ -87,9 +93,7 @@ class Search {
8793
/**
8894
* Searches for all occurrences of `options.needle`. If found, this method returns an array of [[Range `Range`s]] where the text occurs. If `options.backwards` is `true`, the search goes backwards in the session.
8995
* @param {EditSession} session The session to search with
90-
*
91-
*
92-
* @returns {[Range]}
96+
* @returns {Range[]}
9397
**/
9498
findAll(session) {
9599
var options = this.$options;
@@ -200,15 +204,31 @@ class Search {
200204
return replacement;
201205
}
202206

207+
/**
208+
*
209+
* @param {SearchOptions} options
210+
* @param $disableFakeMultiline
211+
* @return {RegExp|boolean|*[]|*}
212+
*/
203213
$assembleRegExp(options, $disableFakeMultiline) {
204214
if (options.needle instanceof RegExp)
205215
return options.re = options.needle;
206-
216+
207217
var needle = options.needle;
208218

209219
if (!options.needle)
210220
return options.re = false;
221+
222+
if (options.$supportsUnicodeFlag === undefined) {
223+
options.$supportsUnicodeFlag = lang.supportsUnicodeFlag();
224+
}
211225

226+
try {
227+
new RegExp(needle, "u");
228+
} catch (e) {
229+
options.$supportsUnicodeFlag = false; //left for backward compatibility with previous versions for cases like /ab\{2}/gu
230+
}
231+
212232
if (!options.regExp)
213233
needle = lang.escapeRegExp(needle);
214234

@@ -217,6 +237,10 @@ class Search {
217237

218238
var modifier = options.caseSensitive ? "gm" : "gmi";
219239

240+
if (options.$supportsUnicodeFlag) {
241+
modifier += "u";
242+
}
243+
220244
options.$isMultiLine = !$disableFakeMultiline && /[\n\r]/.test(needle);
221245
if (options.$isMultiLine)
222246
return options.re = this.$assembleMultilineRegExp(needle, modifier);
@@ -356,13 +380,33 @@ class Search {
356380

357381
}
358382

383+
/**
 * Surrounds a regular-expression source string with whole-word boundary
 * assertions.
 *
 * A boundary is added on a side when the character on that side of `needle`
 * is a word character, or unconditionally when `options.regExp` is set.
 * When the engine supports look-behind assertions and
 * `options.$supportsUnicodeFlag` is truthy, Unicode-aware boundaries built
 * from `\p{L}`, `\p{N}` and `_` are used; otherwise the ASCII `\b` is used.
 *
 * @param {string} needle regular-expression source to wrap
 * @param {SearchOptions} options reads `regExp` and `$supportsUnicodeFlag`
 * @return {string} `needle` with the applicable boundary assertions added
 */
function addWordBoundary(needle, options) {
    // Unicode boundaries need both features: the look-behind for the leading
    // side and the `u` flag for the `\p{...}` property escapes.
    const useUnicodeBoundary = Boolean(lang.supportsLookbehind() && options.$supportsUnicodeFlag);

    // Built through the constructor (not a literal) so that engines without
    // `u` flag support never have to parse the Unicode pattern.
    const wordCharRegExp = useUnicodeBoundary
        ? new RegExp("[\\p{L}\\p{N}_]", "u")
        : new RegExp("\\w");

    function wordBoundary(c, firstChar = true) {
        // `c` is undefined for an empty needle; RegExp#test would coerce it to
        // the string "undefined" and report a word character, so guard it.
        const isWordChar = c !== undefined && wordCharRegExp.test(c);
        if (!isWordChar && !options.regExp) {
            return "";
        }
        if (!useUnicodeBoundary) {
            return "\\b";
        }
        return firstChar ? "(?<=^|[^\\p{L}\\p{N}_])" : "(?=[^\\p{L}\\p{N}_]|$)";
    }

    // Array.from splits by code point, so an astral character at either end
    // is inspected whole rather than as a lone surrogate.
    const needleChars = Array.from(needle);
    const firstChar = needleChars[0];
    const lastChar = needleChars[needleChars.length - 1];

    return wordBoundary(firstChar) + needle + wordBoundary(lastChar, false);
}
367411

368412
exports.Search = Search;

‎src/search_test.js

+34
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,40 @@ module.exports = {
138138
assert.position(range.end, 1, 12);
139139
},
140140

141+
"test: fallback to nonUnicode mode on edge cases": function() {
142+
var session = new EditSession([
143+
/* eslint-disable no-octal-escape*/
144+
"string with \251 symbol", // test octal escape sequence
145+
"bracket ab{2}" // test lone quantifier brackets
146+
]);
147+
148+
var search = new Search().set({
149+
needle: "\\251",
150+
regExp: true
151+
});
152+
var range = search.find(session);
153+
assert.position(range.start, 0, 12);
154+
assert.position(range.end, 0, 13);
155+
156+
search.set({ needle: "ab\\{2}" });
157+
range = search.find(session);
158+
assert.position(range.start, 1, 8);
159+
assert.position(range.end, 1, 13);
160+
},
161+
162+
"test: whole word search should not match inside of words with unicode": function() {
163+
var session = new EditSession(["𝓗ello𝓦orld", "𝓗ello 𝓦orld 123", "456"]);
164+
165+
var search = new Search().set({
166+
needle: "𝓗ello",
167+
wholeWord: true
168+
});
169+
170+
var range = search.find(session);
171+
assert.position(range.start, 1, 0);
172+
assert.position(range.end, 1, 6);
173+
},
174+
141175
"test: find backwards": function() {
142176
var session = new EditSession(["juhu juhu juhu juhu"]);
143177
session.getSelection().moveCursorTo(0, 10);

0 commit comments

Comments
 (0)
Please sign in to comment.