Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: don't use grapheme-splitter on ASCII strings in key-spacing rule #17122

Merged
merged 3 commits into from Apr 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
37 changes: 2 additions & 35 deletions lib/rules/id-length.js
Expand Up @@ -9,41 +9,8 @@
//------------------------------------------------------------------------------
// Requirements
//------------------------------------------------------------------------------
const GraphemeSplitter = require("grapheme-splitter");

//------------------------------------------------------------------------------
// Helpers
//------------------------------------------------------------------------------

/**
* Checks if the string given as argument is ASCII or not.
* @param {string} value A string that you want to know if it is ASCII or not.
* @returns {boolean} `true` if `value` is ASCII string.
*/
function isASCII(value) {
if (typeof value !== "string") {
return false;
}
return /^[\u0020-\u007f]*$/u.test(value);
}

/** @type {GraphemeSplitter | undefined} */
let splitter;

/**
* Gets the length of the string. If the string is not in ASCII, counts graphemes.
* @param {string} value A string that you want to get the length.
* @returns {number} The length of `value`.
*/
function getStringLength(value) {
if (isASCII(value)) {
return value.length;
}
if (!splitter) {
splitter = new GraphemeSplitter();
}
return splitter.countGraphemes(value);
}
const { getGraphemeCount } = require("../shared/string-utils");

//------------------------------------------------------------------------------
// Rule Definition
Expand Down Expand Up @@ -169,7 +136,7 @@ module.exports = {
const name = node.name;
const parent = node.parent;

const nameLength = getStringLength(name);
const nameLength = getGraphemeCount(name);

const isShort = nameLength < minLength;
const isLong = nameLength > maxLength;
Expand Down
10 changes: 2 additions & 8 deletions lib/rules/key-spacing.js
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for this! Since this code is so similar to what we have in id-length.js would it be possible to move getStringLength() to ast-utils or to another shared file in order to avoid duplications? The different regex range should not be a problem: we could just use \u0000-\u007f and be safe in all cases, even if id-length would never use the lower part of the range, because it's not relevant to identifiers.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks for the suggestion!

Expand Up @@ -9,13 +9,7 @@
//------------------------------------------------------------------------------

const astUtils = require("./utils/ast-utils");
const GraphemeSplitter = require("grapheme-splitter");

const splitter = new GraphemeSplitter();

//------------------------------------------------------------------------------
// Helpers
//------------------------------------------------------------------------------
const { getGraphemeCount } = require("../shared/string-utils");

/**
* Checks whether a string contains a line terminator as defined in
Expand Down Expand Up @@ -523,7 +517,7 @@ module.exports = {
const startToken = sourceCode.getFirstToken(property);
const endToken = getLastTokenBeforeColon(property.key);

return splitter.countGraphemes(sourceCode.getText().slice(startToken.range[0], endToken.range[1]));
return getGraphemeCount(sourceCode.getText().slice(startToken.range[0], endToken.range[1]));
}

/**
Expand Down
40 changes: 39 additions & 1 deletion lib/shared/string-utils.js
Expand Up @@ -5,6 +5,26 @@

"use strict";

//------------------------------------------------------------------------------
// Requirements
//------------------------------------------------------------------------------

const GraphemeSplitter = require("grapheme-splitter");

//------------------------------------------------------------------------------
// Helpers
//------------------------------------------------------------------------------

// eslint-disable-next-line no-control-regex -- intentionally including control characters
const ASCII_REGEX = /^[\u0000-\u007f]*$/u;

/** @type {GraphemeSplitter | undefined} */
let splitter;

//------------------------------------------------------------------------------
// Public Interface
//------------------------------------------------------------------------------

/**
* Converts the first letter of a string to uppercase.
* @param {string} string The string to operate on
Expand All @@ -17,6 +37,24 @@ function upperCaseFirst(string) {
return string[0].toUpperCase() + string.slice(1);
}

/**
* Counts graphemes in a given string.
* @param {string} value A string to count graphemes.
* @returns {number} The number of graphemes in `value`.
*/
function getGraphemeCount(value) {
if (ASCII_REGEX.test(value)) {
return value.length;
}

if (!splitter) {
splitter = new GraphemeSplitter();
}

return splitter.countGraphemes(value);
}

module.exports = {
upperCaseFirst
upperCaseFirst,
getGraphemeCount
};
49 changes: 48 additions & 1 deletion tests/lib/shared/string-utils.js
Expand Up @@ -11,7 +11,23 @@

const assert = require("chai").assert;

const { upperCaseFirst } = require("../../../lib/shared/string-utils");
const { upperCaseFirst, getGraphemeCount } = require("../../../lib/shared/string-utils");

//------------------------------------------------------------------------------
// Helpers
//------------------------------------------------------------------------------

/**
* Replaces raw control characters with the `\xXX` form.
* @param {string} text The text to process.
* @returns {string} `text` with escaped control characters.
*/
function escapeControlCharacters(text) {
return text.replace(
/[\u0000-\u001F\u007F-\u009F]/gu, // eslint-disable-line no-control-regex -- intentionally including control characters
c => `\\x${c.codePointAt(0).toString(16).padStart(2, "0")}`
);
}

//------------------------------------------------------------------------------
// Tests
Expand Down Expand Up @@ -39,3 +55,34 @@ describe("upperCaseFirst", () => {
assert(upperCaseFirst("") === "");
});
});

describe("getGraphemeCount", () => {
/* eslint-disable quote-props -- Make consistent here for readability */
const expectedResults = {
"": 0,
"a": 1,
"ab": 2,
"aa": 2,
"123": 3,
"cccc": 4,
[Array.from({ length: 128 }, (_, i) => String.fromCharCode(i)).join("")]: 128, // all ASCII characters
"👍": 1, // 1 grapheme, 1 code point, 2 code units
"👍👍": 2,
"👍9👍": 3,
"a👍b": 3,
"👶🏽": 1, // 1 grapheme, 2 code points, 4 code units
"👨‍👩‍👦": 1, // 1 grapheme, 5 code points, 8 code units
"👨‍👩‍👦👨‍👩‍👦": 2,
"👨‍👩‍👦a👨‍👩‍👦": 3,
"a👨‍👩‍👦b👨‍👩‍👦c": 5,
"👨‍👩‍👦👍": 2,
"👶🏽👨‍👩‍👦": 2
};
/* eslint-enable quote-props -- Make consistent here for readability */

Object.entries(expectedResults).forEach(([key, value]) => {
it(`should return ${value} for ${escapeControlCharacters(key)}`, () => {
assert.strictEqual(getGraphemeCount(key), value);
});
});
});