diff --git a/src/compiler/checker.ts b/src/compiler/checker.ts index 6bb0efc2c8322..0dd481e113481 100644 --- a/src/compiler/checker.ts +++ b/src/compiler/checker.ts @@ -31360,7 +31360,7 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker { function checkGrammarRegularExpressionLiteral(node: RegularExpressionLiteral) { const sourceFile = getSourceFileOfNode(node); - if (!hasParseDiagnostics(sourceFile)) { + if (!hasParseDiagnostics(sourceFile) && !node.isUnterminated) { let lastError: DiagnosticWithLocation | undefined; scanner ??= createScanner(ScriptTarget.ESNext, /*skipTrivia*/ true); scanner.setScriptTarget(sourceFile.languageVersion); diff --git a/src/compiler/diagnosticMessages.json b/src/compiler/diagnosticMessages.json index 058dc8c8c23c5..859cc627a1689 100644 --- a/src/compiler/diagnosticMessages.json +++ b/src/compiler/diagnosticMessages.json @@ -1733,7 +1733,7 @@ "category": "Error", "code": 1519 }, - "Expected a class set oprand.": { + "Expected a class set operand.": { "category": "Error", "code": 1520 }, diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 2d26d915aa2cb..7b7e8875c6711 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -21,7 +21,6 @@ import { KeywordSyntaxKind, LanguageFeatureMinimumTarget, LanguageVariant, - last, LineAndCharacter, MapLike, parsePseudoBigInt, @@ -1101,6 +1100,40 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return scanner; + /** + * Returns the code point for the character at the given position within `text`. This + * should only be used when pos is guaranteed to be within the bounds of `text` as this + * function does not perform bounds checks. + */ + function codePointUnchecked(pos: number) { + return codePointAt(text, pos); + } + + /** + * Returns the code point for the character at the given position within `text`. If + * `pos` is outside the bounds set for `text`, `CharacterCodes.EOF` is returned instead. + */ + function codePointChecked(pos: number) { + return pos >= 0 && pos < end ? codePointUnchecked(pos) : CharacterCodes.EOF; + } + + /** + * Returns the char code for the character at the given position within `text`. This + * should only be used when pos is guaranteed to be within the bounds of `text` as this + * function does not perform bounds checks. + */ + function charCodeUnchecked(pos: number) { + return text.charCodeAt(pos); + } + + /** + * Returns the char code for the character at the given position within `text`. If + * `pos` is outside the bounds set for `text`, `CharacterCodes.EOF` is returned instead. + */ + function charCodeChecked(pos: number) { + return pos >= 0 && pos < end ? charCodeUnchecked(pos) : CharacterCodes.EOF; + } + function error(message: DiagnosticMessage): void; function error(message: DiagnosticMessage, errPos: number, length: number, arg0?: any): void; function error(message: DiagnosticMessage, errPos: number = pos, length?: number, arg0?: any): void { @@ -1118,7 +1151,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean let isPreviousTokenSeparator = false; let result = ""; while (true) { - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); if (ch === CharacterCodes._) { tokenFlags |= TokenFlags.ContainsSeparator; if (allowSeparator) { @@ -1147,7 +1180,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } break; } - if (text.charCodeAt(pos - 1) === CharacterCodes._) { + if (charCodeUnchecked(pos - 1) === CharacterCodes._) { tokenFlags |= TokenFlags.ContainsInvalidSeparator; error(Diagnostics.Numeric_separators_are_not_allowed_here, pos - 1, 1); } @@ -1177,9 +1210,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanNumber(): SyntaxKind { let start = pos; let mainFragment: string; - if (text.charCodeAt(pos) === CharacterCodes._0) { + if (charCodeUnchecked(pos) === CharacterCodes._0) { pos++; - if (text.charCodeAt(pos) === CharacterCodes._) { + if (charCodeUnchecked(pos) === CharacterCodes._) { tokenFlags |= TokenFlags.ContainsSeparator | TokenFlags.ContainsInvalidSeparator; error(Diagnostics.Numeric_separators_are_not_allowed_here, pos, 1); // treat it as a normal number literal @@ -1213,15 +1246,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } let decimalFragment: string | undefined; let scientificFragment: string | undefined; - if (text.charCodeAt(pos) === CharacterCodes.dot) { + if (charCodeUnchecked(pos) === CharacterCodes.dot) { pos++; decimalFragment = scanNumberFragment(); } let end = pos; - if (text.charCodeAt(pos) === CharacterCodes.E || text.charCodeAt(pos) === CharacterCodes.e) { + if (charCodeUnchecked(pos) === CharacterCodes.E || charCodeUnchecked(pos) === CharacterCodes.e) { pos++; tokenFlags |= TokenFlags.Scientific; - if (text.charCodeAt(pos) === CharacterCodes.plus || text.charCodeAt(pos) === CharacterCodes.minus) pos++; + if (charCodeUnchecked(pos) === CharacterCodes.plus || charCodeUnchecked(pos) === CharacterCodes.minus) pos++; const preNumericPart = pos; const finalFragment = scanNumberFragment(); if (!finalFragment) { @@ -1268,7 +1301,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } function checkForIdentifierStartAfterNumericLiteral(numericStart: number, isScientific?: boolean) { - if (!isIdentifierStart(codePointAt(text, pos), languageVersion)) { + if (!isIdentifierStart(codePointUnchecked(pos), languageVersion)) { return; } @@ -1292,8 +1325,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanDigits(): boolean { const start = pos; let isOctal = true; - while (isDigit(text.charCodeAt(pos))) { - if (!isOctalDigit(text.charCodeAt(pos))) { + while (isDigit(charCodeChecked(pos))) { + if (!isOctalDigit(charCodeUnchecked(pos))) { isOctal = false; } pos++; @@ -1324,7 +1357,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean let allowSeparator = false; let isPreviousTokenSeparator = false; while (valueChars.length < minCount || scanAsManyAsPossible) { - let ch = text.charCodeAt(pos); + let ch = charCodeUnchecked(pos); if (canHaveSeparators && ch === CharacterCodes._) { tokenFlags |= TokenFlags.ContainsSeparator; if (allowSeparator) { @@ -1357,14 +1390,14 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean if (valueChars.length < minCount) { valueChars = []; } - if (text.charCodeAt(pos - 1) === CharacterCodes._) { + if (charCodeUnchecked(pos - 1) === CharacterCodes._) { error(Diagnostics.Numeric_separators_are_not_allowed_here, pos - 1, 1); } return String.fromCharCode(...valueChars); } function scanString(jsxAttributeString = false): string { - const quote = text.charCodeAt(pos); + const quote = charCodeUnchecked(pos); pos++; let result = ""; let start = pos; @@ -1375,7 +1408,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean error(Diagnostics.Unterminated_string_literal); break; } - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); if (ch === quote) { result += text.substring(start, pos); pos++; @@ -1404,7 +1437,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean * a literal component of a TemplateExpression. */ function scanTemplateAndSetTokenValue(shouldEmitInvalidEscapeError: boolean): SyntaxKind { - const startedWithBacktick = text.charCodeAt(pos) === CharacterCodes.backtick; + const startedWithBacktick = charCodeUnchecked(pos) === CharacterCodes.backtick; pos++; let start = pos; @@ -1420,7 +1453,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean break; } - const currChar = text.charCodeAt(pos); + const currChar = charCodeUnchecked(pos); // '`' if (currChar === CharacterCodes.backtick) { @@ -1431,7 +1464,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } // '${' - if (currChar === CharacterCodes.$ && pos + 1 < end && text.charCodeAt(pos + 1) === CharacterCodes.openBrace) { + if (currChar === CharacterCodes.$ && pos + 1 < end && charCodeUnchecked(pos + 1) === CharacterCodes.openBrace) { contents += text.substring(start, pos); pos += 2; resultingToken = startedWithBacktick ? SyntaxKind.TemplateHead : SyntaxKind.TemplateMiddle; @@ -1452,7 +1485,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean contents += text.substring(start, pos); pos++; - if (pos < end && text.charCodeAt(pos) === CharacterCodes.lineFeed) { + if (pos < end && charCodeUnchecked(pos) === CharacterCodes.lineFeed) { pos++; } @@ -1491,13 +1524,13 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean error(Diagnostics.Unexpected_end_of_text); return ""; } - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); pos++; switch (ch) { case CharacterCodes._0: // Although '0' preceding any digit is treated as LegacyOctalEscapeSequence, // '\08' should separately be interpreted as '\0' + '8'. - if (pos >= end || !isDigit(text.charCodeAt(pos))) { + if (pos >= end || !isDigit(charCodeUnchecked(pos))) { return "\0"; } // '\01', '\011' @@ -1506,7 +1539,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean case CharacterCodes._2: case CharacterCodes._3: // '\1', '\17', '\177' - if (pos < end && isOctalDigit(text.charCodeAt(pos))) { + if (pos < end && isOctalDigit(charCodeUnchecked(pos))) { pos++; } // '\17', '\177' @@ -1516,7 +1549,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean case CharacterCodes._6: case CharacterCodes._7: // '\4', '\47' but not '\477' - if (pos < end && isOctalDigit(text.charCodeAt(pos))) { + if (pos < end && isOctalDigit(charCodeUnchecked(pos))) { pos++; } // '\47' @@ -1557,7 +1590,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean case CharacterCodes.u: if ( (!isRegularExpression || shouldEmitInvalidEscapeError) && - pos < end && text.charCodeAt(pos) === CharacterCodes.openBrace + pos < end && charCodeUnchecked(pos) === CharacterCodes.openBrace ) { // '\u{DDDDDD}' pos -= 2; @@ -1565,7 +1598,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } // '\uDDDD' for (; pos < start + 6; pos++) { - if (!(pos < end && isHexDigit(text.charCodeAt(pos)))) { + if (!(pos < end && isHexDigit(charCodeUnchecked(pos)))) { tokenFlags |= TokenFlags.ContainsInvalidEscape; if (isRegularExpression || shouldEmitInvalidEscapeError) { error(Diagnostics.Hexadecimal_digit_expected); @@ -1578,7 +1611,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean const escapedValueString = String.fromCharCode(escapedValue); if ( isRegularExpression && shouldEmitInvalidEscapeError && escapedValue >= 0xD800 && escapedValue <= 0xDBFF && - pos + 6 < end && text.substring(pos, pos + 2) === "\\u" && text.charCodeAt(pos + 2) !== CharacterCodes.openBrace + pos + 6 < end && text.substring(pos, pos + 2) === "\\u" && charCodeUnchecked(pos + 2) !== CharacterCodes.openBrace ) { // For regular expressions in Unicode mode, \u HexLeadSurrogate \u HexTrailSurrogate is treated as a single character // for the purpose of determining whether a character class range is out of order @@ -1586,7 +1619,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean const nextStart = pos; let nextPos = pos + 2; for (; nextPos < nextStart + 6; nextPos++) { - if (!isHexDigit(text.charCodeAt(pos))) { + if (!isHexDigit(charCodeUnchecked(pos))) { // leave the error to the next call return escapedValueString; } @@ -1602,7 +1635,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean case CharacterCodes.x: // '\xDD' for (; pos < start + 4; pos++) { - if (!(pos < end && isHexDigit(text.charCodeAt(pos)))) { + if (!(pos < end && isHexDigit(charCodeUnchecked(pos)))) { tokenFlags |= TokenFlags.ContainsInvalidEscape; if (isRegularExpression || shouldEmitInvalidEscapeError) { error(Diagnostics.Hexadecimal_digit_expected); @@ -1616,7 +1649,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // when encountering a LineContinuation (i.e. a backslash and a line terminator sequence), // the line terminator is interpreted to be "the empty code unit sequence". case CharacterCodes.carriageReturn: - if (pos < end && text.charCodeAt(pos) === CharacterCodes.lineFeed) { + if (pos < end && charCodeUnchecked(pos) === CharacterCodes.lineFeed) { pos++; } // falls through @@ -1660,7 +1693,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } isInvalidExtendedEscape = true; } - else if (text.charCodeAt(pos) === CharacterCodes.closeBrace) { + else if (charCodeUnchecked(pos) === CharacterCodes.closeBrace) { // Only swallow the following character up if it's a '}'. pos++; } @@ -1683,7 +1716,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // Current character is known to be a backslash. Check for Unicode escape of the form '\uXXXX' // and return code point value if valid Unicode escape is found. Otherwise return -1. function peekUnicodeEscape(): number { - if (pos + 5 < end && text.charCodeAt(pos + 1) === CharacterCodes.u) { + if (pos + 5 < end && charCodeUnchecked(pos + 1) === CharacterCodes.u) { const start = pos; pos += 2; const value = scanExactNumberOfHexDigits(4, /*canHaveSeparators*/ false); @@ -1694,7 +1727,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } function peekExtendedUnicodeEscape(): number { - if (codePointAt(text, pos + 1) === CharacterCodes.u && codePointAt(text, pos + 2) === CharacterCodes.openBrace) { + if (codePointUnchecked(pos + 1) === CharacterCodes.u && codePointUnchecked(pos + 2) === CharacterCodes.openBrace) { const start = pos; pos += 3; const escapedValueString = scanMinimumNumberOfHexDigits(1, /*canHaveSeparators*/ false); @@ -1709,7 +1742,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean let result = ""; let start = pos; while (pos < end) { - let ch = codePointAt(text, pos); + let ch = codePointUnchecked(pos); if (isIdentifierPart(ch, languageVersion)) { pos += charSize(ch); } @@ -1761,7 +1794,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean let separatorAllowed = false; let isPreviousTokenSeparator = false; while (true) { - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); // Numeric separators are allowed anywhere within a numeric literal, except not at the beginning, or following another separator if (ch === CharacterCodes._) { tokenFlags |= TokenFlags.ContainsSeparator; @@ -1786,7 +1819,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; isPreviousTokenSeparator = false; } - if (text.charCodeAt(pos - 1) === CharacterCodes._) { + if (charCodeUnchecked(pos - 1) === CharacterCodes._) { // Literal ends with underscore - not allowed error(Diagnostics.Numeric_separators_are_not_allowed_here, pos - 1, 1); } @@ -1794,7 +1827,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } function checkBigIntSuffix(): SyntaxKind { - if (text.charCodeAt(pos) === CharacterCodes.n) { + if (charCodeUnchecked(pos) === CharacterCodes.n) { tokenValue += "n"; // Use base 10 instead of base 2 or base 8 for shorter literals if (tokenFlags & TokenFlags.BinaryOrOctalSpecifier) { @@ -1825,7 +1858,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return token = SyntaxKind.EndOfFileToken; } - const ch = codePointAt(text, pos); + const ch = codePointUnchecked(pos); if (pos === 0) { // Special handling for shebang if (ch === CharacterCodes.hash && isShebangTrivia(text, pos)) { @@ -1848,7 +1881,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean continue; } else { - if (ch === CharacterCodes.carriageReturn && pos + 1 < end && text.charCodeAt(pos + 1) === CharacterCodes.lineFeed) { + if (ch === CharacterCodes.carriageReturn && pos + 1 < end && charCodeUnchecked(pos + 1) === CharacterCodes.lineFeed) { // consume both CR and LF pos += 2; } @@ -1884,14 +1917,14 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean continue; } else { - while (pos < end && isWhiteSpaceSingleLine(text.charCodeAt(pos))) { + while (pos < end && isWhiteSpaceSingleLine(charCodeUnchecked(pos))) { pos++; } return token = SyntaxKind.WhitespaceTrivia; } case CharacterCodes.exclamation: - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { - if (text.charCodeAt(pos + 2) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { return pos += 3, token = SyntaxKind.ExclamationEqualsEqualsToken; } return pos += 2, token = SyntaxKind.ExclamationEqualsToken; @@ -1905,19 +1938,19 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean case CharacterCodes.backtick: return token = scanTemplateAndSetTokenValue(/*shouldEmitInvalidEscapeError*/ false); case CharacterCodes.percent: - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.PercentEqualsToken; } pos++; return token = SyntaxKind.PercentToken; case CharacterCodes.ampersand: - if (text.charCodeAt(pos + 1) === CharacterCodes.ampersand) { - if (text.charCodeAt(pos + 2) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.ampersand) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { return pos += 3, token = SyntaxKind.AmpersandAmpersandEqualsToken; } return pos += 2, token = SyntaxKind.AmpersandAmpersandToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.AmpersandEqualsToken; } pos++; @@ -1929,11 +1962,11 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; return token = SyntaxKind.CloseParenToken; case CharacterCodes.asterisk: - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.AsteriskEqualsToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.asterisk) { - if (text.charCodeAt(pos + 2) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.asterisk) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { return pos += 3, token = SyntaxKind.AsteriskAsteriskEqualsToken; } return pos += 2, token = SyntaxKind.AsteriskAsteriskToken; @@ -1946,10 +1979,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } return token = SyntaxKind.AsteriskToken; case CharacterCodes.plus: - if (text.charCodeAt(pos + 1) === CharacterCodes.plus) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.plus) { return pos += 2, token = SyntaxKind.PlusPlusToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.PlusEqualsToken; } pos++; @@ -1958,31 +1991,31 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; return token = SyntaxKind.CommaToken; case CharacterCodes.minus: - if (text.charCodeAt(pos + 1) === CharacterCodes.minus) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.minus) { return pos += 2, token = SyntaxKind.MinusMinusToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.MinusEqualsToken; } pos++; return token = SyntaxKind.MinusToken; case CharacterCodes.dot: - if (isDigit(text.charCodeAt(pos + 1))) { + if (isDigit(charCodeUnchecked(pos + 1))) { scanNumber(); return token = SyntaxKind.NumericLiteral; } - if (text.charCodeAt(pos + 1) === CharacterCodes.dot && text.charCodeAt(pos + 2) === CharacterCodes.dot) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.dot && charCodeUnchecked(pos + 2) === CharacterCodes.dot) { return pos += 3, token = SyntaxKind.DotDotDotToken; } pos++; return token = SyntaxKind.DotToken; case CharacterCodes.slash: // Single-line comment - if (text.charCodeAt(pos + 1) === CharacterCodes.slash) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.slash) { pos += 2; while (pos < end) { - if (isLineBreak(text.charCodeAt(pos))) { + if (isLineBreak(charCodeUnchecked(pos))) { break; } pos++; @@ -2003,16 +2036,16 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } // Multi-line comment - if (text.charCodeAt(pos + 1) === CharacterCodes.asterisk) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.asterisk) { pos += 2; - const isJSDoc = text.charCodeAt(pos) === CharacterCodes.asterisk && text.charCodeAt(pos + 1) !== CharacterCodes.slash; + const isJSDoc = charCodeUnchecked(pos) === CharacterCodes.asterisk && charCodeUnchecked(pos + 1) !== CharacterCodes.slash; let commentClosed = false; let lastLineStart = tokenStart; while (pos < end) { - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); - if (ch === CharacterCodes.asterisk && text.charCodeAt(pos + 1) === CharacterCodes.slash) { + if (ch === CharacterCodes.asterisk && charCodeUnchecked(pos + 1) === CharacterCodes.slash) { pos += 2; commentClosed = true; break; @@ -2047,7 +2080,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.SlashEqualsToken; } @@ -2055,7 +2088,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return token = SyntaxKind.SlashToken; case CharacterCodes._0: - if (pos + 2 < end && (text.charCodeAt(pos + 1) === CharacterCodes.X || text.charCodeAt(pos + 1) === CharacterCodes.x)) { + if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.X || charCodeUnchecked(pos + 1) === CharacterCodes.x)) { pos += 2; tokenValue = scanMinimumNumberOfHexDigits(1, /*canHaveSeparators*/ true); if (!tokenValue) { @@ -2066,7 +2099,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean tokenFlags |= TokenFlags.HexSpecifier; return token = checkBigIntSuffix(); } - else if (pos + 2 < end && (text.charCodeAt(pos + 1) === CharacterCodes.B || text.charCodeAt(pos + 1) === CharacterCodes.b)) { + else if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.B || charCodeUnchecked(pos + 1) === CharacterCodes.b)) { pos += 2; tokenValue = scanBinaryOrOctalDigits(/* base */ 2); if (!tokenValue) { @@ -2077,7 +2110,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean tokenFlags |= TokenFlags.BinarySpecifier; return token = checkBigIntSuffix(); } - else if (pos + 2 < end && (text.charCodeAt(pos + 1) === CharacterCodes.O || text.charCodeAt(pos + 1) === CharacterCodes.o)) { + else if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.O || charCodeUnchecked(pos + 1) === CharacterCodes.o)) { pos += 2; tokenValue = scanBinaryOrOctalDigits(/* base */ 8); if (!tokenValue) { @@ -2116,19 +2149,19 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } - if (text.charCodeAt(pos + 1) === CharacterCodes.lessThan) { - if (text.charCodeAt(pos + 2) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.lessThan) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { return pos += 3, token = SyntaxKind.LessThanLessThanEqualsToken; } return pos += 2, token = SyntaxKind.LessThanLessThanToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.LessThanEqualsToken; } if ( languageVariant === LanguageVariant.JSX && - text.charCodeAt(pos + 1) === CharacterCodes.slash && - text.charCodeAt(pos + 2) !== CharacterCodes.asterisk + charCodeUnchecked(pos + 1) === CharacterCodes.slash && + charCodeUnchecked(pos + 2) !== CharacterCodes.asterisk ) { return pos += 2, token = SyntaxKind.LessThanSlashToken; } @@ -2145,13 +2178,13 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { - if (text.charCodeAt(pos + 2) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { return pos += 3, token = SyntaxKind.EqualsEqualsEqualsToken; } return pos += 2, token = SyntaxKind.EqualsEqualsToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.greaterThan) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.greaterThan) { return pos += 2, token = SyntaxKind.EqualsGreaterThanToken; } pos++; @@ -2170,11 +2203,11 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; return token = SyntaxKind.GreaterThanToken; case CharacterCodes.question: - if (text.charCodeAt(pos + 1) === CharacterCodes.dot && !isDigit(text.charCodeAt(pos + 2))) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.dot && !isDigit(charCodeUnchecked(pos + 2))) { return pos += 2, token = SyntaxKind.QuestionDotToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.question) { - if (text.charCodeAt(pos + 2) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.question) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { return pos += 3, token = SyntaxKind.QuestionQuestionEqualsToken; } return pos += 2, token = SyntaxKind.QuestionQuestionToken; @@ -2188,7 +2221,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; return token = SyntaxKind.CloseBracketToken; case CharacterCodes.caret: - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.CaretEqualsToken; } pos++; @@ -2207,13 +2240,13 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } - if (text.charCodeAt(pos + 1) === CharacterCodes.bar) { - if (text.charCodeAt(pos + 2) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.bar) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { return pos += 3, token = SyntaxKind.BarBarEqualsToken; } return pos += 2, token = SyntaxKind.BarBarToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.BarEqualsToken; } pos++; @@ -2252,7 +2285,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return token = SyntaxKind.Unknown; } - const charAfterHash = codePointAt(text, pos + 1); + const charAfterHash = codePointUnchecked(pos + 1); if (charAfterHash === CharacterCodes.backslash) { pos++; const extendedCookedChar = peekExtendedUnicodeEscape(); @@ -2337,7 +2370,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean Debug.assert(token === SyntaxKind.Unknown, "'reScanInvalidIdentifier' should only be called when the current token is 'SyntaxKind.Unknown'."); pos = tokenStart = fullStartPos; tokenFlags = 0; - const ch = codePointAt(text, pos); + const ch = codePointUnchecked(pos); const identifierKind = scanIdentifier(ch, ScriptTarget.ESNext); if (identifierKind) { return token = identifierKind; @@ -2350,7 +2383,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean let ch = startCharacter; if (isIdentifierStart(ch, languageVersion)) { pos += charSize(ch); - while (pos < end && isIdentifierPart(ch = codePointAt(text, pos), languageVersion)) pos += charSize(ch); + while (pos < end && isIdentifierPart(ch = codePointUnchecked(pos), languageVersion)) pos += charSize(ch); tokenValue = text.substring(tokenStart, pos); if (ch === CharacterCodes.backslash) { tokenValue += scanIdentifierParts(); @@ -2361,20 +2394,20 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function reScanGreaterToken(): SyntaxKind { if (token === SyntaxKind.GreaterThanToken) { - if (text.charCodeAt(pos) === CharacterCodes.greaterThan) { - if (text.charCodeAt(pos + 1) === CharacterCodes.greaterThan) { - if (text.charCodeAt(pos + 2) === CharacterCodes.equals) { + if (charCodeUnchecked(pos) === CharacterCodes.greaterThan) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.greaterThan) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { return pos += 3, token = SyntaxKind.GreaterThanGreaterThanGreaterThanEqualsToken; } return pos += 2, token = SyntaxKind.GreaterThanGreaterThanGreaterThanToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.GreaterThanGreaterThanEqualsToken; } pos++; return token = SyntaxKind.GreaterThanGreaterThanToken; } - if (text.charCodeAt(pos) === CharacterCodes.equals) { + if (charCodeUnchecked(pos) === CharacterCodes.equals) { pos++; return token = SyntaxKind.GreaterThanEqualsToken; } @@ -2409,7 +2442,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean break; } - const ch = text.charCodeAt(p); + const ch = charCodeUnchecked(p); if (isLineBreak(ch)) { tokenFlags |= TokenFlags.Unterminated; error(Diagnostics.Unterminated_regular_expression_literal); @@ -2442,7 +2475,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean const endOfBody = p - (isUnterminated ? 0 : 1); let regExpFlags = RegularExpressionFlags.None; while (p < end) { - const ch = text.charCodeAt(p); + const ch = charCodeUnchecked(p); if (!isIdentifierPart(ch, languageVersion)) { break; } @@ -2464,986 +2497,1007 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } p++; } + pos = p; if (reportErrors) { - pos = tokenStart + 1; - const saveTokenPos = tokenStart; + const saveTokenStart = tokenStart; const saveTokenFlags = tokenFlags; - scanRegularExpressionWorker(text, endOfBody, regExpFlags, isUnterminated, /*annexB*/ true); - if (!isUnterminated) { - pos = p; - } - tokenStart = saveTokenPos; + const savePos = pos; + const saveEnd = end; + pos = tokenStart + 1; + end = endOfBody; + scanRegularExpressionWorker(regExpFlags, isUnterminated, /*annexB*/ true); + tokenStart = saveTokenStart; tokenFlags = saveTokenFlags; - } - else { - pos = p; + pos = savePos; + end = saveEnd; } tokenValue = text.substring(tokenStart, pos); token = SyntaxKind.RegularExpressionLiteral; } return token; + } - function scanRegularExpressionWorker(text: string, end: number, regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean) { - /** Grammar parameter */ - const unicodeSetsMode = !!(regExpFlags & RegularExpressionFlags.UnicodeSets); - /** Grammar parameter */ - const unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode); - - if (unicodeMode) { - // Annex B treats any unicode mode as the strict syntax. - annexB = false; - } - - /** @see {scanClassSetExpression} */ - let mayContainStrings = false; - - /** The number of numeric (anonymous) capturing groups defined in the regex. */ - let numberOfCapturingGroups = 0; - /** All named capturing groups defined in the regex. */ - const groupSpecifiers = new Set(); - /** All references to named capturing groups in the regex. */ - const groupNameReferences: (TextRange & { name: string; })[] = []; - /** All numeric backreferences within the regex. */ - const decimalEscapes: (TextRange & { value: number; })[] = []; - /** A stack of scopes for named capturing groups. @see {scanGroupName} */ - const namedCapturingGroups: Set[] = []; - - // Disjunction ::= Alternative ('|' Alternative)* - function scanDisjunction(isInGroup: boolean) { - while (true) { - namedCapturingGroups.push(new Set()); - scanAlternative(isInGroup); - namedCapturingGroups.pop(); - if (text.charCodeAt(pos) !== CharacterCodes.bar) { - return; - } - pos++; + function scanRegularExpressionWorker(regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean) { + // Why var? It avoids TDZ checks in the runtime which can be costly. + // See: https://github.com/microsoft/TypeScript/issues/52924 + /* eslint-disable no-var */ + + /** Grammar parameter */ + var unicodeSetsMode = !!(regExpFlags & RegularExpressionFlags.UnicodeSets); + /** Grammar parameter */ + var unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode); + + if (unicodeMode) { + // Annex B treats any unicode mode as the strict syntax. + annexB = false; + } + + /** @see {scanClassSetExpression} */ + var mayContainStrings = false; + + /** The number of numeric (anonymous) capturing groups defined in the regex. */ + var numberOfCapturingGroups = 0; + /** All named capturing groups defined in the regex. */ + var groupSpecifiers: Set | undefined; + /** All references to named capturing groups in the regex. */ + var groupNameReferences: (TextRange & { name: string; })[] | undefined; + /** All numeric backreferences within the regex. */ + var decimalEscapes: (TextRange & { value: number; })[] | undefined; + /** A stack of scopes for named capturing groups. @see {scanGroupName} */ + var namedCapturingGroupsScopeStack: (Set | undefined)[] = []; + var topNamedCapturingGroupsScope: Set | undefined; + /* eslint-enable no-var */ + // Disjunction ::= Alternative ('|' Alternative)* + function scanDisjunction(isInGroup: boolean) { + while (true) { + namedCapturingGroupsScopeStack.push(topNamedCapturingGroupsScope); + topNamedCapturingGroupsScope = undefined; + scanAlternative(isInGroup); + topNamedCapturingGroupsScope = namedCapturingGroupsScopeStack.pop(); + if (charCodeChecked(pos) !== CharacterCodes.bar) { + return; } + pos++; } + } - // Alternative ::= Term* - // Term ::= - // | Assertion - // | Atom Quantifier? - // Assertion ::= - // | '^' - // | '$' - // | '\b' - // | '\B' - // | '(?=' Disjunction ')' - // | '(?!' Disjunction ')' - // | '(?<=' Disjunction ')' - // | '(?' Disjunction ')' - // | '(?' RegularExpressionFlags ('-' RegularExpressionFlags)? ':' Disjunction ')' - // CharacterClass ::= unicodeMode - // ? '[' ClassRanges ']' - // : '[' ClassSetExpression ']' - function scanAlternative(isInGroup: boolean) { - let isPreviousTermQuantifiable = false; - while (pos < end) { - const start = pos; - const ch = text.charCodeAt(pos); - switch (ch) { - case CharacterCodes.caret: - case CharacterCodes.$: - pos++; - isPreviousTermQuantifiable = false; - break; - case CharacterCodes.backslash: + // Alternative ::= Term* + // Term ::= + // | Assertion + // | Atom Quantifier? + // Assertion ::= + // | '^' + // | '$' + // | '\b' + // | '\B' + // | '(?=' Disjunction ')' + // | '(?!' Disjunction ')' + // | '(?<=' Disjunction ')' + // | '(?' Disjunction ')' + // | '(?' RegularExpressionFlags ('-' RegularExpressionFlags)? ':' Disjunction ')' + // CharacterClass ::= unicodeMode + // ? '[' ClassRanges ']' + // : '[' ClassSetExpression ']' + function scanAlternative(isInGroup: boolean) { + let isPreviousTermQuantifiable = false; + while (true) { + const start = pos; + const ch = charCodeChecked(pos); + switch (ch) { + case CharacterCodes.EOF: + return; + case CharacterCodes.caret: + case CharacterCodes.$: + pos++; + isPreviousTermQuantifiable = false; + break; + case CharacterCodes.backslash: + pos++; + switch (charCodeChecked(pos)) { + case CharacterCodes.b: + case CharacterCodes.B: + pos++; + isPreviousTermQuantifiable = false; + break; + default: + scanAtomEscape(); + isPreviousTermQuantifiable = true; + break; + } + break; + case CharacterCodes.openParen: + pos++; + if (charCodeChecked(pos) === CharacterCodes.question) { pos++; - switch (text.charCodeAt(pos)) { - case CharacterCodes.b: - case CharacterCodes.B: + switch (charCodeChecked(pos)) { + case CharacterCodes.equals: + case CharacterCodes.exclamation: pos++; - isPreviousTermQuantifiable = false; - break; - default: - scanAtomEscape(); - isPreviousTermQuantifiable = true; + // In Annex B, `(?=Disjunction)` and `(?!Disjunction)` are quantifiable + isPreviousTermQuantifiable = annexB; break; - } - break; - case CharacterCodes.openParen: - pos++; - if (text.charCodeAt(pos) === CharacterCodes.question) { - pos++; - switch (text.charCodeAt(pos)) { - case CharacterCodes.equals: - case CharacterCodes.exclamation: - pos++; - // In Annex B, `(?=Disjunction)` and `(?!Disjunction)` are quantifiable - isPreviousTermQuantifiable = annexB; - break; - case CharacterCodes.lessThan: - const groupNameStart = pos; - pos++; - switch (text.charCodeAt(pos)) { - case CharacterCodes.equals: - case CharacterCodes.exclamation: - pos++; - isPreviousTermQuantifiable = false; - break; - default: - scanGroupName(/*isReference*/ false); - scanExpectedChar(CharacterCodes.greaterThan); - if (languageVersion < ScriptTarget.ES2018) { - error(Diagnostics.Named_capturing_groups_are_only_available_when_targeting_ES2018_or_later, groupNameStart, pos - groupNameStart); - } - numberOfCapturingGroups++; - isPreviousTermQuantifiable = true; - break; - } - break; - default: - const start = pos; - const setFlags = scanPatternModifiers(RegularExpressionFlags.None); - if (text.charCodeAt(pos) === CharacterCodes.minus) { + case CharacterCodes.lessThan: + const groupNameStart = pos; + pos++; + switch (charCodeChecked(pos)) { + case CharacterCodes.equals: + case CharacterCodes.exclamation: pos++; - scanPatternModifiers(setFlags); - if (pos === start + 1) { - error(Diagnostics.Subpattern_flags_must_be_present_when_there_is_a_minus_sign, start, pos - start); + isPreviousTermQuantifiable = false; + break; + default: + scanGroupName(/*isReference*/ false); + scanExpectedChar(CharacterCodes.greaterThan); + if (languageVersion < ScriptTarget.ES2018) { + error(Diagnostics.Named_capturing_groups_are_only_available_when_targeting_ES2018_or_later, groupNameStart, pos - groupNameStart); } - } - scanExpectedChar(CharacterCodes.colon); - isPreviousTermQuantifiable = true; - break; - } - } - else { - numberOfCapturingGroups++; - isPreviousTermQuantifiable = true; - } - scanDisjunction(/*isInGroup*/ true); - scanExpectedChar(CharacterCodes.closeParen); - break; - case CharacterCodes.openBrace: - pos++; - const digitsStart = pos; - scanDigits(); - const min = tokenValue; - if (text.charCodeAt(pos) === CharacterCodes.comma) { - pos++; - scanDigits(); - const max = tokenValue; - if (!min) { - if (max || text.charCodeAt(pos) === CharacterCodes.closeBrace) { - error(Diagnostics.Incomplete_quantifier_Digit_expected, digitsStart, 0); + numberOfCapturingGroups++; + isPreviousTermQuantifiable = true; + break; } - else { - if (unicodeMode) { - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch)); + break; + default: + const start = pos; + const setFlags = scanPatternModifiers(RegularExpressionFlags.None); + if (charCodeChecked(pos) === CharacterCodes.minus) { + pos++; + scanPatternModifiers(setFlags); + if (pos === start + 1) { + error(Diagnostics.Subpattern_flags_must_be_present_when_there_is_a_minus_sign, start, pos - start); } - isPreviousTermQuantifiable = true; - break; } - } - if (max && Number.parseInt(min) > Number.parseInt(max)) { - error(Diagnostics.Numbers_out_of_order_in_quantifier, digitsStart, pos - digitsStart); - } - } - else if (!min) { - if (unicodeMode) { - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch)); - } - isPreviousTermQuantifiable = true; - break; - } - scanExpectedChar(CharacterCodes.closeBrace); - pos--; - // falls through - case CharacterCodes.asterisk: - case CharacterCodes.plus: - case CharacterCodes.question: - pos++; - if (text.charCodeAt(pos) === CharacterCodes.question) { - // Non-greedy - pos++; - } - if (!isPreviousTermQuantifiable) { - error(Diagnostics.There_is_nothing_available_for_repetition, start, pos - start); + scanExpectedChar(CharacterCodes.colon); + isPreviousTermQuantifiable = true; + break; } - isPreviousTermQuantifiable = false; - break; - case CharacterCodes.dot: - pos++; + } + else { + numberOfCapturingGroups++; isPreviousTermQuantifiable = true; - break; - case CharacterCodes.openBracket: + } + scanDisjunction(/*isInGroup*/ true); + scanExpectedChar(CharacterCodes.closeParen); + break; + case CharacterCodes.openBrace: + pos++; + const digitsStart = pos; + scanDigits(); + const min = tokenValue; + if (charCodeChecked(pos) === CharacterCodes.comma) { pos++; - if (unicodeSetsMode) { - scanClassSetExpression(); - } - else { - scanClassRanges(); - } - scanExpectedChar(CharacterCodes.closeBracket); - isPreviousTermQuantifiable = true; - break; - case CharacterCodes.closeParen: - if (isInGroup) { - return; + scanDigits(); + const max = tokenValue; + if (!min) { + if (max || charCodeChecked(pos) === CharacterCodes.closeBrace) { + error(Diagnostics.Incomplete_quantifier_Digit_expected, digitsStart, 0); + } + else { + if (unicodeMode) { + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch)); + } + isPreviousTermQuantifiable = true; + break; + } } - // falls through - case CharacterCodes.closeBracket: - case CharacterCodes.closeBrace: - if (isUnterminated && !isInGroup) { - // Assume what starting from the character to be outside of the regex - return; + if (max && Number.parseInt(min) > Number.parseInt(max)) { + error(Diagnostics.Numbers_out_of_order_in_quantifier, digitsStart, pos - digitsStart); } - if (unicodeMode || ch === CharacterCodes.closeParen) { - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); + } + else if (!min) { + if (unicodeMode) { + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch)); } - pos++; isPreviousTermQuantifiable = true; break; - case CharacterCodes.slash: - case CharacterCodes.bar: - return; - default: - scanSourceCharacter(); - isPreviousTermQuantifiable = true; - break; - } - } - } - - function scanPatternModifiers(currFlags: RegularExpressionFlags): RegularExpressionFlags { - while (pos < end) { - const ch = text.charCodeAt(pos); - if (!isIdentifierPart(ch, languageVersion)) { - break; - } - const flag = characterToRegularExpressionFlag(String.fromCharCode(ch)); - if (flag === undefined) { - error(Diagnostics.Unknown_regular_expression_flag, pos, 1); - } - else if (currFlags & flag) { - error(Diagnostics.Duplicate_regular_expression_flag, pos, 1); - } - else if (!(flag & RegularExpressionFlags.Modifiers)) { - error(Diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, pos, 1); - } - else { - currFlags |= flag; - checkRegularExpressionFlagAvailable(flag, pos); - } - pos++; - } - return currFlags; - } - - // AtomEscape ::= - // | DecimalEscape - // | CharacterClassEscape - // | CharacterEscape - // | 'k<' RegExpIdentifierName '>' - function scanAtomEscape() { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash); - switch (text.charCodeAt(pos)) { - case CharacterCodes.k: + } + scanExpectedChar(CharacterCodes.closeBrace); + pos--; + // falls through + case CharacterCodes.asterisk: + case CharacterCodes.plus: + case CharacterCodes.question: pos++; - if (text.charCodeAt(pos) === CharacterCodes.lessThan) { + if (charCodeChecked(pos) === CharacterCodes.question) { + // Non-greedy pos++; - scanGroupName(/*isReference*/ true); - scanExpectedChar(CharacterCodes.greaterThan); } - else if (unicodeMode) { - error(Diagnostics.k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets, pos - 2, 2); + if (!isPreviousTermQuantifiable) { + error(Diagnostics.There_is_nothing_available_for_repetition, start, pos - start); } + isPreviousTermQuantifiable = false; break; - case CharacterCodes.q: - if (unicodeSetsMode) { - pos++; - error(Diagnostics.q_is_only_available_inside_character_class, pos - 2, 2); - break; - } - // falls through - default: - // The scanEscapeSequence call in scanCharacterEscape must return non-empty strings - // since there must not be line breaks in a regex literal - Debug.assert(scanCharacterClassEscape() || scanDecimalEscape() || scanCharacterEscape(/*atomEscape*/ true)); + case CharacterCodes.dot: + pos++; + isPreviousTermQuantifiable = true; break; - } - } - - // DecimalEscape ::= [1-9] [0-9]* - function scanDecimalEscape(): boolean { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash); - const ch = text.charCodeAt(pos); - if (ch >= CharacterCodes._1 && ch <= CharacterCodes._9) { - const start = pos; - scanDigits(); - decimalEscapes.push({ pos: start, end: pos, value: +tokenValue }); - return true; - } - return false; - } - - // CharacterEscape ::= - // | `c` ControlLetter - // | IdentityEscape - // | (Other sequences handled by `scanEscapeSequence`) - // IdentityEscape ::= - // | '^' | '$' | '/' | '\' | '.' | '*' | '+' | '?' | '(' | ')' | '[' | ']' | '{' | '}' | '|' - // | [~UnicodeMode] (any other non-identifier characters) - function scanCharacterEscape(atomEscape: boolean): string { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash); - let ch = text.charCodeAt(pos); - switch (ch) { - case CharacterCodes.c: + case CharacterCodes.openBracket: pos++; - ch = text.charCodeAt(pos); - if (isASCIILetter(ch)) { - pos++; - return String.fromCharCode(ch & 0x1f); - } - if (unicodeMode) { - error(Diagnostics.c_must_be_followed_by_an_ASCII_letter, pos - 2, 2); + if (unicodeSetsMode) { + scanClassSetExpression(); } - else if (atomEscape && annexB) { - // Annex B treats - // - // ExtendedAtom : `\` [lookahead = `c`] - // - // as the single character `\` when `c` isn't followed by a valid control character - pos--; - return "\\"; + else { + scanClassRanges(); } - return String.fromCharCode(ch); - case CharacterCodes.caret: - case CharacterCodes.$: - case CharacterCodes.slash: - case CharacterCodes.backslash: - case CharacterCodes.dot: - case CharacterCodes.asterisk: - case CharacterCodes.plus: - case CharacterCodes.question: - case CharacterCodes.openParen: + scanExpectedChar(CharacterCodes.closeBracket); + isPreviousTermQuantifiable = true; + break; case CharacterCodes.closeParen: - case CharacterCodes.openBracket: + if (isInGroup) { + return; + } + // falls through case CharacterCodes.closeBracket: - case CharacterCodes.openBrace: case CharacterCodes.closeBrace: - case CharacterCodes.bar: + if (isUnterminated && !isInGroup) { + // Assume what starting from the character to be outside of the regex + return; + } + if (unicodeMode || ch === CharacterCodes.closeParen) { + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); + } pos++; - return String.fromCharCode(ch); + isPreviousTermQuantifiable = true; + break; + case CharacterCodes.slash: + case CharacterCodes.bar: + return; default: - if (pos >= end) { - error(Diagnostics.Undetermined_character_escape, pos - 1, 1, ch); - return "\\"; - } - pos--; - return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ annexB ? "annex-b" : true); + scanSourceCharacter(); + isPreviousTermQuantifiable = true; + break; } } + } - function scanGroupName(isReference: boolean) { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.lessThan); - tokenStart = pos; - scanIdentifier(codePointAt(text, pos), languageVersion); - if (pos === tokenStart) { - error(Diagnostics.Expected_a_capturing_group_name); + function scanPatternModifiers(currFlags: RegularExpressionFlags): RegularExpressionFlags { + while (true) { + const ch = charCodeChecked(pos); + if (ch === CharacterCodes.EOF || !isIdentifierPart(ch, languageVersion)) { + break; + } + const flag = characterToRegularExpressionFlag(String.fromCharCode(ch)); + if (flag === undefined) { + error(Diagnostics.Unknown_regular_expression_flag, pos, 1); } - else if (isReference) { - groupNameReferences.push({ pos: tokenStart, end: pos, name: tokenValue }); + else if (currFlags & flag) { + error(Diagnostics.Duplicate_regular_expression_flag, pos, 1); } - else if (namedCapturingGroups.some(group => group.has(tokenValue))) { - error(Diagnostics.Named_capturing_groups_with_the_same_name_must_be_mutually_exclusive_to_each_other, tokenStart, pos - tokenStart); + else if (!(flag & RegularExpressionFlags.Modifiers)) { + error(Diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, pos, 1); } else { - last(namedCapturingGroups).add(tokenValue); - groupSpecifiers.add(tokenValue); + currFlags |= flag; + checkRegularExpressionFlagAvailable(flag, pos); } + pos++; + } + return currFlags; + } + + // AtomEscape ::= + // | DecimalEscape + // | CharacterClassEscape + // | CharacterEscape + // | 'k<' RegExpIdentifierName '>' + function scanAtomEscape() { + Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.backslash); + switch (charCodeChecked(pos)) { + case CharacterCodes.k: + pos++; + if (charCodeChecked(pos) === CharacterCodes.lessThan) { + pos++; + scanGroupName(/*isReference*/ true); + scanExpectedChar(CharacterCodes.greaterThan); + } + else if (unicodeMode) { + error(Diagnostics.k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets, pos - 2, 2); + } + break; + case CharacterCodes.q: + if (unicodeSetsMode) { + pos++; + error(Diagnostics.q_is_only_available_inside_character_class, pos - 2, 2); + break; + } + // falls through + default: + // The scanEscapeSequence call in scanCharacterEscape must return non-empty strings + // since there must not be line breaks in a regex literal + Debug.assert(scanCharacterClassEscape() || scanDecimalEscape() || scanCharacterEscape(/*atomEscape*/ true)); + break; } + } - function isClassContentExit(ch: number) { - return ch === CharacterCodes.closeBracket || pos >= end; + // DecimalEscape ::= [1-9] [0-9]* + function scanDecimalEscape(): boolean { + Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.backslash); + const ch = charCodeChecked(pos); + if (ch >= CharacterCodes._1 && ch <= CharacterCodes._9) { + const start = pos; + scanDigits(); + decimalEscapes = append(decimalEscapes, { pos: start, end: pos, value: +tokenValue }); + return true; } + return false; + } - // ClassRanges ::= '^'? (ClassAtom ('-' ClassAtom)?)* - function scanClassRanges() { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.openBracket); - if (text.charCodeAt(pos) === CharacterCodes.caret) { - // character complement + // CharacterEscape ::= + // | `c` ControlLetter + // | IdentityEscape + // | (Other sequences handled by `scanEscapeSequence`) + // IdentityEscape ::= + // | '^' | '$' | '/' | '\' | '.' | '*' | '+' | '?' | '(' | ')' | '[' | ']' | '{' | '}' | '|' + // | [~UnicodeMode] (any other non-identifier characters) + function scanCharacterEscape(atomEscape: boolean): string { + Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.backslash); + let ch = charCodeChecked(pos); + switch (ch) { + case CharacterCodes.c: + pos++; + ch = charCodeChecked(pos); + if (isASCIILetter(ch)) { + pos++; + return String.fromCharCode(ch & 0x1f); + } + if (unicodeMode) { + error(Diagnostics.c_must_be_followed_by_an_ASCII_letter, pos - 2, 2); + } + else if (atomEscape && annexB) { + // Annex B treats + // + // ExtendedAtom : `\` [lookahead = `c`] + // + // as the single character `\` when `c` isn't followed by a valid control character + pos--; + return "\\"; + } + return String.fromCharCode(ch); + case CharacterCodes.caret: + case CharacterCodes.$: + case CharacterCodes.slash: + case CharacterCodes.backslash: + case CharacterCodes.dot: + case CharacterCodes.asterisk: + case CharacterCodes.plus: + case CharacterCodes.question: + case CharacterCodes.openParen: + case CharacterCodes.closeParen: + case CharacterCodes.openBracket: + case CharacterCodes.closeBracket: + case CharacterCodes.openBrace: + case CharacterCodes.closeBrace: + case CharacterCodes.bar: pos++; + return String.fromCharCode(ch); + default: + if (pos >= end) { + error(Diagnostics.Undetermined_character_escape, pos - 1, 1); + return "\\"; + } + pos--; + return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ annexB ? "annex-b" : true); + } + } + + function scanGroupName(isReference: boolean) { + Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.lessThan); + tokenStart = pos; + scanIdentifier(codePointChecked(pos), languageVersion); + if (pos === tokenStart) { + error(Diagnostics.Expected_a_capturing_group_name); + } + else if (isReference) { + groupNameReferences = append(groupNameReferences, { pos: tokenStart, end: pos, name: tokenValue }); + } + else if (topNamedCapturingGroupsScope?.has(tokenValue) || namedCapturingGroupsScopeStack.some(group => group?.has(tokenValue))) { + error(Diagnostics.Named_capturing_groups_with_the_same_name_must_be_mutually_exclusive_to_each_other, tokenStart, pos - tokenStart); + } + else { + topNamedCapturingGroupsScope ??= new Set(); + topNamedCapturingGroupsScope.add(tokenValue); + groupSpecifiers ??= new Set(); + groupSpecifiers.add(tokenValue); + } + } + + function isClassContentExit(ch: number) { + return ch === CharacterCodes.closeBracket || ch === CharacterCodes.EOF || pos >= end; + } + + // ClassRanges ::= '^'? (ClassAtom ('-' ClassAtom)?)* + function scanClassRanges() { + Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.openBracket); + if (charCodeChecked(pos) === CharacterCodes.caret) { + // character complement + pos++; + } + while (true) { + const ch = charCodeChecked(pos); + if (isClassContentExit(ch)) { + return; } - while (pos < end) { - const ch = text.charCodeAt(pos); + const minStart = pos; + const minCharacter = scanClassAtom(); + if (charCodeChecked(pos) === CharacterCodes.minus) { + pos++; + const ch = charCodeChecked(pos); if (isClassContentExit(ch)) { return; } - const minStart = pos; - const minCharacter = scanClassAtom(); - if (text.charCodeAt(pos) === CharacterCodes.minus) { - pos++; - const ch = text.charCodeAt(pos); - if (isClassContentExit(ch)) { - return; + if (!minCharacter && !annexB) { + error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, minStart, pos - 1 - minStart); + } + const maxStart = pos; + const maxCharacter = scanClassAtom(); + if (!maxCharacter && !annexB) { + error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, maxStart, pos - maxStart); + continue; + } + if (!minCharacter) { + continue; + } + const minCharacterValue = codePointAt(minCharacter, 0); + const maxCharacterValue = codePointAt(maxCharacter, 0); + if ( + minCharacter.length === charSize(minCharacterValue) && + maxCharacter.length === charSize(maxCharacterValue) && + minCharacterValue > maxCharacterValue + ) { + error(Diagnostics.Range_out_of_order_in_character_class, minStart, pos - minStart); + } + } + } + } + + // Static Semantics: MayContainStrings + // ClassUnion: ClassSetOperands.some(ClassSetOperand => ClassSetOperand.MayContainStrings) + // ClassIntersection: ClassSetOperands.every(ClassSetOperand => ClassSetOperand.MayContainStrings) + // ClassSubtraction: ClassSetOperands[0].MayContainStrings + // ClassSetOperand: + // || ClassStringDisjunctionContents.MayContainStrings + // || CharacterClassEscape.UnicodePropertyValueExpression.LoneUnicodePropertyNameOrValue.MayContainStrings + // ClassStringDisjunctionContents: ClassStrings.some(ClassString => ClassString.ClassSetCharacters.length !== 1) + // LoneUnicodePropertyNameOrValue: isBinaryUnicodePropertyOfStrings(LoneUnicodePropertyNameOrValue) + + // ClassSetExpression ::= '^'? (ClassUnion | ClassIntersection | ClassSubtraction) + // ClassUnion ::= (ClassSetRange | ClassSetOperand)* + // ClassIntersection ::= ClassSetOperand ('&&' ClassSetOperand)+ + // ClassSubtraction ::= ClassSetOperand ('--' ClassSetOperand)+ + // ClassSetRange ::= ClassSetCharacter '-' ClassSetCharacter + function scanClassSetExpression() { + Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.openBracket); + let isCharacterComplement = false; + if (charCodeChecked(pos) === CharacterCodes.caret) { + pos++; + isCharacterComplement = true; + } + let expressionMayContainStrings = false; + let ch = charCodeChecked(pos); + if (isClassContentExit(ch)) { + return; + } + let start = pos; + let operand!: string; + switch (text.slice(pos, pos + 2)) { // TODO: don't use slice + case "--": + case "&&": + error(Diagnostics.Expected_a_class_set_operand); + mayContainStrings = false; + break; + default: + operand = scanClassSetOperand(); + break; + } + switch (charCodeChecked(pos)) { + case CharacterCodes.minus: + if (charCodeChecked(pos + 1) === CharacterCodes.minus) { + if (isCharacterComplement && mayContainStrings) { + error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, start, pos - start); } - if (!minCharacter && !annexB) { - error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, minStart, pos - 1 - minStart); + expressionMayContainStrings = mayContainStrings; + scanClassSetSubExpression(ClassSetExpressionType.ClassSubtraction); + mayContainStrings = !isCharacterComplement && expressionMayContainStrings; + return; + } + break; + case CharacterCodes.ampersand: + if (charCodeChecked(pos + 1) === CharacterCodes.ampersand) { + scanClassSetSubExpression(ClassSetExpressionType.ClassIntersection); + if (isCharacterComplement && mayContainStrings) { + error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, start, pos - start); } - const maxStart = pos; - const maxCharacter = scanClassAtom(); - if (!maxCharacter && !annexB) { - error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, maxStart, pos - maxStart); - continue; + expressionMayContainStrings = mayContainStrings; + mayContainStrings = !isCharacterComplement && expressionMayContainStrings; + return; + } + else { + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); + } + break; + default: + if (isCharacterComplement && mayContainStrings) { + error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, start, pos - start); + } + expressionMayContainStrings = mayContainStrings; + break; + } + while (true) { + ch = charCodeChecked(pos); + if (ch === CharacterCodes.EOF) { + break; + } + switch (ch) { + case CharacterCodes.minus: + pos++; + ch = charCodeChecked(pos); + if (isClassContentExit(ch)) { + mayContainStrings = !isCharacterComplement && expressionMayContainStrings; + return; } - if (!minCharacter) { + if (ch === CharacterCodes.minus) { + pos++; + error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); + start = pos - 2; + operand = text.slice(start, pos); continue; } - const minCharacterValue = codePointAt(minCharacter, 0); - const maxCharacterValue = codePointAt(maxCharacter, 0); - if ( - minCharacter.length === charSize(minCharacterValue) && - maxCharacter.length === charSize(maxCharacterValue) && - minCharacterValue > maxCharacterValue - ) { - error(Diagnostics.Range_out_of_order_in_character_class, minStart, pos - minStart); - } - } - } - } - - // Static Semantics: MayContainStrings - // ClassUnion: ClassSetOperands.some(ClassSetOperand => ClassSetOperand.MayContainStrings) - // ClassIntersection: ClassSetOperands.every(ClassSetOperand => ClassSetOperand.MayContainStrings) - // ClassSubtraction: ClassSetOperands[0].MayContainStrings - // ClassSetOperand: - // || ClassStringDisjunctionContents.MayContainStrings - // || CharacterClassEscape.UnicodePropertyValueExpression.LoneUnicodePropertyNameOrValue.MayContainStrings - // ClassStringDisjunctionContents: ClassStrings.some(ClassString => ClassString.ClassSetCharacters.length !== 1) - // LoneUnicodePropertyNameOrValue: isBinaryUnicodePropertyOfStrings(LoneUnicodePropertyNameOrValue) - - // ClassSetExpression ::= '^'? (ClassUnion | ClassIntersection | ClassSubtraction) - // ClassUnion ::= (ClassSetRange | ClassSetOperand)* - // ClassIntersection ::= ClassSetOperand ('&&' ClassSetOperand)+ - // ClassSubtraction ::= ClassSetOperand ('--' ClassSetOperand)+ - // ClassSetRange ::= ClassSetCharacter '-' ClassSetCharacter - function scanClassSetExpression() { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.openBracket); - let isCharacterComplement = false; - if (text.charCodeAt(pos) === CharacterCodes.caret) { - pos++; - isCharacterComplement = true; + else { + if (!operand) { + error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, start, pos - 1 - start); + } + const secondStart = pos; + const secondOperand = scanClassSetOperand(); + if (isCharacterComplement && mayContainStrings) { + error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, secondStart, pos - secondStart); + } + expressionMayContainStrings ||= mayContainStrings; + if (!secondOperand) { + error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, secondStart, pos - secondStart); + break; + } + if (!operand) { + break; + } + const minCharacterValue = codePointAt(operand, 0); + const maxCharacterValue = codePointAt(secondOperand, 0); + if ( + operand.length === charSize(minCharacterValue) && + secondOperand.length === charSize(maxCharacterValue) && + minCharacterValue > maxCharacterValue + ) { + error(Diagnostics.Range_out_of_order_in_character_class, start, pos - start); + } + } + break; + case CharacterCodes.ampersand: + start = pos; + pos++; + if (charCodeChecked(pos) === CharacterCodes.ampersand) { + pos++; + error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); + if (charCodeChecked(pos) === CharacterCodes.ampersand) { + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); + pos++; + } + } + else { + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos - 1, 1, String.fromCharCode(ch)); + } + operand = text.slice(start, pos); + continue; } - let expressionMayContainStrings = false; - let ch = text.charCodeAt(pos); - if (isClassContentExit(ch)) { - return; + if (isClassContentExit(charCodeChecked(pos))) { + break; } - let start = pos; - let oprand!: string; - switch (text.slice(pos, pos + 2)) { + start = pos; + switch (text.slice(pos, pos + 2)) { // TODO: don't use slice case "--": case "&&": - error(Diagnostics.Expected_a_class_set_oprand); - mayContainStrings = false; + error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos, 2); + pos += 2; + operand = text.slice(start, pos); break; default: - oprand = scanClassSetOprand(); + operand = scanClassSetOperand(); break; } - switch (text.charCodeAt(pos)) { + } + mayContainStrings = !isCharacterComplement && expressionMayContainStrings; + } + + function scanClassSetSubExpression(expressionType: ClassSetExpressionType) { + let expressionMayContainStrings = mayContainStrings; + while (true) { + let ch = charCodeChecked(pos); + if (isClassContentExit(ch)) { + break; + } + // Provide user-friendly diagnostic messages + switch (ch) { case CharacterCodes.minus: - if (text.charCodeAt(pos + 1) === CharacterCodes.minus) { - if (isCharacterComplement && mayContainStrings) { - error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, start, pos - start); + pos++; + if (charCodeChecked(pos) === CharacterCodes.minus) { + pos++; + if (expressionType !== ClassSetExpressionType.ClassSubtraction) { + error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); } - expressionMayContainStrings = mayContainStrings; - scanClassSetSubExpression(ClassSetExpressionType.ClassSubtraction); - mayContainStrings = !isCharacterComplement && expressionMayContainStrings; - return; + } + else { + error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 1, 1); } break; case CharacterCodes.ampersand: - if (text.charCodeAt(pos + 1) === CharacterCodes.ampersand) { - scanClassSetSubExpression(ClassSetExpressionType.ClassIntersection); - if (isCharacterComplement && mayContainStrings) { - error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, start, pos - start); + pos++; + if (charCodeChecked(pos) === CharacterCodes.ampersand) { + pos++; + if (expressionType !== ClassSetExpressionType.ClassIntersection) { + error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); + } + if (charCodeChecked(pos) === CharacterCodes.ampersand) { + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); + pos++; } - expressionMayContainStrings = mayContainStrings; - mayContainStrings = !isCharacterComplement && expressionMayContainStrings; - return; } else { - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos - 1, 1, String.fromCharCode(ch)); } break; default: - if (isCharacterComplement && mayContainStrings) { - error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, start, pos - start); + switch (expressionType) { + case ClassSetExpressionType.ClassSubtraction: + error(Diagnostics._0_expected, pos, 0, "--"); + break; + case ClassSetExpressionType.ClassIntersection: + error(Diagnostics._0_expected, pos, 0, "&&"); + break; + default: + break; } - expressionMayContainStrings = mayContainStrings; break; } - while (pos < end) { - ch = text.charCodeAt(pos); - switch (ch) { - case CharacterCodes.minus: - pos++; - ch = text.charCodeAt(pos); - if (isClassContentExit(ch)) { - mayContainStrings = !isCharacterComplement && expressionMayContainStrings; - return; - } - if (ch === CharacterCodes.minus) { - pos++; - error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); - start = pos - 2; - oprand = text.slice(start, pos); - continue; - } - else { - if (!oprand) { - error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, start, pos - 1 - start); - } - const secondStart = pos; - const secondOprand = scanClassSetOprand(); - if (isCharacterComplement && mayContainStrings) { - error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, secondStart, pos - secondStart); - } - expressionMayContainStrings ||= mayContainStrings; - if (!secondOprand) { - error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, secondStart, pos - secondStart); - break; - } - if (!oprand) { - break; - } - const minCharacterValue = codePointAt(oprand, 0); - const maxCharacterValue = codePointAt(secondOprand, 0); - if ( - oprand.length === charSize(minCharacterValue) && - secondOprand.length === charSize(maxCharacterValue) && - minCharacterValue > maxCharacterValue - ) { - error(Diagnostics.Range_out_of_order_in_character_class, start, pos - start); - } - } - break; - case CharacterCodes.ampersand: - start = pos; - pos++; - if (text.charCodeAt(pos) === CharacterCodes.ampersand) { - pos++; - error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); - if (text.charCodeAt(pos) === CharacterCodes.ampersand) { - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); - pos++; - } - } - else { - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos - 1, 1, String.fromCharCode(ch)); - } - oprand = text.slice(start, pos); - continue; - } - if (isClassContentExit(text.charCodeAt(pos))) { - break; - } - start = pos; - switch (text.slice(pos, pos + 2)) { - case "--": - case "&&": - error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos, 2); - pos += 2; - oprand = text.slice(start, pos); - break; - default: - oprand = scanClassSetOprand(); - break; - } + ch = charCodeChecked(pos); + if (isClassContentExit(ch)) { + error(Diagnostics.Expected_a_class_set_operand); + break; } - mayContainStrings = !isCharacterComplement && expressionMayContainStrings; + scanClassSetOperand(); + // Used only if expressionType is Intersection + expressionMayContainStrings &&= mayContainStrings; } + mayContainStrings = expressionMayContainStrings; + } - function scanClassSetSubExpression(expressionType: ClassSetExpressionType) { - let expressionMayContainStrings = mayContainStrings; - while (pos < end) { - let ch = text.charCodeAt(pos); - if (isClassContentExit(ch)) { - break; + // ClassSetOperand ::= + // | '[' ClassSetExpression ']' + // | '\' CharacterClassEscape + // | '\q{' ClassStringDisjunctionContents '}' + // | ClassSetCharacter + function scanClassSetOperand(): string { + mayContainStrings = false; + switch (charCodeChecked(pos)) { + case CharacterCodes.EOF: + return ""; + case CharacterCodes.openBracket: + pos++; + scanClassSetExpression(); + scanExpectedChar(CharacterCodes.closeBracket); + return ""; + case CharacterCodes.backslash: + pos++; + if (scanCharacterClassEscape()) { + return ""; } - // Provide user-friendly diagnostic messages - switch (ch) { - case CharacterCodes.minus: - pos++; - if (text.charCodeAt(pos) === CharacterCodes.minus) { - pos++; - if (expressionType !== ClassSetExpressionType.ClassSubtraction) { - error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); - } - } - else { - error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 1, 1); - } - break; - case CharacterCodes.ampersand: + else if (charCodeChecked(pos) === CharacterCodes.q) { + pos++; + if (charCodeChecked(pos) === CharacterCodes.openBrace) { pos++; - if (text.charCodeAt(pos) === CharacterCodes.ampersand) { - pos++; - if (expressionType !== ClassSetExpressionType.ClassIntersection) { - error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); - } - if (text.charCodeAt(pos) === CharacterCodes.ampersand) { - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); - pos++; - } - } - else { - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos - 1, 1, String.fromCharCode(ch)); - } - break; - default: - switch (expressionType) { - case ClassSetExpressionType.ClassSubtraction: - error(Diagnostics._0_expected, pos, 0, "--"); - break; - case ClassSetExpressionType.ClassIntersection: - error(Diagnostics._0_expected, pos, 0, "&&"); - break; - default: - break; - } - break; + scanClassStringDisjunctionContents(); + scanExpectedChar(CharacterCodes.closeBrace); + return ""; + } + else { + error(Diagnostics.q_must_be_followed_by_string_alternatives_enclosed_in_braces, pos - 2, 2); + return "q"; + } } - ch = text.charCodeAt(pos); - if (isClassContentExit(ch)) { - error(Diagnostics.Expected_a_class_set_oprand); + pos--; + // falls through + default: + return scanClassSetCharacter(); + } + } + + // ClassStringDisjunctionContents ::= ClassSetCharacter* ('|' ClassSetCharacter*)* + function scanClassStringDisjunctionContents() { + Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.openBrace); + let characterCount = 0; + while (true) { + const ch = charCodeChecked(pos); + switch (ch) { + case CharacterCodes.EOF: + return; + case CharacterCodes.closeBrace: + if (characterCount !== 1) { + mayContainStrings = true; + } + return; + case CharacterCodes.bar: + if (characterCount !== 1) { + mayContainStrings = true; + } + pos++; + start = pos; + characterCount = 0; + break; + default: + scanClassSetCharacter(); + characterCount++; break; - } - scanClassSetOprand(); - // Used only if expressionType is Intersection - expressionMayContainStrings &&= mayContainStrings; } - mayContainStrings = expressionMayContainStrings; } + } - // ClassSetOperand ::= - // | '[' ClassSetExpression ']' - // | '\' CharacterClassEscape - // | '\q{' ClassStringDisjunctionContents '}' - // | ClassSetCharacter - function scanClassSetOprand(): string { - mayContainStrings = false; - switch (text.charCodeAt(pos)) { - case CharacterCodes.openBracket: + // ClassSetCharacter ::= + // | SourceCharacter -- ClassSetSyntaxCharacter -- ClassSetReservedDoublePunctuator + // | '\' (CharacterEscape | ClassSetReservedPunctuator | 'b') + function scanClassSetCharacter(): string { + const ch = charCodeChecked(pos); + if (ch === CharacterCodes.EOF) { + // no need to report an error, the initial scan will already have reported that the RegExp is unterminated. + return ""; + } + if (ch === CharacterCodes.backslash) { + pos++; + const ch = charCodeChecked(pos); + switch (ch) { + case CharacterCodes.b: pos++; - scanClassSetExpression(); - scanExpectedChar(CharacterCodes.closeBracket); - return ""; - case CharacterCodes.backslash: + return "\b"; + case CharacterCodes.ampersand: + case CharacterCodes.minus: + case CharacterCodes.exclamation: + case CharacterCodes.hash: + case CharacterCodes.percent: + case CharacterCodes.comma: + case CharacterCodes.colon: + case CharacterCodes.semicolon: + case CharacterCodes.lessThan: + case CharacterCodes.equals: + case CharacterCodes.greaterThan: + case CharacterCodes.at: + case CharacterCodes.backtick: + case CharacterCodes.tilde: pos++; - if (scanCharacterClassEscape()) { - return ""; - } - else if (text.charCodeAt(pos) === CharacterCodes.q) { - pos++; - if (text.charCodeAt(pos) === CharacterCodes.openBrace) { - pos++; - scanClassStringDisjunctionContents(); - scanExpectedChar(CharacterCodes.closeBrace); - return ""; - } - else { - error(Diagnostics.q_must_be_followed_by_string_alternatives_enclosed_in_braces, pos - 2, 2); - return "q"; - } - } - pos--; - // falls through + return String.fromCharCode(ch); default: - return scanClassSetCharacter(); + return scanCharacterEscape(/*atomEscape*/ false); } } - - // ClassStringDisjunctionContents ::= ClassSetCharacter* ('|' ClassSetCharacter*)* - function scanClassStringDisjunctionContents() { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.openBrace); - let characterCount = 0; - while (pos < end) { - const ch = text.charCodeAt(pos); - switch (ch) { - case CharacterCodes.closeBrace: - if (characterCount !== 1) { - mayContainStrings = true; - } - return; - case CharacterCodes.bar: - if (characterCount !== 1) { - mayContainStrings = true; - } - pos++; - start = pos; - characterCount = 0; - break; - default: - scanClassSetCharacter(); - characterCount++; - break; - } + else if (ch === charCodeChecked(pos + 1)) { + switch (ch) { + case CharacterCodes.ampersand: + case CharacterCodes.exclamation: + case CharacterCodes.hash: + case CharacterCodes.percent: + case CharacterCodes.asterisk: + case CharacterCodes.plus: + case CharacterCodes.comma: + case CharacterCodes.dot: + case CharacterCodes.colon: + case CharacterCodes.semicolon: + case CharacterCodes.lessThan: + case CharacterCodes.equals: + case CharacterCodes.greaterThan: + case CharacterCodes.question: + case CharacterCodes.at: + case CharacterCodes.backtick: + case CharacterCodes.tilde: + error(Diagnostics.A_character_class_must_not_contain_a_reserved_double_punctuator_Did_you_mean_to_escape_it_with_backslash, pos, 2); + pos += 2; + return text.substring(pos - 2, pos); } } - - // ClassSetCharacter ::= - // | SourceCharacter -- ClassSetSyntaxCharacter -- ClassSetReservedDoublePunctuator - // | '\' (CharacterEscape | ClassSetReservedPunctuator | 'b') - function scanClassSetCharacter(): string { - const ch = text.charCodeAt(pos); - if (ch === CharacterCodes.backslash) { + switch (ch) { + case CharacterCodes.slash: + case CharacterCodes.openParen: + case CharacterCodes.closeParen: + case CharacterCodes.openBracket: + case CharacterCodes.closeBracket: + case CharacterCodes.openBrace: + case CharacterCodes.closeBrace: + case CharacterCodes.minus: + case CharacterCodes.bar: + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); pos++; - const ch = text.charCodeAt(pos); - switch (ch) { - case CharacterCodes.b: - pos++; - return "\b"; - case CharacterCodes.ampersand: - case CharacterCodes.minus: - case CharacterCodes.exclamation: - case CharacterCodes.hash: - case CharacterCodes.percent: - case CharacterCodes.comma: - case CharacterCodes.colon: - case CharacterCodes.semicolon: - case CharacterCodes.lessThan: - case CharacterCodes.equals: - case CharacterCodes.greaterThan: - case CharacterCodes.at: - case CharacterCodes.backtick: - case CharacterCodes.tilde: - pos++; - return String.fromCharCode(ch); - default: - return scanCharacterEscape(/*atomEscape*/ false); - } - } - else if (ch === text.charCodeAt(pos + 1)) { - switch (ch) { - case CharacterCodes.ampersand: - case CharacterCodes.exclamation: - case CharacterCodes.hash: - case CharacterCodes.percent: - case CharacterCodes.asterisk: - case CharacterCodes.plus: - case CharacterCodes.comma: - case CharacterCodes.dot: - case CharacterCodes.colon: - case CharacterCodes.semicolon: - case CharacterCodes.lessThan: - case CharacterCodes.equals: - case CharacterCodes.greaterThan: - case CharacterCodes.question: - case CharacterCodes.at: - case CharacterCodes.backtick: - case CharacterCodes.tilde: - error(Diagnostics.A_character_class_must_not_contain_a_reserved_double_punctuator_Did_you_mean_to_escape_it_with_backslash, pos, 2); - pos += 2; - return text.substring(pos - 2, pos); - } - } + return String.fromCharCode(ch); + } + return scanSourceCharacter(); + } + + // ClassAtom ::= + // | SourceCharacter but not one of '\' or ']' + // | '\' ClassEscape + // ClassEscape ::= + // | 'b' + // | '-' + // | CharacterClassEscape + // | CharacterEscape + function scanClassAtom(): string { + if (charCodeChecked(pos) === CharacterCodes.backslash) { + pos++; + const ch = charCodeChecked(pos); switch (ch) { - case CharacterCodes.slash: - case CharacterCodes.openParen: - case CharacterCodes.closeParen: - case CharacterCodes.openBracket: - case CharacterCodes.closeBracket: - case CharacterCodes.openBrace: - case CharacterCodes.closeBrace: + case CharacterCodes.b: + pos++; + return "\b"; case CharacterCodes.minus: - case CharacterCodes.bar: - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); pos++; return String.fromCharCode(ch); + default: + if (scanCharacterClassEscape()) { + return ""; + } + return scanCharacterEscape(/*atomEscape*/ false); } + } + else { return scanSourceCharacter(); } + } - // ClassAtom ::= - // | SourceCharacter but not one of '\' or ']' - // | '\' ClassEscape - // ClassEscape ::= - // | 'b' - // | '-' - // | CharacterClassEscape - // | CharacterEscape - function scanClassAtom(): string { - if (text.charCodeAt(pos) === CharacterCodes.backslash) { + // CharacterClassEscape ::= + // | 'd' | 'D' | 's' | 'S' | 'w' | 'W' + // | [+UnicodeMode] ('P' | 'p') '{' UnicodePropertyValueExpression '}' + function scanCharacterClassEscape(): boolean { + Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.backslash); + let isCharacterComplement = false; + const start = pos - 1; + const ch = charCodeChecked(pos); + switch (ch) { + case CharacterCodes.d: + case CharacterCodes.D: + case CharacterCodes.s: + case CharacterCodes.S: + case CharacterCodes.w: + case CharacterCodes.W: pos++; - const ch = text.charCodeAt(pos); - switch (ch) { - case CharacterCodes.b: - pos++; - return "\b"; - case CharacterCodes.minus: - pos++; - return String.fromCharCode(ch); - default: - if (scanCharacterClassEscape()) { - return ""; - } - return scanCharacterEscape(/*atomEscape*/ false); - } - } - else { - return scanSourceCharacter(); - } - } - - // CharacterClassEscape ::= - // | 'd' | 'D' | 's' | 'S' | 'w' | 'W' - // | [+UnicodeMode] ('P' | 'p') '{' UnicodePropertyValueExpression '}' - function scanCharacterClassEscape(): boolean { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash); - let isCharacterComplement = false; - const start = pos - 1; - const ch = text.charCodeAt(pos); - switch (ch) { - case CharacterCodes.d: - case CharacterCodes.D: - case CharacterCodes.s: - case CharacterCodes.S: - case CharacterCodes.w: - case CharacterCodes.W: - pos++; - return true; - case CharacterCodes.P: - isCharacterComplement = true; - // falls through - case CharacterCodes.p: + return true; + case CharacterCodes.P: + isCharacterComplement = true; + // falls through + case CharacterCodes.p: + pos++; + if (charCodeChecked(pos) === CharacterCodes.openBrace) { pos++; - if (text.charCodeAt(pos) === CharacterCodes.openBrace) { - pos++; - const propertyNameOrValueStart = pos; - const propertyNameOrValue = scanWordCharacters(); - if (text.charCodeAt(pos) === CharacterCodes.equals) { - const propertyName = nonBinaryUnicodeProperties.get(propertyNameOrValue); - if (pos === propertyNameOrValueStart) { - error(Diagnostics.Expected_a_Unicode_property_name); - } - else if (propertyName === undefined) { - error(Diagnostics.Unknown_Unicode_property_name, propertyNameOrValueStart, pos - propertyNameOrValueStart); - const suggestion = getSpellingSuggestion(propertyNameOrValue, nonBinaryUnicodeProperties.keys(), identity); - if (suggestion) { - error(Diagnostics.Did_you_mean_0, propertyNameOrValueStart, pos - propertyNameOrValueStart, suggestion); - } - } - pos++; - const propertyValueStart = pos; - const propertyValue = scanWordCharacters(); - if (pos === propertyValueStart) { - error(Diagnostics.Expected_a_Unicode_property_value); + const propertyNameOrValueStart = pos; + const propertyNameOrValue = scanWordCharacters(); + if (charCodeChecked(pos) === CharacterCodes.equals) { + const propertyName = nonBinaryUnicodeProperties.get(propertyNameOrValue); + if (pos === propertyNameOrValueStart) { + error(Diagnostics.Expected_a_Unicode_property_name); + } + else if (propertyName === undefined) { + error(Diagnostics.Unknown_Unicode_property_name, propertyNameOrValueStart, pos - propertyNameOrValueStart); + const suggestion = getSpellingSuggestion(propertyNameOrValue, nonBinaryUnicodeProperties.keys(), identity); + if (suggestion) { + error(Diagnostics.Did_you_mean_0, propertyNameOrValueStart, pos - propertyNameOrValueStart, suggestion); } - else if (propertyName !== undefined && !valuesOfNonBinaryUnicodeProperties[propertyName].has(propertyValue)) { - error(Diagnostics.Unknown_Unicode_property_value, propertyValueStart, pos - propertyValueStart); - const suggestion = getSpellingSuggestion(propertyValue, valuesOfNonBinaryUnicodeProperties[propertyName], identity); - if (suggestion) { - error(Diagnostics.Did_you_mean_0, propertyValueStart, pos - propertyValueStart, suggestion); - } + } + pos++; + const propertyValueStart = pos; + const propertyValue = scanWordCharacters(); + if (pos === propertyValueStart) { + error(Diagnostics.Expected_a_Unicode_property_value); + } + else if (propertyName !== undefined && !valuesOfNonBinaryUnicodeProperties[propertyName].has(propertyValue)) { + error(Diagnostics.Unknown_Unicode_property_value, propertyValueStart, pos - propertyValueStart); + const suggestion = getSpellingSuggestion(propertyValue, valuesOfNonBinaryUnicodeProperties[propertyName], identity); + if (suggestion) { + error(Diagnostics.Did_you_mean_0, propertyValueStart, pos - propertyValueStart, suggestion); } } - else { - if (pos === propertyNameOrValueStart) { - error(Diagnostics.Expected_a_Unicode_property_name_or_value); + } + else { + if (pos === propertyNameOrValueStart) { + error(Diagnostics.Expected_a_Unicode_property_name_or_value); + } + else if (binaryUnicodePropertiesOfStrings.has(propertyNameOrValue)) { + if (!unicodeSetsMode) { + error(Diagnostics.Any_Unicode_property_that_would_possibly_match_more_than_a_single_character_is_only_available_when_the_Unicode_Sets_v_flag_is_set, propertyNameOrValueStart, pos - propertyNameOrValueStart); } - else if (binaryUnicodePropertiesOfStrings.has(propertyNameOrValue)) { - if (!unicodeSetsMode) { - error(Diagnostics.Any_Unicode_property_that_would_possibly_match_more_than_a_single_character_is_only_available_when_the_Unicode_Sets_v_flag_is_set, propertyNameOrValueStart, pos - propertyNameOrValueStart); - } - else if (isCharacterComplement) { - error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, propertyNameOrValueStart, pos - propertyNameOrValueStart); - } - else { - mayContainStrings = true; - } + else if (isCharacterComplement) { + error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, propertyNameOrValueStart, pos - propertyNameOrValueStart); } - else if (!valuesOfNonBinaryUnicodeProperties.General_Category.has(propertyNameOrValue) && !binaryUnicodeProperties.has(propertyNameOrValue)) { - error(Diagnostics.Unknown_Unicode_property_name_or_value, propertyNameOrValueStart, pos - propertyNameOrValueStart); - const suggestion = getSpellingSuggestion(propertyNameOrValue, [...valuesOfNonBinaryUnicodeProperties.General_Category, ...binaryUnicodeProperties, ...binaryUnicodePropertiesOfStrings], identity); - if (suggestion) { - error(Diagnostics.Did_you_mean_0, propertyNameOrValueStart, pos - propertyNameOrValueStart, suggestion); - } + else { + mayContainStrings = true; } } - scanExpectedChar(CharacterCodes.closeBrace); - if (!unicodeMode) { - error(Diagnostics.Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set, start, pos - start); + else if (!valuesOfNonBinaryUnicodeProperties.General_Category.has(propertyNameOrValue) && !binaryUnicodeProperties.has(propertyNameOrValue)) { + error(Diagnostics.Unknown_Unicode_property_name_or_value, propertyNameOrValueStart, pos - propertyNameOrValueStart); + const suggestion = getSpellingSuggestion(propertyNameOrValue, [...valuesOfNonBinaryUnicodeProperties.General_Category, ...binaryUnicodeProperties, ...binaryUnicodePropertiesOfStrings], identity); + if (suggestion) { + error(Diagnostics.Did_you_mean_0, propertyNameOrValueStart, pos - propertyNameOrValueStart, suggestion); + } } } - else if (unicodeMode) { - error(Diagnostics._0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces, pos - 2, 2, String.fromCharCode(ch)); + scanExpectedChar(CharacterCodes.closeBrace); + if (!unicodeMode) { + error(Diagnostics.Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set, start, pos - start); } - return true; - } - return false; + } + else if (unicodeMode) { + error(Diagnostics._0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces, pos - 2, 2, String.fromCharCode(ch)); + } + return true; } + return false; + } - function scanWordCharacters(): string { - let value = ""; - while (pos < end) { - const ch = text.charCodeAt(pos); - if (!isWordCharacter(ch)) { - break; - } - value += String.fromCharCode(ch); - pos++; + function scanWordCharacters(): string { + let value = ""; + while (true) { + const ch = charCodeChecked(pos); + if (ch === CharacterCodes.EOF || !isWordCharacter(ch)) { + break; } - return value; + value += String.fromCharCode(ch); + pos++; } + return value; + } - function scanSourceCharacter(): string { - const size = unicodeMode ? charSize(codePointAt(text, pos)) : 1; - pos += size; - return text.substring(pos - size, pos); - } + function scanSourceCharacter(): string { + const size = unicodeMode ? charSize(charCodeChecked(pos)) : 1; + pos += size; + return size > 0 ? text.substring(pos - size, pos) : ""; + } - function scanExpectedChar(ch: CharacterCodes) { - if (text.charCodeAt(pos) === ch) { - pos++; - } - else { - error(Diagnostics._0_expected, pos, 0, String.fromCharCode(ch)); - } + function scanExpectedChar(ch: CharacterCodes) { + if (charCodeChecked(pos) === ch) { + pos++; } + else { + error(Diagnostics._0_expected, pos, 0, String.fromCharCode(ch)); + } + } - scanDisjunction(/*isInGroup*/ false); + scanDisjunction(/*isInGroup*/ false); - forEach(groupNameReferences, reference => { - if (!groupSpecifiers.has(reference.name)) { - error(Diagnostics.There_is_no_capturing_group_named_0_in_this_regular_expression, reference.pos, reference.end - reference.pos, reference.name); + forEach(groupNameReferences, reference => { + if (!groupSpecifiers?.has(reference.name)) { + error(Diagnostics.There_is_no_capturing_group_named_0_in_this_regular_expression, reference.pos, reference.end - reference.pos, reference.name); + } + }); + forEach(decimalEscapes, escape => { + // in AnnexB, if a DecimalEscape is greater than the number of capturing groups then it is treated as + // either a LegacyOctalEscapeSequence or IdentityEscape + if (!annexB && escape.value > numberOfCapturingGroups) { + if (numberOfCapturingGroups) { + error(Diagnostics.This_backreference_refers_to_a_group_that_does_not_exist_There_are_only_0_capturing_groups_in_this_regular_expression, escape.pos, escape.end - escape.pos, numberOfCapturingGroups); } - }); - forEach(decimalEscapes, escape => { - // in AnnexB, if a DecimalEscape is greater than the number of capturing groups then it is treated as - // either a LegacyOctalEscapeSequence or IdentityEscape - if (!annexB && escape.value > numberOfCapturingGroups) { - if (numberOfCapturingGroups) { - error(Diagnostics.This_backreference_refers_to_a_group_that_does_not_exist_There_are_only_0_capturing_groups_in_this_regular_expression, escape.pos, escape.end - escape.pos, numberOfCapturingGroups); - } - else { - error(Diagnostics.This_backreference_is_invalid_because_the_containing_regular_expression_contains_no_capturing_groups, escape.pos, escape.end - escape.pos); - } + else { + error(Diagnostics.This_backreference_is_invalid_because_the_containing_regular_expression_contains_no_capturing_groups, escape.pos, escape.end - escape.pos); } - }); - } - - function checkRegularExpressionFlagAvailable(flag: RegularExpressionFlags, pos: number) { - const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag) as ScriptTarget | undefined; - if (availableFrom && languageVersion < availableFrom) { - error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, 1, getNameOfScriptTarget(availableFrom)); } + }); + } + + function checkRegularExpressionFlagAvailable(flag: RegularExpressionFlags, pos: number) { + const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag) as ScriptTarget | undefined; + if (availableFrom && languageVersion < availableFrom) { + error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, 1, getNameOfScriptTarget(availableFrom)); } } @@ -3531,9 +3585,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return token = SyntaxKind.EndOfFileToken; } - let char = text.charCodeAt(pos); + let char = charCodeUnchecked(pos); if (char === CharacterCodes.lessThan) { - if (text.charCodeAt(pos + 1) === CharacterCodes.slash) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.slash) { pos += 2; return token = SyntaxKind.LessThanSlashToken; } @@ -3553,7 +3607,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // firstNonWhitespace = 0 to indicate that we want leading whitespace, while (pos < end) { - char = text.charCodeAt(pos); + char = charCodeUnchecked(pos); if (char === CharacterCodes.openBrace) { break; } @@ -3606,7 +3660,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // Do note that this means that `scanJsxIdentifier` effectively _mutates_ the visible token without advancing to a new token // Any caller should be expecting this behavior and should only read the pos or token value after calling it. while (pos < end) { - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); if (ch === CharacterCodes.minus) { tokenValue += "-"; pos++; @@ -3626,7 +3680,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanJsxAttributeValue(): SyntaxKind { fullStartPos = pos; - switch (text.charCodeAt(pos)) { + switch (charCodeUnchecked(pos)) { case CharacterCodes.doubleQuote: case CharacterCodes.singleQuote: tokenValue = scanString(/*jsxAttributeString*/ true); @@ -3648,15 +3702,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean if (pos >= end) { return token = SyntaxKind.EndOfFileToken; } - for (let ch = text.charCodeAt(pos); pos < end && (!isLineBreak(ch) && ch !== CharacterCodes.backtick); ch = codePointAt(text, ++pos)) { + for (let ch = charCodeUnchecked(pos); pos < end && (!isLineBreak(ch) && ch !== CharacterCodes.backtick); ch = codePointUnchecked(++pos)) { if (!inBackticks) { if (ch === CharacterCodes.openBrace) { break; } else if ( ch === CharacterCodes.at - && pos - 1 >= 0 && isWhiteSpaceSingleLine(text.charCodeAt(pos - 1)) - && !(pos + 1 < end && isWhiteSpaceLike(text.charCodeAt(pos + 1))) + && pos - 1 >= 0 && isWhiteSpaceSingleLine(charCodeUnchecked(pos - 1)) + && !(pos + 1 < end && isWhiteSpaceLike(charCodeUnchecked(pos + 1))) ) { // @ doesn't start a new tag inside ``, and elsewhere, only after whitespace and before non-whitespace break; @@ -3677,21 +3731,21 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return token = SyntaxKind.EndOfFileToken; } - const ch = codePointAt(text, pos); + const ch = codePointUnchecked(pos); pos += charSize(ch); switch (ch) { case CharacterCodes.tab: case CharacterCodes.verticalTab: case CharacterCodes.formFeed: case CharacterCodes.space: - while (pos < end && isWhiteSpaceSingleLine(text.charCodeAt(pos))) { + while (pos < end && isWhiteSpaceSingleLine(charCodeUnchecked(pos))) { pos++; } return token = SyntaxKind.WhitespaceTrivia; case CharacterCodes.at: return token = SyntaxKind.AtToken; case CharacterCodes.carriageReturn: - if (text.charCodeAt(pos) === CharacterCodes.lineFeed) { + if (charCodeUnchecked(pos) === CharacterCodes.lineFeed) { pos++; } // falls through @@ -3747,7 +3801,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean if (isIdentifierStart(ch, languageVersion)) { let char = ch; - while (pos < end && isIdentifierPart(char = codePointAt(text, pos), languageVersion) || text.charCodeAt(pos) === CharacterCodes.minus) pos += charSize(char); + while (pos < end && isIdentifierPart(char = codePointUnchecked(pos), languageVersion) || char === CharacterCodes.minus) pos += charSize(char); tokenValue = text.substring(tokenStart, pos); if (char === CharacterCodes.backslash) { tokenValue += scanIdentifierParts(); @@ -3872,6 +3926,9 @@ function charSize(ch: number) { if (ch >= 0x10000) { return 2; } + if (ch === CharacterCodes.EOF) { + return 0; + } return 1; } diff --git a/src/compiler/types.ts b/src/compiler/types.ts index 76f8193ae127d..85bcc8f69e2e5 100644 --- a/src/compiler/types.ts +++ b/src/compiler/types.ts @@ -7612,6 +7612,7 @@ export type CommandLineOption = CommandLineOptionOfCustomType | CommandLineOptio // dprint-ignore /** @internal */ export const enum CharacterCodes { + EOF = -1, nullCharacter = 0, maxAsciiCharacter = 0x7F, diff --git a/tests/baselines/reference/regularExpressionScanning(target=es2015).errors.txt b/tests/baselines/reference/regularExpressionScanning(target=es2015).errors.txt index c2a61dc5f1dcc..7cf62fcaba4a2 100644 --- a/tests/baselines/reference/regularExpressionScanning(target=es2015).errors.txt +++ b/tests/baselines/reference/regularExpressionScanning(target=es2015).errors.txt @@ -157,8 +157,8 @@ regularExpressionScanning.ts(37,61): error TS1508: Unexpected '}'. Did you mean regularExpressionScanning.ts(37,63): error TS1517: Range out of order in character class. regularExpressionScanning.ts(37,76): error TS1535: This character cannot be escaped in a regular expression. regularExpressionScanning.ts(38,8): error TS1005: '--' expected. -regularExpressionScanning.ts(38,9): error TS1520: Expected a class set oprand. -regularExpressionScanning.ts(38,11): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,9): error TS1520: Expected a class set operand. +regularExpressionScanning.ts(38,11): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,12): error TS1005: '--' expected. regularExpressionScanning.ts(38,15): error TS1522: A character class must not contain a reserved double punctuator. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,20): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. @@ -166,15 +166,15 @@ regularExpressionScanning.ts(38,28): error TS1519: Operators must not be mixed w regularExpressionScanning.ts(38,40): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(38,47): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(38,49): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. -regularExpressionScanning.ts(38,50): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,50): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,55): error TS1511: '\q' is only available inside character class. regularExpressionScanning.ts(38,57): error TS1508: Unexpected '{'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,61): error TS1508: Unexpected '}'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,66): error TS1508: Unexpected '-'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,67): error TS1005: '--' expected. -regularExpressionScanning.ts(38,70): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,70): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,75): error TS1508: Unexpected '&'. Did you mean to escape it with backslash? -regularExpressionScanning.ts(38,85): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,85): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,87): error TS1501: This regular expression flag is only available when targeting 'esnext' or later. regularExpressionScanning.ts(39,56): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(39,67): error TS1005: '&&' expected. @@ -561,9 +561,9 @@ regularExpressionScanning.ts(47,101): error TS1501: This regular expression flag !!! error TS1005: '--' expected. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. !!! error TS1005: '--' expected. ~~ @@ -579,7 +579,7 @@ regularExpressionScanning.ts(47,101): error TS1501: This regular expression flag ~ !!! error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. ~~ !!! error TS1511: '\q' is only available inside character class. ~ @@ -591,11 +591,11 @@ regularExpressionScanning.ts(47,101): error TS1501: This regular expression flag !!! error TS1005: '--' expected. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. ~ !!! error TS1508: Unexpected '&'. Did you mean to escape it with backslash? -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. ~ !!! error TS1501: This regular expression flag is only available when targeting 'esnext' or later. /[[^\P{Decimal_Number}&&[0-9]]&&\p{L}&&\p{ID_Continue}--\p{ASCII}\p{CWCF}]/v, diff --git a/tests/baselines/reference/regularExpressionScanning(target=es5).errors.txt b/tests/baselines/reference/regularExpressionScanning(target=es5).errors.txt index 83b45f8443492..1db06a65a8c1e 100644 --- a/tests/baselines/reference/regularExpressionScanning(target=es5).errors.txt +++ b/tests/baselines/reference/regularExpressionScanning(target=es5).errors.txt @@ -164,8 +164,8 @@ regularExpressionScanning.ts(37,63): error TS1517: Range out of order in charact regularExpressionScanning.ts(37,76): error TS1535: This character cannot be escaped in a regular expression. regularExpressionScanning.ts(37,87): error TS1501: This regular expression flag is only available when targeting 'es6' or later. regularExpressionScanning.ts(38,8): error TS1005: '--' expected. -regularExpressionScanning.ts(38,9): error TS1520: Expected a class set oprand. -regularExpressionScanning.ts(38,11): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,9): error TS1520: Expected a class set operand. +regularExpressionScanning.ts(38,11): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,12): error TS1005: '--' expected. regularExpressionScanning.ts(38,15): error TS1522: A character class must not contain a reserved double punctuator. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,20): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. @@ -173,15 +173,15 @@ regularExpressionScanning.ts(38,28): error TS1519: Operators must not be mixed w regularExpressionScanning.ts(38,40): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(38,47): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(38,49): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. -regularExpressionScanning.ts(38,50): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,50): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,55): error TS1511: '\q' is only available inside character class. regularExpressionScanning.ts(38,57): error TS1508: Unexpected '{'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,61): error TS1508: Unexpected '}'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,66): error TS1508: Unexpected '-'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,67): error TS1005: '--' expected. -regularExpressionScanning.ts(38,70): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,70): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,75): error TS1508: Unexpected '&'. Did you mean to escape it with backslash? -regularExpressionScanning.ts(38,85): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,85): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,87): error TS1501: This regular expression flag is only available when targeting 'esnext' or later. regularExpressionScanning.ts(39,56): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(39,67): error TS1005: '&&' expected. @@ -582,9 +582,9 @@ regularExpressionScanning.ts(47,101): error TS1501: This regular expression flag !!! error TS1005: '--' expected. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. !!! error TS1005: '--' expected. ~~ @@ -600,7 +600,7 @@ regularExpressionScanning.ts(47,101): error TS1501: This regular expression flag ~ !!! error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. ~~ !!! error TS1511: '\q' is only available inside character class. ~ @@ -612,11 +612,11 @@ regularExpressionScanning.ts(47,101): error TS1501: This regular expression flag !!! error TS1005: '--' expected. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. ~ !!! error TS1508: Unexpected '&'. Did you mean to escape it with backslash? -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. ~ !!! error TS1501: This regular expression flag is only available when targeting 'esnext' or later. /[[^\P{Decimal_Number}&&[0-9]]&&\p{L}&&\p{ID_Continue}--\p{ASCII}\p{CWCF}]/v, diff --git a/tests/baselines/reference/regularExpressionScanning(target=esnext).errors.txt b/tests/baselines/reference/regularExpressionScanning(target=esnext).errors.txt index 3fd6f79843695..9b641966e016a 100644 --- a/tests/baselines/reference/regularExpressionScanning(target=esnext).errors.txt +++ b/tests/baselines/reference/regularExpressionScanning(target=esnext).errors.txt @@ -141,8 +141,8 @@ regularExpressionScanning.ts(37,61): error TS1508: Unexpected '}'. Did you mean regularExpressionScanning.ts(37,63): error TS1517: Range out of order in character class. regularExpressionScanning.ts(37,76): error TS1535: This character cannot be escaped in a regular expression. regularExpressionScanning.ts(38,8): error TS1005: '--' expected. -regularExpressionScanning.ts(38,9): error TS1520: Expected a class set oprand. -regularExpressionScanning.ts(38,11): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,9): error TS1520: Expected a class set operand. +regularExpressionScanning.ts(38,11): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,12): error TS1005: '--' expected. regularExpressionScanning.ts(38,15): error TS1522: A character class must not contain a reserved double punctuator. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,20): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. @@ -150,15 +150,15 @@ regularExpressionScanning.ts(38,28): error TS1519: Operators must not be mixed w regularExpressionScanning.ts(38,40): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(38,47): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(38,49): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. -regularExpressionScanning.ts(38,50): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,50): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,55): error TS1511: '\q' is only available inside character class. regularExpressionScanning.ts(38,57): error TS1508: Unexpected '{'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,61): error TS1508: Unexpected '}'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,66): error TS1508: Unexpected '-'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,67): error TS1005: '--' expected. -regularExpressionScanning.ts(38,70): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,70): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,75): error TS1508: Unexpected '&'. Did you mean to escape it with backslash? -regularExpressionScanning.ts(38,85): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,85): error TS1520: Expected a class set operand. regularExpressionScanning.ts(39,56): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(39,67): error TS1005: '&&' expected. regularExpressionScanning.ts(41,5): error TS1518: Anything that would possibly match more than a single character is invalid inside a negated character class. @@ -503,9 +503,9 @@ regularExpressionScanning.ts(47,89): error TS1518: Anything that would possibly !!! error TS1005: '--' expected. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. !!! error TS1005: '--' expected. ~~ @@ -521,7 +521,7 @@ regularExpressionScanning.ts(47,89): error TS1518: Anything that would possibly ~ !!! error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. ~~ !!! error TS1511: '\q' is only available inside character class. ~ @@ -533,11 +533,11 @@ regularExpressionScanning.ts(47,89): error TS1518: Anything that would possibly !!! error TS1005: '--' expected. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. ~ !!! error TS1508: Unexpected '&'. Did you mean to escape it with backslash? -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. /[[^\P{Decimal_Number}&&[0-9]]&&\p{L}&&\p{ID_Continue}--\p{ASCII}\p{CWCF}]/v, ~~ !!! error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead.