From a4db189af1c174f26e421e942ed7e41f6c0cec50 Mon Sep 17 00:00:00 2001 From: Ron Buckton Date: Fri, 26 Apr 2024 13:43:52 -0400 Subject: [PATCH 1/8] Extract scanRegularExpressionWorker from reScanSlashToken --- src/compiler/scanner.ts | 1672 +++++++++++++++++++-------------------- 1 file changed, 836 insertions(+), 836 deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 2d26d915aa2cb..2937aa343b18d 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -2482,968 +2482,968 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean token = SyntaxKind.RegularExpressionLiteral; } return token; + } - function scanRegularExpressionWorker(text: string, end: number, regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean) { - /** Grammar parameter */ - const unicodeSetsMode = !!(regExpFlags & RegularExpressionFlags.UnicodeSets); - /** Grammar parameter */ - const unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode); - - if (unicodeMode) { - // Annex B treats any unicode mode as the strict syntax. - annexB = false; - } - - /** @see {scanClassSetExpression} */ - let mayContainStrings = false; - - /** The number of numeric (anonymous) capturing groups defined in the regex. */ - let numberOfCapturingGroups = 0; - /** All named capturing groups defined in the regex. */ - const groupSpecifiers = new Set(); - /** All references to named capturing groups in the regex. */ - const groupNameReferences: (TextRange & { name: string; })[] = []; - /** All numeric backreferences within the regex. */ - const decimalEscapes: (TextRange & { value: number; })[] = []; - /** A stack of scopes for named capturing groups. @see {scanGroupName} */ - const namedCapturingGroups: Set[] = []; - - // Disjunction ::= Alternative ('|' Alternative)* - function scanDisjunction(isInGroup: boolean) { - while (true) { - namedCapturingGroups.push(new Set()); - scanAlternative(isInGroup); - namedCapturingGroups.pop(); - if (text.charCodeAt(pos) !== CharacterCodes.bar) { - return; - } - pos++; + function scanRegularExpressionWorker(text: string, end: number, regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean) { + /** Grammar parameter */ + const unicodeSetsMode = !!(regExpFlags & RegularExpressionFlags.UnicodeSets); + /** Grammar parameter */ + const unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode); + + if (unicodeMode) { + // Annex B treats any unicode mode as the strict syntax. + annexB = false; + } + + /** @see {scanClassSetExpression} */ + let mayContainStrings = false; + + /** The number of numeric (anonymous) capturing groups defined in the regex. */ + let numberOfCapturingGroups = 0; + /** All named capturing groups defined in the regex. */ + const groupSpecifiers = new Set(); + /** All references to named capturing groups in the regex. */ + const groupNameReferences: (TextRange & { name: string; })[] = []; + /** All numeric backreferences within the regex. */ + const decimalEscapes: (TextRange & { value: number; })[] = []; + /** A stack of scopes for named capturing groups. @see {scanGroupName} */ + const namedCapturingGroups: Set[] = []; + + // Disjunction ::= Alternative ('|' Alternative)* + function scanDisjunction(isInGroup: boolean) { + while (true) { + namedCapturingGroups.push(new Set()); + scanAlternative(isInGroup); + namedCapturingGroups.pop(); + if (text.charCodeAt(pos) !== CharacterCodes.bar) { + return; } + pos++; } + } - // Alternative ::= Term* - // Term ::= - // | Assertion - // | Atom Quantifier? - // Assertion ::= - // | '^' - // | '$' - // | '\b' - // | '\B' - // | '(?=' Disjunction ')' - // | '(?!' Disjunction ')' - // | '(?<=' Disjunction ')' - // | '(?' Disjunction ')' - // | '(?' RegularExpressionFlags ('-' RegularExpressionFlags)? ':' Disjunction ')' - // CharacterClass ::= unicodeMode - // ? '[' ClassRanges ']' - // : '[' ClassSetExpression ']' - function scanAlternative(isInGroup: boolean) { - let isPreviousTermQuantifiable = false; - while (pos < end) { - const start = pos; - const ch = text.charCodeAt(pos); - switch (ch) { - case CharacterCodes.caret: - case CharacterCodes.$: - pos++; - isPreviousTermQuantifiable = false; - break; - case CharacterCodes.backslash: + // Alternative ::= Term* + // Term ::= + // | Assertion + // | Atom Quantifier? + // Assertion ::= + // | '^' + // | '$' + // | '\b' + // | '\B' + // | '(?=' Disjunction ')' + // | '(?!' Disjunction ')' + // | '(?<=' Disjunction ')' + // | '(?' Disjunction ')' + // | '(?' RegularExpressionFlags ('-' RegularExpressionFlags)? ':' Disjunction ')' + // CharacterClass ::= unicodeMode + // ? '[' ClassRanges ']' + // : '[' ClassSetExpression ']' + function scanAlternative(isInGroup: boolean) { + let isPreviousTermQuantifiable = false; + while (pos < end) { + const start = pos; + const ch = text.charCodeAt(pos); + switch (ch) { + case CharacterCodes.caret: + case CharacterCodes.$: + pos++; + isPreviousTermQuantifiable = false; + break; + case CharacterCodes.backslash: + pos++; + switch (text.charCodeAt(pos)) { + case CharacterCodes.b: + case CharacterCodes.B: + pos++; + isPreviousTermQuantifiable = false; + break; + default: + scanAtomEscape(); + isPreviousTermQuantifiable = true; + break; + } + break; + case CharacterCodes.openParen: + pos++; + if (text.charCodeAt(pos) === CharacterCodes.question) { pos++; switch (text.charCodeAt(pos)) { - case CharacterCodes.b: - case CharacterCodes.B: + case CharacterCodes.equals: + case CharacterCodes.exclamation: pos++; - isPreviousTermQuantifiable = false; - break; - default: - scanAtomEscape(); - isPreviousTermQuantifiable = true; + // In Annex B, `(?=Disjunction)` and `(?!Disjunction)` are quantifiable + isPreviousTermQuantifiable = annexB; break; - } - break; - case CharacterCodes.openParen: - pos++; - if (text.charCodeAt(pos) === CharacterCodes.question) { - pos++; - switch (text.charCodeAt(pos)) { - case CharacterCodes.equals: - case CharacterCodes.exclamation: - pos++; - // In Annex B, `(?=Disjunction)` and `(?!Disjunction)` are quantifiable - isPreviousTermQuantifiable = annexB; - break; - case CharacterCodes.lessThan: - const groupNameStart = pos; - pos++; - switch (text.charCodeAt(pos)) { - case CharacterCodes.equals: - case CharacterCodes.exclamation: - pos++; - isPreviousTermQuantifiable = false; - break; - default: - scanGroupName(/*isReference*/ false); - scanExpectedChar(CharacterCodes.greaterThan); - if (languageVersion < ScriptTarget.ES2018) { - error(Diagnostics.Named_capturing_groups_are_only_available_when_targeting_ES2018_or_later, groupNameStart, pos - groupNameStart); - } - numberOfCapturingGroups++; - isPreviousTermQuantifiable = true; - break; - } - break; - default: - const start = pos; - const setFlags = scanPatternModifiers(RegularExpressionFlags.None); - if (text.charCodeAt(pos) === CharacterCodes.minus) { + case CharacterCodes.lessThan: + const groupNameStart = pos; + pos++; + switch (text.charCodeAt(pos)) { + case CharacterCodes.equals: + case CharacterCodes.exclamation: pos++; - scanPatternModifiers(setFlags); - if (pos === start + 1) { - error(Diagnostics.Subpattern_flags_must_be_present_when_there_is_a_minus_sign, start, pos - start); + isPreviousTermQuantifiable = false; + break; + default: + scanGroupName(/*isReference*/ false); + scanExpectedChar(CharacterCodes.greaterThan); + if (languageVersion < ScriptTarget.ES2018) { + error(Diagnostics.Named_capturing_groups_are_only_available_when_targeting_ES2018_or_later, groupNameStart, pos - groupNameStart); } - } - scanExpectedChar(CharacterCodes.colon); - isPreviousTermQuantifiable = true; - break; - } - } - else { - numberOfCapturingGroups++; - isPreviousTermQuantifiable = true; - } - scanDisjunction(/*isInGroup*/ true); - scanExpectedChar(CharacterCodes.closeParen); - break; - case CharacterCodes.openBrace: - pos++; - const digitsStart = pos; - scanDigits(); - const min = tokenValue; - if (text.charCodeAt(pos) === CharacterCodes.comma) { - pos++; - scanDigits(); - const max = tokenValue; - if (!min) { - if (max || text.charCodeAt(pos) === CharacterCodes.closeBrace) { - error(Diagnostics.Incomplete_quantifier_Digit_expected, digitsStart, 0); + numberOfCapturingGroups++; + isPreviousTermQuantifiable = true; + break; } - else { - if (unicodeMode) { - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch)); + break; + default: + const start = pos; + const setFlags = scanPatternModifiers(RegularExpressionFlags.None); + if (text.charCodeAt(pos) === CharacterCodes.minus) { + pos++; + scanPatternModifiers(setFlags); + if (pos === start + 1) { + error(Diagnostics.Subpattern_flags_must_be_present_when_there_is_a_minus_sign, start, pos - start); } - isPreviousTermQuantifiable = true; - break; } - } - if (max && Number.parseInt(min) > Number.parseInt(max)) { - error(Diagnostics.Numbers_out_of_order_in_quantifier, digitsStart, pos - digitsStart); - } - } - else if (!min) { - if (unicodeMode) { - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch)); - } - isPreviousTermQuantifiable = true; - break; - } - scanExpectedChar(CharacterCodes.closeBrace); - pos--; - // falls through - case CharacterCodes.asterisk: - case CharacterCodes.plus: - case CharacterCodes.question: - pos++; - if (text.charCodeAt(pos) === CharacterCodes.question) { - // Non-greedy - pos++; - } - if (!isPreviousTermQuantifiable) { - error(Diagnostics.There_is_nothing_available_for_repetition, start, pos - start); + scanExpectedChar(CharacterCodes.colon); + isPreviousTermQuantifiable = true; + break; } - isPreviousTermQuantifiable = false; - break; - case CharacterCodes.dot: - pos++; + } + else { + numberOfCapturingGroups++; isPreviousTermQuantifiable = true; - break; - case CharacterCodes.openBracket: + } + scanDisjunction(/*isInGroup*/ true); + scanExpectedChar(CharacterCodes.closeParen); + break; + case CharacterCodes.openBrace: + pos++; + const digitsStart = pos; + scanDigits(); + const min = tokenValue; + if (text.charCodeAt(pos) === CharacterCodes.comma) { pos++; - if (unicodeSetsMode) { - scanClassSetExpression(); - } - else { - scanClassRanges(); - } - scanExpectedChar(CharacterCodes.closeBracket); - isPreviousTermQuantifiable = true; - break; - case CharacterCodes.closeParen: - if (isInGroup) { - return; + scanDigits(); + const max = tokenValue; + if (!min) { + if (max || text.charCodeAt(pos) === CharacterCodes.closeBrace) { + error(Diagnostics.Incomplete_quantifier_Digit_expected, digitsStart, 0); + } + else { + if (unicodeMode) { + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch)); + } + isPreviousTermQuantifiable = true; + break; + } } - // falls through - case CharacterCodes.closeBracket: - case CharacterCodes.closeBrace: - if (isUnterminated && !isInGroup) { - // Assume what starting from the character to be outside of the regex - return; + if (max && Number.parseInt(min) > Number.parseInt(max)) { + error(Diagnostics.Numbers_out_of_order_in_quantifier, digitsStart, pos - digitsStart); } - if (unicodeMode || ch === CharacterCodes.closeParen) { - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); + } + else if (!min) { + if (unicodeMode) { + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch)); } - pos++; - isPreviousTermQuantifiable = true; - break; - case CharacterCodes.slash: - case CharacterCodes.bar: - return; - default: - scanSourceCharacter(); isPreviousTermQuantifiable = true; break; - } - } - } - - function scanPatternModifiers(currFlags: RegularExpressionFlags): RegularExpressionFlags { - while (pos < end) { - const ch = text.charCodeAt(pos); - if (!isIdentifierPart(ch, languageVersion)) { - break; - } - const flag = characterToRegularExpressionFlag(String.fromCharCode(ch)); - if (flag === undefined) { - error(Diagnostics.Unknown_regular_expression_flag, pos, 1); - } - else if (currFlags & flag) { - error(Diagnostics.Duplicate_regular_expression_flag, pos, 1); - } - else if (!(flag & RegularExpressionFlags.Modifiers)) { - error(Diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, pos, 1); - } - else { - currFlags |= flag; - checkRegularExpressionFlagAvailable(flag, pos); - } - pos++; - } - return currFlags; - } - - // AtomEscape ::= - // | DecimalEscape - // | CharacterClassEscape - // | CharacterEscape - // | 'k<' RegExpIdentifierName '>' - function scanAtomEscape() { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash); - switch (text.charCodeAt(pos)) { - case CharacterCodes.k: + } + scanExpectedChar(CharacterCodes.closeBrace); + pos--; + // falls through + case CharacterCodes.asterisk: + case CharacterCodes.plus: + case CharacterCodes.question: pos++; - if (text.charCodeAt(pos) === CharacterCodes.lessThan) { + if (text.charCodeAt(pos) === CharacterCodes.question) { + // Non-greedy pos++; - scanGroupName(/*isReference*/ true); - scanExpectedChar(CharacterCodes.greaterThan); } - else if (unicodeMode) { - error(Diagnostics.k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets, pos - 2, 2); + if (!isPreviousTermQuantifiable) { + error(Diagnostics.There_is_nothing_available_for_repetition, start, pos - start); } + isPreviousTermQuantifiable = false; break; - case CharacterCodes.q: - if (unicodeSetsMode) { - pos++; - error(Diagnostics.q_is_only_available_inside_character_class, pos - 2, 2); - break; - } - // falls through - default: - // The scanEscapeSequence call in scanCharacterEscape must return non-empty strings - // since there must not be line breaks in a regex literal - Debug.assert(scanCharacterClassEscape() || scanDecimalEscape() || scanCharacterEscape(/*atomEscape*/ true)); + case CharacterCodes.dot: + pos++; + isPreviousTermQuantifiable = true; break; - } - } - - // DecimalEscape ::= [1-9] [0-9]* - function scanDecimalEscape(): boolean { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash); - const ch = text.charCodeAt(pos); - if (ch >= CharacterCodes._1 && ch <= CharacterCodes._9) { - const start = pos; - scanDigits(); - decimalEscapes.push({ pos: start, end: pos, value: +tokenValue }); - return true; - } - return false; - } - - // CharacterEscape ::= - // | `c` ControlLetter - // | IdentityEscape - // | (Other sequences handled by `scanEscapeSequence`) - // IdentityEscape ::= - // | '^' | '$' | '/' | '\' | '.' | '*' | '+' | '?' | '(' | ')' | '[' | ']' | '{' | '}' | '|' - // | [~UnicodeMode] (any other non-identifier characters) - function scanCharacterEscape(atomEscape: boolean): string { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash); - let ch = text.charCodeAt(pos); - switch (ch) { - case CharacterCodes.c: + case CharacterCodes.openBracket: pos++; - ch = text.charCodeAt(pos); - if (isASCIILetter(ch)) { - pos++; - return String.fromCharCode(ch & 0x1f); - } - if (unicodeMode) { - error(Diagnostics.c_must_be_followed_by_an_ASCII_letter, pos - 2, 2); + if (unicodeSetsMode) { + scanClassSetExpression(); } - else if (atomEscape && annexB) { - // Annex B treats - // - // ExtendedAtom : `\` [lookahead = `c`] - // - // as the single character `\` when `c` isn't followed by a valid control character - pos--; - return "\\"; + else { + scanClassRanges(); } - return String.fromCharCode(ch); - case CharacterCodes.caret: - case CharacterCodes.$: - case CharacterCodes.slash: - case CharacterCodes.backslash: - case CharacterCodes.dot: - case CharacterCodes.asterisk: - case CharacterCodes.plus: - case CharacterCodes.question: - case CharacterCodes.openParen: + scanExpectedChar(CharacterCodes.closeBracket); + isPreviousTermQuantifiable = true; + break; case CharacterCodes.closeParen: - case CharacterCodes.openBracket: + if (isInGroup) { + return; + } + // falls through case CharacterCodes.closeBracket: - case CharacterCodes.openBrace: case CharacterCodes.closeBrace: - case CharacterCodes.bar: + if (isUnterminated && !isInGroup) { + // Assume what starting from the character to be outside of the regex + return; + } + if (unicodeMode || ch === CharacterCodes.closeParen) { + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); + } pos++; - return String.fromCharCode(ch); + isPreviousTermQuantifiable = true; + break; + case CharacterCodes.slash: + case CharacterCodes.bar: + return; default: - if (pos >= end) { - error(Diagnostics.Undetermined_character_escape, pos - 1, 1, ch); - return "\\"; - } - pos--; - return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ annexB ? "annex-b" : true); + scanSourceCharacter(); + isPreviousTermQuantifiable = true; + break; } } + } - function scanGroupName(isReference: boolean) { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.lessThan); - tokenStart = pos; - scanIdentifier(codePointAt(text, pos), languageVersion); - if (pos === tokenStart) { - error(Diagnostics.Expected_a_capturing_group_name); + function scanPatternModifiers(currFlags: RegularExpressionFlags): RegularExpressionFlags { + while (pos < end) { + const ch = text.charCodeAt(pos); + if (!isIdentifierPart(ch, languageVersion)) { + break; } - else if (isReference) { - groupNameReferences.push({ pos: tokenStart, end: pos, name: tokenValue }); + const flag = characterToRegularExpressionFlag(String.fromCharCode(ch)); + if (flag === undefined) { + error(Diagnostics.Unknown_regular_expression_flag, pos, 1); } - else if (namedCapturingGroups.some(group => group.has(tokenValue))) { - error(Diagnostics.Named_capturing_groups_with_the_same_name_must_be_mutually_exclusive_to_each_other, tokenStart, pos - tokenStart); + else if (currFlags & flag) { + error(Diagnostics.Duplicate_regular_expression_flag, pos, 1); + } + else if (!(flag & RegularExpressionFlags.Modifiers)) { + error(Diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, pos, 1); } else { - last(namedCapturingGroups).add(tokenValue); - groupSpecifiers.add(tokenValue); + currFlags |= flag; + checkRegularExpressionFlagAvailable(flag, pos); } + pos++; } + return currFlags; + } - function isClassContentExit(ch: number) { - return ch === CharacterCodes.closeBracket || pos >= end; + // AtomEscape ::= + // | DecimalEscape + // | CharacterClassEscape + // | CharacterEscape + // | 'k<' RegExpIdentifierName '>' + function scanAtomEscape() { + Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash); + switch (text.charCodeAt(pos)) { + case CharacterCodes.k: + pos++; + if (text.charCodeAt(pos) === CharacterCodes.lessThan) { + pos++; + scanGroupName(/*isReference*/ true); + scanExpectedChar(CharacterCodes.greaterThan); + } + else if (unicodeMode) { + error(Diagnostics.k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets, pos - 2, 2); + } + break; + case CharacterCodes.q: + if (unicodeSetsMode) { + pos++; + error(Diagnostics.q_is_only_available_inside_character_class, pos - 2, 2); + break; + } + // falls through + default: + // The scanEscapeSequence call in scanCharacterEscape must return non-empty strings + // since there must not be line breaks in a regex literal + Debug.assert(scanCharacterClassEscape() || scanDecimalEscape() || scanCharacterEscape(/*atomEscape*/ true)); + break; } + } + + // DecimalEscape ::= [1-9] [0-9]* + function scanDecimalEscape(): boolean { + Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash); + const ch = text.charCodeAt(pos); + if (ch >= CharacterCodes._1 && ch <= CharacterCodes._9) { + const start = pos; + scanDigits(); + decimalEscapes.push({ pos: start, end: pos, value: +tokenValue }); + return true; + } + return false; + } - // ClassRanges ::= '^'? (ClassAtom ('-' ClassAtom)?)* - function scanClassRanges() { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.openBracket); - if (text.charCodeAt(pos) === CharacterCodes.caret) { - // character complement + // CharacterEscape ::= + // | `c` ControlLetter + // | IdentityEscape + // | (Other sequences handled by `scanEscapeSequence`) + // IdentityEscape ::= + // | '^' | '$' | '/' | '\' | '.' | '*' | '+' | '?' | '(' | ')' | '[' | ']' | '{' | '}' | '|' + // | [~UnicodeMode] (any other non-identifier characters) + function scanCharacterEscape(atomEscape: boolean): string { + Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash); + let ch = text.charCodeAt(pos); + switch (ch) { + case CharacterCodes.c: pos++; - } - while (pos < end) { - const ch = text.charCodeAt(pos); - if (isClassContentExit(ch)) { - return; - } - const minStart = pos; - const minCharacter = scanClassAtom(); - if (text.charCodeAt(pos) === CharacterCodes.minus) { + ch = text.charCodeAt(pos); + if (isASCIILetter(ch)) { pos++; - const ch = text.charCodeAt(pos); - if (isClassContentExit(ch)) { - return; - } - if (!minCharacter && !annexB) { - error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, minStart, pos - 1 - minStart); - } - const maxStart = pos; - const maxCharacter = scanClassAtom(); - if (!maxCharacter && !annexB) { - error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, maxStart, pos - maxStart); - continue; - } - if (!minCharacter) { - continue; - } - const minCharacterValue = codePointAt(minCharacter, 0); - const maxCharacterValue = codePointAt(maxCharacter, 0); - if ( - minCharacter.length === charSize(minCharacterValue) && - maxCharacter.length === charSize(maxCharacterValue) && - minCharacterValue > maxCharacterValue - ) { - error(Diagnostics.Range_out_of_order_in_character_class, minStart, pos - minStart); - } - } - } - } - - // Static Semantics: MayContainStrings - // ClassUnion: ClassSetOperands.some(ClassSetOperand => ClassSetOperand.MayContainStrings) - // ClassIntersection: ClassSetOperands.every(ClassSetOperand => ClassSetOperand.MayContainStrings) - // ClassSubtraction: ClassSetOperands[0].MayContainStrings - // ClassSetOperand: - // || ClassStringDisjunctionContents.MayContainStrings - // || CharacterClassEscape.UnicodePropertyValueExpression.LoneUnicodePropertyNameOrValue.MayContainStrings - // ClassStringDisjunctionContents: ClassStrings.some(ClassString => ClassString.ClassSetCharacters.length !== 1) - // LoneUnicodePropertyNameOrValue: isBinaryUnicodePropertyOfStrings(LoneUnicodePropertyNameOrValue) - - // ClassSetExpression ::= '^'? (ClassUnion | ClassIntersection | ClassSubtraction) - // ClassUnion ::= (ClassSetRange | ClassSetOperand)* - // ClassIntersection ::= ClassSetOperand ('&&' ClassSetOperand)+ - // ClassSubtraction ::= ClassSetOperand ('--' ClassSetOperand)+ - // ClassSetRange ::= ClassSetCharacter '-' ClassSetCharacter - function scanClassSetExpression() { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.openBracket); - let isCharacterComplement = false; - if (text.charCodeAt(pos) === CharacterCodes.caret) { + return String.fromCharCode(ch & 0x1f); + } + if (unicodeMode) { + error(Diagnostics.c_must_be_followed_by_an_ASCII_letter, pos - 2, 2); + } + else if (atomEscape && annexB) { + // Annex B treats + // + // ExtendedAtom : `\` [lookahead = `c`] + // + // as the single character `\` when `c` isn't followed by a valid control character + pos--; + return "\\"; + } + return String.fromCharCode(ch); + case CharacterCodes.caret: + case CharacterCodes.$: + case CharacterCodes.slash: + case CharacterCodes.backslash: + case CharacterCodes.dot: + case CharacterCodes.asterisk: + case CharacterCodes.plus: + case CharacterCodes.question: + case CharacterCodes.openParen: + case CharacterCodes.closeParen: + case CharacterCodes.openBracket: + case CharacterCodes.closeBracket: + case CharacterCodes.openBrace: + case CharacterCodes.closeBrace: + case CharacterCodes.bar: pos++; - isCharacterComplement = true; - } - let expressionMayContainStrings = false; - let ch = text.charCodeAt(pos); + return String.fromCharCode(ch); + default: + if (pos >= end) { + error(Diagnostics.Undetermined_character_escape, pos - 1, 1, ch); + return "\\"; + } + pos--; + return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ annexB ? "annex-b" : true); + } + } + + function scanGroupName(isReference: boolean) { + Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.lessThan); + tokenStart = pos; + scanIdentifier(codePointAt(text, pos), languageVersion); + if (pos === tokenStart) { + error(Diagnostics.Expected_a_capturing_group_name); + } + else if (isReference) { + groupNameReferences.push({ pos: tokenStart, end: pos, name: tokenValue }); + } + else if (namedCapturingGroups.some(group => group.has(tokenValue))) { + error(Diagnostics.Named_capturing_groups_with_the_same_name_must_be_mutually_exclusive_to_each_other, tokenStart, pos - tokenStart); + } + else { + last(namedCapturingGroups).add(tokenValue); + groupSpecifiers.add(tokenValue); + } + } + + function isClassContentExit(ch: number) { + return ch === CharacterCodes.closeBracket || pos >= end; + } + + // ClassRanges ::= '^'? (ClassAtom ('-' ClassAtom)?)* + function scanClassRanges() { + Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.openBracket); + if (text.charCodeAt(pos) === CharacterCodes.caret) { + // character complement + pos++; + } + while (pos < end) { + const ch = text.charCodeAt(pos); if (isClassContentExit(ch)) { return; } - let start = pos; - let oprand!: string; - switch (text.slice(pos, pos + 2)) { - case "--": - case "&&": - error(Diagnostics.Expected_a_class_set_oprand); - mayContainStrings = false; - break; - default: - oprand = scanClassSetOprand(); - break; + const minStart = pos; + const minCharacter = scanClassAtom(); + if (text.charCodeAt(pos) === CharacterCodes.minus) { + pos++; + const ch = text.charCodeAt(pos); + if (isClassContentExit(ch)) { + return; + } + if (!minCharacter && !annexB) { + error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, minStart, pos - 1 - minStart); + } + const maxStart = pos; + const maxCharacter = scanClassAtom(); + if (!maxCharacter && !annexB) { + error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, maxStart, pos - maxStart); + continue; + } + if (!minCharacter) { + continue; + } + const minCharacterValue = codePointAt(minCharacter, 0); + const maxCharacterValue = codePointAt(maxCharacter, 0); + if ( + minCharacter.length === charSize(minCharacterValue) && + maxCharacter.length === charSize(maxCharacterValue) && + minCharacterValue > maxCharacterValue + ) { + error(Diagnostics.Range_out_of_order_in_character_class, minStart, pos - minStart); + } } - switch (text.charCodeAt(pos)) { - case CharacterCodes.minus: - if (text.charCodeAt(pos + 1) === CharacterCodes.minus) { - if (isCharacterComplement && mayContainStrings) { - error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, start, pos - start); - } - expressionMayContainStrings = mayContainStrings; - scanClassSetSubExpression(ClassSetExpressionType.ClassSubtraction); - mayContainStrings = !isCharacterComplement && expressionMayContainStrings; - return; - } - break; - case CharacterCodes.ampersand: - if (text.charCodeAt(pos + 1) === CharacterCodes.ampersand) { - scanClassSetSubExpression(ClassSetExpressionType.ClassIntersection); - if (isCharacterComplement && mayContainStrings) { - error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, start, pos - start); - } - expressionMayContainStrings = mayContainStrings; - mayContainStrings = !isCharacterComplement && expressionMayContainStrings; - return; - } - else { - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); + } + } + + // Static Semantics: MayContainStrings + // ClassUnion: ClassSetOperands.some(ClassSetOperand => ClassSetOperand.MayContainStrings) + // ClassIntersection: ClassSetOperands.every(ClassSetOperand => ClassSetOperand.MayContainStrings) + // ClassSubtraction: ClassSetOperands[0].MayContainStrings + // ClassSetOperand: + // || ClassStringDisjunctionContents.MayContainStrings + // || CharacterClassEscape.UnicodePropertyValueExpression.LoneUnicodePropertyNameOrValue.MayContainStrings + // ClassStringDisjunctionContents: ClassStrings.some(ClassString => ClassString.ClassSetCharacters.length !== 1) + // LoneUnicodePropertyNameOrValue: isBinaryUnicodePropertyOfStrings(LoneUnicodePropertyNameOrValue) + + // ClassSetExpression ::= '^'? (ClassUnion | ClassIntersection | ClassSubtraction) + // ClassUnion ::= (ClassSetRange | ClassSetOperand)* + // ClassIntersection ::= ClassSetOperand ('&&' ClassSetOperand)+ + // ClassSubtraction ::= ClassSetOperand ('--' ClassSetOperand)+ + // ClassSetRange ::= ClassSetCharacter '-' ClassSetCharacter + function scanClassSetExpression() { + Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.openBracket); + let isCharacterComplement = false; + if (text.charCodeAt(pos) === CharacterCodes.caret) { + pos++; + isCharacterComplement = true; + } + let expressionMayContainStrings = false; + let ch = text.charCodeAt(pos); + if (isClassContentExit(ch)) { + return; + } + let start = pos; + let oprand!: string; + switch (text.slice(pos, pos + 2)) { + case "--": + case "&&": + error(Diagnostics.Expected_a_class_set_oprand); + mayContainStrings = false; + break; + default: + oprand = scanClassSetOprand(); + break; + } + switch (text.charCodeAt(pos)) { + case CharacterCodes.minus: + if (text.charCodeAt(pos + 1) === CharacterCodes.minus) { + if (isCharacterComplement && mayContainStrings) { + error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, start, pos - start); } - break; - default: + expressionMayContainStrings = mayContainStrings; + scanClassSetSubExpression(ClassSetExpressionType.ClassSubtraction); + mayContainStrings = !isCharacterComplement && expressionMayContainStrings; + return; + } + break; + case CharacterCodes.ampersand: + if (text.charCodeAt(pos + 1) === CharacterCodes.ampersand) { + scanClassSetSubExpression(ClassSetExpressionType.ClassIntersection); if (isCharacterComplement && mayContainStrings) { error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, start, pos - start); } expressionMayContainStrings = mayContainStrings; - break; - } - while (pos < end) { - ch = text.charCodeAt(pos); - switch (ch) { - case CharacterCodes.minus: - pos++; - ch = text.charCodeAt(pos); - if (isClassContentExit(ch)) { - mayContainStrings = !isCharacterComplement && expressionMayContainStrings; - return; - } - if (ch === CharacterCodes.minus) { - pos++; - error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); - start = pos - 2; - oprand = text.slice(start, pos); - continue; - } - else { - if (!oprand) { - error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, start, pos - 1 - start); - } - const secondStart = pos; - const secondOprand = scanClassSetOprand(); - if (isCharacterComplement && mayContainStrings) { - error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, secondStart, pos - secondStart); - } - expressionMayContainStrings ||= mayContainStrings; - if (!secondOprand) { - error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, secondStart, pos - secondStart); - break; - } - if (!oprand) { - break; - } - const minCharacterValue = codePointAt(oprand, 0); - const maxCharacterValue = codePointAt(secondOprand, 0); - if ( - oprand.length === charSize(minCharacterValue) && - secondOprand.length === charSize(maxCharacterValue) && - minCharacterValue > maxCharacterValue - ) { - error(Diagnostics.Range_out_of_order_in_character_class, start, pos - start); - } - } - break; - case CharacterCodes.ampersand: - start = pos; - pos++; - if (text.charCodeAt(pos) === CharacterCodes.ampersand) { - pos++; - error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); - if (text.charCodeAt(pos) === CharacterCodes.ampersand) { - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); - pos++; - } - } - else { - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos - 1, 1, String.fromCharCode(ch)); - } - oprand = text.slice(start, pos); - continue; + mayContainStrings = !isCharacterComplement && expressionMayContainStrings; + return; } - if (isClassContentExit(text.charCodeAt(pos))) { - break; + else { + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); } - start = pos; - switch (text.slice(pos, pos + 2)) { - case "--": - case "&&": - error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos, 2); - pos += 2; - oprand = text.slice(start, pos); - break; - default: - oprand = scanClassSetOprand(); - break; + break; + default: + if (isCharacterComplement && mayContainStrings) { + error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, start, pos - start); } - } - mayContainStrings = !isCharacterComplement && expressionMayContainStrings; + expressionMayContainStrings = mayContainStrings; + break; } - - function scanClassSetSubExpression(expressionType: ClassSetExpressionType) { - let expressionMayContainStrings = mayContainStrings; - while (pos < end) { - let ch = text.charCodeAt(pos); - if (isClassContentExit(ch)) { - break; - } - // Provide user-friendly diagnostic messages - switch (ch) { - case CharacterCodes.minus: + while (pos < end) { + ch = text.charCodeAt(pos); + switch (ch) { + case CharacterCodes.minus: + pos++; + ch = text.charCodeAt(pos); + if (isClassContentExit(ch)) { + mayContainStrings = !isCharacterComplement && expressionMayContainStrings; + return; + } + if (ch === CharacterCodes.minus) { pos++; - if (text.charCodeAt(pos) === CharacterCodes.minus) { - pos++; - if (expressionType !== ClassSetExpressionType.ClassSubtraction) { - error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); - } + error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); + start = pos - 2; + oprand = text.slice(start, pos); + continue; + } + else { + if (!oprand) { + error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, start, pos - 1 - start); + } + const secondStart = pos; + const secondOprand = scanClassSetOprand(); + if (isCharacterComplement && mayContainStrings) { + error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, secondStart, pos - secondStart); } - else { - error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 1, 1); + expressionMayContainStrings ||= mayContainStrings; + if (!secondOprand) { + error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, secondStart, pos - secondStart); + break; } - break; - case CharacterCodes.ampersand: + if (!oprand) { + break; + } + const minCharacterValue = codePointAt(oprand, 0); + const maxCharacterValue = codePointAt(secondOprand, 0); + if ( + oprand.length === charSize(minCharacterValue) && + secondOprand.length === charSize(maxCharacterValue) && + minCharacterValue > maxCharacterValue + ) { + error(Diagnostics.Range_out_of_order_in_character_class, start, pos - start); + } + } + break; + case CharacterCodes.ampersand: + start = pos; + pos++; + if (text.charCodeAt(pos) === CharacterCodes.ampersand) { pos++; + error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); if (text.charCodeAt(pos) === CharacterCodes.ampersand) { + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); pos++; - if (expressionType !== ClassSetExpressionType.ClassIntersection) { - error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); - } - if (text.charCodeAt(pos) === CharacterCodes.ampersand) { - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); - pos++; - } - } - else { - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos - 1, 1, String.fromCharCode(ch)); - } - break; - default: - switch (expressionType) { - case ClassSetExpressionType.ClassSubtraction: - error(Diagnostics._0_expected, pos, 0, "--"); - break; - case ClassSetExpressionType.ClassIntersection: - error(Diagnostics._0_expected, pos, 0, "&&"); - break; - default: - break; } - break; - } - ch = text.charCodeAt(pos); - if (isClassContentExit(ch)) { - error(Diagnostics.Expected_a_class_set_oprand); + } + else { + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos - 1, 1, String.fromCharCode(ch)); + } + oprand = text.slice(start, pos); + continue; + } + if (isClassContentExit(text.charCodeAt(pos))) { + break; + } + start = pos; + switch (text.slice(pos, pos + 2)) { + case "--": + case "&&": + error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos, 2); + pos += 2; + oprand = text.slice(start, pos); + break; + default: + oprand = scanClassSetOprand(); break; - } - scanClassSetOprand(); - // Used only if expressionType is Intersection - expressionMayContainStrings &&= mayContainStrings; } - mayContainStrings = expressionMayContainStrings; } + mayContainStrings = !isCharacterComplement && expressionMayContainStrings; + } - // ClassSetOperand ::= - // | '[' ClassSetExpression ']' - // | '\' CharacterClassEscape - // | '\q{' ClassStringDisjunctionContents '}' - // | ClassSetCharacter - function scanClassSetOprand(): string { - mayContainStrings = false; - switch (text.charCodeAt(pos)) { - case CharacterCodes.openBracket: - pos++; - scanClassSetExpression(); - scanExpectedChar(CharacterCodes.closeBracket); - return ""; - case CharacterCodes.backslash: + function scanClassSetSubExpression(expressionType: ClassSetExpressionType) { + let expressionMayContainStrings = mayContainStrings; + while (pos < end) { + let ch = text.charCodeAt(pos); + if (isClassContentExit(ch)) { + break; + } + // Provide user-friendly diagnostic messages + switch (ch) { + case CharacterCodes.minus: pos++; - if (scanCharacterClassEscape()) { - return ""; + if (text.charCodeAt(pos) === CharacterCodes.minus) { + pos++; + if (expressionType !== ClassSetExpressionType.ClassSubtraction) { + error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); + } + } + else { + error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 1, 1); } - else if (text.charCodeAt(pos) === CharacterCodes.q) { + break; + case CharacterCodes.ampersand: + pos++; + if (text.charCodeAt(pos) === CharacterCodes.ampersand) { pos++; - if (text.charCodeAt(pos) === CharacterCodes.openBrace) { - pos++; - scanClassStringDisjunctionContents(); - scanExpectedChar(CharacterCodes.closeBrace); - return ""; + if (expressionType !== ClassSetExpressionType.ClassIntersection) { + error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); } - else { - error(Diagnostics.q_must_be_followed_by_string_alternatives_enclosed_in_braces, pos - 2, 2); - return "q"; + if (text.charCodeAt(pos) === CharacterCodes.ampersand) { + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); + pos++; } } - pos--; - // falls through + else { + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos - 1, 1, String.fromCharCode(ch)); + } + break; default: - return scanClassSetCharacter(); + switch (expressionType) { + case ClassSetExpressionType.ClassSubtraction: + error(Diagnostics._0_expected, pos, 0, "--"); + break; + case ClassSetExpressionType.ClassIntersection: + error(Diagnostics._0_expected, pos, 0, "&&"); + break; + default: + break; + } + break; + } + ch = text.charCodeAt(pos); + if (isClassContentExit(ch)) { + error(Diagnostics.Expected_a_class_set_oprand); + break; } + scanClassSetOprand(); + // Used only if expressionType is Intersection + expressionMayContainStrings &&= mayContainStrings; } + mayContainStrings = expressionMayContainStrings; + } - // ClassStringDisjunctionContents ::= ClassSetCharacter* ('|' ClassSetCharacter*)* - function scanClassStringDisjunctionContents() { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.openBrace); - let characterCount = 0; - while (pos < end) { - const ch = text.charCodeAt(pos); - switch (ch) { - case CharacterCodes.closeBrace: - if (characterCount !== 1) { - mayContainStrings = true; - } - return; - case CharacterCodes.bar: - if (characterCount !== 1) { - mayContainStrings = true; - } + // ClassSetOperand ::= + // | '[' ClassSetExpression ']' + // | '\' CharacterClassEscape + // | '\q{' ClassStringDisjunctionContents '}' + // | ClassSetCharacter + function scanClassSetOprand(): string { + mayContainStrings = false; + switch (text.charCodeAt(pos)) { + case CharacterCodes.openBracket: + pos++; + scanClassSetExpression(); + scanExpectedChar(CharacterCodes.closeBracket); + return ""; + case CharacterCodes.backslash: + pos++; + if (scanCharacterClassEscape()) { + return ""; + } + else if (text.charCodeAt(pos) === CharacterCodes.q) { + pos++; + if (text.charCodeAt(pos) === CharacterCodes.openBrace) { pos++; - start = pos; - characterCount = 0; - break; - default: - scanClassSetCharacter(); - characterCount++; - break; + scanClassStringDisjunctionContents(); + scanExpectedChar(CharacterCodes.closeBrace); + return ""; + } + else { + error(Diagnostics.q_must_be_followed_by_string_alternatives_enclosed_in_braces, pos - 2, 2); + return "q"; + } } - } + pos--; + // falls through + default: + return scanClassSetCharacter(); } + } - // ClassSetCharacter ::= - // | SourceCharacter -- ClassSetSyntaxCharacter -- ClassSetReservedDoublePunctuator - // | '\' (CharacterEscape | ClassSetReservedPunctuator | 'b') - function scanClassSetCharacter(): string { + // ClassStringDisjunctionContents ::= ClassSetCharacter* ('|' ClassSetCharacter*)* + function scanClassStringDisjunctionContents() { + Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.openBrace); + let characterCount = 0; + while (pos < end) { const ch = text.charCodeAt(pos); - if (ch === CharacterCodes.backslash) { - pos++; - const ch = text.charCodeAt(pos); - switch (ch) { - case CharacterCodes.b: - pos++; - return "\b"; - case CharacterCodes.ampersand: - case CharacterCodes.minus: - case CharacterCodes.exclamation: - case CharacterCodes.hash: - case CharacterCodes.percent: - case CharacterCodes.comma: - case CharacterCodes.colon: - case CharacterCodes.semicolon: - case CharacterCodes.lessThan: - case CharacterCodes.equals: - case CharacterCodes.greaterThan: - case CharacterCodes.at: - case CharacterCodes.backtick: - case CharacterCodes.tilde: - pos++; - return String.fromCharCode(ch); - default: - return scanCharacterEscape(/*atomEscape*/ false); - } - } - else if (ch === text.charCodeAt(pos + 1)) { - switch (ch) { - case CharacterCodes.ampersand: - case CharacterCodes.exclamation: - case CharacterCodes.hash: - case CharacterCodes.percent: - case CharacterCodes.asterisk: - case CharacterCodes.plus: - case CharacterCodes.comma: - case CharacterCodes.dot: - case CharacterCodes.colon: - case CharacterCodes.semicolon: - case CharacterCodes.lessThan: - case CharacterCodes.equals: - case CharacterCodes.greaterThan: - case CharacterCodes.question: - case CharacterCodes.at: - case CharacterCodes.backtick: - case CharacterCodes.tilde: - error(Diagnostics.A_character_class_must_not_contain_a_reserved_double_punctuator_Did_you_mean_to_escape_it_with_backslash, pos, 2); - pos += 2; - return text.substring(pos - 2, pos); - } - } switch (ch) { - case CharacterCodes.slash: - case CharacterCodes.openParen: - case CharacterCodes.closeParen: - case CharacterCodes.openBracket: - case CharacterCodes.closeBracket: - case CharacterCodes.openBrace: case CharacterCodes.closeBrace: - case CharacterCodes.minus: + if (characterCount !== 1) { + mayContainStrings = true; + } + return; case CharacterCodes.bar: - error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); + if (characterCount !== 1) { + mayContainStrings = true; + } pos++; - return String.fromCharCode(ch); + start = pos; + characterCount = 0; + break; + default: + scanClassSetCharacter(); + characterCount++; + break; } - return scanSourceCharacter(); } + } - // ClassAtom ::= - // | SourceCharacter but not one of '\' or ']' - // | '\' ClassEscape - // ClassEscape ::= - // | 'b' - // | '-' - // | CharacterClassEscape - // | CharacterEscape - function scanClassAtom(): string { - if (text.charCodeAt(pos) === CharacterCodes.backslash) { - pos++; - const ch = text.charCodeAt(pos); - switch (ch) { - case CharacterCodes.b: - pos++; - return "\b"; - case CharacterCodes.minus: - pos++; - return String.fromCharCode(ch); - default: - if (scanCharacterClassEscape()) { - return ""; - } - return scanCharacterEscape(/*atomEscape*/ false); - } + // ClassSetCharacter ::= + // | SourceCharacter -- ClassSetSyntaxCharacter -- ClassSetReservedDoublePunctuator + // | '\' (CharacterEscape | ClassSetReservedPunctuator | 'b') + function scanClassSetCharacter(): string { + const ch = text.charCodeAt(pos); + if (ch === CharacterCodes.backslash) { + pos++; + const ch = text.charCodeAt(pos); + switch (ch) { + case CharacterCodes.b: + pos++; + return "\b"; + case CharacterCodes.ampersand: + case CharacterCodes.minus: + case CharacterCodes.exclamation: + case CharacterCodes.hash: + case CharacterCodes.percent: + case CharacterCodes.comma: + case CharacterCodes.colon: + case CharacterCodes.semicolon: + case CharacterCodes.lessThan: + case CharacterCodes.equals: + case CharacterCodes.greaterThan: + case CharacterCodes.at: + case CharacterCodes.backtick: + case CharacterCodes.tilde: + pos++; + return String.fromCharCode(ch); + default: + return scanCharacterEscape(/*atomEscape*/ false); } - else { - return scanSourceCharacter(); + } + else if (ch === text.charCodeAt(pos + 1)) { + switch (ch) { + case CharacterCodes.ampersand: + case CharacterCodes.exclamation: + case CharacterCodes.hash: + case CharacterCodes.percent: + case CharacterCodes.asterisk: + case CharacterCodes.plus: + case CharacterCodes.comma: + case CharacterCodes.dot: + case CharacterCodes.colon: + case CharacterCodes.semicolon: + case CharacterCodes.lessThan: + case CharacterCodes.equals: + case CharacterCodes.greaterThan: + case CharacterCodes.question: + case CharacterCodes.at: + case CharacterCodes.backtick: + case CharacterCodes.tilde: + error(Diagnostics.A_character_class_must_not_contain_a_reserved_double_punctuator_Did_you_mean_to_escape_it_with_backslash, pos, 2); + pos += 2; + return text.substring(pos - 2, pos); } } + switch (ch) { + case CharacterCodes.slash: + case CharacterCodes.openParen: + case CharacterCodes.closeParen: + case CharacterCodes.openBracket: + case CharacterCodes.closeBracket: + case CharacterCodes.openBrace: + case CharacterCodes.closeBrace: + case CharacterCodes.minus: + case CharacterCodes.bar: + error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); + pos++; + return String.fromCharCode(ch); + } + return scanSourceCharacter(); + } - // CharacterClassEscape ::= - // | 'd' | 'D' | 's' | 'S' | 'w' | 'W' - // | [+UnicodeMode] ('P' | 'p') '{' UnicodePropertyValueExpression '}' - function scanCharacterClassEscape(): boolean { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash); - let isCharacterComplement = false; - const start = pos - 1; + // ClassAtom ::= + // | SourceCharacter but not one of '\' or ']' + // | '\' ClassEscape + // ClassEscape ::= + // | 'b' + // | '-' + // | CharacterClassEscape + // | CharacterEscape + function scanClassAtom(): string { + if (text.charCodeAt(pos) === CharacterCodes.backslash) { + pos++; const ch = text.charCodeAt(pos); switch (ch) { - case CharacterCodes.d: - case CharacterCodes.D: - case CharacterCodes.s: - case CharacterCodes.S: - case CharacterCodes.w: - case CharacterCodes.W: + case CharacterCodes.b: pos++; - return true; - case CharacterCodes.P: - isCharacterComplement = true; - // falls through - case CharacterCodes.p: + return "\b"; + case CharacterCodes.minus: pos++; - if (text.charCodeAt(pos) === CharacterCodes.openBrace) { - pos++; - const propertyNameOrValueStart = pos; - const propertyNameOrValue = scanWordCharacters(); - if (text.charCodeAt(pos) === CharacterCodes.equals) { - const propertyName = nonBinaryUnicodeProperties.get(propertyNameOrValue); - if (pos === propertyNameOrValueStart) { - error(Diagnostics.Expected_a_Unicode_property_name); - } - else if (propertyName === undefined) { - error(Diagnostics.Unknown_Unicode_property_name, propertyNameOrValueStart, pos - propertyNameOrValueStart); - const suggestion = getSpellingSuggestion(propertyNameOrValue, nonBinaryUnicodeProperties.keys(), identity); - if (suggestion) { - error(Diagnostics.Did_you_mean_0, propertyNameOrValueStart, pos - propertyNameOrValueStart, suggestion); - } - } - pos++; - const propertyValueStart = pos; - const propertyValue = scanWordCharacters(); - if (pos === propertyValueStart) { - error(Diagnostics.Expected_a_Unicode_property_value); + return String.fromCharCode(ch); + default: + if (scanCharacterClassEscape()) { + return ""; + } + return scanCharacterEscape(/*atomEscape*/ false); + } + } + else { + return scanSourceCharacter(); + } + } + + // CharacterClassEscape ::= + // | 'd' | 'D' | 's' | 'S' | 'w' | 'W' + // | [+UnicodeMode] ('P' | 'p') '{' UnicodePropertyValueExpression '}' + function scanCharacterClassEscape(): boolean { + Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash); + let isCharacterComplement = false; + const start = pos - 1; + const ch = text.charCodeAt(pos); + switch (ch) { + case CharacterCodes.d: + case CharacterCodes.D: + case CharacterCodes.s: + case CharacterCodes.S: + case CharacterCodes.w: + case CharacterCodes.W: + pos++; + return true; + case CharacterCodes.P: + isCharacterComplement = true; + // falls through + case CharacterCodes.p: + pos++; + if (text.charCodeAt(pos) === CharacterCodes.openBrace) { + pos++; + const propertyNameOrValueStart = pos; + const propertyNameOrValue = scanWordCharacters(); + if (text.charCodeAt(pos) === CharacterCodes.equals) { + const propertyName = nonBinaryUnicodeProperties.get(propertyNameOrValue); + if (pos === propertyNameOrValueStart) { + error(Diagnostics.Expected_a_Unicode_property_name); + } + else if (propertyName === undefined) { + error(Diagnostics.Unknown_Unicode_property_name, propertyNameOrValueStart, pos - propertyNameOrValueStart); + const suggestion = getSpellingSuggestion(propertyNameOrValue, nonBinaryUnicodeProperties.keys(), identity); + if (suggestion) { + error(Diagnostics.Did_you_mean_0, propertyNameOrValueStart, pos - propertyNameOrValueStart, suggestion); } - else if (propertyName !== undefined && !valuesOfNonBinaryUnicodeProperties[propertyName].has(propertyValue)) { - error(Diagnostics.Unknown_Unicode_property_value, propertyValueStart, pos - propertyValueStart); - const suggestion = getSpellingSuggestion(propertyValue, valuesOfNonBinaryUnicodeProperties[propertyName], identity); - if (suggestion) { - error(Diagnostics.Did_you_mean_0, propertyValueStart, pos - propertyValueStart, suggestion); - } + } + pos++; + const propertyValueStart = pos; + const propertyValue = scanWordCharacters(); + if (pos === propertyValueStart) { + error(Diagnostics.Expected_a_Unicode_property_value); + } + else if (propertyName !== undefined && !valuesOfNonBinaryUnicodeProperties[propertyName].has(propertyValue)) { + error(Diagnostics.Unknown_Unicode_property_value, propertyValueStart, pos - propertyValueStart); + const suggestion = getSpellingSuggestion(propertyValue, valuesOfNonBinaryUnicodeProperties[propertyName], identity); + if (suggestion) { + error(Diagnostics.Did_you_mean_0, propertyValueStart, pos - propertyValueStart, suggestion); } } - else { - if (pos === propertyNameOrValueStart) { - error(Diagnostics.Expected_a_Unicode_property_name_or_value); + } + else { + if (pos === propertyNameOrValueStart) { + error(Diagnostics.Expected_a_Unicode_property_name_or_value); + } + else if (binaryUnicodePropertiesOfStrings.has(propertyNameOrValue)) { + if (!unicodeSetsMode) { + error(Diagnostics.Any_Unicode_property_that_would_possibly_match_more_than_a_single_character_is_only_available_when_the_Unicode_Sets_v_flag_is_set, propertyNameOrValueStart, pos - propertyNameOrValueStart); } - else if (binaryUnicodePropertiesOfStrings.has(propertyNameOrValue)) { - if (!unicodeSetsMode) { - error(Diagnostics.Any_Unicode_property_that_would_possibly_match_more_than_a_single_character_is_only_available_when_the_Unicode_Sets_v_flag_is_set, propertyNameOrValueStart, pos - propertyNameOrValueStart); - } - else if (isCharacterComplement) { - error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, propertyNameOrValueStart, pos - propertyNameOrValueStart); - } - else { - mayContainStrings = true; - } + else if (isCharacterComplement) { + error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, propertyNameOrValueStart, pos - propertyNameOrValueStart); } - else if (!valuesOfNonBinaryUnicodeProperties.General_Category.has(propertyNameOrValue) && !binaryUnicodeProperties.has(propertyNameOrValue)) { - error(Diagnostics.Unknown_Unicode_property_name_or_value, propertyNameOrValueStart, pos - propertyNameOrValueStart); - const suggestion = getSpellingSuggestion(propertyNameOrValue, [...valuesOfNonBinaryUnicodeProperties.General_Category, ...binaryUnicodeProperties, ...binaryUnicodePropertiesOfStrings], identity); - if (suggestion) { - error(Diagnostics.Did_you_mean_0, propertyNameOrValueStart, pos - propertyNameOrValueStart, suggestion); - } + else { + mayContainStrings = true; } } - scanExpectedChar(CharacterCodes.closeBrace); - if (!unicodeMode) { - error(Diagnostics.Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set, start, pos - start); + else if (!valuesOfNonBinaryUnicodeProperties.General_Category.has(propertyNameOrValue) && !binaryUnicodeProperties.has(propertyNameOrValue)) { + error(Diagnostics.Unknown_Unicode_property_name_or_value, propertyNameOrValueStart, pos - propertyNameOrValueStart); + const suggestion = getSpellingSuggestion(propertyNameOrValue, [...valuesOfNonBinaryUnicodeProperties.General_Category, ...binaryUnicodeProperties, ...binaryUnicodePropertiesOfStrings], identity); + if (suggestion) { + error(Diagnostics.Did_you_mean_0, propertyNameOrValueStart, pos - propertyNameOrValueStart, suggestion); + } } } - else if (unicodeMode) { - error(Diagnostics._0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces, pos - 2, 2, String.fromCharCode(ch)); + scanExpectedChar(CharacterCodes.closeBrace); + if (!unicodeMode) { + error(Diagnostics.Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set, start, pos - start); } - return true; - } - return false; + } + else if (unicodeMode) { + error(Diagnostics._0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces, pos - 2, 2, String.fromCharCode(ch)); + } + return true; } + return false; + } - function scanWordCharacters(): string { - let value = ""; - while (pos < end) { - const ch = text.charCodeAt(pos); - if (!isWordCharacter(ch)) { - break; - } - value += String.fromCharCode(ch); - pos++; + function scanWordCharacters(): string { + let value = ""; + while (pos < end) { + const ch = text.charCodeAt(pos); + if (!isWordCharacter(ch)) { + break; } - return value; + value += String.fromCharCode(ch); + pos++; } + return value; + } - function scanSourceCharacter(): string { - const size = unicodeMode ? charSize(codePointAt(text, pos)) : 1; - pos += size; - return text.substring(pos - size, pos); - } + function scanSourceCharacter(): string { + const size = unicodeMode ? charSize(codePointAt(text, pos)) : 1; + pos += size; + return text.substring(pos - size, pos); + } - function scanExpectedChar(ch: CharacterCodes) { - if (text.charCodeAt(pos) === ch) { - pos++; - } - else { - error(Diagnostics._0_expected, pos, 0, String.fromCharCode(ch)); - } + function scanExpectedChar(ch: CharacterCodes) { + if (text.charCodeAt(pos) === ch) { + pos++; + } + else { + error(Diagnostics._0_expected, pos, 0, String.fromCharCode(ch)); } + } - scanDisjunction(/*isInGroup*/ false); + scanDisjunction(/*isInGroup*/ false); - forEach(groupNameReferences, reference => { - if (!groupSpecifiers.has(reference.name)) { - error(Diagnostics.There_is_no_capturing_group_named_0_in_this_regular_expression, reference.pos, reference.end - reference.pos, reference.name); + forEach(groupNameReferences, reference => { + if (!groupSpecifiers.has(reference.name)) { + error(Diagnostics.There_is_no_capturing_group_named_0_in_this_regular_expression, reference.pos, reference.end - reference.pos, reference.name); + } + }); + forEach(decimalEscapes, escape => { + // in AnnexB, if a DecimalEscape is greater than the number of capturing groups then it is treated as + // either a LegacyOctalEscapeSequence or IdentityEscape + if (!annexB && escape.value > numberOfCapturingGroups) { + if (numberOfCapturingGroups) { + error(Diagnostics.This_backreference_refers_to_a_group_that_does_not_exist_There_are_only_0_capturing_groups_in_this_regular_expression, escape.pos, escape.end - escape.pos, numberOfCapturingGroups); } - }); - forEach(decimalEscapes, escape => { - // in AnnexB, if a DecimalEscape is greater than the number of capturing groups then it is treated as - // either a LegacyOctalEscapeSequence or IdentityEscape - if (!annexB && escape.value > numberOfCapturingGroups) { - if (numberOfCapturingGroups) { - error(Diagnostics.This_backreference_refers_to_a_group_that_does_not_exist_There_are_only_0_capturing_groups_in_this_regular_expression, escape.pos, escape.end - escape.pos, numberOfCapturingGroups); - } - else { - error(Diagnostics.This_backreference_is_invalid_because_the_containing_regular_expression_contains_no_capturing_groups, escape.pos, escape.end - escape.pos); - } + else { + error(Diagnostics.This_backreference_is_invalid_because_the_containing_regular_expression_contains_no_capturing_groups, escape.pos, escape.end - escape.pos); } - }); - } - - function checkRegularExpressionFlagAvailable(flag: RegularExpressionFlags, pos: number) { - const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag) as ScriptTarget | undefined; - if (availableFrom && languageVersion < availableFrom) { - error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, 1, getNameOfScriptTarget(availableFrom)); } + }); + } + + function checkRegularExpressionFlagAvailable(flag: RegularExpressionFlags, pos: number) { + const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag) as ScriptTarget | undefined; + if (availableFrom && languageVersion < availableFrom) { + error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, 1, getNameOfScriptTarget(availableFrom)); } } From 5333c479a9c7c924cee3982cef3b72b60c9143b5 Mon Sep 17 00:00:00 2001 From: Ron Buckton Date: Fri, 26 Apr 2024 16:31:06 -0400 Subject: [PATCH 2/8] Fix spelling --- src/compiler/diagnosticMessages.json | 2 +- src/compiler/scanner.ts | 36 +++++++++---------- ...pressionScanning(target=es2015).errors.txt | 20 +++++------ ...rExpressionScanning(target=es5).errors.txt | 20 +++++------ ...pressionScanning(target=esnext).errors.txt | 20 +++++------ 5 files changed, 49 insertions(+), 49 deletions(-) diff --git a/src/compiler/diagnosticMessages.json b/src/compiler/diagnosticMessages.json index 058dc8c8c23c5..859cc627a1689 100644 --- a/src/compiler/diagnosticMessages.json +++ b/src/compiler/diagnosticMessages.json @@ -1733,7 +1733,7 @@ "category": "Error", "code": 1519 }, - "Expected a class set oprand.": { + "Expected a class set operand.": { "category": "Error", "code": 1520 }, diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 2937aa343b18d..a254b3bc0c114 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -2946,15 +2946,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return; } let start = pos; - let oprand!: string; + let operand!: string; switch (text.slice(pos, pos + 2)) { case "--": case "&&": - error(Diagnostics.Expected_a_class_set_oprand); + error(Diagnostics.Expected_a_class_set_operand); mayContainStrings = false; break; default: - oprand = scanClassSetOprand(); + operand = scanClassSetOperand(); break; } switch (text.charCodeAt(pos)) { @@ -3004,31 +3004,31 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); start = pos - 2; - oprand = text.slice(start, pos); + operand = text.slice(start, pos); continue; } else { - if (!oprand) { + if (!operand) { error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, start, pos - 1 - start); } const secondStart = pos; - const secondOprand = scanClassSetOprand(); + const secondOperand = scanClassSetOperand(); if (isCharacterComplement && mayContainStrings) { error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, secondStart, pos - secondStart); } expressionMayContainStrings ||= mayContainStrings; - if (!secondOprand) { + if (!secondOperand) { error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, secondStart, pos - secondStart); break; } - if (!oprand) { + if (!operand) { break; } - const minCharacterValue = codePointAt(oprand, 0); - const maxCharacterValue = codePointAt(secondOprand, 0); + const minCharacterValue = codePointAt(operand, 0); + const maxCharacterValue = codePointAt(secondOperand, 0); if ( - oprand.length === charSize(minCharacterValue) && - secondOprand.length === charSize(maxCharacterValue) && + operand.length === charSize(minCharacterValue) && + secondOperand.length === charSize(maxCharacterValue) && minCharacterValue > maxCharacterValue ) { error(Diagnostics.Range_out_of_order_in_character_class, start, pos - start); @@ -3049,7 +3049,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean else { error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos - 1, 1, String.fromCharCode(ch)); } - oprand = text.slice(start, pos); + operand = text.slice(start, pos); continue; } if (isClassContentExit(text.charCodeAt(pos))) { @@ -3061,10 +3061,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean case "&&": error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos, 2); pos += 2; - oprand = text.slice(start, pos); + operand = text.slice(start, pos); break; default: - oprand = scanClassSetOprand(); + operand = scanClassSetOperand(); break; } } @@ -3123,10 +3123,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } ch = text.charCodeAt(pos); if (isClassContentExit(ch)) { - error(Diagnostics.Expected_a_class_set_oprand); + error(Diagnostics.Expected_a_class_set_operand); break; } - scanClassSetOprand(); + scanClassSetOperand(); // Used only if expressionType is Intersection expressionMayContainStrings &&= mayContainStrings; } @@ -3138,7 +3138,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // | '\' CharacterClassEscape // | '\q{' ClassStringDisjunctionContents '}' // | ClassSetCharacter - function scanClassSetOprand(): string { + function scanClassSetOperand(): string { mayContainStrings = false; switch (text.charCodeAt(pos)) { case CharacterCodes.openBracket: diff --git a/tests/baselines/reference/regularExpressionScanning(target=es2015).errors.txt b/tests/baselines/reference/regularExpressionScanning(target=es2015).errors.txt index c2a61dc5f1dcc..7cf62fcaba4a2 100644 --- a/tests/baselines/reference/regularExpressionScanning(target=es2015).errors.txt +++ b/tests/baselines/reference/regularExpressionScanning(target=es2015).errors.txt @@ -157,8 +157,8 @@ regularExpressionScanning.ts(37,61): error TS1508: Unexpected '}'. Did you mean regularExpressionScanning.ts(37,63): error TS1517: Range out of order in character class. regularExpressionScanning.ts(37,76): error TS1535: This character cannot be escaped in a regular expression. regularExpressionScanning.ts(38,8): error TS1005: '--' expected. -regularExpressionScanning.ts(38,9): error TS1520: Expected a class set oprand. -regularExpressionScanning.ts(38,11): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,9): error TS1520: Expected a class set operand. +regularExpressionScanning.ts(38,11): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,12): error TS1005: '--' expected. regularExpressionScanning.ts(38,15): error TS1522: A character class must not contain a reserved double punctuator. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,20): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. @@ -166,15 +166,15 @@ regularExpressionScanning.ts(38,28): error TS1519: Operators must not be mixed w regularExpressionScanning.ts(38,40): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(38,47): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(38,49): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. -regularExpressionScanning.ts(38,50): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,50): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,55): error TS1511: '\q' is only available inside character class. regularExpressionScanning.ts(38,57): error TS1508: Unexpected '{'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,61): error TS1508: Unexpected '}'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,66): error TS1508: Unexpected '-'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,67): error TS1005: '--' expected. -regularExpressionScanning.ts(38,70): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,70): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,75): error TS1508: Unexpected '&'. Did you mean to escape it with backslash? -regularExpressionScanning.ts(38,85): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,85): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,87): error TS1501: This regular expression flag is only available when targeting 'esnext' or later. regularExpressionScanning.ts(39,56): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(39,67): error TS1005: '&&' expected. @@ -561,9 +561,9 @@ regularExpressionScanning.ts(47,101): error TS1501: This regular expression flag !!! error TS1005: '--' expected. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. !!! error TS1005: '--' expected. ~~ @@ -579,7 +579,7 @@ regularExpressionScanning.ts(47,101): error TS1501: This regular expression flag ~ !!! error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. ~~ !!! error TS1511: '\q' is only available inside character class. ~ @@ -591,11 +591,11 @@ regularExpressionScanning.ts(47,101): error TS1501: This regular expression flag !!! error TS1005: '--' expected. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. ~ !!! error TS1508: Unexpected '&'. Did you mean to escape it with backslash? -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. ~ !!! error TS1501: This regular expression flag is only available when targeting 'esnext' or later. /[[^\P{Decimal_Number}&&[0-9]]&&\p{L}&&\p{ID_Continue}--\p{ASCII}\p{CWCF}]/v, diff --git a/tests/baselines/reference/regularExpressionScanning(target=es5).errors.txt b/tests/baselines/reference/regularExpressionScanning(target=es5).errors.txt index 83b45f8443492..1db06a65a8c1e 100644 --- a/tests/baselines/reference/regularExpressionScanning(target=es5).errors.txt +++ b/tests/baselines/reference/regularExpressionScanning(target=es5).errors.txt @@ -164,8 +164,8 @@ regularExpressionScanning.ts(37,63): error TS1517: Range out of order in charact regularExpressionScanning.ts(37,76): error TS1535: This character cannot be escaped in a regular expression. regularExpressionScanning.ts(37,87): error TS1501: This regular expression flag is only available when targeting 'es6' or later. regularExpressionScanning.ts(38,8): error TS1005: '--' expected. -regularExpressionScanning.ts(38,9): error TS1520: Expected a class set oprand. -regularExpressionScanning.ts(38,11): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,9): error TS1520: Expected a class set operand. +regularExpressionScanning.ts(38,11): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,12): error TS1005: '--' expected. regularExpressionScanning.ts(38,15): error TS1522: A character class must not contain a reserved double punctuator. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,20): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. @@ -173,15 +173,15 @@ regularExpressionScanning.ts(38,28): error TS1519: Operators must not be mixed w regularExpressionScanning.ts(38,40): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(38,47): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(38,49): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. -regularExpressionScanning.ts(38,50): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,50): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,55): error TS1511: '\q' is only available inside character class. regularExpressionScanning.ts(38,57): error TS1508: Unexpected '{'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,61): error TS1508: Unexpected '}'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,66): error TS1508: Unexpected '-'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,67): error TS1005: '--' expected. -regularExpressionScanning.ts(38,70): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,70): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,75): error TS1508: Unexpected '&'. Did you mean to escape it with backslash? -regularExpressionScanning.ts(38,85): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,85): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,87): error TS1501: This regular expression flag is only available when targeting 'esnext' or later. regularExpressionScanning.ts(39,56): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(39,67): error TS1005: '&&' expected. @@ -582,9 +582,9 @@ regularExpressionScanning.ts(47,101): error TS1501: This regular expression flag !!! error TS1005: '--' expected. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. !!! error TS1005: '--' expected. ~~ @@ -600,7 +600,7 @@ regularExpressionScanning.ts(47,101): error TS1501: This regular expression flag ~ !!! error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. ~~ !!! error TS1511: '\q' is only available inside character class. ~ @@ -612,11 +612,11 @@ regularExpressionScanning.ts(47,101): error TS1501: This regular expression flag !!! error TS1005: '--' expected. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. ~ !!! error TS1508: Unexpected '&'. Did you mean to escape it with backslash? -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. ~ !!! error TS1501: This regular expression flag is only available when targeting 'esnext' or later. /[[^\P{Decimal_Number}&&[0-9]]&&\p{L}&&\p{ID_Continue}--\p{ASCII}\p{CWCF}]/v, diff --git a/tests/baselines/reference/regularExpressionScanning(target=esnext).errors.txt b/tests/baselines/reference/regularExpressionScanning(target=esnext).errors.txt index 3fd6f79843695..9b641966e016a 100644 --- a/tests/baselines/reference/regularExpressionScanning(target=esnext).errors.txt +++ b/tests/baselines/reference/regularExpressionScanning(target=esnext).errors.txt @@ -141,8 +141,8 @@ regularExpressionScanning.ts(37,61): error TS1508: Unexpected '}'. Did you mean regularExpressionScanning.ts(37,63): error TS1517: Range out of order in character class. regularExpressionScanning.ts(37,76): error TS1535: This character cannot be escaped in a regular expression. regularExpressionScanning.ts(38,8): error TS1005: '--' expected. -regularExpressionScanning.ts(38,9): error TS1520: Expected a class set oprand. -regularExpressionScanning.ts(38,11): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,9): error TS1520: Expected a class set operand. +regularExpressionScanning.ts(38,11): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,12): error TS1005: '--' expected. regularExpressionScanning.ts(38,15): error TS1522: A character class must not contain a reserved double punctuator. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,20): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. @@ -150,15 +150,15 @@ regularExpressionScanning.ts(38,28): error TS1519: Operators must not be mixed w regularExpressionScanning.ts(38,40): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(38,47): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(38,49): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. -regularExpressionScanning.ts(38,50): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,50): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,55): error TS1511: '\q' is only available inside character class. regularExpressionScanning.ts(38,57): error TS1508: Unexpected '{'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,61): error TS1508: Unexpected '}'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,66): error TS1508: Unexpected '-'. Did you mean to escape it with backslash? regularExpressionScanning.ts(38,67): error TS1005: '--' expected. -regularExpressionScanning.ts(38,70): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,70): error TS1520: Expected a class set operand. regularExpressionScanning.ts(38,75): error TS1508: Unexpected '&'. Did you mean to escape it with backslash? -regularExpressionScanning.ts(38,85): error TS1520: Expected a class set oprand. +regularExpressionScanning.ts(38,85): error TS1520: Expected a class set operand. regularExpressionScanning.ts(39,56): error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. regularExpressionScanning.ts(39,67): error TS1005: '&&' expected. regularExpressionScanning.ts(41,5): error TS1518: Anything that would possibly match more than a single character is invalid inside a negated character class. @@ -503,9 +503,9 @@ regularExpressionScanning.ts(47,89): error TS1518: Anything that would possibly !!! error TS1005: '--' expected. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. !!! error TS1005: '--' expected. ~~ @@ -521,7 +521,7 @@ regularExpressionScanning.ts(47,89): error TS1518: Anything that would possibly ~ !!! error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. ~~ !!! error TS1511: '\q' is only available inside character class. ~ @@ -533,11 +533,11 @@ regularExpressionScanning.ts(47,89): error TS1518: Anything that would possibly !!! error TS1005: '--' expected. -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. ~ !!! error TS1508: Unexpected '&'. Did you mean to escape it with backslash? -!!! error TS1520: Expected a class set oprand. +!!! error TS1520: Expected a class set operand. /[[^\P{Decimal_Number}&&[0-9]]&&\p{L}&&\p{ID_Continue}--\p{ASCII}\p{CWCF}]/v, ~~ !!! error TS1519: Operators must not be mixed within a character class. Wrap it in a nested class instead. From 2af307eae78d7e4985ebced3052fbb5faf89db71 Mon Sep 17 00:00:00 2001 From: Ron Buckton Date: Fri, 26 Apr 2024 16:46:30 -0400 Subject: [PATCH 3/8] Reduce array/set allocation count --- src/compiler/scanner.ts | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index a254b3bc0c114..ed6885ea480c3 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -21,7 +21,6 @@ import { KeywordSyntaxKind, LanguageFeatureMinimumTarget, LanguageVariant, - last, LineAndCharacter, MapLike, parsePseudoBigInt, @@ -2485,10 +2484,14 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } function scanRegularExpressionWorker(text: string, end: number, regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean) { + // Why var? It avoids TDZ checks in the runtime which can be costly. + // See: https://github.com/microsoft/TypeScript/issues/52924 + /* eslint-disable no-var */ + /** Grammar parameter */ - const unicodeSetsMode = !!(regExpFlags & RegularExpressionFlags.UnicodeSets); + var unicodeSetsMode = !!(regExpFlags & RegularExpressionFlags.UnicodeSets); /** Grammar parameter */ - const unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode); + var unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode); if (unicodeMode) { // Annex B treats any unicode mode as the strict syntax. @@ -2496,25 +2499,27 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } /** @see {scanClassSetExpression} */ - let mayContainStrings = false; + var mayContainStrings = false; /** The number of numeric (anonymous) capturing groups defined in the regex. */ - let numberOfCapturingGroups = 0; + var numberOfCapturingGroups = 0; /** All named capturing groups defined in the regex. */ - const groupSpecifiers = new Set(); + var groupSpecifiers: Set | undefined; /** All references to named capturing groups in the regex. */ - const groupNameReferences: (TextRange & { name: string; })[] = []; + var groupNameReferences: (TextRange & { name: string; })[] | undefined; /** All numeric backreferences within the regex. */ - const decimalEscapes: (TextRange & { value: number; })[] = []; + var decimalEscapes: (TextRange & { value: number; })[] | undefined; /** A stack of scopes for named capturing groups. @see {scanGroupName} */ - const namedCapturingGroups: Set[] = []; + var namedCapturingGroupsScopeStack: (Set | undefined)[] = []; + var topNamedCapturingGroupsScope: Set | undefined; // Disjunction ::= Alternative ('|' Alternative)* function scanDisjunction(isInGroup: boolean) { while (true) { - namedCapturingGroups.push(new Set()); + namedCapturingGroupsScopeStack.push(topNamedCapturingGroupsScope); + topNamedCapturingGroupsScope = undefined; scanAlternative(isInGroup); - namedCapturingGroups.pop(); + topNamedCapturingGroupsScope = namedCapturingGroupsScopeStack.pop(); if (text.charCodeAt(pos) !== CharacterCodes.bar) { return; } @@ -2786,7 +2791,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean if (ch >= CharacterCodes._1 && ch <= CharacterCodes._9) { const start = pos; scanDigits(); - decimalEscapes.push({ pos: start, end: pos, value: +tokenValue }); + decimalEscapes = append(decimalEscapes, { pos: start, end: pos, value: +tokenValue }); return true; } return false; @@ -2858,13 +2863,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean error(Diagnostics.Expected_a_capturing_group_name); } else if (isReference) { - groupNameReferences.push({ pos: tokenStart, end: pos, name: tokenValue }); + groupNameReferences = append(groupNameReferences, { pos: tokenStart, end: pos, name: tokenValue }); } - else if (namedCapturingGroups.some(group => group.has(tokenValue))) { + else if (topNamedCapturingGroupsScope?.has(tokenValue) || namedCapturingGroupsScopeStack.some(group => group?.has(tokenValue))) { error(Diagnostics.Named_capturing_groups_with_the_same_name_must_be_mutually_exclusive_to_each_other, tokenStart, pos - tokenStart); } else { - last(namedCapturingGroups).add(tokenValue); + topNamedCapturingGroupsScope ??= new Set(); + topNamedCapturingGroupsScope.add(tokenValue); + groupSpecifiers ??= new Set(); groupSpecifiers.add(tokenValue); } } @@ -3422,7 +3429,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean scanDisjunction(/*isInGroup*/ false); forEach(groupNameReferences, reference => { - if (!groupSpecifiers.has(reference.name)) { + if (!groupSpecifiers?.has(reference.name)) { error(Diagnostics.There_is_no_capturing_group_named_0_in_this_regular_expression, reference.pos, reference.end - reference.pos, reference.name); } }); From 673d843985a62e438e00c2f2a8fd6485371cb6dd Mon Sep 17 00:00:00 2001 From: Ron Buckton Date: Fri, 26 Apr 2024 16:59:40 -0400 Subject: [PATCH 4/8] Use existing text/end from closure --- src/compiler/scanner.ts | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index ed6885ea480c3..85d9fcdf439b4 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -2463,19 +2463,19 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } p++; } + pos = p; if (reportErrors) { - pos = tokenStart + 1; - const saveTokenPos = tokenStart; + const saveTokenStart = tokenStart; const saveTokenFlags = tokenFlags; - scanRegularExpressionWorker(text, endOfBody, regExpFlags, isUnterminated, /*annexB*/ true); - if (!isUnterminated) { - pos = p; - } - tokenStart = saveTokenPos; + const savePos = pos; + const saveEnd = end; + pos = tokenStart + 1; + end = endOfBody; + scanRegularExpressionWorker(regExpFlags, isUnterminated, /*annexB*/ true); + tokenStart = saveTokenStart; tokenFlags = saveTokenFlags; - } - else { - pos = p; + pos = savePos; + end = saveEnd; } tokenValue = text.substring(tokenStart, pos); token = SyntaxKind.RegularExpressionLiteral; @@ -2483,7 +2483,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return token; } - function scanRegularExpressionWorker(text: string, end: number, regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean) { + function scanRegularExpressionWorker(regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean) { // Why var? It avoids TDZ checks in the runtime which can be costly. // See: https://github.com/microsoft/TypeScript/issues/52924 /* eslint-disable no-var */ From 83379bea4208d019243c4c96a2effd63cf729eb7 Mon Sep 17 00:00:00 2001 From: Ron Buckton Date: Fri, 26 Apr 2024 18:10:52 -0400 Subject: [PATCH 5/8] Make unchecked reads of 'text' explicit --- src/compiler/scanner.ts | 352 ++++++++++++++++++++++------------------ src/compiler/types.ts | 1 + 2 files changed, 194 insertions(+), 159 deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 85d9fcdf439b4..0415871062c6a 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -1100,6 +1100,40 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return scanner; + /** + * Returns the code point for the character at the given position within `text`. This + * should only be used when pos is guaranteed to be within the bounds of `text` as this + * function does not perform bounds checks. + */ + function codePointUnchecked(pos: number) { + return codePointAt(text, pos); + } + + // /** + // * Returns the code point for the character at the given position within `text`. If + // * `pos` is outside the bounds set for `text`, `CharacterCodes.EOF` is returned instead. + // */ + // function codePointChecked(pos: number) { + // return pos >= 0 && pos < end ? codePointUnchecked(pos) : CharacterCodes.EOF; + // } + + /** + * Returns the char code for the character at the given position within `text`. This + * should only be used when pos is guaranteed to be within the bounds of `text` as this + * function does not perform bounds checks. + */ + function charCodeUnchecked(pos: number) { + return text.charCodeAt(pos); + } + + // /** + // * Returns the char code for the character at the given position within `text`. If + // * `pos` is outside the bounds set for `text`, `CharacterCodes.EOF` is returned instead. + // */ + // function charCodeChecked(pos: number) { + // return pos >= 0 && pos < end ? charCodeUnchecked(pos) : CharacterCodes.EOF; + // } + function error(message: DiagnosticMessage): void; function error(message: DiagnosticMessage, errPos: number, length: number, arg0?: any): void; function error(message: DiagnosticMessage, errPos: number = pos, length?: number, arg0?: any): void { @@ -1117,7 +1151,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean let isPreviousTokenSeparator = false; let result = ""; while (true) { - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); if (ch === CharacterCodes._) { tokenFlags |= TokenFlags.ContainsSeparator; if (allowSeparator) { @@ -1146,7 +1180,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } break; } - if (text.charCodeAt(pos - 1) === CharacterCodes._) { + if (charCodeUnchecked(pos - 1) === CharacterCodes._) { tokenFlags |= TokenFlags.ContainsInvalidSeparator; error(Diagnostics.Numeric_separators_are_not_allowed_here, pos - 1, 1); } @@ -1176,9 +1210,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanNumber(): SyntaxKind { let start = pos; let mainFragment: string; - if (text.charCodeAt(pos) === CharacterCodes._0) { + if (charCodeUnchecked(pos) === CharacterCodes._0) { pos++; - if (text.charCodeAt(pos) === CharacterCodes._) { + if (charCodeUnchecked(pos) === CharacterCodes._) { tokenFlags |= TokenFlags.ContainsSeparator | TokenFlags.ContainsInvalidSeparator; error(Diagnostics.Numeric_separators_are_not_allowed_here, pos, 1); // treat it as a normal number literal @@ -1212,15 +1246,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } let decimalFragment: string | undefined; let scientificFragment: string | undefined; - if (text.charCodeAt(pos) === CharacterCodes.dot) { + if (charCodeUnchecked(pos) === CharacterCodes.dot) { pos++; decimalFragment = scanNumberFragment(); } let end = pos; - if (text.charCodeAt(pos) === CharacterCodes.E || text.charCodeAt(pos) === CharacterCodes.e) { + if (charCodeUnchecked(pos) === CharacterCodes.E || charCodeUnchecked(pos) === CharacterCodes.e) { pos++; tokenFlags |= TokenFlags.Scientific; - if (text.charCodeAt(pos) === CharacterCodes.plus || text.charCodeAt(pos) === CharacterCodes.minus) pos++; + if (charCodeUnchecked(pos) === CharacterCodes.plus || charCodeUnchecked(pos) === CharacterCodes.minus) pos++; const preNumericPart = pos; const finalFragment = scanNumberFragment(); if (!finalFragment) { @@ -1267,7 +1301,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } function checkForIdentifierStartAfterNumericLiteral(numericStart: number, isScientific?: boolean) { - if (!isIdentifierStart(codePointAt(text, pos), languageVersion)) { + if (!isIdentifierStart(codePointUnchecked(pos), languageVersion)) { return; } @@ -1291,8 +1325,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanDigits(): boolean { const start = pos; let isOctal = true; - while (isDigit(text.charCodeAt(pos))) { - if (!isOctalDigit(text.charCodeAt(pos))) { + while (isDigit(charCodeUnchecked(pos))) { + if (!isOctalDigit(charCodeUnchecked(pos))) { isOctal = false; } pos++; @@ -1323,7 +1357,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean let allowSeparator = false; let isPreviousTokenSeparator = false; while (valueChars.length < minCount || scanAsManyAsPossible) { - let ch = text.charCodeAt(pos); + let ch = charCodeUnchecked(pos); if (canHaveSeparators && ch === CharacterCodes._) { tokenFlags |= TokenFlags.ContainsSeparator; if (allowSeparator) { @@ -1356,14 +1390,14 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean if (valueChars.length < minCount) { valueChars = []; } - if (text.charCodeAt(pos - 1) === CharacterCodes._) { + if (charCodeUnchecked(pos - 1) === CharacterCodes._) { error(Diagnostics.Numeric_separators_are_not_allowed_here, pos - 1, 1); } return String.fromCharCode(...valueChars); } function scanString(jsxAttributeString = false): string { - const quote = text.charCodeAt(pos); + const quote = charCodeUnchecked(pos); pos++; let result = ""; let start = pos; @@ -1374,7 +1408,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean error(Diagnostics.Unterminated_string_literal); break; } - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); if (ch === quote) { result += text.substring(start, pos); pos++; @@ -1403,7 +1437,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean * a literal component of a TemplateExpression. */ function scanTemplateAndSetTokenValue(shouldEmitInvalidEscapeError: boolean): SyntaxKind { - const startedWithBacktick = text.charCodeAt(pos) === CharacterCodes.backtick; + const startedWithBacktick = charCodeUnchecked(pos) === CharacterCodes.backtick; pos++; let start = pos; @@ -1419,7 +1453,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean break; } - const currChar = text.charCodeAt(pos); + const currChar = charCodeUnchecked(pos); // '`' if (currChar === CharacterCodes.backtick) { @@ -1430,7 +1464,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } // '${' - if (currChar === CharacterCodes.$ && pos + 1 < end && text.charCodeAt(pos + 1) === CharacterCodes.openBrace) { + if (currChar === CharacterCodes.$ && pos + 1 < end && charCodeUnchecked(pos + 1) === CharacterCodes.openBrace) { contents += text.substring(start, pos); pos += 2; resultingToken = startedWithBacktick ? SyntaxKind.TemplateHead : SyntaxKind.TemplateMiddle; @@ -1451,7 +1485,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean contents += text.substring(start, pos); pos++; - if (pos < end && text.charCodeAt(pos) === CharacterCodes.lineFeed) { + if (pos < end && charCodeUnchecked(pos) === CharacterCodes.lineFeed) { pos++; } @@ -1490,13 +1524,13 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean error(Diagnostics.Unexpected_end_of_text); return ""; } - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); pos++; switch (ch) { case CharacterCodes._0: // Although '0' preceding any digit is treated as LegacyOctalEscapeSequence, // '\08' should separately be interpreted as '\0' + '8'. - if (pos >= end || !isDigit(text.charCodeAt(pos))) { + if (pos >= end || !isDigit(charCodeUnchecked(pos))) { return "\0"; } // '\01', '\011' @@ -1505,7 +1539,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean case CharacterCodes._2: case CharacterCodes._3: // '\1', '\17', '\177' - if (pos < end && isOctalDigit(text.charCodeAt(pos))) { + if (pos < end && isOctalDigit(charCodeUnchecked(pos))) { pos++; } // '\17', '\177' @@ -1515,7 +1549,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean case CharacterCodes._6: case CharacterCodes._7: // '\4', '\47' but not '\477' - if (pos < end && isOctalDigit(text.charCodeAt(pos))) { + if (pos < end && isOctalDigit(charCodeUnchecked(pos))) { pos++; } // '\47' @@ -1556,7 +1590,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean case CharacterCodes.u: if ( (!isRegularExpression || shouldEmitInvalidEscapeError) && - pos < end && text.charCodeAt(pos) === CharacterCodes.openBrace + pos < end && charCodeUnchecked(pos) === CharacterCodes.openBrace ) { // '\u{DDDDDD}' pos -= 2; @@ -1564,7 +1598,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } // '\uDDDD' for (; pos < start + 6; pos++) { - if (!(pos < end && isHexDigit(text.charCodeAt(pos)))) { + if (!(pos < end && isHexDigit(charCodeUnchecked(pos)))) { tokenFlags |= TokenFlags.ContainsInvalidEscape; if (isRegularExpression || shouldEmitInvalidEscapeError) { error(Diagnostics.Hexadecimal_digit_expected); @@ -1577,7 +1611,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean const escapedValueString = String.fromCharCode(escapedValue); if ( isRegularExpression && shouldEmitInvalidEscapeError && escapedValue >= 0xD800 && escapedValue <= 0xDBFF && - pos + 6 < end && text.substring(pos, pos + 2) === "\\u" && text.charCodeAt(pos + 2) !== CharacterCodes.openBrace + pos + 6 < end && text.substring(pos, pos + 2) === "\\u" && charCodeUnchecked(pos + 2) !== CharacterCodes.openBrace ) { // For regular expressions in Unicode mode, \u HexLeadSurrogate \u HexTrailSurrogate is treated as a single character // for the purpose of determining whether a character class range is out of order @@ -1585,7 +1619,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean const nextStart = pos; let nextPos = pos + 2; for (; nextPos < nextStart + 6; nextPos++) { - if (!isHexDigit(text.charCodeAt(pos))) { + if (!isHexDigit(charCodeUnchecked(pos))) { // leave the error to the next call return escapedValueString; } @@ -1601,7 +1635,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean case CharacterCodes.x: // '\xDD' for (; pos < start + 4; pos++) { - if (!(pos < end && isHexDigit(text.charCodeAt(pos)))) { + if (!(pos < end && isHexDigit(charCodeUnchecked(pos)))) { tokenFlags |= TokenFlags.ContainsInvalidEscape; if (isRegularExpression || shouldEmitInvalidEscapeError) { error(Diagnostics.Hexadecimal_digit_expected); @@ -1615,7 +1649,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // when encountering a LineContinuation (i.e. a backslash and a line terminator sequence), // the line terminator is interpreted to be "the empty code unit sequence". case CharacterCodes.carriageReturn: - if (pos < end && text.charCodeAt(pos) === CharacterCodes.lineFeed) { + if (pos < end && charCodeUnchecked(pos) === CharacterCodes.lineFeed) { pos++; } // falls through @@ -1659,7 +1693,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } isInvalidExtendedEscape = true; } - else if (text.charCodeAt(pos) === CharacterCodes.closeBrace) { + else if (charCodeUnchecked(pos) === CharacterCodes.closeBrace) { // Only swallow the following character up if it's a '}'. pos++; } @@ -1682,7 +1716,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // Current character is known to be a backslash. Check for Unicode escape of the form '\uXXXX' // and return code point value if valid Unicode escape is found. Otherwise return -1. function peekUnicodeEscape(): number { - if (pos + 5 < end && text.charCodeAt(pos + 1) === CharacterCodes.u) { + if (pos + 5 < end && charCodeUnchecked(pos + 1) === CharacterCodes.u) { const start = pos; pos += 2; const value = scanExactNumberOfHexDigits(4, /*canHaveSeparators*/ false); @@ -1693,7 +1727,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } function peekExtendedUnicodeEscape(): number { - if (codePointAt(text, pos + 1) === CharacterCodes.u && codePointAt(text, pos + 2) === CharacterCodes.openBrace) { + if (codePointUnchecked(pos + 1) === CharacterCodes.u && codePointUnchecked(pos + 2) === CharacterCodes.openBrace) { const start = pos; pos += 3; const escapedValueString = scanMinimumNumberOfHexDigits(1, /*canHaveSeparators*/ false); @@ -1708,7 +1742,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean let result = ""; let start = pos; while (pos < end) { - let ch = codePointAt(text, pos); + let ch = codePointUnchecked(pos); if (isIdentifierPart(ch, languageVersion)) { pos += charSize(ch); } @@ -1760,7 +1794,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean let separatorAllowed = false; let isPreviousTokenSeparator = false; while (true) { - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); // Numeric separators are allowed anywhere within a numeric literal, except not at the beginning, or following another separator if (ch === CharacterCodes._) { tokenFlags |= TokenFlags.ContainsSeparator; @@ -1785,7 +1819,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; isPreviousTokenSeparator = false; } - if (text.charCodeAt(pos - 1) === CharacterCodes._) { + if (charCodeUnchecked(pos - 1) === CharacterCodes._) { // Literal ends with underscore - not allowed error(Diagnostics.Numeric_separators_are_not_allowed_here, pos - 1, 1); } @@ -1793,7 +1827,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } function checkBigIntSuffix(): SyntaxKind { - if (text.charCodeAt(pos) === CharacterCodes.n) { + if (charCodeUnchecked(pos) === CharacterCodes.n) { tokenValue += "n"; // Use base 10 instead of base 2 or base 8 for shorter literals if (tokenFlags & TokenFlags.BinaryOrOctalSpecifier) { @@ -1824,7 +1858,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return token = SyntaxKind.EndOfFileToken; } - const ch = codePointAt(text, pos); + const ch = codePointUnchecked(pos); if (pos === 0) { // Special handling for shebang if (ch === CharacterCodes.hash && isShebangTrivia(text, pos)) { @@ -1847,7 +1881,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean continue; } else { - if (ch === CharacterCodes.carriageReturn && pos + 1 < end && text.charCodeAt(pos + 1) === CharacterCodes.lineFeed) { + if (ch === CharacterCodes.carriageReturn && pos + 1 < end && charCodeUnchecked(pos + 1) === CharacterCodes.lineFeed) { // consume both CR and LF pos += 2; } @@ -1883,14 +1917,14 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean continue; } else { - while (pos < end && isWhiteSpaceSingleLine(text.charCodeAt(pos))) { + while (pos < end && isWhiteSpaceSingleLine(charCodeUnchecked(pos))) { pos++; } return token = SyntaxKind.WhitespaceTrivia; } case CharacterCodes.exclamation: - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { - if (text.charCodeAt(pos + 2) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { return pos += 3, token = SyntaxKind.ExclamationEqualsEqualsToken; } return pos += 2, token = SyntaxKind.ExclamationEqualsToken; @@ -1904,19 +1938,19 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean case CharacterCodes.backtick: return token = scanTemplateAndSetTokenValue(/*shouldEmitInvalidEscapeError*/ false); case CharacterCodes.percent: - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.PercentEqualsToken; } pos++; return token = SyntaxKind.PercentToken; case CharacterCodes.ampersand: - if (text.charCodeAt(pos + 1) === CharacterCodes.ampersand) { - if (text.charCodeAt(pos + 2) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.ampersand) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { return pos += 3, token = SyntaxKind.AmpersandAmpersandEqualsToken; } return pos += 2, token = SyntaxKind.AmpersandAmpersandToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.AmpersandEqualsToken; } pos++; @@ -1928,11 +1962,11 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; return token = SyntaxKind.CloseParenToken; case CharacterCodes.asterisk: - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.AsteriskEqualsToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.asterisk) { - if (text.charCodeAt(pos + 2) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.asterisk) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { return pos += 3, token = SyntaxKind.AsteriskAsteriskEqualsToken; } return pos += 2, token = SyntaxKind.AsteriskAsteriskToken; @@ -1945,10 +1979,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } return token = SyntaxKind.AsteriskToken; case CharacterCodes.plus: - if (text.charCodeAt(pos + 1) === CharacterCodes.plus) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.plus) { return pos += 2, token = SyntaxKind.PlusPlusToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.PlusEqualsToken; } pos++; @@ -1957,31 +1991,31 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; return token = SyntaxKind.CommaToken; case CharacterCodes.minus: - if (text.charCodeAt(pos + 1) === CharacterCodes.minus) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.minus) { return pos += 2, token = SyntaxKind.MinusMinusToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.MinusEqualsToken; } pos++; return token = SyntaxKind.MinusToken; case CharacterCodes.dot: - if (isDigit(text.charCodeAt(pos + 1))) { + if (isDigit(charCodeUnchecked(pos + 1))) { scanNumber(); return token = SyntaxKind.NumericLiteral; } - if (text.charCodeAt(pos + 1) === CharacterCodes.dot && text.charCodeAt(pos + 2) === CharacterCodes.dot) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.dot && charCodeUnchecked(pos + 2) === CharacterCodes.dot) { return pos += 3, token = SyntaxKind.DotDotDotToken; } pos++; return token = SyntaxKind.DotToken; case CharacterCodes.slash: // Single-line comment - if (text.charCodeAt(pos + 1) === CharacterCodes.slash) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.slash) { pos += 2; while (pos < end) { - if (isLineBreak(text.charCodeAt(pos))) { + if (isLineBreak(charCodeUnchecked(pos))) { break; } pos++; @@ -2002,16 +2036,16 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } // Multi-line comment - if (text.charCodeAt(pos + 1) === CharacterCodes.asterisk) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.asterisk) { pos += 2; - const isJSDoc = text.charCodeAt(pos) === CharacterCodes.asterisk && text.charCodeAt(pos + 1) !== CharacterCodes.slash; + const isJSDoc = charCodeUnchecked(pos) === CharacterCodes.asterisk && charCodeUnchecked(pos + 1) !== CharacterCodes.slash; let commentClosed = false; let lastLineStart = tokenStart; while (pos < end) { - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); - if (ch === CharacterCodes.asterisk && text.charCodeAt(pos + 1) === CharacterCodes.slash) { + if (ch === CharacterCodes.asterisk && charCodeUnchecked(pos + 1) === CharacterCodes.slash) { pos += 2; commentClosed = true; break; @@ -2046,7 +2080,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.SlashEqualsToken; } @@ -2054,7 +2088,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return token = SyntaxKind.SlashToken; case CharacterCodes._0: - if (pos + 2 < end && (text.charCodeAt(pos + 1) === CharacterCodes.X || text.charCodeAt(pos + 1) === CharacterCodes.x)) { + if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.X || charCodeUnchecked(pos + 1) === CharacterCodes.x)) { pos += 2; tokenValue = scanMinimumNumberOfHexDigits(1, /*canHaveSeparators*/ true); if (!tokenValue) { @@ -2065,7 +2099,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean tokenFlags |= TokenFlags.HexSpecifier; return token = checkBigIntSuffix(); } - else if (pos + 2 < end && (text.charCodeAt(pos + 1) === CharacterCodes.B || text.charCodeAt(pos + 1) === CharacterCodes.b)) { + else if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.B || charCodeUnchecked(pos + 1) === CharacterCodes.b)) { pos += 2; tokenValue = scanBinaryOrOctalDigits(/* base */ 2); if (!tokenValue) { @@ -2076,7 +2110,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean tokenFlags |= TokenFlags.BinarySpecifier; return token = checkBigIntSuffix(); } - else if (pos + 2 < end && (text.charCodeAt(pos + 1) === CharacterCodes.O || text.charCodeAt(pos + 1) === CharacterCodes.o)) { + else if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.O || charCodeUnchecked(pos + 1) === CharacterCodes.o)) { pos += 2; tokenValue = scanBinaryOrOctalDigits(/* base */ 8); if (!tokenValue) { @@ -2115,19 +2149,19 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } - if (text.charCodeAt(pos + 1) === CharacterCodes.lessThan) { - if (text.charCodeAt(pos + 2) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.lessThan) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { return pos += 3, token = SyntaxKind.LessThanLessThanEqualsToken; } return pos += 2, token = SyntaxKind.LessThanLessThanToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.LessThanEqualsToken; } if ( languageVariant === LanguageVariant.JSX && - text.charCodeAt(pos + 1) === CharacterCodes.slash && - text.charCodeAt(pos + 2) !== CharacterCodes.asterisk + charCodeUnchecked(pos + 1) === CharacterCodes.slash && + charCodeUnchecked(pos + 2) !== CharacterCodes.asterisk ) { return pos += 2, token = SyntaxKind.LessThanSlashToken; } @@ -2144,13 +2178,13 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { - if (text.charCodeAt(pos + 2) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { return pos += 3, token = SyntaxKind.EqualsEqualsEqualsToken; } return pos += 2, token = SyntaxKind.EqualsEqualsToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.greaterThan) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.greaterThan) { return pos += 2, token = SyntaxKind.EqualsGreaterThanToken; } pos++; @@ -2169,11 +2203,11 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; return token = SyntaxKind.GreaterThanToken; case CharacterCodes.question: - if (text.charCodeAt(pos + 1) === CharacterCodes.dot && !isDigit(text.charCodeAt(pos + 2))) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.dot && !isDigit(charCodeUnchecked(pos + 2))) { return pos += 2, token = SyntaxKind.QuestionDotToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.question) { - if (text.charCodeAt(pos + 2) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.question) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { return pos += 3, token = SyntaxKind.QuestionQuestionEqualsToken; } return pos += 2, token = SyntaxKind.QuestionQuestionToken; @@ -2187,7 +2221,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; return token = SyntaxKind.CloseBracketToken; case CharacterCodes.caret: - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.CaretEqualsToken; } pos++; @@ -2206,13 +2240,13 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } - if (text.charCodeAt(pos + 1) === CharacterCodes.bar) { - if (text.charCodeAt(pos + 2) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.bar) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { return pos += 3, token = SyntaxKind.BarBarEqualsToken; } return pos += 2, token = SyntaxKind.BarBarToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.BarEqualsToken; } pos++; @@ -2251,7 +2285,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return token = SyntaxKind.Unknown; } - const charAfterHash = codePointAt(text, pos + 1); + const charAfterHash = codePointUnchecked(pos + 1); if (charAfterHash === CharacterCodes.backslash) { pos++; const extendedCookedChar = peekExtendedUnicodeEscape(); @@ -2336,7 +2370,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean Debug.assert(token === SyntaxKind.Unknown, "'reScanInvalidIdentifier' should only be called when the current token is 'SyntaxKind.Unknown'."); pos = tokenStart = fullStartPos; tokenFlags = 0; - const ch = codePointAt(text, pos); + const ch = codePointUnchecked(pos); const identifierKind = scanIdentifier(ch, ScriptTarget.ESNext); if (identifierKind) { return token = identifierKind; @@ -2349,7 +2383,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean let ch = startCharacter; if (isIdentifierStart(ch, languageVersion)) { pos += charSize(ch); - while (pos < end && isIdentifierPart(ch = codePointAt(text, pos), languageVersion)) pos += charSize(ch); + while (pos < end && isIdentifierPart(ch = codePointUnchecked(pos), languageVersion)) pos += charSize(ch); tokenValue = text.substring(tokenStart, pos); if (ch === CharacterCodes.backslash) { tokenValue += scanIdentifierParts(); @@ -2360,20 +2394,20 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function reScanGreaterToken(): SyntaxKind { if (token === SyntaxKind.GreaterThanToken) { - if (text.charCodeAt(pos) === CharacterCodes.greaterThan) { - if (text.charCodeAt(pos + 1) === CharacterCodes.greaterThan) { - if (text.charCodeAt(pos + 2) === CharacterCodes.equals) { + if (charCodeUnchecked(pos) === CharacterCodes.greaterThan) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.greaterThan) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { return pos += 3, token = SyntaxKind.GreaterThanGreaterThanGreaterThanEqualsToken; } return pos += 2, token = SyntaxKind.GreaterThanGreaterThanGreaterThanToken; } - if (text.charCodeAt(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { return pos += 2, token = SyntaxKind.GreaterThanGreaterThanEqualsToken; } pos++; return token = SyntaxKind.GreaterThanGreaterThanToken; } - if (text.charCodeAt(pos) === CharacterCodes.equals) { + if (charCodeUnchecked(pos) === CharacterCodes.equals) { pos++; return token = SyntaxKind.GreaterThanEqualsToken; } @@ -2408,7 +2442,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean break; } - const ch = text.charCodeAt(p); + const ch = charCodeUnchecked(p); if (isLineBreak(ch)) { tokenFlags |= TokenFlags.Unterminated; error(Diagnostics.Unterminated_regular_expression_literal); @@ -2441,7 +2475,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean const endOfBody = p - (isUnterminated ? 0 : 1); let regExpFlags = RegularExpressionFlags.None; while (p < end) { - const ch = text.charCodeAt(p); + const ch = charCodeUnchecked(p); if (!isIdentifierPart(ch, languageVersion)) { break; } @@ -2520,7 +2554,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean topNamedCapturingGroupsScope = undefined; scanAlternative(isInGroup); topNamedCapturingGroupsScope = namedCapturingGroupsScopeStack.pop(); - if (text.charCodeAt(pos) !== CharacterCodes.bar) { + if (charCodeUnchecked(pos) !== CharacterCodes.bar) { return; } pos++; @@ -2560,7 +2594,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean let isPreviousTermQuantifiable = false; while (pos < end) { const start = pos; - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); switch (ch) { case CharacterCodes.caret: case CharacterCodes.$: @@ -2569,7 +2603,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean break; case CharacterCodes.backslash: pos++; - switch (text.charCodeAt(pos)) { + switch (charCodeUnchecked(pos)) { case CharacterCodes.b: case CharacterCodes.B: pos++; @@ -2583,9 +2617,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean break; case CharacterCodes.openParen: pos++; - if (text.charCodeAt(pos) === CharacterCodes.question) { + if (charCodeUnchecked(pos) === CharacterCodes.question) { pos++; - switch (text.charCodeAt(pos)) { + switch (charCodeUnchecked(pos)) { case CharacterCodes.equals: case CharacterCodes.exclamation: pos++; @@ -2595,7 +2629,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean case CharacterCodes.lessThan: const groupNameStart = pos; pos++; - switch (text.charCodeAt(pos)) { + switch (charCodeUnchecked(pos)) { case CharacterCodes.equals: case CharacterCodes.exclamation: pos++; @@ -2615,7 +2649,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean default: const start = pos; const setFlags = scanPatternModifiers(RegularExpressionFlags.None); - if (text.charCodeAt(pos) === CharacterCodes.minus) { + if (charCodeUnchecked(pos) === CharacterCodes.minus) { pos++; scanPatternModifiers(setFlags); if (pos === start + 1) { @@ -2639,12 +2673,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean const digitsStart = pos; scanDigits(); const min = tokenValue; - if (text.charCodeAt(pos) === CharacterCodes.comma) { + if (charCodeUnchecked(pos) === CharacterCodes.comma) { pos++; scanDigits(); const max = tokenValue; if (!min) { - if (max || text.charCodeAt(pos) === CharacterCodes.closeBrace) { + if (max || charCodeUnchecked(pos) === CharacterCodes.closeBrace) { error(Diagnostics.Incomplete_quantifier_Digit_expected, digitsStart, 0); } else { @@ -2673,7 +2707,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean case CharacterCodes.plus: case CharacterCodes.question: pos++; - if (text.charCodeAt(pos) === CharacterCodes.question) { + if (charCodeUnchecked(pos) === CharacterCodes.question) { // Non-greedy pos++; } @@ -2727,7 +2761,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanPatternModifiers(currFlags: RegularExpressionFlags): RegularExpressionFlags { while (pos < end) { - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); if (!isIdentifierPart(ch, languageVersion)) { break; } @@ -2756,11 +2790,11 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // | CharacterEscape // | 'k<' RegExpIdentifierName '>' function scanAtomEscape() { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash); - switch (text.charCodeAt(pos)) { + Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.backslash); + switch (charCodeUnchecked(pos)) { case CharacterCodes.k: pos++; - if (text.charCodeAt(pos) === CharacterCodes.lessThan) { + if (charCodeUnchecked(pos) === CharacterCodes.lessThan) { pos++; scanGroupName(/*isReference*/ true); scanExpectedChar(CharacterCodes.greaterThan); @@ -2786,8 +2820,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // DecimalEscape ::= [1-9] [0-9]* function scanDecimalEscape(): boolean { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash); - const ch = text.charCodeAt(pos); + Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.backslash); + const ch = charCodeUnchecked(pos); if (ch >= CharacterCodes._1 && ch <= CharacterCodes._9) { const start = pos; scanDigits(); @@ -2805,12 +2839,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // | '^' | '$' | '/' | '\' | '.' | '*' | '+' | '?' | '(' | ')' | '[' | ']' | '{' | '}' | '|' // | [~UnicodeMode] (any other non-identifier characters) function scanCharacterEscape(atomEscape: boolean): string { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash); - let ch = text.charCodeAt(pos); + Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.backslash); + let ch = charCodeUnchecked(pos); switch (ch) { case CharacterCodes.c: pos++; - ch = text.charCodeAt(pos); + ch = charCodeUnchecked(pos); if (isASCIILetter(ch)) { pos++; return String.fromCharCode(ch & 0x1f); @@ -2856,9 +2890,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } function scanGroupName(isReference: boolean) { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.lessThan); + Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.lessThan); tokenStart = pos; - scanIdentifier(codePointAt(text, pos), languageVersion); + scanIdentifier(codePointUnchecked(pos), languageVersion); if (pos === tokenStart) { error(Diagnostics.Expected_a_capturing_group_name); } @@ -2882,21 +2916,21 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // ClassRanges ::= '^'? (ClassAtom ('-' ClassAtom)?)* function scanClassRanges() { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.openBracket); - if (text.charCodeAt(pos) === CharacterCodes.caret) { + Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.openBracket); + if (charCodeUnchecked(pos) === CharacterCodes.caret) { // character complement pos++; } while (pos < end) { - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); if (isClassContentExit(ch)) { return; } const minStart = pos; const minCharacter = scanClassAtom(); - if (text.charCodeAt(pos) === CharacterCodes.minus) { + if (charCodeUnchecked(pos) === CharacterCodes.minus) { pos++; - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); if (isClassContentExit(ch)) { return; } @@ -2941,14 +2975,14 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // ClassSubtraction ::= ClassSetOperand ('--' ClassSetOperand)+ // ClassSetRange ::= ClassSetCharacter '-' ClassSetCharacter function scanClassSetExpression() { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.openBracket); + Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.openBracket); let isCharacterComplement = false; - if (text.charCodeAt(pos) === CharacterCodes.caret) { + if (charCodeUnchecked(pos) === CharacterCodes.caret) { pos++; isCharacterComplement = true; } let expressionMayContainStrings = false; - let ch = text.charCodeAt(pos); + let ch = charCodeUnchecked(pos); if (isClassContentExit(ch)) { return; } @@ -2964,9 +2998,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean operand = scanClassSetOperand(); break; } - switch (text.charCodeAt(pos)) { + switch (charCodeUnchecked(pos)) { case CharacterCodes.minus: - if (text.charCodeAt(pos + 1) === CharacterCodes.minus) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.minus) { if (isCharacterComplement && mayContainStrings) { error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, start, pos - start); } @@ -2977,7 +3011,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } break; case CharacterCodes.ampersand: - if (text.charCodeAt(pos + 1) === CharacterCodes.ampersand) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.ampersand) { scanClassSetSubExpression(ClassSetExpressionType.ClassIntersection); if (isCharacterComplement && mayContainStrings) { error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, start, pos - start); @@ -2998,11 +3032,11 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean break; } while (pos < end) { - ch = text.charCodeAt(pos); + ch = charCodeUnchecked(pos); switch (ch) { case CharacterCodes.minus: pos++; - ch = text.charCodeAt(pos); + ch = charCodeUnchecked(pos); if (isClassContentExit(ch)) { mayContainStrings = !isCharacterComplement && expressionMayContainStrings; return; @@ -3045,10 +3079,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean case CharacterCodes.ampersand: start = pos; pos++; - if (text.charCodeAt(pos) === CharacterCodes.ampersand) { + if (charCodeUnchecked(pos) === CharacterCodes.ampersand) { pos++; error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); - if (text.charCodeAt(pos) === CharacterCodes.ampersand) { + if (charCodeUnchecked(pos) === CharacterCodes.ampersand) { error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); pos++; } @@ -3059,7 +3093,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean operand = text.slice(start, pos); continue; } - if (isClassContentExit(text.charCodeAt(pos))) { + if (isClassContentExit(charCodeUnchecked(pos))) { break; } start = pos; @@ -3081,7 +3115,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanClassSetSubExpression(expressionType: ClassSetExpressionType) { let expressionMayContainStrings = mayContainStrings; while (pos < end) { - let ch = text.charCodeAt(pos); + let ch = charCodeUnchecked(pos); if (isClassContentExit(ch)) { break; } @@ -3089,7 +3123,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean switch (ch) { case CharacterCodes.minus: pos++; - if (text.charCodeAt(pos) === CharacterCodes.minus) { + if (charCodeUnchecked(pos) === CharacterCodes.minus) { pos++; if (expressionType !== ClassSetExpressionType.ClassSubtraction) { error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); @@ -3101,12 +3135,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean break; case CharacterCodes.ampersand: pos++; - if (text.charCodeAt(pos) === CharacterCodes.ampersand) { + if (charCodeUnchecked(pos) === CharacterCodes.ampersand) { pos++; if (expressionType !== ClassSetExpressionType.ClassIntersection) { error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); } - if (text.charCodeAt(pos) === CharacterCodes.ampersand) { + if (charCodeUnchecked(pos) === CharacterCodes.ampersand) { error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); pos++; } @@ -3128,7 +3162,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } break; } - ch = text.charCodeAt(pos); + ch = charCodeUnchecked(pos); if (isClassContentExit(ch)) { error(Diagnostics.Expected_a_class_set_operand); break; @@ -3147,7 +3181,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // | ClassSetCharacter function scanClassSetOperand(): string { mayContainStrings = false; - switch (text.charCodeAt(pos)) { + switch (charCodeUnchecked(pos)) { case CharacterCodes.openBracket: pos++; scanClassSetExpression(); @@ -3158,9 +3192,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean if (scanCharacterClassEscape()) { return ""; } - else if (text.charCodeAt(pos) === CharacterCodes.q) { + else if (charCodeUnchecked(pos) === CharacterCodes.q) { pos++; - if (text.charCodeAt(pos) === CharacterCodes.openBrace) { + if (charCodeUnchecked(pos) === CharacterCodes.openBrace) { pos++; scanClassStringDisjunctionContents(); scanExpectedChar(CharacterCodes.closeBrace); @@ -3180,10 +3214,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // ClassStringDisjunctionContents ::= ClassSetCharacter* ('|' ClassSetCharacter*)* function scanClassStringDisjunctionContents() { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.openBrace); + Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.openBrace); let characterCount = 0; while (pos < end) { - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); switch (ch) { case CharacterCodes.closeBrace: if (characterCount !== 1) { @@ -3210,10 +3244,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // | SourceCharacter -- ClassSetSyntaxCharacter -- ClassSetReservedDoublePunctuator // | '\' (CharacterEscape | ClassSetReservedPunctuator | 'b') function scanClassSetCharacter(): string { - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); if (ch === CharacterCodes.backslash) { pos++; - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); switch (ch) { case CharacterCodes.b: pos++; @@ -3238,7 +3272,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return scanCharacterEscape(/*atomEscape*/ false); } } - else if (ch === text.charCodeAt(pos + 1)) { + else if (ch === charCodeUnchecked(pos + 1)) { switch (ch) { case CharacterCodes.ampersand: case CharacterCodes.exclamation: @@ -3288,9 +3322,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // | CharacterClassEscape // | CharacterEscape function scanClassAtom(): string { - if (text.charCodeAt(pos) === CharacterCodes.backslash) { + if (charCodeUnchecked(pos) === CharacterCodes.backslash) { pos++; - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); switch (ch) { case CharacterCodes.b: pos++; @@ -3314,10 +3348,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // | 'd' | 'D' | 's' | 'S' | 'w' | 'W' // | [+UnicodeMode] ('P' | 'p') '{' UnicodePropertyValueExpression '}' function scanCharacterClassEscape(): boolean { - Debug.assertEqual(text.charCodeAt(pos - 1), CharacterCodes.backslash); + Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.backslash); let isCharacterComplement = false; const start = pos - 1; - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); switch (ch) { case CharacterCodes.d: case CharacterCodes.D: @@ -3332,11 +3366,11 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // falls through case CharacterCodes.p: pos++; - if (text.charCodeAt(pos) === CharacterCodes.openBrace) { + if (charCodeUnchecked(pos) === CharacterCodes.openBrace) { pos++; const propertyNameOrValueStart = pos; const propertyNameOrValue = scanWordCharacters(); - if (text.charCodeAt(pos) === CharacterCodes.equals) { + if (charCodeUnchecked(pos) === CharacterCodes.equals) { const propertyName = nonBinaryUnicodeProperties.get(propertyNameOrValue); if (pos === propertyNameOrValueStart) { error(Diagnostics.Expected_a_Unicode_property_name); @@ -3401,7 +3435,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanWordCharacters(): string { let value = ""; while (pos < end) { - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); if (!isWordCharacter(ch)) { break; } @@ -3412,13 +3446,13 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } function scanSourceCharacter(): string { - const size = unicodeMode ? charSize(codePointAt(text, pos)) : 1; + const size = unicodeMode ? charSize(codePointUnchecked(pos)) : 1; pos += size; return text.substring(pos - size, pos); } function scanExpectedChar(ch: CharacterCodes) { - if (text.charCodeAt(pos) === ch) { + if (charCodeUnchecked(pos) === ch) { pos++; } else { @@ -3538,9 +3572,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return token = SyntaxKind.EndOfFileToken; } - let char = text.charCodeAt(pos); + let char = charCodeUnchecked(pos); if (char === CharacterCodes.lessThan) { - if (text.charCodeAt(pos + 1) === CharacterCodes.slash) { + if (charCodeUnchecked(pos + 1) === CharacterCodes.slash) { pos += 2; return token = SyntaxKind.LessThanSlashToken; } @@ -3560,7 +3594,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // firstNonWhitespace = 0 to indicate that we want leading whitespace, while (pos < end) { - char = text.charCodeAt(pos); + char = charCodeUnchecked(pos); if (char === CharacterCodes.openBrace) { break; } @@ -3613,7 +3647,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // Do note that this means that `scanJsxIdentifier` effectively _mutates_ the visible token without advancing to a new token // Any caller should be expecting this behavior and should only read the pos or token value after calling it. while (pos < end) { - const ch = text.charCodeAt(pos); + const ch = charCodeUnchecked(pos); if (ch === CharacterCodes.minus) { tokenValue += "-"; pos++; @@ -3633,7 +3667,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanJsxAttributeValue(): SyntaxKind { fullStartPos = pos; - switch (text.charCodeAt(pos)) { + switch (charCodeUnchecked(pos)) { case CharacterCodes.doubleQuote: case CharacterCodes.singleQuote: tokenValue = scanString(/*jsxAttributeString*/ true); @@ -3655,15 +3689,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean if (pos >= end) { return token = SyntaxKind.EndOfFileToken; } - for (let ch = text.charCodeAt(pos); pos < end && (!isLineBreak(ch) && ch !== CharacterCodes.backtick); ch = codePointAt(text, ++pos)) { + for (let ch = charCodeUnchecked(pos); pos < end && (!isLineBreak(ch) && ch !== CharacterCodes.backtick); ch = codePointUnchecked(++pos)) { if (!inBackticks) { if (ch === CharacterCodes.openBrace) { break; } else if ( ch === CharacterCodes.at - && pos - 1 >= 0 && isWhiteSpaceSingleLine(text.charCodeAt(pos - 1)) - && !(pos + 1 < end && isWhiteSpaceLike(text.charCodeAt(pos + 1))) + && pos - 1 >= 0 && isWhiteSpaceSingleLine(charCodeUnchecked(pos - 1)) + && !(pos + 1 < end && isWhiteSpaceLike(charCodeUnchecked(pos + 1))) ) { // @ doesn't start a new tag inside ``, and elsewhere, only after whitespace and before non-whitespace break; @@ -3684,21 +3718,21 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return token = SyntaxKind.EndOfFileToken; } - const ch = codePointAt(text, pos); + const ch = codePointUnchecked(pos); pos += charSize(ch); switch (ch) { case CharacterCodes.tab: case CharacterCodes.verticalTab: case CharacterCodes.formFeed: case CharacterCodes.space: - while (pos < end && isWhiteSpaceSingleLine(text.charCodeAt(pos))) { + while (pos < end && isWhiteSpaceSingleLine(charCodeUnchecked(pos))) { pos++; } return token = SyntaxKind.WhitespaceTrivia; case CharacterCodes.at: return token = SyntaxKind.AtToken; case CharacterCodes.carriageReturn: - if (text.charCodeAt(pos) === CharacterCodes.lineFeed) { + if (charCodeUnchecked(pos) === CharacterCodes.lineFeed) { pos++; } // falls through @@ -3754,7 +3788,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean if (isIdentifierStart(ch, languageVersion)) { let char = ch; - while (pos < end && isIdentifierPart(char = codePointAt(text, pos), languageVersion) || text.charCodeAt(pos) === CharacterCodes.minus) pos += charSize(char); + while (pos < end && isIdentifierPart(char = codePointUnchecked(pos), languageVersion) || charCodeUnchecked(pos) === CharacterCodes.minus) pos += charSize(char); tokenValue = text.substring(tokenStart, pos); if (char === CharacterCodes.backslash) { tokenValue += scanIdentifierParts(); diff --git a/src/compiler/types.ts b/src/compiler/types.ts index 76f8193ae127d..85bcc8f69e2e5 100644 --- a/src/compiler/types.ts +++ b/src/compiler/types.ts @@ -7612,6 +7612,7 @@ export type CommandLineOption = CommandLineOptionOfCustomType | CommandLineOptio // dprint-ignore /** @internal */ export const enum CharacterCodes { + EOF = -1, nullCharacter = 0, maxAsciiCharacter = 0x7F, From 0785328da12c984709f770b3d072597cff755afd Mon Sep 17 00:00:00 2001 From: Ron Buckton Date: Fri, 26 Apr 2024 18:58:10 -0400 Subject: [PATCH 6/8] Use charCodeChecked where possible in regex scanner --- src/compiler/checker.ts | 2 +- src/compiler/scanner.ts | 133 +++++++++++++++++++++------------------- 2 files changed, 71 insertions(+), 64 deletions(-) diff --git a/src/compiler/checker.ts b/src/compiler/checker.ts index 6bb0efc2c8322..0dd481e113481 100644 --- a/src/compiler/checker.ts +++ b/src/compiler/checker.ts @@ -31360,7 +31360,7 @@ export function createTypeChecker(host: TypeCheckerHost): TypeChecker { function checkGrammarRegularExpressionLiteral(node: RegularExpressionLiteral) { const sourceFile = getSourceFileOfNode(node); - if (!hasParseDiagnostics(sourceFile)) { + if (!hasParseDiagnostics(sourceFile) && !node.isUnterminated) { let lastError: DiagnosticWithLocation | undefined; scanner ??= createScanner(ScriptTarget.ESNext, /*skipTrivia*/ true); scanner.setScriptTarget(sourceFile.languageVersion); diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 0415871062c6a..e63bac014bb44 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -1109,13 +1109,13 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return codePointAt(text, pos); } - // /** - // * Returns the code point for the character at the given position within `text`. If - // * `pos` is outside the bounds set for `text`, `CharacterCodes.EOF` is returned instead. - // */ - // function codePointChecked(pos: number) { - // return pos >= 0 && pos < end ? codePointUnchecked(pos) : CharacterCodes.EOF; - // } + /** + * Returns the code point for the character at the given position within `text`. If + * `pos` is outside the bounds set for `text`, `CharacterCodes.EOF` is returned instead. + */ + function codePointChecked(pos: number) { + return pos >= 0 && pos < end ? codePointUnchecked(pos) : CharacterCodes.EOF; + } /** * Returns the char code for the character at the given position within `text`. This @@ -1126,13 +1126,13 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return text.charCodeAt(pos); } - // /** - // * Returns the char code for the character at the given position within `text`. If - // * `pos` is outside the bounds set for `text`, `CharacterCodes.EOF` is returned instead. - // */ - // function charCodeChecked(pos: number) { - // return pos >= 0 && pos < end ? charCodeUnchecked(pos) : CharacterCodes.EOF; - // } + /** + * Returns the char code for the character at the given position within `text`. If + * `pos` is outside the bounds set for `text`, `CharacterCodes.EOF` is returned instead. + */ + function charCodeChecked(pos: number) { + return pos >= 0 && pos < end ? charCodeUnchecked(pos) : CharacterCodes.EOF; + } function error(message: DiagnosticMessage): void; function error(message: DiagnosticMessage, errPos: number, length: number, arg0?: any): void; @@ -1325,7 +1325,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanDigits(): boolean { const start = pos; let isOctal = true; - while (isDigit(charCodeUnchecked(pos))) { + while (isDigit(charCodeChecked(pos))) { if (!isOctalDigit(charCodeUnchecked(pos))) { isOctal = false; } @@ -2554,7 +2554,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean topNamedCapturingGroupsScope = undefined; scanAlternative(isInGroup); topNamedCapturingGroupsScope = namedCapturingGroupsScopeStack.pop(); - if (charCodeUnchecked(pos) !== CharacterCodes.bar) { + if (charCodeChecked(pos) !== CharacterCodes.bar) { return; } pos++; @@ -2603,7 +2603,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean break; case CharacterCodes.backslash: pos++; - switch (charCodeUnchecked(pos)) { + switch (charCodeChecked(pos)) { case CharacterCodes.b: case CharacterCodes.B: pos++; @@ -2617,9 +2617,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean break; case CharacterCodes.openParen: pos++; - if (charCodeUnchecked(pos) === CharacterCodes.question) { + if (charCodeChecked(pos) === CharacterCodes.question) { pos++; - switch (charCodeUnchecked(pos)) { + switch (charCodeChecked(pos)) { case CharacterCodes.equals: case CharacterCodes.exclamation: pos++; @@ -2629,7 +2629,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean case CharacterCodes.lessThan: const groupNameStart = pos; pos++; - switch (charCodeUnchecked(pos)) { + switch (charCodeChecked(pos)) { case CharacterCodes.equals: case CharacterCodes.exclamation: pos++; @@ -2649,7 +2649,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean default: const start = pos; const setFlags = scanPatternModifiers(RegularExpressionFlags.None); - if (charCodeUnchecked(pos) === CharacterCodes.minus) { + if (charCodeChecked(pos) === CharacterCodes.minus) { pos++; scanPatternModifiers(setFlags); if (pos === start + 1) { @@ -2673,12 +2673,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean const digitsStart = pos; scanDigits(); const min = tokenValue; - if (charCodeUnchecked(pos) === CharacterCodes.comma) { + if (charCodeChecked(pos) === CharacterCodes.comma) { pos++; scanDigits(); const max = tokenValue; if (!min) { - if (max || charCodeUnchecked(pos) === CharacterCodes.closeBrace) { + if (max || charCodeChecked(pos) === CharacterCodes.closeBrace) { error(Diagnostics.Incomplete_quantifier_Digit_expected, digitsStart, 0); } else { @@ -2707,7 +2707,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean case CharacterCodes.plus: case CharacterCodes.question: pos++; - if (charCodeUnchecked(pos) === CharacterCodes.question) { + if (charCodeChecked(pos) === CharacterCodes.question) { // Non-greedy pos++; } @@ -2791,10 +2791,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // | 'k<' RegExpIdentifierName '>' function scanAtomEscape() { Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.backslash); - switch (charCodeUnchecked(pos)) { + switch (charCodeChecked(pos)) { case CharacterCodes.k: pos++; - if (charCodeUnchecked(pos) === CharacterCodes.lessThan) { + if (charCodeChecked(pos) === CharacterCodes.lessThan) { pos++; scanGroupName(/*isReference*/ true); scanExpectedChar(CharacterCodes.greaterThan); @@ -2821,7 +2821,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // DecimalEscape ::= [1-9] [0-9]* function scanDecimalEscape(): boolean { Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.backslash); - const ch = charCodeUnchecked(pos); + const ch = charCodeChecked(pos); if (ch >= CharacterCodes._1 && ch <= CharacterCodes._9) { const start = pos; scanDigits(); @@ -2840,11 +2840,11 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // | [~UnicodeMode] (any other non-identifier characters) function scanCharacterEscape(atomEscape: boolean): string { Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.backslash); - let ch = charCodeUnchecked(pos); + let ch = charCodeChecked(pos); switch (ch) { case CharacterCodes.c: pos++; - ch = charCodeUnchecked(pos); + ch = charCodeChecked(pos); if (isASCIILetter(ch)) { pos++; return String.fromCharCode(ch & 0x1f); @@ -2881,7 +2881,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return String.fromCharCode(ch); default: if (pos >= end) { - error(Diagnostics.Undetermined_character_escape, pos - 1, 1, ch); + error(Diagnostics.Undetermined_character_escape, pos - 1, 1); return "\\"; } pos--; @@ -2892,7 +2892,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanGroupName(isReference: boolean) { Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.lessThan); tokenStart = pos; - scanIdentifier(codePointUnchecked(pos), languageVersion); + scanIdentifier(codePointChecked(pos), languageVersion); if (pos === tokenStart) { error(Diagnostics.Expected_a_capturing_group_name); } @@ -2911,13 +2911,13 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } function isClassContentExit(ch: number) { - return ch === CharacterCodes.closeBracket || pos >= end; + return ch === CharacterCodes.closeBracket || ch === CharacterCodes.EOF || pos >= end; } // ClassRanges ::= '^'? (ClassAtom ('-' ClassAtom)?)* function scanClassRanges() { Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.openBracket); - if (charCodeUnchecked(pos) === CharacterCodes.caret) { + if (charCodeChecked(pos) === CharacterCodes.caret) { // character complement pos++; } @@ -2928,9 +2928,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } const minStart = pos; const minCharacter = scanClassAtom(); - if (charCodeUnchecked(pos) === CharacterCodes.minus) { + if (charCodeChecked(pos) === CharacterCodes.minus) { pos++; - const ch = charCodeUnchecked(pos); + const ch = charCodeChecked(pos); if (isClassContentExit(ch)) { return; } @@ -2977,12 +2977,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanClassSetExpression() { Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.openBracket); let isCharacterComplement = false; - if (charCodeUnchecked(pos) === CharacterCodes.caret) { + if (charCodeChecked(pos) === CharacterCodes.caret) { pos++; isCharacterComplement = true; } let expressionMayContainStrings = false; - let ch = charCodeUnchecked(pos); + let ch = charCodeChecked(pos); if (isClassContentExit(ch)) { return; } @@ -2998,9 +2998,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean operand = scanClassSetOperand(); break; } - switch (charCodeUnchecked(pos)) { + switch (charCodeChecked(pos)) { case CharacterCodes.minus: - if (charCodeUnchecked(pos + 1) === CharacterCodes.minus) { + if (charCodeChecked(pos + 1) === CharacterCodes.minus) { if (isCharacterComplement && mayContainStrings) { error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, start, pos - start); } @@ -3011,7 +3011,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } break; case CharacterCodes.ampersand: - if (charCodeUnchecked(pos + 1) === CharacterCodes.ampersand) { + if (charCodeChecked(pos + 1) === CharacterCodes.ampersand) { scanClassSetSubExpression(ClassSetExpressionType.ClassIntersection); if (isCharacterComplement && mayContainStrings) { error(Diagnostics.Anything_that_would_possibly_match_more_than_a_single_character_is_invalid_inside_a_negated_character_class, start, pos - start); @@ -3036,7 +3036,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean switch (ch) { case CharacterCodes.minus: pos++; - ch = charCodeUnchecked(pos); + ch = charCodeChecked(pos); if (isClassContentExit(ch)) { mayContainStrings = !isCharacterComplement && expressionMayContainStrings; return; @@ -3079,10 +3079,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean case CharacterCodes.ampersand: start = pos; pos++; - if (charCodeUnchecked(pos) === CharacterCodes.ampersand) { + if (charCodeChecked(pos) === CharacterCodes.ampersand) { pos++; error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); - if (charCodeUnchecked(pos) === CharacterCodes.ampersand) { + if (charCodeChecked(pos) === CharacterCodes.ampersand) { error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); pos++; } @@ -3093,7 +3093,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean operand = text.slice(start, pos); continue; } - if (isClassContentExit(charCodeUnchecked(pos))) { + if (isClassContentExit(charCodeChecked(pos))) { break; } start = pos; @@ -3123,7 +3123,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean switch (ch) { case CharacterCodes.minus: pos++; - if (charCodeUnchecked(pos) === CharacterCodes.minus) { + if (charCodeChecked(pos) === CharacterCodes.minus) { pos++; if (expressionType !== ClassSetExpressionType.ClassSubtraction) { error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); @@ -3135,12 +3135,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean break; case CharacterCodes.ampersand: pos++; - if (charCodeUnchecked(pos) === CharacterCodes.ampersand) { + if (charCodeChecked(pos) === CharacterCodes.ampersand) { pos++; if (expressionType !== ClassSetExpressionType.ClassIntersection) { error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos - 2, 2); } - if (charCodeUnchecked(pos) === CharacterCodes.ampersand) { + if (charCodeChecked(pos) === CharacterCodes.ampersand) { error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); pos++; } @@ -3162,7 +3162,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } break; } - ch = charCodeUnchecked(pos); + ch = charCodeChecked(pos); if (isClassContentExit(ch)) { error(Diagnostics.Expected_a_class_set_operand); break; @@ -3181,7 +3181,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // | ClassSetCharacter function scanClassSetOperand(): string { mayContainStrings = false; - switch (charCodeUnchecked(pos)) { + switch (charCodeChecked(pos)) { case CharacterCodes.openBracket: pos++; scanClassSetExpression(); @@ -3192,9 +3192,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean if (scanCharacterClassEscape()) { return ""; } - else if (charCodeUnchecked(pos) === CharacterCodes.q) { + else if (charCodeChecked(pos) === CharacterCodes.q) { pos++; - if (charCodeUnchecked(pos) === CharacterCodes.openBrace) { + if (charCodeChecked(pos) === CharacterCodes.openBrace) { pos++; scanClassStringDisjunctionContents(); scanExpectedChar(CharacterCodes.closeBrace); @@ -3244,10 +3244,14 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // | SourceCharacter -- ClassSetSyntaxCharacter -- ClassSetReservedDoublePunctuator // | '\' (CharacterEscape | ClassSetReservedPunctuator | 'b') function scanClassSetCharacter(): string { - const ch = charCodeUnchecked(pos); + const ch = charCodeChecked(pos); + if (ch === CharacterCodes.EOF) { + // no need to report an error, the initial scan will already have reported that the RegExp is unterminated. + return ""; + } if (ch === CharacterCodes.backslash) { pos++; - const ch = charCodeUnchecked(pos); + const ch = charCodeChecked(pos); switch (ch) { case CharacterCodes.b: pos++; @@ -3272,7 +3276,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return scanCharacterEscape(/*atomEscape*/ false); } } - else if (ch === charCodeUnchecked(pos + 1)) { + else if (ch === charCodeChecked(pos + 1)) { switch (ch) { case CharacterCodes.ampersand: case CharacterCodes.exclamation: @@ -3322,9 +3326,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // | CharacterClassEscape // | CharacterEscape function scanClassAtom(): string { - if (charCodeUnchecked(pos) === CharacterCodes.backslash) { + if (charCodeChecked(pos) === CharacterCodes.backslash) { pos++; - const ch = charCodeUnchecked(pos); + const ch = charCodeChecked(pos); switch (ch) { case CharacterCodes.b: pos++; @@ -3351,7 +3355,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.backslash); let isCharacterComplement = false; const start = pos - 1; - const ch = charCodeUnchecked(pos); + const ch = charCodeChecked(pos); switch (ch) { case CharacterCodes.d: case CharacterCodes.D: @@ -3366,11 +3370,11 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // falls through case CharacterCodes.p: pos++; - if (charCodeUnchecked(pos) === CharacterCodes.openBrace) { + if (charCodeChecked(pos) === CharacterCodes.openBrace) { pos++; const propertyNameOrValueStart = pos; const propertyNameOrValue = scanWordCharacters(); - if (charCodeUnchecked(pos) === CharacterCodes.equals) { + if (charCodeChecked(pos) === CharacterCodes.equals) { const propertyName = nonBinaryUnicodeProperties.get(propertyNameOrValue); if (pos === propertyNameOrValueStart) { error(Diagnostics.Expected_a_Unicode_property_name); @@ -3446,13 +3450,13 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } function scanSourceCharacter(): string { - const size = unicodeMode ? charSize(codePointUnchecked(pos)) : 1; + const size = unicodeMode ? charSize(charCodeChecked(pos)) : 1; pos += size; - return text.substring(pos - size, pos); + return size > 0 ? text.substring(pos - size, pos) : ""; } function scanExpectedChar(ch: CharacterCodes) { - if (charCodeUnchecked(pos) === ch) { + if (charCodeChecked(pos) === ch) { pos++; } else { @@ -3788,7 +3792,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean if (isIdentifierStart(ch, languageVersion)) { let char = ch; - while (pos < end && isIdentifierPart(char = codePointUnchecked(pos), languageVersion) || charCodeUnchecked(pos) === CharacterCodes.minus) pos += charSize(char); + while (pos < end && isIdentifierPart(char = codePointUnchecked(pos), languageVersion) || char === CharacterCodes.minus) pos += charSize(char); tokenValue = text.substring(tokenStart, pos); if (char === CharacterCodes.backslash) { tokenValue += scanIdentifierParts(); @@ -3913,6 +3917,9 @@ function charSize(ch: number) { if (ch >= 0x10000) { return 2; } + if (ch === CharacterCodes.EOF) { + return 0; + } return 1; } From d413625b25e8fa333bf923b246e5b11407bb911c Mon Sep 17 00:00:00 2001 From: Ron Buckton Date: Fri, 26 Apr 2024 20:34:33 -0400 Subject: [PATCH 7/8] Use charCodeChecked in a few more places --- src/compiler/scanner.ts | 45 ++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index e63bac014bb44..1fb0c6302456b 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -2592,10 +2592,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // : '[' ClassSetExpression ']' function scanAlternative(isInGroup: boolean) { let isPreviousTermQuantifiable = false; - while (pos < end) { + while (true) { const start = pos; - const ch = charCodeUnchecked(pos); + const ch = charCodeChecked(pos); switch (ch) { + case CharacterCodes.EOF: + return; case CharacterCodes.caret: case CharacterCodes.$: pos++; @@ -2760,9 +2762,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } function scanPatternModifiers(currFlags: RegularExpressionFlags): RegularExpressionFlags { - while (pos < end) { - const ch = charCodeUnchecked(pos); - if (!isIdentifierPart(ch, languageVersion)) { + while (true) { + const ch = charCodeChecked(pos); + if (ch === CharacterCodes.EOF || !isIdentifierPart(ch, languageVersion)) { break; } const flag = characterToRegularExpressionFlag(String.fromCharCode(ch)); @@ -2921,8 +2923,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean // character complement pos++; } - while (pos < end) { - const ch = charCodeUnchecked(pos); + while (true) { + const ch = charCodeChecked(pos); if (isClassContentExit(ch)) { return; } @@ -2988,7 +2990,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } let start = pos; let operand!: string; - switch (text.slice(pos, pos + 2)) { + switch (text.slice(pos, pos + 2)) { // TODO: don't use slice case "--": case "&&": error(Diagnostics.Expected_a_class_set_operand); @@ -3031,8 +3033,11 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean expressionMayContainStrings = mayContainStrings; break; } - while (pos < end) { - ch = charCodeUnchecked(pos); + while (true) { + ch = charCodeChecked(pos); + if (ch === CharacterCodes.EOF) { + break; + } switch (ch) { case CharacterCodes.minus: pos++; @@ -3097,7 +3102,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean break; } start = pos; - switch (text.slice(pos, pos + 2)) { + switch (text.slice(pos, pos + 2)) { // TODO: don't use slice case "--": case "&&": error(Diagnostics.Operators_must_not_be_mixed_within_a_character_class_Wrap_it_in_a_nested_class_instead, pos, 2); @@ -3114,8 +3119,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanClassSetSubExpression(expressionType: ClassSetExpressionType) { let expressionMayContainStrings = mayContainStrings; - while (pos < end) { - let ch = charCodeUnchecked(pos); + while (true) { + let ch = charCodeChecked(pos); if (isClassContentExit(ch)) { break; } @@ -3182,6 +3187,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanClassSetOperand(): string { mayContainStrings = false; switch (charCodeChecked(pos)) { + case CharacterCodes.EOF: + return ""; case CharacterCodes.openBracket: pos++; scanClassSetExpression(); @@ -3216,9 +3223,11 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanClassStringDisjunctionContents() { Debug.assertEqual(charCodeUnchecked(pos - 1), CharacterCodes.openBrace); let characterCount = 0; - while (pos < end) { - const ch = charCodeUnchecked(pos); + while (true) { + const ch = charCodeChecked(pos); switch (ch) { + case CharacterCodes.EOF: + return; case CharacterCodes.closeBrace: if (characterCount !== 1) { mayContainStrings = true; @@ -3438,9 +3447,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanWordCharacters(): string { let value = ""; - while (pos < end) { - const ch = charCodeUnchecked(pos); - if (!isWordCharacter(ch)) { + while (true) { + const ch = charCodeChecked(pos); + if (ch === CharacterCodes.EOF || !isWordCharacter(ch)) { break; } value += String.fromCharCode(ch); From de73b3272abdf8e10c185f141773376b8bc814af Mon Sep 17 00:00:00 2001 From: Ron Buckton Date: Sat, 27 Apr 2024 00:52:25 -0400 Subject: [PATCH 8/8] Update src/compiler/scanner.ts Co-authored-by: Jake Bailey <5341706+jakebailey@users.noreply.github.com> --- src/compiler/scanner.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 1fb0c6302456b..7b7e8875c6711 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -2546,7 +2546,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean /** A stack of scopes for named capturing groups. @see {scanGroupName} */ var namedCapturingGroupsScopeStack: (Set | undefined)[] = []; var topNamedCapturingGroupsScope: Set | undefined; - + /* eslint-enable no-var */ // Disjunction ::= Alternative ('|' Alternative)* function scanDisjunction(isInGroup: boolean) { while (true) {