From 5e7834aa2ecb0cd01b72979f393a517f1c1e5add Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Sun, 13 Aug 2023 18:33:50 -0700 Subject: [PATCH] refactor(es/parser): Don't attempt to handle shebangs in `read_token_number_sign` (#7803) bump() has a safety invariant that there must be input in the buffer to read (See https://github.com/swc-project/swc/issues/7709). This function as currently written calls `bump()` after calling `read_token_interpreter()`, which *may* have exhausted the buffer already in the specific case that it is handling a shebang. (The input string `#!/bin/js` with no newline is sufficient to cause UB here) Fortunately, shebangs never reach this function, due to `read_shebang`! Hurray! It's cleaner to not attempt to handle shebangs here when this code path will never be reached, and use a debug assertion to ensure that we're not dealing with shebangs. --- crates/swc_ecma_parser/src/lexer/mod.rs | 34 +++++-------------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/mod.rs b/crates/swc_ecma_parser/src/lexer/mod.rs index adc6b39232fb..2e818cfab9af 100644 --- a/crates/swc_ecma_parser/src/lexer/mod.rs +++ b/crates/swc_ecma_parser/src/lexer/mod.rs @@ -192,35 +192,15 @@ impl<'a> Lexer<'a> { fn read_token_number_sign(&mut self) -> LexResult<Option<Token>> { debug_assert!(self.cur().is_some()); - if self.input.is_at_start() && self.read_token_interpreter()? 
{ - return Ok(None); - } - self.input.bump(); // '#' - Ok(Some(Token::Hash)) - } - - #[inline(never)] - fn read_token_interpreter(&mut self) -> LexResult<bool> { - if !self.input.is_at_start() { - return Ok(false); - } - let start = self.input.cur_pos(); - self.input.bump(); - let c = self.input.cur(); - if c == Some('!') { - while let Some(c) = self.input.cur() { - self.input.bump(); - if c == '\n' || c == '\r' || c == '\u{8232}' || c == '\u{8233}' { - return Ok(true); - } - } - Ok(false) - } else { - self.input.reset_to(start); - Ok(false) - } + // `#` can also be a part of shebangs, however they should have been + // handled by `read_shebang()` + debug_assert!( + !self.input.is_at_start() || self.cur() != Some('!'), + "#! should have already been handled by read_shebang()" + ); + Ok(Some(Token::Hash)) } /// Read a token given `.`.