Skip to content

Commit

Permalink
fix(es/parser): Fix parsing of regexp (#6469)
Browse files Browse the repository at this point in the history
**Related issues:**

 - Closes #6322.
 - Closes #6323.
  • Loading branch information
alexander-akait committed Nov 18, 2022
1 parent d78c545 commit 3e702b9
Show file tree
Hide file tree
Showing 8 changed files with 160 additions and 49 deletions.
31 changes: 19 additions & 12 deletions crates/swc_ecma_parser/src/lexer/mod.rs
Expand Up @@ -695,12 +695,6 @@ impl<'a, I: Input> Lexer<'a, I> {
#[inline(never)]
fn read_slash(&mut self) -> LexResult<Option<Token>> {
debug_assert_eq!(self.cur(), Some('/'));
// let start = self.cur_pos();

// Regex
if self.state.is_expr_allowed {
return self.read_regexp().map(Some);
}

// Divide operator
self.bump();
Expand Down Expand Up @@ -1120,19 +1114,25 @@ impl<'a, I: Input> Lexer<'a, I> {
}

/// Expects current char to be '/'
fn read_regexp(&mut self) -> LexResult<Token> {
fn read_regexp(&mut self, start: BytePos) -> LexResult<Token> {
self.input.reset_to(start);

debug_assert_eq!(self.cur(), Some('/'));

let start = self.cur_pos();

self.bump();

let (mut escaped, mut in_class) = (false, false);
// let content_start = self.cur_pos();

let content = self.with_buf(|l, buf| {
while let Some(c) = l.cur() {
// This is ported from babel.
// Seems like regexp literal cannot contain linebreak.
if c.is_line_terminator() {
l.error(start, SyntaxError::UnterminatedRegExp)?;
let span = l.span(start);

return Err(Error::new(span, SyntaxError::UnterminatedRegExp));
}

if escaped {
Expand All @@ -1145,20 +1145,22 @@ impl<'a, I: Input> Lexer<'a, I> {
'/' if !in_class => break,
_ => {}
}

escaped = c == '\\';
}

l.bump();
buf.push(c);
}

Ok(Atom::new(&**buf))
})?;
// let content_span = Span::new(content_start, self.cur_pos(),
// Default::default());

// input is terminated without following `/`
if !self.is(b'/') {
self.error(start, SyntaxError::UnterminatedRegExp)?;
let span = self.span(start);

return Err(Error::new(span, SyntaxError::UnterminatedRegExp));
}

self.bump(); // '/'
Expand Down Expand Up @@ -1287,6 +1289,11 @@ impl<'a, I: Input> Lexer<'a, I> {
pub fn set_expr_allowed(&mut self, allow: bool) {
self.state.is_expr_allowed = allow;
}

#[inline]
pub fn set_next_regexp(&mut self, start: Option<BytePos>) {
self.state.next_regexp = start;
}
}

fn pos_span(p: BytePos) -> Span {
Expand Down
11 changes: 11 additions & 0 deletions crates/swc_ecma_parser/src/lexer/state.rs
Expand Up @@ -22,6 +22,7 @@ use crate::{
#[derive(Clone)]
pub(super) struct State {
pub is_expr_allowed: bool,
pub next_regexp: Option<BytePos>,
/// if line break exists between previous token and new token?
pub had_line_break: bool,
/// TODO: Remove this field.
Expand Down Expand Up @@ -152,6 +153,11 @@ impl<I: Input> Tokens for Lexer<'_, I> {
self.set_expr_allowed(allow)
}

#[inline]
fn set_next_regexp(&mut self, start: Option<BytePos>) {
self.state.next_regexp = start;
}

#[inline]
fn token_context(&self) -> &TokenContexts {
&self.state.context
Expand Down Expand Up @@ -191,6 +197,10 @@ impl<'a, I: Input> Iterator for Lexer<'a, I> {
let mut start = self.cur_pos();

let res = (|| -> Result<Option<_>, _> {
if let Some(start) = self.state.next_regexp {
return Ok(Some(self.read_regexp(start)?));
}

if self.state.is_first {
if let Some(shebang) = self.read_shebang()? {
return Ok(Some(Token::Shebang(shebang)));
Expand Down Expand Up @@ -363,6 +373,7 @@ impl State {

State {
is_expr_allowed: true,
next_regexp: None,
is_first: true,
had_line_break: false,
prev_hi: start_pos,
Expand Down
62 changes: 53 additions & 9 deletions crates/swc_ecma_parser/src/lexer/tests.rs
Expand Up @@ -416,15 +416,19 @@ fn regexp_unary_void() {
lex(Syntax::default(), "void /test/"),
vec![
Void.span(0..4).lb(),
Regex("test".into(), "".into()).span(5..11),
BinOp(Div).span(5),
Word(Word::Ident("test".into())).span(6..10),
BinOp(Div).span(10),
]
);
assert_eq!(
lex(Syntax::default(), "void (/test/)"),
vec![
Void.span(0..4).lb(),
LParen.span(5..6),
Regex("test".into(), "".into()).span(6..12),
BinOp(Div).span(6),
Word(Word::Ident("test".into())).span(7..11),
BinOp(Div).span(11),
RParen.span(12..13),
]
);
Expand Down Expand Up @@ -483,13 +487,28 @@ fn simple_regex() {
vec![
"x".span(0).lb(),
Assign.span(2),
Regex("42".into(), "i".into(),).span(4..9),
BinOp(Div).span(4),
42.span(5..7),
BinOp(Div).span(7),
Word(Word::Ident("i".into())).span(8),
],
);

assert_eq!(
lex(Syntax::default(), "/42/"),
vec![Regex("42".into(), "".into()).span(0..4).lb(),]
vec![
TokenAndSpan {
token: Token::BinOp(BinOpToken::Div),
had_line_break: true,
span: Span {
lo: BytePos(1),
hi: BytePos(2),
ctxt: Default::default(),
},
},
42.span(1..3),
BinOp(Div).span(3)
]
);
}

Expand All @@ -508,7 +527,13 @@ fn complex_regex() {
RParen,
LBrace,
RBrace,
Regex("42".into(), "i".into(),),
BinOp(Div),
Num {
value: 42.0,
raw: Atom::new("42")
},
BinOp(Div),
Word(Word::Ident("i".into())),
]
)
}
Expand Down Expand Up @@ -595,7 +620,9 @@ fn after_if() {
RParen.span(4),
LBrace.span(5),
RBrace.span(6),
Regex("y".into(), "".into()).span(8..11),
Div.span(8),
"y".span(9),
Div.span(10),
Dot.span(11),
"test".span(12..16),
LParen.span(16),
Expand Down Expand Up @@ -639,7 +666,9 @@ fn migrated_0002() {
vec![
"tokenize".span(0..8).lb(),
LParen.span(8),
Regex("42".into(), "".into()).span(9..13),
BinOp(Div).span(9),
42.span(10..12),
BinOp(Div).span(12),
RParen.span(13),
],
)
Expand Down Expand Up @@ -671,7 +700,9 @@ fn migrated_0004() {
RParen.span(11),
LBrace.span(12),
RBrace.span(13),
Regex("42".into(), "".into()).span(15..19),
BinOp(Div).span(15),
42.span(16..18),
BinOp(Div).span(18),
]
);
}
Expand Down Expand Up @@ -707,7 +738,20 @@ fn migrated_0006() {
vec![
LBrace.span(0).lb(),
RBrace.span(1),
Regex("42".into(), "".into()).span(3..7),
BinOp(Div).span(3),
TokenAndSpan {
token: Num {
value: 42.0,
raw: "42".into(),
},
had_line_break: false,
span: Span {
lo: BytePos(5),
hi: BytePos(7),
ctxt: Default::default(),
}
},
BinOp(Div).span(6),
],
)
}
Expand Down
70 changes: 46 additions & 24 deletions crates/swc_ecma_parser/src/parser/expr.rs
Expand Up @@ -332,31 +332,50 @@ impl<I: Tokens> Parser<I> {
}

// Regexp
Token::Regex(..) => match bump!(self) {
Token::Regex(exp, flags) => {
let span = span!(self, start);

let mut flags_count = flags.chars().fold(
AHashMap::<char, usize>::default(),
|mut map, flag| {
let key = match flag {
'g' | 'i' | 'm' | 's' | 'u' | 'y' | 'd' => flag,
_ => '\u{0000}', // special marker for unknown flags
};
map.entry(key).and_modify(|count| *count += 1).or_insert(1);
map
},
);
if flags_count.remove(&'\u{0000}').is_some() {
self.emit_err(span, SyntaxError::UnknownRegExpFlags);
}
if let Some((flag, _)) = flags_count.iter().find(|(_, count)| **count > 1) {
self.emit_err(span, SyntaxError::DuplicatedRegExpFlags(*flag));
tok!('/') | tok!("/=") => {
bump!(self);

self.input.set_next_regexp(Some(start));

if let Some(Token::Regex(..)) = self.input.cur() {
self.input.set_next_regexp(None);

match bump!(self) {
Token::Regex(exp, flags) => {
let span = span!(self, start);

let mut flags_count = flags.chars().fold(
AHashMap::<char, usize>::default(),
|mut map, flag| {
let key = match flag {
'g' | 'i' | 'm' | 's' | 'u' | 'y' | 'd' => flag,
_ => '\u{0000}', // special marker for unknown flags
};
map.entry(key).and_modify(|count| *count += 1).or_insert(1);
map
},
);

if flags_count.remove(&'\u{0000}').is_some() {
self.emit_err(span, SyntaxError::UnknownRegExpFlags);
}

if let Some((flag, _)) =
flags_count.iter().find(|(_, count)| **count > 1)
{
self.emit_err(span, SyntaxError::DuplicatedRegExpFlags(*flag));
}

return Ok(Box::new(Expr::Lit(Lit::Regex(Regex {
span,
exp,
flags,
}))));
}
_ => unreachable!(),
}
return Ok(Box::new(Expr::Lit(Lit::Regex(Regex { span, exp, flags }))));
}
_ => unreachable!(),
},
}

tok!('`') => {
// parse template literal
Expand Down Expand Up @@ -1880,7 +1899,10 @@ impl<I: Tokens> Parser<I> {
}

if is!(self, ';')
|| (!is!(self, '*') && !cur!(self, false).map(Token::starts_expr).unwrap_or(true))
|| (!is!(self, '*')
&& !is!(self, '/')
&& !is!(self, "/=")
&& !cur!(self, false).map(Token::starts_expr).unwrap_or(true))
{
Ok(Box::new(Expr::Yield(YieldExpr {
span: span!(self, start),
Expand Down
13 changes: 13 additions & 0 deletions crates/swc_ecma_parser/src/parser/input.rs
Expand Up @@ -24,6 +24,8 @@ pub trait Tokens: Clone + Iterator<Item = TokenAndSpan> {
}

fn set_expr_allowed(&mut self, allow: bool);
fn set_next_regexp(&mut self, start: Option<BytePos>);

fn token_context(&self) -> &lexer::TokenContexts;
fn token_context_mut(&mut self) -> &mut lexer::TokenContexts;
fn set_token_context(&mut self, _c: lexer::TokenContexts);
Expand Down Expand Up @@ -110,6 +112,8 @@ impl Tokens for TokensInput {

fn set_expr_allowed(&mut self, _: bool) {}

fn set_next_regexp(&mut self, _: Option<BytePos>) {}

fn token_context(&self) -> &TokenContexts {
&self.token_ctx
}
Expand Down Expand Up @@ -222,6 +226,10 @@ impl<I: Tokens> Tokens for Capturing<I> {
self.inner.set_expr_allowed(allow)
}

fn set_next_regexp(&mut self, start: Option<BytePos>) {
self.inner.set_next_regexp(start);
}

fn token_context(&self) -> &TokenContexts {
self.inner.token_context()
}
Expand Down Expand Up @@ -467,6 +475,11 @@ impl<I: Tokens> Buffer<I> {
self.iter.set_expr_allowed(allow)
}

#[inline]
pub fn set_next_regexp(&mut self, start: Option<BytePos>) {
self.iter.set_next_regexp(start);
}

#[inline]
pub(crate) fn token_context(&self) -> &lexer::TokenContexts {
self.iter.token_context()
Expand Down
14 changes: 14 additions & 0 deletions crates/swc_ecma_parser/src/parser/stmt.rs
Expand Up @@ -2527,4 +2527,18 @@ const foo;"#;

test_parser(src, Default::default(), |p| p.parse_script());
}

#[test]
fn issue_6322() {
let src = "for ( ; { } / 1 ; ) ;";

test_parser(src, Default::default(), |p| p.parse_script());
}

#[test]
fn issue_6323() {
let src = "let x = 0 < { } / 0 ;";

test_parser(src, Default::default(), |p| p.parse_script());
}
}
@@ -1,6 +1,6 @@

x Unexpected eof
x Unterminated regexp literal
,-[$DIR/tests/test262-parser/fail/095bea002b10b8e1.js:1:1]
1 | foo[/42
: ^
: ^^^
`----

1 comment on commit 3e702b9

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 3e702b9 Previous: 072bd13 Ratio
es/full/bugs-1 354348 ns/iter (± 21258) 343594 ns/iter (± 32186) 1.03
es/full/minify/libraries/antd 1874993974 ns/iter (± 38128542) 1850216271 ns/iter (± 21092518) 1.01
es/full/minify/libraries/d3 418175522 ns/iter (± 23574855) 421398538 ns/iter (± 15840516) 0.99
es/full/minify/libraries/echarts 1609079337 ns/iter (± 34145013) 1643710542 ns/iter (± 35233875) 0.98
es/full/minify/libraries/jquery 104126257 ns/iter (± 7991135) 125999302 ns/iter (± 11772171) 0.83
es/full/minify/libraries/lodash 133579282 ns/iter (± 5373792) 144649144 ns/iter (± 9654283) 0.92
es/full/minify/libraries/moment 64179040 ns/iter (± 1799088) 62482010 ns/iter (± 2747536) 1.03
es/full/minify/libraries/react 22172744 ns/iter (± 610282) 20179618 ns/iter (± 956791) 1.10
es/full/minify/libraries/terser 307930356 ns/iter (± 7505502) 339925271 ns/iter (± 16835106) 0.91
es/full/minify/libraries/three 571502761 ns/iter (± 11258727) 551570111 ns/iter (± 15569460) 1.04
es/full/minify/libraries/typescript 3377745729 ns/iter (± 21289701) 3416571859 ns/iter (± 178801335) 0.99
es/full/minify/libraries/victory 826574577 ns/iter (± 10985643) 850330127 ns/iter (± 26003414) 0.97
es/full/minify/libraries/vue 154079012 ns/iter (± 2955872) 158961652 ns/iter (± 7147358) 0.97
es/full/codegen/es3 33318 ns/iter (± 1333) 35129 ns/iter (± 2524) 0.95
es/full/codegen/es5 33387 ns/iter (± 2257) 34913 ns/iter (± 4435) 0.96
es/full/codegen/es2015 33359 ns/iter (± 1085) 34697 ns/iter (± 1443) 0.96
es/full/codegen/es2016 33319 ns/iter (± 537) 34496 ns/iter (± 1247) 0.97
es/full/codegen/es2017 33230 ns/iter (± 488) 34699 ns/iter (± 2110) 0.96
es/full/codegen/es2018 33224 ns/iter (± 797) 34733 ns/iter (± 1703) 0.96
es/full/codegen/es2019 33353 ns/iter (± 871) 34912 ns/iter (± 2622) 0.96
es/full/codegen/es2020 33354 ns/iter (± 1243) 34958 ns/iter (± 4246) 0.95
es/full/all/es3 190515266 ns/iter (± 4057535) 196268968 ns/iter (± 14400859) 0.97
es/full/all/es5 181250320 ns/iter (± 3842697) 189624038 ns/iter (± 17101929) 0.96
es/full/all/es2015 145989342 ns/iter (± 2792108) 147226418 ns/iter (± 12806903) 0.99
es/full/all/es2016 144170725 ns/iter (± 4023659) 148034573 ns/iter (± 11793679) 0.97
es/full/all/es2017 143825266 ns/iter (± 3088115) 150245391 ns/iter (± 13282719) 0.96
es/full/all/es2018 145854294 ns/iter (± 5875440) 152738976 ns/iter (± 14703883) 0.95
es/full/all/es2019 143259939 ns/iter (± 6590229) 158289970 ns/iter (± 12220238) 0.91
es/full/all/es2020 137798688 ns/iter (± 14307266) 140494470 ns/iter (± 12421699) 0.98
es/full/parser 729658 ns/iter (± 23042) 743271 ns/iter (± 48681) 0.98
es/full/base/fixer 26561 ns/iter (± 1053) 26765 ns/iter (± 2688) 0.99
es/full/base/resolver_and_hygiene 91726 ns/iter (± 3084) 94809 ns/iter (± 10527) 0.97
serialization of ast node 219 ns/iter (± 18) 230 ns/iter (± 6) 0.95
serialization of serde 218 ns/iter (± 6) 232 ns/iter (± 5) 0.94

This comment was automatically generated by a workflow using github-action-benchmark.

Please sign in to comment.