Skip to content

Commit ca26eb7

Browse files
authored Jun 16, 2024
perf(es/parser): Reduce allocations while lexing numbers (#9057)
1 parent 064af53 commit ca26eb7

File tree

3 files changed

+32
-77
lines changed

3 files changed

+32
-77
lines changed
 

‎crates/swc_ecma_parser/src/lexer/mod.rs

+2 -3
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@ use std::{cell::RefCell, char, iter::FusedIterator, rc::Rc};
44

55
use either::Either::{Left, Right};
66
use smallvec::{smallvec, SmallVec};
7-
use smartstring::SmartString;
87
use swc_atoms::{Atom, AtomStoreCell};
98
use swc_common::{comments::Comments, input::StringInput, BytePos, Span};
109
use swc_ecma_ast::{op, AssignOp, EsVersion};
@@ -472,7 +471,7 @@ impl<'a> Lexer<'a> {
472471
'x' => {
473472
self.bump(); // 'x'
474473

475-
match self.read_int_u32::<16>(2, &mut Raw(None))? {
474+
match self.read_int_u32::<16>(2)? {
476475
Some(val) => return Ok(Some(vec![Char::from(val)])),
477476
None => self.error(
478477
start,
@@ -880,7 +879,7 @@ impl<'a> Lexer<'a> {
880879
}
881880

882881
let state = self.input.cur_pos();
883-
let c = match self.read_int_u32::<16>(if is_curly { 0 } else { 4 }, &mut Raw(None)) {
882+
let c = match self.read_int_u32::<16>(if is_curly { 0 } else { 4 }) {
884883
Ok(Some(val)) => {
885884
if 0x0010_ffff >= val {
886885
char::from_u32(val)

‎crates/swc_ecma_parser/src/lexer/number.rs

+30 -52
Original file line number | Diff line number | Diff line change
@@ -2,12 +2,11 @@
22
//!
33
//!
44
//! See https://tc39.github.io/ecma262/#sec-literals-numeric-literals
5-
use std::{borrow::Cow, fmt::Write};
5+
use std::borrow::Cow;
66

77
use either::Either;
88
use num_bigint::BigInt as BigIntValue;
99
use num_traits::{Num as NumTrait, ToPrimitive};
10-
use smartstring::LazyCompact;
1110
use swc_common::SyntaxContext;
1211
use tracing::trace;
1312

@@ -46,7 +45,6 @@ impl<'a> Lexer<'a> {
4645
}
4746

4847
let start = self.cur_pos();
49-
let mut raw_val = SmartString::<LazyCompact>::new();
5048

5149
let val = if starts_with_dot {
5250
// first char is '.'
@@ -70,8 +68,6 @@ impl<'a> Lexer<'a> {
7068
)));
7169
}
7270

73-
write!(raw_val, "{}", &s.value).unwrap();
74-
7571
if starts_with_zero {
7672
// TODO: I guess it would be okay if I don't use -ffast-math
7773
// (or something like that), but needs review.
@@ -146,29 +142,28 @@ impl<'a> Lexer<'a> {
146142
//
147143
// `.1.a`, `.1e-4.a` are valid,
148144
if self.cur() == Some('.') {
149-
raw_val.push('.');
150-
151145
self.bump();
152146

153147
if starts_with_dot {
154148
debug_assert!(self.cur().is_some());
155149
debug_assert!(self.cur().unwrap().is_ascii_digit());
156150
}
157151

158-
let mut raw = Raw(Some(Default::default()));
159152
// Read numbers after dot
160-
let dec_val = self.read_int::<10>(0, &mut raw)?;
153+
self.read_int::<10>(0)?;
161154

162155
val = {
163-
if dec_val.is_some() {
164-
raw_val.push_str(raw.0.as_ref().unwrap());
165-
}
156+
let end = self.cur_pos();
157+
let raw = unsafe {
158+
// Safety: We got both start and end position from `self.input`
159+
self.input.slice(start, end)
160+
};
166161

167162
// Remove number separator from number
168-
if raw_val.contains('_') {
169-
Cow::Owned(raw_val.replace('_', ""))
163+
if raw.contains('_') {
164+
Cow::Owned(raw.replace('_', ""))
170165
} else {
171-
Cow::Borrowed(&*raw_val)
166+
Cow::Borrowed(raw)
172167
}
173168
.parse()
174169
.expect("failed to parse float using rust's impl")
@@ -193,8 +188,6 @@ impl<'a> Lexer<'a> {
193188
}
194189
};
195190

196-
raw_val.push('e');
197-
198191
let positive = if next == '+' || next == '-' {
199192
self.bump(); // remove '+', '-'
200193

@@ -203,8 +196,7 @@ impl<'a> Lexer<'a> {
203196
true
204197
};
205198

206-
let mut raw = Raw(Some(Default::default()));
207-
let exp = self.read_number_no_dot::<10>(&mut raw)?;
199+
let exp = self.read_number_no_dot::<10>()?;
208200

209201
val = if exp == f64::INFINITY {
210202
if positive && val != 0.0 {
@@ -213,16 +205,16 @@ impl<'a> Lexer<'a> {
213205
0.0
214206
}
215207
} else {
216-
let flag = if positive { '+' } else { '-' };
217-
218-
raw_val.push(flag);
219-
220-
write!(raw_val, "{}", exp).unwrap();
221-
222-
if raw_val.contains('_') {
223-
Cow::Owned(raw_val.replace('_', ""))
208+
let end = self.cur_pos();
209+
let raw = unsafe {
210+
// Safety: We got both start and end position from `self.input`
211+
self.input.slice(start, end)
212+
};
213+
214+
if raw.contains('_') {
215+
Cow::Owned(raw.replace('_', ""))
224216
} else {
225-
Cow::Borrowed(&*raw_val)
217+
Cow::Borrowed(raw)
226218
}
227219
.parse()
228220
.expect("failed to parse float literal")
@@ -293,7 +285,7 @@ impl<'a> Lexer<'a> {
293285

294286
/// This can read long integers like
295287
/// "13612536612375123612312312312312312312312".
296-
fn read_number_no_dot<const RADIX: u8>(&mut self, raw: &mut Raw) -> LexResult<f64> {
288+
fn read_number_no_dot<const RADIX: u8>(&mut self) -> LexResult<f64> {
297289
debug_assert!(
298290
RADIX == 2 || RADIX == 8 || RADIX == 10 || RADIX == 16,
299291
"radix for read_number_no_dot should be one of 2, 8, 10, 16, but got {}",
@@ -309,7 +301,6 @@ impl<'a> Lexer<'a> {
309301

310302
Ok((f64::mul_add(total, radix as f64, v as f64), true))
311303
},
312-
raw,
313304
true,
314305
);
315306

@@ -336,8 +327,6 @@ impl<'a> Lexer<'a> {
336327
let mut non_octal = false;
337328
let mut read_any = false;
338329

339-
let mut raw = Raw(Some(Default::default()));
340-
341330
self.read_digits::<_, f64, RADIX>(
342331
|total, radix, v| {
343332
read_any = true;
@@ -348,17 +337,20 @@ impl<'a> Lexer<'a> {
348337

349338
Ok((f64::mul_add(total, radix as f64, v as f64), true))
350339
},
351-
&mut raw,
352340
true,
353341
)?;
354342

355343
if !read_any {
356344
self.error(start, SyntaxError::ExpectedDigit { radix: RADIX })?;
357345
}
358346

359-
let raw_str = raw.0.take().unwrap();
347+
let end = self.cur_pos();
348+
let raw = unsafe {
349+
// Safety: We got both start and end position from `self.input`
350+
self.input.slice(start, end)
351+
};
360352
// Remove number separator from number
361-
let raw_number_str = raw_str.replace('_', "");
353+
let raw_number_str = raw.replace('_', "");
362354
let parsed_float = BigIntValue::from_str_radix(&raw_number_str, RADIX as u32)
363355
.expect("failed to parse float using BigInt")
364356
.to_f64()
@@ -381,11 +373,7 @@ impl<'a> Lexer<'a> {
381373
/// were read, the integer value otherwise.
382374
/// When `len` is not zero, this
383375
/// will return `None` unless the integer has exactly `len` digits.
384-
pub(super) fn read_int<const RADIX: u8>(
385-
&mut self,
386-
len: u8,
387-
raw: &mut Raw,
388-
) -> LexResult<Option<f64>> {
376+
pub(super) fn read_int<const RADIX: u8>(&mut self, len: u8) -> LexResult<Option<f64>> {
389377
let mut count = 0u16;
390378
let v = self.read_digits::<_, Option<f64>, RADIX>(
391379
|opt: Option<f64>, radix, val| {
@@ -394,7 +382,6 @@ impl<'a> Lexer<'a> {
394382

395383
Ok((Some(total), count != len as u16))
396384
},
397-
raw,
398385
true,
399386
)?;
400387
if len != 0 && count != len as u16 {
@@ -404,11 +391,7 @@ impl<'a> Lexer<'a> {
404391
}
405392
}
406393

407-
pub(super) fn read_int_u32<const RADIX: u8>(
408-
&mut self,
409-
len: u8,
410-
raw: &mut Raw,
411-
) -> LexResult<Option<u32>> {
394+
pub(super) fn read_int_u32<const RADIX: u8>(&mut self, len: u8) -> LexResult<Option<u32>> {
412395
let start = self.state.start;
413396

414397
let mut count = 0;
@@ -427,7 +410,6 @@ impl<'a> Lexer<'a> {
427410

428411
Ok((Some(total), count != len))
429412
},
430-
raw,
431413
true,
432414
)?;
433415
if len != 0 && count != len {
@@ -441,7 +423,6 @@ impl<'a> Lexer<'a> {
441423
fn read_digits<F, Ret, const RADIX: u8>(
442424
&mut self,
443425
mut op: F,
444-
raw: &mut Raw,
445426
allow_num_separator: bool,
446427
) -> LexResult<Ret>
447428
where
@@ -499,7 +480,6 @@ impl<'a> Lexer<'a> {
499480
// Safety: cur() returns Some(c) where c is a valid char
500481
self.input.bump();
501482
}
502-
raw.push(c);
503483

504484
continue;
505485
}
@@ -511,8 +491,6 @@ impl<'a> Lexer<'a> {
511491
return Ok(total);
512492
};
513493

514-
raw.push(c);
515-
516494
self.bump();
517495

518496
let (t, cont) = op(total, RADIX, val)?;
@@ -574,7 +552,7 @@ mod tests {
574552

575553
fn int<const RADIX: u8>(s: &'static str) -> u32 {
576554
lex(s, |l| {
577-
l.read_int_u32::<RADIX>(0, &mut Raw(None))
555+
l.read_int_u32::<RADIX>(0)
578556
.unwrap()
579557
.expect("read_int returned None")
580558
})

‎crates/swc_ecma_parser/src/lexer/util.rs

-22
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
//! [babylon/util/identifier.js]:https://github.com/babel/babel/blob/master/packages/babylon/src/util/identifier.js
55
use std::char;
66

7-
use smartstring::{LazyCompact, SmartString};
87
use swc_common::{
98
comments::{Comment, CommentKind},
109
BytePos, Span, SyntaxContext,
@@ -22,27 +21,6 @@ use crate::{
2221
Tokens,
2322
};
2423

25-
/// Collector for raw string.
26-
///
27-
/// Methods of this struct is noop if the value is [None].
28-
pub(super) struct Raw(pub Option<SmartString<LazyCompact>>);
29-
30-
impl Raw {
31-
#[inline]
32-
pub fn push(&mut self, c: char) {
33-
if let Some(ref mut st) = self.0 {
34-
st.push(c)
35-
}
36-
}
37-
}
38-
39-
// pub const BACKSPACE: char = 8 as char;
40-
// pub const SHIFT_OUT: char = 14 as char;
41-
// pub const OGHAM_SPACE_MARK: char = '\u{1680}'; // ' '
42-
// pub const LINE_FEED: char = '\n';
43-
// pub const LINE_SEPARATOR: char = '\u{2028}';
44-
// pub const PARAGRAPH_SEPARATOR: char = '\u{2029}';
45-
4624
impl<'a> Lexer<'a> {
4725
pub(super) fn span(&self, start: BytePos) -> Span {
4826
let end = self.last_pos();

0 commit comments

Comments
 (0)
Please sign in to comment.