Skip to content

Commit c255cfd

Browse files
authored Oct 27, 2022
refactor(html/parser): Refactor (#6267)
1 parent 09b2961 commit c255cfd

File tree

3 files changed

+54
-66
lines changed

3 files changed

+54
-66
lines changed
 

‎crates/swc_html_parser/src/lexer/mod.rs

+51-64
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ where
234234
take(&mut self.errors)
235235
}
236236

237-
fn set_last_start_tag_name(&mut self, tag_name: &str) {
237+
fn set_last_start_tag_name(&mut self, tag_name: &JsWord) {
238238
self.last_start_tag_name = Some(tag_name.into());
239239
}
240240

@@ -596,16 +596,11 @@ where
596596
}
597597
}
598598

599-
fn start_new_attribute(&mut self, c: Option<char>) {
599+
fn start_new_attribute(&mut self) {
600600
if let Some(Tag { attributes, .. }) = &mut self.current_tag_token {
601601
// The longest known attribute is "glyph-orientation-horizontal" for SVG tags
602-
let mut name = String::with_capacity(28);
603-
let mut raw_name = String::with_capacity(28);
604-
605-
if let Some(c) = c {
606-
name.push(c);
607-
raw_name.push(c);
608-
};
602+
let name = String::with_capacity(28);
603+
let raw_name = String::with_capacity(28);
609604

610605
attributes.push(Attribute {
611606
span: Default::default(),
@@ -619,49 +614,49 @@ where
619614
}
620615
}
621616

622-
fn append_to_attribute(
623-
&mut self,
624-
name: Option<(char, char)>,
625-
value: Option<(bool, Option<char>, Option<char>)>,
626-
) {
617+
fn append_name_to_attribute(&mut self, c: char, raw_c: Option<char>) {
627618
if let Some(Tag { attributes, .. }) = &mut self.current_tag_token {
628619
if let Some(attribute) = attributes.last_mut() {
629-
if let Some(name) = name {
630-
attribute.name.push(name.0);
620+
attribute.name.push(c);
631621

622+
if let Some(raw_c) = raw_c {
632623
if let Some(raw_name) = &mut attribute.raw_name {
633-
raw_name.push(name.1);
624+
raw_name.push(raw_c);
634625
}
635626
}
627+
}
628+
}
629+
}
636630

637-
if let Some(value) = value {
638-
if let Some(c) = value.1 {
639-
if let Some(old_value) = &mut attribute.value {
640-
old_value.push(c);
641-
} else {
642-
let mut new_value = String::with_capacity(255);
631+
fn append_value_to_attribute(&mut self, quotes: bool, c: Option<char>, raw_c: Option<char>) {
632+
if let Some(Tag { attributes, .. }) = &mut self.current_tag_token {
633+
if let Some(attribute) = attributes.last_mut() {
634+
if let Some(c) = c {
635+
if let Some(old_value) = &mut attribute.value {
636+
old_value.push(c);
637+
} else {
638+
let mut new_value = String::with_capacity(255);
643639

644-
new_value.push(c);
640+
new_value.push(c);
645641

646-
attribute.value = Some(new_value);
647-
}
642+
attribute.value = Some(new_value);
648643
}
644+
}
649645

650-
if let Some(c) = value.2 {
651-
// Quote for attribute was found, so we set empty value by default
652-
if value.0 && attribute.value.is_none() {
653-
attribute.value = Some(String::with_capacity(255));
654-
}
646+
if let Some(raw_c) = raw_c {
647+
// Quote for attribute was found, so we set empty value by default
648+
if quotes && attribute.value.is_none() {
649+
attribute.value = Some(String::with_capacity(255));
650+
}
655651

656-
if let Some(raw_value) = &mut attribute.raw_value {
657-
raw_value.push(c);
658-
} else {
659-
let mut raw_new_value = String::with_capacity(255);
652+
if let Some(raw_value) = &mut attribute.raw_value {
653+
raw_value.push(raw_c);
654+
} else {
655+
let mut raw_new_value = String::with_capacity(255);
660656

661-
raw_new_value.push(c);
657+
raw_new_value.push(raw_c);
662658

663-
attribute.raw_value = Some(raw_new_value);
664-
}
659+
attribute.raw_value = Some(raw_new_value);
665660
}
666661
}
667662
}
@@ -2178,23 +2173,24 @@ where
21782173
// We set `None` for `value` to support boolean attributes in AST
21792174
Some(c @ '=') => {
21802175
self.emit_error(ErrorKind::UnexpectedEqualsSignBeforeAttributeName);
2181-
self.start_new_attribute(Some(c));
2176+
self.start_new_attribute();
2177+
self.append_name_to_attribute(c, Some(c));
21822178
self.state = State::AttributeName;
21832179
}
21842180
// Anything else
21852181
// Start a new attribute in the current tag token. Set that attribute name
21862182
// and value to the empty string. Reconsume in the attribute name state.
21872183
// We set `None` for `value` to support boolean attributes in AST
21882184
_ => {
2189-
self.start_new_attribute(None);
2185+
self.start_new_attribute();
21902186
self.reconsume_in_state(State::AttributeName);
21912187
}
21922188
}
21932189
}
21942190
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
21952191
State::AttributeName => {
21962192
let anything_else = |lexer: &mut Lexer<I>, c: char| {
2197-
lexer.append_to_attribute(Some((c, c)), None);
2193+
lexer.append_name_to_attribute(c, Some(c));
21982194
};
21992195

22002196
// Consume the next input character:
@@ -2225,14 +2221,14 @@ where
22252221
// Append the lowercase version of the current input character (add 0x0020
22262222
// to the character's code point) to the current attribute's name.
22272223
Some(c) if is_ascii_upper_alpha(c) => {
2228-
self.append_to_attribute(Some((c.to_ascii_lowercase(), c)), None);
2224+
self.append_name_to_attribute(c.to_ascii_lowercase(), Some(c));
22292225
}
22302226
// U+0000 NULL
22312227
// This is an unexpected-null-character parse error. Append a U+FFFD
22322228
// REPLACEMENT CHARACTER character to the current attribute's name.
22332229
Some(c @ '\x00') => {
22342230
self.emit_error(ErrorKind::UnexpectedNullCharacter);
2235-
self.append_to_attribute(Some((REPLACEMENT_CHARACTER, c)), None);
2231+
self.append_name_to_attribute(REPLACEMENT_CHARACTER, Some(c));
22362232
}
22372233
// U+0022 QUOTATION MARK (")
22382234
// U+0027 APOSTROPHE (')
@@ -2304,7 +2300,7 @@ where
23042300
// and value to the empty string. Reconsume in the attribute name state.
23052301
// We set `None` for `value` to support boolean attributes in AST
23062302
_ => {
2307-
self.start_new_attribute(None);
2303+
self.start_new_attribute();
23082304
self.reconsume_in_state(State::AttributeName);
23092305
}
23102306
}
@@ -2324,13 +2320,13 @@ where
23242320
// U+0022 QUOTATION MARK (")
23252321
// Switch to the attribute value (double-quoted) state.
23262322
Some(c @ '"') => {
2327-
self.append_to_attribute(None, Some((true, None, Some(c))));
2323+
self.append_value_to_attribute(true, None, Some(c));
23282324
self.state = State::AttributeValueDoubleQuoted;
23292325
}
23302326
// U+0027 APOSTROPHE (')
23312327
// Switch to the attribute value (single-quoted) state.
23322328
Some(c @ '\'') => {
2333-
self.append_to_attribute(None, Some((true, None, Some(c))));
2329+
self.append_value_to_attribute(true, None, Some(c));
23342330
self.state = State::AttributeValueSingleQuoted;
23352331
}
23362332
// U+003E GREATER-THAN SIGN (>)
@@ -2356,7 +2352,7 @@ where
23562352
// Switch to the after attribute value (quoted) state.
23572353
// We set value to support empty attributes (i.e. `attr=""`)
23582354
Some(c @ '"') => {
2359-
self.append_to_attribute(None, Some((false, None, Some(c))));
2355+
self.append_value_to_attribute(false, None, Some(c));
23602356
self.state = State::AfterAttributeValueQuoted;
23612357
}
23622358
// U+0026 AMPERSAND (&)
@@ -2371,10 +2367,7 @@ where
23712367
// REPLACEMENT CHARACTER character to the current attribute's value.
23722368
Some(c @ '\x00') => {
23732369
self.emit_error(ErrorKind::UnexpectedNullCharacter);
2374-
self.append_to_attribute(
2375-
None,
2376-
Some((false, Some(REPLACEMENT_CHARACTER), Some(c))),
2377-
);
2370+
self.append_value_to_attribute(false, Some(REPLACEMENT_CHARACTER), Some(c));
23782371
}
23792372
// EOF
23802373
// This is an eof-in-tag parse error. Emit an end-of-file token.
@@ -2388,7 +2381,7 @@ where
23882381
// Append the current input character to the current attribute's value.
23892382
Some(c) => {
23902383
self.validate_input_stream_character(c);
2391-
self.append_to_attribute(None, Some((false, Some(c), Some(c))));
2384+
self.append_value_to_attribute(false, Some(c), Some(c));
23922385
}
23932386
}
23942387
}
@@ -2400,7 +2393,7 @@ where
24002393
// Switch to the after attribute value (quoted) state.
24012394
// We set value to support empty attributes (i.e. `attr=''`)
24022395
Some(c @ '\'') => {
2403-
self.append_to_attribute(None, Some((false, None, Some(c))));
2396+
self.append_value_to_attribute(false, None, Some(c));
24042397
self.state = State::AfterAttributeValueQuoted;
24052398
}
24062399
// U+0026 AMPERSAND (&)
@@ -2415,10 +2408,7 @@ where
24152408
// REPLACEMENT CHARACTER character to the current attribute's value.
24162409
Some(c @ '\x00') => {
24172410
self.emit_error(ErrorKind::UnexpectedNullCharacter);
2418-
self.append_to_attribute(
2419-
None,
2420-
Some((false, Some(REPLACEMENT_CHARACTER), Some(c))),
2421-
);
2411+
self.append_value_to_attribute(false, Some(REPLACEMENT_CHARACTER), Some(c));
24222412
}
24232413
// EOF
24242414
// This is an eof-in-tag parse error. Emit an end-of-file token.
@@ -2432,14 +2422,14 @@ where
24322422
// Append the current input character to the current attribute's value.
24332423
Some(c) => {
24342424
self.validate_input_stream_character(c);
2435-
self.append_to_attribute(None, Some((false, Some(c), Some(c))));
2425+
self.append_value_to_attribute(false, Some(c), Some(c));
24362426
}
24372427
}
24382428
}
24392429
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(unquoted)-state
24402430
State::AttributeValueUnquoted => {
24412431
let anything_else = |lexer: &mut Lexer<I>, c: char| {
2442-
lexer.append_to_attribute(None, Some((false, Some(c), Some(c))));
2432+
lexer.append_value_to_attribute(false, Some(c), Some(c));
24432433
};
24442434

24452435
// Consume the next input character:
@@ -2473,10 +2463,7 @@ where
24732463
// REPLACEMENT CHARACTER character to the current attribute's value.
24742464
Some(c @ '\x00') => {
24752465
self.emit_error(ErrorKind::UnexpectedNullCharacter);
2476-
self.append_to_attribute(
2477-
None,
2478-
Some((false, Some(REPLACEMENT_CHARACTER), Some(c))),
2479-
);
2466+
self.append_value_to_attribute(false, Some(REPLACEMENT_CHARACTER), Some(c));
24802467
}
24812468
// U+0022 QUOTATION MARK (")
24822469
// U+0027 APOSTROPHE (')
@@ -4196,7 +4183,7 @@ where
41964183
// Otherwise, emit the current input character as a character token.
41974184
Some(c) if c.is_ascii_alphanumeric() => {
41984185
if self.is_consumed_as_part_of_an_attribute() {
4199-
self.append_to_attribute(None, Some((false, Some(c), Some(c))));
4186+
self.append_value_to_attribute(false, Some(c), Some(c));
42004187
} else {
42014188
self.emit_character_token(c)?;
42024189
}

‎crates/swc_html_parser/src/parser/input.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use std::{fmt::Debug, mem::take};
22

3+
use swc_atoms::JsWord;
34
use swc_common::{BytePos, Span};
45
use swc_html_ast::{Token, TokenAndSpan};
56

@@ -13,7 +14,7 @@ pub trait ParserInput: Iterator<Item = TokenAndSpan> {
1314

1415
fn take_errors(&mut self) -> Vec<Error>;
1516

16-
fn set_last_start_tag_name(&mut self, tag_name: &str);
17+
fn set_last_start_tag_name(&mut self, tag_name: &JsWord);
1718

1819
fn set_input_state(&mut self, state: State);
1920

‎crates/swc_html_parser/tests/html5lib_tests.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ fn html5lib_test_tokenizer(input: PathBuf) {
146146
lexer.set_input_state(state.clone());
147147

148148
if let Some(last_start_tag) = test.get("lastStartTag") {
149-
let last_start_tag: String = serde_json::from_value(last_start_tag.clone())
149+
let last_start_tag: JsWord = serde_json::from_value(last_start_tag.clone())
150150
.expect("failed to get lastStartTag in test");
151151

152152
lexer.set_last_start_tag_name(&last_start_tag);

1 commit comment

Comments
 (1)

github-actions[bot] commented on Oct 27, 2022

@github-actions[bot]

Benchmark

Benchmark suite Current: c255cfd Previous: c318446 Ratio
es/full/bugs-1 425656 ns/iter (± 12124) 351098 ns/iter (± 13736) 1.21
es/full/minify/libraries/antd 2112013799 ns/iter (± 23917214) 1902755724 ns/iter (± 30591665) 1.11
es/full/minify/libraries/d3 436986000 ns/iter (± 8385848) 412876150 ns/iter (± 16936844) 1.06
es/full/minify/libraries/echarts 1800964043 ns/iter (± 18143263) 1665869386 ns/iter (± 165102758) 1.08
es/full/minify/libraries/jquery 119725938 ns/iter (± 3836487) 97019617 ns/iter (± 6327132) 1.23
es/full/minify/libraries/lodash 140705740 ns/iter (± 2894877) 142928270 ns/iter (± 28017963) 0.98
es/full/minify/libraries/moment 70106299 ns/iter (± 460609) 61512055 ns/iter (± 2424335) 1.14
es/full/minify/libraries/react 24144169 ns/iter (± 281704) 20458342 ns/iter (± 699566) 1.18
es/full/minify/libraries/terser 330787959 ns/iter (± 13183300) 349326588 ns/iter (± 16923562) 0.95
es/full/minify/libraries/three 604775193 ns/iter (± 11840143) 592443534 ns/iter (± 19889612) 1.02
es/full/minify/libraries/typescript 4009473205 ns/iter (± 30798593) 3640111607 ns/iter (± 775570662) 1.10
es/full/minify/libraries/victory 904122343 ns/iter (± 20983518) 818085181 ns/iter (± 21394040) 1.11
es/full/minify/libraries/vue 178210396 ns/iter (± 4913465) 150694166 ns/iter (± 16114547) 1.18
es/full/codegen/es3 40808 ns/iter (± 486) 32709 ns/iter (± 694) 1.25
es/full/codegen/es5 40599 ns/iter (± 540) 32685 ns/iter (± 551) 1.24
es/full/codegen/es2015 40717 ns/iter (± 990) 32861 ns/iter (± 1124) 1.24
es/full/codegen/es2016 40545 ns/iter (± 712) 32689 ns/iter (± 932) 1.24
es/full/codegen/es2017 40537 ns/iter (± 846) 32759 ns/iter (± 1962) 1.24
es/full/codegen/es2018 40572 ns/iter (± 1096) 32785 ns/iter (± 4431) 1.24
es/full/codegen/es2019 40580 ns/iter (± 1502) 32721 ns/iter (± 595) 1.24
es/full/codegen/es2020 40719 ns/iter (± 864) 33265 ns/iter (± 548) 1.22
es/full/all/es3 232201716 ns/iter (± 4903607) 192896187 ns/iter (± 7174584) 1.20
es/full/all/es5 219331415 ns/iter (± 3412543) 182716767 ns/iter (± 7374127) 1.20
es/full/all/es2015 177321052 ns/iter (± 4010221) 145588591 ns/iter (± 7520312) 1.22
es/full/all/es2016 177739154 ns/iter (± 3677578) 144217344 ns/iter (± 6207969) 1.23
es/full/all/es2017 177918666 ns/iter (± 4961369) 144833095 ns/iter (± 10954996) 1.23
es/full/all/es2018 175457004 ns/iter (± 4340355) 142447115 ns/iter (± 4386895) 1.23
es/full/all/es2019 176206418 ns/iter (± 4462392) 141718989 ns/iter (± 4894981) 1.24
es/full/all/es2020 166514582 ns/iter (± 4176959) 136371626 ns/iter (± 4117309) 1.22
es/full/parser 882187 ns/iter (± 48365) 723572 ns/iter (± 34702) 1.22
es/full/base/fixer 32344 ns/iter (± 3300) 25784 ns/iter (± 398) 1.25
es/full/base/resolver_and_hygiene 114658 ns/iter (± 4395) 91475 ns/iter (± 3030) 1.25
serialization of ast node 259 ns/iter (± 3) 216 ns/iter (± 4) 1.20
serialization of serde 263 ns/iter (± 22) 217 ns/iter (± 5) 1.21

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.