Skip to content

Commit

Permalink
fix(xml/parser): Fix parsing of pi (#6450)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-akait committed Nov 16, 2022
1 parent df74981 commit 6388920
Show file tree
Hide file tree
Showing 71 changed files with 2,595 additions and 29 deletions.
2 changes: 0 additions & 2 deletions crates/swc_xml_codegen/src/lib.rs
Expand Up @@ -285,8 +285,6 @@ where
processing_instruction.push_str("?>");

write_multiline_raw!(self, n.span, &processing_instruction);
// TODO only for top instructions
newline!(self);
}

fn create_context_for_element(&self, n: &Element) -> Ctx {
Expand Down
10 changes: 5 additions & 5 deletions crates/swc_xml_codegen/src/macros.rs
Expand Up @@ -22,11 +22,11 @@ macro_rules! write_multiline_raw {
}};
}

macro_rules! newline {
($g:expr) => {{
$g.wr.write_newline()?;
}};
}
// macro_rules! newline {
// ($g:expr) => {{
// $g.wr.write_newline()?;
// }};
// }

macro_rules! formatting_newline {
($g:expr) => {{
Expand Down
3 changes: 1 addition & 2 deletions crates/swc_xml_codegen/tests/fixture/base/output.min.xml
@@ -1,5 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<note>
<?xml version="1.0" encoding="UTF-8"?><note>
<to>Tove</to>
<from>Jani</from>
<heading>Reminder</heading>
Expand Down
3 changes: 1 addition & 2 deletions crates/swc_xml_codegen/tests/fixture/base/output.xml
@@ -1,5 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<note>
<?xml version="1.0" encoding="UTF-8"?><note>
<to>Tove</to>
<from>Jani</from>
<heading>Reminder</heading>
Expand Down
3 changes: 1 addition & 2 deletions crates/swc_xml_codegen/tests/options/indent_type/output.xml
@@ -1,5 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<note>
<?xml version="1.0" encoding="UTF-8"?><note>
<to>Tove</to>
<from>Jani</from>
<heading>Reminder</heading>
Expand Down
6 changes: 6 additions & 0 deletions crates/swc_xml_parser/src/error.rs
Expand Up @@ -91,6 +91,10 @@ impl Error {
"Unexpected colon before attribute name".into()
}
ErrorKind::UnexpectedSolidusInTag => "Unexpected solidus in tag".into(),
ErrorKind::NoTargetNameInProcessingInstruction => "No target name".into(),
ErrorKind::MissingWhitespaceBeforeQuestionInProcessingInstruction => {
"Missing whitespace before '?'".into()
}

// Parser errors
ErrorKind::UnexpectedTokenInStartPhase => "Unexpected token in start phase".into(),
Expand Down Expand Up @@ -152,6 +156,8 @@ pub enum ErrorKind {
UnexpectedCharacterAfterDoctypeSystemIdentifier,
UnexpectedColonBeforeAttributeName,
UnexpectedSolidusInTag,
NoTargetNameInProcessingInstruction,
MissingWhitespaceBeforeQuestionInProcessingInstruction,

// Parser errors
UnexpectedTokenInStartPhase,
Expand Down
68 changes: 52 additions & 16 deletions crates/swc_xml_parser/src/lexer/mod.rs
Expand Up @@ -19,9 +19,10 @@ pub enum State {
EndTagNameAfter,
Pi,
PiTarget,
PiTargetQuestion,
PiTargetAfter,
PiData,
PiAfter,
PiEnd,
MarkupDeclaration,
CommentStart,
CommentStartDash,
Expand Down Expand Up @@ -1166,15 +1167,26 @@ where
// U+000A LINE FEED (LF)
// U+0020 SPACE
// EOF
// Parse error. Reprocess the current input character in the bogus comment
// state.
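// Parse error.
// For whitespace: create a processing instruction token and switch to the
// pi target after state.
// For EOF: emit the processing instruction token and reprocess the current
// input character in the data state.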
Some(c) if is_spacy_except_ff(c) => {
self.emit_error(ErrorKind::InvalidCharacterOfProcessingInstruction);
self.reconsume_in_state(State::BogusComment);
self.create_processing_instruction_token();
self.state = State::PiTargetAfter;
}
None => {
self.emit_error(ErrorKind::EofInTag);
self.reconsume_in_state(State::BogusComment);
self.emit_error(ErrorKind::EofInProcessingInstruction);
self.create_processing_instruction_token();
self.emit_current_processing_instruction();
self.reconsume_in_state(State::Data);
}
// U+003F QUESTION MARK(?)
// Parse error. Create a new processing instruction token (with no target name)
// and switch to the pi end state (recovery mode).
Some('?') => {
self.emit_error(ErrorKind::NoTargetNameInProcessingInstruction);
self.create_processing_instruction_token();
self.state = State::PiEnd;
}
Some(c) => {
self.validate_input_stream_character(c);
Expand All @@ -1190,7 +1202,7 @@ where
// U+0009 CHARACTER TABULATION (tab)
// U+000A LINE FEED (LF)
// U+0020 SPACE
// Switch to the before attribute name state.
// Switch to the pi target after state.
Some(c) if is_spacy_except_ff(c) => {
self.state = State::PiTargetAfter;
}
Expand All @@ -1203,9 +1215,9 @@ where
self.reconsume_in_state(State::Data);
}
// U+003F QUESTION MARK(?)
// Switch to the pi after state.
// Switch to the pi target question state.
Some('?') => {
self.state = State::PiAfter;
self.state = State::PiTargetQuestion;
}
// Anything else
// Append the current input character to the processing instruction target and
Expand All @@ -1216,6 +1228,27 @@ where
}
}
}
State::PiTargetQuestion => {
// Consume the next input character:
match self.consume_next_char() {
// U+003E GREATER-THAN SIGN (>)
Some('>') => {
self.reconsume_in_state(State::PiEnd);
}
_ => {
self.errors.push(Error::new(
Span::new(
self.cur_pos - BytePos(1),
self.input.cur_pos() - BytePos(1),
Default::default(),
),
ErrorKind::MissingWhitespaceBeforeQuestionInProcessingInstruction,
));
self.set_processing_instruction_token(None, Some('?'));
self.reconsume_in_state(State::PiData);
}
}
}
State::PiTargetAfter => {
// Consume the next input character:
match self.consume_next_char() {
Expand All @@ -1239,7 +1272,7 @@ where
// U+003F QUESTION MARK(?)
// Switch to the pi end state.
Some('?') => {
self.state = State::PiAfter;
self.state = State::PiEnd;
}
// EOF
// Parse error. Emit the current processing instruction token and then reprocess
Expand All @@ -1258,7 +1291,7 @@ where
}
}
}
State::PiAfter => {
State::PiEnd => {
// Consume the next input character:
match self.consume_next_char() {
// U+003E GREATER-THAN SIGN (>)
Expand All @@ -1267,15 +1300,18 @@ where
self.emit_current_processing_instruction();
self.state = State::Data;
}
// U+003F QUESTION MARK(?)
// Append the current input character to the PI’s data and stay in the current
// state.
Some(c @ '?') => {
self.set_processing_instruction_token(None, Some(c));
// EOF
// Parse error. Emit the current processing instruction token and then reprocess
// the current input character in the data state.
None => {
self.emit_error(ErrorKind::EofInProcessingInstruction);
self.emit_current_processing_instruction();
self.reconsume_in_state(State::Data);
}
// Anything else
// Reprocess the current input character in the pi data state.
_ => {
self.set_processing_instruction_token(None, Some('?'));
self.reconsume_in_state(State::PiData);
}
}
Expand Down
70 changes: 70 additions & 0 deletions crates/swc_xml_parser/tests/fixture/pi/dom.rust-debug
@@ -0,0 +1,70 @@
| <root>
| "
"
<?xslt ma>
| "
"
<?xslt
m>
| "
"
<?xslt >
| "
"
<?xslt >
| "
"
<?xml-stylesheet >
| "
"
<?foo version="1.0">
| "
"
<?foo version='1.0'>
| "
"
<?foo version='1.0' encoding="UTF-8">
| "
"
<?foo version='1.0' encoding='UTF-8'>
| "
"
<?foo version='1.0' encoding='utf-8'>
| "
"
<?foo version='1.0' encoding='EUC-JP'>
| "
"
<?foo version='1.0' encoding='UTF-8' standalone='yes'>
| "
"
<?foo version='1.0' encoding='UTF-8' standalone='no'>
| "
"
<?foo version='1.0' standalone='no'>
| "
"
<?foo version='1.0' standalone='no' >
| "
"
<?foo >
| "
"
<?f ?oo>
| "
"
<?f ?oo?>
| "
"
<?f ?????>
| "
"
<?test aaa >
| "
"
<?test ? >
| "
"
<?test a a a >
| "
"
25 changes: 25 additions & 0 deletions crates/swc_xml_parser/tests/fixture/pi/input.xml
@@ -0,0 +1,25 @@
<root>
<?xslt ma?>
<?xslt
m?>
<?xslt?>
<?xslt ?>
<?xml-stylesheet?>
<?foo version="1.0"?>
<?foo version='1.0'?>
<?foo version='1.0' encoding="UTF-8"?>
<?foo version='1.0' encoding='UTF-8'?>
<?foo version='1.0' encoding='utf-8'?>
<?foo version='1.0' encoding='EUC-JP'?>
<?foo version='1.0' encoding='UTF-8' standalone='yes'?>
<?foo version='1.0' encoding='UTF-8' standalone='no'?>
<?foo version='1.0' standalone='no'?>
<?foo version='1.0' standalone='no' ?>
<?foo?>
<?f ?oo?>
<?f ?oo??>
<?f ??????>
<?test aaa ?>
<?test ? ?>
<?test a a a ?>
</root>

1 comment on commit 6388920

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 6388920 Previous: 1dfd8ce Ratio
es/full/bugs-1 344232 ns/iter (± 20153) 357899 ns/iter (± 29349) 0.96
es/full/minify/libraries/antd 1870017561 ns/iter (± 53797770) 1995326402 ns/iter (± 79291138) 0.94
es/full/minify/libraries/d3 419934434 ns/iter (± 14523502) 456519937 ns/iter (± 44126815) 0.92
es/full/minify/libraries/echarts 1588121067 ns/iter (± 93895552) 1939151547 ns/iter (± 124765219) 0.82
es/full/minify/libraries/jquery 110251465 ns/iter (± 7453291) 129731158 ns/iter (± 10642916) 0.85
es/full/minify/libraries/lodash 126503063 ns/iter (± 12762936) 155720302 ns/iter (± 20475401) 0.81
es/full/minify/libraries/moment 60735024 ns/iter (± 1532977) 75765692 ns/iter (± 5778983) 0.80
es/full/minify/libraries/react 20708172 ns/iter (± 322676) 24480300 ns/iter (± 2005651) 0.85
es/full/minify/libraries/terser 321355080 ns/iter (± 16845243) 388452515 ns/iter (± 23837383) 0.83
es/full/minify/libraries/three 564867714 ns/iter (± 17924549) 627710057 ns/iter (± 29120172) 0.90
es/full/minify/libraries/typescript 3584339278 ns/iter (± 118889990) 4048571408 ns/iter (± 1382412454) 0.89
es/full/minify/libraries/victory 872678416 ns/iter (± 47295398) 976403133 ns/iter (± 35867447) 0.89
es/full/minify/libraries/vue 178966369 ns/iter (± 16255938) 195630668 ns/iter (± 21116209) 0.91
es/full/codegen/es3 34200 ns/iter (± 2200) 39693 ns/iter (± 6405) 0.86
es/full/codegen/es5 34074 ns/iter (± 840) 38896 ns/iter (± 5542) 0.88
es/full/codegen/es2015 34126 ns/iter (± 1534) 37283 ns/iter (± 5955) 0.92
es/full/codegen/es2016 34873 ns/iter (± 1778) 40515 ns/iter (± 10345) 0.86
es/full/codegen/es2017 34514 ns/iter (± 2488) 36930 ns/iter (± 7646) 0.93
es/full/codegen/es2018 34407 ns/iter (± 2145) 37856 ns/iter (± 6874) 0.91
es/full/codegen/es2019 34657 ns/iter (± 1562) 45958 ns/iter (± 23401) 0.75
es/full/codegen/es2020 34543 ns/iter (± 3634) 38990 ns/iter (± 6135) 0.89
es/full/all/es3 207515867 ns/iter (± 21111045) 238580830 ns/iter (± 28722168) 0.87
es/full/all/es5 199288045 ns/iter (± 20306594) 224927333 ns/iter (± 26666477) 0.89
es/full/all/es2015 155166491 ns/iter (± 13875040) 177884956 ns/iter (± 23786388) 0.87
es/full/all/es2016 155754607 ns/iter (± 13486061) 179526493 ns/iter (± 24409673) 0.87
es/full/all/es2017 158548077 ns/iter (± 14597397) 176323122 ns/iter (± 22744149) 0.90
es/full/all/es2018 159638772 ns/iter (± 16745968) 174981188 ns/iter (± 25129657) 0.91
es/full/all/es2019 154337469 ns/iter (± 18614991) 174284662 ns/iter (± 20953279) 0.89
es/full/all/es2020 148226789 ns/iter (± 18017006) 163635784 ns/iter (± 13875128) 0.91
es/full/parser 744271 ns/iter (± 64880) 830683 ns/iter (± 123839) 0.90
es/full/base/fixer 27685 ns/iter (± 3536) 31749 ns/iter (± 6214) 0.87
es/full/base/resolver_and_hygiene 96451 ns/iter (± 11987) 101760 ns/iter (± 18473) 0.95
serialization of ast node 217 ns/iter (± 24) 261 ns/iter (± 44) 0.83
serialization of serde 232 ns/iter (± 15) 275 ns/iter (± 58) 0.84

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.