Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(xml/parser): pi parsing #6450

Merged
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 0 additions & 2 deletions crates/swc_xml_codegen/src/lib.rs
Expand Up @@ -285,8 +285,6 @@ where
processing_instruction.push_str("?>");

write_multiline_raw!(self, n.span, &processing_instruction);
// TODO only for top instructions
newline!(self);
}

fn create_context_for_element(&self, n: &Element) -> Ctx {
Expand Down
10 changes: 5 additions & 5 deletions crates/swc_xml_codegen/src/macros.rs
Expand Up @@ -22,11 +22,11 @@ macro_rules! write_multiline_raw {
}};
}

macro_rules! newline {
($g:expr) => {{
$g.wr.write_newline()?;
}};
}
// macro_rules! newline {
// ($g:expr) => {{
// $g.wr.write_newline()?;
// }};
// }

macro_rules! formatting_newline {
($g:expr) => {{
Expand Down
3 changes: 1 addition & 2 deletions crates/swc_xml_codegen/tests/fixture/base/output.min.xml
@@ -1,5 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<note>
<?xml version="1.0" encoding="UTF-8"?><note>
<to>Tove</to>
<from>Jani</from>
<heading>Reminder</heading>
Expand Down
3 changes: 1 addition & 2 deletions crates/swc_xml_codegen/tests/fixture/base/output.xml
@@ -1,5 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<note>
<?xml version="1.0" encoding="UTF-8"?><note>
<to>Tove</to>
<from>Jani</from>
<heading>Reminder</heading>
Expand Down
@@ -1,5 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<note>
<?xml version="1.0" encoding="UTF-8"?><note>
<to>Tove</to>
<from>Jani</from>
<heading>Reminder</heading>
Expand Down
6 changes: 6 additions & 0 deletions crates/swc_xml_parser/src/error.rs
Expand Up @@ -91,6 +91,10 @@ impl Error {
"Unexpected colon before attribute name".into()
}
ErrorKind::UnexpectedSolidusInTag => "Unexpected solidus in tag".into(),
ErrorKind::NoTargetNameInProcessingInstruction => "No target name".into(),
ErrorKind::MissingWhitespaceBeforeQuestionInProcessingInstruction => {
"Missing whitespace before '?'".into()
}

// Parser errors
ErrorKind::UnexpectedTokenInStartPhase => "Unexpected token in start phase".into(),
Expand Down Expand Up @@ -152,6 +156,8 @@ pub enum ErrorKind {
UnexpectedCharacterAfterDoctypeSystemIdentifier,
UnexpectedColonBeforeAttributeName,
UnexpectedSolidusInTag,
NoTargetNameInProcessingInstruction,
MissingWhitespaceBeforeQuestionInProcessingInstruction,

// Parser errors
UnexpectedTokenInStartPhase,
Expand Down
68 changes: 52 additions & 16 deletions crates/swc_xml_parser/src/lexer/mod.rs
Expand Up @@ -19,9 +19,10 @@ pub enum State {
EndTagNameAfter,
Pi,
PiTarget,
PiTargetQuestion,
PiTargetAfter,
PiData,
PiAfter,
PiEnd,
MarkupDeclaration,
CommentStart,
CommentStartDash,
Expand Down Expand Up @@ -1166,15 +1167,26 @@ where
// U+000A LINE FEED (LF)
// U+0020 SPACE
// EOF
// Parse error. Reprocess the current input character in the bogus comment
// state.
// Parse error.
// Switch to the pi target after state.
Some(c) if is_spacy_except_ff(c) => {
self.emit_error(ErrorKind::InvalidCharacterOfProcessingInstruction);
self.reconsume_in_state(State::BogusComment);
self.create_processing_instruction_token();
self.state = State::PiTargetAfter;
}
None => {
self.emit_error(ErrorKind::EofInTag);
self.reconsume_in_state(State::BogusComment);
self.emit_error(ErrorKind::EofInProcessingInstruction);
self.create_processing_instruction_token();
self.emit_current_processing_instruction();
self.reconsume_in_state(State::Data);
}
// U+003F QUESTION MARK(?)
// Parse error (the processing instruction has no target name).
// Create a processing instruction token and switch to the pi end state (recovery mode).
Some('?') => {
self.emit_error(ErrorKind::NoTargetNameInProcessingInstruction);
self.create_processing_instruction_token();
self.state = State::PiEnd;
}
Some(c) => {
self.validate_input_stream_character(c);
Expand All @@ -1190,7 +1202,7 @@ where
// U+0009 CHARACTER TABULATION (tab)
// U+000A LINE FEED (LF)
// U+0020 SPACE
// Switch to the before attribute name state.
// Switch to the pi target after state.
Some(c) if is_spacy_except_ff(c) => {
self.state = State::PiTargetAfter;
}
Expand All @@ -1203,9 +1215,9 @@ where
self.reconsume_in_state(State::Data);
}
// U+003F QUESTION MARK(?)
// Switch to the pi after state.
// Switch to the pi target question state.
Some('?') => {
self.state = State::PiAfter;
self.state = State::PiTargetQuestion;
}
// Anything else
// Append the current input character to the processing instruction target and
Expand All @@ -1216,6 +1228,27 @@ where
}
}
}
State::PiTargetQuestion => {
// Consume the next input character:
match self.consume_next_char() {
// U+003E GREATER-THAN SIGN (>)
Some('>') => {
self.reconsume_in_state(State::PiEnd);
}
_ => {
self.errors.push(Error::new(
Span::new(
self.cur_pos - BytePos(1),
self.input.cur_pos() - BytePos(1),
Default::default(),
),
ErrorKind::MissingWhitespaceBeforeQuestionInProcessingInstruction,
));
self.set_processing_instruction_token(None, Some('?'));
self.reconsume_in_state(State::PiData);
}
}
}
State::PiTargetAfter => {
// Consume the next input character:
match self.consume_next_char() {
Expand All @@ -1239,7 +1272,7 @@ where
// U+003F QUESTION MARK(?)
// Switch to the pi end state.
Some('?') => {
self.state = State::PiAfter;
self.state = State::PiEnd;
}
// EOF
// Parse error. Emit the current processing instruction token and then reprocess
Expand All @@ -1258,7 +1291,7 @@ where
}
}
}
State::PiAfter => {
State::PiEnd => {
// Consume the next input character:
match self.consume_next_char() {
// U+003E GREATER-THAN SIGN (>)
Expand All @@ -1267,15 +1300,18 @@ where
self.emit_current_processing_instruction();
self.state = State::Data;
}
// U+003F QUESTION MARK(?)
// Append the current input character to the PI’s data and stay in the current
// state.
Some(c @ '?') => {
self.set_processing_instruction_token(None, Some(c));
// EOF
// Parse error. Emit the current processing instruction token and then reprocess
// the current input character in the data state.
None => {
self.emit_error(ErrorKind::EofInProcessingInstruction);
self.emit_current_processing_instruction();
self.reconsume_in_state(State::Data);
}
// Anything else
// Append U+003F QUESTION MARK(?) to the PI’s data, then reprocess the current
// input character in the pi data state.
_ => {
self.set_processing_instruction_token(None, Some('?'));
self.reconsume_in_state(State::PiData);
}
}
Expand Down
70 changes: 70 additions & 0 deletions crates/swc_xml_parser/tests/fixture/pi/dom.rust-debug
@@ -0,0 +1,70 @@
| <root>
| "
"
<?xslt ma>
| "
"
<?xslt
m>
| "
"
<?xslt >
| "
"
<?xslt >
| "
"
<?xml-stylesheet >
| "
"
<?foo version="1.0">
| "
"
<?foo version='1.0'>
| "
"
<?foo version='1.0' encoding="UTF-8">
| "
"
<?foo version='1.0' encoding='UTF-8'>
| "
"
<?foo version='1.0' encoding='utf-8'>
| "
"
<?foo version='1.0' encoding='EUC-JP'>
| "
"
<?foo version='1.0' encoding='UTF-8' standalone='yes'>
| "
"
<?foo version='1.0' encoding='UTF-8' standalone='no'>
| "
"
<?foo version='1.0' standalone='no'>
| "
"
<?foo version='1.0' standalone='no' >
| "
"
<?foo >
| "
"
<?f ?oo>
| "
"
<?f ?oo?>
| "
"
<?f ?????>
| "
"
<?test aaa >
| "
"
<?test ? >
| "
"
<?test a a a >
| "
"
25 changes: 25 additions & 0 deletions crates/swc_xml_parser/tests/fixture/pi/input.xml
@@ -0,0 +1,25 @@
<root>
<?xslt ma?>
<?xslt
m?>
<?xslt?>
<?xslt ?>
<?xml-stylesheet?>
<?foo version="1.0"?>
<?foo version='1.0'?>
<?foo version='1.0' encoding="UTF-8"?>
<?foo version='1.0' encoding='UTF-8'?>
<?foo version='1.0' encoding='utf-8'?>
<?foo version='1.0' encoding='EUC-JP'?>
<?foo version='1.0' encoding='UTF-8' standalone='yes'?>
<?foo version='1.0' encoding='UTF-8' standalone='no'?>
<?foo version='1.0' standalone='no'?>
<?foo version='1.0' standalone='no' ?>
<?foo?>
<?f ?oo?>
<?f ?oo??>
<?f ??????>
<?test aaa ?>
<?test ? ?>
<?test a a a ?>
</root>