From 72516fd864cd3d2ff94496d991f370c9103c0555 Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Wed, 30 Nov 2022 22:01:35 +0300 Subject: [PATCH 1/9] fix(xml/parser): avoid short tags --- crates/swc_xml_ast/src/token.rs | 4 - crates/swc_xml_parser/src/lexer/mod.rs | 40 +-- crates/swc_xml_parser/src/parser/mod.rs | 12 - .../swc_xml_parser/tests/fixture/tags/dom.txt | 16 +- .../tests/fixture/tags/input.xml | 8 +- .../tests/fixture/tags/output.json | 164 +++++++--- .../tests/fixture/tags/span.swc-stderr | 284 +++++++++++++----- 7 files changed, 356 insertions(+), 172 deletions(-) diff --git a/crates/swc_xml_ast/src/token.rs b/crates/swc_xml_ast/src/token.rs index a7e61f60b7c0..148b553c303f 100644 --- a/crates/swc_xml_ast/src/token.rs +++ b/crates/swc_xml_ast/src/token.rs @@ -40,10 +40,6 @@ pub enum Token { tag_name: JsWord, attributes: Vec, }, - ShortTag { - tag_name: JsWord, - attributes: Vec, - }, EmptyTag { tag_name: JsWord, attributes: Vec, diff --git a/crates/swc_xml_parser/src/lexer/mod.rs b/crates/swc_xml_parser/src/lexer/mod.rs index 3512c557ce98..1e527eadb177 100644 --- a/crates/swc_xml_parser/src/lexer/mod.rs +++ b/crates/swc_xml_parser/src/lexer/mod.rs @@ -79,7 +79,6 @@ struct Doctype { enum TagKind { Start, End, - Short, Empty, } @@ -738,43 +737,6 @@ where self.emit_token(end_tag_token); } - TagKind::Short => { - if !current_tag_token.attributes.is_empty() { - self.emit_error(ErrorKind::ShortTagWithAttributes); - } - - let mut already_seen: AHashSet = Default::default(); - - let short_tag = Token::ShortTag { - tag_name: current_tag_token.tag_name.into(), - attributes: current_tag_token - .attributes - .drain(..) - .map(|attribute| { - let name = JsWord::from(attribute.name); - - if already_seen.contains(&name) { - self.errors.push(Error::new( - attribute.span, - ErrorKind::DuplicateAttribute, - )); - } - - already_seen.insert(name.clone()); - - AttributeToken { - span: attribute.span, - name, - raw_name: attribute.raw_name.map(JsWord::from), - value: attribute.value.map(JsWord::from), - raw_value: attribute.raw_value.map(JsWord::from), - } - }) - .collect(), - }; - - self.emit_token(short_tag); - } TagKind::Empty => { let mut already_seen: AHashSet = Default::default(); @@ -1088,7 +1050,7 @@ where // U+003E GREATER-THAN SIGN (>) // Emit a short end tag token and then switch to the data state. Some('>') => { - self.emit_tag_token(Some(TagKind::Short)); + self.emit_tag_token(Some(TagKind::End)); self.state = State::Data; } // U+0009 CHARACTER TABULATION (Tab) diff --git a/crates/swc_xml_parser/src/parser/mod.rs b/crates/swc_xml_parser/src/parser/mod.rs index bb76cf5c2fb3..abe540f52ba7 100644 --- a/crates/swc_xml_parser/src/parser/mod.rs +++ b/crates/swc_xml_parser/src/parser/mod.rs @@ -350,13 +350,6 @@ where self.phase = Phase::EndPhase; } } - Token::ShortTag { .. } => { - self.open_elements_stack.items.pop(); - - if self.open_elements_stack.items.is_empty() { - self.phase = Phase::EndPhase; - } - } Token::Comment { .. } => { let comment = self.create_comment(token_and_info); @@ -482,11 +475,6 @@ where attributes, .. } - | Token::ShortTag { - tag_name, - attributes, - .. - } | Token::EmptyTag { tag_name, attributes, diff --git a/crates/swc_xml_parser/tests/fixture/tags/dom.txt b/crates/swc_xml_parser/tests/fixture/tags/dom.txt index 338a98a8f619..2c22f5320b8b 100644 --- a/crates/swc_xml_parser/tests/fixture/tags/dom.txt +++ b/crates/swc_xml_parser/tests/fixture/tags/dom.txt @@ -21,10 +21,20 @@ | | " " -| +| | " - " -| +| <тест> +| " +" +| <тест> +| "test" +| " +" +| +| " +" +| +| "test" | " " diff --git a/crates/swc_xml_parser/tests/fixture/tags/input.xml b/crates/swc_xml_parser/tests/fixture/tags/input.xml index df83f4999d2d..7f8cac04c3fd 100644 --- a/crates/swc_xml_parser/tests/fixture/tags/input.xml +++ b/crates/swc_xml_parser/tests/fixture/tags/input.xml @@ -1,11 +1,13 @@ - + Start - - +<тест/> +<тест>test + +test \ No newline at end of file diff --git a/crates/swc_xml_parser/tests/fixture/tags/output.json b/crates/swc_xml_parser/tests/fixture/tags/output.json index 0d1b1f8da8b3..dfe98b078050 100644 --- a/crates/swc_xml_parser/tests/fixture/tags/output.json +++ b/crates/swc_xml_parser/tests/fixture/tags/output.json @@ -2,7 +2,7 @@ "type": "Document", "span": { "start": 1, - "end": 174, + "end": 224, "ctxt": 0 }, "children": [ @@ -10,7 +10,7 @@ "type": "Element", "span": { "start": 1, - "end": 174, + "end": 224, "ctxt": 0 }, "tagName": "root", @@ -41,7 +41,7 @@ "type": "Element", "span": { "start": 15, - "end": 22, + "end": 19, "ctxt": 0 }, "tagName": "a", @@ -51,8 +51,8 @@ { "type": "Text", "span": { - "start": 22, - "end": 23, + "start": 19, + "end": 20, "ctxt": 0 }, "data": "\n", @@ -61,8 +61,8 @@ { "type": "Comment", "span": { - "start": 23, - "end": 53, + "start": 20, + "end": 50, "ctxt": 0 }, "data": " start tag and end tag ", @@ -71,8 +71,8 @@ { "type": "Text", "span": { - "start": 53, - "end": 54, + "start": 50, + "end": 51, "ctxt": 0 }, "data": "\n", @@ -81,8 +81,8 @@ { "type": "Element", "span": { - "start": 54, - "end": 82, + "start": 51, + "end": 79, "ctxt": 0 }, "tagName": "start-tag", @@ -91,8 +91,8 @@ { "type": "Text", "span": { - "start": 65, - "end": 70, + "start": 62, + "end": 67, "ctxt": 0 }, "data": "Start", @@ -103,8 +103,8 @@ { "type": "Text", "span": { - "start": 82, - "end": 83, + "start": 79, + "end": 80, "ctxt": 0 }, "data": "\n", @@ -113,8 +113,8 @@ { "type": "Comment", "span": { - "start": 83, - "end": 101, + "start": 80, + "end": 98, "ctxt": 0 }, "data": " empty tag ", @@ -123,8 +123,8 @@ { "type": "Text", "span": { - "start": 101, - "end": 102, + "start": 98, + "end": 99, "ctxt": 0 }, "data": "\n", @@ -133,8 +133,8 @@ { "type": "Element", "span": { - "start": 102, - "end": 115, + "start": 99, + "end": 112, "ctxt": 0 }, "tagName": "short-tag", @@ -144,8 +144,8 @@ { "type": "Text", "span": { - "start": 115, - "end": 116, + "start": 112, + "end": 113, "ctxt": 0 }, "data": "\n", @@ -154,8 +154,8 @@ { "type": "Element", "span": { - "start": 116, - "end": 129, + "start": 113, + "end": 126, "ctxt": 0 }, "tagName": "test", @@ -165,49 +165,135 @@ { "type": "Text", "span": { - "start": 129, - "end": 130, + "start": 126, + "end": 127, "ctxt": 0 }, "data": "\n", "raw": "\n" }, { - "type": "Comment", + "type": "Element", "span": { - "start": 130, - "end": 148, + "start": 127, + "end": 140, "ctxt": 0 }, - "data": " short tag ", - "raw": "" + "tagName": "test", + "attributes": [], + "children": [] }, { "type": "Text", "span": { - "start": 148, + "start": 140, + "end": 141, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 141, + "end": 152, + "ctxt": 0 + }, + "tagName": "тест", + "attributes": [], + "children": [] + }, + { + "type": "Text", + "span": { + "start": 152, "end": 153, "ctxt": 0 }, - "data": "\n\n", - "raw": "\n\n" + "data": "\n", + "raw": "\n" }, { "type": "Element", "span": { "start": 153, - "end": 166, + "end": 178, "ctxt": 0 }, - "tagName": "test", + "tagName": "тест", + "attributes": [], + "children": [ + { + "type": "Text", + "span": { + "start": 163, + "end": 167, + "ctxt": 0 + }, + "data": "test", + "raw": "test" + } + ] + }, + { + "type": "Text", + "span": { + "start": 178, + "end": 179, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 179, + "end": 190, + "ctxt": 0 + }, + "tagName": "html:bar", "attributes": [], "children": [] }, { "type": "Text", "span": { - "start": 166, - "end": 167, + "start": 190, + "end": 191, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 191, + "end": 216, + "ctxt": 0 + }, + "tagName": "html:bar", + "attributes": [], + "children": [ + { + "type": "Text", + "span": { + "start": 201, + "end": 205, + "ctxt": 0 + }, + "data": "test", + "raw": "test" + } + ] + }, + { + "type": "Text", + "span": { + "start": 216, + "end": 217, "ctxt": 0 }, "data": "\n", diff --git a/crates/swc_xml_parser/tests/fixture/tags/span.swc-stderr b/crates/swc_xml_parser/tests/fixture/tags/span.swc-stderr index c488f81d9d97..399496b6bae7 100644 --- a/crates/swc_xml_parser/tests/fixture/tags/span.swc-stderr +++ b/crates/swc_xml_parser/tests/fixture/tags/span.swc-stderr @@ -2,53 +2,59 @@ x Document ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | ,-> - 2 | | + 2 | | 3 | | 4 | | Start 5 | | 6 | | 7 | | - 8 | | - 9 | | - 10 | | - 11 | `-> + 8 | | + 9 | | <тест/> + 10 | | <тест>test + 11 | | + 12 | | test + 13 | `-> `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | ,-> - 2 | | + 2 | | 3 | | 4 | | Start 5 | | 6 | | 7 | | - 8 | | - 9 | | - 10 | | - 11 | `-> + 8 | | + 9 | | <тест/> + 10 | | <тест>test + 11 | | + 12 | | test + 13 | `-> `---- x Element ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | ,-> - 2 | | + 2 | | 3 | | 4 | | Start 5 | | 6 | | 7 | | - 8 | | - 9 | | - 10 | | - 11 | `-> + 8 | | + 9 | | <тест/> + 10 | | <тест>test + 11 | | + 12 | | test + 13 | `-> `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | : ^ - 2 | + 2 | 3 | `---- @@ -56,14 +62,14 @@ ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | : ^ - 2 | + 2 | 3 | `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | - 2 | + 2 | : ^^^^^^^ 3 | `---- @@ -71,7 +77,7 @@ x Element ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | - 2 | + 2 | : ^^^^^^^ 3 | `---- @@ -79,24 +85,24 @@ x Child ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | - 2 | - : ^^^^^^^ + 2 | + : ^^^^ 3 | `---- x Element ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | - 2 | - : ^^^^^^^ + 2 | + : ^^^^ 3 | `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | - 2 | - : ^ + 2 | + : ^ 3 | 4 | Start `---- @@ -104,15 +110,15 @@ x Text ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | - 2 | - : ^ + 2 | + : ^ 3 | 4 | Start `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:2:1] - 2 | + 2 | 3 | : ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | Start @@ -120,7 +126,7 @@ x Comment ,-[$DIR/tests/fixture/tags/input.xml:2:1] - 2 | + 2 | 3 | : ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | Start @@ -128,7 +134,7 @@ x Child ,-[$DIR/tests/fixture/tags/input.xml:2:1] - 2 | + 2 | 3 | : ^ 4 | Start @@ -137,7 +143,7 @@ x Text ,-[$DIR/tests/fixture/tags/input.xml:2:1] - 2 | + 2 | 3 | : ^ 4 | Start @@ -250,7 +256,7 @@ 6 | : ^ 7 | - 8 | + 8 | `---- x Text @@ -259,7 +265,7 @@ 6 | : ^ 7 | - 8 | + 8 | `---- x Child @@ -267,7 +273,7 @@ 6 | 7 | : ^^^^^^^^^^^^^ - 8 | + 8 | `---- x Element @@ -275,7 +281,7 @@ 6 | 7 | : ^^^^^^^^^^^^^ - 8 | + 8 | `---- x Child @@ -283,8 +289,8 @@ 6 | 7 | : ^ - 8 | - 9 | + 8 | + 9 | <тест/> `---- x Text @@ -292,72 +298,206 @@ 6 | 7 | : ^ - 8 | - 9 | + 8 | + 9 | <тест/> `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:7:1] 7 | - 8 | - : ^^^^^^^^^^^^^^^^^^ - 9 | + 8 | + : ^^^^^^^^^^^^^ + 9 | <тест/> `---- - x Comment + x Element ,-[$DIR/tests/fixture/tags/input.xml:7:1] 7 | - 8 | - : ^^^^^^^^^^^^^^^^^^ - 9 | + 8 | + : ^^^^^^^^^^^^^ + 9 | <тест/> `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:7:1] - 7 | - 8 | ,-> - 9 | `-> - 10 | - 11 | + 7 | + 8 | + : ^ + 9 | <тест/> + 10 | <тест>test `---- x Text ,-[$DIR/tests/fixture/tags/input.xml:7:1] - 7 | - 8 | ,-> - 9 | `-> - 10 | - 11 | + 7 | + 8 | + : ^ + 9 | <тест/> + 10 | <тест>test + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:8:1] + 8 | + 9 | <тест/> + : ^^^^^^^^^^^ + 10 | <тест>test + `---- + + x Element + ,-[$DIR/tests/fixture/tags/input.xml:8:1] + 8 | + 9 | <тест/> + : ^^^^^^^^^^^ + 10 | <тест>test + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:8:1] + 8 | + 9 | <тест/> + : ^ + 10 | <тест>test + 11 | + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:8:1] + 8 | + 9 | <тест/> + : ^ + 10 | <тест>test + 11 | `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:9:1] - 9 | - 10 | - : ^^^^^^^^^^^^^ - 11 | + 9 | <тест/> + 10 | <тест>test + : ^^^^^^^^^^^^^^^^^^^^^^^^^ + 11 | `---- x Element ,-[$DIR/tests/fixture/tags/input.xml:9:1] - 9 | - 10 | - : ^^^^^^^^^^^^^ - 11 | + 9 | <тест/> + 10 | <тест>test + : ^^^^^^^^^^^^^^^^^^^^^^^^^ + 11 | `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:9:1] - 9 | - 10 | - : ^ - 11 | + 9 | <тест/> + 10 | <тест>test + : ^^^^ + 11 | `---- x Text ,-[$DIR/tests/fixture/tags/input.xml:9:1] - 9 | - 10 | - : ^ - 11 | + 9 | <тест/> + 10 | <тест>test + : ^^^^ + 11 | + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:9:1] + 9 | <тест/> + 10 | <тест>test + : ^ + 11 | + 12 | test + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:9:1] + 9 | <тест/> + 10 | <тест>test + : ^ + 11 | + 12 | test + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:10:1] + 10 | <тест>test + 11 | + : ^^^^^^^^^^^ + 12 | test + `---- + + x Element + ,-[$DIR/tests/fixture/tags/input.xml:10:1] + 10 | <тест>test + 11 | + : ^^^^^^^^^^^ + 12 | test + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:10:1] + 10 | <тест>test + 11 | + : ^ + 12 | test + 13 | + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:10:1] + 10 | <тест>test + 11 | + : ^ + 12 | test + 13 | + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:11:1] + 11 | + 12 | test + : ^^^^^^^^^^^^^^^^^^^^^^^^^ + 13 | + `---- + + x Element + ,-[$DIR/tests/fixture/tags/input.xml:11:1] + 11 | + 12 | test + : ^^^^^^^^^^^^^^^^^^^^^^^^^ + 13 | + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:11:1] + 11 | + 12 | test + : ^^^^ + 13 | + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:11:1] + 11 | + 12 | test + : ^^^^ + 13 | + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:11:1] + 11 | + 12 | test + : ^ + 13 | + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:11:1] + 11 | + 12 | test + : ^ + 13 | `---- From 46eee607c45a7a4b3bef3cfe8c1f7f1215586e72 Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Wed, 30 Nov 2022 22:02:37 +0300 Subject: [PATCH 2/9] refactor: code --- crates/swc_xml_parser/src/lexer/mod.rs | 329 ++++++++++++------------- 1 file changed, 164 insertions(+), 165 deletions(-) diff --git a/crates/swc_xml_parser/src/lexer/mod.rs b/crates/swc_xml_parser/src/lexer/mod.rs index 1e527eadb177..d207501cfe37 100644 --- a/crates/swc_xml_parser/src/lexer/mod.rs +++ b/crates/swc_xml_parser/src/lexer/mod.rs @@ -13,10 +13,6 @@ use crate::{ pub enum State { Data, CharacterReferenceInData, - TagOpen, - EndTagOpen, - EndTagName, - EndTagNameAfter, Pi, PiTarget, PiTargetQuestion, @@ -37,6 +33,10 @@ pub enum State { Cdata, CdataBracket, CdataEnd, + TagOpen, + EndTagOpen, + EndTagName, + EndTagNameAfter, TagName, EmptyTag, TagAttributeNameBefore, @@ -996,166 +996,6 @@ where self.emit_character_token(('&', '&')); } } - State::TagOpen => { - // Consume the next input character: - match self.consume_next_char() { - // U+002F SOLIDUS (/) - // Switch to the end tag open state. - Some('/') => { - self.state = State::EndTagOpen; - } - // U+003F QUESTION MARK(?) - // Switch to the pi state. - Some('?') => { - self.state = State::Pi; - } - // U+0021 EXCLAMATION MARK (!) - // Switch to the markup declaration open state. - Some('!') => { - self.state = State::MarkupDeclaration; - } - // U+0009 CHARACTER TABULATION (Tab) - // U+000A LINE FEED (LF) - // U+0020 SPACE (Space) - // U+003A (:) - // U+003C LESSER-THAN SIGN (<) - // U+003E GREATER-THAN SIGN (>) - // EOF - // Parse error. Emit a U+003C LESSER-THAN SIGN (<) character. Reconsume the - // current input character in the data state. - Some(c) if is_spacy_except_ff(c) => { - self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); - self.emit_character_token(('<', '<')); - self.reconsume_in_state(State::Data); - } - Some(':') | Some('<') | Some('>') | None => { - self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); - self.emit_character_token(('<', '<')); - self.reconsume_in_state(State::Data); - } - // Anything else - // Create a new tag token and set its name to the input character, then switch - // to the tag name state. - Some(c) => { - self.validate_input_stream_character(c); - self.create_tag_token(TagKind::Start); - self.append_to_tag_token_name(c); - self.state = State::TagName; - } - } - } - State::EndTagOpen => { - // Consume the next input character: - match self.consume_next_char() { - // U+003E GREATER-THAN SIGN (>) - // Emit a short end tag token and then switch to the data state. - Some('>') => { - self.emit_tag_token(Some(TagKind::End)); - self.state = State::Data; - } - // U+0009 CHARACTER TABULATION (Tab) - // U+000A LINE FEED (LF) - // U+0020 SPACE (Space) - // U+003C LESSER-THAN SIGN (<) - // U+003A (:) - // EOF - // Parse error. Emit a U+003C LESSER-THAN SIGN (<) character token and a U+002F - // SOLIDUS (/) character token. Reconsume the current input character in the - // data state. - Some(c) if is_spacy_except_ff(c) => { - self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); - self.emit_character_token(('<', '<')); - self.emit_character_token(('/', '/')); - self.reconsume_in_state(State::Data); - } - Some('<') | Some(':') | None => { - self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); - self.emit_character_token(('<', '<')); - self.emit_character_token(('/', '/')); - self.reconsume_in_state(State::Data); - } - // Anything else - // Create an end tag token and set its name to the input character, then switch - // to the end tag name state. - Some(c) => { - self.validate_input_stream_character(c); - self.create_tag_token(TagKind::End); - self.append_to_tag_token_name(c); - self.state = State::EndTagName - } - } - } - State::EndTagName => { - // Consume the next input character: - match self.consume_next_char() { - // U+0009 CHARACTER TABULATION (Tab) - // U+000A LINE FEED (LF) - // U+0020 SPACE (Space) - // Switch to the end tag name after state. - Some(c) if is_spacy_except_ff(c) => { - self.state = State::EndTagNameAfter; - } - // U+002F SOLIDUS (/) - // Parse error. Switch to the end tag name after state. - Some('/') => { - self.emit_error(ErrorKind::EndTagWithTrailingSolidus); - self.state = State::EndTagNameAfter; - } - // EOF - // Parse error. Emit the start tag token and then reprocess the current input - // character in the data state. - None => { - self.emit_error(ErrorKind::EofInTag); - self.emit_tag_token(Some(TagKind::Start)); - self.reconsume_in_state(State::Data); - } - // U+003E GREATER-THAN SIGN (>) - // Emit the current token and then switch to the data state. - Some('>') => { - self.emit_tag_token(None); - self.state = State::Data; - } - // Anything else - // Append the current input character to the tag name and stay in the current - // state. - Some(c) => { - self.validate_input_stream_character(c); - self.append_to_tag_token_name(c); - } - } - } - State::EndTagNameAfter => { - // Consume the next input character: - match self.consume_next_char() { - // U+003E GREATER-THAN SIGN (>) - // Emit the current token and then switch to the data state. - Some('>') => { - self.emit_tag_token(None); - self.state = State::Data; - } - // U+0009 CHARACTER TABULATION (Tab) - // U+000A LINE FEED (LF) - // U+0020 SPACE (Space) - // Stay in the current state. - Some(c) if is_spacy_except_ff(c) => { - self.skip_next_lf(c); - } - // EOF - // Parse error. Emit the current token and then reprocess the current input - // character in the data state. - None => { - self.emit_error(ErrorKind::EofInTag); - self.emit_tag_token(None); - self.reconsume_in_state(State::Data); - } - // Anything else - // Parse error. Stay in the current state. - Some(c) => { - self.validate_input_stream_character(c); - self.emit_error(ErrorKind::InvalidCharacterInTag); - } - } - } State::Pi => { // Consume the next input character: match self.consume_next_char() { @@ -1797,7 +1637,166 @@ where } } } - // https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state + State::TagOpen => { + // Consume the next input character: + match self.consume_next_char() { + // U+002F SOLIDUS (/) + // Switch to the end tag open state. + Some('/') => { + self.state = State::EndTagOpen; + } + // U+003F QUESTION MARK(?) + // Switch to the pi state. + Some('?') => { + self.state = State::Pi; + } + // U+0021 EXCLAMATION MARK (!) + // Switch to the markup declaration open state. + Some('!') => { + self.state = State::MarkupDeclaration; + } + // U+0009 CHARACTER TABULATION (Tab) + // U+000A LINE FEED (LF) + // U+0020 SPACE (Space) + // U+003A (:) + // U+003C LESSER-THAN SIGN (<) + // U+003E GREATER-THAN SIGN (>) + // EOF + // Parse error. Emit a U+003C LESSER-THAN SIGN (<) character. Reconsume the + // current input character in the data state. + Some(c) if is_spacy_except_ff(c) => { + self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); + self.emit_character_token(('<', '<')); + self.reconsume_in_state(State::Data); + } + Some(':') | Some('<') | Some('>') | None => { + self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); + self.emit_character_token(('<', '<')); + self.reconsume_in_state(State::Data); + } + // Anything else + // Create a new tag token and set its name to the input character, then switch + // to the tag name state. + Some(c) => { + self.validate_input_stream_character(c); + self.create_tag_token(TagKind::Start); + self.append_to_tag_token_name(c); + self.state = State::TagName; + } + } + } + State::EndTagOpen => { + // Consume the next input character: + match self.consume_next_char() { + // U+003E GREATER-THAN SIGN (>) + // Emit a short end tag token and then switch to the data state. + Some('>') => { + self.emit_tag_token(Some(TagKind::End)); + self.state = State::Data; + } + // U+0009 CHARACTER TABULATION (Tab) + // U+000A LINE FEED (LF) + // U+0020 SPACE (Space) + // U+003C LESSER-THAN SIGN (<) + // U+003A (:) + // EOF + // Parse error. Emit a U+003C LESSER-THAN SIGN (<) character token and a U+002F + // SOLIDUS (/) character token. Reconsume the current input character in the + // data state. + Some(c) if is_spacy_except_ff(c) => { + self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); + self.emit_character_token(('<', '<')); + self.emit_character_token(('/', '/')); + self.reconsume_in_state(State::Data); + } + Some('<') | Some(':') | None => { + self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); + self.emit_character_token(('<', '<')); + self.emit_character_token(('/', '/')); + self.reconsume_in_state(State::Data); + } + // Anything else + // Create an end tag token and set its name to the input character, then switch + // to the end tag name state. + Some(c) => { + self.validate_input_stream_character(c); + self.create_tag_token(TagKind::End); + self.append_to_tag_token_name(c); + self.state = State::EndTagName + } + } + } + State::EndTagName => { + // Consume the next input character: + match self.consume_next_char() { + // U+0009 CHARACTER TABULATION (Tab) + // U+000A LINE FEED (LF) + // U+0020 SPACE (Space) + // Switch to the end tag name after state. + Some(c) if is_spacy_except_ff(c) => { + self.state = State::EndTagNameAfter; + } + // U+002F SOLIDUS (/) + // Parse error. Switch to the end tag name after state. + Some('/') => { + self.emit_error(ErrorKind::EndTagWithTrailingSolidus); + self.state = State::EndTagNameAfter; + } + // EOF + // Parse error. Emit the start tag token and then reprocess the current input + // character in the data state. + None => { + self.emit_error(ErrorKind::EofInTag); + self.emit_tag_token(Some(TagKind::Start)); + self.reconsume_in_state(State::Data); + } + // U+003E GREATER-THAN SIGN (>) + // Emit the current token and then switch to the data state. + Some('>') => { + self.emit_tag_token(None); + self.state = State::Data; + } + // Anything else + // Append the current input character to the tag name and stay in the current + // state. + Some(c) => { + self.validate_input_stream_character(c); + self.append_to_tag_token_name(c); + } + } + } + State::EndTagNameAfter => { + // Consume the next input character: + match self.consume_next_char() { + // U+003E GREATER-THAN SIGN (>) + // Emit the current token and then switch to the data state. + Some('>') => { + self.emit_tag_token(None); + self.state = State::Data; + } + // U+0009 CHARACTER TABULATION (Tab) + // U+000A LINE FEED (LF) + // U+0020 SPACE (Space) + // Stay in the current state. + Some(c) if is_spacy_except_ff(c) => { + self.skip_next_lf(c); + } + // EOF + // Parse error. Emit the current token and then reprocess the current input + // character in the data state. + None => { + self.emit_error(ErrorKind::EofInTag); + self.emit_tag_token(None); + self.reconsume_in_state(State::Data); + } + // Anything else + // Parse error. Stay in the current state. + Some(c) => { + self.validate_input_stream_character(c); + self.emit_error(ErrorKind::InvalidCharacterInTag); + } + } + } State::TagName => { // Consume the next input character: match self.consume_next_char() { From 259bc1ba018fb340386f5825c9ca292d328c48fc Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Wed, 30 Nov 2022 22:17:43 +0300 Subject: [PATCH 3/9] refactor: code --- crates/swc_html_parser/src/lexer/mod.rs | 10 ++-- crates/swc_xml_parser/src/lexer/mod.rs | 63 +++++++++++++++---------- 2 files changed, 43 insertions(+), 30 deletions(-) diff --git a/crates/swc_html_parser/src/lexer/mod.rs b/crates/swc_html_parser/src/lexer/mod.rs index 10542e943a18..88950c5fb878 100644 --- a/crates/swc_html_parser/src/lexer/mod.rs +++ b/crates/swc_html_parser/src/lexer/mod.rs @@ -1067,16 +1067,16 @@ where State::TagOpen => { // Consume the next input character: match self.consume_next_char() { - // U+0021 EXCLAMATION MARK (!) - // Switch to the markup declaration open state. - Some('!') => { - self.state = State::MarkupDeclarationOpen; - } // U+002F SOLIDUS (/) // Switch to the end tag open state. Some('/') => { self.state = State::EndTagOpen; } + // U+0021 EXCLAMATION MARK (!) + // Switch to the markup declaration open state. + Some('!') => { + self.state = State::MarkupDeclarationOpen; + } // ASCII alpha // Create a new start tag token, set its tag name to the empty string. // Reconsume in the tag name state. diff --git a/crates/swc_xml_parser/src/lexer/mod.rs b/crates/swc_xml_parser/src/lexer/mod.rs index d207501cfe37..eb35da16299a 100644 --- a/crates/swc_xml_parser/src/lexer/mod.rs +++ b/crates/swc_xml_parser/src/lexer/mod.rs @@ -1645,44 +1645,41 @@ where Some('/') => { self.state = State::EndTagOpen; } + // U+0021 EXCLAMATION MARK (!) + // Switch to the markup declaration open state. + Some('!') => { + self.state = State::MarkupDeclaration; + } // U+003F QUESTION MARK(?) // Switch to the pi state. Some('?') => { self.state = State::Pi; } - // U+0021 EXCLAMATION MARK (!) - // Switch to the markup declaration open state. - Some('!') => { - self.state = State::MarkupDeclaration; + // Anything else + // Create a new tag token and set its name to the input character, then switch + // to the tag name state. + Some(c) if is_name_start_char(c) => { + self.create_tag_token(TagKind::Start); + self.reconsume_in_state(State::TagName); } - // U+0009 CHARACTER TABULATION (Tab) - // U+000A LINE FEED (LF) - // U+0020 SPACE (Space) - // U+003A (:) - // U+003C LESSER-THAN SIGN (<) - // U+003E GREATER-THAN SIGN (>) // EOF - // Parse error. Emit a U+003C LESSER-THAN SIGN (<) character. Reconsume the - // current input character in the data state. - Some(c) if is_spacy_except_ff(c) => { - self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); + // This is an eof-before-tag-name parse error. Emit a U+003C LESS-THAN SIGN + // character token and an end-of-file token. + None => { + self.emit_error(ErrorKind::EofBeforeTagName); self.emit_character_token(('<', '<')); - self.reconsume_in_state(State::Data); + self.emit_token(Token::Eof); + + return Ok(()); } - Some(':') | Some('<') | Some('>') | None => { + // Anything else + // This is an invalid-first-character-of-tag-name parse error. Emit a U+003C + // LESS-THAN SIGN character token. Reconsume in the data state. + _ => { self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); self.emit_character_token(('<', '<')); self.reconsume_in_state(State::Data); } - // Anything else - // Create a new tag token and set its name to the input character, then switch - // to the tag name state. - Some(c) => { - self.validate_input_stream_character(c); - self.create_tag_token(TagKind::Start); - self.append_to_tag_token_name(c); - self.state = State::TagName; - } } } State::EndTagOpen => { @@ -3087,3 +3084,19 @@ fn is_upper_hex_digit(c: char) -> bool { fn is_lower_hex_digit(c: char) -> bool { matches!(c, '0'..='9' | 'a'..='f') } + +// NameStartChar ::= +// ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | +// [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | +// [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | +// [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] +#[inline(always)] +fn is_name_start_char(c: char) -> bool { + match c { + ':' | 'A'..='Z' | '_' | 'a'..='z' => true, + _ if matches!(c as u32, 0xc0..=0xd6 | 0xd8..=0x2ff | 0x370..=0x37d | 0x37f..=0x1fff | 0x200c..=0x200d | 0x2070..=0x218f | 0x2c00..=0x2fef | 0x3001..=0xd7ff | 0xf900..=0xfdcf | 0xfdf0..=0xfffd | 0x10000..=0xeffff) => { + true + } + _ => false, + } +} From cc1094030d73e6e75b73ff5fd5c15559412c1dcb Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Wed, 30 Nov 2022 22:24:32 +0300 Subject: [PATCH 4/9] refactor: code --- crates/swc_xml_parser/src/lexer/mod.rs | 155 ++++++++++--------------- 1 file changed, 63 insertions(+), 92 deletions(-) diff --git a/crates/swc_xml_parser/src/lexer/mod.rs b/crates/swc_xml_parser/src/lexer/mod.rs index eb35da16299a..c3dcda638ef1 100644 --- a/crates/swc_xml_parser/src/lexer/mod.rs +++ b/crates/swc_xml_parser/src/lexer/mod.rs @@ -666,36 +666,36 @@ where current_tag_token.kind = kind; } + let mut already_seen: AHashSet = Default::default(); + + let attributes = current_tag_token + .attributes + .drain(..) + .map(|attribute| { + let name = JsWord::from(attribute.name); + + if already_seen.contains(&name) { + self.errors + .push(Error::new(attribute.span, ErrorKind::DuplicateAttribute)); + } + + already_seen.insert(name.clone()); + + AttributeToken { + span: attribute.span, + name, + raw_name: attribute.raw_name.map(JsWord::from), + value: attribute.value.map(JsWord::from), + raw_value: attribute.raw_value.map(JsWord::from), + } + }) + .collect(); + match current_tag_token.kind { TagKind::Start => { - let mut already_seen: AHashSet = Default::default(); - let start_tag_token = Token::StartTag { tag_name: current_tag_token.tag_name.into(), - attributes: current_tag_token - .attributes - .drain(..) - .map(|attribute| { - let name = JsWord::from(attribute.name); - - if already_seen.contains(&name) { - self.errors.push(Error::new( - attribute.span, - ErrorKind::DuplicateAttribute, - )); - } - - already_seen.insert(name.clone()); - - AttributeToken { - span: attribute.span, - name, - raw_name: attribute.raw_name.map(JsWord::from), - value: attribute.value.map(JsWord::from), - raw_value: attribute.raw_value.map(JsWord::from), - } - }) - .collect(), + attributes, }; self.emit_token(start_tag_token); @@ -705,67 +705,17 @@ where self.emit_error(ErrorKind::EndTagWithAttributes); } - let mut already_seen: AHashSet = Default::default(); - let end_tag_token = Token::EndTag { tag_name: current_tag_token.tag_name.into(), - attributes: current_tag_token - .attributes - .drain(..) - .map(|attribute| { - let name = JsWord::from(attribute.name); - - if already_seen.contains(&name) { - self.errors.push(Error::new( - attribute.span, - ErrorKind::DuplicateAttribute, - )); - } - - already_seen.insert(name.clone()); - - AttributeToken { - span: attribute.span, - name, - raw_name: attribute.raw_name.map(JsWord::from), - value: attribute.value.map(JsWord::from), - raw_value: attribute.raw_value.map(JsWord::from), - } - }) - .collect(), + attributes, }; self.emit_token(end_tag_token); } TagKind::Empty => { - let mut already_seen: AHashSet = Default::default(); - let empty_tag = Token::EmptyTag { tag_name: current_tag_token.tag_name.into(), - attributes: current_tag_token - .attributes - .drain(..) - .map(|attribute| { - let name = JsWord::from(attribute.name); - - if already_seen.contains(&name) { - self.errors.push(Error::new( - attribute.span, - ErrorKind::DuplicateAttribute, - )); - } - - already_seen.insert(name.clone()); - - AttributeToken { - span: attribute.span, - name, - raw_name: attribute.raw_name.map(JsWord::from), - value: attribute.value.map(JsWord::from), - raw_value: attribute.raw_value.map(JsWord::from), - } - }) - .collect(), + attributes, }; self.emit_token(empty_tag); @@ -1655,7 +1605,7 @@ where Some('?') => { self.state = State::Pi; } - // Anything else + // Name start character // Create a new tag token and set its name to the input character, then switch // to the tag name state. Some(c) if is_name_start_char(c) => { @@ -1805,30 +1755,38 @@ where self.skip_next_lf(c); self.state = State::TagAttributeNameBefore; } + // U+002F SOLIDUS (/) + // Set current tag to empty tag. Switch to the empty tag state. + Some('/') => { + self.set_tag_to_empty_tag(); + self.state = State::EmptyTag; + } // U+003E GREATER-THAN SIGN (>) - // Emit the start tag token and then switch to the data state. + // Switch to the data state. Emit the current tag token. Some('>') => { - self.emit_tag_token(Some(TagKind::Start)); self.state = State::Data; + self.emit_tag_token(Some(TagKind::Start)); } // EOF - // Parse error. Emit the current token and then reprocess the current input - // character in the data state. + // This is an eof-in-tag parse error. Emit an end-of-file token. None => { self.emit_error(ErrorKind::EofInTag); self.emit_tag_token(None); - self.reconsume_in_state(State::Data); - } - // U+002F SOLIDUS (/) - // Set current tag to empty tag. Switch to the empty tag state. - Some('/') => { - self.set_tag_to_empty_tag(); - self.state = State::EmptyTag; + + return Ok(()); } - // Anything else + // Name character // Append the current input character to the tag name and stay in the current // state. + Some(c) if is_name_char(c) => { + self.validate_input_stream_character(c); + self.append_to_tag_token_name(c); + } + // Anything else + // Parse error. Append the current input character to the tag name and stay in + // the current state. Some(c) => { + self.emit_error(ErrorKind::InvalidCharacterInTag); self.validate_input_stream_character(c); self.append_to_tag_token_name(c); } @@ -1841,7 +1799,7 @@ where // Emit the current tag token as empty tag token and then switch to the data // state. Some('>') => { - self.emit_tag_token(None); + self.emit_tag_token(Some(TagKind::Empty)); self.state = State::Data; } // Anything else @@ -3100,3 +3058,16 @@ fn is_name_start_char(c: char) -> bool { _ => false, } } + +// NameChar ::= +// NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | +// [#x203F-#x2040] +#[inline(always)] +fn is_name_char(c: char) -> bool { + match c { + '-' | '.' | '0'..='9' => true, + _ if matches!(c as u32, 0xb7 | 0x0300..=0x036f | 0x203f..=0x2040) => true, + _ if is_name_start_char(c) => true, + _ => false, + } +} From cec06a0ab2937fd2221644bfc1692734ac14e9f5 Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Wed, 30 Nov 2022 23:28:45 +0300 Subject: [PATCH 5/9] test: more --- .../swc_xml_parser/tests/fixture/tags/dom.txt | 11 ++ .../tests/fixture/tags/input.xml | 3 + .../tests/fixture/tags/output.json | 93 ++++++++++- .../tests/fixture/tags/span.swc-stderr | 151 ++++++++++++++++-- .../tests/recovery/element-1/dom.txt | 4 + .../tests/recovery/element-1/input.xml | 3 + .../tests/recovery/element-1/output.json | 32 ++++ .../recovery/element-1/output.swc-stderr | 8 + .../tests/recovery/element-1/span.swc-stderr | 35 ++++ .../tests/recovery/element/dom.txt | 4 + .../tests/recovery/element/input.xml | 3 + .../tests/recovery/element/output.json | 32 ++++ .../tests/recovery/element/output.swc-stderr | 8 + .../tests/recovery/element/span.swc-stderr | 35 ++++ 14 files changed, 409 insertions(+), 13 deletions(-) create mode 100644 crates/swc_xml_parser/tests/recovery/element-1/dom.txt create mode 100644 crates/swc_xml_parser/tests/recovery/element-1/input.xml create mode 100644 crates/swc_xml_parser/tests/recovery/element-1/output.json create mode 100644 crates/swc_xml_parser/tests/recovery/element-1/output.swc-stderr create mode 100644 crates/swc_xml_parser/tests/recovery/element-1/span.swc-stderr create mode 100644 crates/swc_xml_parser/tests/recovery/element/dom.txt create mode 100644 crates/swc_xml_parser/tests/recovery/element/input.xml create mode 100644 crates/swc_xml_parser/tests/recovery/element/output.json create mode 100644 crates/swc_xml_parser/tests/recovery/element/output.swc-stderr create mode 100644 crates/swc_xml_parser/tests/recovery/element/span.swc-stderr diff --git a/crates/swc_xml_parser/tests/fixture/tags/dom.txt b/crates/swc_xml_parser/tests/fixture/tags/dom.txt index 2c22f5320b8b..93bedab8ea1a 100644 --- a/crates/swc_xml_parser/tests/fixture/tags/dom.txt +++ b/crates/swc_xml_parser/tests/fixture/tags/dom.txt @@ -38,3 +38,14 @@ | "test" | " " +| <俄语> +| լեզու="ռուսերեն" +| "данные" +| " +" +| +| " +" +| <:circle> +| " +" diff --git a/crates/swc_xml_parser/tests/fixture/tags/input.xml b/crates/swc_xml_parser/tests/fixture/tags/input.xml index 7f8cac04c3fd..974fcf67ad4e 100644 --- a/crates/swc_xml_parser/tests/fixture/tags/input.xml +++ b/crates/swc_xml_parser/tests/fixture/tags/input.xml @@ -10,4 +10,7 @@ <тест>test test +<俄语 լեզու="ռուսերեն">данные + +<:circle/> \ No newline at end of file diff --git a/crates/swc_xml_parser/tests/fixture/tags/output.json b/crates/swc_xml_parser/tests/fixture/tags/output.json index dfe98b078050..6d317aa3da9b 100644 --- a/crates/swc_xml_parser/tests/fixture/tags/output.json +++ b/crates/swc_xml_parser/tests/fixture/tags/output.json @@ -2,7 +2,7 @@ "type": "Document", "span": { "start": 1, - "end": 224, + "end": 321, "ctxt": 0 }, "children": [ @@ -10,7 +10,7 @@ "type": "Element", "span": { "start": 1, - "end": 224, + "end": 321, "ctxt": 0 }, "tagName": "root", @@ -298,6 +298,95 @@ }, "data": "\n", "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 217, + "end": 276, + "ctxt": 0 + }, + "tagName": "俄语", + "attributes": [ + { + "type": "Attribute", + "span": { + "start": 0, + "end": 0, + "ctxt": 0 + }, + "namespace": null, + "prefix": null, + "name": "լեզու", + "rawName": "լեզու", + "value": "ռուսերեն", + "rawValue": "\"ռուսերեն\"" + } + ], + "children": [ + { + "type": "Text", + "span": { + "start": 255, + "end": 267, + "ctxt": 0 + }, + "data": "данные", + "raw": "данные" + } + ] + }, + { + "type": "Text", + "span": { + "start": 276, + "end": 277, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 277, + "end": 302, + "ctxt": 0 + }, + "tagName": "svg:circle", + "attributes": [], + "children": [] + }, + { + "type": "Text", + "span": { + "start": 302, + "end": 303, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 303, + "end": 313, + "ctxt": 0 + }, + "tagName": ":circle", + "attributes": [], + "children": [] + }, + { + "type": "Text", + "span": { + "start": 313, + "end": 314, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" } ] } diff --git a/crates/swc_xml_parser/tests/fixture/tags/span.swc-stderr b/crates/swc_xml_parser/tests/fixture/tags/span.swc-stderr index 399496b6bae7..c73071ae3b28 100644 --- a/crates/swc_xml_parser/tests/fixture/tags/span.swc-stderr +++ b/crates/swc_xml_parser/tests/fixture/tags/span.swc-stderr @@ -13,7 +13,10 @@ 10 | | <тест>test 11 | | 12 | | test - 13 | `-> + 13 | | <俄语 լեզու="ռուսերեն">данные + 14 | | + 15 | | <:circle/> + 16 | `-> `---- x Child @@ -30,7 +33,10 @@ 10 | | <тест>test 11 | | 12 | | test - 13 | `-> + 13 | | <俄语 լեզու="ռուսերեն">данные + 14 | | + 15 | | <:circle/> + 16 | `-> `---- x Element @@ -47,7 +53,10 @@ 10 | | <тест>test 11 | | 12 | | test - 13 | `-> + 13 | | <俄语 լեզու="ռուսերեն">данные + 14 | | + 15 | | <:circle/> + 16 | `-> `---- x Child @@ -442,7 +451,7 @@ 11 | : ^ 12 | test - 13 | + 13 | <俄语 լեզու="ռուսերեն">данные `---- x Text @@ -451,7 +460,7 @@ 11 | : ^ 12 | test - 13 | + 13 | <俄语 լեզու="ռուսերեն">данные `---- x Child @@ -459,7 +468,7 @@ 11 | 12 | test : ^^^^^^^^^^^^^^^^^^^^^^^^^ - 13 | + 13 | <俄语 լեզու="ռուսերեն">данные `---- x Element @@ -467,7 +476,7 @@ 11 | 12 | test : ^^^^^^^^^^^^^^^^^^^^^^^^^ - 13 | + 13 | <俄语 լեզու="ռուսերեն">данные `---- x Child @@ -475,7 +484,7 @@ 11 | 12 | test : ^^^^ - 13 | + 13 | <俄语 լեզու="ռուսերեն">данные `---- x Text @@ -483,7 +492,7 @@ 11 | 12 | test : ^^^^ - 13 | + 13 | <俄语 լեզու="ռուսերեն">данные `---- x Child @@ -491,7 +500,8 @@ 11 | 12 | test : ^ - 13 | + 13 | <俄语 լեզու="ռուսերեն">данные + 14 | `---- x Text @@ -499,5 +509,124 @@ 11 | 12 | test : ^ - 13 | + 13 | <俄语 լեզու="ռուսերեն">данные + 14 | + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:12:1] + 12 | test + 13 | <俄语 լեզու="ռուսերեն">данные + : ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + 14 | + `---- + + x Element + ,-[$DIR/tests/fixture/tags/input.xml:12:1] + 12 | test + 13 | <俄语 լեզու="ռուսերեն">данные + : ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + 14 | + `---- + + x Attribute + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:12:1] + 12 | test + 13 | <俄语 լեզու="ռուսերեն">данные + : ^^^^^^^^^^^^ + 14 | + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:12:1] + 12 | test + 13 | <俄语 լեզու="ռուսերեն">данные + : ^^^^^^^^^^^^ + 14 | + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:12:1] + 12 | test + 13 | <俄语 լեզու="ռուսերեն">данные + : ^ + 14 | + 15 | <:circle/> + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:12:1] + 12 | test + 13 | <俄语 լեզու="ռուսերեն">данные + : ^ + 14 | + 15 | <:circle/> + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:13:1] + 13 | <俄语 լեզու="ռուսերեն">данные + 14 | + : ^^^^^^^^^^^^^^^^^^^^^^^^^ + 15 | <:circle/> + `---- + + x Element + ,-[$DIR/tests/fixture/tags/input.xml:13:1] + 13 | <俄语 լեզու="ռուսերեն">данные + 14 | + : ^^^^^^^^^^^^^^^^^^^^^^^^^ + 15 | <:circle/> + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:13:1] + 13 | <俄语 լեզու="ռուսերեն">данные + 14 | + : ^ + 15 | <:circle/> + 16 | + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:13:1] + 13 | <俄语 լեզու="ռուսերեն">данные + 14 | + : ^ + 15 | <:circle/> + 16 | + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:14:1] + 14 | + 15 | <:circle/> + : ^^^^^^^^^^ + 16 | + `---- + + x Element + ,-[$DIR/tests/fixture/tags/input.xml:14:1] + 14 | + 15 | <:circle/> + : ^^^^^^^^^^ + 16 | + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:14:1] + 14 | + 15 | <:circle/> + : ^ + 16 | + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:14:1] + 14 | + 15 | <:circle/> + : ^ + 16 | `---- diff --git a/crates/swc_xml_parser/tests/recovery/element-1/dom.txt b/crates/swc_xml_parser/tests/recovery/element-1/dom.txt new file mode 100644 index 000000000000..fc713a32716a --- /dev/null +++ b/crates/swc_xml_parser/tests/recovery/element-1/dom.txt @@ -0,0 +1,4 @@ +| +| " + <-svg/> +" diff --git a/crates/swc_xml_parser/tests/recovery/element-1/input.xml b/crates/swc_xml_parser/tests/recovery/element-1/input.xml new file mode 100644 index 000000000000..c5378cba4648 --- /dev/null +++ b/crates/swc_xml_parser/tests/recovery/element-1/input.xml @@ -0,0 +1,3 @@ + + <-svg/> + \ No newline at end of file diff --git a/crates/swc_xml_parser/tests/recovery/element-1/output.json b/crates/swc_xml_parser/tests/recovery/element-1/output.json new file mode 100644 index 000000000000..d748c6bb939f --- /dev/null +++ b/crates/swc_xml_parser/tests/recovery/element-1/output.json @@ -0,0 +1,32 @@ +{ + "type": "Document", + "span": { + "start": 1, + "end": 27, + "ctxt": 0 + }, + "children": [ + { + "type": "Element", + "span": { + "start": 1, + "end": 27, + "ctxt": 0 + }, + "tagName": "root", + "attributes": [], + "children": [ + { + "type": "Text", + "span": { + "start": 7, + "end": 20, + "ctxt": 0 + }, + "data": "\n <-svg/>\n", + "raw": "\n <-svg/>\n" + } + ] + } + ] +} diff --git a/crates/swc_xml_parser/tests/recovery/element-1/output.swc-stderr b/crates/swc_xml_parser/tests/recovery/element-1/output.swc-stderr new file mode 100644 index 000000000000..a57ea658ed13 --- /dev/null +++ b/crates/swc_xml_parser/tests/recovery/element-1/output.swc-stderr @@ -0,0 +1,8 @@ + + x Invalid first character of tag name + ,-[$DIR/tests/recovery/element-1/input.xml:1:1] + 1 | + 2 | <-svg/> + : ^ + 3 | + `---- diff --git a/crates/swc_xml_parser/tests/recovery/element-1/span.swc-stderr b/crates/swc_xml_parser/tests/recovery/element-1/span.swc-stderr new file mode 100644 index 000000000000..a9641aadd512 --- /dev/null +++ b/crates/swc_xml_parser/tests/recovery/element-1/span.swc-stderr @@ -0,0 +1,35 @@ + + x Document + ,-[$DIR/tests/recovery/element-1/input.xml:1:1] + 1 | ,-> + 2 | | <-svg/> + 3 | `-> + `---- + + x Child + ,-[$DIR/tests/recovery/element-1/input.xml:1:1] + 1 | ,-> + 2 | | <-svg/> + 3 | `-> + `---- + + x Element + ,-[$DIR/tests/recovery/element-1/input.xml:1:1] + 1 | ,-> + 2 | | <-svg/> + 3 | `-> + `---- + + x Child + ,-[$DIR/tests/recovery/element-1/input.xml:1:1] + 1 | ,-> + 2 | `-> <-svg/> + 3 | + `---- + + x Text + ,-[$DIR/tests/recovery/element-1/input.xml:1:1] + 1 | ,-> + 2 | `-> <-svg/> + 3 | + `---- diff --git a/crates/swc_xml_parser/tests/recovery/element/dom.txt b/crates/swc_xml_parser/tests/recovery/element/dom.txt new file mode 100644 index 000000000000..53330b82434c --- /dev/null +++ b/crates/swc_xml_parser/tests/recovery/element/dom.txt @@ -0,0 +1,4 @@ +| +| " + <> +" diff --git a/crates/swc_xml_parser/tests/recovery/element/input.xml b/crates/swc_xml_parser/tests/recovery/element/input.xml new file mode 100644 index 000000000000..cc7592ec05df --- /dev/null +++ b/crates/swc_xml_parser/tests/recovery/element/input.xml @@ -0,0 +1,3 @@ + + <> + \ No newline at end of file diff --git a/crates/swc_xml_parser/tests/recovery/element/output.json b/crates/swc_xml_parser/tests/recovery/element/output.json new file mode 100644 index 000000000000..aa2619aa7e45 --- /dev/null +++ b/crates/swc_xml_parser/tests/recovery/element/output.json @@ -0,0 +1,32 @@ +{ + "type": "Document", + "span": { + "start": 1, + "end": 22, + "ctxt": 0 + }, + "children": [ + { + "type": "Element", + "span": { + "start": 1, + "end": 22, + "ctxt": 0 + }, + "tagName": "root", + "attributes": [], + "children": [ + { + "type": "Text", + "span": { + "start": 7, + "end": 15, + "ctxt": 0 + }, + "data": "\n <>\n", + "raw": "\n <>\n" + } + ] + } + ] +} diff --git a/crates/swc_xml_parser/tests/recovery/element/output.swc-stderr b/crates/swc_xml_parser/tests/recovery/element/output.swc-stderr new file mode 100644 index 000000000000..6de567e3805f --- /dev/null +++ b/crates/swc_xml_parser/tests/recovery/element/output.swc-stderr @@ -0,0 +1,8 @@ + + x Invalid first character of tag name + ,-[$DIR/tests/recovery/element/input.xml:1:1] + 1 | + 2 | <> + : ^ + 3 | + `---- diff --git a/crates/swc_xml_parser/tests/recovery/element/span.swc-stderr b/crates/swc_xml_parser/tests/recovery/element/span.swc-stderr new file mode 100644 index 000000000000..d000ea1873ed --- /dev/null +++ b/crates/swc_xml_parser/tests/recovery/element/span.swc-stderr @@ -0,0 +1,35 @@ + + x Document + ,-[$DIR/tests/recovery/element/input.xml:1:1] + 1 | ,-> + 2 | | <> + 3 | `-> + `---- + + x Child + ,-[$DIR/tests/recovery/element/input.xml:1:1] + 1 | ,-> + 2 | | <> + 3 | `-> + `---- + + x Element + ,-[$DIR/tests/recovery/element/input.xml:1:1] + 1 | ,-> + 2 | | <> + 3 | `-> + `---- + + x Child + ,-[$DIR/tests/recovery/element/input.xml:1:1] + 1 | ,-> + 2 | `-> <> + 3 | + `---- + + x Text + ,-[$DIR/tests/recovery/element/input.xml:1:1] + 1 | ,-> + 2 | `-> <> + 3 | + `---- From 5170df166a84805613200c5160d3fa8bd4725ebb Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Thu, 1 Dec 2022 00:16:31 +0300 Subject: [PATCH 6/9] refactor: more --- crates/swc_xml_parser/src/lexer/mod.rs | 78 +------------------------- 1 file changed, 2 insertions(+), 76 deletions(-) diff --git a/crates/swc_xml_parser/src/lexer/mod.rs b/crates/swc_xml_parser/src/lexer/mod.rs index c3dcda638ef1..bc9fcf3b6c4c 100644 --- a/crates/swc_xml_parser/src/lexer/mod.rs +++ b/crates/swc_xml_parser/src/lexer/mod.rs @@ -35,8 +35,6 @@ pub enum State { CdataEnd, TagOpen, EndTagOpen, - EndTagName, - EndTagNameAfter, TagName, EmptyTag, TagAttributeNameBefore, @@ -1668,79 +1666,7 @@ where Some(c) => { self.validate_input_stream_character(c); self.create_tag_token(TagKind::End); - self.append_to_tag_token_name(c); - self.state = State::EndTagName - } - } - } - State::EndTagName => { - // Consume the next input character: - match self.consume_next_char() { - // U+0009 CHARACTER TABULATION (Tab) - // U+000A LINE FEED (LF) - // U+0020 SPACE (Space) - // Switch to the end tag name after state. - Some(c) if is_spacy_except_ff(c) => { - self.state = State::EndTagNameAfter; - } - // U+002F SOLIDUS (/) - // Parse error. Switch to the end tag name after state. - Some('/') => { - self.emit_error(ErrorKind::EndTagWithTrailingSolidus); - self.state = State::EndTagNameAfter; - } - // EOF - // Parse error. Emit the start tag token and then reprocess the current input - // character in the data state. - None => { - self.emit_error(ErrorKind::EofInTag); - self.emit_tag_token(Some(TagKind::Start)); - self.reconsume_in_state(State::Data); - } - // U+003E GREATER-THAN SIGN (>) - // Emit the current token and then switch to the data state. - Some('>') => { - self.emit_tag_token(None); - self.state = State::Data; - } - // Anything else - // Append the current input character to the tag name and stay in the current - // state. - Some(c) => { - self.validate_input_stream_character(c); - self.append_to_tag_token_name(c); - } - } - } - State::EndTagNameAfter => { - // Consume the next input character: - match self.consume_next_char() { - // U+003E GREATER-THAN SIGN (>) - // Emit the current token and then switch to the data state. - Some('>') => { - self.emit_tag_token(None); - self.state = State::Data; - } - // U+0009 CHARACTER TABULATION (Tab) - // U+000A LINE FEED (LF) - // U+0020 SPACE (Space) - // Stay in the current state. - Some(c) if is_spacy_except_ff(c) => { - self.skip_next_lf(c); - } - // EOF - // Parse error. Emit the current token and then reprocess the current input - // character in the data state. - None => { - self.emit_error(ErrorKind::EofInTag); - self.emit_tag_token(None); - self.reconsume_in_state(State::Data); - } - // Anything else - // Parse error. Stay in the current state. - Some(c) => { - self.validate_input_stream_character(c); - self.emit_error(ErrorKind::InvalidCharacterInTag); + self.reconsume_in_state(State::TagName); } } } @@ -1765,7 +1691,7 @@ where // Switch to the data state. Emit the current tag token. Some('>') => { self.state = State::Data; - self.emit_tag_token(Some(TagKind::Start)); + self.emit_tag_token(None); } // EOF // This is an eof-in-tag parse error. Emit an end-of-file token. From 5be5d4af139bbbebd634f366b67baa304365ed23 Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Thu, 1 Dec 2022 00:26:07 +0300 Subject: [PATCH 7/9] refactor: more --- crates/swc_xml_parser/src/error.rs | 2 ++ crates/swc_xml_parser/src/lexer/mod.rs | 46 +++++++++++++------------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/crates/swc_xml_parser/src/error.rs b/crates/swc_xml_parser/src/error.rs index cab58d644b70..3a0031766bb9 100644 --- a/crates/swc_xml_parser/src/error.rs +++ b/crates/swc_xml_parser/src/error.rs @@ -80,6 +80,7 @@ impl Error { ErrorKind::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers => { "Missing whitespace between doctype public and system identifiers".into() } + ErrorKind::MissingEndTagName => "Missing end tag name".into(), ErrorKind::NestedComment => "Nested comment".into(), ErrorKind::DoubleHyphenWithInComment => "Double hyper within comment".into(), ErrorKind::NoncharacterInInputStream => "Noncharacter in input stream".into(), @@ -150,6 +151,7 @@ pub enum ErrorKind { MissingWhitespaceAfterDoctypeSystemKeyword, MissingWhitespaceBeforeDoctypeName, MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers, + MissingEndTagName, NestedComment, DoubleHyphenWithInComment, NoncharacterInInputStream, diff --git a/crates/swc_xml_parser/src/lexer/mod.rs b/crates/swc_xml_parser/src/lexer/mod.rs index bc9fcf3b6c4c..6af81a1f33ea 100644 --- a/crates/swc_xml_parser/src/lexer/mod.rs +++ b/crates/swc_xml_parser/src/lexer/mod.rs @@ -1633,40 +1633,40 @@ where State::EndTagOpen => { // Consume the next input character: match self.consume_next_char() { + // ASCII alpha + // Create a new end tag token, set its tag name to the empty string. + // Reconsume in the tag name state. + Some(c) if is_name_char(c) => { + self.create_tag_token(TagKind::End); + self.reconsume_in_state(State::TagName); + } // U+003E GREATER-THAN SIGN (>) - // Emit a short end tag token and then switch to the data state. + // This is a missing-end-tag-name parse error. Switch to the data state. Some('>') => { - self.emit_tag_token(Some(TagKind::End)); + self.emit_error(ErrorKind::MissingEndTagName); self.state = State::Data; } - // U+0009 CHARACTER TABULATION (Tab) - // U+000A LINE FEED (LF) - // U+0020 SPACE (Space) - // U+003C LESSER-THAN SIGN (<) - // U+003A (:) // EOF - // Parse error. Emit a U+003C LESSER-THAN SIGN (<) character token and a U+002F - // SOLIDUS (/) character token. Reconsume the current input character in the - // data state. - Some(c) if is_spacy_except_ff(c) => { - self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); + // This is an eof-before-tag-name parse error. Emit a U+003C LESS-THAN SIGN + // character token, a U+002F SOLIDUS character token and an end-of-file + // token. + None => { + self.emit_error(ErrorKind::EofBeforeTagName); self.emit_character_token(('<', '<')); self.emit_character_token(('/', '/')); - self.reconsume_in_state(State::Data); + self.emit_token(Token::Eof); + + return Ok(()); } - Some('<') | Some(':') | None => { + // Anything else + // This is an invalid-first-character-of-tag-name parse error. Create a + // comment token whose data is the empty string. Reconsume in the bogus + // comment state. + _ => { self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); self.emit_character_token(('<', '<')); self.emit_character_token(('/', '/')); - self.reconsume_in_state(State::Data); - } - // Anything else - // Create an end tag token and set its name to the input character, then switch - // to the end tag name state. - Some(c) => { - self.validate_input_stream_character(c); - self.create_tag_token(TagKind::End); - self.reconsume_in_state(State::TagName); + self.reconsume_in_state(State::BogusComment); } } } From f7b5ca1244f9a0a046a3b7cda23dc44d6b133e8d Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Thu, 1 Dec 2022 01:01:44 +0300 Subject: [PATCH 8/9] fix: codegen --- crates/swc_xml_codegen/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/swc_xml_codegen/src/lib.rs b/crates/swc_xml_codegen/src/lib.rs index 391ccf275599..eba6ba723df1 100644 --- a/crates/swc_xml_codegen/src/lib.rs +++ b/crates/swc_xml_codegen/src/lib.rs @@ -462,7 +462,6 @@ fn escape_string(value: &str, is_attribute_mode: bool) -> String { '&' => { result.push_str("&"); } - '\u{00A0}' => result.push_str(" "), '"' if is_attribute_mode => result.push_str("""), '<' if !is_attribute_mode => { result.push_str("<"); From 60d2bc25acb060b3499e531a1c7c75141c778fba Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Thu, 1 Dec 2022 01:02:23 +0300 Subject: [PATCH 9/9] test: remove invalid tests --- .../tests/recovery/element-1/dom.txt | 4 --- .../tests/recovery/element-1/input.xml | 3 -- .../tests/recovery/element-1/output.json | 32 ----------------- .../recovery/element-1/output.swc-stderr | 8 ----- .../tests/recovery/element-1/span.swc-stderr | 35 ------------------- .../tests/recovery/element/dom.txt | 4 --- .../tests/recovery/element/input.xml | 3 -- .../tests/recovery/element/output.json | 32 ----------------- .../tests/recovery/element/output.swc-stderr | 8 ----- .../tests/recovery/element/span.swc-stderr | 35 ------------------- 10 files changed, 164 deletions(-) delete mode 100644 crates/swc_xml_parser/tests/recovery/element-1/dom.txt delete mode 100644 crates/swc_xml_parser/tests/recovery/element-1/input.xml delete mode 100644 crates/swc_xml_parser/tests/recovery/element-1/output.json delete mode 100644 crates/swc_xml_parser/tests/recovery/element-1/output.swc-stderr delete mode 100644 crates/swc_xml_parser/tests/recovery/element-1/span.swc-stderr delete mode 100644 crates/swc_xml_parser/tests/recovery/element/dom.txt delete mode 100644 crates/swc_xml_parser/tests/recovery/element/input.xml delete mode 100644 crates/swc_xml_parser/tests/recovery/element/output.json delete mode 100644 crates/swc_xml_parser/tests/recovery/element/output.swc-stderr delete mode 100644 crates/swc_xml_parser/tests/recovery/element/span.swc-stderr diff --git a/crates/swc_xml_parser/tests/recovery/element-1/dom.txt b/crates/swc_xml_parser/tests/recovery/element-1/dom.txt deleted file mode 100644 index fc713a32716a..000000000000 --- a/crates/swc_xml_parser/tests/recovery/element-1/dom.txt +++ /dev/null @@ -1,4 +0,0 @@ -| -| " - <-svg/> -" diff --git a/crates/swc_xml_parser/tests/recovery/element-1/input.xml b/crates/swc_xml_parser/tests/recovery/element-1/input.xml deleted file mode 100644 index c5378cba4648..000000000000 --- a/crates/swc_xml_parser/tests/recovery/element-1/input.xml +++ /dev/null @@ -1,3 +0,0 @@ - - <-svg/> - \ No newline at end of file diff --git a/crates/swc_xml_parser/tests/recovery/element-1/output.json b/crates/swc_xml_parser/tests/recovery/element-1/output.json deleted file mode 100644 index d748c6bb939f..000000000000 --- a/crates/swc_xml_parser/tests/recovery/element-1/output.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "type": "Document", - "span": { - "start": 1, - "end": 27, - "ctxt": 0 - }, - "children": [ - { - "type": "Element", - "span": { - "start": 1, - "end": 27, - "ctxt": 0 - }, - "tagName": "root", - "attributes": [], - "children": [ - { - "type": "Text", - "span": { - "start": 7, - "end": 20, - "ctxt": 0 - }, - "data": "\n <-svg/>\n", - "raw": "\n <-svg/>\n" - } - ] - } - ] -} diff --git a/crates/swc_xml_parser/tests/recovery/element-1/output.swc-stderr b/crates/swc_xml_parser/tests/recovery/element-1/output.swc-stderr deleted file mode 100644 index a57ea658ed13..000000000000 --- a/crates/swc_xml_parser/tests/recovery/element-1/output.swc-stderr +++ /dev/null @@ -1,8 +0,0 @@ - - x Invalid first character of tag name - ,-[$DIR/tests/recovery/element-1/input.xml:1:1] - 1 | - 2 | <-svg/> - : ^ - 3 | - `---- diff --git a/crates/swc_xml_parser/tests/recovery/element-1/span.swc-stderr b/crates/swc_xml_parser/tests/recovery/element-1/span.swc-stderr deleted file mode 100644 index a9641aadd512..000000000000 --- a/crates/swc_xml_parser/tests/recovery/element-1/span.swc-stderr +++ /dev/null @@ -1,35 +0,0 @@ - - x Document - ,-[$DIR/tests/recovery/element-1/input.xml:1:1] - 1 | ,-> - 2 | | <-svg/> - 3 | `-> - `---- - - x Child - ,-[$DIR/tests/recovery/element-1/input.xml:1:1] - 1 | ,-> - 2 | | <-svg/> - 3 | `-> - `---- - - x Element - ,-[$DIR/tests/recovery/element-1/input.xml:1:1] - 1 | ,-> - 2 | | <-svg/> - 3 | `-> - `---- - - x Child - ,-[$DIR/tests/recovery/element-1/input.xml:1:1] - 1 | ,-> - 2 | `-> <-svg/> - 3 | - `---- - - x Text - ,-[$DIR/tests/recovery/element-1/input.xml:1:1] - 1 | ,-> - 2 | `-> <-svg/> - 3 | - `---- diff --git a/crates/swc_xml_parser/tests/recovery/element/dom.txt b/crates/swc_xml_parser/tests/recovery/element/dom.txt deleted file mode 100644 index 53330b82434c..000000000000 --- a/crates/swc_xml_parser/tests/recovery/element/dom.txt +++ /dev/null @@ -1,4 +0,0 @@ -| -| " - <> -" diff --git a/crates/swc_xml_parser/tests/recovery/element/input.xml b/crates/swc_xml_parser/tests/recovery/element/input.xml deleted file mode 100644 index cc7592ec05df..000000000000 --- a/crates/swc_xml_parser/tests/recovery/element/input.xml +++ /dev/null @@ -1,3 +0,0 @@ - - <> - \ No newline at end of file diff --git a/crates/swc_xml_parser/tests/recovery/element/output.json b/crates/swc_xml_parser/tests/recovery/element/output.json deleted file mode 100644 index aa2619aa7e45..000000000000 --- a/crates/swc_xml_parser/tests/recovery/element/output.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "type": "Document", - "span": { - "start": 1, - "end": 22, - "ctxt": 0 - }, - "children": [ - { - "type": "Element", - "span": { - "start": 1, - "end": 22, - "ctxt": 0 - }, - "tagName": "root", - "attributes": [], - "children": [ - { - "type": "Text", - "span": { - "start": 7, - "end": 15, - "ctxt": 0 - }, - "data": "\n <>\n", - "raw": "\n <>\n" - } - ] - } - ] -} diff --git a/crates/swc_xml_parser/tests/recovery/element/output.swc-stderr b/crates/swc_xml_parser/tests/recovery/element/output.swc-stderr deleted file mode 100644 index 6de567e3805f..000000000000 --- a/crates/swc_xml_parser/tests/recovery/element/output.swc-stderr +++ /dev/null @@ -1,8 +0,0 @@ - - x Invalid first character of tag name - ,-[$DIR/tests/recovery/element/input.xml:1:1] - 1 | - 2 | <> - : ^ - 3 | - `---- diff --git a/crates/swc_xml_parser/tests/recovery/element/span.swc-stderr b/crates/swc_xml_parser/tests/recovery/element/span.swc-stderr deleted file mode 100644 index d000ea1873ed..000000000000 --- a/crates/swc_xml_parser/tests/recovery/element/span.swc-stderr +++ /dev/null @@ -1,35 +0,0 @@ - - x Document - ,-[$DIR/tests/recovery/element/input.xml:1:1] - 1 | ,-> - 2 | | <> - 3 | `-> - `---- - - x Child - ,-[$DIR/tests/recovery/element/input.xml:1:1] - 1 | ,-> - 2 | | <> - 3 | `-> - `---- - - x Element - ,-[$DIR/tests/recovery/element/input.xml:1:1] - 1 | ,-> - 2 | | <> - 3 | `-> - `---- - - x Child - ,-[$DIR/tests/recovery/element/input.xml:1:1] - 1 | ,-> - 2 | `-> <> - 3 | - `---- - - x Text - ,-[$DIR/tests/recovery/element/input.xml:1:1] - 1 | ,-> - 2 | `-> <> - 3 | - `----