diff --git a/crates/swc_html_parser/src/lexer/mod.rs b/crates/swc_html_parser/src/lexer/mod.rs index 10542e943a18..88950c5fb878 100644 --- a/crates/swc_html_parser/src/lexer/mod.rs +++ b/crates/swc_html_parser/src/lexer/mod.rs @@ -1067,16 +1067,16 @@ where State::TagOpen => { // Consume the next input character: match self.consume_next_char() { - // U+0021 EXCLAMATION MARK (!) - // Switch to the markup declaration open state. - Some('!') => { - self.state = State::MarkupDeclarationOpen; - } // U+002F SOLIDUS (/) // Switch to the end tag open state. Some('/') => { self.state = State::EndTagOpen; } + // U+0021 EXCLAMATION MARK (!) + // Switch to the markup declaration open state. + Some('!') => { + self.state = State::MarkupDeclarationOpen; + } // ASCII alpha // Create a new start tag token, set its tag name to the empty string. // Reconsume in the tag name state. diff --git a/crates/swc_xml_ast/src/token.rs b/crates/swc_xml_ast/src/token.rs index a7e61f60b7c0..148b553c303f 100644 --- a/crates/swc_xml_ast/src/token.rs +++ b/crates/swc_xml_ast/src/token.rs @@ -40,10 +40,6 @@ pub enum Token { tag_name: JsWord, attributes: Vec, }, - ShortTag { - tag_name: JsWord, - attributes: Vec, - }, EmptyTag { tag_name: JsWord, attributes: Vec, diff --git a/crates/swc_xml_codegen/src/lib.rs b/crates/swc_xml_codegen/src/lib.rs index 391ccf275599..eba6ba723df1 100644 --- a/crates/swc_xml_codegen/src/lib.rs +++ b/crates/swc_xml_codegen/src/lib.rs @@ -462,7 +462,6 @@ fn escape_string(value: &str, is_attribute_mode: bool) -> String { '&' => { result.push_str("&"); } - '\u{00A0}' => result.push_str(" "), '"' if is_attribute_mode => result.push_str("""), '<' if !is_attribute_mode => { result.push_str("<"); diff --git a/crates/swc_xml_parser/src/error.rs b/crates/swc_xml_parser/src/error.rs index cab58d644b70..3a0031766bb9 100644 --- a/crates/swc_xml_parser/src/error.rs +++ b/crates/swc_xml_parser/src/error.rs @@ -80,6 +80,7 @@ impl Error { 
ErrorKind::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers => { "Missing whitespace between doctype public and system identifiers".into() } + ErrorKind::MissingEndTagName => "Missing end tag name".into(), ErrorKind::NestedComment => "Nested comment".into(), ErrorKind::DoubleHyphenWithInComment => "Double hyper within comment".into(), ErrorKind::NoncharacterInInputStream => "Noncharacter in input stream".into(), @@ -150,6 +151,7 @@ pub enum ErrorKind { MissingWhitespaceAfterDoctypeSystemKeyword, MissingWhitespaceBeforeDoctypeName, MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers, + MissingEndTagName, NestedComment, DoubleHyphenWithInComment, NoncharacterInInputStream, diff --git a/crates/swc_xml_parser/src/lexer/mod.rs b/crates/swc_xml_parser/src/lexer/mod.rs index 3512c557ce98..6af81a1f33ea 100644 --- a/crates/swc_xml_parser/src/lexer/mod.rs +++ b/crates/swc_xml_parser/src/lexer/mod.rs @@ -13,10 +13,6 @@ use crate::{ pub enum State { Data, CharacterReferenceInData, - TagOpen, - EndTagOpen, - EndTagName, - EndTagNameAfter, Pi, PiTarget, PiTargetQuestion, @@ -37,6 +33,8 @@ pub enum State { Cdata, CdataBracket, CdataEnd, + TagOpen, + EndTagOpen, TagName, EmptyTag, TagAttributeNameBefore, @@ -79,7 +77,6 @@ struct Doctype { enum TagKind { Start, End, - Short, Empty, } @@ -667,36 +664,36 @@ where current_tag_token.kind = kind; } + let mut already_seen: AHashSet = Default::default(); + + let attributes = current_tag_token + .attributes + .drain(..) 
+ .map(|attribute| { + let name = JsWord::from(attribute.name); + + if already_seen.contains(&name) { + self.errors + .push(Error::new(attribute.span, ErrorKind::DuplicateAttribute)); + } + + already_seen.insert(name.clone()); + + AttributeToken { + span: attribute.span, + name, + raw_name: attribute.raw_name.map(JsWord::from), + value: attribute.value.map(JsWord::from), + raw_value: attribute.raw_value.map(JsWord::from), + } + }) + .collect(); + match current_tag_token.kind { TagKind::Start => { - let mut already_seen: AHashSet = Default::default(); - let start_tag_token = Token::StartTag { tag_name: current_tag_token.tag_name.into(), - attributes: current_tag_token - .attributes - .drain(..) - .map(|attribute| { - let name = JsWord::from(attribute.name); - - if already_seen.contains(&name) { - self.errors.push(Error::new( - attribute.span, - ErrorKind::DuplicateAttribute, - )); - } - - already_seen.insert(name.clone()); - - AttributeToken { - span: attribute.span, - name, - raw_name: attribute.raw_name.map(JsWord::from), - value: attribute.value.map(JsWord::from), - raw_value: attribute.raw_value.map(JsWord::from), - } - }) - .collect(), + attributes, }; self.emit_token(start_tag_token); @@ -706,104 +703,17 @@ where self.emit_error(ErrorKind::EndTagWithAttributes); } - let mut already_seen: AHashSet = Default::default(); - let end_tag_token = Token::EndTag { tag_name: current_tag_token.tag_name.into(), - attributes: current_tag_token - .attributes - .drain(..) 
- .map(|attribute| { - let name = JsWord::from(attribute.name); - - if already_seen.contains(&name) { - self.errors.push(Error::new( - attribute.span, - ErrorKind::DuplicateAttribute, - )); - } - - already_seen.insert(name.clone()); - - AttributeToken { - span: attribute.span, - name, - raw_name: attribute.raw_name.map(JsWord::from), - value: attribute.value.map(JsWord::from), - raw_value: attribute.raw_value.map(JsWord::from), - } - }) - .collect(), + attributes, }; self.emit_token(end_tag_token); } - TagKind::Short => { - if !current_tag_token.attributes.is_empty() { - self.emit_error(ErrorKind::ShortTagWithAttributes); - } - - let mut already_seen: AHashSet = Default::default(); - - let short_tag = Token::ShortTag { - tag_name: current_tag_token.tag_name.into(), - attributes: current_tag_token - .attributes - .drain(..) - .map(|attribute| { - let name = JsWord::from(attribute.name); - - if already_seen.contains(&name) { - self.errors.push(Error::new( - attribute.span, - ErrorKind::DuplicateAttribute, - )); - } - - already_seen.insert(name.clone()); - - AttributeToken { - span: attribute.span, - name, - raw_name: attribute.raw_name.map(JsWord::from), - value: attribute.value.map(JsWord::from), - raw_value: attribute.raw_value.map(JsWord::from), - } - }) - .collect(), - }; - - self.emit_token(short_tag); - } TagKind::Empty => { - let mut already_seen: AHashSet = Default::default(); - let empty_tag = Token::EmptyTag { tag_name: current_tag_token.tag_name.into(), - attributes: current_tag_token - .attributes - .drain(..) 
- .map(|attribute| { - let name = JsWord::from(attribute.name); - - if already_seen.contains(&name) { - self.errors.push(Error::new( - attribute.span, - ErrorKind::DuplicateAttribute, - )); - } - - already_seen.insert(name.clone()); - - AttributeToken { - span: attribute.span, - name, - raw_name: attribute.raw_name.map(JsWord::from), - value: attribute.value.map(JsWord::from), - raw_value: attribute.raw_value.map(JsWord::from), - } - }) - .collect(), + attributes, }; self.emit_token(empty_tag); @@ -1034,166 +944,6 @@ where self.emit_character_token(('&', '&')); } } - State::TagOpen => { - // Consume the next input character: - match self.consume_next_char() { - // U+002F SOLIDUS (/) - // Switch to the end tag open state. - Some('/') => { - self.state = State::EndTagOpen; - } - // U+003F QUESTION MARK(?) - // Switch to the pi state. - Some('?') => { - self.state = State::Pi; - } - // U+0021 EXCLAMATION MARK (!) - // Switch to the markup declaration open state. - Some('!') => { - self.state = State::MarkupDeclaration; - } - // U+0009 CHARACTER TABULATION (Tab) - // U+000A LINE FEED (LF) - // U+0020 SPACE (Space) - // U+003A (:) - // U+003C LESSER-THAN SIGN (<) - // U+003E GREATER-THAN SIGN (>) - // EOF - // Parse error. Emit a U+003C LESSER-THAN SIGN (<) character. Reconsume the - // current input character in the data state. - Some(c) if is_spacy_except_ff(c) => { - self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); - self.emit_character_token(('<', '<')); - self.reconsume_in_state(State::Data); - } - Some(':') | Some('<') | Some('>') | None => { - self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); - self.emit_character_token(('<', '<')); - self.reconsume_in_state(State::Data); - } - // Anything else - // Create a new tag token and set its name to the input character, then switch - // to the tag name state. 
- Some(c) => { - self.validate_input_stream_character(c); - self.create_tag_token(TagKind::Start); - self.append_to_tag_token_name(c); - self.state = State::TagName; - } - } - } - State::EndTagOpen => { - // Consume the next input character: - match self.consume_next_char() { - // U+003E GREATER-THAN SIGN (>) - // Emit a short end tag token and then switch to the data state. - Some('>') => { - self.emit_tag_token(Some(TagKind::Short)); - self.state = State::Data; - } - // U+0009 CHARACTER TABULATION (Tab) - // U+000A LINE FEED (LF) - // U+0020 SPACE (Space) - // U+003C LESSER-THAN SIGN (<) - // U+003A (:) - // EOF - // Parse error. Emit a U+003C LESSER-THAN SIGN (<) character token and a U+002F - // SOLIDUS (/) character token. Reconsume the current input character in the - // data state. - Some(c) if is_spacy_except_ff(c) => { - self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); - self.emit_character_token(('<', '<')); - self.emit_character_token(('/', '/')); - self.reconsume_in_state(State::Data); - } - Some('<') | Some(':') | None => { - self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); - self.emit_character_token(('<', '<')); - self.emit_character_token(('/', '/')); - self.reconsume_in_state(State::Data); - } - // Anything else - // Create an end tag token and set its name to the input character, then switch - // to the end tag name state. - Some(c) => { - self.validate_input_stream_character(c); - self.create_tag_token(TagKind::End); - self.append_to_tag_token_name(c); - self.state = State::EndTagName - } - } - } - State::EndTagName => { - // Consume the next input character: - match self.consume_next_char() { - // U+0009 CHARACTER TABULATION (Tab) - // U+000A LINE FEED (LF) - // U+0020 SPACE (Space) - // Switch to the end tag name after state. - Some(c) if is_spacy_except_ff(c) => { - self.state = State::EndTagNameAfter; - } - // U+002F SOLIDUS (/) - // Parse error. Switch to the end tag name after state. 
- Some('/') => { - self.emit_error(ErrorKind::EndTagWithTrailingSolidus); - self.state = State::EndTagNameAfter; - } - // EOF - // Parse error. Emit the start tag token and then reprocess the current input - // character in the data state. - None => { - self.emit_error(ErrorKind::EofInTag); - self.emit_tag_token(Some(TagKind::Start)); - self.reconsume_in_state(State::Data); - } - // U+003E GREATER-THAN SIGN (>) - // Emit the current token and then switch to the data state. - Some('>') => { - self.emit_tag_token(None); - self.state = State::Data; - } - // Anything else - // Append the current input character to the tag name and stay in the current - // state. - Some(c) => { - self.validate_input_stream_character(c); - self.append_to_tag_token_name(c); - } - } - } - State::EndTagNameAfter => { - // Consume the next input character: - match self.consume_next_char() { - // U+003E GREATER-THAN SIGN (>) - // Emit the current token and then switch to the data state. - Some('>') => { - self.emit_tag_token(None); - self.state = State::Data; - } - // U+0009 CHARACTER TABULATION (Tab) - // U+000A LINE FEED (LF) - // U+0020 SPACE (Space) - // Stay in the current state. - Some(c) if is_spacy_except_ff(c) => { - self.skip_next_lf(c); - } - // EOF - // Parse error. Emit the current token and then reprocess the current input - // character in the data state. - None => { - self.emit_error(ErrorKind::EofInTag); - self.emit_tag_token(None); - self.reconsume_in_state(State::Data); - } - // Anything else - // Parse error. Stay in the current state. 
- Some(c) => { - self.validate_input_stream_character(c); - self.emit_error(ErrorKind::InvalidCharacterInTag); - } - } - } State::Pi => { // Consume the next input character: match self.consume_next_char() { @@ -1835,7 +1585,91 @@ where } } } - // https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state + State::TagOpen => { + // Consume the next input character: + match self.consume_next_char() { + // U+002F SOLIDUS (/) + // Switch to the end tag open state. + Some('/') => { + self.state = State::EndTagOpen; + } + // U+0021 EXCLAMATION MARK (!) + // Switch to the markup declaration open state. + Some('!') => { + self.state = State::MarkupDeclaration; + } + // U+003F QUESTION MARK(?) + // Switch to the pi state. + Some('?') => { + self.state = State::Pi; + } + // Name start character + // Create a new tag token and set its name to the input character, then switch + // to the tag name state. + Some(c) if is_name_start_char(c) => { + self.create_tag_token(TagKind::Start); + self.reconsume_in_state(State::TagName); + } + // EOF + // This is an eof-before-tag-name parse error. Emit a U+003C LESS-THAN SIGN + // character token and an end-of-file token. + None => { + self.emit_error(ErrorKind::EofBeforeTagName); + self.emit_character_token(('<', '<')); + self.emit_token(Token::Eof); + + return Ok(()); + } + // Anything else + // This is an invalid-first-character-of-tag-name parse error. Emit a U+003C + // LESS-THAN SIGN character token. Reconsume in the data state. + _ => { + self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); + self.emit_character_token(('<', '<')); + self.reconsume_in_state(State::Data); + } + } + } + State::EndTagOpen => { + // Consume the next input character: + match self.consume_next_char() { + // ASCII alpha + // Create a new end tag token, set its tag name to the empty string. + // Reconsume in the tag name state. 
+ Some(c) if is_name_char(c) => { + self.create_tag_token(TagKind::End); + self.reconsume_in_state(State::TagName); + } + // U+003E GREATER-THAN SIGN (>) + // This is a missing-end-tag-name parse error. Switch to the data state. + Some('>') => { + self.emit_error(ErrorKind::MissingEndTagName); + self.state = State::Data; + } + // EOF + // This is an eof-before-tag-name parse error. Emit a U+003C LESS-THAN SIGN + // character token, a U+002F SOLIDUS character token and an end-of-file + // token. + None => { + self.emit_error(ErrorKind::EofBeforeTagName); + self.emit_character_token(('<', '<')); + self.emit_character_token(('/', '/')); + self.emit_token(Token::Eof); + + return Ok(()); + } + // Anything else + // This is an invalid-first-character-of-tag-name parse error. Create a + // comment token whose data is the empty string. Reconsume in the bogus + // comment state. + _ => { + self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName); + self.emit_character_token(('<', '<')); + self.emit_character_token(('/', '/')); + self.reconsume_in_state(State::BogusComment); + } + } + } State::TagName => { // Consume the next input character: match self.consume_next_char() { @@ -1847,30 +1681,38 @@ where self.skip_next_lf(c); self.state = State::TagAttributeNameBefore; } + // U+002F SOLIDUS (/) + // Set current tag to empty tag. Switch to the empty tag state. + Some('/') => { + self.set_tag_to_empty_tag(); + self.state = State::EmptyTag; + } // U+003E GREATER-THAN SIGN (>) - // Emit the start tag token and then switch to the data state. + // Switch to the data state. Emit the current tag token. Some('>') => { - self.emit_tag_token(Some(TagKind::Start)); self.state = State::Data; + self.emit_tag_token(None); } // EOF - // Parse error. Emit the current token and then reprocess the current input - // character in the data state. + // This is an eof-in-tag parse error. Emit an end-of-file token. 
None => { self.emit_error(ErrorKind::EofInTag); self.emit_tag_token(None); - self.reconsume_in_state(State::Data); - } - // U+002F SOLIDUS (/) - // Set current tag to empty tag. Switch to the empty tag state. - Some('/') => { - self.set_tag_to_empty_tag(); - self.state = State::EmptyTag; + + return Ok(()); } - // Anything else + // Name character // Append the current input character to the tag name and stay in the current // state. + Some(c) if is_name_char(c) => { + self.validate_input_stream_character(c); + self.append_to_tag_token_name(c); + } + // Anything else + // Parse error. Append the current input character to the tag name and stay in + // the current state. Some(c) => { + self.emit_error(ErrorKind::InvalidCharacterInTag); self.validate_input_stream_character(c); self.append_to_tag_token_name(c); } @@ -1883,7 +1725,7 @@ where // Emit the current tag token as empty tag token and then switch to the data // state. Some('>') => { - self.emit_tag_token(None); + self.emit_tag_token(Some(TagKind::Empty)); self.state = State::Data; } // Anything else @@ -3126,3 +2968,33 @@ fn is_upper_hex_digit(c: char) -> bool { fn is_lower_hex_digit(c: char) -> bool { matches!(c, '0'..='9' | 'a'..='f') } + +// NameStartChar ::= +// ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | +// [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | +// [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | +// [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] +// U+00D7 (multiplication sign) and U+00F7 (division sign) sit in the gaps between these ranges and must be rejected. +#[inline(always)] +fn is_name_start_char(c: char) -> bool { + match c { + ':' | 'A'..='Z' | '_' | 'a'..='z' => true, + _ if matches!(c as u32, 0xc0..=0xd6 | 0xd8..=0xf6 | 0xf8..=0x2ff | 0x370..=0x37d | 0x37f..=0x1fff | 0x200c..=0x200d | 0x2070..=0x218f | 0x2c00..=0x2fef | 0x3001..=0xd7ff | 0xf900..=0xfdcf | 0xfdf0..=0xfffd | 0x10000..=0xeffff) => { + true + } + _ => false, + } +} + +// NameChar ::= +// NameStartChar | "-" | "." 
| [0-9] | #xB7 | [#x0300-#x036F] | +// [#x203F-#x2040] +#[inline(always)] +fn is_name_char(c: char) -> bool { + match c { + '-' | '.' | '0'..='9' => true, + _ if matches!(c as u32, 0xb7 | 0x0300..=0x036f | 0x203f..=0x2040) => true, + _ if is_name_start_char(c) => true, + _ => false, + } +} diff --git a/crates/swc_xml_parser/src/parser/mod.rs b/crates/swc_xml_parser/src/parser/mod.rs index bb76cf5c2fb3..abe540f52ba7 100644 --- a/crates/swc_xml_parser/src/parser/mod.rs +++ b/crates/swc_xml_parser/src/parser/mod.rs @@ -350,13 +350,6 @@ where self.phase = Phase::EndPhase; } } - Token::ShortTag { .. } => { - self.open_elements_stack.items.pop(); - - if self.open_elements_stack.items.is_empty() { - self.phase = Phase::EndPhase; - } - } Token::Comment { .. } => { let comment = self.create_comment(token_and_info); @@ -482,11 +475,6 @@ where attributes, .. } - | Token::ShortTag { - tag_name, - attributes, - .. - } | Token::EmptyTag { tag_name, attributes, diff --git a/crates/swc_xml_parser/tests/fixture/tags/dom.txt b/crates/swc_xml_parser/tests/fixture/tags/dom.txt index 338a98a8f619..93bedab8ea1a 100644 --- a/crates/swc_xml_parser/tests/fixture/tags/dom.txt +++ b/crates/swc_xml_parser/tests/fixture/tags/dom.txt @@ -21,10 +21,31 @@ | | " " -| +| | " - " -| +| <тест> +| " +" +| <тест> +| "test" +| " +" +| +| " +" +| +| "test" +| " +" +| <俄语> +| լեզու="ռուսերեն" +| "данные" +| " +" +| +| " +" +| <:circle> | " " diff --git a/crates/swc_xml_parser/tests/fixture/tags/input.xml b/crates/swc_xml_parser/tests/fixture/tags/input.xml index df83f4999d2d..974fcf67ad4e 100644 --- a/crates/swc_xml_parser/tests/fixture/tags/input.xml +++ b/crates/swc_xml_parser/tests/fixture/tags/input.xml @@ -1,11 +1,16 @@ - + Start - - +<тест/> +<тест>test + +test +<俄语 լեզու="ռուսերեն">данные + +<:circle/> \ No newline at end of file diff --git a/crates/swc_xml_parser/tests/fixture/tags/output.json b/crates/swc_xml_parser/tests/fixture/tags/output.json index 0d1b1f8da8b3..6d317aa3da9b 100644 --- 
a/crates/swc_xml_parser/tests/fixture/tags/output.json +++ b/crates/swc_xml_parser/tests/fixture/tags/output.json @@ -2,7 +2,7 @@ "type": "Document", "span": { "start": 1, - "end": 174, + "end": 321, "ctxt": 0 }, "children": [ @@ -10,7 +10,7 @@ "type": "Element", "span": { "start": 1, - "end": 174, + "end": 321, "ctxt": 0 }, "tagName": "root", @@ -41,7 +41,7 @@ "type": "Element", "span": { "start": 15, - "end": 22, + "end": 19, "ctxt": 0 }, "tagName": "a", @@ -51,8 +51,8 @@ { "type": "Text", "span": { - "start": 22, - "end": 23, + "start": 19, + "end": 20, "ctxt": 0 }, "data": "\n", @@ -61,8 +61,8 @@ { "type": "Comment", "span": { - "start": 23, - "end": 53, + "start": 20, + "end": 50, "ctxt": 0 }, "data": " start tag and end tag ", @@ -71,8 +71,8 @@ { "type": "Text", "span": { - "start": 53, - "end": 54, + "start": 50, + "end": 51, "ctxt": 0 }, "data": "\n", @@ -81,8 +81,8 @@ { "type": "Element", "span": { - "start": 54, - "end": 82, + "start": 51, + "end": 79, "ctxt": 0 }, "tagName": "start-tag", @@ -91,8 +91,8 @@ { "type": "Text", "span": { - "start": 65, - "end": 70, + "start": 62, + "end": 67, "ctxt": 0 }, "data": "Start", @@ -103,8 +103,8 @@ { "type": "Text", "span": { - "start": 82, - "end": 83, + "start": 79, + "end": 80, "ctxt": 0 }, "data": "\n", @@ -113,8 +113,8 @@ { "type": "Comment", "span": { - "start": 83, - "end": 101, + "start": 80, + "end": 98, "ctxt": 0 }, "data": " empty tag ", @@ -123,8 +123,8 @@ { "type": "Text", "span": { - "start": 101, - "end": 102, + "start": 98, + "end": 99, "ctxt": 0 }, "data": "\n", @@ -133,8 +133,8 @@ { "type": "Element", "span": { - "start": 102, - "end": 115, + "start": 99, + "end": 112, "ctxt": 0 }, "tagName": "short-tag", @@ -144,8 +144,8 @@ { "type": "Text", "span": { - "start": 115, - "end": 116, + "start": 112, + "end": 113, "ctxt": 0 }, "data": "\n", @@ -154,8 +154,8 @@ { "type": "Element", "span": { - "start": 116, - "end": 129, + "start": 113, + "end": 126, "ctxt": 0 }, "tagName": "test", @@ -165,49 +165,224 
@@ { "type": "Text", "span": { - "start": 129, - "end": 130, + "start": 126, + "end": 127, "ctxt": 0 }, "data": "\n", "raw": "\n" }, { - "type": "Comment", + "type": "Element", "span": { - "start": 130, - "end": 148, + "start": 127, + "end": 140, "ctxt": 0 }, - "data": " short tag ", - "raw": "" + "tagName": "test", + "attributes": [], + "children": [] }, { "type": "Text", "span": { - "start": 148, + "start": 140, + "end": 141, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 141, + "end": 152, + "ctxt": 0 + }, + "tagName": "тест", + "attributes": [], + "children": [] + }, + { + "type": "Text", + "span": { + "start": 152, "end": 153, "ctxt": 0 }, - "data": "\n\n", - "raw": "\n\n" + "data": "\n", + "raw": "\n" }, { "type": "Element", "span": { "start": 153, - "end": 166, + "end": 178, "ctxt": 0 }, - "tagName": "test", + "tagName": "тест", + "attributes": [], + "children": [ + { + "type": "Text", + "span": { + "start": 163, + "end": 167, + "ctxt": 0 + }, + "data": "test", + "raw": "test" + } + ] + }, + { + "type": "Text", + "span": { + "start": 178, + "end": 179, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 179, + "end": 190, + "ctxt": 0 + }, + "tagName": "html:bar", + "attributes": [], + "children": [] + }, + { + "type": "Text", + "span": { + "start": 190, + "end": 191, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 191, + "end": 216, + "ctxt": 0 + }, + "tagName": "html:bar", + "attributes": [], + "children": [ + { + "type": "Text", + "span": { + "start": 201, + "end": 205, + "ctxt": 0 + }, + "data": "test", + "raw": "test" + } + ] + }, + { + "type": "Text", + "span": { + "start": 216, + "end": 217, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 217, + "end": 276, + "ctxt": 0 + }, + "tagName": "俄语", + "attributes": [ + { + "type": "Attribute", + "span": { + 
"start": 0, + "end": 0, + "ctxt": 0 + }, + "namespace": null, + "prefix": null, + "name": "լեզու", + "rawName": "լեզու", + "value": "ռուսերեն", + "rawValue": "\"ռուսերեն\"" + } + ], + "children": [ + { + "type": "Text", + "span": { + "start": 255, + "end": 267, + "ctxt": 0 + }, + "data": "данные", + "raw": "данные" + } + ] + }, + { + "type": "Text", + "span": { + "start": 276, + "end": 277, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 277, + "end": 302, + "ctxt": 0 + }, + "tagName": "svg:circle", + "attributes": [], + "children": [] + }, + { + "type": "Text", + "span": { + "start": 302, + "end": 303, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 303, + "end": 313, + "ctxt": 0 + }, + "tagName": ":circle", "attributes": [], "children": [] }, { "type": "Text", "span": { - "start": 166, - "end": 167, + "start": 313, + "end": 314, "ctxt": 0 }, "data": "\n", diff --git a/crates/swc_xml_parser/tests/fixture/tags/span.swc-stderr b/crates/swc_xml_parser/tests/fixture/tags/span.swc-stderr index c488f81d9d97..c73071ae3b28 100644 --- a/crates/swc_xml_parser/tests/fixture/tags/span.swc-stderr +++ b/crates/swc_xml_parser/tests/fixture/tags/span.swc-stderr @@ -2,53 +2,68 @@ x Document ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | ,-> - 2 | | + 2 | | 3 | | 4 | | Start 5 | | 6 | | 7 | | - 8 | | - 9 | | - 10 | | - 11 | `-> + 8 | | + 9 | | <тест/> + 10 | | <тест>test + 11 | | + 12 | | test + 13 | | <俄语 լեզու="ռուսերեն">данные + 14 | | + 15 | | <:circle/> + 16 | `-> `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | ,-> - 2 | | + 2 | | 3 | | 4 | | Start 5 | | 6 | | 7 | | - 8 | | - 9 | | - 10 | | - 11 | `-> + 8 | | + 9 | | <тест/> + 10 | | <тест>test + 11 | | + 12 | | test + 13 | | <俄语 լեզու="ռուսերեն">данные + 14 | | + 15 | | <:circle/> + 16 | `-> `---- x Element ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | ,-> - 2 | | + 2 | | 3 | | 4 | | Start 5 | | 6 | | 7 | | - 8 | 
| - 9 | | - 10 | | - 11 | `-> + 8 | | + 9 | | <тест/> + 10 | | <тест>test + 11 | | + 12 | | test + 13 | | <俄语 լեզու="ռուսերեն">данные + 14 | | + 15 | | <:circle/> + 16 | `-> `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | : ^ - 2 | + 2 | 3 | `---- @@ -56,14 +71,14 @@ ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | : ^ - 2 | + 2 | 3 | `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | - 2 | + 2 | : ^^^^^^^ 3 | `---- @@ -71,7 +86,7 @@ x Element ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | - 2 | + 2 | : ^^^^^^^ 3 | `---- @@ -79,24 +94,24 @@ x Child ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | - 2 | - : ^^^^^^^ + 2 | + : ^^^^ 3 | `---- x Element ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | - 2 | - : ^^^^^^^ + 2 | + : ^^^^ 3 | `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | - 2 | - : ^ + 2 | + : ^ 3 | 4 | Start `---- @@ -104,15 +119,15 @@ x Text ,-[$DIR/tests/fixture/tags/input.xml:1:1] 1 | - 2 | - : ^ + 2 | + : ^ 3 | 4 | Start `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:2:1] - 2 | + 2 | 3 | : ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | Start @@ -120,7 +135,7 @@ x Comment ,-[$DIR/tests/fixture/tags/input.xml:2:1] - 2 | + 2 | 3 | : ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | Start @@ -128,7 +143,7 @@ x Child ,-[$DIR/tests/fixture/tags/input.xml:2:1] - 2 | + 2 | 3 | : ^ 4 | Start @@ -137,7 +152,7 @@ x Text ,-[$DIR/tests/fixture/tags/input.xml:2:1] - 2 | + 2 | 3 | : ^ 4 | Start @@ -250,7 +265,7 @@ 6 | : ^ 7 | - 8 | + 8 | `---- x Text @@ -259,7 +274,7 @@ 6 | : ^ 7 | - 8 | + 8 | `---- x Child @@ -267,7 +282,7 @@ 6 | 7 | : ^^^^^^^^^^^^^ - 8 | + 8 | `---- x Element @@ -275,7 +290,7 @@ 6 | 7 | : ^^^^^^^^^^^^^ - 8 | + 8 | `---- x Child @@ -283,8 +298,8 @@ 6 | 7 | : ^ - 8 | - 9 | + 8 | + 9 | <тест/> `---- x Text @@ -292,72 +307,326 @@ 6 | 7 | : ^ - 8 | - 9 | + 8 | + 9 | <тест/> `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:7:1] 7 | - 8 | - : ^^^^^^^^^^^^^^^^^^ - 9 | + 8 | + : ^^^^^^^^^^^^^ + 9 | <тест/> `---- - x Comment + x Element 
,-[$DIR/tests/fixture/tags/input.xml:7:1] 7 | - 8 | - : ^^^^^^^^^^^^^^^^^^ - 9 | + 8 | + : ^^^^^^^^^^^^^ + 9 | <тест/> `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:7:1] - 7 | - 8 | ,-> - 9 | `-> - 10 | - 11 | + 7 | + 8 | + : ^ + 9 | <тест/> + 10 | <тест>test `---- x Text ,-[$DIR/tests/fixture/tags/input.xml:7:1] - 7 | - 8 | ,-> - 9 | `-> - 10 | - 11 | + 7 | + 8 | + : ^ + 9 | <тест/> + 10 | <тест>test + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:8:1] + 8 | + 9 | <тест/> + : ^^^^^^^^^^^ + 10 | <тест>test + `---- + + x Element + ,-[$DIR/tests/fixture/tags/input.xml:8:1] + 8 | + 9 | <тест/> + : ^^^^^^^^^^^ + 10 | <тест>test + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:8:1] + 8 | + 9 | <тест/> + : ^ + 10 | <тест>test + 11 | + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:8:1] + 8 | + 9 | <тест/> + : ^ + 10 | <тест>test + 11 | `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:9:1] - 9 | - 10 | - : ^^^^^^^^^^^^^ - 11 | + 9 | <тест/> + 10 | <тест>test + : ^^^^^^^^^^^^^^^^^^^^^^^^^ + 11 | `---- x Element ,-[$DIR/tests/fixture/tags/input.xml:9:1] - 9 | - 10 | - : ^^^^^^^^^^^^^ - 11 | + 9 | <тест/> + 10 | <тест>test + : ^^^^^^^^^^^^^^^^^^^^^^^^^ + 11 | `---- x Child ,-[$DIR/tests/fixture/tags/input.xml:9:1] - 9 | - 10 | - : ^ - 11 | + 9 | <тест/> + 10 | <тест>test + : ^^^^ + 11 | `---- x Text ,-[$DIR/tests/fixture/tags/input.xml:9:1] - 9 | - 10 | - : ^ - 11 | + 9 | <тест/> + 10 | <тест>test + : ^^^^ + 11 | + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:9:1] + 9 | <тест/> + 10 | <тест>test + : ^ + 11 | + 12 | test + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:9:1] + 9 | <тест/> + 10 | <тест>test + : ^ + 11 | + 12 | test + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:10:1] + 10 | <тест>test + 11 | + : ^^^^^^^^^^^ + 12 | test + `---- + + x Element + ,-[$DIR/tests/fixture/tags/input.xml:10:1] + 10 | <тест>test + 11 | + : ^^^^^^^^^^^ + 12 | test + `---- + + x Child + 
,-[$DIR/tests/fixture/tags/input.xml:10:1] + 10 | <тест>test + 11 | + : ^ + 12 | test + 13 | <俄语 լեզու="ռուսերեն">данные + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:10:1] + 10 | <тест>test + 11 | + : ^ + 12 | test + 13 | <俄语 լեզու="ռուսերեն">данные + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:11:1] + 11 | + 12 | test + : ^^^^^^^^^^^^^^^^^^^^^^^^^ + 13 | <俄语 լեզու="ռուսերեն">данные + `---- + + x Element + ,-[$DIR/tests/fixture/tags/input.xml:11:1] + 11 | + 12 | test + : ^^^^^^^^^^^^^^^^^^^^^^^^^ + 13 | <俄语 լեզու="ռուսերեն">данные + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:11:1] + 11 | + 12 | test + : ^^^^ + 13 | <俄语 լեզու="ռուսերեն">данные + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:11:1] + 11 | + 12 | test + : ^^^^ + 13 | <俄语 լեզու="ռուսերեն">данные + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:11:1] + 11 | + 12 | test + : ^ + 13 | <俄语 լեզու="ռուսերեն">данные + 14 | + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:11:1] + 11 | + 12 | test + : ^ + 13 | <俄语 լեզու="ռուսերեն">данные + 14 | + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:12:1] + 12 | test + 13 | <俄语 լեզու="ռուսերեն">данные + : ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + 14 | + `---- + + x Element + ,-[$DIR/tests/fixture/tags/input.xml:12:1] + 12 | test + 13 | <俄语 լեզու="ռուսերեն">данные + : ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + 14 | + `---- + + x Attribute + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:12:1] + 12 | test + 13 | <俄语 լեզու="ռուսերեն">данные + : ^^^^^^^^^^^^ + 14 | + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:12:1] + 12 | test + 13 | <俄语 լեզու="ռուսերեն">данные + : ^^^^^^^^^^^^ + 14 | + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:12:1] + 12 | test + 13 | <俄语 լեզու="ռուսերեն">данные + : ^ + 14 | + 15 | <:circle/> + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:12:1] + 12 | test + 13 | <俄语 լեզու="ռուսերեն">данные + : ^ + 
14 | + 15 | <:circle/> + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:13:1] + 13 | <俄语 լեզու="ռուսերեն">данные + 14 | + : ^^^^^^^^^^^^^^^^^^^^^^^^^ + 15 | <:circle/> + `---- + + x Element + ,-[$DIR/tests/fixture/tags/input.xml:13:1] + 13 | <俄语 լեզու="ռուսերեն">данные + 14 | + : ^^^^^^^^^^^^^^^^^^^^^^^^^ + 15 | <:circle/> + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:13:1] + 13 | <俄语 լեզու="ռուսերեն">данные + 14 | + : ^ + 15 | <:circle/> + 16 | + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:13:1] + 13 | <俄语 լեզու="ռուսերեն">данные + 14 | + : ^ + 15 | <:circle/> + 16 | + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:14:1] + 14 | + 15 | <:circle/> + : ^^^^^^^^^^ + 16 | + `---- + + x Element + ,-[$DIR/tests/fixture/tags/input.xml:14:1] + 14 | + 15 | <:circle/> + : ^^^^^^^^^^ + 16 | + `---- + + x Child + ,-[$DIR/tests/fixture/tags/input.xml:14:1] + 14 | + 15 | <:circle/> + : ^ + 16 | + `---- + + x Text + ,-[$DIR/tests/fixture/tags/input.xml:14:1] + 14 | + 15 | <:circle/> + : ^ + 16 | `----