From 2b503c16d5f338c6a786a19c7c0acbc4c06bcfee Mon Sep 17 00:00:00 2001 From: Justin Ridgewell Date: Sat, 7 Jan 2023 23:57:26 -0500 Subject: [PATCH] fix(es/codegen): Fix LineCol calculation of printed files (#6763) **Description:** There were several issues with the way we updated the current `LineCol` position during the printing of the generated file: - We used `chars` and `char_indices` (UTF-32) instead of `encode_utf16` (UTF-16) chars. - JS uses UCS-2 (basically UTF-16) for its strings, and source maps default to that implicitly. - `\r` was incorrectly handled - it didn't add a `line_start`; only `\n` did - `\r\n` was incorrectly handled - It was trying to let the `\n` path handle the `line_start`, but it called `chars.next()` which ate the `\n` char. I also took the opportunity to avoid the `Vec` allocations and reduced some code duplication. See the [before](https://evanw.github.io/source-map-visualization/#ODEzAC8qKgogKiBmb28KICogQHBhcmFtIGRhdGEgZm9vCiAqIEByZXR1cm5zIGZvbwogKi8gZXhwb3J0IGNvbnN0IGZpeHVwUmlza0NvbmZpZ0RhdGEgPSAoZGF0YSk9PnsKICAgIGlmICh4KSB7CiAgICAgICAgcmV0dXJuIDEyMzsKICAgIH0gZWxzZSB7CiAgICAgICAgcmV0dXJuIDQ1NjsKICAgIH0KfTsKCi8vIyBzb3VyY2VNYXBwaW5nVVJMPWRhdGE6YXBwbGljYXRpb24vanNvbjtiYXNlNjQsZXlKMlpYSnphVzl1SWpvekxDSnpiM1Z5WTJWeklqcGJJbWx1Y0hWMExuUnpJbDBzSW5OdmRYSmpaWE5EYjI1MFpXNTBJanBiSWk4cUtseHlYRzRnS2lCbWIyOWNjbHh1SUNvZ1FIQmhjbUZ0SUdSaGRHRWdabTl2WEhKY2JpQXFJRUJ5WlhSMWNtNXpJR1p2YjF4eVhHNGdLaTljY2x4dVpYaHdiM0owSUdOdmJuTjBJR1pwZUhWd1VtbHphME52Ym1acFowUmhkR0VnUFNBb1pHRjBZVG9nWVc1NUtUb2dkSGx3WlhNdVVtbHphME52Ym1acFoxUjVjR1VnUFQ0Z2UxeHlYRzRnSUdsbUlDaDRLU0I3WEhKY2JpQWdJQ0J5WlhSMWNtNGdNVEl6TzF4eVhHNGdJSDBnWld4elpTQjdYSEpjYmlBZ0lDQnlaWFIxY200Z05EVTJPMXh5WEc0Z0lIMWNjbHh1ZlRzaVhTd2libUZ0WlhNaU9sc2labWw0ZFhCU2FYTnJRMjl1Wm1sblJHRjBZU0lzSW1SaGRHRWlMQ0o0SWwwc0ltMWhjSEJwYm1keklqb2lRVUZCUVN4dFJFRkpReXhIUVVORUxFOUJRVThzVFVGQlRVRXNjMEpCUVhOQ0xFTkJRVU5ETEU5QlFXOURPMGxCUTNSRkxFbEJRVWxETEVkQlFVYzdVVUZEVEN4UFFVRlBPMGxCUTFRc1QwRkJUenRSUVVOTUxFOUJRVTg3U1VGRFZDeERRVUZETzBGQl
EwZ3NSVUZCUlNKOTQ0NAB7InZlcnNpb24iOjMsInNvdXJjZXMiOlsiaW5wdXQudHMiXSwic291cmNlc0NvbnRlbnQiOlsiLyoqXHJcbiAqIGZvb1xyXG4gKiBAcGFyYW0gZGF0YSBmb29cclxuICogQHJldHVybnMgZm9vXHJcbiAqL1xyXG5leHBvcnQgY29uc3QgZml4dXBSaXNrQ29uZmlnRGF0YSA9IChkYXRhOiBhbnkpOiB0eXBlcy5SaXNrQ29uZmlnVHlwZSA9PiB7XHJcbiAgaWYgKHgpIHtcclxuICAgIHJldHVybiAxMjM7XHJcbiAgfSBlbHNlIHtcclxuICAgIHJldHVybiA0NTY7XHJcbiAgfVxyXG59OyJdLCJuYW1lcyI6WyJmaXh1cFJpc2tDb25maWdEYXRhIiwiZGF0YSIsIngiXSwibWFwcGluZ3MiOiJBQUFBLG1EQUlDLEdBQ0QsT0FBTyxNQUFNQSxzQkFBc0IsQ0FBQ0MsT0FBb0M7SUFDdEUsSUFBSUMsR0FBRztRQUNMLE9BQU87SUFDVCxPQUFPO1FBQ0wsT0FBTztJQUNULENBQUM7QUFDSCxFQUFFIn0=) and [after](https://evanw.github.io/source-map-visualization/#ODIyAC8qKgogKiBmb28KICogQHBhcmFtIGRhdGEgZm9vCiAqIEByZXR1cm5zIGZvbwogKi8gZXhwb3J0IHZhciBmaXh1cFJpc2tDb25maWdEYXRhID0gZnVuY3Rpb24oZGF0YSkgewogICAgaWYgKHgpIHsKICAgICAgICByZXR1cm4gMTIzOwogICAgfSBlbHNlIHsKICAgICAgICByZXR1cm4gNDU2OwogICAgfQp9OwoKLy8jIHNvdXJjZU1hcHBpbmdVUkw9ZGF0YTphcHBsaWNhdGlvbi9qc29uO2Jhc2U2NCxleUoyWlhKemFXOXVJam96TENKemIzVnlZMlZ6SWpwYklpNHZZWEJ3TG1weklsMHNJbk52ZFhKalpYTkRiMjUwWlc1MElqcGJJaThxS2x4eVhHNGdLaUJtYjI5Y2NseHVJQ29nUUhCaGNtRnRJR1JoZEdFZ1ptOXZYSEpjYmlBcUlFQnlaWFIxY201eklHWnZiMXh5WEc0Z0tpOWNjbHh1Wlhod2IzSjBJR052Ym5OMElHWnBlSFZ3VW1semEwTnZibVpwWjBSaGRHRWdQU0FvWkdGMFlUb2dZVzU1S1RvZ2RIbHdaWE11VW1semEwTnZibVpwWjFSNWNHVWdQVDRnZTF4eVhHNGdJR2xtSUNoNEtTQjdYSEpjYmlBZ0lDQnlaWFIxY200Z01USXpPMXh5WEc0Z0lIMGdaV3h6WlNCN1hISmNiaUFnSUNCeVpYUjFjbTRnTkRVMk8xeHlYRzRnSUgxY2NseHVmVHNpWFN3aWJtRnRaWE1pT2xzaVptbDRkWEJTYVhOclEyOXVabWxuUkdGMFlTSXNJbVJoZEdFaUxDSjRJbDBzSW0xaGNIQnBibWR6SWpvaVFVRkJRVHM3T3p0RFFVbERMRWRCUTBRc1QwRkJUeXhKUVVGTlFTeHpRa0ZCYzBJc1UwRkJRME1zVFVGQmIwTTdTVUZEZEVVc1NVRkJTVU1zUjBGQlJ6dFJRVU5NTEU5QlFVODdTVUZEVkN4UFFVRlBPMUZCUTB3c1QwRkJUenRKUVVOVUxFTkJRVU03UVVGRFNDeEZRVUZGSW4wPTQ0NgB7InZlcnNpb24iOjMsInNvdXJjZXMiOlsiLi9hcHAuanMiXSwic291cmNlc0NvbnRlbnQiOlsiLyoqXHJcbiAqIGZvb1xyXG4gKiBAcGFyYW0gZGF0YSBmb29cclxuICogQHJldHVybnMgZm9vXHJcbiAqL1xyXG5leHBvcnQgY29uc3QgZml4dXBSaXNrQ29uZmlnRGF0YSA9IChkYXRhOi
BhbnkpOiB0eXBlcy5SaXNrQ29uZmlnVHlwZSA9PiB7XHJcbiAgaWYgKHgpIHtcclxuICAgIHJldHVybiAxMjM7XHJcbiAgfSBlbHNlIHtcclxuICAgIHJldHVybiA0NTY7XHJcbiAgfVxyXG59OyJdLCJuYW1lcyI6WyJmaXh1cFJpc2tDb25maWdEYXRhIiwiZGF0YSIsIngiXSwibWFwcGluZ3MiOiJBQUFBOzs7O0NBSUMsR0FDRCxPQUFPLElBQU1BLHNCQUFzQixTQUFDQyxNQUFvQztJQUN0RSxJQUFJQyxHQUFHO1FBQ0wsT0FBTztJQUNULE9BQU87UUFDTCxPQUFPO0lBQ1QsQ0FBQztBQUNILEVBQUUifQ==) **Related issue:** - Closes https://github.com/swc-project/swc/issues/6694. --- crates/swc/tests/source_map.rs | 85 ++++++++++++++++ crates/swc_common/src/syntax_pos.rs | 3 + .../src/syntax_pos/analyze_source_file.rs | 22 ++++- .../src/text_writer/basic_impl.rs | 99 +++++++++---------- 4 files changed, 157 insertions(+), 52 deletions(-) diff --git a/crates/swc/tests/source_map.rs b/crates/swc/tests/source_map.rs index 9a3f0331a207..212551317c6c 100644 --- a/crates/swc/tests/source_map.rs +++ b/crates/swc/tests/source_map.rs @@ -16,6 +16,7 @@ use swc::{ }, Compiler, }; +use swc_ecma_parser::Syntax; use testing::{assert_eq, NormalizedOutput, StdErr, Tester}; use walkdir::WalkDir; @@ -426,3 +427,87 @@ fn issue_4578() { ) .unwrap(); } + +#[test] +fn issue_6694() { + Tester::new().print_errors(|cm, handler| { + let c = Compiler::new(cm.clone()); + let fm = cm.new_source_file( + swc_common::FileName::Real("./app.js".into()), + r#"/** + * foo + * @param data foo + * @returns foo + */ +export const fixupRiskConfigData = (data: any): types.RiskConfigType => { + if (x) { + return 123; + } else { + return 456; + } +};"# + .replace('\n', "\r\n"), + ); + let result = c.process_js_file( + fm, + &handler, + &Options { + swcrc: false, + source_maps: Some(SourceMapsConfig::Bool(true)), + config: Config { + jsc: JscConfig { + target: Some(swc_ecma_ast::EsVersion::Es5), + syntax: Some(Syntax::Typescript(Default::default())), + ..Default::default() + }, + is_module: IsModule::Bool(true), + inline_sources_content: true.into(), + emit_source_map_columns: true.into(), + ..Default::default() + }, + 
..Default::default() + }, + ); + + fn line_col(needle: &str, haystack: &str) -> Option<(u32, u32)> { + let lines = haystack.lines().enumerate(); + for (i, line) in lines { + if let Some(c) = line.find(needle) { + return Some((i as _, c as _)); + } + } + + None + } + + match result { + Ok(result) => { + assert!(result.map.is_some()); + let map = result.map.unwrap(); + + let source_map = sourcemap::SourceMap::from_slice(map.as_bytes()) + .expect("failed to deserialize sourcemap"); + + // "export" + let export_line_col = + line_col("export", &result.code).expect("failed to find `export`"); + let token = source_map + .lookup_token(export_line_col.0, export_line_col.1) + .expect("failed to find token"); + assert_eq!(token.get_src(), (5, 0)); + + // "if" + let if_line_col = line_col("if", &result.code).expect("failed to find `if`"); + let token = source_map + .lookup_token(if_line_col.0, if_line_col.1) + .expect("failed to find token"); + assert_eq!(token.get_src(), (6, 2)); + } + Err(err) => { + panic!("Error: {:#?}", err); + } + } + + Ok(()) + }); +} diff --git a/crates/swc_common/src/syntax_pos.rs b/crates/swc_common/src/syntax_pos.rs index 97b6fed1c595..31a4175958e4 100644 --- a/crates/swc_common/src/syntax_pos.rs +++ b/crates/swc_common/src/syntax_pos.rs @@ -1220,7 +1220,10 @@ pub struct LineInfo { /// Used to create a `.map` file. #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub struct LineCol { + /// Index of line, starting from 0. pub line: u32, + + /// UTF-16 column in line, starting from 0. 
pub col: u32, } diff --git a/crates/swc_common/src/syntax_pos/analyze_source_file.rs b/crates/swc_common/src/syntax_pos/analyze_source_file.rs index e68b51b544b3..d34bd9d71c2e 100644 --- a/crates/swc_common/src/syntax_pos/analyze_source_file.rs +++ b/crates/swc_common/src/syntax_pos/analyze_source_file.rs @@ -282,11 +282,29 @@ mod tests { non_narrow_chars: vec![(2 + 1000, 4), (24 + 1000, 0)], ); + test!( + case: unix_lf, + text: "/**\n * foo\n */\n012345678\nabcdef012345678\na", + source_file_start_pos: 0, + lines: vec![0, 4, 11, 15, 25, 41], + multi_byte_chars: vec![], + non_narrow_chars: vec![], + ); + + test!( + case: windows_cr, + text: "/**\r * foo\r */\r012345678\rabcdef012345678\ra", + source_file_start_pos: 0, + lines: vec![0, 4, 11, 15, 25, 41], + multi_byte_chars: vec![], + non_narrow_chars: vec![], + ); + test!( case: windows_crlf, - text: "012345678\r\nabcdef012345678\r\na", + text: "/**\r\n * foo\r\n */\r\n012345678\r\nabcdef012345678\r\na", source_file_start_pos: 0, - lines: vec![0, 11, 28], + lines: vec![0, 5, 13, 18, 29, 46], multi_byte_chars: vec![], non_narrow_chars: vec![], ); diff --git a/crates/swc_ecma_codegen/src/text_writer/basic_impl.rs b/crates/swc_ecma_codegen/src/text_writer/basic_impl.rs index 681850a7bc3a..09ab4a487c40 100644 --- a/crates/swc_ecma_codegen/src/text_writer/basic_impl.rs +++ b/crates/swc_ecma_codegen/src/text_writer/basic_impl.rs @@ -51,6 +51,9 @@ impl<'a, W: Write> JsWriter<'a, W> { for _ in 0..self.indent { self.raw_write(INDENT)?; } + if self.srcmap.is_some() { + self.line_pos += INDENT.len() * self.indent; + } Ok(()) } @@ -59,11 +62,7 @@ impl<'a, W: Write> JsWriter<'a, W> { fn raw_write(&mut self, data: &str) -> Result { // #[cfg(debug_assertions)] // tracing::trace!("Write: `{}`", data); - self.wr.write_all(data.as_bytes())?; - if self.srcmap.is_some() { - self.line_pos += data.chars().count(); - } Ok(()) } @@ -86,6 +85,7 @@ impl<'a, W: Write> JsWriter<'a, W> { } self.raw_write(data)?; + self.update_pos(data); if let 
Some(span) = span { self.srcmap(span.hi()); @@ -95,6 +95,21 @@ impl<'a, W: Write> JsWriter<'a, W> { Ok(()) } + #[inline] + fn update_pos(&mut self, s: &str) { + if self.srcmap.is_some() { + let line_start_of_s = compute_line_starts(s); + self.line_count += line_start_of_s.line_count; + + let chars = s[line_start_of_s.byte_pos..].encode_utf16().count(); + if line_start_of_s.line_count > 0 { + self.line_pos = chars; + } else { + self.line_pos += chars; + } + } + } + #[inline] fn srcmap(&mut self, byte_pos: BytePos) { if byte_pos.is_dummy() && byte_pos != BytePos(u32::MAX) { @@ -183,8 +198,10 @@ impl<'a, W: Write> WriteJs for JsWriter<'a, W> { let pending = self.pending_srcmap.take(); if !self.line_start { self.raw_write(self.new_line)?; - self.line_count += 1; - self.line_pos = 0; + if self.srcmap.is_some() { + self.line_count += 1; + self.line_pos = 0; + } self.line_start = true; if let Some(pending) = pending { @@ -200,18 +217,7 @@ impl<'a, W: Write> WriteJs for JsWriter<'a, W> { fn write_lit(&mut self, span: Span, s: &str) -> Result { if !s.is_empty() { self.srcmap(span.lo()); - self.write(None, s)?; - - if self.srcmap.is_some() { - let line_start_of_s = compute_line_starts(s); - if line_start_of_s.len() > 1 { - self.line_count = self.line_count + line_start_of_s.len() - 1; - let last_line_byte_index = line_start_of_s.last().cloned().unwrap_or(0); - self.line_pos = s[last_line_byte_index..].chars().count(); - } - } - self.srcmap(span.hi()); } @@ -222,14 +228,6 @@ impl<'a, W: Write> WriteJs for JsWriter<'a, W> { #[cfg_attr(debug_assertions, tracing::instrument(skip_all))] fn write_comment(&mut self, s: &str) -> Result { self.write(None, s)?; - if self.srcmap.is_some() { - let line_start_of_s = compute_line_starts(s); - if line_start_of_s.len() > 1 { - self.line_count = self.line_count + line_start_of_s.len() - 1; - let last_line_byte_index = line_start_of_s.last().cloned().unwrap_or(0); - self.line_pos = s[last_line_byte_index..].chars().count(); - } - } Ok(()) } 
@@ -239,16 +237,6 @@ impl<'a, W: Write> WriteJs for JsWriter<'a, W> { if !s.is_empty() { self.srcmap(span.lo()); self.write(None, s)?; - - if self.srcmap.is_some() { - let line_start_of_s = compute_line_starts(s); - if line_start_of_s.len() > 1 { - self.line_count = self.line_count + line_start_of_s.len() - 1; - let last_line_byte_index = line_start_of_s.last().cloned().unwrap_or(0); - self.line_pos = s[last_line_byte_index..].chars().count(); - } - } - self.srcmap(span.hi()); } @@ -285,10 +273,12 @@ impl<'a, W: Write> WriteJs for JsWriter<'a, W> { #[inline] #[cfg_attr(debug_assertions, tracing::instrument(skip_all))] fn add_srcmap(&mut self, pos: BytePos) -> Result { - if self.line_start { - self.pending_srcmap = Some(pos); - } else { - self.srcmap(pos); + if self.srcmap.is_some() { + if self.line_start { + self.pending_srcmap = Some(pos); + } else { + self.srcmap(pos); + } } Ok(()) } @@ -300,23 +290,31 @@ impl<'a, W: Write> WriteJs for JsWriter<'a, W> { } } -fn compute_line_starts(s: &str) -> Vec { - let mut res = vec![]; - +#[derive(Debug)] +struct LineStart { + line_count: usize, + byte_pos: usize, +} +fn compute_line_starts(s: &str) -> LineStart { + let mut count = 0; let mut line_start = 0; - let mut chars = s.char_indices().peekable(); + let mut chars = s.as_bytes().iter().enumerate().peekable(); while let Some((pos, c)) = chars.next() { match c { - '\r' => { - if let Some(&(_, '\n')) = chars.peek() { + b'\r' => { + count += 1; + if let Some(&(_, b'\n')) = chars.peek() { let _ = chars.next(); + line_start = pos + 2 + } else { + line_start = pos + 1 } } - '\n' => { - res.push(line_start); + b'\n' => { + count += 1; line_start = pos + 1; } @@ -324,7 +322,8 @@ fn compute_line_starts(s: &str) -> Vec { } } - // Last line. - res.push(line_start); - res + LineStart { + line_count: count, + byte_pos: line_start, + } }