From ef0c185ce28375bd9855e43d2b3f3af45d321e2b Mon Sep 17 00:00:00 2001 From: Justin Ridgewell Date: Sat, 3 Dec 2022 17:40:46 -0500 Subject: [PATCH 1/4] Fix UTF-8 -> UTF-16 calculation There were a few issues in the old code: 1. UTF-8 maps 1-3 bytes into 1 UTF-16 char, but 4 bytes into 2 UTF-16 chars 2. The starting offset was not recorded when we end the `multibyte_chars` iteration 3. The `mappings` can be unordered, meaning we need to restart UTF-16 offset calculation --- .../issue-6552/input-map/input/.swcrc | 20 ++++ .../issue-6552/input-map/input/index.js | 4 + .../issue-6552/input-map/output/index.js | 1 + .../issue-6552/input-map/output/index.map | 17 +++ .../sourcemap/issue-6552/no-map/input/.swcrc | 20 ++++ .../issue-6552/no-map/input/index.js | 4 + .../issue-6552/no-map/output/index.js | 1 + .../issue-6552/no-map/output/index.map | 17 +++ crates/swc_common/src/source_map.rs | 110 +++++++++++++++--- crates/swc_estree_compat/src/babelify/mod.rs | 26 +++-- 10 files changed, 190 insertions(+), 30 deletions(-) create mode 100644 crates/swc/tests/fixture/sourcemap/issue-6552/input-map/input/.swcrc create mode 100644 crates/swc/tests/fixture/sourcemap/issue-6552/input-map/input/index.js create mode 100644 crates/swc/tests/fixture/sourcemap/issue-6552/input-map/output/index.js create mode 100644 crates/swc/tests/fixture/sourcemap/issue-6552/input-map/output/index.map create mode 100644 crates/swc/tests/fixture/sourcemap/issue-6552/no-map/input/.swcrc create mode 100644 crates/swc/tests/fixture/sourcemap/issue-6552/no-map/input/index.js create mode 100644 crates/swc/tests/fixture/sourcemap/issue-6552/no-map/output/index.js create mode 100644 crates/swc/tests/fixture/sourcemap/issue-6552/no-map/output/index.map diff --git a/crates/swc/tests/fixture/sourcemap/issue-6552/input-map/input/.swcrc b/crates/swc/tests/fixture/sourcemap/issue-6552/input-map/input/.swcrc new file mode 100644 index 000000000000..84bd4c6fcaa0 --- /dev/null +++ b/crates/swc/tests/fixture/sourcemap/issue-6552/input-map/input/.swcrc @@ -0,0 +1,20 @@ +{ + "sourceMaps": true, + "jsc": { + "parser": { + "syntax": "ecmascript", + "jsx": false + }, + "target": "es5", + "loose": false, + "minify": { + "compress": false, + "mangle": false + } + }, + "module": { + "type": "commonjs" + }, + "minify": true, + "isModule": true +} diff --git a/crates/swc/tests/fixture/sourcemap/issue-6552/input-map/input/index.js b/crates/swc/tests/fixture/sourcemap/issue-6552/input-map/input/index.js new file mode 100644 index 000000000000..5efd35bfca96 --- /dev/null +++ b/crates/swc/tests/fixture/sourcemap/issue-6552/input-map/input/index.js @@ -0,0 +1,4 @@ +const xxx = ', something'; +console.error(`❌ ${message}`); +const bbb = ''; +//# sourceMappingURL=data:application/json;charset=utf-8;base64,eyJ2ZXJzaW9uIjozLCJuYW1lcyI6WyJ4eHgiLCJjb25zb2xlIiwiZXJyb3IiLCJtZXNzYWdlIiwiYmJiIl0sInNvdXJjZXMiOlsidW5rbm93biJdLCJzb3VyY2VzQ29udGVudCI6WyJjb25zdCB4eHggPSAnLCBzb21ldGhpbmcnXG5jb25zb2xlLmVycm9yKGDinYwgJHttZXNzYWdlfWApO1xuXG5jb25zdCBiYmIgPSAnJ1xuIl0sIm1hcHBpbmdzIjoiQUFBQSxNQUFNQSxHQUFHLEdBQUcsYUFBWjtBQUNBQyxPQUFPLENBQUNDLEtBQVIsQ0FBZSxLQUFJQyxPQUFRLEVBQTNCO0FBRUEsTUFBTUMsR0FBRyxHQUFHLEVBQVoifQ== diff --git a/crates/swc/tests/fixture/sourcemap/issue-6552/input-map/output/index.js b/crates/swc/tests/fixture/sourcemap/issue-6552/input-map/output/index.js new file mode 100644 index 000000000000..06857c462ead --- /dev/null +++ b/crates/swc/tests/fixture/sourcemap/issue-6552/input-map/output/index.js @@ -0,0 +1 @@ +"use strict";var xxx=", something";console.error("❌ ".concat(message));var bbb=""; diff --git a/crates/swc/tests/fixture/sourcemap/issue-6552/input-map/output/index.map b/crates/swc/tests/fixture/sourcemap/issue-6552/input-map/output/index.map new file mode 100644 index 000000000000..ceabf60c9144 --- /dev/null +++ b/crates/swc/tests/fixture/sourcemap/issue-6552/input-map/output/index.map @@ -0,0 +1,17 @@ +{ + "mappings": "AAAA,aAAA,IAAMA,IAAM,cACZC,QAAQC,KAAK,CAAC,AAAC,KAAY,OAARC,UACnB,IAAMC,IAAM", + "names": [ + "xxx", + "console", + "error", + "message", + "bbb" + ], + "sources": [ + "../../input/index.js" + ], + "sourcesContent": [ + "const xxx = ', something';\nconsole.error(`❌ ${message}`);\nconst bbb = '';\n//# sourceMappingURL=data:application/json;charset=utf-8;base64,eyJ2ZXJzaW9uIjozLCJuYW1lcyI6WyJ4eHgiLCJjb25zb2xlIiwiZXJyb3IiLCJtZXNzYWdlIiwiYmJiIl0sInNvdXJjZXMiOlsidW5rbm93biJdLCJzb3VyY2VzQ29udGVudCI6WyJjb25zdCB4eHggPSAnLCBzb21ldGhpbmcnXG5jb25zb2xlLmVycm9yKGDinYwgJHttZXNzYWdlfWApO1xuXG5jb25zdCBiYmIgPSAnJ1xuIl0sIm1hcHBpbmdzIjoiQUFBQSxNQUFNQSxHQUFHLEdBQUcsYUFBWjtBQUNBQyxPQUFPLENBQUNDLEtBQVIsQ0FBZSxLQUFJQyxPQUFRLEVBQTNCO0FBRUEsTUFBTUMsR0FBRyxHQUFHLEVBQVoifQ==\n" + ], + "version": 3 +} diff --git a/crates/swc/tests/fixture/sourcemap/issue-6552/no-map/input/.swcrc b/crates/swc/tests/fixture/sourcemap/issue-6552/no-map/input/.swcrc new file mode 100644 index 000000000000..84bd4c6fcaa0 --- /dev/null +++ b/crates/swc/tests/fixture/sourcemap/issue-6552/no-map/input/.swcrc @@ -0,0 +1,20 @@ +{ + "sourceMaps": true, + "jsc": { + "parser": { + "syntax": "ecmascript", + "jsx": false + }, + "target": "es5", + "loose": false, + "minify": { + "compress": false, + "mangle": false + } + }, + "module": { + "type": "commonjs" + }, + "minify": true, + "isModule": true +} diff --git a/crates/swc/tests/fixture/sourcemap/issue-6552/no-map/input/index.js b/crates/swc/tests/fixture/sourcemap/issue-6552/no-map/input/index.js new file mode 100644 index 000000000000..d3eeaa6cd519 --- /dev/null +++ b/crates/swc/tests/fixture/sourcemap/issue-6552/no-map/input/index.js @@ -0,0 +1,4 @@ +const xxx = ', something' +console.error(`❌ ${message}`); + +const bbb = '' diff --git a/crates/swc/tests/fixture/sourcemap/issue-6552/no-map/output/index.js b/crates/swc/tests/fixture/sourcemap/issue-6552/no-map/output/index.js new file mode 100644 index 000000000000..06857c462ead --- /dev/null +++ b/crates/swc/tests/fixture/sourcemap/issue-6552/no-map/output/index.js @@ -0,0 +1 @@ +"use strict";var xxx=", something";console.error("❌ ".concat(message));var bbb=""; diff --git a/crates/swc/tests/fixture/sourcemap/issue-6552/no-map/output/index.map b/crates/swc/tests/fixture/sourcemap/issue-6552/no-map/output/index.map new file mode 100644 index 000000000000..7f2baf65599f --- /dev/null +++ b/crates/swc/tests/fixture/sourcemap/issue-6552/no-map/output/index.map @@ -0,0 +1,17 @@ +{ + "mappings": "AAAA,aAAA,IAAMA,IAAM,cACZC,QAAQC,KAAK,CAAC,AAAC,KAAY,OAARC,UAEnB,IAAMC,IAAM", + "names": [ + "xxx", + "console", + "error", + "message", + "bbb" + ], + "sources": [ + "../../input/index.js" + ], + "sourcesContent": [ + "const xxx = ', something'\nconsole.error(`❌ ${message}`);\n\nconst bbb = ''\n" + ], + "version": 3 +} diff --git a/crates/swc_common/src/source_map.rs b/crates/swc_common/src/source_map.rs index 7fc14f8bb674..36cfc9c0ddd8 100644 --- a/crates/swc_common/src/source_map.rs +++ b/crates/swc_common/src/source_map.rs @@ -17,9 +17,7 @@ //! within the SourceMap, which upon request can be converted to line and column //! information, source code snippets, etc. use std::{ - cmp, - cmp::{max, min}, - env, fs, + cmp, env, fs, hash::Hash, io, path::{Path, PathBuf}, @@ -295,8 +293,7 @@ impl SourceMap { ); let linechpos = self.bytepos_to_file_charpos_with(&f, linebpos); - - let col = max(chpos, linechpos) - min(chpos, linechpos); + let col = chpos - linechpos; let col_display = { let start_width_idx = f @@ -954,7 +951,7 @@ impl SourceMap { } fn bytepos_to_file_charpos_with(&self, map: &SourceFile, bpos: BytePos) -> CharPos { - let total_extra_bytes = self.calc_extra_bytes(map, &mut 0, &mut 0, bpos); + let total_extra_bytes = self.calc_utf16_offset(map, &mut 0, &mut 0, bpos); assert!( map.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32(), "map.start_pos = {:?}; total_extra_bytes = {}; bpos = {:?}", @@ -966,7 +963,7 @@ impl SourceMap { } /// Converts an absolute BytePos to a CharPos relative to the source_file. - fn calc_extra_bytes( + pub fn calc_utf16_offset( &self, map: &SourceFile, prev_total_extra_bytes: &mut u32, @@ -975,13 +972,18 @@ impl SourceMap { ) -> u32 { // The number of extra bytes due to multibyte chars in the SourceFile let mut total_extra_bytes = *prev_total_extra_bytes; + let mut i = *start; - for (i, &mbc) in map.multibyte_chars[*start..].iter().enumerate() { + for &mbc in map.multibyte_chars[i..].iter() { debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos); if mbc.pos < bpos { - // every character is at least one byte, so we only - // count the actual extra bytes. - total_extra_bytes += mbc.bytes as u32 - 1; + // 1, 2, and 3 UTF-8 bytes maps to 1 UTF-16 char, but 4 UTF-8 + // bytes maps to 2. + total_extra_bytes += if mbc.bytes == 4 { + 2 + } else { + mbc.bytes as u32 - 1 + }; // We should never see a byte position in the middle of a // character debug_assert!( @@ -991,13 +993,14 @@ impl SourceMap { mbc.pos, mbc.bytes ); + i += 1; } else { - *start += i; break; } } *prev_total_extra_bytes = total_extra_bytes; + *start = i; total_extra_bytes } @@ -1197,6 +1200,9 @@ impl SourceMap { let mut line_ch_start = 0; let mut inline_sources_content = false; + let mut prev_bpos = BytePos(0); + let mut prev_linebpos = BytePos(0); + for (pos, lc) in mappings.iter() { let pos = *pos; @@ -1235,6 +1241,9 @@ impl SourceMap { line_prev_extra_bytes = 0; line_ch_start = 0; + prev_bpos = BytePos(0); + prev_linebpos = BytePos(0); + cur_file = Some(f.clone()); &f } @@ -1253,7 +1262,6 @@ impl SourceMap { Some(line) => line as u32, None => continue, }; - let mut name = config.name_for_bytepos(pos); let linebpos = f.lines[line as usize]; debug_assert!( @@ -1263,18 +1271,43 @@ impl SourceMap { pos, linebpos, ); - let chpos = - pos.to_u32() - self.calc_extra_bytes(f, &mut prev_extra_bytes, &mut ch_start, pos); + // TODO: mappings really should be ordered, but it's not. + // debug_assert!(line >= prev_line); + if linebpos < prev_linebpos { + line_prev_extra_bytes = 0; + line_ch_start = 0; + } + prev_linebpos = linebpos; + let linechpos = linebpos.to_u32() - - self.calc_extra_bytes( + - self.calc_utf16_offset( f, &mut line_prev_extra_bytes, &mut line_ch_start, linebpos, ); - let mut col = max(chpos, linechpos) - min(chpos, linechpos); + // TODO: mappings really should be ordered, but it's not. + // debug_assert(pos >= prev_bpos); + if pos < prev_bpos { + prev_extra_bytes = line_prev_extra_bytes; + ch_start = line_ch_start; + } + prev_bpos = pos; + + let chpos = + pos.to_u32() - self.calc_utf16_offset(f, &mut prev_extra_bytes, &mut ch_start, pos); + + debug_assert!( + chpos >= linechpos, + "{}: chpos = {:?}; linechpos = {:?};", + f.name, + chpos, + linechpos, + ); + let mut col = chpos - linechpos; + let mut name = None; if let Some(orig) = &orig { if let Some(token) = orig .lookup_token(line, col) @@ -1298,7 +1331,9 @@ impl SourceMap { } } - let name_idx = name.map(|name| builder.add_name(name)); + let name_idx = name + .or_else(|| config.name_for_bytepos(pos)) + .map(|name| builder.add_name(name)); builder.add_raw(lc.line, lc.col, line, col, Some(src_id), name_idx); prev_dst_line = lc.line; @@ -1653,6 +1688,45 @@ mod tests { assert!(sm.merge_spans(span1, span2).is_none()); } + #[test] + fn calc_utf16_offset() { + let input = "t¢e∆s💩t"; + let sm = SourceMap::new(FilePathMapping::empty()); + let file = sm.new_source_file(PathBuf::from("blork.rs").into(), input.to_string()); + + let mut prev_extra_bytes = 0_u32; + let mut start = 0; + let mut bpos = file.start_pos; + let mut cpos = CharPos(bpos.to_usize()); + for c in input.chars() { + let actual = bpos.to_u32() + - sm.calc_utf16_offset(&file, &mut prev_extra_bytes, &mut start, bpos); + + assert_eq!(actual, cpos.to_u32()); + + bpos = bpos + BytePos(c.len_utf8() as u32); + cpos = cpos + CharPos(c.len_utf16()); + } + } + + #[test] + fn bytepos_to_charpos() { + let input = "t¢e∆s💩t"; + let sm = SourceMap::new(FilePathMapping::empty()); + let file = sm.new_source_file(PathBuf::from("blork.rs").into(), input.to_string()); + + let mut bpos = file.start_pos; + let mut cpos = CharPos(0); + for c in input.chars() { + let actual = sm.bytepos_to_file_charpos_with(&file, bpos); + + assert_eq!(actual, cpos); + + bpos = bpos + BytePos(c.len_utf8() as u32); + cpos = cpos + CharPos(c.len_utf16()); + } + } + /// Returns the span corresponding to the `n`th occurrence of /// `substring` in `source_text`. trait SourceMapExtension { diff --git a/crates/swc_estree_compat/src/babelify/mod.rs b/crates/swc_estree_compat/src/babelify/mod.rs index cb9cd18f3819..9c82dfb5c8c2 100644 --- a/crates/swc_estree_compat/src/babelify/mod.rs +++ b/crates/swc_estree_compat/src/babelify/mod.rs @@ -4,6 +4,7 @@ use rayon::prelude::*; use serde::{de::DeserializeOwned, Serialize}; use swc_common::{ comments::{CommentKind, Comments}, + source_map::Pos, sync::Lrc, BytePos, SourceFile, SourceMap, Span, }; @@ -43,18 +44,19 @@ impl Context { // We rename this to feel more comfortable while doing math. let start_offset = self.fm.start_pos; - let mut start = span.lo.0 - start_offset.0; - let mut end = span.hi.0 - start_offset.0; - - for mb in self.fm.multibyte_chars.iter() { - if mb.pos < span.lo { - start -= (mb.bytes - 1) as u32; - } - - if mb.pos < span.hi { - end -= (mb.bytes - 1) as u32; - } - } + let mut prev_extra_bytes = 0; + let mut ch_start = 0; + + let start = span.lo.to_u32() + - start_offset.to_u32() + - self + .cm + .calc_utf16_offset(&self.fm, &mut prev_extra_bytes, &mut ch_start, span.lo); + let end = span.hi.to_u32() + - start_offset.to_u32() + - self + .cm + .calc_utf16_offset(&self.fm, &mut prev_extra_bytes, &mut ch_start, span.hi); (Some(start), Some(end)) } From 8ff64941f8f83171dcaa372b7732af8020ef851f Mon Sep 17 00:00:00 2001 From: Justin Ridgewell Date: Sat, 3 Dec 2022 23:00:18 -0500 Subject: [PATCH 2/4] Implement reverse conversion --- crates/swc_common/src/source_map.rs | 160 +++++++++++-------- crates/swc_common/src/syntax_pos.rs | 19 ++- crates/swc_estree_compat/src/babelify/mod.rs | 17 +- 3 files changed, 110 insertions(+), 86 deletions(-) diff --git a/crates/swc_common/src/source_map.rs b/crates/swc_common/src/source_map.rs index 36cfc9c0ddd8..67e6d669ac6a 100644 --- a/crates/swc_common/src/source_map.rs +++ b/crates/swc_common/src/source_map.rs @@ -951,7 +951,7 @@ impl SourceMap { } fn bytepos_to_file_charpos_with(&self, map: &SourceFile, bpos: BytePos) -> CharPos { - let total_extra_bytes = self.calc_utf16_offset(map, &mut 0, &mut 0, bpos); + let total_extra_bytes = self.calc_utf16_offset(map, bpos, &mut Default::default()); assert!( map.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32(), "map.start_pos = {:?}; total_extra_bytes = {}; bpos = {:?}", @@ -962,28 +962,43 @@ impl SourceMap { CharPos(bpos.to_usize() - map.start_pos.to_usize() - total_extra_bytes as usize) } - /// Converts an absolute BytePos to a CharPos relative to the source_file. - pub fn calc_utf16_offset( + /// Converts a span of absolute BytePos to a CharPos relative to the + /// source_file. + pub fn span_to_char_offset(&self, file: &SourceFile, span: Span) -> (u32, u32) { + // We rename this to feel more comfortable while doing math. + let start_offset = file.start_pos; + + let mut state = ByteToCharPosState::default(); + let start = span.lo.to_u32() + - start_offset.to_u32() + - self.calc_utf16_offset(file, span.lo, &mut state); + let end = span.hi.to_u32() + - start_offset.to_u32() + - self.calc_utf16_offset(file, span.hi, &mut state); + + (start, end) + } + + /// Calculates the number of excess chars seen in the UTF-8 encoding of a + /// file compared with the UTF-16 encoding. + fn calc_utf16_offset( &self, - map: &SourceFile, - prev_total_extra_bytes: &mut u32, - start: &mut usize, + file: &SourceFile, bpos: BytePos, + state: &mut ByteToCharPosState, ) -> u32 { - // The number of extra bytes due to multibyte chars in the SourceFile - let mut total_extra_bytes = *prev_total_extra_bytes; - let mut i = *start; - - for &mbc in map.multibyte_chars[i..].iter() { - debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos); - if mbc.pos < bpos { - // 1, 2, and 3 UTF-8 bytes maps to 1 UTF-16 char, but 4 UTF-8 - // bytes maps to 2. - total_extra_bytes += if mbc.bytes == 4 { - 2 - } else { - mbc.bytes as u32 - 1 - }; + let mut total_extra_bytes = state.total_extra_bytes; + let mut index = state.mbc_index; + + if bpos >= state.pos { + let range = index..file.multibyte_chars.len(); + for i in range { + let mbc = &file.multibyte_chars[i]; + debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos); + if mbc.pos >= bpos { + break; + } + total_extra_bytes += mbc.byte_to_char_diff() as u32; // We should never see a byte position in the middle of a // character debug_assert!( @@ -993,14 +1008,32 @@ impl SourceMap { mbc.pos, mbc.bytes ); - i += 1; - } else { - break; + index = i; + } + } else { + let range = 0..index; + for i in range.rev() { + let mbc = &file.multibyte_chars[i]; + debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos); + if mbc.pos < bpos { + break; + } + total_extra_bytes -= mbc.byte_to_char_diff() as u32; + // We should never see a byte position in the middle of a + // character + debug_assert!( + bpos.to_u32() <= mbc.pos.to_u32(), + "bpos = {:?}, mbc.pos = {:?}", + bpos, + mbc.pos, + ); + index = i; } } - *prev_total_extra_bytes = total_extra_bytes; - *start = i; + state.pos = bpos; + state.total_extra_bytes = total_extra_bytes; + state.mbc_index = index; total_extra_bytes } @@ -1194,14 +1227,9 @@ impl SourceMap { let mut prev_dst_line = u32::MAX; - let mut prev_extra_bytes = 0; - let mut ch_start = 0; - let mut line_prev_extra_bytes = 0; - let mut line_ch_start = 0; let mut inline_sources_content = false; - - let mut prev_bpos = BytePos(0); - let mut prev_linebpos = BytePos(0); + let mut ch_state = ByteToCharPosState::default(); + let mut line_state = ByteToCharPosState::default(); for (pos, lc) in mappings.iter() { let pos = *pos; @@ -1235,14 +1263,8 @@ impl SourceMap { builder.set_source_contents(src_id, Some(&f.src)); } - prev_extra_bytes = 0; - ch_start = 0; - - line_prev_extra_bytes = 0; - line_ch_start = 0; - - prev_bpos = BytePos(0); - prev_linebpos = BytePos(0); + ch_state = ByteToCharPosState::default(); + line_state = ByteToCharPosState::default(); cur_file = Some(f.clone()); &f @@ -1271,32 +1293,10 @@ impl SourceMap { pos, linebpos, ); - // TODO: mappings really should be ordered, but it's not. - // debug_assert!(line >= prev_line); - if linebpos < prev_linebpos { - line_prev_extra_bytes = 0; - line_ch_start = 0; - } - prev_linebpos = linebpos; - - let linechpos = linebpos.to_u32() - - self.calc_utf16_offset( - f, - &mut line_prev_extra_bytes, - &mut line_ch_start, - linebpos, - ); - - // TODO: mappings really should be ordered, but it's not. - // debug_assert(pos >= prev_bpos); - if pos < prev_bpos { - prev_extra_bytes = line_prev_extra_bytes; - ch_start = line_ch_start; - } - prev_bpos = pos; - let chpos = - pos.to_u32() - self.calc_utf16_offset(f, &mut prev_extra_bytes, &mut ch_start, pos); + let linechpos = + linebpos.to_u32() - self.calc_utf16_offset(f, linebpos, &mut line_state); + let chpos = pos.to_u32() - self.calc_utf16_offset(f, pos, &mut ch_state); debug_assert!( chpos >= linechpos, @@ -1469,6 +1469,20 @@ impl SourceMapGenConfig for DefaultSourceMapGenConfig { } } +/// Stores the state of the last conversion between BytePos and CharPos. +#[derive(Debug, Clone, Default)] +pub struct ByteToCharPosState { + /// The last BytePos to convert. + pos: BytePos, + + /// The total number of extra chars in the UTF-8 encoding. + total_extra_bytes: u32, + + /// The index of the last MultiByteChar read to compute the extra bytes of + /// the last conversion. + mbc_index: usize, +} + // _____________________________________________________________________________ // Tests // @@ -1694,19 +1708,27 @@ mod tests { let sm = SourceMap::new(FilePathMapping::empty()); let file = sm.new_source_file(PathBuf::from("blork.rs").into(), input.to_string()); - let mut prev_extra_bytes = 0_u32; - let mut start = 0; + let mut state = ByteToCharPosState::default(); let mut bpos = file.start_pos; let mut cpos = CharPos(bpos.to_usize()); for c in input.chars() { - let actual = bpos.to_u32() - - sm.calc_utf16_offset(&file, &mut prev_extra_bytes, &mut start, bpos); + let actual = bpos.to_u32() - sm.calc_utf16_offset(&file, bpos, &mut state); + dbg!(&bpos, &cpos, &state); assert_eq!(actual, cpos.to_u32()); bpos = bpos + BytePos(c.len_utf8() as u32); cpos = cpos + CharPos(c.len_utf16()); } + + for c in input.chars().rev() { + bpos = bpos - BytePos(c.len_utf8() as u32); + cpos = cpos - CharPos(c.len_utf16()); + + let actual = bpos.to_u32() - sm.calc_utf16_offset(&file, bpos, &mut state); + + assert_eq!(actual, cpos.to_u32()); + } } #[test] diff --git a/crates/swc_common/src/syntax_pos.rs b/crates/swc_common/src/syntax_pos.rs index a9bb8bbb6fe4..97b6fed1c595 100644 --- a/crates/swc_common/src/syntax_pos.rs +++ b/crates/swc_common/src/syntax_pos.rs @@ -737,6 +737,21 @@ pub struct MultiByteChar { pub bytes: u8, } +impl MultiByteChar { + /// Computes the extra number of UTF-8 bytes necessary to encode a code + /// point, compared to UTF-16 encoding. + /// + /// 1, 2, and 3 UTF-8 bytes encode into 1 UTF-16 char, but 4 UTF-8 bytes + /// encode into 2. + pub fn byte_to_char_diff(&self) -> u8 { + if self.bytes == 4 { + 2 + } else { + self.bytes - 1 + } + } +} + /// Identifies an offset of a non-narrow character in a SourceFile #[cfg_attr( any(feature = "rkyv-impl", feature = "rkyv-bytecheck-impl"), @@ -1002,7 +1017,9 @@ pub trait Pos { /// - Values larger than `u32::MAX - 2^16` are reserved for the comments. /// /// `u32::MAX` is special value used to generate source map entries. -#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Serialize, Deserialize)] +#[derive( + Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug, Serialize, Deserialize, Default, +)] #[serde(transparent)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] #[cfg_attr( diff --git a/crates/swc_estree_compat/src/babelify/mod.rs b/crates/swc_estree_compat/src/babelify/mod.rs index 9c82dfb5c8c2..401c49b2bc9a 100644 --- a/crates/swc_estree_compat/src/babelify/mod.rs +++ b/crates/swc_estree_compat/src/babelify/mod.rs @@ -41,22 +41,7 @@ impl Context { return (None, None); } - // We rename this to feel more comfortable while doing math. - let start_offset = self.fm.start_pos; - - let mut prev_extra_bytes = 0; - let mut ch_start = 0; - - let start = span.lo.to_u32() - - start_offset.to_u32() - - self - .cm - .calc_utf16_offset(&self.fm, &mut prev_extra_bytes, &mut ch_start, span.lo); - let end = span.hi.to_u32() - - start_offset.to_u32() - - self - .cm - .calc_utf16_offset(&self.fm, &mut prev_extra_bytes, &mut ch_start, span.hi); + let (start, end) = self.cm.span_to_char_offset(&self.fm, span); (Some(start), Some(end)) } From 10bc02c5ba9bd0f2230dda29d500b17e02b250ef Mon Sep 17 00:00:00 2001 From: Justin Ridgewell Date: Sat, 3 Dec 2022 23:02:26 -0500 Subject: [PATCH 3/4] Remove debug statement --- crates/swc_common/src/source_map.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/swc_common/src/source_map.rs b/crates/swc_common/src/source_map.rs index 67e6d669ac6a..80234bfddc47 100644 --- a/crates/swc_common/src/source_map.rs +++ b/crates/swc_common/src/source_map.rs @@ -1714,7 +1714,6 @@ mod tests { for c in input.chars() { let actual = bpos.to_u32() - sm.calc_utf16_offset(&file, bpos, &mut state); - dbg!(&bpos, &cpos, &state); assert_eq!(actual, cpos.to_u32()); bpos = bpos + BytePos(c.len_utf8() as u32); From 8944cd3c8d54f0f36c544fb1db4611df53dc84aa Mon Sep 17 00:00:00 2001 From: Justin Ridgewell Date: Sat, 3 Dec 2022 23:22:12 -0500 Subject: [PATCH 4/4] Fixes --- crates/swc_common/src/source_map.rs | 4 ++-- crates/swc_estree_compat/src/babelify/mod.rs | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/swc_common/src/source_map.rs b/crates/swc_common/src/source_map.rs index 80234bfddc47..8029b6c57c61 100644 --- a/crates/swc_common/src/source_map.rs +++ b/crates/swc_common/src/source_map.rs @@ -1008,7 +1008,7 @@ impl SourceMap { mbc.pos, mbc.bytes ); - index = i; + index += 1; } } else { let range = 0..index; @@ -1027,7 +1027,7 @@ impl SourceMap { bpos, mbc.pos, ); - index = i; + index -= 1; } } diff --git a/crates/swc_estree_compat/src/babelify/mod.rs b/crates/swc_estree_compat/src/babelify/mod.rs index 401c49b2bc9a..c5713970f609 100644 --- a/crates/swc_estree_compat/src/babelify/mod.rs +++ b/crates/swc_estree_compat/src/babelify/mod.rs @@ -4,7 +4,6 @@ use rayon::prelude::*; use serde::{de::DeserializeOwned, Serialize}; use swc_common::{ comments::{CommentKind, Comments}, - source_map::Pos, sync::Lrc, BytePos, SourceFile, SourceMap, Span, };