Skip to content

Commit

Permalink
fix(common): Fix BytePos -> CharPos calculations (#6574)
Browse files Browse the repository at this point in the history
**Description:**

This fixes the BytePos -> CharPos calculation necessary for source maps.
There were a few issues in the old code:

1. A UTF-8 sequence of 1-3 bytes corresponds to 1 UTF-16 code unit, but a 4-byte sequence corresponds to 2 UTF-16 code units (a surrogate pair)
2. The starting offset was not recorded when we reached the end of the `multibyte_chars` iteration
3. The `mappings` can be unordered, meaning the UTF-16 offset calculation must be able to move backwards (or restart) when a position comes before the previously converted one

**Related issue:**

 - Closes #6552.
  • Loading branch information
jridgewell committed Dec 4, 2022
1 parent 8bee06f commit a203fdb
Show file tree
Hide file tree
Showing 11 changed files with 243 additions and 61 deletions.
@@ -0,0 +1,20 @@
{
"sourceMaps": true,
"jsc": {
"parser": {
"syntax": "ecmascript",
"jsx": false
},
"target": "es5",
"loose": false,
"minify": {
"compress": false,
"mangle": false
}
},
"module": {
"type": "commonjs"
},
"minify": true,
"isModule": true
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

@@ -0,0 +1 @@
"use strict";var xxx=", something";console.error("❌ ".concat(message));var bbb="";
@@ -0,0 +1,17 @@
{
"mappings": "AAAA,aAAA,IAAMA,IAAM,cACZC,QAAQC,KAAK,CAAC,AAAC,KAAY,OAARC,UACnB,IAAMC,IAAM",
"names": [
"xxx",
"console",
"error",
"message",
"bbb"
],
"sources": [
"../../input/index.js"
],
"sourcesContent": [
"const xxx = ', something';\nconsole.error(`❌ ${message}`);\nconst bbb = '';\n//# sourceMappingURL=data:application/json;charset=utf-8;base64,eyJ2ZXJzaW9uIjozLCJuYW1lcyI6WyJ4eHgiLCJjb25zb2xlIiwiZXJyb3IiLCJtZXNzYWdlIiwiYmJiIl0sInNvdXJjZXMiOlsidW5rbm93biJdLCJzb3VyY2VzQ29udGVudCI6WyJjb25zdCB4eHggPSAnLCBzb21ldGhpbmcnXG5jb25zb2xlLmVycm9yKGDinYwgJHttZXNzYWdlfWApO1xuXG5jb25zdCBiYmIgPSAnJ1xuIl0sIm1hcHBpbmdzIjoiQUFBQSxNQUFNQSxHQUFHLEdBQUcsYUFBWjtBQUNBQyxPQUFPLENBQUNDLEtBQVIsQ0FBZSxLQUFJQyxPQUFRLEVBQTNCO0FBRUEsTUFBTUMsR0FBRyxHQUFHLEVBQVoifQ==\n"
],
"version": 3
}
20 changes: 20 additions & 0 deletions crates/swc/tests/fixture/sourcemap/issue-6552/no-map/input/.swcrc
@@ -0,0 +1,20 @@
{
"sourceMaps": true,
"jsc": {
"parser": {
"syntax": "ecmascript",
"jsx": false
},
"target": "es5",
"loose": false,
"minify": {
"compress": false,
"mangle": false
}
},
"module": {
"type": "commonjs"
},
"minify": true,
"isModule": true
}
@@ -0,0 +1,4 @@
const xxx = ', something'
console.error(`❌ ${message}`);

const bbb = ''
@@ -0,0 +1 @@
"use strict";var xxx=", something";console.error("❌ ".concat(message));var bbb="";
@@ -0,0 +1,17 @@
{
"mappings": "AAAA,aAAA,IAAMA,IAAM,cACZC,QAAQC,KAAK,CAAC,AAAC,KAAY,OAARC,UAEnB,IAAMC,IAAM",
"names": [
"xxx",
"console",
"error",
"message",
"bbb"
],
"sources": [
"../../input/index.js"
],
"sourcesContent": [
"const xxx = ', something'\nconsole.error(`❌ ${message}`);\n\nconst bbb = ''\n"
],
"version": 3
}
185 changes: 140 additions & 45 deletions crates/swc_common/src/source_map.rs
Expand Up @@ -17,9 +17,7 @@
//! within the SourceMap, which upon request can be converted to line and column
//! information, source code snippets, etc.
use std::{
cmp,
cmp::{max, min},
env, fs,
cmp, env, fs,
hash::Hash,
io,
path::{Path, PathBuf},
Expand Down Expand Up @@ -295,8 +293,7 @@ impl SourceMap {
);

let linechpos = self.bytepos_to_file_charpos_with(&f, linebpos);

let col = max(chpos, linechpos) - min(chpos, linechpos);
let col = chpos - linechpos;

let col_display = {
let start_width_idx = f
Expand Down Expand Up @@ -954,7 +951,7 @@ impl SourceMap {
}

fn bytepos_to_file_charpos_with(&self, map: &SourceFile, bpos: BytePos) -> CharPos {
let total_extra_bytes = self.calc_extra_bytes(map, &mut 0, &mut 0, bpos);
let total_extra_bytes = self.calc_utf16_offset(map, bpos, &mut Default::default());
assert!(
map.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32(),
"map.start_pos = {:?}; total_extra_bytes = {}; bpos = {:?}",
Expand All @@ -965,23 +962,43 @@ impl SourceMap {
CharPos(bpos.to_usize() - map.start_pos.to_usize() - total_extra_bytes as usize)
}

/// Converts an absolute BytePos to a CharPos relative to the source_file.
fn calc_extra_bytes(
/// Converts a span of absolute BytePos to a CharPos relative to the
/// source_file.
pub fn span_to_char_offset(&self, file: &SourceFile, span: Span) -> (u32, u32) {
// We rename this to feel more comfortable while doing math.
let start_offset = file.start_pos;

let mut state = ByteToCharPosState::default();
let start = span.lo.to_u32()
- start_offset.to_u32()
- self.calc_utf16_offset(file, span.lo, &mut state);
let end = span.hi.to_u32()
- start_offset.to_u32()
- self.calc_utf16_offset(file, span.hi, &mut state);

(start, end)
}

/// Calculates the number of excess chars seen in the UTF-8 encoding of a
/// file compared with the UTF-16 encoding.
fn calc_utf16_offset(
&self,
map: &SourceFile,
prev_total_extra_bytes: &mut u32,
start: &mut usize,
file: &SourceFile,
bpos: BytePos,
state: &mut ByteToCharPosState,
) -> u32 {
// The number of extra bytes due to multibyte chars in the SourceFile
let mut total_extra_bytes = *prev_total_extra_bytes;

for (i, &mbc) in map.multibyte_chars[*start..].iter().enumerate() {
debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
if mbc.pos < bpos {
// every character is at least one byte, so we only
// count the actual extra bytes.
total_extra_bytes += mbc.bytes as u32 - 1;
let mut total_extra_bytes = state.total_extra_bytes;
let mut index = state.mbc_index;

if bpos >= state.pos {
let range = index..file.multibyte_chars.len();
for i in range {
let mbc = &file.multibyte_chars[i];
debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
if mbc.pos >= bpos {
break;
}
total_extra_bytes += mbc.byte_to_char_diff() as u32;
// We should never see a byte position in the middle of a
// character
debug_assert!(
Expand All @@ -991,13 +1008,32 @@ impl SourceMap {
mbc.pos,
mbc.bytes
);
} else {
*start += i;
break;
index += 1;
}
} else {
let range = 0..index;
for i in range.rev() {
let mbc = &file.multibyte_chars[i];
debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
if mbc.pos < bpos {
break;
}
total_extra_bytes -= mbc.byte_to_char_diff() as u32;
// We should never see a byte position in the middle of a
// character
debug_assert!(
bpos.to_u32() <= mbc.pos.to_u32(),
"bpos = {:?}, mbc.pos = {:?}",
bpos,
mbc.pos,
);
index -= 1;
}
}

*prev_total_extra_bytes = total_extra_bytes;
state.pos = bpos;
state.total_extra_bytes = total_extra_bytes;
state.mbc_index = index;

total_extra_bytes
}
Expand Down Expand Up @@ -1191,11 +1227,9 @@ impl SourceMap {

let mut prev_dst_line = u32::MAX;

let mut prev_extra_bytes = 0;
let mut ch_start = 0;
let mut line_prev_extra_bytes = 0;
let mut line_ch_start = 0;
let mut inline_sources_content = false;
let mut ch_state = ByteToCharPosState::default();
let mut line_state = ByteToCharPosState::default();

for (pos, lc) in mappings.iter() {
let pos = *pos;
Expand Down Expand Up @@ -1229,11 +1263,8 @@ impl SourceMap {
builder.set_source_contents(src_id, Some(&f.src));
}

prev_extra_bytes = 0;
ch_start = 0;

line_prev_extra_bytes = 0;
line_ch_start = 0;
ch_state = ByteToCharPosState::default();
line_state = ByteToCharPosState::default();

cur_file = Some(f.clone());
&f
Expand All @@ -1253,7 +1284,6 @@ impl SourceMap {
Some(line) => line as u32,
None => continue,
};
let mut name = config.name_for_bytepos(pos);

let linebpos = f.lines[line as usize];
debug_assert!(
Expand All @@ -1263,18 +1293,21 @@ impl SourceMap {
pos,
linebpos,
);
let chpos =
pos.to_u32() - self.calc_extra_bytes(f, &mut prev_extra_bytes, &mut ch_start, pos);
let linechpos = linebpos.to_u32()
- self.calc_extra_bytes(
f,
&mut line_prev_extra_bytes,
&mut line_ch_start,
linebpos,
);

let mut col = max(chpos, linechpos) - min(chpos, linechpos);
let linechpos =
linebpos.to_u32() - self.calc_utf16_offset(f, linebpos, &mut line_state);
let chpos = pos.to_u32() - self.calc_utf16_offset(f, pos, &mut ch_state);

debug_assert!(
chpos >= linechpos,
"{}: chpos = {:?}; linechpos = {:?};",
f.name,
chpos,
linechpos,
);

let mut col = chpos - linechpos;
let mut name = None;
if let Some(orig) = &orig {
if let Some(token) = orig
.lookup_token(line, col)
Expand All @@ -1298,7 +1331,9 @@ impl SourceMap {
}
}

let name_idx = name.map(|name| builder.add_name(name));
let name_idx = name
.or_else(|| config.name_for_bytepos(pos))
.map(|name| builder.add_name(name));

builder.add_raw(lc.line, lc.col, line, col, Some(src_id), name_idx);
prev_dst_line = lc.line;
Expand Down Expand Up @@ -1434,6 +1469,20 @@ impl SourceMapGenConfig for DefaultSourceMapGenConfig {
}
}

/// Stores the state of the last conversion between BytePos and CharPos.
///
/// `calc_utf16_offset` reads this state at the start of a conversion and
/// overwrites all three fields before returning, so consecutive conversions
/// on the same file can continue from the previous position (scanning
/// forwards or backwards) instead of starting over. Callers begin with
/// `ByteToCharPosState::default()` and use a fresh default state when
/// switching to a different file.
#[derive(Debug, Clone, Default)]
pub struct ByteToCharPosState {
/// The last BytePos to convert, i.e. the `bpos` passed to the most recent
/// conversion. The next conversion compares its own `bpos` against this to
/// decide whether to scan forwards or backwards.
pos: BytePos,

/// The running total of `byte_to_char_diff()` over the multibyte chars
/// located before `pos`: the amount subtracted from a byte position to
/// obtain the corresponding char position.
total_extra_bytes: u32,

/// How many leading entries of the file's `multibyte_chars` are already
/// accounted for in `total_extra_bytes`; equivalently, the index at which
/// the next forward scan resumes.
mbc_index: usize,
}

// _____________________________________________________________________________
// Tests
//
Expand Down Expand Up @@ -1653,6 +1702,52 @@ mod tests {
assert!(sm.merge_spans(span1, span2).is_none());
}

#[test]
fn calc_utf16_offset() {
    // Mixes 1-, 2-, 3- and 4-byte UTF-8 characters.
    let text = "t¢e∆s💩t";
    let source_map = SourceMap::new(FilePathMapping::empty());
    let source_file =
        source_map.new_source_file(PathBuf::from("blork.rs").into(), text.to_string());

    // One state is reused for every lookup, so both scan directions of the
    // conversion are exercised.
    let mut conv_state = ByteToCharPosState::default();
    let mut byte_pos = source_file.start_pos;
    let mut char_pos = CharPos(byte_pos.to_usize());

    // Forward pass: check the position first, then step over the character.
    for ch in text.chars() {
        let converted = byte_pos.to_u32()
            - source_map.calc_utf16_offset(&source_file, byte_pos, &mut conv_state);

        assert_eq!(converted, char_pos.to_u32());

        byte_pos = byte_pos + BytePos(ch.len_utf8() as u32);
        char_pos = char_pos + CharPos(ch.len_utf16());
    }

    // Backward pass: step back over the character first, then check.
    for ch in text.chars().rev() {
        byte_pos = byte_pos - BytePos(ch.len_utf8() as u32);
        char_pos = char_pos - CharPos(ch.len_utf16());

        let converted = byte_pos.to_u32()
            - source_map.calc_utf16_offset(&source_file, byte_pos, &mut conv_state);

        assert_eq!(converted, char_pos.to_u32());
    }
}

#[test]
fn bytepos_to_charpos() {
    // Mixes 1-, 2-, 3- and 4-byte UTF-8 characters.
    let text = "t¢e∆s💩t";
    let source_map = SourceMap::new(FilePathMapping::empty());
    let source_file =
        source_map.new_source_file(PathBuf::from("blork.rs").into(), text.to_string());

    // `byte_pos` is absolute; `expected` is the file-relative char position
    // advanced by the UTF-16 length of each character.
    let mut byte_pos = source_file.start_pos;
    let mut expected = CharPos(0);
    for ch in text.chars() {
        let converted = source_map.bytepos_to_file_charpos_with(&source_file, byte_pos);

        assert_eq!(converted, expected);

        byte_pos = byte_pos + BytePos(ch.len_utf8() as u32);
        expected = expected + CharPos(ch.len_utf16());
    }
}

/// Returns the span corresponding to the `n`th occurrence of
/// `substring` in `source_text`.
trait SourceMapExtension {
Expand Down

1 comment on commit a203fdb

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: a203fdb Previous: 01232f0 Ratio
es/full/bugs-1 433858 ns/iter (± 60049) 360532 ns/iter (± 22667) 1.20
es/full/minify/libraries/antd 2421609627 ns/iter (± 69302733) 2059896790 ns/iter (± 21826036) 1.18
es/full/minify/libraries/d3 471887492 ns/iter (± 17014566) 472027800 ns/iter (± 23829735) 1.00
es/full/minify/libraries/echarts 2117942431 ns/iter (± 71712587) 1741553955 ns/iter (± 49207818) 1.22
es/full/minify/libraries/jquery 132923529 ns/iter (± 6559938) 109253087 ns/iter (± 2209958) 1.22
es/full/minify/libraries/lodash 152517116 ns/iter (± 5581911) 125198057 ns/iter (± 3714508) 1.22
es/full/minify/libraries/moment 84859642 ns/iter (± 52635634) 64127061 ns/iter (± 7665962) 1.32
es/full/minify/libraries/react 28651895 ns/iter (± 15209523) 21947713 ns/iter (± 460113) 1.31
es/full/minify/libraries/terser 463019636 ns/iter (± 33428381) 331681260 ns/iter (± 15671794) 1.40
es/full/minify/libraries/three 736593634 ns/iter (± 66658898) 612215724 ns/iter (± 23303045) 1.20
es/full/minify/libraries/typescript 4838105034 ns/iter (± 148445746) 3793185807 ns/iter (± 58044728) 1.28
es/full/minify/libraries/victory 1112553848 ns/iter (± 83094318) 982142784 ns/iter (± 84583244) 1.13
es/full/minify/libraries/vue 215182264 ns/iter (± 24983236) 203116835 ns/iter (± 19896332) 1.06
es/full/codegen/es3 41999 ns/iter (± 886) 35936 ns/iter (± 5000) 1.17
es/full/codegen/es5 41891 ns/iter (± 2310) 34247 ns/iter (± 2453) 1.22
es/full/codegen/es2015 41479 ns/iter (± 2752) 34262 ns/iter (± 1185) 1.21
es/full/codegen/es2016 41953 ns/iter (± 5247) 34741 ns/iter (± 1521) 1.21
es/full/codegen/es2017 41691 ns/iter (± 3435) 34367 ns/iter (± 1609) 1.21
es/full/codegen/es2018 43614 ns/iter (± 25098) 34532 ns/iter (± 2530) 1.26
es/full/codegen/es2019 41370 ns/iter (± 4333) 34565 ns/iter (± 1558) 1.20
es/full/codegen/es2020 42106 ns/iter (± 6950) 34832 ns/iter (± 3878) 1.21
es/full/all/es3 237538178 ns/iter (± 28711261) 237315028 ns/iter (± 22871198) 1.00
es/full/all/es5 225625443 ns/iter (± 17751187) 225492813 ns/iter (± 19749924) 1.00
es/full/all/es2015 179765309 ns/iter (± 17404803) 181221843 ns/iter (± 15816575) 0.99
es/full/all/es2016 182401069 ns/iter (± 15335932) 178068147 ns/iter (± 19930294) 1.02
es/full/all/es2017 177936455 ns/iter (± 15811367) 174244091 ns/iter (± 17486793) 1.02
es/full/all/es2018 175847881 ns/iter (± 19239088) 172144942 ns/iter (± 14584821) 1.02
es/full/all/es2019 175094541 ns/iter (± 17610050) 169395980 ns/iter (± 21142491) 1.03
es/full/all/es2020 168017276 ns/iter (± 15066806) 167952429 ns/iter (± 16215571) 1.00
es/full/parser 873718 ns/iter (± 35587) 765715 ns/iter (± 78211) 1.14
es/full/base/fixer 32642 ns/iter (± 1935) 28320 ns/iter (± 5716) 1.15
es/full/base/resolver_and_hygiene 117221 ns/iter (± 3824) 98593 ns/iter (± 21215) 1.19
serialization of ast node 252 ns/iter (± 13) 208 ns/iter (± 7) 1.21
serialization of serde 266 ns/iter (± 20) 222 ns/iter (± 6) 1.20

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.