Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix BytePos -> CharPos calculations #6574

Merged
merged 4 commits into from Dec 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -0,0 +1,20 @@
{
"sourceMaps": true,
"jsc": {
"parser": {
"syntax": "ecmascript",
"jsx": false
},
"target": "es5",
"loose": false,
"minify": {
"compress": false,
"mangle": false
}
},
"module": {
"type": "commonjs"
},
"minify": true,
"isModule": true
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

@@ -0,0 +1 @@
"use strict";var xxx=", something";console.error("❌ ".concat(message));var bbb="";
@@ -0,0 +1,17 @@
{
"mappings": "AAAA,aAAA,IAAMA,IAAM,cACZC,QAAQC,KAAK,CAAC,AAAC,KAAY,OAARC,UACnB,IAAMC,IAAM",
"names": [
"xxx",
"console",
"error",
"message",
"bbb"
],
"sources": [
"../../input/index.js"
],
"sourcesContent": [
"const xxx = ', something';\nconsole.error(`❌ ${message}`);\nconst bbb = '';\n//# sourceMappingURL=data:application/json;charset=utf-8;base64,eyJ2ZXJzaW9uIjozLCJuYW1lcyI6WyJ4eHgiLCJjb25zb2xlIiwiZXJyb3IiLCJtZXNzYWdlIiwiYmJiIl0sInNvdXJjZXMiOlsidW5rbm93biJdLCJzb3VyY2VzQ29udGVudCI6WyJjb25zdCB4eHggPSAnLCBzb21ldGhpbmcnXG5jb25zb2xlLmVycm9yKGDinYwgJHttZXNzYWdlfWApO1xuXG5jb25zdCBiYmIgPSAnJ1xuIl0sIm1hcHBpbmdzIjoiQUFBQSxNQUFNQSxHQUFHLEdBQUcsYUFBWjtBQUNBQyxPQUFPLENBQUNDLEtBQVIsQ0FBZSxLQUFJQyxPQUFRLEVBQTNCO0FBRUEsTUFBTUMsR0FBRyxHQUFHLEVBQVoifQ==\n"
],
"version": 3
}
@@ -0,0 +1,20 @@
{
"sourceMaps": true,
"jsc": {
"parser": {
"syntax": "ecmascript",
"jsx": false
},
"target": "es5",
"loose": false,
"minify": {
"compress": false,
"mangle": false
}
},
"module": {
"type": "commonjs"
},
"minify": true,
"isModule": true
}
@@ -0,0 +1,4 @@
const xxx = ', something'
console.error(`❌ ${message}`);

const bbb = ''
@@ -0,0 +1 @@
"use strict";var xxx=", something";console.error("❌ ".concat(message));var bbb="";
@@ -0,0 +1,17 @@
{
"mappings": "AAAA,aAAA,IAAMA,IAAM,cACZC,QAAQC,KAAK,CAAC,AAAC,KAAY,OAARC,UAEnB,IAAMC,IAAM",
"names": [
"xxx",
"console",
"error",
"message",
"bbb"
],
"sources": [
"../../input/index.js"
],
"sourcesContent": [
"const xxx = ', something'\nconsole.error(`❌ ${message}`);\n\nconst bbb = ''\n"
],
"version": 3
}
185 changes: 140 additions & 45 deletions crates/swc_common/src/source_map.rs
Expand Up @@ -17,9 +17,7 @@
//! within the SourceMap, which upon request can be converted to line and column
//! information, source code snippets, etc.
use std::{
cmp,
cmp::{max, min},
env, fs,
cmp, env, fs,
hash::Hash,
io,
path::{Path, PathBuf},
Expand Down Expand Up @@ -295,8 +293,7 @@ impl SourceMap {
);

let linechpos = self.bytepos_to_file_charpos_with(&f, linebpos);

let col = max(chpos, linechpos) - min(chpos, linechpos);
let col = chpos - linechpos;

let col_display = {
let start_width_idx = f
Expand Down Expand Up @@ -954,7 +951,7 @@ impl SourceMap {
}

fn bytepos_to_file_charpos_with(&self, map: &SourceFile, bpos: BytePos) -> CharPos {
let total_extra_bytes = self.calc_extra_bytes(map, &mut 0, &mut 0, bpos);
let total_extra_bytes = self.calc_utf16_offset(map, bpos, &mut Default::default());
assert!(
map.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32(),
"map.start_pos = {:?}; total_extra_bytes = {}; bpos = {:?}",
Expand All @@ -965,23 +962,43 @@ impl SourceMap {
CharPos(bpos.to_usize() - map.start_pos.to_usize() - total_extra_bytes as usize)
}

/// Converts an absolute BytePos to a CharPos relative to the source_file.
fn calc_extra_bytes(
/// Converts a span of absolute BytePos to a CharPos relative to the
/// source_file.
pub fn span_to_char_offset(&self, file: &SourceFile, span: Span) -> (u32, u32) {
// We rename this to feel more comfortable while doing math.
let start_offset = file.start_pos;

let mut state = ByteToCharPosState::default();
let start = span.lo.to_u32()
- start_offset.to_u32()
- self.calc_utf16_offset(file, span.lo, &mut state);
let end = span.hi.to_u32()
- start_offset.to_u32()
- self.calc_utf16_offset(file, span.hi, &mut state);

(start, end)
}

/// Calculates the number of excess chars seen in the UTF-8 encoding of a
/// file compared with the UTF-16 encoding.
fn calc_utf16_offset(
&self,
map: &SourceFile,
prev_total_extra_bytes: &mut u32,
start: &mut usize,
file: &SourceFile,
bpos: BytePos,
state: &mut ByteToCharPosState,
) -> u32 {
// The number of extra bytes due to multibyte chars in the SourceFile
let mut total_extra_bytes = *prev_total_extra_bytes;

for (i, &mbc) in map.multibyte_chars[*start..].iter().enumerate() {
debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
if mbc.pos < bpos {
// every character is at least one byte, so we only
// count the actual extra bytes.
total_extra_bytes += mbc.bytes as u32 - 1;
let mut total_extra_bytes = state.total_extra_bytes;
let mut index = state.mbc_index;

if bpos >= state.pos {
let range = index..file.multibyte_chars.len();
for i in range {
let mbc = &file.multibyte_chars[i];
debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
if mbc.pos >= bpos {
break;
}
total_extra_bytes += mbc.byte_to_char_diff() as u32;
// We should never see a byte position in the middle of a
// character
debug_assert!(
Expand All @@ -991,13 +1008,32 @@ impl SourceMap {
mbc.pos,
mbc.bytes
);
} else {
*start += i;
break;
index += 1;
}
} else {
let range = 0..index;
for i in range.rev() {
let mbc = &file.multibyte_chars[i];
debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
if mbc.pos < bpos {
break;
}
total_extra_bytes -= mbc.byte_to_char_diff() as u32;
// We should never see a byte position in the middle of a
// character
debug_assert!(
bpos.to_u32() <= mbc.pos.to_u32(),
"bpos = {:?}, mbc.pos = {:?}",
bpos,
mbc.pos,
);
index -= 1;
}
}

*prev_total_extra_bytes = total_extra_bytes;
state.pos = bpos;
state.total_extra_bytes = total_extra_bytes;
state.mbc_index = index;

total_extra_bytes
}
Expand Down Expand Up @@ -1191,11 +1227,9 @@ impl SourceMap {

let mut prev_dst_line = u32::MAX;

let mut prev_extra_bytes = 0;
let mut ch_start = 0;
let mut line_prev_extra_bytes = 0;
let mut line_ch_start = 0;
let mut inline_sources_content = false;
let mut ch_state = ByteToCharPosState::default();
let mut line_state = ByteToCharPosState::default();

for (pos, lc) in mappings.iter() {
let pos = *pos;
Expand Down Expand Up @@ -1229,11 +1263,8 @@ impl SourceMap {
builder.set_source_contents(src_id, Some(&f.src));
}

prev_extra_bytes = 0;
ch_start = 0;

line_prev_extra_bytes = 0;
line_ch_start = 0;
ch_state = ByteToCharPosState::default();
line_state = ByteToCharPosState::default();

cur_file = Some(f.clone());
&f
Expand All @@ -1253,7 +1284,6 @@ impl SourceMap {
Some(line) => line as u32,
None => continue,
};
let mut name = config.name_for_bytepos(pos);

let linebpos = f.lines[line as usize];
debug_assert!(
Expand All @@ -1263,18 +1293,21 @@ impl SourceMap {
pos,
linebpos,
);
let chpos =
pos.to_u32() - self.calc_extra_bytes(f, &mut prev_extra_bytes, &mut ch_start, pos);
let linechpos = linebpos.to_u32()
- self.calc_extra_bytes(
f,
&mut line_prev_extra_bytes,
&mut line_ch_start,
linebpos,
);

let mut col = max(chpos, linechpos) - min(chpos, linechpos);
let linechpos =
linebpos.to_u32() - self.calc_utf16_offset(f, linebpos, &mut line_state);
let chpos = pos.to_u32() - self.calc_utf16_offset(f, pos, &mut ch_state);

debug_assert!(
chpos >= linechpos,
"{}: chpos = {:?}; linechpos = {:?};",
f.name,
chpos,
linechpos,
);

let mut col = chpos - linechpos;
let mut name = None;
if let Some(orig) = &orig {
if let Some(token) = orig
.lookup_token(line, col)
Expand All @@ -1298,7 +1331,9 @@ impl SourceMap {
}
}

let name_idx = name.map(|name| builder.add_name(name));
let name_idx = name
.or_else(|| config.name_for_bytepos(pos))
.map(|name| builder.add_name(name));

builder.add_raw(lc.line, lc.col, line, col, Some(src_id), name_idx);
prev_dst_line = lc.line;
Expand Down Expand Up @@ -1434,6 +1469,20 @@ impl SourceMapGenConfig for DefaultSourceMapGenConfig {
}
}

/// Stores the state of the last conversion between BytePos and CharPos.
#[derive(Debug, Clone, Default)]
pub struct ByteToCharPosState {
/// The last BytePos to convert.
pos: BytePos,

/// The total number of extra chars in the UTF-8 encoding.
total_extra_bytes: u32,

/// The index of the last MultiByteChar read to compute the extra bytes of
/// the last conversion.
mbc_index: usize,
}

// _____________________________________________________________________________
// Tests
//
Expand Down Expand Up @@ -1653,6 +1702,52 @@ mod tests {
assert!(sm.merge_spans(span1, span2).is_none());
}

#[test]
fn calc_utf16_offset() {
let input = "t¢e∆s💩t";
let sm = SourceMap::new(FilePathMapping::empty());
let file = sm.new_source_file(PathBuf::from("blork.rs").into(), input.to_string());

let mut state = ByteToCharPosState::default();
let mut bpos = file.start_pos;
let mut cpos = CharPos(bpos.to_usize());
for c in input.chars() {
let actual = bpos.to_u32() - sm.calc_utf16_offset(&file, bpos, &mut state);

assert_eq!(actual, cpos.to_u32());

bpos = bpos + BytePos(c.len_utf8() as u32);
cpos = cpos + CharPos(c.len_utf16());
}

for c in input.chars().rev() {
bpos = bpos - BytePos(c.len_utf8() as u32);
cpos = cpos - CharPos(c.len_utf16());

let actual = bpos.to_u32() - sm.calc_utf16_offset(&file, bpos, &mut state);

assert_eq!(actual, cpos.to_u32());
}
}

#[test]
fn bytepos_to_charpos() {
let input = "t¢e∆s💩t";
let sm = SourceMap::new(FilePathMapping::empty());
let file = sm.new_source_file(PathBuf::from("blork.rs").into(), input.to_string());

let mut bpos = file.start_pos;
let mut cpos = CharPos(0);
for c in input.chars() {
let actual = sm.bytepos_to_file_charpos_with(&file, bpos);

assert_eq!(actual, cpos);

bpos = bpos + BytePos(c.len_utf8() as u32);
cpos = cpos + CharPos(c.len_utf16());
}
}

/// Returns the span corresponding to the `n`th occurrence of
/// `substring` in `source_text`.
trait SourceMapExtension {
Expand Down