17
17
//! within the SourceMap, which upon request can be converted to line and column
18
18
//! information, source code snippets, etc.
19
19
use std:: {
20
- cmp,
21
- cmp:: { max, min} ,
22
- env, fs,
20
+ cmp, env, fs,
23
21
hash:: Hash ,
24
22
io,
25
23
path:: { Path , PathBuf } ,
@@ -295,8 +293,7 @@ impl SourceMap {
295
293
) ;
296
294
297
295
let linechpos = self . bytepos_to_file_charpos_with ( & f, linebpos) ;
298
-
299
- let col = max ( chpos, linechpos) - min ( chpos, linechpos) ;
296
+ let col = chpos - linechpos;
300
297
301
298
let col_display = {
302
299
let start_width_idx = f
@@ -954,7 +951,7 @@ impl SourceMap {
954
951
}
955
952
956
953
fn bytepos_to_file_charpos_with ( & self , map : & SourceFile , bpos : BytePos ) -> CharPos {
957
- let total_extra_bytes = self . calc_extra_bytes ( map, & mut 0 , & mut 0 , bpos ) ;
954
+ let total_extra_bytes = self . calc_utf16_offset ( map, bpos , & mut Default :: default ( ) ) ;
958
955
assert ! (
959
956
map. start_pos. to_u32( ) + total_extra_bytes <= bpos. to_u32( ) ,
960
957
"map.start_pos = {:?}; total_extra_bytes = {}; bpos = {:?}" ,
@@ -965,23 +962,43 @@ impl SourceMap {
965
962
CharPos ( bpos. to_usize ( ) - map. start_pos . to_usize ( ) - total_extra_bytes as usize )
966
963
}
967
964
968
- /// Converts an absolute BytePos to a CharPos relative to the source_file.
969
- fn calc_extra_bytes (
965
+ /// Converts a span of absolute BytePos to a CharPos relative to the
966
+ /// source_file.
967
+ pub fn span_to_char_offset ( & self , file : & SourceFile , span : Span ) -> ( u32 , u32 ) {
968
+ // We rename this to feel more comfortable while doing math.
969
+ let start_offset = file. start_pos ;
970
+
971
+ let mut state = ByteToCharPosState :: default ( ) ;
972
+ let start = span. lo . to_u32 ( )
973
+ - start_offset. to_u32 ( )
974
+ - self . calc_utf16_offset ( file, span. lo , & mut state) ;
975
+ let end = span. hi . to_u32 ( )
976
+ - start_offset. to_u32 ( )
977
+ - self . calc_utf16_offset ( file, span. hi , & mut state) ;
978
+
979
+ ( start, end)
980
+ }
981
+
982
+ /// Calculates the number of excess chars seen in the UTF-8 encoding of a
983
+ /// file compared with the UTF-16 encoding.
984
+ fn calc_utf16_offset (
970
985
& self ,
971
- map : & SourceFile ,
972
- prev_total_extra_bytes : & mut u32 ,
973
- start : & mut usize ,
986
+ file : & SourceFile ,
974
987
bpos : BytePos ,
988
+ state : & mut ByteToCharPosState ,
975
989
) -> u32 {
976
- // The number of extra bytes due to multibyte chars in the SourceFile
977
- let mut total_extra_bytes = * prev_total_extra_bytes;
978
-
979
- for ( i, & mbc) in map. multibyte_chars [ * start..] . iter ( ) . enumerate ( ) {
980
- debug ! ( "{}-byte char at {:?}" , mbc. bytes, mbc. pos) ;
981
- if mbc. pos < bpos {
982
- // every character is at least one byte, so we only
983
- // count the actual extra bytes.
984
- total_extra_bytes += mbc. bytes as u32 - 1 ;
990
+ let mut total_extra_bytes = state. total_extra_bytes ;
991
+ let mut index = state. mbc_index ;
992
+
993
+ if bpos >= state. pos {
994
+ let range = index..file. multibyte_chars . len ( ) ;
995
+ for i in range {
996
+ let mbc = & file. multibyte_chars [ i] ;
997
+ debug ! ( "{}-byte char at {:?}" , mbc. bytes, mbc. pos) ;
998
+ if mbc. pos >= bpos {
999
+ break ;
1000
+ }
1001
+ total_extra_bytes += mbc. byte_to_char_diff ( ) as u32 ;
985
1002
// We should never see a byte position in the middle of a
986
1003
// character
987
1004
debug_assert ! (
@@ -991,13 +1008,32 @@ impl SourceMap {
991
1008
mbc. pos,
992
1009
mbc. bytes
993
1010
) ;
994
- } else {
995
- * start += i;
996
- break ;
1011
+ index += 1 ;
1012
+ }
1013
+ } else {
1014
+ let range = 0 ..index;
1015
+ for i in range. rev ( ) {
1016
+ let mbc = & file. multibyte_chars [ i] ;
1017
+ debug ! ( "{}-byte char at {:?}" , mbc. bytes, mbc. pos) ;
1018
+ if mbc. pos < bpos {
1019
+ break ;
1020
+ }
1021
+ total_extra_bytes -= mbc. byte_to_char_diff ( ) as u32 ;
1022
+ // We should never see a byte position in the middle of a
1023
+ // character
1024
+ debug_assert ! (
1025
+ bpos. to_u32( ) <= mbc. pos. to_u32( ) ,
1026
+ "bpos = {:?}, mbc.pos = {:?}" ,
1027
+ bpos,
1028
+ mbc. pos,
1029
+ ) ;
1030
+ index -= 1 ;
997
1031
}
998
1032
}
999
1033
1000
- * prev_total_extra_bytes = total_extra_bytes;
1034
+ state. pos = bpos;
1035
+ state. total_extra_bytes = total_extra_bytes;
1036
+ state. mbc_index = index;
1001
1037
1002
1038
total_extra_bytes
1003
1039
}
@@ -1191,11 +1227,9 @@ impl SourceMap {
1191
1227
1192
1228
let mut prev_dst_line = u32:: MAX ;
1193
1229
1194
- let mut prev_extra_bytes = 0 ;
1195
- let mut ch_start = 0 ;
1196
- let mut line_prev_extra_bytes = 0 ;
1197
- let mut line_ch_start = 0 ;
1198
1230
let mut inline_sources_content = false ;
1231
+ let mut ch_state = ByteToCharPosState :: default ( ) ;
1232
+ let mut line_state = ByteToCharPosState :: default ( ) ;
1199
1233
1200
1234
for ( pos, lc) in mappings. iter ( ) {
1201
1235
let pos = * pos;
@@ -1229,11 +1263,8 @@ impl SourceMap {
1229
1263
builder. set_source_contents ( src_id, Some ( & f. src ) ) ;
1230
1264
}
1231
1265
1232
- prev_extra_bytes = 0 ;
1233
- ch_start = 0 ;
1234
-
1235
- line_prev_extra_bytes = 0 ;
1236
- line_ch_start = 0 ;
1266
+ ch_state = ByteToCharPosState :: default ( ) ;
1267
+ line_state = ByteToCharPosState :: default ( ) ;
1237
1268
1238
1269
cur_file = Some ( f. clone ( ) ) ;
1239
1270
& f
@@ -1253,7 +1284,6 @@ impl SourceMap {
1253
1284
Some ( line) => line as u32 ,
1254
1285
None => continue ,
1255
1286
} ;
1256
- let mut name = config. name_for_bytepos ( pos) ;
1257
1287
1258
1288
let linebpos = f. lines [ line as usize ] ;
1259
1289
debug_assert ! (
@@ -1263,18 +1293,21 @@ impl SourceMap {
1263
1293
pos,
1264
1294
linebpos,
1265
1295
) ;
1266
- let chpos =
1267
- pos. to_u32 ( ) - self . calc_extra_bytes ( f, & mut prev_extra_bytes, & mut ch_start, pos) ;
1268
- let linechpos = linebpos. to_u32 ( )
1269
- - self . calc_extra_bytes (
1270
- f,
1271
- & mut line_prev_extra_bytes,
1272
- & mut line_ch_start,
1273
- linebpos,
1274
- ) ;
1275
1296
1276
- let mut col = max ( chpos, linechpos) - min ( chpos, linechpos) ;
1297
+ let linechpos =
1298
+ linebpos. to_u32 ( ) - self . calc_utf16_offset ( f, linebpos, & mut line_state) ;
1299
+ let chpos = pos. to_u32 ( ) - self . calc_utf16_offset ( f, pos, & mut ch_state) ;
1300
+
1301
+ debug_assert ! (
1302
+ chpos >= linechpos,
1303
+ "{}: chpos = {:?}; linechpos = {:?};" ,
1304
+ f. name,
1305
+ chpos,
1306
+ linechpos,
1307
+ ) ;
1277
1308
1309
+ let mut col = chpos - linechpos;
1310
+ let mut name = None ;
1278
1311
if let Some ( orig) = & orig {
1279
1312
if let Some ( token) = orig
1280
1313
. lookup_token ( line, col)
@@ -1298,7 +1331,9 @@ impl SourceMap {
1298
1331
}
1299
1332
}
1300
1333
1301
- let name_idx = name. map ( |name| builder. add_name ( name) ) ;
1334
+ let name_idx = name
1335
+ . or_else ( || config. name_for_bytepos ( pos) )
1336
+ . map ( |name| builder. add_name ( name) ) ;
1302
1337
1303
1338
builder. add_raw ( lc. line , lc. col , line, col, Some ( src_id) , name_idx) ;
1304
1339
prev_dst_line = lc. line ;
@@ -1434,6 +1469,20 @@ impl SourceMapGenConfig for DefaultSourceMapGenConfig {
1434
1469
}
1435
1470
}
1436
1471
1472
+ /// Stores the state of the last conversion between BytePos and CharPos.
1473
+ #[ derive( Debug , Clone , Default ) ]
1474
+ pub struct ByteToCharPosState {
1475
+ /// The last BytePos to convert.
1476
+ pos : BytePos ,
1477
+
1478
+ /// The total number of extra chars in the UTF-8 encoding.
1479
+ total_extra_bytes : u32 ,
1480
+
1481
+ /// The index of the last MultiByteChar read to compute the extra bytes of
1482
+ /// the last conversion.
1483
+ mbc_index : usize ,
1484
+ }
1485
+
1437
1486
// _____________________________________________________________________________
1438
1487
// Tests
1439
1488
//
@@ -1653,6 +1702,52 @@ mod tests {
1653
1702
assert ! ( sm. merge_spans( span1, span2) . is_none( ) ) ;
1654
1703
}
1655
1704
1705
+ #[ test]
1706
+ fn calc_utf16_offset ( ) {
1707
+ let input = "t¢e∆s💩t" ;
1708
+ let sm = SourceMap :: new ( FilePathMapping :: empty ( ) ) ;
1709
+ let file = sm. new_source_file ( PathBuf :: from ( "blork.rs" ) . into ( ) , input. to_string ( ) ) ;
1710
+
1711
+ let mut state = ByteToCharPosState :: default ( ) ;
1712
+ let mut bpos = file. start_pos ;
1713
+ let mut cpos = CharPos ( bpos. to_usize ( ) ) ;
1714
+ for c in input. chars ( ) {
1715
+ let actual = bpos. to_u32 ( ) - sm. calc_utf16_offset ( & file, bpos, & mut state) ;
1716
+
1717
+ assert_eq ! ( actual, cpos. to_u32( ) ) ;
1718
+
1719
+ bpos = bpos + BytePos ( c. len_utf8 ( ) as u32 ) ;
1720
+ cpos = cpos + CharPos ( c. len_utf16 ( ) ) ;
1721
+ }
1722
+
1723
+ for c in input. chars ( ) . rev ( ) {
1724
+ bpos = bpos - BytePos ( c. len_utf8 ( ) as u32 ) ;
1725
+ cpos = cpos - CharPos ( c. len_utf16 ( ) ) ;
1726
+
1727
+ let actual = bpos. to_u32 ( ) - sm. calc_utf16_offset ( & file, bpos, & mut state) ;
1728
+
1729
+ assert_eq ! ( actual, cpos. to_u32( ) ) ;
1730
+ }
1731
+ }
1732
+
1733
+ #[ test]
1734
+ fn bytepos_to_charpos ( ) {
1735
+ let input = "t¢e∆s💩t" ;
1736
+ let sm = SourceMap :: new ( FilePathMapping :: empty ( ) ) ;
1737
+ let file = sm. new_source_file ( PathBuf :: from ( "blork.rs" ) . into ( ) , input. to_string ( ) ) ;
1738
+
1739
+ let mut bpos = file. start_pos ;
1740
+ let mut cpos = CharPos ( 0 ) ;
1741
+ for c in input. chars ( ) {
1742
+ let actual = sm. bytepos_to_file_charpos_with ( & file, bpos) ;
1743
+
1744
+ assert_eq ! ( actual, cpos) ;
1745
+
1746
+ bpos = bpos + BytePos ( c. len_utf8 ( ) as u32 ) ;
1747
+ cpos = cpos + CharPos ( c. len_utf16 ( ) ) ;
1748
+ }
1749
+ }
1750
+
1656
1751
/// Returns the span corresponding to the `n`th occurrence of
1657
1752
/// `substring` in `source_text`.
1658
1753
trait SourceMapExtension {
1 commit comments
github-actions[bot] commentedon Dec 4, 2022
Benchmark
es/full/bugs-1
433858
ns/iter (± 60049
)360532
ns/iter (± 22667
)1.20
es/full/minify/libraries/antd
2421609627
ns/iter (± 69302733
)2059896790
ns/iter (± 21826036
)1.18
es/full/minify/libraries/d3
471887492
ns/iter (± 17014566
)472027800
ns/iter (± 23829735
)1.00
es/full/minify/libraries/echarts
2117942431
ns/iter (± 71712587
)1741553955
ns/iter (± 49207818
)1.22
es/full/minify/libraries/jquery
132923529
ns/iter (± 6559938
)109253087
ns/iter (± 2209958
)1.22
es/full/minify/libraries/lodash
152517116
ns/iter (± 5581911
)125198057
ns/iter (± 3714508
)1.22
es/full/minify/libraries/moment
84859642
ns/iter (± 52635634
)64127061
ns/iter (± 7665962
)1.32
es/full/minify/libraries/react
28651895
ns/iter (± 15209523
)21947713
ns/iter (± 460113
)1.31
es/full/minify/libraries/terser
463019636
ns/iter (± 33428381
)331681260
ns/iter (± 15671794
)1.40
es/full/minify/libraries/three
736593634
ns/iter (± 66658898
)612215724
ns/iter (± 23303045
)1.20
es/full/minify/libraries/typescript
4838105034
ns/iter (± 148445746
)3793185807
ns/iter (± 58044728
)1.28
es/full/minify/libraries/victory
1112553848
ns/iter (± 83094318
)982142784
ns/iter (± 84583244
)1.13
es/full/minify/libraries/vue
215182264
ns/iter (± 24983236
)203116835
ns/iter (± 19896332
)1.06
es/full/codegen/es3
41999
ns/iter (± 886
)35936
ns/iter (± 5000
)1.17
es/full/codegen/es5
41891
ns/iter (± 2310
)34247
ns/iter (± 2453
)1.22
es/full/codegen/es2015
41479
ns/iter (± 2752
)34262
ns/iter (± 1185
)1.21
es/full/codegen/es2016
41953
ns/iter (± 5247
)34741
ns/iter (± 1521
)1.21
es/full/codegen/es2017
41691
ns/iter (± 3435
)34367
ns/iter (± 1609
)1.21
es/full/codegen/es2018
43614
ns/iter (± 25098
)34532
ns/iter (± 2530
)1.26
es/full/codegen/es2019
41370
ns/iter (± 4333
)34565
ns/iter (± 1558
)1.20
es/full/codegen/es2020
42106
ns/iter (± 6950
)34832
ns/iter (± 3878
)1.21
es/full/all/es3
237538178
ns/iter (± 28711261
)237315028
ns/iter (± 22871198
)1.00
es/full/all/es5
225625443
ns/iter (± 17751187
)225492813
ns/iter (± 19749924
)1.00
es/full/all/es2015
179765309
ns/iter (± 17404803
)181221843
ns/iter (± 15816575
)0.99
es/full/all/es2016
182401069
ns/iter (± 15335932
)178068147
ns/iter (± 19930294
)1.02
es/full/all/es2017
177936455
ns/iter (± 15811367
)174244091
ns/iter (± 17486793
)1.02
es/full/all/es2018
175847881
ns/iter (± 19239088
)172144942
ns/iter (± 14584821
)1.02
es/full/all/es2019
175094541
ns/iter (± 17610050
)169395980
ns/iter (± 21142491
)1.03
es/full/all/es2020
168017276
ns/iter (± 15066806
)167952429
ns/iter (± 16215571
)1.00
es/full/parser
873718
ns/iter (± 35587
)765715
ns/iter (± 78211
)1.14
es/full/base/fixer
32642
ns/iter (± 1935
)28320
ns/iter (± 5716
)1.15
es/full/base/resolver_and_hygiene
117221
ns/iter (± 3824
)98593
ns/iter (± 21215
)1.19
serialization of ast node
252
ns/iter (± 13
)208
ns/iter (± 7
)1.21
serialization of serde
266
ns/iter (± 20
)222
ns/iter (± 6
)1.20
This comment was automatically generated by workflow using github-action-benchmark.