Skip to content

Commit 8d5670e

Browse files
authored Jul 14, 2024
refactor(allocator): Use & instead of a thread-local (#9235)
**Description:**

This is a part of #9230. I profiled the performance, and accessing the `thread_local` took too long just to get the address of the thread-local variable. So, I inlined the reference into the allocator.

# Benchmark result

```
Gnuplot not found, using plotters backend
common/allocator/alloc/std/1000000
    time: [4.9478 ms 4.9653 ms 4.9922 ms]
    Found 17 outliers among 100 measurements (17.00%)
      4 (4.00%) high mild
      13 (13.00%) high severe
common/allocator/alloc/no-scope/1000000
    time: [5.4821 ms 5.4938 ms 5.5068 ms]
    Found 17 outliers among 100 measurements (17.00%)
      2 (2.00%) high mild
      15 (15.00%) high severe
common/allocator/alloc/scoped/1000000
    time: [3.1401 ms 3.1456 ms 3.1518 ms]
    Found 12 outliers among 100 measurements (12.00%)
      3 (3.00%) high mild
      9 (9.00%) high severe
common/allocator/alloc/cached-no-scope/1000000
    time: [5.0992 ms 5.1090 ms 5.1198 ms]
    Found 11 outliers among 100 measurements (11.00%)
      2 (2.00%) high mild
      9 (9.00%) high severe
common/allocator/alloc/cached-scoped/1000000
    time: [3.0191 ms 3.0230 ms 3.0273 ms]
    Found 11 outliers among 100 measurements (11.00%)
      2 (2.00%) low mild
      1 (1.00%) high mild
      8 (8.00%) high severe
```
1 parent 83e75ba commit 8d5670e

File tree

8 files changed

+86
-105
lines changed

8 files changed

+86
-105
lines changed
 

‎Cargo.lock

+3-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎crates/swc_allocator/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ rkyv = { workspace = true, optional = true }
2424
scoped-tls = { workspace = true }
2525
serde = { workspace = true, optional = true }
2626
serde_derive = { workspace = true, optional = true }
27+
triomphe = "0.1.13"
2728

2829

2930
[dev-dependencies]

‎crates/swc_allocator/benches/bench.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
extern crate swc_malloc;
22

33
use codspeed_criterion_compat::{black_box, criterion_group, criterion_main, Bencher, Criterion};
4-
use swc_allocator::{FastAlloc, MemorySpace};
4+
use swc_allocator::{FastAlloc, SwcAllocator};
55

66
fn bench_alloc(c: &mut Criterion) {
77
fn direct_alloc_std(b: &mut Bencher, times: usize) {
@@ -40,7 +40,7 @@ fn bench_alloc(c: &mut Criterion) {
4040

4141
fn direct_alloc_scoped(b: &mut Bencher, times: usize) {
4242
b.iter(|| {
43-
let allocator = MemorySpace::default();
43+
let allocator = SwcAllocator::default();
4444

4545
allocator.scope(|| {
4646
let mut vec = swc_allocator::vec::Vec::new();
@@ -56,7 +56,7 @@ fn bench_alloc(c: &mut Criterion) {
5656

5757
fn fast_alloc_scoped(b: &mut Bencher, times: usize) {
5858
b.iter(|| {
59-
MemorySpace::default().scope(|| {
59+
SwcAllocator::default().scope(|| {
6060
let allocator = FastAlloc::default();
6161

6262
let mut vec = allocator.vec();

‎crates/swc_allocator/src/alloc.rs

+54-55
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,51 @@
1-
use std::{alloc::Layout, ptr::NonNull};
1+
use std::{alloc::Layout, mem::transmute, ptr::NonNull};
22

33
use allocator_api2::alloc::Global;
44
use scoped_tls::scoped_thread_local;
55

66
use crate::{FastAlloc, MemorySpace};
77

8-
scoped_thread_local!(pub(crate) static ALLOC: MemorySpace);
8+
scoped_thread_local!(pub(crate) static ALLOC: &'static SwcAllocator);
99

10-
#[derive(Debug, Clone, Copy)]
11-
pub struct SwcAlloc {
12-
pub(crate) is_arena_mode: bool,
13-
}
10+
#[derive(Default)]
11+
pub struct SwcAllocator(MemorySpace);
1412

15-
impl Default for FastAlloc {
16-
fn default() -> Self {
17-
Self {
18-
is_arena_mode: ALLOC.is_set(),
19-
}
13+
impl SwcAllocator {
14+
/// Invokes `f` in a scope where the allocations are done in this allocator.
15+
#[inline(always)]
16+
pub fn scope<'a, F, R>(&'a self, f: F) -> R
17+
where
18+
F: FnOnce() -> R,
19+
{
20+
let s = unsafe {
21+
// Safety: We are using a scoped API
22+
transmute::<&'a SwcAllocator, &'static SwcAllocator>(self)
23+
};
24+
25+
ALLOC.set(&s, f)
2026
}
2127
}
2228

23-
impl Default for SwcAlloc {
29+
impl Default for FastAlloc {
2430
fn default() -> Self {
25-
SwcAlloc {
26-
is_arena_mode: ALLOC.is_set(),
31+
Self {
32+
alloc: if ALLOC.is_set() {
33+
Some(ALLOC.with(|v| *v))
34+
} else {
35+
None
36+
},
2737
}
2838
}
2939
}
3040

31-
impl SwcAlloc {
41+
impl FastAlloc {
3242
/// `true` is passed to `f` if the box is allocated with a custom allocator.
3343
fn with_allocator<T>(
3444
&self,
3545
f: impl FnOnce(&dyn allocator_api2::alloc::Allocator, bool) -> T,
3646
) -> T {
37-
if self.is_arena_mode {
38-
ALLOC.with(|a| {
39-
//
40-
f(&&**a as &dyn allocator_api2::alloc::Allocator, true)
41-
})
47+
if let Some(arena) = &self.alloc {
48+
f((&&*arena.0) as &dyn allocator_api2::alloc::Allocator, true)
4249
} else {
4350
f(&allocator_api2::alloc::Global, false)
4451
}
@@ -49,7 +56,7 @@ fn mark_ptr_as_arena_mode(ptr: NonNull<[u8]>) -> NonNull<[u8]> {
4956
ptr
5057
}
5158

52-
unsafe impl allocator_api2::alloc::Allocator for SwcAlloc {
59+
unsafe impl allocator_api2::alloc::Allocator for FastAlloc {
5360
fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, allocator_api2::alloc::AllocError> {
5461
self.with_allocator(|a, is_arena_mode| {
5562
let ptr = a.allocate(layout)?;
@@ -78,18 +85,13 @@ unsafe impl allocator_api2::alloc::Allocator for SwcAlloc {
7885
}
7986

8087
unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
81-
if self.is_arena_mode {
88+
if self.alloc.is_some() {
8289
debug_assert!(
8390
ALLOC.is_set(),
8491
"Deallocating a pointer allocated with arena mode with a non-arena mode allocator"
8592
);
8693

87-
ALLOC.with(|alloc| {
88-
unsafe {
89-
// Safety: We are in unsafe fn
90-
(&**alloc).deallocate(ptr, layout)
91-
}
92-
})
94+
self.with_allocator(|alloc, _| alloc.deallocate(ptr, layout))
9395
} else {
9496
Global.deallocate(ptr, layout)
9597
}
@@ -101,16 +103,15 @@ unsafe impl allocator_api2::alloc::Allocator for SwcAlloc {
101103
old_layout: Layout,
102104
new_layout: Layout,
103105
) -> Result<NonNull<[u8]>, allocator_api2::alloc::AllocError> {
104-
if self.is_arena_mode {
105-
debug_assert!(
106-
ALLOC.is_set(),
107-
"Growing a pointer allocated with arena mode with a non-arena mode allocator"
108-
);
106+
self.with_allocator(|alloc, is_arena_mode| {
107+
let ptr = alloc.grow(ptr, old_layout, new_layout)?;
109108

110-
ALLOC.with(|alloc| (&**alloc).grow(ptr, old_layout, new_layout))
111-
} else {
112-
Global.grow(ptr, old_layout, new_layout)
113-
}
109+
if is_arena_mode {
110+
Ok(mark_ptr_as_arena_mode(ptr))
111+
} else {
112+
Ok(ptr)
113+
}
114+
})
114115
}
115116

116117
unsafe fn grow_zeroed(
@@ -119,16 +120,15 @@ unsafe impl allocator_api2::alloc::Allocator for SwcAlloc {
119120
old_layout: Layout,
120121
new_layout: Layout,
121122
) -> Result<NonNull<[u8]>, allocator_api2::alloc::AllocError> {
122-
if self.is_arena_mode {
123-
debug_assert!(
124-
ALLOC.is_set(),
125-
"Growing a pointer allocated with arena mode with a non-arena mode allocator"
126-
);
123+
self.with_allocator(|alloc, is_arena_mode| {
124+
let ptr = alloc.grow_zeroed(ptr, old_layout, new_layout)?;
127125

128-
ALLOC.with(|alloc| (&**alloc).grow_zeroed(ptr, old_layout, new_layout))
129-
} else {
130-
Global.grow_zeroed(ptr, old_layout, new_layout)
131-
}
126+
if is_arena_mode {
127+
Ok(mark_ptr_as_arena_mode(ptr))
128+
} else {
129+
Ok(ptr)
130+
}
131+
})
132132
}
133133

134134
unsafe fn shrink(
@@ -137,16 +137,15 @@ unsafe impl allocator_api2::alloc::Allocator for SwcAlloc {
137137
old_layout: Layout,
138138
new_layout: Layout,
139139
) -> Result<NonNull<[u8]>, allocator_api2::alloc::AllocError> {
140-
if self.is_arena_mode {
141-
debug_assert!(
142-
ALLOC.is_set(),
143-
"Shrinking a pointer allocated with arena mode with a non-arena mode allocator"
144-
);
140+
self.with_allocator(|alloc, is_arena_mode| {
141+
let ptr = alloc.shrink(ptr, old_layout, new_layout)?;
145142

146-
ALLOC.with(|alloc| (&**alloc).shrink(ptr, old_layout, new_layout))
147-
} else {
148-
Global.shrink(ptr, old_layout, new_layout)
149-
}
143+
if is_arena_mode {
144+
Ok(mark_ptr_as_arena_mode(ptr))
145+
} else {
146+
Ok(ptr)
147+
}
148+
})
150149
}
151150

152151
fn by_ref(&self) -> &Self

‎crates/swc_allocator/src/boxed/mod.rs

+8-8
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use std::{
77
pin::Pin,
88
};
99

10-
use crate::{alloc::SwcAlloc, FastAlloc};
10+
use crate::FastAlloc;
1111

1212
#[cfg(feature = "rkyv")]
1313
mod rkyv;
@@ -23,7 +23,7 @@ mod serde;
2323
/// The last bit is 1 if the box is allocated with a custom allocator.
2424
#[repr(transparent)]
2525
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
26-
pub struct Box<T: ?Sized>(pub(crate) allocator_api2::boxed::Box<T, SwcAlloc>);
26+
pub struct Box<T: ?Sized>(pub(crate) allocator_api2::boxed::Box<T, FastAlloc>);
2727

2828
impl<T> From<T> for Box<T> {
2929
#[inline(always)]
@@ -32,9 +32,9 @@ impl<T> From<T> for Box<T> {
3232
}
3333
}
3434

35-
impl<T: ?Sized> From<allocator_api2::boxed::Box<T, SwcAlloc>> for Box<T> {
35+
impl<T: ?Sized> From<allocator_api2::boxed::Box<T, FastAlloc>> for Box<T> {
3636
#[inline(always)]
37-
fn from(v: allocator_api2::boxed::Box<T, SwcAlloc>) -> Self {
37+
fn from(v: allocator_api2::boxed::Box<T, FastAlloc>) -> Self {
3838
Box(v)
3939
}
4040
}
@@ -56,7 +56,7 @@ impl<T> Box<T> {
5656
pub fn new(value: T) -> Self {
5757
Self(allocator_api2::boxed::Box::new_in(
5858
value,
59-
SwcAlloc::default(),
59+
FastAlloc::default(),
6060
))
6161
}
6262

@@ -111,7 +111,7 @@ impl<T: ?Sized> Box<T> {
111111
pub unsafe fn from_raw(raw: *mut T) -> Self {
112112
Self(allocator_api2::boxed::Box::from_raw_in(
113113
raw,
114-
SwcAlloc::default(),
114+
FastAlloc::default(),
115115
))
116116
}
117117

@@ -629,7 +629,7 @@ where
629629
}
630630

631631
impl FastAlloc {
632-
pub fn alloc<T>(self, t: T) -> Box<T> {
633-
Box(allocator_api2::boxed::Box::new_in(t, self.swc_alloc()))
632+
pub fn alloc<T>(&self, t: T) -> Box<T> {
633+
Box(allocator_api2::boxed::Box::new_in(t, self.clone()))
634634
}
635635
}

‎crates/swc_allocator/src/lib.rs

+4-24
Original file line numberDiff line numberDiff line change
@@ -4,46 +4,26 @@
44
55
#![allow(clippy::needless_doctest_main)]
66

7-
use alloc::SwcAlloc;
87
use std::ops::{Deref, DerefMut};
98

109
use bumpalo::Bump;
1110

12-
use crate::alloc::ALLOC;
11+
pub use crate::alloc::SwcAllocator;
1312

1413
mod alloc;
1514
pub mod boxed;
1615
pub mod vec;
1716

18-
#[derive(Debug, Clone, Copy)]
17+
#[derive(Clone)]
1918
pub struct FastAlloc {
20-
is_arena_mode: bool,
21-
}
22-
23-
impl FastAlloc {
24-
fn swc_alloc(self) -> SwcAlloc {
25-
SwcAlloc {
26-
is_arena_mode: self.is_arena_mode,
27-
}
28-
}
19+
alloc: Option<&'static SwcAllocator>,
2920
}
3021

3122
#[derive(Default)]
32-
pub struct MemorySpace {
23+
struct MemorySpace {
3324
alloc: Bump,
3425
}
3526

36-
impl MemorySpace {
37-
/// Invokes `f` in a scope where the allocations are done in this allocator.
38-
#[inline(always)]
39-
pub fn scope<F, R>(&self, f: F) -> R
40-
where
41-
F: FnOnce() -> R,
42-
{
43-
ALLOC.set(self, f)
44-
}
45-
}
46-
4727
impl From<Bump> for MemorySpace {
4828
fn from(alloc: Bump) -> Self {
4929
Self { alloc }

‎crates/swc_allocator/src/vec/mod.rs

+11-11
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@ use std::ops::{Deref, DerefMut};
33
#[cfg(feature = "rkyv")]
44
mod rkyv;
55

6-
use crate::{alloc::SwcAlloc, boxed::Box, FastAlloc};
6+
use crate::{boxed::Box, FastAlloc};
77

88
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
99
#[repr(transparent)]
1010
#[cfg_attr(
1111
feature = "serde",
1212
derive(serde_derive::Serialize, serde_derive::Deserialize)
1313
)]
14-
pub struct Vec<T>(allocator_api2::vec::Vec<T, SwcAlloc>);
14+
pub struct Vec<T>(allocator_api2::vec::Vec<T, FastAlloc>);
1515

1616
impl<T> Vec<T> {
1717
pub fn new() -> Self {
@@ -21,7 +21,7 @@ impl<T> Vec<T> {
2121
pub fn with_capacity(capacity: usize) -> Self {
2222
Self(allocator_api2::vec::Vec::with_capacity_in(
2323
capacity,
24-
SwcAlloc::default(),
24+
FastAlloc::default(),
2525
))
2626
}
2727

@@ -167,13 +167,13 @@ impl<T> Vec<T> {
167167
ptr,
168168
length,
169169
capacity,
170-
SwcAlloc::default(),
170+
FastAlloc::default(),
171171
))
172172
}
173173
}
174174

175175
impl<T> Deref for Vec<T> {
176-
type Target = allocator_api2::vec::Vec<T, SwcAlloc>;
176+
type Target = allocator_api2::vec::Vec<T, FastAlloc>;
177177

178178
fn deref(&self) -> &Self::Target {
179179
&self.0
@@ -188,12 +188,12 @@ impl<T> DerefMut for Vec<T> {
188188

189189
impl<T> Default for Vec<T> {
190190
fn default() -> Self {
191-
Self(allocator_api2::vec::Vec::new_in(SwcAlloc::default()))
191+
Self(allocator_api2::vec::Vec::new_in(FastAlloc::default()))
192192
}
193193
}
194194

195195
impl<T> IntoIterator for Vec<T> {
196-
type IntoIter = allocator_api2::vec::IntoIter<T, SwcAlloc>;
196+
type IntoIter = allocator_api2::vec::IntoIter<T, FastAlloc>;
197197
type Item = T;
198198

199199
fn into_iter(self) -> Self::IntoIter {
@@ -245,14 +245,14 @@ impl<T> Extend<T> for Vec<T> {
245245
}
246246

247247
impl FastAlloc {
248-
pub fn vec<T>(self) -> Vec<T> {
249-
Vec(allocator_api2::vec::Vec::new_in(self.swc_alloc()))
248+
pub fn vec<T>(&self) -> Vec<T> {
249+
Vec(allocator_api2::vec::Vec::new_in(self.clone()))
250250
}
251251

252-
pub fn vec_with_capacity<T>(self, capacity: usize) -> Vec<T> {
252+
pub fn vec_with_capacity<T>(&self, capacity: usize) -> Vec<T> {
253253
Vec(allocator_api2::vec::Vec::with_capacity_in(
254254
capacity,
255-
self.swc_alloc(),
255+
self.clone(),
256256
))
257257
}
258258
}

‎crates/swc_allocator/tests/apis.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use criterion::black_box;
2-
use swc_allocator::MemorySpace;
2+
use swc_allocator::SwcAllocator;
33

44
#[test]
55
fn direct_alloc_std() {
@@ -22,7 +22,7 @@ fn direct_alloc_no_scope() {
2222

2323
#[test]
2424
fn direct_alloc_in_scope() {
25-
let allocator = MemorySpace::default();
25+
let allocator = SwcAllocator::default();
2626

2727
allocator.scope(|| {
2828
let mut vec = swc_allocator::vec::Vec::new();

0 commit comments

Comments
 (0)
Please sign in to comment.