Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Strict Provenance MVP #95241

Merged
merged 15 commits into from
Mar 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
26 changes: 17 additions & 9 deletions compiler/rustc_arena/src/lib.rs
Expand Up @@ -18,6 +18,7 @@
#![feature(decl_macro)]
#![feature(rustc_attrs)]
#![cfg_attr(test, feature(test))]
#![feature(strict_provenance)]

use smallvec::SmallVec;

Expand Down Expand Up @@ -87,7 +88,7 @@ impl<T> ArenaChunk<T> {
unsafe {
if mem::size_of::<T>() == 0 {
// A pointer as large as possible for zero-sized elements.
!0 as *mut T
ptr::invalid_mut(!0)
} else {
self.start().add(self.storage.len())
}
Expand Down Expand Up @@ -199,7 +200,7 @@ impl<T> TypedArena<T> {
unsafe {
if mem::size_of::<T>() == 0 {
self.ptr.set((self.ptr.get() as *mut u8).wrapping_offset(1) as *mut T);
let ptr = mem::align_of::<T>() as *mut T;
let ptr = ptr::NonNull::<T>::dangling().as_ptr();
// Don't drop the object. This `write` is equivalent to `forget`.
ptr::write(ptr, object);
&mut *ptr
Expand All @@ -216,7 +217,9 @@ impl<T> TypedArena<T> {

#[inline]
fn can_allocate(&self, additional: usize) -> bool {
let available_bytes = self.end.get() as usize - self.ptr.get() as usize;
// FIXME: this should *likely* use `offset_from`, but more
// investigation is needed (including running tests in miri).
let available_bytes = self.end.get().addr() - self.ptr.get().addr();
RalfJung marked this conversation as resolved.
Show resolved Hide resolved
let additional_bytes = additional.checked_mul(mem::size_of::<T>()).unwrap();
available_bytes >= additional_bytes
}
Expand Down Expand Up @@ -262,7 +265,9 @@ impl<T> TypedArena<T> {
// If a type is `!needs_drop`, we don't need to keep track of how many elements
// the chunk stores - the field will be ignored anyway.
if mem::needs_drop::<T>() {
let used_bytes = self.ptr.get() as usize - last_chunk.start() as usize;
// FIXME: this should *likely* use `offset_from`, but more
// investigation is needed (including running tests in miri).
let used_bytes = self.ptr.get().addr() - last_chunk.start().addr();
last_chunk.entries = used_bytes / mem::size_of::<T>();
}

Expand All @@ -288,9 +293,9 @@ impl<T> TypedArena<T> {
// chunks.
fn clear_last_chunk(&self, last_chunk: &mut ArenaChunk<T>) {
// Determine how much was filled.
let start = last_chunk.start() as usize;
let start = last_chunk.start().addr();
// We obtain the value of the pointer to the first uninitialized element.
let end = self.ptr.get() as usize;
let end = self.ptr.get().addr();
// We then calculate the number of elements to be dropped in the last chunk,
// which is the filled area's length.
let diff = if mem::size_of::<T>() == 0 {
Expand All @@ -299,6 +304,8 @@ impl<T> TypedArena<T> {
// Recall that `end` was incremented for each allocated value.
end - start
} else {
// FIXME: this should *likely* use `offset_from`, but more
// investigation is needed (including running tests in miri).
(end - start) / mem::size_of::<T>()
};
// Pass that to the `destroy` method.
Expand Down Expand Up @@ -395,15 +402,16 @@ impl DroplessArena {
/// request.
#[inline]
fn alloc_raw_without_grow(&self, layout: Layout) -> Option<*mut u8> {
let start = self.start.get() as usize;
let end = self.end.get() as usize;
let start = self.start.get().addr();
let old_end = self.end.get();
let end = old_end.addr();

let align = layout.align();
let bytes = layout.size();

let new_end = end.checked_sub(bytes)? & !(align - 1);
if start <= new_end {
let new_end = new_end as *mut u8;
let new_end = old_end.with_addr(new_end);
self.end.set(new_end);
Some(new_end)
} else {
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_codegen_ssa/src/lib.rs
Expand Up @@ -6,6 +6,7 @@
#![feature(once_cell)]
#![feature(nll)]
#![feature(associated_type_bounds)]
#![feature(strict_provenance)]
#![recursion_limit = "256"]
#![allow(rustc::potential_query_instability)]

Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_ssa/src/mono_item.rs
Expand Up @@ -116,7 +116,7 @@ impl<'a, 'tcx: 'a> MonoItemExt<'a, 'tcx> for MonoItem<'tcx> {
fn to_raw_string(&self) -> String {
match *self {
MonoItem::Fn(instance) => {
format!("Fn({:?}, {})", instance.def, instance.substs.as_ptr() as usize)
format!("Fn({:?}, {})", instance.def, instance.substs.as_ptr().addr())
}
MonoItem::Static(id) => format!("Static({:?})", id),
MonoItem::GlobalAsm(id) => format!("GlobalAsm({:?})", id),
Expand Down
1 change: 1 addition & 0 deletions library/alloc/src/lib.rs
Expand Up @@ -127,6 +127,7 @@
#![feature(slice_ptr_len)]
#![feature(slice_range)]
#![feature(str_internals)]
#![feature(strict_provenance)]
#![feature(trusted_len)]
#![feature(trusted_random_access)]
#![feature(try_trait_v2)]
Expand Down
5 changes: 2 additions & 3 deletions library/alloc/src/rc.rs
Expand Up @@ -2115,13 +2115,12 @@ impl<T> Weak<T> {
#[rustc_const_unstable(feature = "const_weak_new", issue = "95091", reason = "recently added")]
#[must_use]
pub const fn new() -> Weak<T> {
Weak { ptr: unsafe { NonNull::new_unchecked(usize::MAX as *mut RcBox<T>) } }
Weak { ptr: unsafe { NonNull::new_unchecked(ptr::invalid_mut::<RcBox<T>>(usize::MAX)) } }
}
}

pub(crate) fn is_dangling<T: ?Sized>(ptr: *mut T) -> bool {
let address = ptr as *mut () as usize;
address == usize::MAX
(ptr as *mut ()).addr() == usize::MAX
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't a vital question for this PR, but it makes me wonder if .addr() should work on ?Sized directly, making this just ptr.addr() == usize::MAX despite ptr being potentially wide.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume historically as usize isn't allowed on DST pointers because it's a footgun in a world where we claim this roundtrips. With a clear addr method it might be fine to do this, but I'd like to be conservative against people just abusing these APIs and roundtripping anyways? Not sure I'm convinced by that argument.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could definitely live without it, wasn't really sure what to think.

}

/// Helper type to allow accessing the reference counts without
Expand Down
4 changes: 2 additions & 2 deletions library/alloc/src/slice.rs
Expand Up @@ -1043,9 +1043,9 @@ where

impl<T> Drop for MergeHole<T> {
fn drop(&mut self) {
// `T` is not a zero-sized type, so it's okay to divide by its size.
let len = (self.end as usize - self.start as usize) / mem::size_of::<T>();
// `T` is not a zero-sized type, and these are pointers into a slice's elements.
unsafe {
let len = self.end.offset_from(self.start) as usize;
ptr::copy_nonoverlapping(self.start, self.dest, len);
}
}
Expand Down
2 changes: 1 addition & 1 deletion library/alloc/src/sync.rs
Expand Up @@ -1746,7 +1746,7 @@ impl<T> Weak<T> {
#[rustc_const_unstable(feature = "const_weak_new", issue = "95091", reason = "recently added")]
#[must_use]
pub const fn new() -> Weak<T> {
Weak { ptr: unsafe { NonNull::new_unchecked(usize::MAX as *mut ArcInner<T>) } }
Weak { ptr: unsafe { NonNull::new_unchecked(ptr::invalid_mut::<ArcInner<T>>(usize::MAX)) } }
}
}

Expand Down
2 changes: 1 addition & 1 deletion library/alloc/src/vec/into_iter.rs
Expand Up @@ -159,7 +159,7 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let exact = if mem::size_of::<T>() == 0 {
(self.end as usize).wrapping_sub(self.ptr as usize)
self.end.addr().wrapping_sub(self.ptr.addr())
} else {
unsafe { self.end.offset_from(self.ptr) as usize }
};
Expand Down
2 changes: 1 addition & 1 deletion library/core/src/alloc/layout.rs
Expand Up @@ -194,7 +194,7 @@ impl Layout {
#[inline]
pub const fn dangling(&self) -> NonNull<u8> {
// SAFETY: align is guaranteed to be non-zero
unsafe { NonNull::new_unchecked(self.align() as *mut u8) }
unsafe { NonNull::new_unchecked(crate::ptr::invalid_mut::<u8>(self.align())) }
}

/// Creates a layout describing the record that can hold a value
Expand Down
6 changes: 5 additions & 1 deletion library/core/src/fmt/mod.rs
Expand Up @@ -352,6 +352,10 @@ impl<'a> ArgumentV1<'a> {
}

fn as_usize(&self) -> Option<usize> {
// We are type punning a bit here: USIZE_MARKER only takes an &usize but
// formatter takes an &Opaque. Rust understandably doesn't think we should compare
// the function pointers if they don't have the same signature, so we cast to
// usizes to tell it that we just want to compare addresses.
if self.formatter as usize == USIZE_MARKER as usize {
// SAFETY: The `formatter` field is only set to USIZE_MARKER if
// the value is a usize, so this is safe
Expand Down Expand Up @@ -2246,7 +2250,7 @@ impl<T: ?Sized> Pointer for *const T {
}
f.flags |= 1 << (FlagV1::Alternate as u32);

let ret = LowerHex::fmt(&(ptr as usize), f);
let ret = LowerHex::fmt(&(ptr.addr()), f);

f.width = old_width;
f.flags = old_flags;
Expand Down
4 changes: 2 additions & 2 deletions library/core/src/hash/mod.rs
Expand Up @@ -793,7 +793,7 @@ mod impls {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
let (address, metadata) = self.to_raw_parts();
state.write_usize(address as usize);
state.write_usize(address.addr());
metadata.hash(state);
}
}
Expand All @@ -803,7 +803,7 @@ mod impls {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
let (address, metadata) = self.to_raw_parts();
state.write_usize(address as usize);
state.write_usize(address.addr());
metadata.hash(state);
}
}
Expand Down
6 changes: 3 additions & 3 deletions library/core/src/intrinsics.rs
Expand Up @@ -1972,15 +1972,15 @@ extern "rust-intrinsic" {
/// Checks whether `ptr` is properly aligned with respect to
/// `align_of::<T>()`.
pub(crate) fn is_aligned_and_not_null<T>(ptr: *const T) -> bool {
!ptr.is_null() && ptr as usize % mem::align_of::<T>() == 0
!ptr.is_null() && ptr.addr() % mem::align_of::<T>() == 0
}

/// Checks whether the regions of memory starting at `src` and `dst` of size
/// `count * size_of::<T>()` do *not* overlap.
#[cfg(debug_assertions)]
pub(crate) fn is_nonoverlapping<T>(src: *const T, dst: *const T, count: usize) -> bool {
let src_usize = src as usize;
let dst_usize = dst as usize;
let src_usize = src.addr();
let dst_usize = dst.addr();
let size = mem::size_of::<T>().checked_mul(count).unwrap();
let diff = if src_usize > dst_usize { src_usize - dst_usize } else { dst_usize - src_usize };
// If the absolute distance between the ptrs is at least as big as the size of the buffer,
Expand Down
75 changes: 74 additions & 1 deletion library/core/src/ptr/const_ptr.rs
Expand Up @@ -150,6 +150,79 @@ impl<T: ?Sized> *const T {
bits as Self
}

/// Gets the "address" portion of the pointer.
///
/// This is equivalent to `self as usize`, which semantically discards
/// *provenance* and *address-space* information. To properly restore that information,
/// use [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr].
///
/// On most platforms this will produce a value with the same bytes as the original
/// pointer, because all the bytes are dedicated to describing the address.
/// Platforms which need to store additional information in the pointer may
/// perform a change of representation to produce a value containing only the address
/// portion of the pointer. What that means is up to the platform to define.
///
/// This API and its claimed semantics are part of the Strict Provenance experiment,
/// see the [module documentation][crate::ptr] for details.
#[must_use]
Gankra marked this conversation as resolved.
Show resolved Hide resolved
#[inline]
#[unstable(feature = "strict_provenance", issue = "95228")]
pub fn addr(self) -> usize
where
T: Sized,
{
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
self as usize
}

/// Creates a new pointer with the given address.
///
/// This performs the same operation as an `addr as ptr` cast, but copies
/// the *address-space* and *provenance* of `self` to the new pointer.
/// This allows us to dynamically preserve and propagate this important
/// information in a way that is otherwise impossible with a unary cast.
///
/// This is equivalent to using [`wrapping_offset`][pointer::wrapping_offset] to offset
/// `self` to the given address, and therefore has all the same capabilities and restrictions.
///
/// This API and its claimed semantics are part of the Strict Provenance experiment,
/// see the [module documentation][crate::ptr] for details.
#[must_use]
Gankra marked this conversation as resolved.
Show resolved Hide resolved
#[inline]
#[unstable(feature = "strict_provenance", issue = "95228")]
pub fn with_addr(self, addr: usize) -> Self
where
T: Sized,
{
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
//
// In the mean-time, this operation is defined to be "as if" it was
// a wrapping_offset, so we can emulate it as such. This should properly
// restore pointer provenance even under today's compiler.
let self_addr = self.addr() as isize;
let dest_addr = addr as isize;
let offset = dest_addr.wrapping_sub(self_addr);

// This is the canonical desugarring of this operation
self.cast::<u8>().wrapping_offset(offset).cast::<T>()
}

/// Creates a new pointer by mapping `self`'s address to a new one.
///
/// This is a convenience for [`with_addr`][pointer::with_addr], see that method for details.
///
/// This API and its claimed semantics are part of the Strict Provenance experiment,
/// see the [module documentation][crate::ptr] for details.
#[must_use]
Gankra marked this conversation as resolved.
Show resolved Hide resolved
#[inline]
#[unstable(feature = "strict_provenance", issue = "95228")]
pub fn map_addr(self, f: impl FnOnce(usize) -> usize) -> Self
where
T: Sized,
{
self.with_addr(f(self.addr()))
}

/// Decompose a (possibly wide) pointer into its address and metadata components.
///
/// The pointer can be later reconstructed with [`from_raw_parts`].
Expand Down Expand Up @@ -1006,7 +1079,7 @@ impl<T> *const [T] {
/// use std::ptr;
///
/// let slice: *const [i8] = ptr::slice_from_raw_parts(ptr::null(), 3);
/// assert_eq!(slice.as_ptr(), 0 as *const i8);
/// assert_eq!(slice.as_ptr(), ptr::null());
/// ```
#[inline]
#[unstable(feature = "slice_ptr_get", issue = "74265")]
Expand Down