Auto merge of #95241 - Gankra:cleaned-provenance, r=workingjubilee
Strict Provenance MVP

This patch series examines the question: how bad would it be if we adopted
an extremely strict pointer provenance model that completely banished all
int<->ptr casts?

The key insight to making this approach even *vaguely* palatable is the

ptr.with_addr(addr) -> ptr

function, which takes a pointer and an address and creates a new pointer
with that address and the provenance of the input pointer. In this way
the "chain of custody" is completely and dynamically restored, making the
model suitable even for dynamic checkers like CHERI and Miri.

This is not a formal model, but lots of the docs discussing the model
have been updated to try to convey the *concept* of this design in the hope
that it can be iterated on.

See #95228
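
A minimal sketch of the intended usage (illustrative only, not part of this
commit), assuming a nightly toolchain with the strict_provenance feature
enabled: tag the low bit of an aligned pointer and strip it again without
ever round-tripping through a bare usize-to-pointer cast.

    #![feature(strict_provenance)]

    fn main() {
        let mut value = 0u32;
        let ptr: *mut u32 = &mut value;

        // map_addr (a convenience over with_addr) lets us do address
        // arithmetic while keeping the original pointer's provenance
        // "chain of custody" intact; no `usize as *mut T` cast is needed.
        let tagged = ptr.map_addr(|addr| addr | 1);
        assert_eq!(tagged.addr() & 1, 1);

        // Strip the tag the same way; the result is a usable pointer again.
        let untagged = tagged.map_addr(|addr| addr & !1);
        unsafe { *untagged = 42 };
        assert_eq!(value, 42);
    }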
bors committed Mar 30, 2022
2 parents 05142a7 + e3a3afe commit e50ff9b
Showing 39 changed files with 621 additions and 126 deletions.
26 changes: 17 additions & 9 deletions compiler/rustc_arena/src/lib.rs
@@ -18,6 +18,7 @@
#![feature(decl_macro)]
#![feature(rustc_attrs)]
#![cfg_attr(test, feature(test))]
#![feature(strict_provenance)]

use smallvec::SmallVec;

@@ -87,7 +88,7 @@ impl<T> ArenaChunk<T> {
unsafe {
if mem::size_of::<T>() == 0 {
// A pointer as large as possible for zero-sized elements.
!0 as *mut T
ptr::invalid_mut(!0)
} else {
self.start().add(self.storage.len())
}
@@ -199,7 +200,7 @@ impl<T> TypedArena<T> {
unsafe {
if mem::size_of::<T>() == 0 {
self.ptr.set((self.ptr.get() as *mut u8).wrapping_offset(1) as *mut T);
let ptr = mem::align_of::<T>() as *mut T;
let ptr = ptr::NonNull::<T>::dangling().as_ptr();
// Don't drop the object. This `write` is equivalent to `forget`.
ptr::write(ptr, object);
&mut *ptr
@@ -216,7 +217,9 @@ impl<T> TypedArena<T> {

#[inline]
fn can_allocate(&self, additional: usize) -> bool {
let available_bytes = self.end.get() as usize - self.ptr.get() as usize;
// FIXME: this should *likely* use `offset_from`, but more
// investigation is needed (including running tests in miri).
let available_bytes = self.end.get().addr() - self.ptr.get().addr();
let additional_bytes = additional.checked_mul(mem::size_of::<T>()).unwrap();
available_bytes >= additional_bytes
}
@@ -262,7 +265,9 @@ impl<T> TypedArena<T> {
// If a type is `!needs_drop`, we don't need to keep track of how many elements
// the chunk stores - the field will be ignored anyway.
if mem::needs_drop::<T>() {
let used_bytes = self.ptr.get() as usize - last_chunk.start() as usize;
// FIXME: this should *likely* use `offset_from`, but more
// investigation is needed (including running tests in miri).
let used_bytes = self.ptr.get().addr() - last_chunk.start().addr();
last_chunk.entries = used_bytes / mem::size_of::<T>();
}

@@ -288,9 +293,9 @@ impl<T> TypedArena<T> {
// chunks.
fn clear_last_chunk(&self, last_chunk: &mut ArenaChunk<T>) {
// Determine how much was filled.
let start = last_chunk.start() as usize;
let start = last_chunk.start().addr();
// We obtain the value of the pointer to the first uninitialized element.
let end = self.ptr.get() as usize;
let end = self.ptr.get().addr();
// We then calculate the number of elements to be dropped in the last chunk,
// which is the filled area's length.
let diff = if mem::size_of::<T>() == 0 {
@@ -299,6 +304,8 @@ impl<T> TypedArena<T> {
// Recall that `end` was incremented for each allocated value.
end - start
} else {
// FIXME: this should *likely* use `offset_from`, but more
// investigation is needed (including running tests in miri).
(end - start) / mem::size_of::<T>()
};
// Pass that to the `destroy` method.
@@ -395,15 +402,16 @@ impl DroplessArena {
/// request.
#[inline]
fn alloc_raw_without_grow(&self, layout: Layout) -> Option<*mut u8> {
let start = self.start.get() as usize;
let end = self.end.get() as usize;
let start = self.start.get().addr();
let old_end = self.end.get();
let end = old_end.addr();

let align = layout.align();
let bytes = layout.size();

let new_end = end.checked_sub(bytes)? & !(align - 1);
if start <= new_end {
let new_end = new_end as *mut u8;
let new_end = old_end.with_addr(new_end);
self.end.set(new_end);
Some(new_end)
} else {
1 change: 1 addition & 0 deletions compiler/rustc_codegen_ssa/src/lib.rs
@@ -6,6 +6,7 @@
#![feature(once_cell)]
#![feature(nll)]
#![feature(associated_type_bounds)]
#![feature(strict_provenance)]
#![recursion_limit = "256"]
#![allow(rustc::potential_query_instability)]

2 changes: 1 addition & 1 deletion compiler/rustc_codegen_ssa/src/mono_item.rs
@@ -116,7 +116,7 @@ impl<'a, 'tcx: 'a> MonoItemExt<'a, 'tcx> for MonoItem<'tcx> {
fn to_raw_string(&self) -> String {
match *self {
MonoItem::Fn(instance) => {
format!("Fn({:?}, {})", instance.def, instance.substs.as_ptr() as usize)
format!("Fn({:?}, {})", instance.def, instance.substs.as_ptr().addr())
}
MonoItem::Static(id) => format!("Static({:?})", id),
MonoItem::GlobalAsm(id) => format!("GlobalAsm({:?})", id),
1 change: 1 addition & 0 deletions library/alloc/src/lib.rs
@@ -127,6 +127,7 @@
#![feature(slice_ptr_len)]
#![feature(slice_range)]
#![feature(str_internals)]
#![feature(strict_provenance)]
#![feature(trusted_len)]
#![feature(trusted_random_access)]
#![feature(try_trait_v2)]
5 changes: 2 additions & 3 deletions library/alloc/src/rc.rs
@@ -2115,13 +2115,12 @@ impl<T> Weak<T> {
#[rustc_const_unstable(feature = "const_weak_new", issue = "95091", reason = "recently added")]
#[must_use]
pub const fn new() -> Weak<T> {
Weak { ptr: unsafe { NonNull::new_unchecked(usize::MAX as *mut RcBox<T>) } }
Weak { ptr: unsafe { NonNull::new_unchecked(ptr::invalid_mut::<RcBox<T>>(usize::MAX)) } }
}
}

pub(crate) fn is_dangling<T: ?Sized>(ptr: *mut T) -> bool {
let address = ptr as *mut () as usize;
address == usize::MAX
(ptr as *mut ()).addr() == usize::MAX
}

/// Helper type to allow accessing the reference counts without
4 changes: 2 additions & 2 deletions library/alloc/src/slice.rs
@@ -1043,9 +1043,9 @@ where

impl<T> Drop for MergeHole<T> {
fn drop(&mut self) {
// `T` is not a zero-sized type, so it's okay to divide by its size.
let len = (self.end as usize - self.start as usize) / mem::size_of::<T>();
// `T` is not a zero-sized type, and these are pointers into a slice's elements.
unsafe {
let len = self.end.offset_from(self.start) as usize;
ptr::copy_nonoverlapping(self.start, self.dest, len);
}
}
2 changes: 1 addition & 1 deletion library/alloc/src/sync.rs
@@ -1746,7 +1746,7 @@ impl<T> Weak<T> {
#[rustc_const_unstable(feature = "const_weak_new", issue = "95091", reason = "recently added")]
#[must_use]
pub const fn new() -> Weak<T> {
Weak { ptr: unsafe { NonNull::new_unchecked(usize::MAX as *mut ArcInner<T>) } }
Weak { ptr: unsafe { NonNull::new_unchecked(ptr::invalid_mut::<ArcInner<T>>(usize::MAX)) } }
}
}

2 changes: 1 addition & 1 deletion library/alloc/src/vec/into_iter.rs
@@ -159,7 +159,7 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let exact = if mem::size_of::<T>() == 0 {
(self.end as usize).wrapping_sub(self.ptr as usize)
self.end.addr().wrapping_sub(self.ptr.addr())
} else {
unsafe { self.end.offset_from(self.ptr) as usize }
};
2 changes: 1 addition & 1 deletion library/core/src/alloc/layout.rs
@@ -194,7 +194,7 @@ impl Layout {
#[inline]
pub const fn dangling(&self) -> NonNull<u8> {
// SAFETY: align is guaranteed to be non-zero
unsafe { NonNull::new_unchecked(self.align() as *mut u8) }
unsafe { NonNull::new_unchecked(crate::ptr::invalid_mut::<u8>(self.align())) }
}

/// Creates a layout describing the record that can hold a value
6 changes: 5 additions & 1 deletion library/core/src/fmt/mod.rs
@@ -352,6 +352,10 @@ impl<'a> ArgumentV1<'a> {
}

fn as_usize(&self) -> Option<usize> {
// We are type punning a bit here: USIZE_MARKER only takes an &usize but
// formatter takes an &Opaque. Rust understandably doesn't think we should compare
// the function pointers if they don't have the same signature, so we cast to
// usizes to tell it that we just want to compare addresses.
if self.formatter as usize == USIZE_MARKER as usize {
// SAFETY: The `formatter` field is only set to USIZE_MARKER if
// the value is a usize, so this is safe
@@ -2246,7 +2250,7 @@ impl<T: ?Sized> Pointer for *const T {
}
f.flags |= 1 << (FlagV1::Alternate as u32);

let ret = LowerHex::fmt(&(ptr as usize), f);
let ret = LowerHex::fmt(&(ptr.addr()), f);

f.width = old_width;
f.flags = old_flags;
4 changes: 2 additions & 2 deletions library/core/src/hash/mod.rs
@@ -793,7 +793,7 @@ mod impls {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
let (address, metadata) = self.to_raw_parts();
state.write_usize(address as usize);
state.write_usize(address.addr());
metadata.hash(state);
}
}
@@ -803,7 +803,7 @@ mod impls {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
let (address, metadata) = self.to_raw_parts();
state.write_usize(address as usize);
state.write_usize(address.addr());
metadata.hash(state);
}
}
6 changes: 3 additions & 3 deletions library/core/src/intrinsics.rs
@@ -1972,15 +1972,15 @@ extern "rust-intrinsic" {
/// Checks whether `ptr` is properly aligned with respect to
/// `align_of::<T>()`.
pub(crate) fn is_aligned_and_not_null<T>(ptr: *const T) -> bool {
!ptr.is_null() && ptr as usize % mem::align_of::<T>() == 0
!ptr.is_null() && ptr.addr() % mem::align_of::<T>() == 0
}

/// Checks whether the regions of memory starting at `src` and `dst` of size
/// `count * size_of::<T>()` do *not* overlap.
#[cfg(debug_assertions)]
pub(crate) fn is_nonoverlapping<T>(src: *const T, dst: *const T, count: usize) -> bool {
let src_usize = src as usize;
let dst_usize = dst as usize;
let src_usize = src.addr();
let dst_usize = dst.addr();
let size = mem::size_of::<T>().checked_mul(count).unwrap();
let diff = if src_usize > dst_usize { src_usize - dst_usize } else { dst_usize - src_usize };
// If the absolute distance between the ptrs is at least as big as the size of the buffer,
75 changes: 74 additions & 1 deletion library/core/src/ptr/const_ptr.rs
@@ -150,6 +150,79 @@ impl<T: ?Sized> *const T {
bits as Self
}

/// Gets the "address" portion of the pointer.
///
/// This is equivalent to `self as usize`, which semantically discards
/// *provenance* and *address-space* information. To properly restore that information,
/// use [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr].
///
/// On most platforms this will produce a value with the same bytes as the original
/// pointer, because all the bytes are dedicated to describing the address.
/// Platforms which need to store additional information in the pointer may
/// perform a change of representation to produce a value containing only the address
/// portion of the pointer. What that means is up to the platform to define.
///
/// This API and its claimed semantics are part of the Strict Provenance experiment,
/// see the [module documentation][crate::ptr] for details.
#[must_use]
#[inline]
#[unstable(feature = "strict_provenance", issue = "95228")]
pub fn addr(self) -> usize
where
T: Sized,
{
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
self as usize
}

/// Creates a new pointer with the given address.
///
/// This performs the same operation as an `addr as ptr` cast, but copies
/// the *address-space* and *provenance* of `self` to the new pointer.
/// This allows us to dynamically preserve and propagate this important
/// information in a way that is otherwise impossible with a unary cast.
///
/// This is equivalent to using [`wrapping_offset`][pointer::wrapping_offset] to offset
/// `self` to the given address, and therefore has all the same capabilities and restrictions.
///
/// This API and its claimed semantics are part of the Strict Provenance experiment,
/// see the [module documentation][crate::ptr] for details.
#[must_use]
#[inline]
#[unstable(feature = "strict_provenance", issue = "95228")]
pub fn with_addr(self, addr: usize) -> Self
where
T: Sized,
{
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
//
// In the mean-time, this operation is defined to be "as if" it was
// a wrapping_offset, so we can emulate it as such. This should properly
// restore pointer provenance even under today's compiler.
let self_addr = self.addr() as isize;
let dest_addr = addr as isize;
let offset = dest_addr.wrapping_sub(self_addr);

// This is the canonical desugarring of this operation
self.cast::<u8>().wrapping_offset(offset).cast::<T>()
}

/// Creates a new pointer by mapping `self`'s address to a new one.
///
/// This is a convenience for [`with_addr`][pointer::with_addr], see that method for details.
///
/// This API and its claimed semantics are part of the Strict Provenance experiment,
/// see the [module documentation][crate::ptr] for details.
#[must_use]
#[inline]
#[unstable(feature = "strict_provenance", issue = "95228")]
pub fn map_addr(self, f: impl FnOnce(usize) -> usize) -> Self
where
T: Sized,
{
self.with_addr(f(self.addr()))
}

/// Decompose a (possibly wide) pointer into its address and metadata components.
///
/// The pointer can be later reconstructed with [`from_raw_parts`].
@@ -1006,7 +1079,7 @@ impl<T> *const [T] {
/// use std::ptr;
///
/// let slice: *const [i8] = ptr::slice_from_raw_parts(ptr::null(), 3);
/// assert_eq!(slice.as_ptr(), 0 as *const i8);
/// assert_eq!(slice.as_ptr(), ptr::null());
/// ```
#[inline]
#[unstable(feature = "slice_ptr_get", issue = "74265")]
