Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add array-based chunks and windows from slices #789

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
236 changes: 236 additions & 0 deletions src/slice/array.rs
@@ -0,0 +1,236 @@
use crate::iter::plumbing::*;
use crate::iter::*;

use super::{Iter, IterMut, ParallelSlice};

/// Parallel iterator over immutable non-overlapping chunks of a slice
#[derive(Debug)]
pub struct ArrayChunks<'data, T: Sync, const N: usize> {
iter: Iter<'data, [T; N]>,
rem: &'data [T],
}

impl<'data, T: Sync, const N: usize> ArrayChunks<'data, T, N> {
pub(super) fn new(slice: &'data [T]) -> Self {
assert_ne!(N, 0);
let len = slice.len() / N;
let (fst, snd) = slice.split_at(len * N);
// SAFETY: We cast a slice of `len * N` elements into
// a slice of `len` many `N` elements chunks.
let array_slice: &'data [[T; N]] = unsafe {
let ptr = fst.as_ptr() as *const [T; N];
::std::slice::from_raw_parts(ptr, len)
};
Self {
iter: array_slice.par_iter(),
rem: snd,
}
}

/// Return the remainder of the original slice that is not going to be
/// returned by the iterator. The returned slice has at most `N-1`
/// elements.
pub fn remainder(&self) -> &'data [T] {
self.rem
}
}

impl<'data, T: Sync, const N: usize> Clone for ArrayChunks<'data, T, N> {
fn clone(&self) -> Self {
ArrayChunks {
iter: self.iter.clone(),
rem: self.rem,
}
}
}

impl<'data, T: Sync + 'data, const N: usize> ParallelIterator for ArrayChunks<'data, T, N> {
type Item = &'data [T; N];

fn drive_unindexed<C>(self, consumer: C) -> C::Result
where
C: UnindexedConsumer<Self::Item>,
{
bridge(self, consumer)
}

fn opt_len(&self) -> Option<usize> {
Some(self.len())
}
}

impl<'data, T: Sync + 'data, const N: usize> IndexedParallelIterator for ArrayChunks<'data, T, N> {
fn drive<C>(self, consumer: C) -> C::Result
where
C: Consumer<Self::Item>,
{
bridge(self, consumer)
}

fn len(&self) -> usize {
self.iter.len()
}

fn with_producer<CB>(self, callback: CB) -> CB::Output
where
CB: ProducerCallback<Self::Item>,
{
self.iter.with_producer(callback)
}
}

/// Parallel iterator over immutable non-overlapping chunks of a slice
#[derive(Debug)]
pub struct ArrayChunksMut<'data, T: Send, const N: usize> {
iter: IterMut<'data, [T; N]>,
rem: &'data mut [T],
}

impl<'data, T: Send, const N: usize> ArrayChunksMut<'data, T, N> {
pub(super) fn new(slice: &'data mut [T]) -> Self {
assert_ne!(N, 0);
let len = slice.len() / N;
let (fst, snd) = slice.split_at_mut(len * N);
// SAFETY: We cast a slice of `len * N` elements into
// a slice of `len` many `N` elements chunks.
let array_slice: &'data mut [[T; N]] = unsafe {
let ptr = fst.as_mut_ptr() as *mut [T; N];
::std::slice::from_raw_parts_mut(ptr, len)
};
Self {
iter: array_slice.par_iter_mut(),
rem: snd,
}
}

/// Return the remainder of the original slice that is not going to be
/// returned by the iterator. The returned slice has at most `N-1`
/// elements.
///
/// Note that this has to consume `self` to return the original lifetime of
/// the data, which prevents this from actually being used as a parallel
/// iterator since that also consumes. This method is provided for parity
/// with `std::iter::ArrayChunksMut`, but consider calling `remainder()` or
/// `take_remainder()` as alternatives.
pub fn into_remainder(self) -> &'data mut [T] {
self.rem
}

/// Return the remainder of the original slice that is not going to be
/// returned by the iterator. The returned slice has at most `N-1`
/// elements.
///
/// Consider `take_remainder()` if you need access to the data with its
/// original lifetime, rather than borrowing through `&mut self` here.
pub fn remainder(&mut self) -> &mut [T] {
self.rem
}

/// Return the remainder of the original slice that is not going to be
/// returned by the iterator. The returned slice has at most `N-1`
/// elements. Subsequent calls will return an empty slice.
pub fn take_remainder(&mut self) -> &'data mut [T] {
std::mem::replace(&mut self.rem, &mut [])
}
}

impl<'data, T: Send + 'data, const N: usize> ParallelIterator for ArrayChunksMut<'data, T, N> {
type Item = &'data mut [T; N];

fn drive_unindexed<C>(self, consumer: C) -> C::Result
where
C: UnindexedConsumer<Self::Item>,
{
bridge(self, consumer)
}

fn opt_len(&self) -> Option<usize> {
Some(self.len())
}
}

impl<'data, T: Send + 'data, const N: usize> IndexedParallelIterator
for ArrayChunksMut<'data, T, N>
{
fn drive<C>(self, consumer: C) -> C::Result
where
C: Consumer<Self::Item>,
{
bridge(self, consumer)
}

fn len(&self) -> usize {
self.iter.len()
}

fn with_producer<CB>(self, callback: CB) -> CB::Output
where
CB: ProducerCallback<Self::Item>,
{
self.iter.with_producer(callback)
}
}

/// Parallel iterator over immutable overlapping windows of a slice
#[derive(Debug)]
pub struct ArrayWindows<'data, T: Sync, const N: usize> {
slice: &'data [T],
}

impl<'data, T: Sync, const N: usize> ArrayWindows<'data, T, N> {
pub(super) fn new(slice: &'data [T]) -> Self {
ArrayWindows { slice }
}
}

impl<'data, T: Sync, const N: usize> Clone for ArrayWindows<'data, T, N> {
fn clone(&self) -> Self {
ArrayWindows { ..*self }
}
}

impl<'data, T: Sync + 'data, const N: usize> ParallelIterator for ArrayWindows<'data, T, N> {
type Item = &'data [T; N];

fn drive_unindexed<C>(self, consumer: C) -> C::Result
where
C: UnindexedConsumer<Self::Item>,
{
bridge(self, consumer)
}

fn opt_len(&self) -> Option<usize> {
Some(self.len())
}
}

impl<'data, T: Sync + 'data, const N: usize> IndexedParallelIterator for ArrayWindows<'data, T, N> {
fn drive<C>(self, consumer: C) -> C::Result
where
C: Consumer<Self::Item>,
{
bridge(self, consumer)
}

fn len(&self) -> usize {
assert!(N >= 1);
self.slice.len().saturating_sub(N - 1)
}

fn with_producer<CB>(self, callback: CB) -> CB::Output
where
CB: ProducerCallback<Self::Item>,
{
fn array<T, const N: usize>(slice: &[T]) -> &[T; N] {
debug_assert_eq!(slice.len(), N);
let ptr = slice.as_ptr() as *const [T; N];
unsafe { &*ptr }
}

// FIXME: use our own producer and the standard `array_windows`, rust-lang/rust#75027
self.slice
.par_windows(N)
.map(array::<T, N>)
.with_producer(callback)
}
}
55 changes: 55 additions & 0 deletions src/slice/mod.rs
Expand Up @@ -5,13 +5,16 @@
//!
//! [std::slice]: https://doc.rust-lang.org/stable/std/slice/

mod array;
mod chunks;
mod mergesort;
mod quicksort;
mod rchunks;

mod test;

pub use self::array::{ArrayChunks, ArrayChunksMut, ArrayWindows};

use self::mergesort::par_mergesort;
use self::quicksort::par_quicksort;
use crate::iter::plumbing::*;
Expand Down Expand Up @@ -71,6 +74,20 @@ pub trait ParallelSlice<T: Sync> {
}
}

/// Returns a parallel iterator over all contiguous array windows of
/// length `N`. The windows overlap.
///
/// # Examples
///
/// ```
/// use rayon::prelude::*;
/// let windows: Vec<_> = [1, 2, 3].par_array_windows().collect();
/// assert_eq!(vec![&[1, 2], &[2, 3]], windows);
/// ```
fn par_array_windows<const N: usize>(&self) -> ArrayWindows<'_, T, N> {
cuviper marked this conversation as resolved.
Show resolved Hide resolved
ArrayWindows::new(self.as_parallel_slice())
}

/// Returns a parallel iterator over at most `chunk_size` elements of
/// `self` at a time. The chunks do not overlap.
///
Expand Down Expand Up @@ -150,6 +167,24 @@ pub trait ParallelSlice<T: Sync> {
assert!(chunk_size != 0, "chunk_size must not be zero");
RChunksExact::new(chunk_size, self.as_parallel_slice())
}

/// Returns a parallel iterator over `N`-element chunks of
/// `self` at a time. The chunks do not overlap.
///
/// If `N` does not divide the length of the slice, then the
/// last up to `N-1` elements will be omitted and can be
/// retrieved from the remainder function of the iterator.
///
/// # Examples
///
/// ```
/// use rayon::prelude::*;
/// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_array_chunks().collect();
/// assert_eq!(chunks, vec![&[1, 2], &[3, 4]]);
/// ```
fn par_array_chunks<const N: usize>(&self) -> ArrayChunks<'_, T, N> {
ArrayChunks::new(self.as_parallel_slice())
}
}

impl<T: Sync> ParallelSlice<T> for [T] {
Expand Down Expand Up @@ -275,6 +310,26 @@ pub trait ParallelSliceMut<T: Send> {
RChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
}

/// Returns a parallel iterator over `N`-element chunks of
/// `self` at a time. The chunks are mutable and do not overlap.
///
/// If `N` does not divide the length of the slice, then the
/// last up to `N-1` elements will be omitted and can be
/// retrieved from the remainder function of the iterator.
///
/// # Examples
///
/// ```
/// use rayon::prelude::*;
/// let mut array = [1, 2, 3, 4, 5];
/// array.par_array_chunks_mut()
/// .for_each(|[a, _, b]| std::mem::swap(a, b));
/// assert_eq!(array, [3, 2, 1, 4, 5]);
/// ```
fn par_array_chunks_mut<const N: usize>(&mut self) -> ArrayChunksMut<'_, T, N> {
ArrayChunksMut::new(self.as_parallel_slice_mut())
}

/// Sorts the slice in parallel.
///
/// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case.
Expand Down
22 changes: 22 additions & 0 deletions src/slice/test.rs
Expand Up @@ -168,3 +168,25 @@ fn test_par_rchunks_exact_mut_remainder() {
assert_eq!(c.take_remainder(), &[]);
assert_eq!(c.len(), 2);
}

#[test]
fn test_par_array_chunks_remainder() {
let v: &[i32] = &[0, 1, 2, 3, 4];
let c = v.par_array_chunks::<2>();
assert_eq!(c.remainder(), &[4]);
assert_eq!(c.len(), 2);
}

#[test]
fn test_par_array_chunks_mut_remainder() {
let v: &mut [i32] = &mut [0, 1, 2, 3, 4];
let mut c = v.par_array_chunks_mut::<2>();
assert_eq!(c.remainder(), &[4]);
assert_eq!(c.len(), 2);
assert_eq!(c.into_remainder(), &[4]);

let mut c = v.par_array_chunks_mut::<2>();
assert_eq!(c.take_remainder(), &[4]);
assert_eq!(c.take_remainder(), &[]);
assert_eq!(c.len(), 2);
}
2 changes: 2 additions & 0 deletions tests/clones.rs
Expand Up @@ -111,7 +111,9 @@ fn clone_vec() {
check(v.par_chunks_exact(42));
check(v.par_rchunks(42));
check(v.par_rchunks_exact(42));
check(v.par_array_chunks::<42>());
check(v.par_windows(42));
check(v.par_array_windows::<42>());
check(v.par_split(|x| x % 3 == 0));
check(v.into_par_iter());
}
Expand Down
3 changes: 3 additions & 0 deletions tests/debug.rs
Expand Up @@ -121,13 +121,16 @@ fn debug_vec() {
check(v.par_iter_mut());
check(v.par_chunks(42));
check(v.par_chunks_exact(42));
check(v.par_array_chunks::<42>());
check(v.par_chunks_mut(42));
check(v.par_chunks_exact_mut(42));
check(v.par_array_chunks_mut::<42>());
check(v.par_rchunks(42));
check(v.par_rchunks_exact(42));
check(v.par_rchunks_mut(42));
check(v.par_rchunks_exact_mut(42));
check(v.par_windows(42));
check(v.par_array_windows::<42>());
check(v.par_split(|x| x % 3 == 0));
check(v.par_split_mut(|x| x % 3 == 0));
check(v.par_drain(..));
Expand Down