Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ordered stream utils #2517

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion futures-util/src/async_await/random.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pub fn shuffle<T>(slice: &mut [T]) {
}

/// Return a value from `0..n`.
fn gen_index(n: usize) -> usize {
pub fn gen_index(n: usize) -> usize {
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I needed an rng for the 'delayed streams' tests. pub(crate) didn't seem to work for that, I guess because this is in futures-util and the tests are all in futures.

(random() % n as u64) as usize
}

Expand Down
121 changes: 121 additions & 0 deletions futures-util/src/stream/diff_ascending.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
use crate::stream::{Fuse, StreamExt};
use core::cmp::Ordering;
use core::pin::Pin;
use futures_core::stream::{FusedStream, Stream};
use futures_core::task::{Context, Poll};
use pin_project_lite::pin_project;

/// Subtract one sorted stream from another in constant space.
///
/// `diff_ascending(x, y)` returns a stream containing all elements in `x` not
/// present in `y`. The operation is not commutative. The precondition (both
/// input streams are ascending) is not checked.
///
/// ```
/// # futures::executor::block_on(async {
/// use futures::stream::{self, StreamExt};
///
/// let s1 = stream::iter(0..10);
/// let s2 = stream::iter(0..5);
/// let collected: Vec<i32> = stream::diff_ascending(s1, s2).collect().await;
/// assert_eq!(collected, vec![5,6,7,8,9]);
/// # });
/// ```
pub fn diff_ascending<T: Ord, St1: Stream<Item = T>, St2: Stream<Item = T>>(
    st1: St1,
    st2: St2,
) -> DiffAscending<T, St1, St2> {
    // Both inputs are fused before being stored; nothing is buffered yet and
    // the right-hand side has not been observed to end.
    let left = st1.fuse();
    let right = st2.fuse();
    DiffAscending { left, right, left_peek: None, right_peek: None, right_terminated: false }
}

pin_project! {
/// Struct for the [`diff_ascending`] method.
///
/// Yields the items of the left stream that have no equal counterpart in
/// the right stream.
#[derive(Debug)]
#[must_use = "streams do nothing unless polled"]
pub struct DiffAscending<T, St1: Stream<Item = T>, St2: Stream<Item = T>> {
// Stream whose items are candidates for output.
#[pin]
left: Fuse<St1>,
// Stream whose items are removed from the output.
#[pin]
right: Fuse<St2>,
// Left item that was pulled but could not be emitted or discarded yet.
left_peek: Option<T>,
// Right item that compared greater than the last emitted left item and
// must be compared again.
right_peek: Option<T>,
// Set once the right stream has yielded `None`; from then on the left
// side is passed through unfiltered.
right_terminated: bool,
}
}

impl<T: Ord, St1: Stream<Item = T>, St2: Stream<Item = T>> Stream for DiffAscending<T, St1, St2> {
type Item = T;

// For reference, the same algorithm written as a Haskell list function:
// diff :: Ord a => [a] -> [a] -> [a]
// diff (x : xs) (y : ys)
// | x == y = diff xs ys
// | x < y = x : diff xs (y : ys)
// | x > y = diff (x : xs) ys
// diff xs [] = xs
// diff [] _ = []

Comment on lines +54 to +62
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prototyped the actual algorithm in Haskell. Happy to delete this if it's not helpful.

/// Produces the next left item that has no equal counterpart on the right.
///
/// Items pulled from either side that cannot be resolved yet are parked in
/// `left_peek` / `right_peek` so that they survive a `Poll::Pending`.
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
    let mut me = self.project();

    // With the right side exhausted nothing can be filtered out any more:
    // flush the parked left item first, then forward the left stream as is.
    if *me.right_terminated {
        if let Some(parked) = me.left_peek.take() {
            return Poll::Ready(Some(parked));
        }
        return me.left.poll_next(cx);
    }

    loop {
        // Next left candidate: parked item first, otherwise poll the stream.
        let lhs = if let Some(parked) = me.left_peek.take() {
            parked
        } else {
            match me.left.as_mut().poll_next(cx) {
                Poll::Ready(Some(item)) => item,
                // Nothing left to emit; the right side is irrelevant now.
                Poll::Ready(None) => return Poll::Ready(None),
                Poll::Pending => return Poll::Pending,
            }
        };

        // Next right item to compare against, obtained the same way.
        let rhs = if let Some(parked) = me.right_peek.take() {
            parked
        } else {
            match me.right.as_mut().poll_next(cx) {
                Poll::Ready(Some(item)) => item,
                Poll::Ready(None) => {
                    // Right side is done: `lhs` can no longer be cancelled.
                    *me.right_terminated = true;
                    return Poll::Ready(Some(lhs));
                }
                Poll::Pending => {
                    // Keep `lhs` for the next poll instead of losing it.
                    *me.left_peek = Some(lhs);
                    return Poll::Pending;
                }
            }
        };

        match lhs.cmp(&rhs) {
            // Both sides hold the same value: drop the pair and continue.
            Ordering::Equal => {}
            // The right side is behind: discard `rhs`, retry with the same `lhs`.
            Ordering::Greater => *me.left_peek = Some(lhs),
            // `lhs` cannot appear on the right any more: emit it, keep `rhs`.
            Ordering::Less => {
                *me.right_peek = Some(rhs);
                return Poll::Ready(Some(lhs));
            }
        }
    }
}

/// Bounds on the number of items still to be yielded.
///
/// Every output item comes from the left side and each right item can cancel
/// at most one left item, so:
/// * lower bound = left lower bound minus the right *upper* bound (zero if
///   the right side is unbounded), computed without underflow;
/// * upper bound = left upper bound.
///
/// Items parked in `left_peek` / `right_peek` have already been taken out of
/// the underlying streams and are counted on their respective side.
fn size_hint(&self) -> (usize, Option<usize>) {
    let left_parked = usize::from(self.left_peek.is_some());
    let right_parked = usize::from(self.right_peek.is_some());

    let (l_low, l_high) = self.left.size_hint();
    let (_, r_high) = self.right.size_hint();

    let left_min = l_low.saturating_add(left_parked);
    let low = match r_high {
        Some(r_max) => left_min.saturating_sub(r_max.saturating_add(right_parked)),
        // An unbounded right side may cancel every remaining left item.
        None => 0,
    };
    // An upper bound that does not fit in `usize` is reported as unknown.
    let high = l_high.and_then(|l_max| l_max.checked_add(left_parked));

    (low, high)
}
}

impl<T: Ord, St1: Stream<Item = T>, St2: Stream<Item = T>> FusedStream
    for DiffAscending<T, St1, St2>
{
    /// Returns `true` once no further item can be produced.
    ///
    /// Every item this stream yields originates from the left stream, so the
    /// diff is finished as soon as the left stream has terminated and no left
    /// item is parked. The right stream is deliberately not consulted:
    /// `poll_next` returns `None` without draining it when the left side ends
    /// first, so it may never report termination.
    fn is_terminated(&self) -> bool {
        self.left_peek.is_none() && self.left.is_terminated()
    }
}
100 changes: 100 additions & 0 deletions futures-util/src/stream/merge_ascending.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
use crate::stream::{Fuse, StreamExt};
use core::pin::Pin;
use futures_core::stream::{FusedStream, Stream};
use futures_core::task::{Context, Poll};
use pin_project_lite::pin_project;

/// Merge two ascending streams into a single ascending stream in constant
/// space. The precondition (input streams are ascending) is not checked.
///
/// ```
/// # futures::executor::block_on(async {
/// use futures::stream::{self, StreamExt};
///
/// let evens = stream::iter((0..5).map(|x| x * 2));
/// let odds = stream::iter((0..5).map(|x| x * 2 + 1));
/// let collected: Vec<i32> = stream::merge_ascending(evens, odds).collect().await;
/// assert_eq!(collected, vec![0,1,2,3,4,5,6,7,8,9]);
/// # });
/// ```
pub fn merge_ascending<T: Ord, St1: Stream<Item = T>, St2: Stream<Item = T>>(
    st1: St1,
    st2: St2,
) -> MergeAscending<T, St1, St2> {
    // Both inputs are fused before being stored; nothing is buffered yet.
    let left = st1.fuse();
    let right = st2.fuse();
    MergeAscending { left, right, left_peek: None, right_peek: None }
}

pin_project! {
/// Struct for the [`merge_ascending`] method.
///
/// Yields the items of both input streams, always emitting the smaller of
/// the two current head items.
#[derive(Debug)]
#[must_use = "streams do nothing unless polled"]
pub struct MergeAscending<T, St1: Stream<Item = T>, St2: Stream<Item = T>> {
// First input; wins ties against the second input.
#[pin]
left: Fuse<St1>,
// Second input.
#[pin]
right: Fuse<St2>,
// Left item that was pulled but not emitted yet.
left_peek: Option<T>,
// Right item that was pulled but not emitted yet.
right_peek: Option<T>,
}
}

impl<T: Ord, St1: Stream<Item = T>, St2: Stream<Item = T>> Stream for MergeAscending<T, St1, St2> {
    type Item = T;

    /// Yields the smaller of the two current head items; ties go to the left.
    ///
    /// A head item that was pulled but not emitted is parked in `left_peek` /
    /// `right_peek` and reused by the next call.
    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<T>> {
        let mut this = self.project();

        // Head of the left side: parked item first, otherwise poll the stream.
        let l = match this.left_peek.take() {
            Some(l) => Some(l),
            None => match this.left.as_mut().poll_next(cx) {
                Poll::Ready(item) => item,
                Poll::Pending => return Poll::Pending,
            },
        };

        // Head of the right side, obtained the same way.
        let r = match this.right_peek.take() {
            Some(r) => Some(r),
            None => match this.right.as_mut().poll_next(cx) {
                Poll::Ready(item) => item,
                Poll::Pending => {
                    // Do not lose the left head while waiting for the right.
                    *this.left_peek = l;
                    return Poll::Pending;
                }
            },
        };

        match (l, r) {
            (Some(l), Some(r)) if l <= r => {
                *this.right_peek = Some(r);
                Poll::Ready(Some(l))
            }
            (Some(l), Some(r)) => {
                *this.left_peek = Some(l);
                Poll::Ready(Some(r))
            }
            // Only one side still has items: pass it through.
            (Some(l), None) => Poll::Ready(Some(l)),
            (None, Some(r)) => Poll::Ready(Some(r)),
            (None, None) => Poll::Ready(None),
        }
    }

    /// Bounds on the number of items still to be yielded: the sum of both
    /// inputs' bounds plus the items currently parked in the peek slots.
    ///
    /// Parked items have already been removed from the underlying streams, so
    /// leaving them out would make the upper bound too small. The lower bound
    /// saturates and an upper bound that does not fit in `usize` is reported
    /// as unknown.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let parked =
            usize::from(self.left_peek.is_some()) + usize::from(self.right_peek.is_some());

        let (l_low, l_high) = self.left.size_hint();
        let (r_low, r_high) = self.right.size_hint();

        let low = l_low.saturating_add(r_low).saturating_add(parked);
        let high = match (l_high, r_high) {
            (Some(l), Some(r)) => l.checked_add(r).and_then(|sum| sum.checked_add(parked)),
            _ => None,
        };
        (low, high)
    }
}

impl<T: Ord, St1: Stream<Item = T>, St2: Stream<Item = T>> FusedStream
    for MergeAscending<T, St1, St2>
{
    /// Returns `true` when neither peek slot holds an item and both fused
    /// inputs report termination.
    fn is_terminated(&self) -> bool {
        let nothing_parked = self.left_peek.is_none() && self.right_peek.is_none();
        nothing_parked && self.left.is_terminated() && self.right.is_terminated()
    }
}
139 changes: 139 additions & 0 deletions futures-util/src/stream/merge_multiple_ascending.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
use crate::stream::{Fuse, StreamExt};
use alloc::vec::Vec;
use core::pin::Pin;
use futures_core::stream::{FusedStream, Stream};
use futures_core::task::{Context, Poll};
use pin_project_lite::pin_project;

/// Merge multiple ordered streams in constant space. The precondition (input
/// streams are ascending) is not checked. This function is only available when
/// the `std` or `alloc` feature of this library is activated, and it is
/// activated by default.
///
/// ```
/// # futures::executor::block_on(async {
/// use futures::stream::{self, StreamExt};
/// let s1 = stream::iter(0..10);
/// let s2 = stream::iter(10..20);
/// let s3 = stream::iter(20..30);
/// let collected: Vec<i32> = stream::merge_multiple_ascending([s1, s2, s3]).collect().await;
/// assert_eq!(collected, (0..30).collect::<Vec<i32>>());
/// # });
/// ```
///
/// NOTE: this is not as easy to use as
/// [`merge_ascending`](crate::stream::merge_ascending). Every stream in the
/// iterator must be `Unpin` and have exactly the same type as opposed to the
/// two stream case where both streams need only implement the `Stream<Item =
/// T>` trait. In practice, you will likely need to Box your streams into a
/// `dyn` trait object if you want to use this function.
///
/// ```
/// # futures::executor::block_on(async {
/// use futures::Future;
/// use futures::stream::{self, StreamExt};
/// use std::pin::Pin;
/// type BoxedUnfoldGenerator = Box<dyn Fn(i32) -> Pin<Box<dyn Future<Output = Option<(i32, i32)>>>>>;
/// let f1: BoxedUnfoldGenerator = Box::new(|state: i32| {
/// Box::pin(async move {
/// if state >= 5 {
/// None
/// } else {
/// Some((state * 3, state + 1))
/// }
/// })
/// });
/// let f2: BoxedUnfoldGenerator = Box::new(|state: i32| {
/// Box::pin(async move {
/// if state >= 5 {
/// None
/// } else {
/// Some((state * 3 + 1, state + 1))
/// }
/// })
/// });
/// let f3: BoxedUnfoldGenerator = Box::new(|state: i32| {
/// Box::pin(async move {
/// if state >= 5 {
/// None
/// } else {
/// Some((state * 3 + 2, state + 1))
/// }
/// })
/// });
/// let s1 = stream::unfold(0, f1);
/// let s2 = stream::unfold(0, f2);
/// let s3 = stream::unfold(0, f3);
/// let collected: Vec<i32> = stream::merge_multiple_ascending([s1, s2, s3]).collect().await;
/// assert_eq!(collected, (0..15).collect::<Vec<i32>>());
/// # });
/// ```
pub fn merge_multiple_ascending<T: Ord, St: Stream<Item = T>>(
streams: impl IntoIterator<Item = St>,
) -> MergeMultipleAscending<T, St> {
let stream_vec: Vec<_> = streams.into_iter().map(|s| s.fuse()).collect();
let n = stream_vec.len();
let mut peeks = Vec::with_capacity(n);
peeks.resize_with(n, || None);
MergeMultipleAscending { streams: stream_vec, peeks }
}

pin_project! {
/// Struct for the [`merge_multiple_ascending`] method.
#[derive(Debug)]
#[must_use = "streams do nothing unless polled"]
pub struct MergeMultipleAscending<T, St: Stream<Item = T>> {
// The fused input streams. Not structurally pinned; the `Stream` impl
// requires `St: Unpin` and polls them with `poll_next_unpin`.
streams: Vec<Fuse<St>>,
// One slot per input stream (same index as `streams`) holding an item
// that was pulled from that stream but not emitted yet.
peeks: Vec<Option<T>>,
}
}

impl<T: Ord, St: Stream<Item = T> + Unpin> Stream for MergeMultipleAscending<T, St> {
    type Item = T;

    /// Yields the smallest head item across all inputs; ties go to the stream
    /// with the lowest index.
    ///
    /// Each input's head item lives in its `peeks` slot. Empty slots are
    /// refilled by polling the matching stream. If any of those polls is
    /// pending the whole merge is pending, and the heads gathered so far stay
    /// in their slots for the next call. A slot that is still empty after the
    /// refill belongs to an exhausted stream and takes no part in the
    /// comparison.
    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<T>> {
        let this = self.project();

        // Refill empty slots in place; no per-poll scratch buffer is needed.
        for (slot, stream) in this.peeks.iter_mut().zip(this.streams.iter_mut()) {
            if slot.is_none() {
                match stream.poll_next_unpin(cx) {
                    // `None` (stream exhausted) simply leaves the slot empty.
                    Poll::Ready(item) => *slot = item,
                    Poll::Pending => return Poll::Pending,
                }
            }
        }

        // Find the smallest available head. Empty slots are skipped so that an
        // exhausted stream can never shadow items still held by the others.
        let mut smallest: Option<(usize, &T)> = None;
        for (i, slot) in this.peeks.iter().enumerate() {
            if let Some(candidate) = slot {
                let replaces = match smallest {
                    // Strict `<` keeps the earliest stream on ties.
                    Some((_, current)) => candidate < current,
                    None => true,
                };
                if replaces {
                    smallest = Some((i, candidate));
                }
            }
        }
        let winner = smallest.map(|(i, _)| i);

        // Emit the winner and leave every other head parked. With no winner
        // all inputs are exhausted and the merge is finished.
        Poll::Ready(winner.and_then(|i| this.peeks[i].take()))
    }
}

impl<T: Ord, St: Stream<Item = T> + Unpin> FusedStream for MergeMultipleAscending<T, St> {
    /// Returns `true` when no peek slot holds an item and every fused input
    /// reports termination.
    fn is_terminated(&self) -> bool {
        if self.peeks.iter().any(Option::is_some) {
            return false;
        }
        self.streams.iter().all(|stream| stream.is_terminated())
    }
}
13 changes: 13 additions & 0 deletions futures-util/src/stream/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,19 @@ pub use crate::abortable::{AbortHandle, AbortRegistration, Abortable, Aborted};
#[cfg(feature = "alloc")]
pub use abortable::abortable;

mod diff_ascending;
pub use self::diff_ascending::{diff_ascending, DiffAscending};

mod merge_ascending;
pub use self::merge_ascending::{merge_ascending, MergeAscending};

#[cfg(not(futures_no_atomic_cas))]
#[cfg(feature = "alloc")]
mod merge_multiple_ascending;
#[cfg(not(futures_no_atomic_cas))]
#[cfg(feature = "alloc")]
pub use self::merge_multiple_ascending::{merge_multiple_ascending, MergeMultipleAscending};

// Just a helper function to ensure the streams we're returning all have the
// right implementations.
pub(crate) fn assert_stream<T, S>(stream: S) -> S
Expand Down