Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ordered stream utils #2517

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion futures-util/src/async_await/random.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pub fn shuffle<T>(slice: &mut [T]) {
}

/// Return a value from `0..n`.
fn gen_index(n: usize) -> usize {
pub fn gen_index(n: usize) -> usize {
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I needed an rng for the 'delayed streams' tests. pub(crate) didn't seem to work for that, I guess because this is in futures-util and the tests are all in futures.

(random() % n as u64) as usize
}

Expand Down
123 changes: 123 additions & 0 deletions futures-util/src/stream/diff_ascending.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
use std::cmp::Ordering;
use std::pin::Pin;
use std::task::{Context, Poll};
use Poll::*;

use crate::stream::{Fuse, StreamExt};
use futures_core::stream::{FusedStream, Stream};
use pin_project_lite::pin_project;

/// Computes the difference of two ascending streams.
///
/// `diff_ascending(x, y)` yields the elements of `x` that are not cancelled
/// out by an equal element of `y`. Only one element per input is buffered, so
/// the operation runs in constant space. It is not commutative. The
/// precondition (both input streams are ascending) is not checked.
///
/// ```
/// # futures::executor::block_on(async {
/// use futures::stream::{self, StreamExt};
///
/// let s1 = stream::iter(0..10);
/// let s2 = stream::iter(0..5);
/// let collected: Vec<i32> = stream::diff_ascending(s1, s2).collect().await;
/// assert_eq!(collected, vec![5,6,7,8,9]);
/// # });
/// ```
pub fn diff_ascending<T, St1, St2>(st1: St1, st2: St2) -> DiffAscending<T, St1, St2>
where
    T: Ord,
    St1: Stream<Item = T>,
    St2: Stream<Item = T>,
{
    // Both inputs are fused so they can be polled again after finishing.
    let left = st1.fuse();
    let right = st2.fuse();
    DiffAscending { left, right, left_peek: None, right_peek: None, right_terminated: false }
}

pin_project! {
/// Stream returned by [`diff_ascending`]. It yields the items of the left
/// stream that are not cancelled out by an equal item of the right stream.
#[derive(Debug)]
#[must_use = "streams do nothing unless polled"]
pub struct DiffAscending<T, St1: Stream<Item = T>, St2: Stream<Item = T>> {
// Fused stream whose items are candidates for the output.
#[pin]
left: Fuse<St1>,
// Fused stream whose items are removed from the output.
#[pin]
right: Fuse<St2>,
// Item already pulled from `left` that has been neither emitted nor discarded.
left_peek: Option<T>,
// Item already pulled from `right` that compared greater than the last emitted item.
right_peek: Option<T>,
// Set once `right` has returned `None`; afterwards `left` is passed through unchanged.
right_terminated: bool,
}
}

impl<T: Ord, St1: Stream<Item = T>, St2: Stream<Item = T>> Stream for DiffAscending<T, St1, St2> {
type Item = T;

// For reference
// diff :: Ord a => [a] -> [a] -> [a]
// diff (x : xs) (y : ys)
// | x == y = diff xs ys
// | x < y = x : diff xs (y : ys)
// | x > y = diff (x : xs) ys
// diff xs [] = xs
// diff [] _ = []

Comment on lines +54 to +62
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prototyped the actual algorithm in Haskell. Happy to delete this if it's not helpful.

/// Produces the next item of the left stream that has no equal counterpart
/// in the right stream.
///
/// Returns `Pending` as soon as an input that must be consulted is not
/// ready; an item already pulled from the left stream is buffered first so
/// it is not lost.
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
    let mut me = self.project();

    // Once the right stream is exhausted nothing can be removed any more:
    // drain the buffered item (if any), then forward the left stream.
    if *me.right_terminated {
        if let Some(buffered) = me.left_peek.take() {
            return Ready(Some(buffered));
        }
        return me.left.poll_next(cx);
    }

    loop {
        // Next candidate from the left side, preferring the buffered one.
        let lhs = if let Some(buffered) = me.left_peek.take() {
            buffered
        } else {
            match me.left.as_mut().poll_next(cx) {
                Ready(Some(item)) => item,
                // `Ready(None)` and `Pending` are both handed straight back.
                finished_or_pending => return finished_or_pending,
            }
        };

        // Next item from the right side to compare against.
        let rhs = if let Some(buffered) = me.right_peek.take() {
            buffered
        } else {
            match me.right.as_mut().poll_next(cx) {
                Ready(Some(item)) => item,
                Ready(None) => {
                    *me.right_terminated = true;
                    return Ready(Some(lhs));
                }
                Pending => {
                    // Keep the left item for the next poll.
                    *me.left_peek = Some(lhs);
                    return Pending;
                }
            }
        };

        match lhs.cmp(&rhs) {
            // The right side is already past `lhs`, so `lhs` survives.
            Ordering::Less => {
                *me.right_peek = Some(rhs);
                return Ready(Some(lhs));
            }
            // The right side is behind: drop `rhs` and retry with the same `lhs`.
            Ordering::Greater => *me.left_peek = Some(lhs),
            // Matching pair: both items are dropped.
            Ordering::Equal => {}
        }
    }
}

/// Bounds on the number of items still to be yielded.
///
/// Every item of the right stream removes at most one item of the left
/// stream, so the lower bound is the left lower bound minus the right
/// *upper* bound (zero if the right stream is unbounded). The upper bound is
/// the left upper bound. Items already buffered in `left_peek` /
/// `right_peek` are counted as part of their respective sides.
fn size_hint(&self) -> (usize, Option<usize>) {
    let buffered_left = usize::from(self.left_peek.is_some());
    let buffered_right = usize::from(self.right_peek.is_some());
    let (l_low, l_high) = self.left.size_hint();
    let (_, r_high) = self.right.size_hint();

    let left_low = l_low.saturating_add(buffered_left);
    // Saturating arithmetic: the original `l_low - r_low` could underflow.
    let low = match r_high.and_then(|r| r.checked_add(buffered_right)) {
        Some(max_removed) => left_low.saturating_sub(max_removed),
        None => 0,
    };
    let high = l_high.and_then(|h| h.checked_add(buffered_left));
    (low, high)
}
}

impl<T: Ord, St1: Stream<Item = T>, St2: Stream<Item = T>> FusedStream
    for DiffAscending<T, St1, St2>
{
    /// Returns `true` once no further item can be produced.
    ///
    /// Output only ever comes from the left stream, so the state of the right
    /// stream (and of `right_peek`) is irrelevant: as soon as the left stream
    /// is exhausted and nothing is buffered from it, `poll_next` can only
    /// return `Ready(None)`.
    fn is_terminated(&self) -> bool {
        self.left_peek.is_none() && self.left.is_terminated()
    }
}
102 changes: 102 additions & 0 deletions futures-util/src/stream/merge_ascending.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
use std::pin::Pin;
use std::task::{Context, Poll};
use Poll::*;

use crate::stream::{Fuse, StreamExt};
use futures_core::stream::{FusedStream, Stream};
use pin_project_lite::pin_project;

/// Merges two ascending streams into a single ascending stream.
///
/// Only one element per input is buffered, so the merge runs in constant
/// space. When both inputs offer equal elements, the one from `st1` is
/// yielded first. The precondition (both input streams are ascending) is not
/// checked.
///
/// ```
/// # futures::executor::block_on(async {
/// use futures::stream::{self, StreamExt};
///
/// let evens = stream::iter((0..5).map(|x| x * 2));
/// let odds = stream::iter((0..5).map(|x| x * 2 + 1));
/// let collected: Vec<i32> = stream::merge_ascending(evens, odds).collect().await;
/// assert_eq!(collected, vec![0,1,2,3,4,5,6,7,8,9]);
/// # });
/// ```
pub fn merge_ascending<T, St1, St2>(st1: St1, st2: St2) -> MergeAscending<T, St1, St2>
where
    T: Ord,
    St1: Stream<Item = T>,
    St2: Stream<Item = T>,
{
    // Both inputs are fused so they can be polled again after finishing.
    let (left, right) = (st1.fuse(), st2.fuse());
    MergeAscending { left, right, left_peek: None, right_peek: None }
}

pin_project! {
/// Stream returned by [`merge_ascending`]. It interleaves the items of two
/// ascending streams so that the output is ascending as well.
#[derive(Debug)]
#[must_use = "streams do nothing unless polled"]
pub struct MergeAscending<T, St1: Stream<Item = T>, St2: Stream<Item = T>> {
// First input, fused.
#[pin]
left: Fuse<St1>,
// Second input, fused.
#[pin]
right: Fuse<St2>,
// Item already pulled from `left` but not yet yielded.
left_peek: Option<T>,
// Item already pulled from `right` but not yet yielded.
right_peek: Option<T>,
}
}

impl<T: Ord, St1: Stream<Item = T>, St2: Stream<Item = T>> Stream for MergeAscending<T, St1, St2> {
    type Item = T;

    /// Yields the smaller of the next items of the two inputs.
    ///
    /// One item from each side is obtained (from the peek buffer if present,
    /// otherwise by polling); the smaller one is returned and the other is
    /// stored back in its peek buffer. On a tie the left item is returned
    /// first. If either input is `Pending`, anything already pulled is
    /// buffered and `Pending` is returned.
    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<T>> {
        let mut this = self.project();
        let l = match this.left_peek.take() {
            Some(l) => Some(l),
            None => match this.left.as_mut().poll_next(cx) {
                // `None` here means the left input is exhausted.
                Ready(item) => item,
                Pending => return Pending,
            },
        };
        let r = match this.right_peek.take() {
            Some(r) => Some(r),
            None => match this.right.as_mut().poll_next(cx) {
                // `None` here means the right input is exhausted.
                Ready(item) => item,
                Pending => {
                    // Do not lose the left item while waiting for the right.
                    *this.left_peek = l;
                    return Pending;
                }
            },
        };
        match (l, r) {
            (Some(l), Some(r)) if l <= r => {
                *this.right_peek = Some(r);
                Ready(Some(l))
            }
            (Some(l), Some(r)) => {
                *this.left_peek = Some(l);
                Ready(Some(r))
            }
            (Some(l), None) => Ready(Some(l)),
            (None, Some(r)) => Ready(Some(r)),
            (None, None) => Ready(None),
        }
    }

    /// Bounds on the number of items still to be yielded: the sum of both
    /// inputs' bounds plus the items currently held in the peek buffers.
    ///
    /// The lower bound saturates at `usize::MAX`; the upper bound becomes
    /// `None` if either input is unbounded or the sum overflows.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let buffered =
            usize::from(self.left_peek.is_some()) + usize::from(self.right_peek.is_some());
        let (l_low, l_high) = self.left.size_hint();
        let (r_low, r_high) = self.right.size_hint();
        let low = l_low.saturating_add(r_low).saturating_add(buffered);
        let high = match (l_high, r_high) {
            (Some(l), Some(r)) => l.checked_add(r).and_then(|sum| sum.checked_add(buffered)),
            _ => None,
        };
        (low, high)
    }
}

impl<T: Ord, St1: Stream<Item = T>, St2: Stream<Item = T>> FusedStream
    for MergeAscending<T, St1, St2>
{
    /// Returns `true` once both inputs are exhausted and neither peek buffer
    /// still holds an item.
    fn is_terminated(&self) -> bool {
        if self.left_peek.is_some() || self.right_peek.is_some() {
            // A buffered item is still waiting to be yielded.
            return false;
        }
        self.left.is_terminated() && self.right.is_terminated()
    }
}
137 changes: 137 additions & 0 deletions futures-util/src/stream/merge_multiple_ascending.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
use std::pin::Pin;
use std::task::{Context, Poll};
use Poll::*;

use crate::stream::{Fuse, StreamExt};
use futures_core::stream::{FusedStream, Stream};
use pin_project_lite::pin_project;

/// Merge multiple ordered streams in constant space. The precondition (input
/// streams are ascending) is not checked.
///
/// ```
/// # futures::executor::block_on(async {
/// use futures::stream::{self, StreamExt};
/// let s1 = stream::iter(0..10);
/// let s2 = stream::iter(10..20);
/// let s3 = stream::iter(20..30);
/// let collected: Vec<i32> = stream::merge_multiple_ascending([s1, s2, s3]).collect().await;
/// assert_eq!(collected, (0..30).collect::<Vec<i32>>());
/// # });
/// ```
///
/// NOTE: this is not as easy to use as `merge_ascending`. Every stream in the
/// iterator must be `Unpin` and have _exactly_ the same type as opposed to the
/// two stream case where both streams need only implement the `Stream<Item =
/// T>` trait. In practice, you will likely need to Box your streams into a
/// `dyn` trait object if you want to use this function.
///
/// ```
/// # futures::executor::block_on(async {
/// use futures::Future;
/// use futures::stream::{self, StreamExt};
/// use std::pin::Pin;
/// type BoxedUnfoldGenerator = Box<dyn Fn(i32) -> Pin<Box<dyn Future<Output = Option<(i32, i32)>>>>>;
/// let f1: BoxedUnfoldGenerator = Box::new(|state: i32| {
/// Box::pin(async move {
/// if state >= 5 {
/// None
/// } else {
/// Some((state * 3, state + 1))
/// }
/// })
/// });
/// let f2: BoxedUnfoldGenerator = Box::new(|state: i32| {
/// Box::pin(async move {
/// if state >= 5 {
/// None
/// } else {
/// Some((state * 3 + 1, state + 1))
/// }
/// })
/// });
/// let f3: BoxedUnfoldGenerator = Box::new(|state: i32| {
/// Box::pin(async move {
/// if state >= 5 {
/// None
/// } else {
/// Some((state * 3 + 2, state + 1))
/// }
/// })
/// });
/// let s1 = stream::unfold(0, f1);
/// let s2 = stream::unfold(0, f2);
/// let s3 = stream::unfold(0, f3);
/// let collected: Vec<i32> = stream::merge_multiple_ascending([s1, s2, s3]).collect().await;
/// assert_eq!(collected, (0..15).collect::<Vec<i32>>());
/// # });
/// ```
pub fn merge_multiple_ascending<T: Ord, St: Stream<Item = T>>(
streams: impl IntoIterator<Item = St>,
) -> MergeMultipleAscending<T, St> {
let stream_vec: Vec<_> = streams.into_iter().map(|s| s.fuse()).collect();
let n = stream_vec.len();
let mut peeks = Vec::with_capacity(n);
peeks.resize_with(n, || None);
MergeMultipleAscending { streams: stream_vec, peeks }
}

pin_project! {
/// Stream returned by [`merge_multiple_ascending`]. It merges several
/// ascending streams of the same type into one.
#[derive(Debug)]
#[must_use = "streams do nothing unless polled"]
pub struct MergeMultipleAscending<T, St: Stream<Item = T>> {
// The fused input streams. Not structurally pinned; the `Stream` impl requires `St: Unpin`.
streams: Vec<Fuse<St>>,
// `peeks[i]` holds an item already pulled from `streams[i]` but not yet yielded.
peeks: Vec<Option<T>>,
}
}

impl<T: Ord, St: Stream<Item = T> + Unpin> Stream for MergeMultipleAscending<T, St> {
    type Item = T;

    /// Yields the smallest of the next items of all input streams.
    ///
    /// Every empty peek slot is refilled by polling its stream, and all such
    /// streams are polled even if an earlier one is `Pending`. Items obtained
    /// along the way stay in their peek slots, so nothing is lost when
    /// `Pending` is returned. Once no stream is pending, the smallest buffered
    /// item is taken; on a tie the stream with the lowest index wins. Slots of
    /// exhausted streams stay `None` and are ignored when choosing the
    /// minimum, so the stream ends only when every input is exhausted.
    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<T>> {
        let this = self.project();

        let mut any_pending = false;
        for (stream, peek) in this.streams.iter_mut().zip(this.peeks.iter_mut()) {
            if peek.is_none() {
                match stream.poll_next_unpin(cx) {
                    // `None` (stream exhausted) simply leaves the slot empty.
                    Ready(item) => *peek = item,
                    Pending => any_pending = true,
                }
            }
        }
        if any_pending {
            return Pending;
        }

        // Compare only real items: comparing the `Option`s directly would
        // treat an exhausted stream's `None` as smaller than any `Some`.
        let min_ix = this
            .peeks
            .iter()
            .enumerate()
            .filter_map(|(i, peek)| peek.as_ref().map(|val| (i, val)))
            .min_by(|(_, a), (_, b)| a.cmp(b))
            .map(|(i, _)| i);

        match min_ix {
            Some(i) => Ready(this.peeks[i].take()),
            None => Ready(None),
        }
    }

    /// Bounds on the number of items still to be yielded: the sum of all
    /// inputs' bounds plus the number of buffered items.
    ///
    /// The lower bound saturates at `usize::MAX`; the upper bound becomes
    /// `None` if any input is unbounded or the sum overflows.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let buffered = self.peeks.iter().filter(|peek| peek.is_some()).count();
        self.streams.iter().fold((buffered, Some(buffered)), |(low, high), stream| {
            let (s_low, s_high) = stream.size_hint();
            let high = match (high, s_high) {
                (Some(h), Some(s)) => h.checked_add(s),
                _ => None,
            };
            (low.saturating_add(s_low), high)
        })
    }
}

impl<T: Ord, St: Stream<Item = T> + Unpin> FusedStream for MergeMultipleAscending<T, St> {
fn is_terminated(&self) -> bool {
self.peeks.iter().all(|peek| peek.is_none())
&& self.streams.iter().all(|stream| stream.is_terminated())
}
}
10 changes: 10 additions & 0 deletions futures-util/src/stream/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,16 @@ pub use crate::abortable::{AbortHandle, AbortRegistration, Abortable, Aborted};
#[cfg(feature = "alloc")]
pub use abortable::abortable;

mod diff_ascending;
pub use self::diff_ascending::{diff_ascending, DiffAscending};

mod merge_ascending;
pub use self::merge_ascending::{merge_ascending, MergeAscending};

#[cfg(feature = "alloc")]
mod merge_multiple_ascending;
pub use self::merge_multiple_ascending::{merge_multiple_ascending, MergeMultipleAscending};

// Just a helper function to ensure the streams we're returning all have the
// right implementations.
pub(crate) fn assert_stream<T, S>(stream: S) -> S
Expand Down