Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

a proposal for a sort of partition by #99

Open
sprmnt21 opened this issue Mar 2, 2023 · 1 comment
Open

a proposal for a sort of partition by #99

sprmnt21 opened this issue Mar 2, 2023 · 1 comment

Comments

@sprmnt21
Copy link

sprmnt21 commented Mar 2, 2023

Would it be useful to have an iterator that sits somewhere between groupby and partition?
The intended use case is taking consecutive slices of varying lengths (given by `steps`) from an iterator:

julia> itr=10:-1:1
10:-1:1

julia> steps=[1,2,3,2]
4-element Vector{Int64}:
 1
 2
 3
 2

julia> collect(partby(itr,steps))
4-element Vector{Tuple{Vararg{Int64}}}:
 (10,)
 (9, 8)
 (7, 6, 5)
 (4, 3)

julia> steps=[1,2,3,5]
4-element Vector{Int64}:
 1
 2
 3
 5

julia> collect(partby(itr,steps))
3-element Vector{Tuple{Vararg{Int64}}}:
 (10,)
 (9, 8)
 (7, 6, 5)

julia> steps=[4,2,3,5]
4-element Vector{Int64}:
 4
 2
 3
 5

julia> collect(partby(itr,steps))
3-element Vector{Tuple{Vararg{Int64}}}:
 (10, 9, 8, 7)
 (6, 5)
 (4, 3, 2)

julia> steps=[4,2,3,1, 5]
5-element Vector{Int64}:
 4
 2
 3
 1
 5

julia> collect(partby(itr,steps))
4-element Vector{Tuple{Vararg{Int64}}}:
 (10, 9, 8, 7)
 (6, 5)
 (4, 3, 2)
 (1,)

julia> steps=[2,3,1, 2,7]
5-element Vector{Int64}:
 2
 3
 1
 2
 7

julia> collect(partby(itr,steps))
4-element Vector{Tuple{Vararg{Int64}}}:
 (10, 9)
 (8, 7, 6)
 (5,)
 (4, 3)

julia> collect(partby(partition(itr,2,1),steps))
4-element Vector{Tuple{Vararg{Tuple{Int64, Int64}}}}:    
 ((10, 9), (9, 8))
 ((8, 7), (7, 6), (6, 5))
 ((5, 4),)
 ((4, 3), (3, 2))

#-------------


# Iterator wrapper returned by `partby`: pairs a source iterator with the
# sizes of the consecutive chunks to cut from it.
struct PartBy{I, S}
    # source iterator the chunks are drawn from
    xs::I
    # iterable of chunk sizes, consumed one per emitted chunk
    steps::S
end
# Number of complete chunks that the steps `s` can cut out of `i`: the index of the
# last running total of `s` that still fits within `length(i)`.
# `findlast` yields `nothing` when `s` is empty or when even the first step is larger
# than `length(i)`; map that to 0 so the result is always an integer count.
_length_partby(i, s) = something(findlast(<=(length(i)), accumulate(+, s)), 0)
# Chunks are emitted as tuples of the source's element type; their length varies
# from step to step, hence the `Vararg` tuple type.
function eltype(::Type{<:PartBy{I,S}}) where {I,S}
    return Tuple{Vararg{eltype(I)}}
end
# `length(::PartBy)` needs `length(xs)` and a full pass over `steps`, so a length is
# only advertised when both wrapped iterators are themselves sized. Otherwise the
# size is reported as unknown, so that `collect` and similar consumers do not call
# `length` on an unsized or infinite source.
# `HasShape` and `SizeUnknown` are `Base`-qualified because they may not be imported
# alongside `HasLength`.
function IteratorSize(::Type{<:PartBy{I,S}}) where {I,S}
    sized(T) = IteratorSize(T) isa Union{HasLength,Base.HasShape}
    return sized(I) && sized(S) ? HasLength() : Base.SizeUnknown()
end
# Delegates to `_length_partby` with the wrapped source iterator and its steps.
function length(it::PartBy)
    return _length_partby(it.xs, it.steps)
end


"""
    partby(xs, steps)

Return a `PartBy` iterator over `xs` whose consecutive chunks have the sizes listed
in `steps`.

# Throws
- `ArgumentError` if any element of `steps` is less than or equal to zero.
"""
function partby(xs::I, steps::S) where {I,S}
    has_invalid_step = any(step -> step <= 0, steps)
    has_invalid_step && throw(ArgumentError("all steps must be positives."))
    return PartBy{I,S}(xs, steps)
end

# Evaluate `ex` in the caller's scope. If it is `nothing`, return `nothing` from the
# *enclosing function*; otherwise the macro call evaluates to the value of `ex`.
macro ifsomething(ex)
    return quote
        value = $(esc(ex))
        if value === nothing
            return nothing
        end
        value
    end
end

# Iteration protocol for `PartBy`: every call yields the next chunk as a tuple.
#
# State layout: `(more, xs_val, xs_state, step_val, step_state)`
#   - `more`       : `false` once `it.steps` is exhausted, so the next call stops;
#   - `xs_val`     : first element pulled from `it.xs`, carried along unchanged;
#   - `xs_state`   : current position in `it.xs`;
#   - `step_val`   : size of the chunk to emit on the next call;
#   - `step_state` : current position in `it.steps`.
#
# Returns `nothing` when either iterator is empty, when the steps run out, or when
# `it.xs` cannot supply a complete chunk (a trailing partial chunk is dropped).
function iterate(it::PartBy{I,S}, state=nothing) where {I,S}
    if state === nothing
        # First call: prime both iterators.
        xs_val, xs_state = @ifsomething iterate(it.xs)
        step_val, step_state = @ifsomething iterate(it.steps)
        chunk = Vector{eltype(I)}(undef, step_val)
        chunk[1] = xs_val  # already pulled from `it.xs` while priming
        filled = 1
    else
        more, xs_val, xs_state, step_val, step_state = state
        more || return nothing
        chunk = Vector{eltype(I)}(undef, step_val)
        filled = 0
    end

    # Pull the remaining elements of this chunk; bail out if `it.xs` runs dry.
    for i in (filled + 1):step_val
        chunk[i], xs_state = @ifsomething iterate(it.xs, xs_state)
    end

    # Look ahead exactly one step per chunk. Advancing `it.steps` more than once here
    # would silently skip a step.
    next_step = iterate(it.steps, step_state)
    if next_step === nothing
        return (Tuple(chunk), (false, xs_val, xs_state, step_val, step_state))
    end
    step_val, step_state = next_step
    return (Tuple(chunk), (true, xs_val, xs_state, step_val, step_state))
end
    
@sprmnt21
Copy link
Author

sprmnt21 commented Mar 2, 2023

Consider the following problem.
Given a list of strings, find the groups of consecutive strings led by a string starting with "AT".

julia> itr=[randstring("ACTG") for _ in 1:20]
20-element Vector{String}:
 "ATTCCGAG"
 "CCCGTGGT"
 "TCAAGGGT"
 "ATTAGATC"
 "TCTTACAC"
 "TTTCCGCC"
 "TCCGACCG"
 "GTCAGCTA"
 "CATGTTGC"
 "GAGGAACG"
 "GTCAATGC"
 "TACTCATT"
 "ATACTCTA"
 "AATTCACA"
 "AATCATAT"
 "GTATACCT"
 "ATTTTACT"
 "TTCAGAAG"
 "GTTGATGA"
 "GACGGCGG"

julia> steps=diff([findall(startswith("AT"), itr);length(itr)])        
4-element Vector{Int64}:
 3
 9
 4
 3

julia> collect(partby(itr,steps))
4-element Vector{Tuple{Vararg{String}}}:
 ("ATTCCGAG", "CCCGTGGT", "TCAAGGGT")
 ("ATTAGATC", "TCTTACAC", "TTTCCGCC", "TCCGACCG", "GTCAGCTA", "CATGTTGC", "GAGGAACG", "GTCAATGC", "TACTCATT")
 ("ATACTCTA", "AATTCACA", "AATCATAT", "GTATACCT")
 ("ATTTTACT", "TTCAGAAG", "GTTGATGA")

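Or, better, so that a leading group that does not start with the prefix and the final element are both included: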

st=findall(startswith(somesubstring), itr)
steps=st[1]!=1 ? diff([1;st;length(itr)+1]) : diff([st;length(itr)+1])
collect(partby(itr,steps))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant