-
Notifications
You must be signed in to change notification settings - Fork 5
/
fastremap.pyx
1431 lines (1163 loc) · 38 KB
/
fastremap.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# cython: language_level=3
"""
Functions related to remapping image volumes.
Renumber volumes into smaller data types, mask out labels
or their complement, and remap the values of image volumes.
This module also contains the facilities for performing
an in-place matrix transposition for up to 4D arrays. This is
helpful for converting between C and Fortran order in memory
constrained environments when format shifting.
Author: William Silversmith
Affiliation: Seung Lab, Princeton Neuroscience Institute
Date: August 2018 - May 2022
"""
from typing import Sequence, List
cimport cython
from libc.stdint cimport (
uint8_t, uint16_t, uint32_t, uint64_t,
int8_t, int16_t, int32_t, int64_t,
uintptr_t
)
cimport fastremap
from collections import defaultdict
from functools import reduce
import operator
import numpy as np
cimport numpy as cnp
__version__ = '1.14.0'
__VERSION__ = __version__
# Fused type groups: each Cython function that takes one of these is
# compiled into a specialization per member type.
ctypedef fused UINT:
  uint8_t
  uint16_t
  uint32_t
  uint64_t

# all standard signed and unsigned integer widths
ctypedef fused ALLINT:
  UINT
  int8_t
  int16_t
  int32_t
  int64_t

# alias of ALLINT so a function can take two integer arguments
# that specialize to different types independently
ctypedef fused ALLINT_2:
  ALLINT

# integers plus real floating point
ctypedef fused NUMBER:
  ALLINT
  float
  double

ctypedef fused COMPLEX_NUMBER:
  NUMBER
  float complex

# width of the occurrence counters (selected by the array size)
ctypedef fused COUNT_T:
  uint32_t
  uint64_t
# C++ in-place transposition kernels declared in ipt.hpp
cdef extern from "ipt.hpp" namespace "pyipt":
  cdef void _ipt2d[T](T* arr, size_t sx, size_t sy)
  cdef void _ipt3d[T](
    T* arr, size_t sx, size_t sy, size_t sz
  )
  cdef void _ipt4d[T](
    T* arr, size_t sx, size_t sy, size_t sz, size_t sw
  )
def minmax(arr):
  """
  Compute (min, max) of arr in a single pass.

  Returns (None, None) when the array has size zero.
  """
  flat = reshape(arr, (arr.size,))
  return _minmax(flat)
def _minmax(cnp.ndarray[NUMBER, ndim=1] arr):
  """Single-pass min/max over a 1D array; (None, None) when empty."""
  cdef size_t i = 0
  cdef size_t size = arr.size

  if size == 0:
    return None, None

  # seed both extrema with the first element, then scan the rest
  cdef NUMBER minval = arr[0]
  cdef NUMBER maxval = arr[0]

  for i in range(1, size):
    if minval > arr[i]:
      minval = arr[i]
    if maxval < arr[i]:
      maxval = arr[i]

  return minval, maxval
def match_array_orders(*arrs, order="K"):
  """
  Return the input arrays converted to a single common memory order.

  order: 'C', 'F', or 'K'. 'K' follows the first array's order,
    falling back to Fortran order when it is not C-contiguous.
  """
  if not arrs:
    return []

  use_c = (order == "C") or (order == "K" and arrs[0].flags.c_contiguous)
  convert = np.ascontiguousarray if use_c else np.asfortranarray
  return [ convert(a) for a in arrs ]
def renumber(arr, start=1, preserve_zero=True, in_place=False):
  """
  renumber(arr, start=1, preserve_zero=True, in_place=False)

  Given an array of integers, renumber all the unique values starting
  from 1. This can allow us to reduce the size of the data width required
  to represent it.

  arr: A numpy array
  start (default: 1): Start renumbering from this value
  preserve_zero (default: True): Don't renumber zero.
  in_place (default: False): Perform the renumbering in-place to avoid
    an extra copy. This option depends on a fortran or C contiguous
    array. A copy will be made if the array is not contiguous.

  Return: a renumbered array, dict with remapping of oldval => newval
  """
  if arr.size == 0:
    return arr, {}

  if arr.dtype == bool and preserve_zero:
    return arr, { 0: 0, 1: start }
  elif arr.dtype == bool:
    # process bools through their uint8 byte representation
    arr = arr.view(np.uint8)

  cdef int nbytes = np.dtype(arr.dtype).itemsize
  shape = arr.shape

  order = 'F' if arr.flags['F_CONTIGUOUS'] else 'C'
  # in-place renumbering is only possible on contiguous memory
  in_place = in_place and (arr.flags['F_CONTIGUOUS'] or arr.flags['C_CONTIGUOUS'])

  if not in_place:
    arr = np.copy(arr, order=order)

  # flatten without copying by viewing the buffer as 1D
  arr = np.lib.stride_tricks.as_strided(arr, shape=(arr.size,), strides=(nbytes,))
  arr, remap_dict = _renumber(arr, <int64_t>start, preserve_zero)
  arr = reshape(arr, shape, order)

  return arr, remap_dict
def reshape(arr, shape, order=None):
  """
  If the array is contiguous, attempt an in place reshape
  rather than potentially making a copy.

  Required:
    arr: The input numpy array.
    shape: The desired shape (must be the same size as arr)
  Optional:
    order: 'C', 'F', or None (determine automatically)

  Returns: reshaped array
  """
  if order is None:
    if arr.flags['F_CONTIGUOUS']:
      order = 'F'
    elif arr.flags['C_CONTIGUOUS']:
      order = 'C'
    else:
      # non-contiguous: fall back to numpy's reshape (may copy)
      return arr.reshape(shape)

  cdef int nbytes = np.dtype(arr.dtype).itemsize

  if order == 'C':
    # row-major: stride of axis i is the product of all later dimensions
    strides = [ reduce(operator.mul, shape[i:]) * nbytes for i in range(1, len(shape)) ]
    strides += [ nbytes ]
    return np.lib.stride_tricks.as_strided(arr, shape=shape, strides=strides)
  else:
    # column-major: stride of axis i is the product of all earlier dimensions
    strides = [ reduce(operator.mul, shape[:i]) * nbytes for i in range(1, len(shape)) ]
    strides = [ nbytes ] + strides
    return np.lib.stride_tricks.as_strided(arr, shape=shape, strides=strides)
@cython.boundscheck(False)
@cython.wraparound(False) # turn off negative index wrapping for entire function
@cython.nonecheck(False)
def _renumber(cnp.ndarray[NUMBER, cast=True, ndim=1] arr, int64_t start=1, preserve_zero=True):
  """
  renumber(arr, int64_t start=1, preserve_zero=True)

  Given an array of integers, renumber all the unique values starting
  from 1. This can allow us to reduce the size of the data width required
  to represent it.

  arr: A numpy array
  start (default: 1): Start renumbering from this value
  preserve_zero (default: True): don't renumber zero

  Return: a renumbered array, dict with remapping of oldval => newval
  """
  cdef flat_hash_map[NUMBER, NUMBER] remap_dict

  if arr.size == 0:
    return refit(np.zeros((0,), dtype=arr.dtype), 0), {}

  remap_dict.reserve(1024)

  if preserve_zero:
    remap_dict[0] = 0

  cdef NUMBER[:] arrview = arr
  cdef NUMBER remap_id = start
  cdef NUMBER elem

  # some value that isn't the first value
  # and won't cause an overflow
  cdef NUMBER last_elem = <NUMBER>(~<uint64_t>arr[0])
  cdef NUMBER last_remap_id = start

  cdef size_t size = arr.size
  cdef size_t i = 0

  for i in range(size):
    elem = arrview[i]
    # run-length shortcut: repeated values skip the hash lookup
    if elem == last_elem:
      arrview[i] = last_remap_id
      continue

    if remap_dict.find(elem) == remap_dict.end():
      arrview[i] = remap_id
      remap_dict[elem] = remap_id
      remap_id += 1
    else:
      arrview[i] = remap_dict[elem]

    last_elem = elem
    last_remap_id = arrview[i]

  # shrink the dtype to the smallest that fits the largest assigned id
  factor = remap_id
  if abs(start) > abs(factor):
    factor = start

  return refit(arr, factor), { k:v for k,v in remap_dict }
def refit(arr, value=None, increase_only=False, exotics=False):
  """
  Convert the array to the smallest dtype of the same kind
  that can hold a given value.

  e.g. a uint8 array refit to the value 2^20 becomes uint32.
  Supports standard float, signed/unsigned integer, and complex kinds.

  arr: numpy array
  value: the value the output dtype must hold. When None, the
    absolutely larger of the array's min and max is used.
  increase_only: when True, only widen — never shrink — the dtype.
  exotics: permit half precision floats and double complex.

  Returns: refitted array
  """
  if value is None:
    lo, hi = minmax(arr)
    if lo is None or hi is None:
      lo, hi = 0, 0
    value = hi if abs(hi) > abs(lo) else lo

  dtype = fit_dtype(arr.dtype, value, exotics=exotics)

  same_or_smaller = np.dtype(dtype).itemsize <= np.dtype(arr.dtype).itemsize
  if increase_only and same_or_smaller:
    return arr
  if dtype == arr.dtype:
    return arr
  return arr.astype(dtype)
def fit_dtype(dtype, value, exotics=False):
  """
  Find the smallest dtype of the same kind that can hold a given value.

  e.g. for an input kind of uint8 and a value of 2^20,
  the answer is uint32.

  Supports standard float, signed/unsigned integer, and complex kinds.

  exotics: permit float16 (half precision) and
    double complex (128-bit) results.

  Returns: the fitted numpy scalar type

  Raises: ValueError for unsupported dtypes, negative values with
    unsigned kinds, or values no candidate type can hold.
  """
  dtype = np.dtype(dtype)

  if np.issubdtype(dtype, np.floating):
    candidates = [ np.float32, np.float64 ]
    if exotics:
      candidates = [ np.float16 ] + candidates
    info = np.finfo
  elif np.issubdtype(dtype, np.unsignedinteger):
    if value < 0:
      raise ValueError(str(value) + " is negative but unsigned data type {} is selected.".format(dtype))
    candidates = [ np.uint8, np.uint16, np.uint32, np.uint64 ]
    info = np.iinfo
  elif np.issubdtype(dtype, np.complexfloating):
    candidates = [ np.csingle, np.cdouble ] if exotics else [ np.csingle ]
    info = np.finfo
  elif np.issubdtype(dtype, np.integer):
    candidates = [ np.int8, np.int16, np.int32, np.int64 ]
    info = np.iinfo
  else:
    raise ValueError(
      "Unsupported dtype: {} Only standard floats, integers, and complex types are supported.".format(dtype)
    )

  # for complex values, fit against the absolutely larger component
  test_value = np.real(value)
  if abs(np.imag(value)) > abs(np.real(value)):
    test_value = np.imag(value)

  for candidate in candidates:
    bounds = info(candidate)
    fits = (bounds.max >= test_value) if test_value >= 0 else (bounds.min <= test_value)
    if fits:
      return candidate

  raise ValueError("Unable to find a compatible dtype for {} that can fit {}".format(
    dtype, value
  ))
def mask(arr, labels, in_place=False, value=0):
  """
  mask(arr, labels, in_place=False, value=0)

  Replace every occurrence of the given labels with `value`.

  arr: an N-dimensional numpy array
  labels: an iterable of integer labels to mask out
  in_place: if True, modify the input array to reduce
    memory consumption.
  value: mask value

  Returns: arr with `labels` masked out
  """
  table = dict.fromkeys(labels, value)
  return remap(arr, table, preserve_missing_labels=True, in_place=in_place)
def mask_except(arr, labels, in_place=False, value=0):
  """
  mask_except(arr, labels, in_place=False, value=0)

  Replace every label NOT in the provided list with `value`.

  arr: an N-dimensional numpy array
  labels: an iterable of integer labels to keep
  in_place: if True, modify the input array to reduce
    memory consumption.
  value: mask value

  Returns: arr with all labels except `labels` masked out
  """
  shape = arr.shape
  order = 'F' if arr.flags['F_CONTIGUOUS'] else 'C'

  if not in_place:
    arr = np.copy(arr, order=order)

  flat = reshape(arr, (arr.size,))
  flat = _mask_except(flat, labels, value)
  return reshape(flat, shape, order=order)
@cython.boundscheck(False)
@cython.wraparound(False) # turn off negative index wrapping for entire function
@cython.nonecheck(False)
def _mask_except(cnp.ndarray[ALLINT] arr, list labels, ALLINT value):
  """In place, set every element of arr absent from labels to value."""
  cdef ALLINT[:] arrview = arr
  cdef size_t i = 0
  cdef size_t size = arr.size

  if size == 0:
    return arr

  # hash set of labels to keep (stored as identity map)
  cdef flat_hash_map[ALLINT, ALLINT] tbl
  for label in labels:
    tbl[label] = label

  # cache the last element and its result so runs of equal
  # values skip the hash lookup
  cdef ALLINT last_elem = arrview[0]
  cdef ALLINT last_elem_value = 0
  if tbl.find(last_elem) == tbl.end():
    last_elem_value = value
  else:
    last_elem_value = last_elem

  for i in range(size):
    if arrview[i] == last_elem:
      arrview[i] = last_elem_value
    elif tbl.find(arrview[i]) == tbl.end():
      last_elem = arrview[i]
      last_elem_value = value
      arrview[i] = value
    else:
      last_elem = arrview[i]
      last_elem_value = arrview[i]

  return arr
def component_map(component_labels, parent_labels):
  """
  Given two label images with a surjective component -> parent mapping
  between their labels, build a dictionary expressing that mapping.

  e.g. component_map([ 1, 2, 3, 4 ], [ 5, 5, 6, 7 ])
  returns { 1: 5, 2: 5, 3: 6, 4: 7 }

  Returns: { $COMPONENT_LABEL: $PARENT_LABEL }
  """
  if not isinstance(component_labels, np.ndarray):
    component_labels = np.array(component_labels)
  if not isinstance(parent_labels, np.ndarray):
    parent_labels = np.array(parent_labels)

  if component_labels.size == 0:
    return {}

  if component_labels.shape != parent_labels.shape:
    raise ValueError("The size of the inputs must match: {} vs {}".format(
      component_labels.shape, parent_labels.shape
    ))

  component_labels, parent_labels = match_array_orders(
    component_labels, parent_labels
  )
  flat_components = reshape(component_labels, (component_labels.size,))
  flat_parents = reshape(parent_labels, (parent_labels.size,))
  return _component_map(flat_components, flat_parents)
@cython.boundscheck(False)
@cython.wraparound(False) # turn off negative index wrapping for entire function
@cython.nonecheck(False)
def _component_map(
  cnp.ndarray[ALLINT, ndim=1, cast=True] component_labels,
  cnp.ndarray[ALLINT_2, ndim=1, cast=True] parent_labels
):
  """Build { component_label: parent_label } from two parallel 1D arrays."""
  cdef size_t size = component_labels.size
  if size == 0:
    return {}

  cdef dict remap = {}
  cdef size_t i = 0

  cdef ALLINT last_label = component_labels[0]
  remap[component_labels[0]] = parent_labels[0]
  for i in range(size):
    # consecutive runs of the same component keep the first parent seen
    if last_label == component_labels[i]:
      continue
    remap[component_labels[i]] = parent_labels[i]
    last_label = component_labels[i]

  return remap
def inverse_component_map(parent_labels, component_labels):
  """
  Given two label images with a mapping between their labels, build a
  dictionary from each parent label to its component labels.

  e.g. inverse_component_map([ 1, 2, 1, 3 ], [ 4, 4, 5, 6 ])
  returns { 1: [ 4, 5 ], 2: [ 4 ], 3: [ 6 ] }

  Returns: { $PARENT_LABEL: [ $COMPONENT_LABELS, ... ] }
  """
  if not isinstance(component_labels, np.ndarray):
    component_labels = np.array(component_labels)
  if not isinstance(parent_labels, np.ndarray):
    parent_labels = np.array(parent_labels)

  if component_labels.size == 0:
    return {}

  if component_labels.shape != parent_labels.shape:
    raise ValueError("The size of the inputs must match: {} vs {}".format(
      component_labels.shape, parent_labels.shape
    ))

  component_labels, parent_labels = match_array_orders(
    component_labels, parent_labels
  )
  flat_components = reshape(component_labels, (component_labels.size,))
  flat_parents = reshape(parent_labels, (parent_labels.size,))
  return _inverse_component_map(flat_parents, flat_components)
@cython.boundscheck(False)
@cython.wraparound(False) # turn off negative index wrapping for entire function
@cython.nonecheck(False)
def _inverse_component_map(
  cnp.ndarray[ALLINT, ndim=1, cast=True] parent_labels,
  cnp.ndarray[ALLINT_2, ndim=1, cast=True] component_labels
):
  """Build { parent_label: [component_labels...] } from two parallel 1D arrays."""
  cdef size_t size = parent_labels.size
  if size == 0:
    return {}

  remap = defaultdict(set)
  cdef size_t i = 0

  cdef ALLINT last_label = parent_labels[0]
  cdef ALLINT_2 last_component = component_labels[0]
  remap[parent_labels[0]].add(component_labels[0])
  for i in range(size):
    # skip consecutive duplicates of the same (parent, component) pair
    if last_label == parent_labels[i] and last_component == component_labels[i]:
      continue
    remap[parent_labels[i]].add(component_labels[i])
    last_label = parent_labels[i]
    last_component = component_labels[i]

  # for backwards compatibility
  for key in remap:
    remap[key] = list(remap[key])
  remap.default_factory = list

  return remap
def remap(arr, table, preserve_missing_labels=False, in_place=False):
  """
  remap(arr, table, preserve_missing_labels=False, in_place=False)

  Rewrite the values of a numpy array according to a lookup table.

  arr: an N-dimensional numpy array
  table: { label: new_label_value, ... }
  preserve_missing_labels: If an array value is not present in "table"...
    True: Leave it alone.
    False: Throw a KeyError.
  in_place: if True, modify the input array to reduce
    memory consumption.

  Returns: remapped array
  """
  if type(arr) is list:
    arr = np.array(arr)

  shape = arr.shape
  order = 'F' if arr.flags['F_CONTIGUOUS'] else 'C'
  original_dtype = arr.dtype

  if len(table):
    # widen the dtype if the replacement labels can't fit the current one
    lo, hi = min(table.values()), max(table.values())
    fit_value = lo if abs(lo) > abs(hi) else hi
    arr = refit(arr, fit_value, increase_only=True)

  # when refit widened the dtype, astype already produced a copy
  if not in_place and original_dtype == arr.dtype:
    arr = np.copy(arr, order=order)

  # identity table: nothing to rewrite
  if preserve_missing_labels and all(k == v for k, v in table.items()):
    return arr

  flat = reshape(arr, (arr.size,))
  flat = _remap(flat, table, preserve_missing_labels)
  return reshape(flat, shape, order=order)
@cython.boundscheck(False)
@cython.wraparound(False) # turn off negative index wrapping for entire function
@cython.nonecheck(False)
def _remap(cnp.ndarray[NUMBER] arr, dict table, uint8_t preserve_missing_labels):
  """
  Rewrite arr in place via table. Raises KeyError for values missing
  from table unless preserve_missing_labels is truthy.
  """
  cdef NUMBER[:] arrview = arr
  cdef size_t i = 0
  cdef size_t size = arr.size
  cdef NUMBER elem = 0

  if size == 0:
    return arr

  # fast path for remapping only a single label
  # e.g. for masking something out
  cdef NUMBER before = 0
  cdef NUMBER after = 0
  if preserve_missing_labels and len(table) == 1:
    before = next(iter(table.keys()))
    after = table[before]
    if before == after:
      return arr
    for i in range(size):
      if arr[i] == before:
        arr[i] = after
    return arr

  cdef flat_hash_map[NUMBER, NUMBER] tbl
  for k, v in table.items():
    tbl[k] = v

  # cache the previous element and its mapping so runs of
  # equal values skip the hash lookup
  cdef NUMBER last_elem = arrview[0]
  cdef NUMBER last_remap_id = 0

  with nogil:
    if tbl.find(last_elem) == tbl.end():
      if not preserve_missing_labels:
        raise KeyError("{} was not in the remap table.".format(last_elem))
      else:
        last_remap_id = last_elem
    else:
      arrview[0] = tbl[last_elem]
      last_remap_id = arrview[0]

    for i in range(1, size):
      elem = arrview[i]
      if elem == last_elem:
        arrview[i] = last_remap_id
        continue
      if tbl.find(elem) == tbl.end():
        if preserve_missing_labels:
          last_elem = elem
          last_remap_id = elem
          continue
        else:
          raise KeyError("{} was not in the remap table.".format(elem))
      else:
        arrview[i] = tbl[elem]
      last_elem = elem
      last_remap_id = arrview[i]

  return arr
@cython.boundscheck(False)
def remap_from_array(cnp.ndarray[UINT] arr, cnp.ndarray[UINT] vals):
  """
  remap_from_array(cnp.ndarray[UINT] arr, cnp.ndarray[UINT] vals)

  Remap arr in place by treating each element as an index into vals.
  Elements outside [0, vals.size) are left untouched.
  """
  cdef UINT[:] valview = vals
  cdef UINT[:] arrview = arr

  cdef size_t i = 0
  cdef size_t size = arr.size
  cdef size_t maxkey = vals.size - 1
  cdef UINT elem

  with nogil:
    for i in range(size):
      elem = arr[i]
      # NOTE(review): elem < 0 is vacuous since UINT is unsigned;
      # the bounds check effectively reduces to elem > maxkey
      if elem < 0 or elem > maxkey:
        continue
      arrview[i] = vals[elem]

  return arr
@cython.boundscheck(False)
def remap_from_array_kv(cnp.ndarray[ALLINT] arr, cnp.ndarray[ALLINT] keys, cnp.ndarray[ALLINT] vals):
  """
  remap_from_array_kv(cnp.ndarray[ALLINT] arr, cnp.ndarray[ALLINT] keys, cnp.ndarray[ALLINT] vals)

  Remap arr in place using the parallel arrays keys -> vals.
  Values not present in keys are left unchanged.
  """
  cdef ALLINT[:] keyview = keys
  cdef ALLINT[:] valview = vals
  cdef ALLINT[:] arrview = arr
  cdef flat_hash_map[ALLINT, ALLINT] remap_dict

  assert keys.size == vals.size

  cdef size_t i = 0
  cdef size_t size = keys.size
  cdef ALLINT elem

  # build the lookup table from the parallel key/value arrays
  with nogil:
    for i in range(size):
      remap_dict[keys[i]] = vals[i]

  i = 0
  size = arr.size

  with nogil:
    for i in range(size):
      elem = arr[i]
      if remap_dict.find(elem) == remap_dict.end():
        continue
      else:
        arrview[i] = remap_dict[elem]

  return arr
def pixel_pairs(labels):
  """
  Count adjacent memory locations that hold matching values.

  Useful for rapidly judging whether an image is more binary-like
  or more connectomics-like.
  """
  if labels.size == 0:
    return 0
  flat = reshape(labels, (labels.size,))
  return _pixel_pairs(flat)
def _pixel_pairs(cnp.ndarray[ALLINT, ndim=1] labels):
  """Count positions i >= 1 where labels[i-1] == labels[i]."""
  cdef size_t voxels = labels.size
  cdef size_t pairs = 0
  cdef ALLINT label = labels[0]

  cdef size_t i = 0
  for i in range(1, voxels):
    if label == labels[i]:
      pairs += 1
    else:
      label = labels[i]

  return pairs
@cython.binding(True)
def unique(labels, return_index=False, return_inverse=False, return_counts=False, axis=None):
  """
  Compute the sorted set of unique labels in the input array.

  return_index: also return the index of the first detected occurance
    of each label.
  return_counts: also return the unique label frequency as an array.

  Returns:
    if return_counts or return_index:
      return (unique_labels, unique_index, unique_counts)
    else:
      return unique_labels
  """
  if not isinstance(labels, np.ndarray):
    labels = np.array(labels)

  if not np.issubdtype(labels.dtype, np.integer):
    raise TypeError("fastremap.unique only supports integer types.")

  # These flags are currently unsupported so call uncle and
  # use the standard implementation instead.
  if return_inverse or (axis is not None):
    return np.unique(
      labels,
      return_index=return_index,
      return_inverse=return_inverse,
      return_counts=return_counts,
      axis=axis
    )

  cdef size_t voxels = labels.size

  shape = labels.shape
  fortran_order = labels.flags['F_CONTIGUOUS']
  labels_orig = labels
  labels = reshape(labels, (voxels,))

  cdef int64_t max_label
  cdef int64_t min_label

  if voxels > 0:
    min_label, max_label = minmax(labels)
  else:
    min_label, max_label = (0, 0)

  def c_order_index(index):
    # indices were computed on the flattened (possibly fortran-order)
    # view; convert to C-order indices to match numpy's behavior
    if len(shape) > 1 and fortran_order:
      return np.ravel_multi_index(
        np.unravel_index(index, shape, order='F'),
        shape, order='C'
      )
    return index

  # strategy selection: counting array when labels fit in [0, voxels),
  # shifted counting array when the label range is narrow, renumbering
  # when the image has long runs, and sorting as the general fallback
  if voxels == 0:
    uniq = np.array([], dtype=labels.dtype)
    counts = np.array([], dtype=np.uint32)
    index = np.array([], dtype=np.uint64)
  elif min_label >= 0 and max_label < <int64_t>voxels:
    uniq, index, counts = unique_via_array(labels, max_label, return_index=return_index)
  elif (max_label - min_label) <= <int64_t>voxels:
    uniq, index, counts = unique_via_shifted_array(labels, min_label, max_label, return_index=return_index)
  elif float(pixel_pairs(labels)) / float(voxels) > 0.66:
    uniq, index, counts = unique_via_renumber(labels, return_index=return_index)
  elif return_index:
    return np.unique(labels_orig, return_index=return_index, return_counts=return_counts)
  else:
    uniq, counts = unique_via_sort(labels)

  results = [ uniq ]
  if return_index:
    # This is required to match numpy's behavior
    results.append(c_order_index(index))
  if return_counts:
    results.append(counts)

  if len(results) > 1:
    return tuple(results)
  return uniq
def unique_via_shifted_array(labels, min_label=None, max_label=None, return_index=False):
  """
  Unique via the counting strategy after shifting values so the minimum
  label becomes zero. The input is shifted in place and restored
  before returning.
  """
  if min_label is None or max_label is None:
    min_label, max_label = minmax(labels)

  # shift to [0, max - min], count, then undo the shift
  labels -= min_label
  uniq, idx, counts = unique_via_array(labels, max_label - min_label + 1, return_index)
  labels += min_label

  uniq += min_label
  return uniq, idx, counts
def unique_via_renumber(labels, return_index=False):
  """
  Unique via renumbering: compress the labels to a dense range, run the
  counting strategy there, then translate the results back to the
  original label values.
  """
  dtype = labels.dtype
  labels, forward = renumber(labels)
  inverse = { new: old for old, new in forward.items() }
  uniq, idx, counts = unique_via_array(labels, max(inverse.keys()), return_index)
  uniq = np.array([ inverse[segid] for segid in uniq ], dtype=dtype)
  return uniq, idx, counts
def unique_via_sort(labels):
  """Dispatch to _unique_via_sort with a count width that fits labels.size."""
  if labels.size > np.iinfo(np.uint32).max:
    return _unique_via_sort(labels, <uint64_t>0)
  else:
    return _unique_via_sort(labels, <uint32_t>0)
@cython.boundscheck(False)
@cython.wraparound(False) # turn off negative index wrapping for entire function
@cython.nonecheck(False)
def _unique_via_sort(cnp.ndarray[ALLINT, ndim=1] labels, COUNT_T dummy):
  """Slower than unique_via_array but can handle any label."""
  # sort a copy so the caller's array is not reordered
  labels = np.copy(labels)
  labels.sort()

  cdef size_t voxels = labels.size

  # dummy only selects the COUNT_T specialization; match its numpy dtype
  count_dtype = np.uint32
  if sizeof(COUNT_T) == 8:
    count_dtype = np.uint64

  cdef cnp.ndarray[ALLINT, ndim=1] uniq = np.zeros((voxels,), dtype=labels.dtype)
  cdef cnp.ndarray[COUNT_T, ndim=1] counts = np.zeros((voxels,), dtype=count_dtype)

  cdef size_t i = 0
  cdef size_t j = 0

  cdef ALLINT cur = labels[0]
  cdef size_t accum = 1

  # count runs of equal values in the sorted array
  for i in range(1, voxels):
    if cur == labels[i]:
      accum += 1
    else:
      uniq[j] = cur
      counts[j] = accum
      accum = 1
      cur = labels[i]
      j += 1

  # flush the final run
  uniq[j] = cur
  counts[j] = accum

  return uniq[:j+1], counts[:j+1]
def unique_via_array(labels, max_label, return_index=False):
  """
  unique(cnp.ndarray[ALLINT, ndim=1] labels, return_counts=False)

  Faster implementation of np.unique that depends
  on the maximum label in the array being less than
  the size of the array.
  """
  # pick a count width that cannot overflow for this array size
  if labels.size > np.iinfo(np.uint32).max:
    return _unique_via_array(labels, max_label, return_index, <uint64_t>0)
  else:
    return _unique_via_array(labels, max_label, return_index, <uint32_t>0)
@cython.boundscheck(False)
@cython.wraparound(False) # turn off negative index wrapping for entire function
@cython.nonecheck(False)
def _unique_via_array(
  cnp.ndarray[ALLINT, ndim=1] labels,
  size_t max_label,
  return_index,
  COUNT_T dummy
):
  """
  Counting-sort style unique: histogram the labels into an array of
  size max_label + 1, then compact the non-zero entries.
  dummy selects the COUNT_T specialization (uint32 or uint64 counts).
  """
  count_dtype = np.uint32
  if sizeof(COUNT_T) == 8:
    count_dtype = np.uint64

  cdef cnp.ndarray[COUNT_T, ndim=1] counts = np.zeros(
    (max_label+1,), dtype=count_dtype
  )
  cdef cnp.ndarray[uintptr_t, ndim=1] index

  # sentinel marks "label not yet seen" in the first-occurrence index
  cdef uintptr_t sentinel = np.iinfo(np.uintp).max
  if return_index:
    index = np.zeros(
      (max_label+1,), dtype=np.uintp
    ) + sentinel

  cdef size_t voxels = labels.shape[0]

  cdef size_t i = 0
  for i in range(voxels):
    counts[labels[i]] += 1

  if return_index:
    for i in range(voxels):
      if index[labels[i]] == sentinel:
        index[labels[i]] = i

  # number of distinct labels actually present
  cdef size_t real_size = 0
  for i in range(max_label + 1):
    if counts[i] > 0:
      real_size += 1

  cdef cnp.ndarray[ALLINT, ndim=1] segids = np.zeros(
    (real_size,), dtype=labels.dtype
  )
  # NOTE(review): cts is always uint32 even when COUNT_T is uint64,
  # so very large per-label counts would narrow here — verify intended
  cdef cnp.ndarray[uint32_t, ndim=1] cts = np.zeros(
    (real_size,), dtype=np.uint32
  )
  cdef cnp.ndarray[uintptr_t, ndim=1] idx

  # compact the sparse histogram into dense output arrays
  cdef size_t j = 0
  for i in range(max_label + 1):
    if counts[i] > 0:
      segids[j] = i
      cts[j] = counts[i]
      j += 1

  if return_index:
    idx = np.zeros( (real_size,), dtype=np.uintp)
    j = 0
    for i in range(max_label + 1):
      if counts[i] > 0:
        idx[j] = index[i]
        j += 1

  if return_index:
    return segids, idx, cts
  else:
    return segids, None, cts
def transpose(arr):
"""
transpose(arr)
For up to four dimensional matrices, perform in-place transposition.
Square matrices up to three dimensions are faster than numpy's out-of-place
algorithm. Default to the out-of-place implementation numpy uses for cases
that aren't specially handled.
Returns: transposed numpy array
"""
if not arr.flags['F_CONTIGUOUS'] and not arr.flags['C_CONTIGUOUS']:
arr = np.copy(arr, order='C')
shape = arr.shape
strides = arr.strides
cdef int nbytes = np.dtype(arr.dtype).itemsize
dtype = arr.dtype
if arr.dtype == bool:
arr = arr.view(np.uint8)
if arr.ndim == 2:
arr = ipt2d(arr)
return arr.view(dtype)
elif arr.ndim == 3: