Skip to content

Commit

Permalink
CLN: Simplify map_infer_mask (#58483)
Browse files — browse the repository at this point in the history
* CLN: Simplify map_infer_mask

* fix some tests

* fix tests?

* Fix types?

* fixup annotations
  • Loading branch information
lithomas1 committed May 10, 2024
1 parent e67241b commit 24182c2
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 74 deletions.
16 changes: 10 additions & 6 deletions asv_bench/benchmarks/series_methods.py
Expand Up @@ -148,10 +148,14 @@ def time_searchsorted(self, dtype):


class Map:
params = (["dict", "Series", "lambda"], ["object", "category", "int"])
param_names = "mapper"

def setup(self, mapper, dtype):
params = (
["dict", "Series", "lambda"],
["object", "category", "int"],
[None, "ignore"],
)
param_names = ["mapper", "dtype", "na_action"]

def setup(self, mapper, dtype, na_action):
map_size = 1000
map_data = Series(map_size - np.arange(map_size), dtype=dtype)

Expand All @@ -168,8 +172,8 @@ def setup(self, mapper, dtype):

self.s = Series(np.random.randint(0, map_size, 10000), dtype=dtype)

def time_map(self, mapper, *args, **kwargs):
self.s.map(self.map_data)
def time_map(self, mapper, dtype, na_action):
self.s.map(self.map_data, na_action=na_action)


class Clip:
Expand Down
5 changes: 0 additions & 5 deletions pandas/_libs/dtypes.pxd
Expand Up @@ -34,8 +34,3 @@ ctypedef fused numeric_t:
ctypedef fused numeric_object_t:
numeric_t
object

ctypedef fused uint8_int64_object_t:
uint8_t
int64_t
object
70 changes: 29 additions & 41 deletions pandas/_libs/lib.pyx
Expand Up @@ -53,6 +53,7 @@ from numpy cimport (
PyArray_ITER_DATA,
PyArray_ITER_NEXT,
PyArray_IterNew,
PyArray_SETITEM,
complex128_t,
flatiter,
float64_t,
Expand All @@ -75,7 +76,6 @@ cdef extern from "pandas/parser/pd_parser.h":
PandasParser_IMPORT

from pandas._libs cimport util
from pandas._libs.dtypes cimport uint8_int64_object_t
from pandas._libs.util cimport (
INT64_MAX,
INT64_MIN,
Expand Down Expand Up @@ -2845,14 +2845,16 @@ no_default = _NoDefault.no_default # Sentinel indicating the default value.
NoDefault = Literal[_NoDefault.no_default]


@cython.boundscheck(False)
@cython.wraparound(False)
def map_infer_mask(
ndarray[object] arr,
object f,
const uint8_t[:] mask,
*,
bint convert=True,
object na_value=no_default,
cnp.dtype dtype=np.dtype(object)
ndarray arr,
object f,
const uint8_t[:] mask,
*,
bint convert=True,
object na_value=no_default,
cnp.dtype dtype=np.dtype(object)
) -> "ArrayLike":
"""
Substitute for np.vectorize with pandas-friendly dtype inference.
Expand All @@ -2875,53 +2877,39 @@ def map_infer_mask(
-------
np.ndarray or an ExtensionArray
"""
cdef Py_ssize_t n = len(arr)
result = np.empty(n, dtype=dtype)

_map_infer_mask(
result,
arr,
f,
mask,
na_value,
)
if convert:
return maybe_convert_objects(result)
else:
return result


@cython.boundscheck(False)
@cython.wraparound(False)
def _map_infer_mask(
ndarray[uint8_int64_object_t] out,
ndarray[object] arr,
object f,
const uint8_t[:] mask,
object na_value=no_default,
) -> None:
"""
Helper for map_infer_mask, split off to use fused types based on the result.
"""
cdef:
Py_ssize_t i, n
Py_ssize_t i
Py_ssize_t n = len(arr)
object val

n = len(arr)
ndarray result = np.empty(n, dtype=dtype)

flatiter arr_it = PyArray_IterNew(arr)
flatiter result_it = PyArray_IterNew(result)

for i in range(n):
if mask[i]:
if na_value is no_default:
val = arr[i]
val = PyArray_GETITEM(arr, PyArray_ITER_DATA(arr_it))
else:
val = na_value
else:
val = f(arr[i])
val = PyArray_GETITEM(arr, PyArray_ITER_DATA(arr_it))
val = f(val)

if cnp.PyArray_IsZeroDim(val):
# unbox 0-dim arrays, GH#690
val = val.item()

out[i] = val
PyArray_SETITEM(result, PyArray_ITER_DATA(result_it), val)

PyArray_ITER_NEXT(arr_it)
PyArray_ITER_NEXT(result_it)

if convert:
return maybe_convert_objects(result)
else:
return result


@cython.boundscheck(False)
Expand Down
41 changes: 19 additions & 22 deletions pandas/core/arrays/string_arrow.py
Expand Up @@ -629,28 +629,25 @@ def _str_map(
na_value = np.nan
else:
na_value = False
try:
result = lib.map_infer_mask(
arr,
f,
mask.view("uint8"),
convert=False,
na_value=na_value,
dtype=np.dtype(cast(type, dtype)),
)
return result

except ValueError:
result = lib.map_infer_mask(
arr,
f,
mask.view("uint8"),
convert=False,
na_value=na_value,
)
if convert and result.dtype == object:
result = lib.maybe_convert_objects(result)
return result

dtype = np.dtype(cast(type, dtype))
if mask.any():
# numpy int/bool dtypes cannot hold NaNs so we must convert to
# float64 for int (to match maybe_convert_objects) or
# object for bool (again to match maybe_convert_objects)
if is_integer_dtype(dtype):
dtype = np.dtype("float64")
else:
dtype = np.dtype(object)
result = lib.map_infer_mask(
arr,
f,
mask.view("uint8"),
convert=False,
na_value=na_value,
dtype=dtype,
)
return result

elif is_string_dtype(dtype) and not is_object_dtype(dtype):
# i.e. StringDtype
Expand Down

0 comments on commit 24182c2

Please sign in to comment.