Skip to content

Commit

Permalink
REF: move MaskedArray subclass attributes to dtypes (#58423)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Apr 29, 2024
1 parent cf0014a commit a2bce66
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 63 deletions.
4 changes: 2 additions & 2 deletions pandas/_libs/lib.pyx
Expand Up @@ -2808,14 +2808,14 @@ def maybe_convert_objects(ndarray[object] objects,
from pandas.core.arrays import IntegerArray

# Set these values to 1 to be deterministic, match
# IntegerArray._internal_fill_value
# IntegerDtype._internal_fill_value
result[mask] = 1
result = IntegerArray(result, mask)
elif result is floats and convert_to_nullable_dtype:
from pandas.core.arrays import FloatingArray

# Set these values to 1.0 to be deterministic, match
# FloatingArray._internal_fill_value
# FloatingDtype._internal_fill_value
result[mask] = 1.0
result = FloatingArray(result, mask)

Expand Down
10 changes: 3 additions & 7 deletions pandas/core/arrays/boolean.py
Expand Up @@ -68,6 +68,9 @@ class BooleanDtype(BaseMaskedDtype):

name: ClassVar[str] = "boolean"

# The value used to fill '_data' to avoid upcasting
_internal_fill_value = False

# https://github.com/python/mypy/issues/4125
# error: Signature of "type" incompatible with supertype "BaseMaskedDtype"
@property
Expand Down Expand Up @@ -293,13 +296,6 @@ class BooleanArray(BaseMaskedArray):
Length: 3, dtype: boolean
"""

# The value used to fill '_data' to avoid upcasting
_internal_fill_value = False
# Fill values used for any/all
# Incompatible types in assignment (expression has type "bool", base class
# "BaseMaskedArray" defined the type as "<typing special form>")
_truthy_value = True # type: ignore[assignment]
_falsey_value = False # type: ignore[assignment]
_TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"}
_FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"}

Expand Down
10 changes: 2 additions & 8 deletions pandas/core/arrays/floating.py
Expand Up @@ -23,6 +23,8 @@ class FloatingDtype(NumericDtype):
The attributes name & type are set when these subclasses are created.
"""

# The value used to fill '_data' to avoid upcasting
_internal_fill_value = np.nan
_default_np_dtype = np.dtype(np.float64)
_checker = is_float_dtype

Expand Down Expand Up @@ -113,14 +115,6 @@ class FloatingArray(NumericArray):

_dtype_cls = FloatingDtype

# The value used to fill '_data' to avoid upcasting
_internal_fill_value = np.nan
# Fill values used for any/all
# Incompatible types in assignment (expression has type "float", base class
# "BaseMaskedArray" defined the type as "<typing special form>")
_truthy_value = 1.0 # type: ignore[assignment]
_falsey_value = 0.0 # type: ignore[assignment]


_dtype_docstring = """
An ExtensionDtype for {dtype} data.
Expand Down
10 changes: 2 additions & 8 deletions pandas/core/arrays/integer.py
Expand Up @@ -23,6 +23,8 @@ class IntegerDtype(NumericDtype):
The attributes name & type are set when these subclasses are created.
"""

# The value used to fill '_data' to avoid upcasting
_internal_fill_value = 1
_default_np_dtype = np.dtype(np.int64)
_checker = is_integer_dtype

Expand Down Expand Up @@ -128,14 +130,6 @@ class IntegerArray(NumericArray):

_dtype_cls = IntegerDtype

# The value used to fill '_data' to avoid upcasting
_internal_fill_value = 1
# Fill values used for any/all
# Incompatible types in assignment (expression has type "int", base class
# "BaseMaskedArray" defined the type as "<typing special form>")
_truthy_value = 1 # type: ignore[assignment]
_falsey_value = 0 # type: ignore[assignment]


_dtype_docstring = """
An ExtensionDtype for {dtype} integer data.
Expand Down
60 changes: 23 additions & 37 deletions pandas/core/arrays/masked.py
Expand Up @@ -5,6 +5,7 @@
Any,
Callable,
Literal,
cast,
overload,
)
import warnings
Expand All @@ -16,22 +17,6 @@
missing as libmissing,
)
from pandas._libs.tslibs import is_supported_dtype
from pandas._typing import (
ArrayLike,
AstypeArg,
AxisInt,
DtypeObj,
FillnaOptions,
InterpolateOptions,
NpDtype,
PositionalIndexer,
Scalar,
ScalarIndexer,
Self,
SequenceIndexer,
Shape,
npt,
)
from pandas.compat import (
IS64,
is_platform_windows,
Expand Down Expand Up @@ -97,6 +82,20 @@
from pandas._typing import (
NumpySorter,
NumpyValueArrayLike,
ArrayLike,
AstypeArg,
AxisInt,
DtypeObj,
FillnaOptions,
InterpolateOptions,
NpDtype,
PositionalIndexer,
Scalar,
ScalarIndexer,
Self,
SequenceIndexer,
Shape,
npt,
)
from pandas._libs.missing import NAType
from pandas.core.arrays import FloatingArray
Expand All @@ -111,16 +110,10 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):
numpy based
"""

# The value used to fill '_data' to avoid upcasting
_internal_fill_value: Scalar
# our underlying data and mask are each ndarrays
_data: np.ndarray
_mask: npt.NDArray[np.bool_]

# Fill values used for any/all
_truthy_value = Scalar # bool(_truthy_value) = True
_falsey_value = Scalar # bool(_falsey_value) = False

@classmethod
def _simple_new(cls, values: np.ndarray, mask: npt.NDArray[np.bool_]) -> Self:
result = BaseMaskedArray.__new__(cls)
Expand Down Expand Up @@ -155,8 +148,9 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self:
@classmethod
@doc(ExtensionArray._empty)
def _empty(cls, shape: Shape, dtype: ExtensionDtype) -> Self:
values = np.empty(shape, dtype=dtype.type)
values.fill(cls._internal_fill_value)
dtype = cast(BaseMaskedDtype, dtype)
values: np.ndarray = np.empty(shape, dtype=dtype.type)
values.fill(dtype._internal_fill_value)
mask = np.ones(shape, dtype=bool)
result = cls(values, mask)
if not isinstance(result, cls) or dtype != result.dtype:
Expand Down Expand Up @@ -917,7 +911,9 @@ def take(
) -> Self:
# we always fill with 1 internally
# to avoid upcasting
data_fill_value = self._internal_fill_value if isna(fill_value) else fill_value
data_fill_value = (
self.dtype._internal_fill_value if isna(fill_value) else fill_value
)
result = take(
self._data,
indexer,
Expand Down Expand Up @@ -1397,12 +1393,7 @@ def any(
nv.validate_any((), kwargs)

values = self._data.copy()
# error: Argument 3 to "putmask" has incompatible type "object";
# expected "Union[_SupportsArray[dtype[Any]],
# _NestedSequence[_SupportsArray[dtype[Any]]],
# bool, int, float, complex, str, bytes,
# _NestedSequence[Union[bool, int, float, complex, str, bytes]]]"
np.putmask(values, self._mask, self._falsey_value) # type: ignore[arg-type]
np.putmask(values, self._mask, self.dtype._falsey_value)
result = values.any()
if skipna:
return result
Expand Down Expand Up @@ -1490,12 +1481,7 @@ def all(
nv.validate_all((), kwargs)

values = self._data.copy()
# error: Argument 3 to "putmask" has incompatible type "object";
# expected "Union[_SupportsArray[dtype[Any]],
# _NestedSequence[_SupportsArray[dtype[Any]]],
# bool, int, float, complex, str, bytes,
# _NestedSequence[Union[bool, int, float, complex, str, bytes]]]"
np.putmask(values, self._mask, self._truthy_value) # type: ignore[arg-type]
np.putmask(values, self._mask, self.dtype._truthy_value)
result = values.all(axis=axis)

if skipna:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/numeric.py
Expand Up @@ -221,7 +221,7 @@ def _coerce_to_data_and_mask(
# we copy as need to coerce here
if mask.any():
values = values.copy()
values[mask] = cls._internal_fill_value
values[mask] = dtype_cls._internal_fill_value
if inferred_type in ("string", "unicode"):
# casts from str are always safe since they raise
# a ValueError if the str cannot be parsed into a float
Expand Down
20 changes: 20 additions & 0 deletions pandas/core/dtypes/dtypes.py
Expand Up @@ -79,6 +79,7 @@
DtypeObj,
IntervalClosedType,
Ordered,
Scalar,
Self,
npt,
type_t,
Expand Down Expand Up @@ -1551,6 +1552,25 @@ class BaseMaskedDtype(ExtensionDtype):

base = None
type: type
_internal_fill_value: Scalar

@property
def _truthy_value(self):
# Fill values used for 'any'
if self.kind == "f":
return 1.0
if self.kind in "iu":
return 1
return True

@property
def _falsey_value(self):
# Fill values used for 'all'
if self.kind == "f":
return 0.0
if self.kind in "iu":
return 0
return False

@property
def na_value(self) -> libmissing.NAType:
Expand Down

0 comments on commit a2bce66

Please sign in to comment.