REF: move MaskedArray subclass attributes to dtypes (#58423)

pandas-dev · Apr 29, 2024 · a2bce66 · a2bce66
1 parent cf0014a
commit a2bce66
Show file tree

Hide file tree

Showing 7 changed files with 53 additions and 63 deletions.
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -2808,14 +2808,14 @@ def maybe_convert_objects(ndarray[object] objects,
             from pandas.core.arrays import IntegerArray
 
             # Set these values to 1 to be deterministic, match
-            #  IntegerArray._internal_fill_value
+            #  IntegerDtype._internal_fill_value
             result[mask] = 1
             result = IntegerArray(result, mask)
         elif result is floats and convert_to_nullable_dtype:
             from pandas.core.arrays import FloatingArray
 
             # Set these values to 1.0 to be deterministic, match
-            #  FloatingArray._internal_fill_value
+            #  FloatingDtype._internal_fill_value
             result[mask] = 1.0
             result = FloatingArray(result, mask)
 

diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -68,6 +68,9 @@ class BooleanDtype(BaseMaskedDtype):
 
     name: ClassVar[str] = "boolean"
 
+    # The value used to fill '_data' to avoid upcasting
+    _internal_fill_value = False
+
     # https://github.com/python/mypy/issues/4125
     # error: Signature of "type" incompatible with supertype "BaseMaskedDtype"
     @property
@@ -293,13 +296,6 @@ class BooleanArray(BaseMaskedArray):
     Length: 3, dtype: boolean
     """
 
-    # The value used to fill '_data' to avoid upcasting
-    _internal_fill_value = False
-    # Fill values used for any/all
-    # Incompatible types in assignment (expression has type "bool", base class
-    # "BaseMaskedArray" defined the type as "<typing special form>")
-    _truthy_value = True  # type: ignore[assignment]
-    _falsey_value = False  # type: ignore[assignment]
     _TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"}
     _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"}
 

diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py
@@ -23,6 +23,8 @@ class FloatingDtype(NumericDtype):
     The attributes name & type are set when these subclasses are created.
     """
 
+    # The value used to fill '_data' to avoid upcasting
+    _internal_fill_value = np.nan
     _default_np_dtype = np.dtype(np.float64)
     _checker = is_float_dtype
 
@@ -113,14 +115,6 @@ class FloatingArray(NumericArray):
 
     _dtype_cls = FloatingDtype
 
-    # The value used to fill '_data' to avoid upcasting
-    _internal_fill_value = np.nan
-    # Fill values used for any/all
-    # Incompatible types in assignment (expression has type "float", base class
-    # "BaseMaskedArray" defined the type as "<typing special form>")
-    _truthy_value = 1.0  # type: ignore[assignment]
-    _falsey_value = 0.0  # type: ignore[assignment]
-
 
 _dtype_docstring = """
 An ExtensionDtype for {dtype} data.

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -23,6 +23,8 @@ class IntegerDtype(NumericDtype):
     The attributes name & type are set when these subclasses are created.
     """
 
+    # The value used to fill '_data' to avoid upcasting
+    _internal_fill_value = 1
     _default_np_dtype = np.dtype(np.int64)
     _checker = is_integer_dtype
 
@@ -128,14 +130,6 @@ class IntegerArray(NumericArray):
 
     _dtype_cls = IntegerDtype
 
-    # The value used to fill '_data' to avoid upcasting
-    _internal_fill_value = 1
-    # Fill values used for any/all
-    # Incompatible types in assignment (expression has type "int", base class
-    # "BaseMaskedArray" defined the type as "<typing special form>")
-    _truthy_value = 1  # type: ignore[assignment]
-    _falsey_value = 0  # type: ignore[assignment]
-
 
 _dtype_docstring = """
 An ExtensionDtype for {dtype} integer data.

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -5,6 +5,7 @@
     Any,
     Callable,
     Literal,
+    cast,
     overload,
 )
 import warnings
@@ -16,22 +17,6 @@
     missing as libmissing,
 )
 from pandas._libs.tslibs import is_supported_dtype
-from pandas._typing import (
-    ArrayLike,
-    AstypeArg,
-    AxisInt,
-    DtypeObj,
-    FillnaOptions,
-    InterpolateOptions,
-    NpDtype,
-    PositionalIndexer,
-    Scalar,
-    ScalarIndexer,
-    Self,
-    SequenceIndexer,
-    Shape,
-    npt,
-)
 from pandas.compat import (
     IS64,
     is_platform_windows,
@@ -97,6 +82,20 @@
     from pandas._typing import (
         NumpySorter,
         NumpyValueArrayLike,
+        ArrayLike,
+        AstypeArg,
+        AxisInt,
+        DtypeObj,
+        FillnaOptions,
+        InterpolateOptions,
+        NpDtype,
+        PositionalIndexer,
+        Scalar,
+        ScalarIndexer,
+        Self,
+        SequenceIndexer,
+        Shape,
+        npt,
     )
     from pandas._libs.missing import NAType
     from pandas.core.arrays import FloatingArray
@@ -111,16 +110,10 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):
     numpy based
     """
 
-    # The value used to fill '_data' to avoid upcasting
-    _internal_fill_value: Scalar
     # our underlying data and mask are each ndarrays
     _data: np.ndarray
     _mask: npt.NDArray[np.bool_]
 
-    # Fill values used for any/all
-    _truthy_value = Scalar  # bool(_truthy_value) = True
-    _falsey_value = Scalar  # bool(_falsey_value) = False
-
     @classmethod
     def _simple_new(cls, values: np.ndarray, mask: npt.NDArray[np.bool_]) -> Self:
         result = BaseMaskedArray.__new__(cls)
@@ -155,8 +148,9 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self:
     @classmethod
     @doc(ExtensionArray._empty)
     def _empty(cls, shape: Shape, dtype: ExtensionDtype) -> Self:
-        values = np.empty(shape, dtype=dtype.type)
-        values.fill(cls._internal_fill_value)
+        dtype = cast(BaseMaskedDtype, dtype)
+        values: np.ndarray = np.empty(shape, dtype=dtype.type)
+        values.fill(dtype._internal_fill_value)
         mask = np.ones(shape, dtype=bool)
         result = cls(values, mask)
         if not isinstance(result, cls) or dtype != result.dtype:
@@ -917,7 +911,9 @@ def take(
     ) -> Self:
         # we always fill with 1 internally
         # to avoid upcasting
-        data_fill_value = self._internal_fill_value if isna(fill_value) else fill_value
+        data_fill_value = (
+            self.dtype._internal_fill_value if isna(fill_value) else fill_value
+        )
         result = take(
             self._data,
             indexer,
@@ -1397,12 +1393,7 @@ def any(
         nv.validate_any((), kwargs)
 
         values = self._data.copy()
-        # error: Argument 3 to "putmask" has incompatible type "object";
-        # expected "Union[_SupportsArray[dtype[Any]],
-        # _NestedSequence[_SupportsArray[dtype[Any]]],
-        # bool, int, float, complex, str, bytes,
-        # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]"
-        np.putmask(values, self._mask, self._falsey_value)  # type: ignore[arg-type]
+        np.putmask(values, self._mask, self.dtype._falsey_value)
         result = values.any()
         if skipna:
             return result
@@ -1490,12 +1481,7 @@ def all(
         nv.validate_all((), kwargs)
 
         values = self._data.copy()
-        # error: Argument 3 to "putmask" has incompatible type "object";
-        # expected "Union[_SupportsArray[dtype[Any]],
-        # _NestedSequence[_SupportsArray[dtype[Any]]],
-        # bool, int, float, complex, str, bytes,
-        # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]"
-        np.putmask(values, self._mask, self._truthy_value)  # type: ignore[arg-type]
+        np.putmask(values, self._mask, self.dtype._truthy_value)
         result = values.all(axis=axis)
 
         if skipna:

diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
@@ -221,7 +221,7 @@ def _coerce_to_data_and_mask(
     # we copy as need to coerce here
     if mask.any():
         values = values.copy()
-        values[mask] = cls._internal_fill_value
+        values[mask] = dtype_cls._internal_fill_value
     if inferred_type in ("string", "unicode"):
         # casts from str are always safe since they raise
         # a ValueError if the str cannot be parsed into a float

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
@@ -79,6 +79,7 @@
         DtypeObj,
         IntervalClosedType,
         Ordered,
+        Scalar,
         Self,
         npt,
         type_t,
@@ -1551,6 +1552,25 @@ class BaseMaskedDtype(ExtensionDtype):
 
     base = None
     type: type
+    _internal_fill_value: Scalar
+
+    @property
+    def _truthy_value(self):
+        # Fill values used for 'any'
+        if self.kind == "f":
+            return 1.0
+        if self.kind in "iu":
+            return 1
+        return True
+
+    @property
+    def _falsey_value(self):
+        # Fill values used for 'all'
+        if self.kind == "f":
+            return 0.0
+        if self.kind in "iu":
+            return 0
+        return False
 
     @property
     def na_value(self) -> libmissing.NAType: