Skip to content

Commit

Permalink
Backport PR #52577 on branch 2.0.x (BUG: describe not respecting Arro…
Browse files Browse the repository at this point in the history
…wDtype in include/exclude) (#52879)

Backport PR #52577: BUG: describe not respecting ArrowDtype in include/exclude

Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
  • Loading branch information
meeseeksmachine and phofl committed Apr 23, 2023
1 parent 22f9e93 commit 6e7efb4
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 0 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.1.rst
Expand Up @@ -38,6 +38,7 @@ Bug fixes
- Bug in :func:`to_datetime` and :func:`to_timedelta` when trying to convert numeric data with an :class:`ArrowDtype` (:issue:`52425`)
- Bug in :func:`to_numeric` with ``errors='coerce'`` and ``dtype_backend='pyarrow'`` with :class:`ArrowDtype` data (:issue:`52588`)
- Bug in :meth:`ArrowDtype.__from_arrow__` not respecting an explicitly given dtype (:issue:`52533`)
- Bug in :meth:`DataFrame.describe` not respecting ``ArrowDtype`` in ``include`` and ``exclude`` (:issue:`52570`)
- Bug in :meth:`DataFrame.max` and related casting different :class:`Timestamp` resolutions always to nanoseconds (:issue:`52524`)
- Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`)
- Bug in :meth:`Series.dt.tz_localize` incorrectly localizing timestamps with :class:`ArrowDtype` (:issue:`52677`)
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/dtypes/common.py
Expand Up @@ -1565,6 +1565,10 @@ def infer_dtype_from_object(dtype) -> type:
except TypeError:
# Should still pass if we don't have a date-like
pass
if hasattr(dtype, "numpy_dtype"):
# TODO: Implement this properly
# https://github.com/pandas-dev/pandas/issues/52576
return dtype.numpy_dtype.type
return dtype.type

try:
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/frame.py
Expand Up @@ -171,6 +171,7 @@
PeriodArray,
TimedeltaArray,
)
from pandas.core.arrays.arrow import ArrowDtype
from pandas.core.arrays.sparse import SparseFrameAccessor
from pandas.core.construction import (
ensure_wrapped_if_datetimelike,
Expand Down Expand Up @@ -4695,6 +4696,7 @@ def check_int_infer_dtype(dtypes):

def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool:
# GH 46870: BooleanDtype._is_numeric == True but should be excluded
dtype = dtype if not isinstance(dtype, ArrowDtype) else dtype.numpy_dtype
return issubclass(dtype.type, tuple(dtypes_set)) or (
np.number in dtypes_set
and getattr(dtype, "_is_numeric", False)
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/frame/methods/test_describe.py
Expand Up @@ -395,3 +395,23 @@ def test_ea_with_na(self, any_numeric_ea_dtype):
dtype="Float64",
)
tm.assert_frame_equal(result, expected)

def test_describe_exclude_pa_dtype(self):
    """Check that ``describe`` honours ``ArrowDtype`` in ``include``/``exclude``.

    Builds a frame with three pyarrow-backed integer columns (int8, int16,
    int32), asks ``describe`` to include the int8 dtype and exclude the
    int32 dtype, and expects only column ``"a"`` in the summary, with the
    result typed as a pyarrow float64 ``ArrowDtype``.
    """
    # GH#52570
    pa = pytest.importorskip("pyarrow")

    # The dtypes passed to include/exclude are the same ones the columns use.
    included_dtype = pd.ArrowDtype(pa.int8())
    excluded_dtype = pd.ArrowDtype(pa.int32())

    frame = DataFrame(
        {
            "a": Series([1, 2, 3], dtype=included_dtype),
            "b": Series([1, 2, 3], dtype=pd.ArrowDtype(pa.int16())),
            "c": Series([1, 2, 3], dtype=excluded_dtype),
        }
    )
    result = frame.describe(include=included_dtype, exclude=excluded_dtype)

    summary_rows = ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]
    expected = DataFrame(
        {"a": [3, 2, 1, 1, 1.5, 2, 2.5, 3]},
        index=summary_rows,
        dtype=pd.ArrowDtype(pa.float64()),
    )
    tm.assert_frame_equal(result, expected)

0 comments on commit 6e7efb4

Please sign in to comment.