A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://github.com/pandas-dev/pandas/commit/67e8c4c3761ab1da4b0a341a472c0fe2ea393e8b below:

DTI/DTA.astype support non-nano (#47579) · pandas-dev/pandas@67e8c4c · GitHub

File tree (Expand file tree / Collapse file tree): 12 files changed

+97

-14

lines changed

Filter options

Expand file tree / Collapse file tree: 12 files changed

+97

-14

lines changed

Original file line number Diff line number Diff line change

@@ -275,7 +275,9 @@ Other enhancements

275 275

- :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, and :class:`.IndexingError` are now exposed in ``pandas.errors`` (:issue:`27656`)

276 276

- Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`)

277 277

- Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files)

278 +

- :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`)

278 279

- :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`)

280 +

-

279 281 280 282

.. ---------------------------------------------------------------------------

281 283

.. _whatsnew_150.notable_bug_fixes:

Original file line number Diff line number Diff line change

@@ -30,12 +30,14 @@

30 30

"get_unit_from_dtype",

31 31

"periods_per_day",

32 32

"periods_per_second",

33 +

"is_supported_unit",

33 34

]

34 35 35 36

from pandas._libs.tslibs import dtypes

36 37

from pandas._libs.tslibs.conversion import localize_pydatetime

37 38

from pandas._libs.tslibs.dtypes import (

38 39

Resolution,

40 +

is_supported_unit,

39 41

periods_per_day,

40 42

periods_per_second,

41 43

)

Original file line number Diff line number Diff line change

@@ -7,6 +7,7 @@ _period_code_map: dict[str, int]

7 7 8 8

def periods_per_day(reso: int) -> int: ...

9 9

def periods_per_second(reso: int) -> int: ...

10 +

def is_supported_unit(reso: int) -> bool: ...

10 11 11 12

class PeriodDtypeBase:

12 13

_dtype_code: int # PeriodDtypeCode

Original file line number Diff line number Diff line change

@@ -277,6 +277,15 @@ class NpyDatetimeUnit(Enum):

277 277

NPY_FR_GENERIC = NPY_DATETIMEUNIT.NPY_FR_GENERIC

278 278 279 279 280 +

def is_supported_unit(NPY_DATETIMEUNIT reso):

281 +

return (

282 +

reso == NPY_DATETIMEUNIT.NPY_FR_ns

283 +

or reso == NPY_DATETIMEUNIT.NPY_FR_us

284 +

or reso == NPY_DATETIMEUNIT.NPY_FR_ms

285 +

or reso == NPY_DATETIMEUNIT.NPY_FR_s

286 +

)

287 + 288 + 280 289

cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit):

281 290

if unit == NPY_DATETIMEUNIT.NPY_FR_ns or unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC:

282 291

# generic -> default to nanoseconds

Original file line number Diff line number Diff line change

@@ -31,6 +31,7 @@

31 31

get_unit_from_dtype,

32 32

ints_to_pydatetime,

33 33

is_date_array_normalized,

34 +

is_supported_unit,

34 35

is_unitless,

35 36

normalize_i8_timestamps,

36 37

timezones,

@@ -603,12 +604,26 @@ def astype(self, dtype, copy: bool = True):

603 604

return self.copy()

604 605

return self

605 606 607 +

elif (

608 +

self.tz is None

609 +

and is_datetime64_dtype(dtype)

610 +

and not is_unitless(dtype)

611 +

and is_supported_unit(get_unit_from_dtype(dtype))

612 +

):

613 +

# unit conversion e.g. datetime64[s]

614 +

res_values = astype_overflowsafe(self._ndarray, dtype, copy=True)

615 +

return type(self)._simple_new(res_values, dtype=res_values.dtype)

616 +

# TODO: preserve freq?

617 + 606 618

elif is_datetime64_ns_dtype(dtype):

607 619

return astype_dt64_to_dt64tz(self, dtype, copy, via_utc=False)

608 620 609 -

elif self.tz is None and is_datetime64_dtype(dtype) and dtype != self.dtype:

610 -

# unit conversion e.g. datetime64[s]

611 -

return self._ndarray.astype(dtype)

621 +

elif self.tz is not None and isinstance(dtype, DatetimeTZDtype):

622 +

# tzaware unit conversion e.g. datetime64[s, UTC]

623 +

np_dtype = np.dtype(dtype.str)

624 +

res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy)

625 +

return type(self)._simple_new(res_values, dtype=dtype)

626 +

# TODO: preserve freq?

612 627 613 628

elif is_period_dtype(dtype):

614 629

return self.to_period(freq=dtype.freq)

Original file line number Diff line number Diff line change

@@ -15,6 +15,7 @@

15 15

import numpy as np

16 16 17 17

from pandas._libs import lib

18 +

from pandas._libs.tslibs import is_unitless

18 19

from pandas._libs.tslibs.timedeltas import array_to_timedelta64

19 20

from pandas._typing import (

20 21

ArrayLike,

@@ -280,6 +281,20 @@ def astype_array_safe(

280 281

# Ensure we don't end up with a PandasArray

281 282

dtype = dtype.numpy_dtype

282 283 284 +

if (

285 +

is_datetime64_dtype(values.dtype)

286 +

# need to do np.dtype check instead of is_datetime64_dtype

287 +

# otherwise pyright complains

288 +

and isinstance(dtype, np.dtype)

289 +

and dtype.kind == "M"

290 +

and not is_unitless(dtype)

291 +

and not is_dtype_equal(dtype, values.dtype)

292 +

):

293 +

# unit conversion, we would re-cast to nanosecond, so this is

294 +

# effectively just a copy (regardless of copy kwd)

295 +

# TODO(2.0): remove special-case

296 +

return values.copy()

297 + 283 298

try:

284 299

new_values = astype_array(values, dtype, copy=copy)

285 300

except (ValueError, TypeError):

Original file line number Diff line number Diff line change

@@ -966,7 +966,9 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool:

966 966

tipo = get_dtype(arr_or_dtype.dtype)

967 967

else:

968 968

return False

969 -

return tipo == DT64NS_DTYPE or getattr(tipo, "base", None) == DT64NS_DTYPE

969 +

return tipo == DT64NS_DTYPE or (

970 +

isinstance(tipo, DatetimeTZDtype) and tipo._unit == "ns"

971 +

)

970 972 971 973 972 974

def is_timedelta64_ns_dtype(arr_or_dtype) -> bool:

Original file line number Diff line number Diff line change

@@ -1064,16 +1064,6 @@ def astype(self, dtype, copy: bool = True):

1064 1064

# Ensure that self.astype(self.dtype) is self

1065 1065

return self.copy() if copy else self

1066 1066 1067 -

if (

1068 -

self.dtype == np.dtype("M8[ns]")

1069 -

and isinstance(dtype, np.dtype)

1070 -

and dtype.kind == "M"

1071 -

and dtype != np.dtype("M8[ns]")

1072 -

):

1073 -

# For now DatetimeArray supports this by unwrapping ndarray,

1074 -

# but DatetimeIndex doesn't

1075 -

raise TypeError(f"Cannot cast {type(self).__name__} to dtype")

1076 - 1077 1067

values = self._data

1078 1068

if isinstance(values, ExtensionArray):

1079 1069

with rewrite_exception(type(values).__name__, type(self).__name__):

Original file line number Diff line number Diff line change

@@ -48,6 +48,7 @@

48 48

from pandas.core.dtypes.common import (

49 49

is_datetime64_dtype,

50 50

is_datetime64tz_dtype,

51 +

is_dtype_equal,

51 52

is_scalar,

52 53

)

53 54

from pandas.core.dtypes.missing import is_valid_na_for_dtype

@@ -338,6 +339,18 @@ def __new__(

338 339

if copy:

339 340

data = data.copy()

340 341

return cls._simple_new(data, name=name)

342 +

elif (

343 +

isinstance(data, DatetimeArray)

344 +

and freq is lib.no_default

345 +

and tz is None

346 +

and is_dtype_equal(data.dtype, dtype)

347 +

):

348 +

# Reached via Index.__new__ when we call .astype

349 +

# TODO(2.0): special casing can be removed once _from_sequence_not_strict

350 +

# no longer chokes on non-nano

351 +

if copy:

352 +

data = data.copy()

353 +

return cls._simple_new(data, name=name)

341 354 342 355

dtarr = DatetimeArray._from_sequence_not_strict(

343 356

data,

Original file line number Diff line number Diff line change

@@ -207,6 +207,36 @@ def test_cmp_dt64_arraylike_tznaive(self, comparison_op):

207 207 208 208 209 209

class TestDatetimeArray:

210 +

def test_astype_non_nano_tznaive(self):

211 +

dti = pd.date_range("2016-01-01", periods=3)

212 + 213 +

res = dti.astype("M8[s]")

214 +

assert res.dtype == "M8[s]"

215 + 216 +

dta = dti._data

217 +

res = dta.astype("M8[s]")

218 +

assert res.dtype == "M8[s]"

219 +

assert isinstance(res, pd.core.arrays.DatetimeArray) # used to be ndarray

220 + 221 +

def test_astype_non_nano_tzaware(self):

222 +

dti = pd.date_range("2016-01-01", periods=3, tz="UTC")

223 + 224 +

res = dti.astype("M8[s, US/Pacific]")

225 +

assert res.dtype == "M8[s, US/Pacific]"

226 + 227 +

dta = dti._data

228 +

res = dta.astype("M8[s, US/Pacific]")

229 +

assert res.dtype == "M8[s, US/Pacific]"

230 + 231 +

# from non-nano to non-nano, preserving reso

232 +

res2 = res.astype("M8[s, UTC]")

233 +

assert res2.dtype == "M8[s, UTC]"

234 +

assert not tm.shares_memory(res2, res)

235 + 236 +

res3 = res.astype("M8[s, UTC]", copy=False)

237 +

assert res2.dtype == "M8[s, UTC]"

238 +

assert tm.shares_memory(res3, res)

239 + 210 240

def test_astype_to_same(self):

211 241

arr = DatetimeArray._from_sequence(

212 242

["2000"], dtype=DatetimeTZDtype(tz="US/Central")

You can’t perform that action at this time.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4