A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://github.com/pandas-dev/pandas/commit/2198f51911dd5a5a0cc1c12c3f083b86c96bd33b below:

add and register Arrow extension types for Period and Interval (… · pandas-dev/pandas@2198f51 · GitHub

1 +

from distutils.version import LooseVersion

2 +

import json

3 + 4 +

import numpy as np

5 +

import pyarrow

6 + 7 +

from pandas.core.arrays.interval import _VALID_CLOSED

8 + 9 +

# True when the installed pyarrow is version 0.15 or newer; used to gate the
# definition and registration of the Arrow extension types in this module.
_pyarrow_version_ge_015 = LooseVersion(pyarrow.__version__) >= LooseVersion("0.15")

10 + 11 + 12 +

def pyarrow_array_to_numpy_and_mask(arr, dtype):
    """
    Convert a primitive pyarrow.Array to a numpy array and boolean mask based
    on the buffers of the Array.

    Both the data and the validity mask honour ``arr.offset``, so sliced
    arrays yield values and mask entries for the same elements.

    Parameters
    ----------
    arr : pyarrow.Array
    dtype : numpy.dtype

    Returns
    -------
    (data, mask)
        Tuple of two numpy arrays with the raw data (with specified dtype) and
        a boolean mask (validity mask, so False means missing)
    """
    dtype = np.dtype(dtype)
    buflist = arr.buffers()

    # Cut the data buffer down to exactly the bytes of this array *before*
    # handing it to numpy: the underlying buffer may carry trailing padding
    # whose size is not a multiple of the itemsize, which np.frombuffer
    # rejects when given the whole buffer.
    start = arr.offset * dtype.itemsize
    stop = start + len(arr) * dtype.itemsize
    data = np.frombuffer(buflist[1][start:stop], dtype=dtype)

    bitmask = buflist[0]
    if bitmask is not None:
        # The validity bitmap is shared with the parent array, so the offset
        # must be applied here too; otherwise the mask would describe the
        # first len(arr) elements of the parent instead of this slice.
        mask = pyarrow.BooleanArray.from_buffers(
            pyarrow.bool_(), len(arr), [None, bitmask], offset=arr.offset
        )
        mask = np.asarray(mask)
    else:
        # No validity bitmap means every element is valid.
        mask = np.ones(len(arr), dtype=bool)
    return data, mask

39 + 40 + 41 +

if _pyarrow_version_ge_015:
    # the pyarrow extension types are only available for pyarrow 0.15+, so
    # the classes below are defined and registered only in that case

    class ArrowPeriodType(pyarrow.ExtensionType):
        """
        Arrow extension type named "pandas.period", stored as int64 and
        parametrized by a frequency.
        """

        def __init__(self, freq):
            # The base initializer calls serialize, which reads the
            # frequency, so it has to be stored before delegating.
            self._freq = freq
            pyarrow.ExtensionType.__init__(self, pyarrow.int64(), "pandas.period")

        @property
        def freq(self):
            """Frequency this type was constructed with."""
            return self._freq

        def __arrow_ext_serialize__(self):
            """Encode the frequency as JSON bytes."""
            return json.dumps({"freq": self.freq}).encode()

        @classmethod
        def __arrow_ext_deserialize__(cls, storage_type, serialized):
            """Rebuild the type from bytes made by __arrow_ext_serialize__."""
            params = json.loads(serialized.decode())
            return ArrowPeriodType(params["freq"])

        def __eq__(self, other):
            # Only other Arrow extension types are comparable.
            if not isinstance(other, pyarrow.BaseExtensionType):
                return NotImplemented
            return type(self) == type(other) and self.freq == other.freq

        def __hash__(self):
            return hash((str(self), self.freq))

    # register the type with a dummy instance
    _period_type = ArrowPeriodType("D")
    pyarrow.register_extension_type(_period_type)

    class ArrowIntervalType(pyarrow.ExtensionType):
        """
        Arrow extension type named "pandas.interval", stored as a struct with
        "left" and "right" fields of a common subtype, and parametrized by
        that subtype and the closed side.
        """

        def __init__(self, subtype, closed):
            # The base initializer calls serialize, which reads the subtype
            # and closed attributes, so both are stored before delegating.
            assert closed in _VALID_CLOSED
            self._closed = closed

            # Accept anything whose string form is a pyarrow type alias.
            if not isinstance(subtype, pyarrow.DataType):
                subtype = pyarrow.type_for_alias(str(subtype))
            self._subtype = subtype

            bounds = [("left", subtype), ("right", subtype)]
            pyarrow.ExtensionType.__init__(
                self, pyarrow.struct(bounds), "pandas.interval"
            )

        @property
        def subtype(self):
            """pyarrow data type of the interval bounds."""
            return self._subtype

        @property
        def closed(self):
            """Closed side this type was constructed with."""
            return self._closed

        def __arrow_ext_serialize__(self):
            """Encode the subtype (as a string) and closed side as JSON bytes."""
            return json.dumps(
                {"subtype": str(self.subtype), "closed": self.closed}
            ).encode()

        @classmethod
        def __arrow_ext_deserialize__(cls, storage_type, serialized):
            """Rebuild the type from bytes made by __arrow_ext_serialize__."""
            params = json.loads(serialized.decode())
            return ArrowIntervalType(
                pyarrow.type_for_alias(params["subtype"]), params["closed"]
            )

        def __eq__(self, other):
            # Only other Arrow extension types are comparable.
            if not isinstance(other, pyarrow.BaseExtensionType):
                return NotImplemented
            return (
                type(self) == type(other)
                and self.subtype == other.subtype
                and self.closed == other.closed
            )

        def __hash__(self):
            return hash((str(self), str(self.subtype), self.closed))

    # register the type with a dummy instance
    _interval_type = ArrowIntervalType(pyarrow.int64(), "left")
    pyarrow.register_extension_type(_interval_type)


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4