42
42
43
43
from pandas.core import roperator
44
44
from pandas.core.arraylike import OpsMixin
45
+
from pandas.core.arrays._arrow_string_mixins import ArrowStringArrayMixin
45
46
from pandas.core.arrays.base import (
46
47
ExtensionArray,
47
48
ExtensionArraySupportsAnyAll,
@@ -184,7 +185,10 @@ def to_pyarrow_type(
184
185
185
186
186
187
class ArrowExtensionArray(
187
-
OpsMixin, ExtensionArraySupportsAnyAll, BaseStringArrayMethods
188
+
OpsMixin,
189
+
ExtensionArraySupportsAnyAll,
190
+
ArrowStringArrayMixin,
191
+
BaseStringArrayMethods,
188
192
):
189
193
"""
190
194
Pandas ExtensionArray backed by a PyArrow ChunkedArray.
@@ -1986,24 +1990,6 @@ def _str_count(self, pat: str, flags: int = 0):
1986
1990
raise NotImplementedError(f"count not implemented with {flags=}")
1987
1991
return type(self)(pc.count_substring_regex(self._pa_array, pat))
1988
1992
1989
-
def _str_pad(
1990
-
self,
1991
-
width: int,
1992
-
side: Literal["left", "right", "both"] = "left",
1993
-
fillchar: str = " ",
1994
-
):
1995
-
if side == "left":
1996
-
pa_pad = pc.utf8_lpad
1997
-
elif side == "right":
1998
-
pa_pad = pc.utf8_rpad
1999
-
elif side == "both":
2000
-
pa_pad = pc.utf8_center
2001
-
else:
2002
-
raise ValueError(
2003
-
f"Invalid side: {side}. Side must be one of 'left', 'right', 'both'"
2004
-
)
2005
-
return type(self)(pa_pad(self._pa_array, width=width, padding=fillchar))
2006
-
2007
1993
def _str_contains(
2008
1994
self, pat, case: bool = True, flags: int = 0, na=None, regex: bool = True
2009
1995
):
@@ -2088,26 +2074,6 @@ def _str_find(self, sub: str, start: int = 0, end: int | None = None):
2088
2074
)
2089
2075
return type(self)(result)
2090
2076
2091
-
def _str_get(self, i: int):
2092
-
lengths = pc.utf8_length(self._pa_array)
2093
-
if i >= 0:
2094
-
out_of_bounds = pc.greater_equal(i, lengths)
2095
-
start = i
2096
-
stop = i + 1
2097
-
step = 1
2098
-
else:
2099
-
out_of_bounds = pc.greater(-i, lengths)
2100
-
start = i
2101
-
stop = i - 1
2102
-
step = -1
2103
-
not_out_of_bounds = pc.invert(out_of_bounds.fill_null(True))
2104
-
selected = pc.utf8_slice_codeunits(
2105
-
self._pa_array, start=start, stop=stop, step=step
2106
-
)
2107
-
null_value = pa.scalar(None, type=self._pa_array.type)
2108
-
result = pc.if_else(not_out_of_bounds, selected, null_value)
2109
-
return type(self)(result)
2110
-
2111
2077
def _str_join(self, sep: str):
2112
2078
if pa.types.is_string(self._pa_array.type):
2113
2079
result = self._apply_elementwise(list)
@@ -2137,15 +2103,6 @@ def _str_slice(
2137
2103
pc.utf8_slice_codeunits(self._pa_array, start=start, stop=stop, step=step)
2138
2104
)
2139
2105
2140
-
def _str_slice_replace(
2141
-
self, start: int | None = None, stop: int | None = None, repl: str | None = None
2142
-
):
2143
-
if repl is None:
2144
-
repl = ""
2145
-
if start is None:
2146
-
start = 0
2147
-
return type(self)(pc.utf8_replace_slice(self._pa_array, start, stop, repl))
2148
-
2149
2106
def _str_isalnum(self):
2150
2107
return type(self)(pc.utf8_is_alnum(self._pa_array))
2151
2108
@@ -2170,18 +2127,9 @@ def _str_isspace(self):
2170
2127
def _str_istitle(self):
2171
2128
return type(self)(pc.utf8_is_title(self._pa_array))
2172
2129
2173
-
def _str_capitalize(self):
2174
-
return type(self)(pc.utf8_capitalize(self._pa_array))
2175
-
2176
-
def _str_title(self):
2177
-
return type(self)(pc.utf8_title(self._pa_array))
2178
-
2179
2130
def _str_isupper(self):
2180
2131
return type(self)(pc.utf8_is_upper(self._pa_array))
2181
2132
2182
-
def _str_swapcase(self):
2183
-
return type(self)(pc.utf8_swapcase(self._pa_array))
2184
-
2185
2133
def _str_len(self):
2186
2134
return type(self)(pc.utf8_length(self._pa_array))
2187
2135
@@ -2222,12 +2170,6 @@ def _str_removeprefix(self, prefix: str):
2222
2170
result = self._apply_elementwise(predicate)
2223
2171
return type(self)(pa.chunked_array(result))
2224
2172
2225
-
def _str_removesuffix(self, suffix: str):
2226
-
ends_with = pc.ends_with(self._pa_array, pattern=suffix)
2227
-
removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
2228
-
result = pc.if_else(ends_with, removed, self._pa_array)
2229
-
return type(self)(result)
2230
-
2231
2173
def _str_casefold(self):
2232
2174
predicate = lambda val: val.casefold()
2233
2175
result = self._apply_elementwise(predicate)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4