+49
-5
lines changed · Filter options
+49
-5
lines changed Original file line number Diff line number Diff line change
@@ -20,7 +20,7 @@
20
20
21
21
22
22
def get_character_counts_vc(vc: pd.Series) -> pd.Series:
23
-
series = pd.Series(vc.index, index=vc)
23
+
series = pd.Series(vc.index, index=vc, dtype=object)
24
24
characters = series[series != ""].apply(list)
25
25
characters = characters.explode()
26
26
@@ -170,7 +170,7 @@ def word_summary_vc(vc: pd.Series, stop_words: List[str] = []) -> dict:
170
170
# TODO: configurable lowercase/punctuation etc.
171
171
# TODO: remove punctuation in words
172
172
173
-
series = pd.Series(vc.index, index=vc)
173
+
series = pd.Series(vc.index, index=vc, dtype=object)
174
174
word_lists = series.str.lower().str.split()
175
175
words = word_lists.explode().str.strip(string.punctuation + string.whitespace)
176
176
word_counts = pd.Series(words.index, index=words)
@@ -188,7 +188,7 @@ def word_summary_vc(vc: pd.Series, stop_words: List[str] = []) -> dict:
188
188
189
189
190
190
def length_summary_vc(vc: pd.Series) -> dict:
191
-
series = pd.Series(vc.index, index=vc)
191
+
series = pd.Series(vc.index, index=vc, dtype=object)
192
192
length = series.str.len()
193
193
length_counts = pd.Series(length.index, index=length)
194
194
length_counts = length_counts.groupby(level=0, sort=False).sum()
Original file line number Diff line number Diff line change
@@ -10,6 +10,7 @@
10
10
11
11
from ydata_profiling.config import Settings
12
12
from ydata_profiling.model.typeset import ProfilingTypeSet
13
+
from ydata_profiling.utils.compat import optional_option_context
13
14
from ydata_profiling.utils.dataframe import sort_column_names
14
15
15
16
BaseSummarizer: Any = "BaseSummarizer" # type: ignore
@@ -38,7 +39,8 @@ def pandas_describe_1d(
38
39
"""
39
40
40
41
# Make sure pd.NA is not in the series
41
-
series = series.fillna(np.nan)
42
+
with optional_option_context("future.no_silent_downcasting", True):
43
+
series = series.fillna(np.nan).infer_objects(copy=False)
42
44
43
45
has_cast_type = _is_cast_type_defined(typeset, series.name) # type:ignore
44
46
cast_type = (
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
1
1
"""Utility functions for (version) compatibility"""
2
+
3
+
from contextlib import contextmanager
2
4
from functools import lru_cache
3
-
from typing import Tuple
5
+
from typing import Generator, Tuple
4
6
5
7
import pandas as pd
6
8
@@ -12,3 +14,18 @@ def pandas_version_info() -> Tuple[int, ...]:
12
14
akin to `sys.version_info` for the Python version.
13
15
"""
14
16
return tuple(int(s) for s in pd.__version__.split("."))
17
+
18
+
19
+
@contextmanager
def optional_option_context(
    option_key: str, value: object
) -> Generator[None, None, None]:
    """
    A context manager that sets an option only if it is available in the
    current pandas version; otherwise, it is a no-op.

    Args:
        option_key: name of the pandas option to set temporarily.
        value: value the option takes inside the ``with`` block.

    Raises:
        Any exception raised by the managed block propagates unchanged,
        including a ``pd.errors.OptionError`` raised by the caller's own code.
    """
    # Function-scope import so this block does not depend on edits elsewhere.
    from contextlib import ExitStack

    with ExitStack() as stack:
        try:
            # Only *entering* the option context is guarded: an unknown key
            # means the running pandas predates the option, so skip it.
            stack.enter_context(pd.option_context(option_key, value))
        except pd.errors.OptionError:
            pass
        # Yield exactly once. Keeping the yield outside the try/except means
        # an OptionError raised by the managed block is not swallowed here
        # (a second yield would make contextlib raise RuntimeError).
        yield
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
1
+
import pandas as pd
2
+
import pytest
3
+
4
+
from ydata_profiling import ProfileReport
5
+
from ydata_profiling.utils.compat import pandas_version_info
6
+
7
+
8
+
@pytest.fixture()
def df():
    """Build a three-row frame: integer column ``foo``, empty-string column ``bar``."""
    return pd.DataFrame(
        {
            "foo": [1, 2, 3],
            "bar": ["", "", ""],
        }
    )
17
+
18
+
19
+
@pytest.mark.skipif(
    pandas_version_info() < (2, 1, 0), reason="requires pandas 2.1 or higher"
)
def test_pd_future_infer_string(df: pd.DataFrame):
    """Check that a report renders to non-empty HTML with ``future.infer_string`` on."""
    with pd.option_context("future.infer_string", True):
        report = ProfileReport(df, title="Test Report", progress_bar=False)
        # Render while the option is still active.
        html = report.to_html()
        assert len(html) > 0
You can’t perform that action at this time.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4