A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://github.com/ydataai/ydata-profiling/commit/45c47c7d514ebc58930ec9b7c3de81185ee9ba1e below:

Support Pandas future.infer_string=True in report generation (#1… · ydataai/ydata-profiling@45c47c7 · GitHub

File tree: Expand file tree / Collapse file tree (4 files changed)

+49

-5

lines changed

Filter options

Expand file tree / Collapse file tree (4 files changed)

+49

-5

lines changed Original file line number Diff line number Diff line change

@@ -20,7 +20,7 @@

20 20 21 21 22 22

def get_character_counts_vc(vc: pd.Series) -> pd.Series:

23 -

series = pd.Series(vc.index, index=vc)

23 +

series = pd.Series(vc.index, index=vc, dtype=object)

24 24

characters = series[series != ""].apply(list)

25 25

characters = characters.explode()

26 26

@@ -170,7 +170,7 @@ def word_summary_vc(vc: pd.Series, stop_words: List[str] = []) -> dict:

170 170

# TODO: configurable lowercase/punctuation etc.

171 171

# TODO: remove punctuation in words

172 172 173 -

series = pd.Series(vc.index, index=vc)

173 +

series = pd.Series(vc.index, index=vc, dtype=object)

174 174

word_lists = series.str.lower().str.split()

175 175

words = word_lists.explode().str.strip(string.punctuation + string.whitespace)

176 176

word_counts = pd.Series(words.index, index=words)

@@ -188,7 +188,7 @@ def word_summary_vc(vc: pd.Series, stop_words: List[str] = []) -> dict:

188 188 189 189 190 190

def length_summary_vc(vc: pd.Series) -> dict:

191 -

series = pd.Series(vc.index, index=vc)

191 +

series = pd.Series(vc.index, index=vc, dtype=object)

192 192

length = series.str.len()

193 193

length_counts = pd.Series(length.index, index=length)

194 194

length_counts = length_counts.groupby(level=0, sort=False).sum()

Original file line number Diff line number Diff line change

@@ -10,6 +10,7 @@

10 10 11 11

from ydata_profiling.config import Settings

12 12

from ydata_profiling.model.typeset import ProfilingTypeSet

13 +

from ydata_profiling.utils.compat import optional_option_context

13 14

from ydata_profiling.utils.dataframe import sort_column_names

14 15 15 16

BaseSummarizer: Any = "BaseSummarizer" # type: ignore

@@ -38,7 +39,8 @@ def pandas_describe_1d(

38 39

"""

39 40 40 41

# Make sure pd.NA is not in the series

41 -

series = series.fillna(np.nan)

42 +

with optional_option_context("future.no_silent_downcasting", True):

43 +

series = series.fillna(np.nan).infer_objects(copy=False)

42 44 43 45

has_cast_type = _is_cast_type_defined(typeset, series.name) # type:ignore

44 46

cast_type = (

Original file line number Diff line number Diff line change

@@ -1,6 +1,8 @@

1 1

"""Utility functions for (version) compatibility"""

2 + 3 +

from contextlib import contextmanager

2 4

from functools import lru_cache

3 -

from typing import Tuple

5 +

from typing import Generator, Tuple

4 6 5 7

import pandas as pd

6 8

@@ -12,3 +14,18 @@ def pandas_version_info() -> Tuple[int, ...]:

12 14

akin to `sys.version_info` for the Python version.

13 15

"""

14 16

return tuple(int(s) for s in pd.__version__.split("."))

17 + 18 + 19 +

@contextmanager
def optional_option_context(
    option_key: str, value: object
) -> Generator[None, None, None]:
    """
    Temporarily set a pandas option if the running pandas version knows it.

    When ``option_key`` is a registered pandas option, it is set to ``value``
    for the duration of the ``with`` body and restored afterwards. When the
    option is unknown (``pd.errors.OptionError`` while setting it up), the
    body runs with pandas options untouched, i.e. the manager is a no-op.

    Exceptions raised by the ``with`` body itself always propagate unchanged,
    including ``pd.errors.OptionError``.

    Args:
        option_key: Name of the pandas option, e.g.
            ``"future.no_silent_downcasting"``.
        value: Value the option should have inside the ``with`` body.
    """
    # Local import keeps this block self-contained (no new module-level import).
    from contextlib import ExitStack

    with ExitStack() as stack:
        try:
            # Only the option *setup* is guarded: an OptionError here means the
            # option does not exist in this pandas version.
            stack.enter_context(pd.option_context(option_key, value))
        except pd.errors.OptionError:
            pass
        # Yield exactly once and outside the try/except. Yielding from the
        # except branch as well would make the generator yield a second time
        # if the caller's body raised OptionError, which contextlib reports as
        # "generator didn't stop after throw()" and hides the real error.
        yield

Original file line number Diff line number Diff line change

@@ -0,0 +1,25 @@

1 +

import pandas as pd

2 +

import pytest

3 + 4 +

from ydata_profiling import ProfileReport

5 +

from ydata_profiling.utils.compat import pandas_version_info

6 + 7 + 8 +

@pytest.fixture()
def df():
    """Provide a small frame with an integer column and an all-empty-string column."""
    columns = {
        "foo": [1, 2, 3],
        "bar": ["", "", ""],
    }
    return pd.DataFrame(columns)

17 + 18 + 19 +

@pytest.mark.skipif(
    pandas_version_info() < (2, 1, 0), reason="requires pandas 2.1 or higher"
)
def test_pd_future_infer_string(df: pd.DataFrame):
    """Check that an HTML report is produced with ``future.infer_string`` enabled."""
    with pd.option_context("future.infer_string", True):
        profile_report = ProfileReport(df, title="Test Report", progress_bar=False)
        # NOTE(review): the original nesting is not recoverable from this view;
        # the assertion is assumed to sit inside the option context - confirm.
        assert len(profile_report.to_html()) > 0

You can’t perform that action at this time.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4