A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://github.com/ydataai/ydata-profiling/commit/886be3800ce1b232270c092caa602b7c3ff6c70a below:

code formatting · ydataai/ydata-profiling@886be38 · GitHub

File tree (Expand file tree / Collapse file tree): 3 files changed

+34

-23

lines changed

Filter options

Expand file tree / Collapse file tree: 3 files changed

+34

-23

lines changed

Original file line number Diff line number Diff line change

@@ -199,7 +199,8 @@ def __initialize_dataframe(

199 199

) -> Optional[Union[pd.DataFrame, sDataFrame]]:

200 200 201 201

logger.info_def_report(

202 -

df=df, timeseries=report_config.vars.timeseries.active,

202 +

df=df,

203 +

timeseries=report_config.vars.timeseries.active,

203 204

)

204 205 205 206

if (

Original file line number Diff line number Diff line change

@@ -99,12 +99,9 @@ def convert_timestamp_to_datetime(timestamp: int) -> datetime:

99 99

return datetime(1970, 1, 1) + timedelta(seconds=int(timestamp))

100 100 101 101 102 -

def analytics_features(dataframe: str,

103 -

datatype: str,

104 -

report_type: str,

105 -

ncols: int,

106 -

nrows:int,

107 -

dbx: str) -> None:

102 +

def analytics_features(

103 +

dataframe: str, datatype: str, report_type: str, ncols: int, nrows: int, dbx: str

104 +

) -> None:

108 105

endpoint = "https://packages.ydata.ai/ydata-profiling?"

109 106

package_version = __version__

110 107

@@ -136,13 +133,15 @@ def analytics_features(dataframe: str,

136 133 137 134

requests.get(request_message)

138 135 136 + 139 137

def is_running_in_databricks():

140 -

mask = 'DATABRICKS_RUNTIME_VERSION' in os.environ

141 -

if 'DATABRICKS_RUNTIME_VERSION' in os.environ:

142 -

return os.environ['DATABRICKS_RUNTIME_VERSION']

138 +

mask = "DATABRICKS_RUNTIME_VERSION" in os.environ

139 +

if "DATABRICKS_RUNTIME_VERSION" in os.environ:

140 +

return os.environ["DATABRICKS_RUNTIME_VERSION"]

143 141

else:

144 142

return str(mask)

145 143 144 + 146 145

def calculate_nrows(df):

147 146

"""

148 147

Calculates the approx. number of rows spark dataframes

@@ -152,10 +151,15 @@ def calculate_nrows(df):

152 151

try:

153 152

n_partitions = df.rdd.getNumPartitions()

154 153 155 -

nrows = df.rdd.mapPartitionsWithIndex(

156 -

lambda idx, partition: [sum(1 for _ in partition)] if idx == 0 else [0]

157 -

).collect()[0] * n_partitions

154 +

nrows = (

155 +

df.rdd.mapPartitionsWithIndex(

156 +

lambda idx, partition: [sum(1 for _ in partition)] if idx == 0 else [0]

157 +

).collect()[0]

158 +

* n_partitions

159 +

)

158 160

except:

159 -

nrows = 0 # returns 0 in case it was not possible to compute it from the partition

161 +

nrows = (

162 +

0 # returns 0 in case it was not possible to compute it from the partition

163 +

)

160 164 161 165

return nrows

Original file line number Diff line number Diff line change

@@ -6,9 +6,11 @@

6 6 7 7

import pandas as pd

8 8 9 -

from ydata_profiling.utils.common import (calculate_nrows,

10 -

analytics_features,

11 -

is_running_in_databricks)

9 +

from ydata_profiling.utils.common import (

10 +

analytics_features,

11 +

calculate_nrows,

12 +

is_running_in_databricks,

13 +

)

12 14 13 15 14 16

class ProfilingLogger(logging.Logger):

@@ -20,22 +22,26 @@ def info_def_report(self, df, timeseries: bool) -> None: # noqa: ANN001

20 22

if isinstance(df, pd.DataFrame):

21 23

dataframe = "pandas"

22 24

report_type = "regular"

23 -

nrows=len(df)

25 +

nrows = len(df)

24 26

elif df is None:

25 27

dataframe = "pandas"

26 28

report_type = "compare"

27 -

nrows=len(df)

29 +

nrows = len(df)

28 30

else:

29 31

dataframe = "spark"

30 32

report_type = "regular"

31 -

nrows=calculate_nrows(df)

33 +

nrows = calculate_nrows(df)

32 34 33 -

dbx=is_running_in_databricks()

35 +

dbx = is_running_in_databricks()

34 36

datatype = "timeseries" if timeseries else "tabular"

35 37 36 38

analytics_features(

37 -

dataframe=dataframe, datatype=datatype, report_type=report_type,

38 -

nrows=nrows, ncols=ncols, dbx=dbx

39 +

dataframe=dataframe,

40 +

datatype=datatype,

41 +

report_type=report_type,

42 +

nrows=nrows,

43 +

ncols=ncols,

44 +

dbx=dbx,

39 45

)

40 46 41 47

super().info(

You can’t perform that action at this time.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4