+34 -23 lines changed
@@ -199,7 +199,8 @@ def __initialize_dataframe(
     ) -> Optional[Union[pd.DataFrame, sDataFrame]]:

         logger.info_def_report(
-            df=df, timeseries=report_config.vars.timeseries.active,
+            df=df,
+            timeseries=report_config.vars.timeseries.active,
         )

         if (
@@ -99,12 +99,9 @@ def convert_timestamp_to_datetime(timestamp: int) -> datetime:
     return datetime(1970, 1, 1) + timedelta(seconds=int(timestamp))


-def analytics_features(dataframe: str,
-                       datatype: str,
-                       report_type: str,
-                       ncols: int,
-                       nrows:int,
-                       dbx: str) -> None:
+def analytics_features(
+    dataframe: str, datatype: str, report_type: str, ncols: int, nrows: int, dbx: str
+) -> None:
     endpoint = "https://packages.ydata.ai/ydata-profiling?"
     package_version = __version__

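The reflowed signature feeds a telemetry GET request: the hunk shows only the endpoint and package version, and the next hunk shows `requests.get(request_message)`. How `request_message` is assembled is not part of this diff, so the sketch below is a hypothetical reconstruction; `build_request_message` and every query-string key in it are assumptions, not the library's actual code.

from urllib.parse import urlencode

def build_request_message(
    endpoint: str,
    package_version: str,
    dataframe: str,
    datatype: str,
    report_type: str,
    ncols: int,
    nrows: int,
    dbx: str,
) -> str:
    # urlencode stringifies and escapes each value; the key names here are
    # illustrative assumptions, since the diff never shows them.
    params = {
        "version": package_version,
        "dataframe": dataframe,
        "datatype": datatype,
        "report_type": report_type,
        "ncols": ncols,
        "nrows": nrows,
        "dbx": dbx,
    }
    return endpoint + urlencode(params)

A `requests.get(build_request_message(...))` call would then mirror the `requests.get(request_message)` visible in the following hunk.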
@@ -136,13 +133,15 @@ def analytics_features(dataframe: str,

     requests.get(request_message)

+
 def is_running_in_databricks():
-    mask = 'DATABRICKS_RUNTIME_VERSION' in os.environ
-    if 'DATABRICKS_RUNTIME_VERSION' in os.environ:
-        return os.environ['DATABRICKS_RUNTIME_VERSION']
+    mask = "DATABRICKS_RUNTIME_VERSION" in os.environ
+    if "DATABRICKS_RUNTIME_VERSION" in os.environ:
+        return os.environ["DATABRICKS_RUNTIME_VERSION"]
     else:
         return str(mask)

+
 def calculate_nrows(df):
     """
     Calculates the approx. number of rows spark dataframes
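Note the helper's return convention: when the `DATABRICKS_RUNTIME_VERSION` environment variable is set it returns the runtime version string itself; otherwise it returns `str(mask)`, which on that path is always the string "False". A minimal sketch checking both paths (manipulating `os.environ` directly here only for illustration):

import os

from ydata_profiling.utils.common import is_running_in_databricks

# Off Databricks: the membership test fails, so str(mask) == "False" comes back.
os.environ.pop("DATABRICKS_RUNTIME_VERSION", None)
assert is_running_in_databricks() == "False"

# On Databricks: the runtime version string is returned as-is.
os.environ["DATABRICKS_RUNTIME_VERSION"] = "14.3"
assert is_running_in_databricks() == "14.3"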
@@ -152,10 +151,15 @@ def calculate_nrows(df):
     try:
         n_partitions = df.rdd.getNumPartitions()

-        nrows = df.rdd.mapPartitionsWithIndex(
-            lambda idx, partition: [sum(1 for _ in partition)] if idx == 0 else [0]
-        ).collect()[0] * n_partitions
+        nrows = (
+            df.rdd.mapPartitionsWithIndex(
+                lambda idx, partition: [sum(1 for _ in partition)] if idx == 0 else [0]
+            ).collect()[0]
+            * n_partitions
+        )
     except:
-        nrows = 0  # returns 0 in case it was not possible to compute it from the partition
+        nrows = (
+            0  # returns 0 in case it was not possible to compute it from the partition
+        )

     return nrows
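The reformatted expression makes the estimation strategy easier to read: count only the rows of partition 0, then multiply by the partition count, approximating the total without a full `df.count()` scan. The estimate is exact only when partitions are evenly sized, and falls back to 0 if the RDD computation fails. A quick illustration, assuming a local SparkSession is available:

from pyspark.sql import SparkSession

from ydata_profiling.utils.common import calculate_nrows

spark = SparkSession.builder.master("local[2]").getOrCreate()

# 1000 rows round-robin repartitioned into 4 partitions of ~250 rows each,
# so counting partition 0 and multiplying by 4 recovers the total.
df = spark.range(1000).repartition(4)
print(calculate_nrows(df))  # 1000 on evenly sized partitions; an estimate otherwise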
@@ -6,9 +6,11 @@

 import pandas as pd

-from ydata_profiling.utils.common import (calculate_nrows,
-                                          analytics_features,
-                                          is_running_in_databricks)
+from ydata_profiling.utils.common import (
+    analytics_features,
+    calculate_nrows,
+    is_running_in_databricks,
+)


 class ProfilingLogger(logging.Logger):
@@ -20,22 +22,26 @@ def info_def_report(self, df, timeseries: bool) -> None:  # noqa: ANN001
         if isinstance(df, pd.DataFrame):
             dataframe = "pandas"
             report_type = "regular"
-            nrows=len(df)
+            nrows = len(df)
         elif df is None:
             dataframe = "pandas"
             report_type = "compare"
-            nrows=len(df)
+            nrows = len(df)
         else:
             dataframe = "spark"
             report_type = "regular"
-            nrows=calculate_nrows(df)
+            nrows = calculate_nrows(df)

-        dbx=is_running_in_databricks()
+        dbx = is_running_in_databricks()
         datatype = "timeseries" if timeseries else "tabular"

         analytics_features(
-            dataframe=dataframe, datatype=datatype, report_type=report_type,
-            nrows=nrows, ncols=ncols, dbx=dbx
+            dataframe=dataframe,
+            datatype=datatype,
+            report_type=report_type,
+            nrows=nrows,
+            ncols=ncols,
+            dbx=dbx,
         )

         super().info(
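Read together with the call site in the first hunk (`logger.info_def_report(df=df, timeseries=...)`), the method reduces to a three-way dispatch on `df`. A condensed mirror of that mapping is sketched below; note that the compare branch receives `df is None` yet still calls `len(df)` in the diff, which would raise `TypeError` at runtime, so the sketch substitutes 0 purely to stay runnable rather than asserting the library's intended behavior:

import pandas as pd

from ydata_profiling.utils.common import calculate_nrows

def classify_df(df):
    # Condensed mirror of the branch structure in info_def_report above.
    if isinstance(df, pd.DataFrame):
        return "pandas", "regular", len(df)
    if df is None:
        # The diff's compare branch calls len(df) here; len(None) raises
        # TypeError, so 0 stands in only to keep this sketch executable.
        return "pandas", "compare", 0
    return "spark", "regular", calculate_nrows(df)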