+34 -23 lines changed
@@ -199,7 +199,8 @@ def __initialize_dataframe(
     ) -> Optional[Union[pd.DataFrame, sDataFrame]]:

         logger.info_def_report(
-            df=df, timeseries=report_config.vars.timeseries.active,
+            df=df,
+            timeseries=report_config.vars.timeseries.active,
         )

         if (
@@ -99,12 +99,9 @@ def convert_timestamp_to_datetime(timestamp: int) -> datetime:
     return datetime(1970, 1, 1) + timedelta(seconds=int(timestamp))


-def analytics_features(dataframe: str,
-                       datatype: str,
-                       report_type: str,
-                       ncols: int,
-                       nrows:int,
-                       dbx: str) -> None:
+def analytics_features(
+    dataframe: str, datatype: str, report_type: str, ncols: int, nrows: int, dbx: str
+) -> None:
     endpoint = "https://packages.ydata.ai/ydata-profiling?"
     package_version = __version__

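The reflowed signature feeds a telemetry GET request: the hunk shows only the endpoint and package version, and the next hunk shows `requests.get(request_message)`. How `request_message` is assembled is not part of this diff, so the sketch below is a hypothetical reconstruction; `build_request_message` and every query-string key in it are assumptions, not the library's actual code.

from urllib.parse import urlencode

def build_request_message(
    endpoint: str,
    package_version: str,
    dataframe: str,
    datatype: str,
    report_type: str,
    ncols: int,
    nrows: int,
    dbx: str,
) -> str:
    # urlencode stringifies and escapes each value; the key names here are
    # illustrative assumptions, since the diff never shows them.
    params = {
        "version": package_version,
        "dataframe": dataframe,
        "datatype": datatype,
        "report_type": report_type,
        "ncols": ncols,
        "nrows": nrows,
        "dbx": dbx,
    }
    return endpoint + urlencode(params)

A `requests.get(build_request_message(...))` call would then mirror the `requests.get(request_message)` visible in the following hunk.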
@@ -136,13 +133,15 @@ def analytics_features(dataframe: str,

     requests.get(request_message)

+
 def is_running_in_databricks():
-    mask = 'DATABRICKS_RUNTIME_VERSION' in os.environ
-    if 'DATABRICKS_RUNTIME_VERSION' in os.environ:
-        return os.environ['DATABRICKS_RUNTIME_VERSION']
+    mask = "DATABRICKS_RUNTIME_VERSION" in os.environ
+    if "DATABRICKS_RUNTIME_VERSION" in os.environ:
+        return os.environ["DATABRICKS_RUNTIME_VERSION"]
     else:
         return str(mask)

+
 def calculate_nrows(df):
     """
     Calculates the approx. number of rows spark dataframes
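Note the helper's return convention: when the `DATABRICKS_RUNTIME_VERSION` environment variable is set it returns the runtime version string itself; otherwise it returns `str(mask)`, which on that path is always the string "False". A minimal sketch checking both paths (manipulating `os.environ` directly here only for illustration):

import os

from ydata_profiling.utils.common import is_running_in_databricks

# Off Databricks: the membership test fails, so str(mask) == "False" comes back.
os.environ.pop("DATABRICKS_RUNTIME_VERSION", None)
assert is_running_in_databricks() == "False"

# On Databricks: the runtime version string is returned as-is.
os.environ["DATABRICKS_RUNTIME_VERSION"] = "14.3"
assert is_running_in_databricks() == "14.3"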
@@ -152,10 +151,15 @@ def calculate_nrows(df):
     try:
         n_partitions = df.rdd.getNumPartitions()

-        nrows = df.rdd.mapPartitionsWithIndex(
-            lambda idx, partition: [sum(1 for _ in partition)] if idx == 0 else [0]
-        ).collect()[0] * n_partitions
+        nrows = (
+            df.rdd.mapPartitionsWithIndex(
+                lambda idx, partition: [sum(1 for _ in partition)] if idx == 0 else [0]
+            ).collect()[0]
+            * n_partitions
+        )
     except:
-        nrows = 0  # returns 0 in case it was not possible to compute it from the partition
+        nrows = (
+            0  # returns 0 in case it was not possible to compute it from the partition
+        )

     return nrows
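The reformatted expression makes the estimation strategy easier to read: count only the rows of partition 0, then multiply by the partition count, approximating the total without a full `df.count()` scan. The estimate is exact only when partitions are evenly sized, and falls back to 0 if the RDD computation fails. A quick illustration, assuming a local SparkSession is available:

from pyspark.sql import SparkSession

from ydata_profiling.utils.common import calculate_nrows

spark = SparkSession.builder.master("local[2]").getOrCreate()

# 1000 rows round-robin repartitioned into 4 partitions of ~250 rows each,
# so counting partition 0 and multiplying by 4 recovers the total.
df = spark.range(1000).repartition(4)
print(calculate_nrows(df))  # 1000 on evenly sized partitions; an estimate otherwise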
@@ -6,9 +6,11 @@

 import pandas as pd

-from ydata_profiling.utils.common import (calculate_nrows,
-                                          analytics_features,
-                                          is_running_in_databricks)
+from ydata_profiling.utils.common import (
+    analytics_features,
+    calculate_nrows,
+    is_running_in_databricks,
+)


 class ProfilingLogger(logging.Logger):
@@ -20,22 +22,26 @@ def info_def_report(self, df, timeseries: bool) -> None:  # noqa: ANN001
         if isinstance(df, pd.DataFrame):
             dataframe = "pandas"
             report_type = "regular"
-            nrows=len(df)
+            nrows = len(df)
         elif df is None:
             dataframe = "pandas"
             report_type = "compare"
-            nrows=len(df)
+            nrows = len(df)
         else:
             dataframe = "spark"
             report_type = "regular"
-            nrows=calculate_nrows(df)
+            nrows = calculate_nrows(df)

-        dbx=is_running_in_databricks()
+        dbx = is_running_in_databricks()
         datatype = "timeseries" if timeseries else "tabular"

         analytics_features(
-            dataframe=dataframe, datatype=datatype, report_type=report_type,
-            nrows=nrows, ncols=ncols, dbx=dbx
+            dataframe=dataframe,
+            datatype=datatype,
+            report_type=report_type,
+            nrows=nrows,
+            ncols=ncols,
+            dbx=dbx,
         )

         super().info(
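Read together with the call site in the first hunk (`logger.info_def_report(df=df, timeseries=...)`), the method reduces to a three-way dispatch on `df`. A condensed mirror of that mapping is sketched below; note that the compare branch receives `df is None` yet still calls `len(df)` in the diff, which would raise `TypeError` at runtime, so the sketch substitutes 0 purely to stay runnable rather than asserting the library's intended behavior:

import pandas as pd

from ydata_profiling.utils.common import calculate_nrows

def classify_df(df):
    # Condensed mirror of the branch structure in info_def_report above.
    if isinstance(df, pd.DataFrame):
        return "pandas", "regular", len(df)
    if df is None:
        # The diff's compare branch calls len(df) here; len(None) raises
        # TypeError, so 0 stands in only to keep this sketch executable.
        return "pandas", "compare", 0
    return "spark", "regular", calculate_nrows(df)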