1
1
from rest_framework import serializers
2
2
from pandas import DataFrame
3
+
from pandas.api.types import is_numeric_dtype
3
4
from django.core.exceptions import ImproperlyConfigured
4
5
import datetime
6
+
from collections import OrderedDict
5
7
6
8
7
9
class PandasSerializer(serializers.ListSerializer):
@@ -222,12 +224,15 @@ def get_index(self, dataframe):
222
224
group_field = self.get_group_field()
223
225
date_field = self.get_date_field()
224
226
header_fields = self.get_header_fields()
227
+
extra_index_fields = self.get_extra_index_fields()
225
228
229
+
index = []
226
230
if date_field:
227
-
group_fields = [date_field, group_field]
228
-
else:
229
-
group_fields = [group_field]
230
-
return group_fields + header_fields
231
+
index.append(date_field)
232
+
index += extra_index_fields
233
+
index.append(group_field)
234
+
index += header_fields
235
+
return index
231
236
232
237
def transform_dataframe(self, dataframe):
233
238
"""
@@ -255,35 +260,30 @@ def transform_dataframe(self, dataframe):
255
260
interval = None
256
261
257
262
# Compute stats for each column, potentially grouped by year
258
-
all_stats = []
263
+
series_infos = OrderedDict()
259
264
for header, series in groups.items():
260
265
if interval:
261
266
series_stats = self.boxplots_for_interval(series, interval)
262
267
else:
263
-
interval = None
264
268
series_stats = [self.compute_boxplot(series)]
265
269
266
-
series_infos = []
267
270
for series_stat in series_stats:
268
-
series_info = {}
269
271
if isinstance(header, tuple):
270
272
value_name = header[0]
271
273
col_values = header[1:]
272
274
else:
273
275
value_name = header
274
276
col_values = []
275
-
col_names = zip(dataframe.columns.names[1:], col_values)
276
-
for col_name, value in col_names:
277
-
series_info[col_name] = value
277
+
col_names = tuple(zip(dataframe.columns.names[1:], col_values))
278
+
if interval in series_stat:
279
+
col_names += ((interval, series_stat[interval]),)
280
+
series_infos.setdefault(col_names, dict(col_names))
281
+
series_info = series_infos[col_names]
278
282
for stat_name, val in series_stat.items():
279
-
if stat_name == interval:
280
-
series_info[stat_name] = val
281
-
else:
283
+
if stat_name != interval:
282
284
series_info[value_name + '-' + stat_name] = val
283
-
series_infos.append(series_info)
284
-
all_stats += series_infos
285
285
286
-
dataframe = DataFrame(all_stats)
286
+
dataframe = DataFrame(list(series_infos.values()))
287
287
if 'series' in grouping:
288
288
index = header_fields + [group_field]
289
289
unstack = len(header_fields)
@@ -336,11 +336,19 @@ def compute_boxplot(self, series):
336
336
series = series[series.notnull()]
337
337
if len(series.values) == 0:
338
338
return {}
339
+
elif not is_numeric_dtype(series):
340
+
return self.non_numeric_stats(series)
339
341
stats = boxplot_stats(list(series.values))[0]
340
342
stats['count'] = len(series.values)
341
343
stats['fliers'] = "|".join(map(str, stats['fliers']))
342
344
return stats
343
345
346
+
def non_numeric_stats(self, series):
    """
    Summary statistics for a series that is not numeric, where boxplot
    statistics cannot be computed.

    Returns a dict with:
      - 'count': the number of values in the series
      - 'mode': the first value reported by ``series.mode()``

    Returns an empty dict when the series has no mode (i.e. it is empty
    or contains only nulls), matching what compute_boxplot returns for
    an empty series.
    """
    modes = series.mode()
    if modes.empty:
        # mode() ignores nulls, so an empty or all-null series yields no
        # rows; indexing the first element would raise instead of
        # producing a usable result.
        return {}
    return {
        'count': len(series),
        # Positional access: take the first reported mode regardless of
        # how the result happens to be indexed.
        'mode': modes.iloc[0],
    }
351
+
344
352
def get_group_field(self):
345
353
"""
346
354
Categorical field to group datasets by.
@@ -359,6 +367,12 @@ def get_header_fields(self):
359
367
"""
360
368
return self.get_meta_option('boxplot_header', [])
361
369
370
+
def get_extra_index_fields(self):
    """
    Fields that identify each row but don't need to be considered for
    plotting.

    Read from the 'boxplot_extra_index' meta option; defaults to an
    empty list. A single field name given as a plain string is wrapped
    in a one-element list.
    """
    fields = self.get_meta_option('boxplot_extra_index', [])
    if isinstance(fields, str):
        # get_index extends its index list with this value using ``+=``;
        # a bare string would be added one character at a time.
        return [fields]
    return fields
375
+
362
376
363
377
class SimpleSerializer(serializers.Serializer):
364
378
"""
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4