diff --git a/api/api_v1/endpoints/query.py b/api/api_v1/endpoints/query.py index 2521912..27a6974 100644 --- a/api/api_v1/endpoints/query.py +++ b/api/api_v1/endpoints/query.py @@ -351,6 +351,7 @@ async def scatter_model( df = await ckdb.query_dataframe(sql) interval_type = res['interval_type'] analysis = res['analysis'] + groupby = res['groupby'] quota_interval_arr = res['quota_interval_arr'] if analysis != 'number_of_days': max_v = int(df['values'].max()) @@ -369,15 +370,30 @@ async def scatter_model( resp['label'].append(f'[{quota_interval_arr[i]},{v})') bins.append(v) - summary_data = None - # todo 整体的,需要补充分组的 + # 这是整体的 for key, tmp_df in df.groupby('date'): bins_s = pd.cut(tmp_df['values'], bins=bins, right=False).value_counts() bins_s.sort_index(inplace=True) total = int(bins_s.sum()) - resp['list'][key.strftime('%Y-%m-%d')] = {'n': bins_s.to_list(), 'total': total, - 'p': round(bins_s * 100 / total, 2).to_list()} + resp['list'][key.strftime('%Y-%m-%d')] = dict() + resp['list'][key.strftime('%Y-%m-%d')]['总体'] = {'n': bins_s.to_list(), 'total': total, + 'p': round(bins_s * 100 / total, 2).to_list(), + 'title': '总体'} + # 分组的 + if groupby: + for key, tmp_df in df.groupby(['date', *groupby]): + bins_s = pd.cut(tmp_df['values'], bins=bins, + right=False).value_counts() + bins_s.sort_index(inplace=True) + total = int(bins_s.sum()) + title = '.'.join(key[1:]) + date = key[0] + resp['list'][date.strftime('%Y-%m-%d')][title] = {'n': bins_s.to_list(), 'total': total, + 'p': round(bins_s * 100 / total, 2).to_list(), + 'title': title + } + return schemas.Msg(code=0, msg='ok', data=resp) if interval_type == 'def' and analysis == 'number_of_days': diff --git a/models/behavior_analysis.py b/models/behavior_analysis.py index cda4260..01a2442 100644 --- a/models/behavior_analysis.py +++ b/models/behavior_analysis.py @@ -83,6 +83,7 @@ class BehaviorAnalysis: user_filter = [] event_filter = [] filters = (*self.global_filters, *ext_filters) if g_f else (*ext_filters,) + filters = [] if filters == ([],) else filters for item in filters: if item['tableType'] == 'user': where = user_filter @@ -297,15 +298,16 @@ ORDER BY level event_date_col >= self.start_date, event_date_col <= self.end_date, event_name_col == event_name - ] + event_filter, _ = self.handler_filts(self.events[0]['filts']) + where.extend(event_filter) values_col = func.count().label('values') if analysis in ['number_of_days', 'number_of_hours']: values_col = func.count(func.distinct(e_account_id_col)).label('values') if analysis in ['times', 'number_of_days', 'number_of_hours']: - qry = sa.select(event_date_col, values_col) \ + qry = sa.select(event_date_col, *self.groupby, values_col) \ .where(and_(*where)) \ .group_by(event_date_col, *self.groupby, e_account_id_col) @@ -315,7 +317,8 @@ ORDER BY level 'sql': sql, 'interval_type': event['intervalType'], 'analysis': analysis, - 'quota_interval_arr': quota_interval_arr + 'quota_interval_arr': quota_interval_arr, + 'groupby': [i.key for i in self.groupby] } elif event.get('quota'): event_attr_col = getattr(self.event_tbl.c, event['quota'])