From 51d899c9d18364f6bd27383a589247650641d805 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E4=BC=9F?= <250213850@qq.com> Date: Thu, 28 Apr 2022 15:44:44 +0800 Subject: [PATCH] =?UTF-8?q?1.=E6=96=B0=E5=A2=9E=E5=88=86=E5=B8=83=E5=88=86?= =?UTF-8?q?=E6=9E=90=E4=B8=8B=E8=BD=BD=E5=8A=9F=E8=83=BD=202.=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E6=96=B0=E5=A2=9E=E4=BB=98=E8=B4=B9=E4=BA=BA=E6=95=B0?= =?UTF-8?q?=E8=AE=A1=E7=AE=97=E5=85=AC=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/api_v1/endpoints/query.py | 51 +++++++++++----- models/behavior_analysis.py | 17 ++++++ utils/func.py | 107 +++++++++++++++++++++++++++++++++- 3 files changed, 159 insertions(+), 16 deletions(-) diff --git a/api/api_v1/endpoints/query.py b/api/api_v1/endpoints/query.py index 3ea8ede..ab67d70 100644 --- a/api/api_v1/endpoints/query.py +++ b/api/api_v1/endpoints/query.py @@ -21,7 +21,7 @@ from db.redisdb import get_redis_pool, RedisDrive from models.behavior_analysis import BehaviorAnalysis, CombinationEvent from models.user_analysis import UserAnalysis from models.x_analysis import XAnalysis -from utils import DfToStream, getEveryDay +from utils import DfToStream, getEveryDay, Download_xlsx,jiange_insert,create_df,create_neidf router = APIRouter() @@ -1036,7 +1036,7 @@ async def scatter_model( } if not quota_interval_arr: - resp['label'] = [f'[{i},{i + interval})' for i in range(min_v, max_v, interval)] + resp['label'] = [f'[{i},{i + interval})' for i in range(min_v, max_v, interval)] bins = [i for i in range(min_v, max_v + interval, interval)] else: quota_interval_arr = [-float('inf')] + quota_interval_arr + [float('inf')] @@ -1054,13 +1054,15 @@ async def scatter_model( total = int(bins_s.sum()) if res['time_particle'] == 'total': resp['list']['合计'] = dict() + p = list(round(bins_s * 100 / total, 2).to_list()) resp['list']['合计']['总体'] = {'n': bins_s.to_list(), 'total': total, - 'p': round(bins_s * 100 / total, 2).to_list(), + 'p': [str(i)+'%' for i in p], 'title': '总体'} else: + p=list(round(bins_s * 100 / total, 2).to_list()) resp['list'][key.strftime('%Y-%m-%d')] = dict() resp['list'][key.strftime('%Y-%m-%d')]['总体'] = {'n': bins_s.to_list(), 'total': total, - 'p': round(bins_s * 100 / total, 2).to_list(), + 'p':[str(i)+'%' for i in p], 'title': '总体'} # 分组的 # if groupby: @@ -1076,6 +1078,11 @@ async def scatter_model( # 2).to_list(), # 'title': title # } + download=analysis.event_view.get('download','') + if download == 1: + creat_df = create_df(resp) + Download=Download_xlsx(creat_df, '分布分析') + return Download return schemas.Msg(code=0, msg='ok', data=resp) # elif analysis == 'number_of_days': @@ -1137,9 +1144,12 @@ async def scatter_model( df = await ckdb.query_dataframe(sql) for i in range(len(df)): resp['list'][str(df['date'][i])]['总体']['total']=int(df['values'][i]) - - - + #兼容下载功能 + download=analysis.event_view.get('download','') + if download == 1: + creat_df=create_df(resp) + Download=Download_xlsx(creat_df,'分布分析') + return Download return schemas.Msg(code=0, msg='ok', data=resp) # bins_s = pd.cut(tmp_df['values'], bins=bins, @@ -1206,16 +1216,19 @@ async def scatter_model( for i, v in enumerate(quota_interval_arr[1:]): resp['label'].append(f'[{quota_interval_arr[i]},{v})') bins.append(v) - + if 'float' in str(df.dtypes['va']): + df['va'] = df['va'].astype(int) # 这是分组的 for key, tmp_df in df.groupby('va'): + bins_s = pd.cut(tmp_df['values'], bins=bins, right=False).value_counts() bins_s.sort_index(inplace=True) total = int(bins_s.sum()) if res['time_particle'] == 'total': resp['list']['合计'] = dict() - resp['list']['合计']['总体'] = {'n': bins_s.to_list(), 'total': total, + + resp['list']['合计'] = {'n': bins_s.to_list(), 'total': total, 'p': round(bins_s * 100 / total, 2).to_list(), 'title': '总体'} else: @@ -1226,14 +1239,19 @@ async def scatter_model( if 'time' not in columnName: resp['list'][key] = dict() resp['list'][key] = {'n': bins_s.to_list(), 'total': total, - 'p': p, + 'p': [str(i)+'%' for i in p], 'title': '总体'} else: resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = dict() resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = {'n': bins_s.to_list(), 'total': total, - 'p': p, + 'p': [str(i)+'%' for i in p], 'title': '总体'} - + # 兼容下载功能 + download = analysis.event_view.get('download', '') + if download == 1: + create_df = create_neidf(resp,columnName) + Download=Download_xlsx(create_df, '分布分析') + return Download return schemas.Msg(code=0, msg='ok', data=resp) else: resp = {'list': {}, 'label': [], @@ -1295,8 +1313,13 @@ async def scatter_model( sql = f"""SELECT toDate(addHours({game}.event."#event_time", 8)) AS date, count(DISTINCT {game}.event."#account_id") AS values FROM {game}.event WHERE addHours({game}.event."#event_time", 8) >= '{start_date}' AND addHours({game}.event."#event_time", 8) <= '{end_date}' AND {game}.event."#event_name" = 'create_account' GROUP BY toDate(addHours({game}.event."#event_time", 8)) ORDER BY date""" df = await ckdb.query_dataframe(sql) for i in range(len(df)): - resp['list'][str(df['date'][i])]['总体']['total'] = int(df['values'][i]) - + resp['list'][str(df['date'][i])]['total'] = int(df['values'][i]) + # 兼容下载功能 + download = analysis.event_view.get('download', '') + if download == 1: + create_df = create_neidf(resp,columnName) + Download=Download_xlsx(create_df, '分布分析') + return Download return schemas.Msg(code=0, msg='ok', data=resp) else: return schemas.Msg(code=-9, msg='没有添加分组项', data='') diff --git a/models/behavior_analysis.py b/models/behavior_analysis.py index fc4b05d..29b6a57 100644 --- a/models/behavior_analysis.py +++ b/models/behavior_analysis.py @@ -383,6 +383,7 @@ class BehaviorAnalysis: event_time_col = getattr(self.event_tbl.c, '#event_time') for event in self.events: operator_ = event.get('operator_val','') + #排头显示名 event_name_display = event.get('eventNameDisplay') is_show = event.get('is_show', True) @@ -513,6 +514,22 @@ FROM {game}.event WHERE addHours({game}.event."#event_time", 8) >= '{stat_date}' AND addHours({game}.event."#event_time", 8) <= '{end_date}' GROUP BY toDate(addHours({game}.event."#event_time", 8))) as bb on aa.date = bb.date ORDER by date """ + # 单独把新增付费人数(以设备为维度)拿出来 + if event.get('event_attr') == '触发用户数' and ['is_new_device', 'orderid']== [i['columnName'] for i in event.get('filts')]: + stat_date=self.start_date + end_date=self.end_date + game=self.game + sql=f"""SELECT toDate(addHours("#event_time", 8)) as date, +round(uniqExact("#distinct_id"), 2) AS values FROM +(SELECT toDate(addHours("#event_time", 8)) as date,"#event_time",`#event_name`,`#distinct_id`,`#account_id` from {game}.event WHERE +addHours("#event_time", 8) >= '{stat_date}' AND addHours("#event_time", 8) <= '{end_date}' +and `#event_name` = 'pay' and orderid NOT LIKE '%GM%') a +inner join +(SELECT toDate(addHours("#event_time", 8)) as date,"#event_time",is_new_device,`#distinct_id`,`#event_name`,`#account_id` from {game}.event WHERE +addHours("#event_time", 8) >= '{stat_date}' AND addHours("#event_time", 8) <= '{end_date}' and +`#event_name` = 'create_account' and is_new_device = 1) b on a.`#distinct_id`= b.`#distinct_id` and a.date = b.date +GROUP BY toDate(addHours("#event_time", 8))""" + sqls.append({'sql': sql, 'groupby': [i.key for i in self.groupby], 'date_range': self.date_range, diff --git a/utils/func.py b/utils/func.py index 5f001ad..8dee789 100644 --- a/utils/func.py +++ b/utils/func.py @@ -1,7 +1,7 @@ import random import time import datetime - +import pandas as pd def get_uid(): return hex(int(time.time() * 10 ** 7) + random.randint(0, 10000))[2:] @@ -46,4 +46,107 @@ def getEveryDay(begin_date,end_date): date_list.append(date_str) begin_date += datetime.timedelta(days=1) return date_list -#print(getEveryDay('2016-01-01','2017-05-11')) \ No newline at end of file +#print(getEveryDay('2016-01-01','2017-05-11')) +def Download_xlsx(df,name): + """ + 下载功能 + name为文件名 + """ + from urllib.parse import quote + import mimetypes + from utils import DfToStream + from fastapi.responses import StreamingResponse + file_name=quote(f'{name}.xlsx') + mime = mimetypes.guess_type(file_name)[0] + df_to_stream = DfToStream((df, name)) + with df_to_stream as d: + export = d.to_stream() + Download=StreamingResponse(export, media_type=mime, headers={'Content-Disposition': f'filename="{file_name}"'}) + return Download + +def jiange_insert(list_date): + """ + 间隔1条插入一条数据插入数据 + :param day: list数据 + :return: list + """ + i = 1 + while i <= len(list_date): + list_date.insert(i, '-') + i += 2 + return list_date + +def create_df(resp): + """ + 分布分析外部下载功能的df数据 + """ + columns = resp['label'] + day = list(resp['list'].keys()) + jiange_insert(day) + date = [] + day_nu = 0 + for i in day: + if i == '-': + av = day[day_nu - 1] + day_date = resp['list'][av]['总体'] + else: + day_date = resp['list'][i]['总体'] + date_dict = {} + n = 0 + p = 0 + if i == '-': + date_dict['事件发生时间'] = '-' + date_dict['总人数'] = '-' + for nu in range(len(columns)): + date_dict[columns[nu]] = day_date['p'][p] + p += 1 + date.append(date_dict) + else: + date_dict['事件发生时间'] = i + date_dict['总人数'] = day_date['total'] + for nu in range(len(columns)): + date_dict[columns[nu]] = day_date['n'][n] + n += 1 + date.append(date_dict) + day_nu += 1 + columns.insert(0, '总人数') + columns.insert(0, '事件发生时间') + df = pd.DataFrame(data=date, columns=columns) + return df +def create_neidf(resp,columnName): + """ + 分布分析内部下载功能的df数据 + """ + columns = resp['label'] + day = list(resp['list'].keys()) + jiange_insert(day) + date = [] + day_nu = 0 + for i in day: + if i == '-': + av = day[day_nu - 1] + day_date = resp['list'][av] + else: + day_date = resp['list'][i] + date_dict = {} + n = 0 + p = 0 + if i == '-': + date_dict[columnName] = '-' + date_dict['全部用户数'] = '-' + for nu in range(len(columns)): + date_dict[columns[nu]] = day_date['p'][p] + p += 1 + date.append(date_dict) + else: + date_dict[columnName] = i + date_dict['全部用户数'] = day_date['total'] + for nu in range(len(columns)): + date_dict[columns[nu]] = day_date['n'][n] + n += 1 + date.append(date_dict) + day_nu += 1 + columns.insert(0, '全部用户数') + columns.insert(0, columnName) + df = pd.DataFrame(data=date, columns=columns) + return df \ No newline at end of file