From c33b583381c0d69fbb701c2a432637378e4903ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=80=C3=AE=C3=97=C3=9A=C3=95=C3=B1?=
Date: Tue, 23 Aug 2022 17:13:47 +0800
Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E4=BA=8B=E4=BB=B6=E9=A6=96?=
 =?UTF-8?q?=E6=AC=A1=E8=A7=A6=E5=8F=91=E9=97=B4=E9=9A=94=E5=88=86=E6=9E=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Note: the line breaks and indentation in this patch were reconstructed from a
whitespace-collapsed copy, so context lines may need `git apply
--ignore-whitespace`; the "index" blob ids are those of the original commit.

 api/api_v1/endpoints/query.py | 67 ++++++++++++++++++++++++++++++++++
 models/behavior_analysis.py   | 58 +++++++++++++++++++++++++++--
 2 files changed, 123 insertions(+), 2 deletions(-)

diff --git a/api/api_v1/endpoints/query.py b/api/api_v1/endpoints/query.py
index 1034160..4cd4b65 100644
--- a/api/api_v1/endpoints/query.py
+++ b/api/api_v1/endpoints/query.py
@@ -1643,6 +1643,73 @@ async def guide_model(
     return schemas.Msg(code=0, msg='ok', data=res_msg)
 
 
+@router.post("/first_event_model")
+async def first_event_model(
+        request: Request,
+        game: str,
+        ckdb: CKDrive = Depends(get_ck_db),
+        db: AsyncIOMotorDatabase = Depends(get_database),
+        analysis: BehaviorAnalysis = Depends(BehaviorAnalysis),
+        current_user: schemas.UserDB = Depends(deps.get_current_user)
+) -> schemas.Msg:
+    """Bucket the delay between a starting event and a first-time event.
+
+    Runs the SQL from ``BehaviorAnalysis.first_event_model_sql`` and sums the
+    last result column per ``dff_time`` into intervals built from the
+    report's ``group`` thresholds (default ``[30, 60, 120, 240]``):
+    ``['-', g0]``, ``[g0, g1]``, ..., ``[gN, '+']``. The lower bound of an
+    interval is inclusive and the upper bound exclusive.
+
+    Returns:
+        ``schemas.Msg`` whose ``data`` carries the interval labels
+        (``level``), the total per interval (``list``) and each interval's
+        percentage of the grand total (``p``). ``code=-9`` is returned when
+        the SQL cannot be built or the query yields no rows.
+    """
+    await analysis.init(data_where=current_user.data_where)
+    try:
+        res = await analysis.first_event_model_sql()
+    except Exception:
+        # Endpoint boundary: any failure while building the SQL is reported
+        # to the client as a report-configuration error.
+        return schemas.Msg(code=-9, msg='报表配置参数异常')
+
+    df = await ckdb.query_dataframe(res['sql'])
+    if df.empty:
+        return schemas.Msg(code=-9, msg='无数据', data=None)
+
+    # An empty or missing threshold list falls back to the defaults.
+    groups = analysis.event_view.get('group') or [30, 60, 120, 240]
+    bounds = ['-', *groups, '+']
+    step_list = [[low, high] for low, high in zip(bounds, bounds[1:])]
+
+    # Pre-seed every interval with 0 so empty ones still show up, in order.
+    dict_k = {str(step): 0 for step in step_list}
+    for k, nedf in df.groupby("dff_time"):
+        sum_num = sum(nedf.iloc[:, -1].to_list())
+        # A value matching no interval is accumulated under '' so that the
+        # totals stay complete.
+        key = ''
+        for low, high in step_list:
+            if (low == '-' or k >= low) and (high == '+' or k < high):
+                key = str([low, high])
+                break
+        dict_k[key] = dict_k.get(key, 0) + sum_num
+
+    all_num = sum(dict_k.values())
+    p_data = {
+        key: round(v * 100 / all_num, 2) if all_num else 0
+        for key, v in dict_k.items()
+    }
+
+    res_msg = {
+        'level': [str(step) for step in step_list],
+        'list': dict_k,
+        'p': p_data
+    }
+    return schemas.Msg(code=0, msg='ok', data=res_msg)
+
+
 @router.post("/scatter_model_details")
 async def scatter_model(
     request: Request,
diff --git a/models/behavior_analysis.py b/models/behavior_analysis.py
index cfba173..ad6ca9f 100644
--- a/models/behavior_analysis.py
+++ b/models/behavior_analysis.py
@@ -599,7 +599,7 @@ class BehaviorAnalysis:
             where = [event_name_col.in_(event_name)]
             event_filter, _ = await self.handler_filts((event['filts'], event.get('relation')),
                                                        (self.global_filters, self.global_relation),
-                                                       self.ext_filters,nu=idx
+                                                       self.ext_filters, nu=idx
                                                        )
             select_exprs.extend(self.groupby)
             qry = sa.select(
@@ -631,7 +631,7 @@ class BehaviorAnalysis:
             event_filter, user_filter = await self.handler_filts(
                 (event['filts'], event.get('relation', 'and')),
                 (self.global_filters, self.global_relation)
-                , self.ext_filters,nu=idx
+                , self.ext_filters, nu=idx
             )
 
             u_account_id_col = getattr(self.user_tbl.c, '#account_id')
@@ -941,6 +941,60 @@ ORDER BY level
             'end_date': self.end_date[:10],
         }
 
+    async def first_event_model_sql(self):
+        """Build the SQL for the first-event interval report.
+
+        ``self.events[0]`` is the starting event and ``self.events[1]`` the
+        queried event. Both are joined on ``#account_id`` and rows are
+        counted per ``dateDiff`` between their ``#event_time`` values. Only
+        queried-event rows inside the report's date range with
+        ``islishishouci == 1`` (presumably the "historical first
+        occurrence" flag - confirm against the table schema) are used.
+
+        ``event_view['event_diff']`` selects the unit (default ``'minute'``).
+        Per the ClickHouse documentation ``dateDiff`` counts the unit
+        boundaries crossed, e.g. 10:10:40 -> 10:12:12 is 2 minutes, and the
+        value is negative when the queried event precedes the starting one.
+
+        Returns:
+            dict with the ``sql`` text plus ``start_date`` / ``end_date``
+            cut to their first 10 characters.
+
+        Raises:
+            IndexError: fewer than two events are configured.
+            ValueError: ``event_diff`` is not a supported unit.
+        """
+        event_start = self.events[0]  # starting event
+        event_end = self.events[1]  # queried event
+
+        allowed_units = ('year', 'month', 'day', 'hour', 'minute', 'second')
+        sql_diff = self.event_view.get('event_diff', 'minute')
+        if sql_diff not in allowed_units:
+            # The unit is spliced into the SQL text, so anything outside the
+            # known dateDiff units is rejected instead of being executed.
+            raise ValueError(f'unsupported event_diff: {sql_diff!r}')
+
+        def quote(value):
+            # Event names come from the report configuration; escape them
+            # before they are placed inside a single-quoted SQL literal.
+            return str(value).replace('\\', '\\\\').replace("'", "\\'")
+
+        start_event_name = quote(event_start.get('eventName', 'create_account'))
+        end_event_name = quote(event_end.get('eventName', 'pay'))
+
+        # TODO: the filters configured on the queried event ('filts') are not
+        # applied yet, and the join below still needs optimising.
+        sql = f"""select dateDiff('{sql_diff}', a.`#event_time`, b.`#event_time`) as dff_time, count() as values from(select `#event_time`, `#account_id` from
+        {self.game}.event where `#event_name` == '{start_event_name}') a, (select `#event_time`, `#account_id` from {self.game}.event
+        where `#event_name` == '{end_event_name}' and addHours(`#event_time`, {self.zone_time}) >= '{self.start_date}' and
+        addHours(`#event_time`, {self.zone_time}) <= '{self.end_date}' and islishishouci == 1) b where a.`#account_id` == b.`#account_id` group by dff_time
+        """
+        return {
+            'sql': sql,
+            'start_date': self.start_date[:10],
+            'end_date': self.end_date[:10]
+        }
+
     async def guide_model_sql(self):
         # 事件步骤生成sql
         event = self.events[0]