From f6c8160424663237ecec668797fffbcd8652f990 Mon Sep 17 00:00:00 2001 From: wuaho Date: Wed, 25 Aug 2021 18:56:10 +0800 Subject: [PATCH] 1 --- api/api_v1/endpoints/query.py | 55 +++++++++++++++++++++++++++++- api/api_v1/endpoints/report.py | 2 +- crud/crud_report.py | 4 +-- models/behavior_analysis.py | 62 ++++++++++++++++++++++++++++++++++ sql/留存.sql | 40 +++++++++++++++------- sql/留存2.sql | 17 ---------- sql/留存3.sql | 7 ---- sql/留存4.sql | 4 --- sql/留存带分组.sql | 30 ++++++++++++++++ 9 files changed, 177 insertions(+), 44 deletions(-) delete mode 100644 sql/留存2.sql delete mode 100644 sql/留存3.sql delete mode 100644 sql/留存4.sql create mode 100644 sql/留存带分组.sql diff --git a/api/api_v1/endpoints/query.py b/api/api_v1/endpoints/query.py index 0fff571..8601000 100644 --- a/api/api_v1/endpoints/query.py +++ b/api/api_v1/endpoints/query.py @@ -1,3 +1,4 @@ +import datetime from collections import defaultdict import pandas as pd @@ -132,11 +133,63 @@ async def retention_model_sql( ) -> schemas.Msg: """留存查询 sql""" await analysis.init() - data = analysis.retention_model_sql() + data = analysis.retention_model_sql2() return schemas.Msg(code=0, msg='ok', data=[data]) @router.post("/retention_model") +async def retention_model(request: Request, + game: str, + ckdb: CKDrive = Depends(get_ck_db), + db: AsyncIOMotorDatabase = Depends(get_database), + analysis: BehaviorAnalysis = Depends(BehaviorAnalysis), + current_user: schemas.UserDB = Depends(deps.get_current_user) + ) -> schemas.Msg: + await analysis.init() + res = analysis.retention_model_sql2() + sql = res['sql'] + df = await ckdb.query_dataframe(sql) + + title = f'用户数' + date_range = res['date_range'] + unit_num = res['unit_num'] + df.set_index('reg_date', inplace=True) + for i in set(date_range) - set(df.index): + df.loc[i] = 0 + df.sort_index(inplace=True) + days = [i for i in range(unit_num+1)] + summary_values = {} + today = datetime.datetime.today().date() + for date, value in df.T.items(): + tmp = summary_values.setdefault(date.strftime('%Y-%m-%d'), dict()) + tmp['d0'] = int(value.cnt0) + tmp['p'] = [] + tmp['n'] = [] + tmp['p_outflow'] = [] + tmp['n_outflow'] = [] + for i in range((today - date).days+1): + if i > unit_num: + break + p = float(getattr(value, f'p{i+1}')) + n = int(getattr(value, f'cnt{i+1}')) + p_outflow = round(100 - p, 2) + n_outflow = value.cnt0 - n + tmp['p'].append(p) + tmp['n'].append(n) + tmp['p_outflow'].append(p_outflow) + tmp['n_outflow'].append(n_outflow) + + resp = { + 'summary_values': summary_values, + # 'values': values, + 'days': days, + 'date_range': [d.strftime('%Y-%m-%d') for d in date_range][:unit_num + 1], + 'title': title + } + return schemas.Msg(code=0, msg='ok', data=resp) + + +@router.post("/retention_model_del", deprecated=True) async def retention_model( request: Request, game: str, diff --git a/api/api_v1/endpoints/report.py b/api/api_v1/endpoints/report.py index 40bcd3d..7f3dd6a 100644 --- a/api/api_v1/endpoints/report.py +++ b/api/api_v1/endpoints/report.py @@ -83,7 +83,7 @@ async def read_report( dashboard = await crud.dashboard.get(db, id=data_in.dashboard_id) # projection = {'query': False} projection = None - reports = await crud.report.read_report(db, user_id=request.user.id, project_id=data_in.project_id, + reports = await crud.report.read_report(db,project_id=data_in.project_id, projection=projection, **ext_where) for item in reports: diff --git a/crud/crud_report.py b/crud/crud_report.py index af76aa6..dad1e3f 100644 --- a/crud/crud_report.py +++ b/crud/crud_report.py @@ -21,8 +21,8 @@ class CRUDReport(CRUDBase): [('project_id', pymongo.DESCENDING), ('name', pymongo.DESCENDING), ('user_id', pymongo.DESCENDING)], unique=True) - async def read_report(self, db, user_id, project_id, projection=None, **kwargs): - where = {'user_id': user_id, 'project_id': project_id} + async def read_report(self, db, project_id, projection=None, **kwargs): + where = {'project_id': project_id} where.update(**kwargs) res = await self.find_many(db, where, projection) return res diff --git a/models/behavior_analysis.py b/models/behavior_analysis.py index 9b9e5bf..48f5597 100644 --- a/models/behavior_analysis.py +++ b/models/behavior_analysis.py @@ -584,3 +584,65 @@ ORDER BY values desc""" return { 'sql': sql, } + + def retention_model_sql2(self): + event_name_a = self.events[0]['eventName'] + event_name_b = self.events[1]['eventName'] + + visit_name = self.events[0].get('event_attr_id') + + where, _ = self.handler_filts(*self.events[0].get('filts', [])) + where_a = '1' + if where: + qry = sa.select().where(*where) + sql = str(qry.compile(compile_kwargs={"literal_binds": True})) + where_a = sql.split('WHERE ')[1] + + where, _ = self.handler_filts(*self.events[1].get('filts', [])) + where_b = '1' + if where: + qry = sa.select().where(*where) + sql = str(qry.compile(compile_kwargs={"literal_binds": True})) + where_b = sql.split('WHERE ')[1] + + # 任意事件 + event_name_b = 1 if event_name_b == '*' else f"`#event_name` = '{event_name_b}'" + + days = (arrow.get(self.event_view['endTime']).date() - arrow.get(self.event_view['startTime']).date()).days + keep = [] + cnt = [] + for i in range(days+1): + keep.append( + f"""cnt{i + 1},round(cnt{i + 1} * 100 / cnt0, 2) as `p{i + 1}`""") + cnt.append(f"""sum(if(dateDiff('day',a.reg_date,b.visit_date)={i},1,0)) as cnt{i + 1}""") + keep_str = ','.join(keep) + cnt_str = ','.join(cnt) + + sql = f""" +with '{event_name_a}' as start_event, + {event_name_b} as retuen_visit, + `{visit_name}` as visit, + '{self.start_date}' as start_data, + '{self.end_date}' as end_data, + toDate(addHours(`#event_time`, {self.zone_time})) as date + +select reg_date, + cnt0 , + {keep_str} + + from(select date, uniqExact(visit) as cnt0 from {self.game}.event +where `#event_name` = start_event and addHours(`#event_time`, {self.zone_time}) >= start_data and addHours(`#event_time`, {self.zone_time}) <= end_data and {where_a} +group by date) reg left join +(select a.reg_date, + {cnt_str} +from (select date as reg_date, visit from {self.game}.event where `#event_name` = start_event and addHours(`#event_time`, {self.zone_time}) >= start_data and addHours(`#event_time`, {self.zone_time}) <= end_data and {where_a} group by reg_date, visit) a + left join (select date as visit_date, visit from {self.game}.event where retuen_visit and addHours(`#event_time`, {self.zone_time}) >= start_data and addHours(`#event_time`, {self.zone_time}) <= end_data and {where_b} group by visit_date, visit) b on +a.visit = b.visit +group by a.reg_date) log on reg.date=log.reg_date +""" + print(sql) + return { + 'sql': sql, + 'date_range':self.date_range, + 'unit_num': self.unit_num + } diff --git a/sql/留存.sql b/sql/留存.sql index 610fe4f..e69280d 100644 --- a/sql/留存.sql +++ b/sql/留存.sql @@ -1,13 +1,29 @@ -SELECT toStartOfDay(addHours(shjy.event."#event_time", 8)) AS date, - shjy.event."#event_name" AS event_name, - `app_name`, - arrayDistinct(groupArray(shjy.event."#account_id")) AS values, - length(values) as num +-- 无分组 +with 'create_account' as start_event, + 'login' as retuen_visit, + `#account_id` as visit, + toDate(addHours(`#event_time`, 8)) as date -FROM shjy.event -WHERE addHours(shjy.event."#event_time", 8) >= '2021-05-10 00:00:00' - AND addHours(shjy.event."#event_time", 8) < '2021-06-08 23:59:59' - AND shjy.event."#event_name" IN ('create_role', 'login') -GROUP BY toStartOfDay(addHours(shjy.event."#event_time", 8)), shjy.event."#event_name", `app_name` -ORDER BY date -LIMIT 1000 \ No newline at end of file +select reg_date, + cnt1, + if(dateDiff('day', reg_date, toDate(now())) >= 2, toString(round(cnt2 * 100 / cnt1, 2)), '-') as `2留`, + if(dateDiff('day', reg_date, toDate(now())) >= 3, toString(round(cnt3 * 100 / cnt1, 2)), '-') as `3留`, + if(dateDiff('day', reg_date, toDate(now())) >= 4, toString(round(cnt4 * 100 / cnt1, 2)), '-') as `4留`, + if(dateDiff('day', reg_date, toDate(now())) >= 5, toString(round(cnt5 * 100 / cnt1, 2)), '-') as `5留`, + if(dateDiff('day', reg_date, toDate(now())) >= 6, toString(round(cnt6 * 100 / cnt1, 2)), '-') as `6留`, + if(dateDiff('day', reg_date, toDate(now())) >= 7, toString(round(cnt7 * 100 / cnt1, 2)), '-') as `7留` + + from(select date, uniqExact(visit) as cnt1 from zhengba.event +where `#event_name` = start_event +group by date) reg left join +(select a.reg_date, + sum(if(dateDiff('day',a.reg_date,b.visit_date)=1,1,0)) as cnt2, + sum(if(dateDiff('day',a.reg_date,b.visit_date)=2,1,0)) as cnt3, + sum(if(dateDiff('day',a.reg_date,b.visit_date)=3,1,0)) as cnt4, + sum(if(dateDiff('day',a.reg_date,b.visit_date)=4,1,0)) as cnt5, + sum(if(dateDiff('day',a.reg_date,b.visit_date)=5,1,0)) as cnt6, + sum(if(dateDiff('day',a.reg_date,b.visit_date)=6,1,0)) as cnt7 +from (select date as reg_date, visit from zhengba.event where `#event_name` = start_event group by reg_date, visit) a + left join (select date as visit_date, visit from zhengba.event where `#event_name` = retuen_visit group by visit_date, visit) b on +a.visit = b.visit +group by a.reg_date) log on reg.date=log.reg_date diff --git a/sql/留存2.sql b/sql/留存2.sql deleted file mode 100644 index eba649e..0000000 --- a/sql/留存2.sql +++ /dev/null @@ -1,17 +0,0 @@ -select date, account, login_date, -arrayMap((x,y)->x,account,login_date) - from (with groupArray(`binduid`) as account, - toDate(addHours(`#event_time`, 8)) as date - select date, - account --- length(account) as num - from zhengba.event - where role_idx = 1 - group by date) as tb_a - left join (select arrayJoin(groupArray(date)) as dd, - groupArray((date, login_account)) as login_date - from (with groupArray(distinct binduid) as login_account, - toDate(addHours(`#event_time`, 8)) as date - select date, login_account - from zhengba.event - group by date)) as tb_b on tb_a.date = tb_b.dd \ No newline at end of file diff --git a/sql/留存3.sql b/sql/留存3.sql deleted file mode 100644 index 7fff510..0000000 --- a/sql/留存3.sql +++ /dev/null @@ -1,7 +0,0 @@ -select arrayJoin(groupArray(date)) as dd, - groupArray((date, login_account)) -from (with groupArray(distinct binduid) as login_account, - toDate(addHours(`#event_time`, 8)) as date - select date, login_account - from zhengba.event - group by date) \ No newline at end of file diff --git a/sql/留存4.sql b/sql/留存4.sql deleted file mode 100644 index 1d31e15..0000000 --- a/sql/留存4.sql +++ /dev/null @@ -1,4 +0,0 @@ -select toDate(addHours(`#event_time`, 8)) -from zhengba.event -where role_idx = 1 -group by `binduid` diff --git a/sql/留存带分组.sql b/sql/留存带分组.sql new file mode 100644 index 0000000..d920156 --- /dev/null +++ b/sql/留存带分组.sql @@ -0,0 +1,30 @@ +with 'create_account' as start_event, + 'login' as retuen_visit, + `#account_id` as visit, + toDate(addHours(`#event_time`, 8)) as date + +select reg_date, + owner_name, + cnt1, + if(dateDiff('day', reg_date, toDate(now())) >= 2, toString(round(cnt2 * 100 / cnt1, 2)), '-') as `2留`, + if(dateDiff('day', reg_date, toDate(now())) >= 3, toString(round(cnt3 * 100 / cnt1, 2)), '-') as `3留`, + if(dateDiff('day', reg_date, toDate(now())) >= 4, toString(round(cnt4 * 100 / cnt1, 2)), '-') as `4留`, + if(dateDiff('day', reg_date, toDate(now())) >= 5, toString(round(cnt5 * 100 / cnt1, 2)), '-') as `5留`, + if(dateDiff('day', reg_date, toDate(now())) >= 6, toString(round(cnt6 * 100 / cnt1, 2)), '-') as `6留`, + if(dateDiff('day', reg_date, toDate(now())) >= 7, toString(round(cnt7 * 100 / cnt1, 2)), '-') as `7留` + + from(select date,owner_name, uniqExact(visit) as cnt1 from zhengba.event +where `#event_name` = start_event +group by date,owner_name) reg left join +(select a.reg_date,owner_name, + sum(if(dateDiff('day',a.reg_date,b.visit_date)=1,1,0)) as cnt2, + sum(if(dateDiff('day',a.reg_date,b.visit_date)=2,1,0)) as cnt3, + sum(if(dateDiff('day',a.reg_date,b.visit_date)=3,1,0)) as cnt4, + sum(if(dateDiff('day',a.reg_date,b.visit_date)=4,1,0)) as cnt5, + sum(if(dateDiff('day',a.reg_date,b.visit_date)=5,1,0)) as cnt6, + sum(if(dateDiff('day',a.reg_date,b.visit_date)=6,1,0)) as cnt7 +from (select date as reg_date,owner_name, visit from zhengba.event where `#event_name` = start_event group by reg_date, visit,owner_name) a + left join (select date as visit_date,owner_name, visit from zhengba.event where `#event_name` = retuen_visit group by visit_date, visit,owner_name) b on +a.visit = b.visit and a.owner_name=b.owner_name +group by a.reg_date,a.owner_name) log on reg.date=log.reg_date and reg.owner_name=log.owner_name +order by reg_date \ No newline at end of file