This commit is contained in:
wuaho 2021-08-25 18:56:10 +08:00
parent 0964a0bae4
commit f6c8160424
9 changed files with 177 additions and 44 deletions

View File

@ -1,3 +1,4 @@
import datetime
from collections import defaultdict
import pandas as pd
@ -132,11 +133,63 @@ async def retention_model_sql(
) -> schemas.Msg:
"""留存查询 sql"""
await analysis.init()
data = analysis.retention_model_sql()
data = analysis.retention_model_sql2()
return schemas.Msg(code=0, msg='ok', data=[data])
@router.post("/retention_model")
async def retention_model(request: Request,
                          game: str,
                          ckdb: CKDrive = Depends(get_ck_db),
                          db: AsyncIOMotorDatabase = Depends(get_database),
                          analysis: BehaviorAnalysis = Depends(BehaviorAnalysis),
                          current_user: schemas.UserDB = Depends(deps.get_current_user)
                          ) -> schemas.Msg:
    """Run the retention query and shape the result for the client.

    The SQL, the requested date range and the number of retention units all
    come from ``analysis.retention_model_sql2()``.  The query result is indexed
    by ``reg_date``; dates of the requested range that are missing from the
    result are added as all-zero rows.

    Returns:
        ``schemas.Msg`` with ``code=0`` whose ``data`` holds:

        * ``summary_values`` - per registration date (``YYYY-MM-DD``): ``d0``
          (base count) plus the parallel lists ``p`` / ``n`` (retained percent
          and count) and ``p_outflow`` / ``n_outflow`` (their complements).
        * ``days`` - ``0..unit_num``.
        * ``date_range`` - first ``unit_num + 1`` requested dates, formatted.
        * ``title`` - display title.
    """
    await analysis.init()
    res = analysis.retention_model_sql2()
    sql = res['sql']
    df = await ckdb.query_dataframe(sql)
    title = '用户数'
    date_range = res['date_range']
    unit_num = res['unit_num']

    df.set_index('reg_date', inplace=True)
    # Dates with no rows in the query result still need an (all-zero) entry.
    for missing_date in set(date_range) - set(df.index):
        df.loc[missing_date] = 0
    df.sort_index(inplace=True)

    days = list(range(unit_num + 1))
    summary_values = {}
    today = datetime.datetime.today().date()
    for date, value in df.T.items():
        # Cast once: the row values are numpy scalars, and every number placed
        # in the response should be a plain Python int/float.
        cnt0 = int(value.cnt0)
        p_list = []
        n_list = []
        p_outflow_list = []
        n_outflow_list = []
        # Only days that have already elapsed since `date` are reported,
        # capped at `unit_num`.
        elapsed_days = (today - date).days
        for i in range(min(elapsed_days, unit_num) + 1):
            p = float(getattr(value, f'p{i + 1}'))
            n = int(getattr(value, f'cnt{i + 1}'))
            p_list.append(p)
            n_list.append(n)
            p_outflow_list.append(round(100 - p, 2))
            n_outflow_list.append(cnt0 - n)
        summary_values[date.strftime('%Y-%m-%d')] = {
            'd0': cnt0,
            'p': p_list,
            'n': n_list,
            'p_outflow': p_outflow_list,
            'n_outflow': n_outflow_list,
        }

    resp = {
        'summary_values': summary_values,
        'days': days,
        'date_range': [d.strftime('%Y-%m-%d') for d in date_range][:unit_num + 1],
        'title': title
    }
    return schemas.Msg(code=0, msg='ok', data=resp)
@router.post("/retention_model_del", deprecated=True)
async def retention_model(
request: Request,
game: str,

View File

@ -83,7 +83,7 @@ async def read_report(
dashboard = await crud.dashboard.get(db, id=data_in.dashboard_id)
# projection = {'query': False}
projection = None
reports = await crud.report.read_report(db, user_id=request.user.id, project_id=data_in.project_id,
reports = await crud.report.read_report(db,project_id=data_in.project_id,
projection=projection, **ext_where)
for item in reports:

View File

@ -21,8 +21,8 @@ class CRUDReport(CRUDBase):
[('project_id', pymongo.DESCENDING), ('name', pymongo.DESCENDING), ('user_id', pymongo.DESCENDING)],
unique=True)
async def read_report(self, db, user_id, project_id, projection=None, **kwargs):
where = {'user_id': user_id, 'project_id': project_id}
async def read_report(self, db, project_id, projection=None, **kwargs):
    """Return the reports of a project.

    The query always matches on ``project_id``; any extra keyword arguments
    are added to the filter as additional conditions.  ``projection`` is
    passed through to ``find_many`` unchanged.
    """
    criteria = {'project_id': project_id, **kwargs}
    return await self.find_many(db, criteria, projection)

View File

@ -584,3 +584,65 @@ ORDER BY values desc"""
return {
'sql': sql,
}
def retention_model_sql2(self):
    """Build the ClickHouse SQL for the retention model.

    ``self.events[0]`` is the start event and ``self.events[1]`` the return
    event (``'*'`` means any event).  For every day offset in the
    ``startTime``..``endTime`` window of ``self.event_view`` the query yields
    a count column ``cnt{k}`` and a percentage column ``p{k}`` (relative to
    ``cnt0``, the number of distinct visitors of the start event per day).

    Returns:
        dict with ``sql`` (the query text), ``date_range`` and ``unit_num``
        (both taken from the instance unchanged).
    """

    def compile_filters(filts):
        # Render an event's filters as a literal SQL predicate; '1' (always
        # true) when the event has no filters.
        where, _ = self.handler_filts(*filts)
        if not where:
            return '1'
        qry = sa.select().where(*where)
        compiled = str(qry.compile(compile_kwargs={"literal_binds": True}))
        # maxsplit=1: a literal filter value that itself contains 'WHERE '
        # must not cut the predicate short.
        return compiled.split('WHERE ', 1)[1]

    event_name_a = self.events[0]['eventName']
    event_name_b = self.events[1]['eventName']
    visit_name = self.events[0].get('event_attr_id')
    where_a = compile_filters(self.events[0].get('filts', []))
    where_b = compile_filters(self.events[1].get('filts', []))

    # '*' means "any event": the return-visit condition becomes the constant 1.
    event_name_b = 1 if event_name_b == '*' else f"`#event_name` = '{event_name_b}'"

    days = (arrow.get(self.event_view['endTime']).date() - arrow.get(self.event_view['startTime']).date()).days
    keep = []
    cnt = []
    # Column k + 1 describes visits that happen k days after the start event.
    for i in range(days + 1):
        keep.append(
            f"""cnt{i + 1},round(cnt{i + 1} * 100 / cnt0, 2) as `p{i + 1}`""")
        cnt.append(f"""sum(if(dateDiff('day',a.reg_date,b.visit_date)={i},1,0)) as cnt{i + 1}""")
    keep_str = ','.join(keep)
    cnt_str = ','.join(cnt)

    # NOTE(review): event names, the visit column and the dates are
    # interpolated into the SQL text without escaping; confirm they are
    # validated before they reach this method.
    sql = f"""
with '{event_name_a}' as start_event,
{event_name_b} as retuen_visit,
`{visit_name}` as visit,
'{self.start_date}' as start_data,
'{self.end_date}' as end_data,
toDate(addHours(`#event_time`, {self.zone_time})) as date
select reg_date,
cnt0 ,
{keep_str}
from(select date, uniqExact(visit) as cnt0 from {self.game}.event
where `#event_name` = start_event and addHours(`#event_time`, {self.zone_time}) >= start_data and addHours(`#event_time`, {self.zone_time}) <= end_data and {where_a}
group by date) reg left join
(select a.reg_date,
{cnt_str}
from (select date as reg_date, visit from {self.game}.event where `#event_name` = start_event and addHours(`#event_time`, {self.zone_time}) >= start_data and addHours(`#event_time`, {self.zone_time}) <= end_data and {where_a} group by reg_date, visit) a
left join (select date as visit_date, visit from {self.game}.event where retuen_visit and addHours(`#event_time`, {self.zone_time}) >= start_data and addHours(`#event_time`, {self.zone_time}) <= end_data and {where_b} group by visit_date, visit) b on
a.visit = b.visit
group by a.reg_date) log on reg.date=log.reg_date
"""
    # NOTE(review): debug output of the generated query; consider routing it
    # through the project's logger instead of stdout.
    print(sql)
    return {
        'sql': sql,
        'date_range': self.date_range,
        'unit_num': self.unit_num
    }

View File

@ -1,13 +1,29 @@
SELECT toStartOfDay(addHours(shjy.event."#event_time", 8)) AS date,
shjy.event."#event_name" AS event_name,
`app_name`,
arrayDistinct(groupArray(shjy.event."#account_id")) AS values,
length(values) as num
-- 无分组
with 'create_account' as start_event,
'login' as retuen_visit,
`#account_id` as visit,
toDate(addHours(`#event_time`, 8)) as date
FROM shjy.event
WHERE addHours(shjy.event."#event_time", 8) >= '2021-05-10 00:00:00'
AND addHours(shjy.event."#event_time", 8) < '2021-06-08 23:59:59'
AND shjy.event."#event_name" IN ('create_role', 'login')
GROUP BY toStartOfDay(addHours(shjy.event."#event_time", 8)), shjy.event."#event_name", `app_name`
ORDER BY date
LIMIT 1000
select reg_date,
cnt1,
if(dateDiff('day', reg_date, toDate(now())) >= 2, toString(round(cnt2 * 100 / cnt1, 2)), '-') as `2`,
if(dateDiff('day', reg_date, toDate(now())) >= 3, toString(round(cnt3 * 100 / cnt1, 2)), '-') as `3`,
if(dateDiff('day', reg_date, toDate(now())) >= 4, toString(round(cnt4 * 100 / cnt1, 2)), '-') as `4`,
if(dateDiff('day', reg_date, toDate(now())) >= 5, toString(round(cnt5 * 100 / cnt1, 2)), '-') as `5`,
if(dateDiff('day', reg_date, toDate(now())) >= 6, toString(round(cnt6 * 100 / cnt1, 2)), '-') as `6`,
if(dateDiff('day', reg_date, toDate(now())) >= 7, toString(round(cnt7 * 100 / cnt1, 2)), '-') as `7`
from(select date, uniqExact(visit) as cnt1 from zhengba.event
where `#event_name` = start_event
group by date) reg left join
(select a.reg_date,
sum(if(dateDiff('day',a.reg_date,b.visit_date)=1,1,0)) as cnt2,
sum(if(dateDiff('day',a.reg_date,b.visit_date)=2,1,0)) as cnt3,
sum(if(dateDiff('day',a.reg_date,b.visit_date)=3,1,0)) as cnt4,
sum(if(dateDiff('day',a.reg_date,b.visit_date)=4,1,0)) as cnt5,
sum(if(dateDiff('day',a.reg_date,b.visit_date)=5,1,0)) as cnt6,
sum(if(dateDiff('day',a.reg_date,b.visit_date)=6,1,0)) as cnt7
from (select date as reg_date, visit from zhengba.event where `#event_name` = start_event group by reg_date, visit) a
left join (select date as visit_date, visit from zhengba.event where `#event_name` = retuen_visit group by visit_date, visit) b on
a.visit = b.visit
group by a.reg_date) log on reg.date=log.reg_date

View File

@ -1,17 +0,0 @@
select date, account, login_date,
arrayMap((x,y)->x,account,login_date)
from (with groupArray(`binduid`) as account,
toDate(addHours(`#event_time`, 8)) as date
select date,
account
-- length(account) as num
from zhengba.event
where role_idx = 1
group by date) as tb_a
left join (select arrayJoin(groupArray(date)) as dd,
groupArray((date, login_account)) as login_date
from (with groupArray(distinct binduid) as login_account,
toDate(addHours(`#event_time`, 8)) as date
select date, login_account
from zhengba.event
group by date)) as tb_b on tb_a.date = tb_b.dd

View File

@ -1,7 +0,0 @@
select arrayJoin(groupArray(date)) as dd,
groupArray((date, login_account))
from (with groupArray(distinct binduid) as login_account,
toDate(addHours(`#event_time`, 8)) as date
select date, login_account
from zhengba.event
group by date)

View File

@ -1,4 +0,0 @@
select toDate(addHours(`#event_time`, 8))
from zhengba.event
where role_idx = 1
group by `binduid`

30
sql/留存带分组.sql Normal file
View File

@ -0,0 +1,30 @@
-- Retention report grouped by owner_name.
-- For every (reg_date, owner_name) pair: cnt1 is the number of distinct
-- accounts that fired the start event, and columns `2`..`7` are retention
-- percentages relative to cnt1.
--
-- Aliases shared by all sub-queries:
--   start_event  - name of the event that defines the cohort
--   retuen_visit - name of the event that counts as a return visit
--   visit        - column that identifies a visitor
--   date         - event time shifted by +8 hours, truncated to a date
--                  (NOTE(review): presumably a UTC+8 local date - confirm)
with 'create_account' as start_event,
'login' as retuen_visit,
`#account_id` as visit,
toDate(addHours(`#event_time`, 8)) as date
select reg_date,
owner_name,
cnt1,
-- Column `N` is rendered as a string; it shows '-' while fewer than N days
-- separate reg_date from today, otherwise round(cntN * 100 / cnt1, 2).
if(dateDiff('day', reg_date, toDate(now())) >= 2, toString(round(cnt2 * 100 / cnt1, 2)), '-') as `2`,
if(dateDiff('day', reg_date, toDate(now())) >= 3, toString(round(cnt3 * 100 / cnt1, 2)), '-') as `3`,
if(dateDiff('day', reg_date, toDate(now())) >= 4, toString(round(cnt4 * 100 / cnt1, 2)), '-') as `4`,
if(dateDiff('day', reg_date, toDate(now())) >= 5, toString(round(cnt5 * 100 / cnt1, 2)), '-') as `5`,
if(dateDiff('day', reg_date, toDate(now())) >= 6, toString(round(cnt6 * 100 / cnt1, 2)), '-') as `6`,
if(dateDiff('day', reg_date, toDate(now())) >= 7, toString(round(cnt7 * 100 / cnt1, 2)), '-') as `7`
-- reg: cohort size - distinct visitors of the start event per date and owner_name.
from(select date,owner_name, uniqExact(visit) as cnt1 from zhengba.event
where `#event_name` = start_event
group by date,owner_name) reg left join
-- log: per cohort, cntK counts the joined rows whose visit_date lies K-1 days
-- after reg_date.
(select a.reg_date,owner_name,
sum(if(dateDiff('day',a.reg_date,b.visit_date)=1,1,0)) as cnt2,
sum(if(dateDiff('day',a.reg_date,b.visit_date)=2,1,0)) as cnt3,
sum(if(dateDiff('day',a.reg_date,b.visit_date)=3,1,0)) as cnt4,
sum(if(dateDiff('day',a.reg_date,b.visit_date)=4,1,0)) as cnt5,
sum(if(dateDiff('day',a.reg_date,b.visit_date)=5,1,0)) as cnt6,
sum(if(dateDiff('day',a.reg_date,b.visit_date)=6,1,0)) as cnt7
-- a: one row per (reg_date, visit, owner_name) for the start event.
from (select date as reg_date,owner_name, visit from zhengba.event where `#event_name` = start_event group by reg_date, visit,owner_name) a
-- b: one row per (visit_date, visit, owner_name) for the return event.
left join (select date as visit_date,owner_name, visit from zhengba.event where `#event_name` = retuen_visit group by visit_date, visit,owner_name) b on
a.visit = b.visit and a.owner_name=b.owner_name
group by a.reg_date,a.owner_name) log on reg.date=log.reg_date and reg.owner_name=log.owner_name
order by reg_date