新增事件首次触发间隔分析

This commit is contained in:
李宗振 2022-08-23 17:13:47 +08:00
parent 91eb67da02
commit c33b583381
2 changed files with 121 additions and 2 deletions

View File

@ -1643,6 +1643,82 @@ async def guide_model(
return schemas.Msg(code=0, msg='ok', data=res_msg)
@router.post("/first_event_model")
async def first_event_model(
        request: Request,
        game: str,
        ckdb: CKDrive = Depends(get_ck_db),
        db: AsyncIOMotorDatabase = Depends(get_database),
        analysis: BehaviorAnalysis = Depends(BehaviorAnalysis),
        current_user: schemas.UserDB = Depends(deps.get_current_user)
) -> schemas.Msg:
    """Model for analysing the time until an event is first triggered.

    Runs the SQL produced by ``analysis.first_event_model_sql()`` and buckets
    the returned ``dff_time`` values into intervals derived from
    ``analysis.event_view['group']`` (default ``[30, 60, 120, 240]``).

    The response payload contains:
        level: interval labels in order, e.g. ``"['-', 30]"``, ``"[30, 60]"``,
            ..., ``"[240, '+']"``.
        list: total of the query's last column per interval label.
        p: share of each label in percent, rounded to two decimals.

    ``request``, ``game`` and ``db`` are not read in the body itself.
    """
    await analysis.init(data_where=current_user.data_where)
    try:
        query = await analysis.first_event_model_sql()
    except Exception:
        # Any failure while building the SQL is reported to the client as an
        # invalid report configuration.
        return schemas.Msg(code=-9, msg='报表配置参数异常')

    df = await ckdb.query_dataframe(query['sql'])
    if df.empty:
        return schemas.Msg(code=-9, msg='无数据', data=None)

    # Turn the boundaries [b0, b1, ..., bn] into the intervals
    # ['-', b0], [b0, b1], ..., [bn, '+']; '-' / '+' mark the open ends.
    bounds = list(analysis.event_view.get('group', [30, 60, 120, 240]))
    intervals = []
    if bounds:
        intervals.append(['-', bounds[0]])
        intervals.extend([low, high] for low, high in zip(bounds, bounds[1:]))
        intervals.append([bounds[-1], '+'])
    labels = [str(interval) for interval in intervals]

    def label_of(diff):
        """Return the label of the first interval containing ``diff``, or ''."""
        for (low, high), label in zip(intervals, labels):
            if low == '-':
                if diff < high:
                    return label
            elif high == '+':
                if diff >= low:
                    return label
            elif low <= diff < high:
                return label
        return ''

    # Sum the last column of the result per interval; labels are inserted in
    # the order in which groupby yields their first matching dff_time.
    counts = {}
    for diff, rows in df.groupby("dff_time"):
        label = label_of(diff)
        counts[label] = counts.get(label, 0) + sum(rows.iloc[:, -1].to_list())

    # Intervals without any rows still appear in the result, with a zero.
    for label in labels:
        counts.setdefault(label, 0)

    total = sum(counts.values())
    percentages = {label: round(amount * 100 / total, 2) for label, amount in counts.items()}

    payload = {
        'level': labels,
        'list': counts,
        'p': percentages
    }
    return schemas.Msg(code=0, msg='ok', data=payload)
@router.post("/scatter_model_details")
async def scatter_model(
request: Request,

View File

@ -599,7 +599,7 @@ class BehaviorAnalysis:
where = [event_name_col.in_(event_name)]
event_filter, _ = await self.handler_filts((event['filts'], event.get('relation')),
(self.global_filters, self.global_relation),
self.ext_filters,nu=idx
self.ext_filters, nu=idx
)
select_exprs.extend(self.groupby)
qry = sa.select(
@ -631,7 +631,7 @@ class BehaviorAnalysis:
event_filter, user_filter = await self.handler_filts(
(event['filts'], event.get('relation', 'and')),
(self.global_filters, self.global_relation)
, self.ext_filters,nu=idx
, self.ext_filters, nu=idx
)
u_account_id_col = getattr(self.user_tbl.c, '#account_id')
@ -941,6 +941,49 @@ ORDER BY level
'end_date': self.end_date[:10],
}
async def first_event_model_sql(self):
    """Build the ClickHouse query for the "first trigger interval" analysis.

    The query pairs, per ``#account_id``, the rows of the start event
    (``self.events[0]``, default name ``create_account``) with the rows of the
    target event (``self.events[1]``, default name ``pay``) that fall inside
    ``[self.start_date, self.end_date]`` (event time shifted by
    ``self.zone_time`` hours) and have ``islishishouci == 1`` (presumably the
    "first occurrence ever" flag - verify against the event table schema).
    It then counts the pairs per ``dateDiff`` value, exposed as ``dff_time``.

    The unit comes from ``self.event_view['event_diff']`` (default
    ``'minute'``). It is validated against the units ClickHouse's ``dateDiff``
    accepts, and event names and dates are escaped, because all of them are
    interpolated into the SQL text.

    Returns:
        dict with ``sql`` (the query text) and ``start_date`` / ``end_date``
        (first 10 characters of the configured dates).

    Raises:
        IndexError: fewer than two events are configured.
        ValueError: ``event_diff`` is not a supported ``dateDiff`` unit.
    """
    allowed_units = ('year', 'quarter', 'month', 'week', 'day', 'hour', 'minute', 'second')

    def quote(value):
        """Render ``value`` as a single-quoted, escaped ClickHouse string literal."""
        text = str(value).replace('\\', '\\\\').replace("'", "\\'")
        return f"'{text}'"

    event_start = self.events[0]  # start event
    event_end = self.events[1]  # event whose first occurrence is analysed
    start_event_name = quote(event_start.get('eventName', 'create_account'))
    end_event_name = quote(event_end.get('eventName', 'pay'))

    # Accept the unit with or without surrounding quotes, in any letter case.
    raw_unit = self.event_view.get('event_diff', 'minute')
    unit = str(raw_unit).strip().strip('\'"').lower()
    if unit not in allowed_units:
        raise ValueError(f'unsupported event_diff unit: {raw_unit!r}')
    sql_diff = quote(unit)

    start_date = quote(self.start_date)
    end_date = quote(self.end_date)

    # Note carried over from the original author (ClickHouse 20.12.5.14):
    # dateDiff does not truncate partial units. Their examples:
    #   'year':   2020-11-01 00:00:00 -> 2021-02-01 00:00:30  => 1
    #   'month':  2021-01-21 00:00:00 -> 2021-03-02 00:00:30  => 2
    #   'day':    2021-01-01 10:00:00 -> 2021-01-02 01:00:30  => 1
    #   'hour':   2021-01-01 00:40:00 -> 2021-01-01 02:10:30  => 2
    #   'minute': 2021-01-01 10:10:40 -> 2021-01-01 10:12:12  => 2
    #   'second': 2021-01-01 00:00:40 -> 2021-01-01 00:00:30  => -10
    # NOTE(review): the original note calls this "rounding up"; the examples
    # are also consistent with counting crossed unit boundaries - confirm
    # against the ClickHouse documentation for the deployed version.
    # TODO: query still to be optimised (flagged by the original author).
    sql = f"""select dateDiff({sql_diff}, a.`#event_time`, b.`#event_time`) as dff_time, count() as values from(select `#event_time`, `#account_id` from
        {self.game}.event where `#event_name` == {start_event_name}) a, (select `#event_time`, `#account_id` from {self.game}.event
        where `#event_name` == {end_event_name} and addHours(`#event_time`, {self.zone_time}) >= {start_date} and
        addHours(`#event_time`, {self.zone_time}) <= {end_date} and islishishouci == 1) b where a.`#account_id` == b.`#account_id` group by dff_time
        """
    print(sql)
    return {
        'sql': sql,
        'start_date': self.start_date[:10],
        'end_date': self.end_date[:10]
    }
async def guide_model_sql(self):
# 事件步骤生成sql
event = self.events[0]