留存时长修复
This commit is contained in:
parent
0cadeb283a
commit
ad231cb82a
@ -287,46 +287,54 @@ async def retention_model(request: Request,
|
|||||||
res = await analysis.retention_model_sql2()
|
res = await analysis.retention_model_sql2()
|
||||||
sql = res['sql']
|
sql = res['sql']
|
||||||
df = await ckdb.query_dataframe(sql)
|
df = await ckdb.query_dataframe(sql)
|
||||||
if len(df) == 0:
|
if df.empty:
|
||||||
return schemas.Msg(code=0, msg='无数据', data=None)
|
return schemas.Msg(code=0, msg='无数据', data=None)
|
||||||
|
|
||||||
title = f'用户数'
|
|
||||||
date_range = res['date_range']
|
date_range = res['date_range']
|
||||||
unit_num = res['unit_num']
|
unit_num = res['unit_num']
|
||||||
|
retention_n = res['retention_n']
|
||||||
filter_item_type = res['filter_item_type']
|
filter_item_type = res['filter_item_type']
|
||||||
filter_item = res['filter_item']
|
filter_item = res['filter_item']
|
||||||
df.set_index('reg_date', inplace=True)
|
df.set_index('reg_date', inplace=True)
|
||||||
for i in set(date_range) - set(df.index):
|
for d in set(res['date_range']) - set(df.index):
|
||||||
df.loc[i] = 0
|
df.loc[d] = 0
|
||||||
df.sort_index(inplace=True)
|
|
||||||
days = [i for i in range(1, unit_num )]
|
|
||||||
summary_values = {}
|
|
||||||
today = datetime.datetime.today().date()
|
|
||||||
|
|
||||||
for date, value in df.T.items():
|
df.sort_index(inplace=True)
|
||||||
tmp = summary_values.setdefault(date.strftime('%Y-%m-%d'), dict())
|
summary_values = {'均值': {}}
|
||||||
tmp['d0'] = int(value.cnt0)
|
max_retention_n = 1
|
||||||
|
avg = {}
|
||||||
|
avgo = {}
|
||||||
|
for date, v in df.T.items():
|
||||||
|
tmp = summary_values.setdefault(date, dict())
|
||||||
|
tmp['d0'] = int(v.cnt0)
|
||||||
tmp['p'] = []
|
tmp['p'] = []
|
||||||
tmp['n'] = []
|
tmp['n'] = []
|
||||||
tmp['p_outflow'] = []
|
tmp['p_outflow'] = []
|
||||||
tmp['n_outflow'] = []
|
tmp['n_outflow'] = []
|
||||||
for i in range(1, (today - date).days):
|
for i in retention_n:
|
||||||
if i >= unit_num:
|
n = (pd.Timestamp.now().date() - date).days
|
||||||
break
|
if i > n:
|
||||||
p = float(getattr(value, f'p{i + 1}'))
|
continue
|
||||||
n = int(getattr(value, f'cnt{i + 1}'))
|
max_retention_n = i if i > max_retention_n else max_retention_n
|
||||||
p_outflow = round(100 - p, 2)
|
avg[i] = avg.setdefault(i, 0) + v[f'cnt{i}']
|
||||||
n_outflow = value.cnt0 - n
|
avgo[i] = avgo.setdefault(i, 0) + v[f'on{i}']
|
||||||
tmp['p'].append(p)
|
tmp['p'].append(v[f'p{i}'])
|
||||||
tmp['n'].append(n)
|
tmp['n'].append(v[f'cnt{i}'])
|
||||||
tmp['p_outflow'].append(p_outflow)
|
tmp['p_outflow'].append(v[f'op{i}'])
|
||||||
tmp['n_outflow'].append(n_outflow)
|
tmp['n_outflow'].append(v[f'on{i}'])
|
||||||
|
tmp = summary_values['均值']
|
||||||
|
tmp['d0'] = int(df['cnt0'].sum())
|
||||||
|
tmp['p'] = (pd.Series(avg) * 100 / tmp['d0']).round(2).tolist()
|
||||||
|
tmp['n'] = pd.Series(avg).values.tolist()
|
||||||
|
tmp['p_outflow'] = (pd.Series(avgo) * 100 / tmp['d0']).round(2).tolist()
|
||||||
|
tmp['n_outflow'] = pd.Series(avgo).values.tolist()
|
||||||
|
|
||||||
|
title = ['日期', '用户数', '次留', *[f'{i}留' for i in retention_n[1:] if i <= max_retention_n]]
|
||||||
|
|
||||||
resp = {
|
resp = {
|
||||||
'summary_values': summary_values,
|
'summary_values': summary_values,
|
||||||
# 'values': values,
|
# 'values': values,
|
||||||
'days': days,
|
'date_range': [d.strftime('%Y-%m-%d') for d in date_range],
|
||||||
'date_range': [d.strftime('%Y-%m-%d') for d in date_range][:unit_num + 1],
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'filter_item_type': filter_item_type,
|
'filter_item_type': filter_item_type,
|
||||||
'filter_item': filter_item,
|
'filter_item': filter_item,
|
||||||
@ -757,7 +765,6 @@ async def scatter_model(
|
|||||||
resp['label'].append(f'[{quota_interval_arr[i]},{v})')
|
resp['label'].append(f'[{quota_interval_arr[i]},{v})')
|
||||||
bins.append(v)
|
bins.append(v)
|
||||||
|
|
||||||
|
|
||||||
# 这是整体的
|
# 这是整体的
|
||||||
for key, tmp_df in df.groupby('date'):
|
for key, tmp_df in df.groupby('date'):
|
||||||
bins_s = pd.cut(tmp_df['values'], bins=bins,
|
bins_s = pd.cut(tmp_df['values'], bins=bins,
|
||||||
@ -784,7 +791,8 @@ async def scatter_model(
|
|||||||
title = '.'.join(key[1:])
|
title = '.'.join(key[1:])
|
||||||
date = key[0]
|
date = key[0]
|
||||||
resp['list'][date.strftime('%Y-%m-%d')][title] = {'n': bins_s.to_list(), 'total': total,
|
resp['list'][date.strftime('%Y-%m-%d')][title] = {'n': bins_s.to_list(), 'total': total,
|
||||||
'p': round((bins_s * 100 / total).fillna(0), 2).to_list(),
|
'p': round((bins_s * 100 / total).fillna(0),
|
||||||
|
2).to_list(),
|
||||||
'title': title
|
'title': title
|
||||||
}
|
}
|
||||||
return schemas.Msg(code=0, msg='ok', data=resp)
|
return schemas.Msg(code=0, msg='ok', data=resp)
|
||||||
|
@ -772,10 +772,21 @@ ORDER BY values desc"""
|
|||||||
days = (arrow.get(self.end_date).date() - arrow.get(self.start_date).date()).days
|
days = (arrow.get(self.end_date).date() - arrow.get(self.start_date).date()).days
|
||||||
keep = []
|
keep = []
|
||||||
cnt = []
|
cnt = []
|
||||||
for i in range(1, days + 1):
|
retention_n = [*[k for k in range(1, 31)], 45, 60, 90, 120, 180, 240, 300, 360]
|
||||||
|
|
||||||
|
"""
|
||||||
|
cnt0-cnt1 as on1,
|
||||||
|
round(on1 * 100 / cnt0, 2) as `0p1`,
|
||||||
|
"""
|
||||||
|
|
||||||
|
for i in retention_n:
|
||||||
keep.append(
|
keep.append(
|
||||||
f"""cnt{i + 1},round(cnt{i + 1} * 100 / cnt0, 2) as `p{i + 1}`""")
|
f"""cnt{i},
|
||||||
cnt.append(f"""sum(if(dateDiff('day',a.reg_date,b.visit_date)={i},1,0)) as cnt{i + 1}""")
|
round(cnt{i} * 100 / cnt0, 2) as `p{i}`,
|
||||||
|
cnt0-cnt{i} as on{i},
|
||||||
|
round(on{i} * 100 / cnt0, 2) as `op{i}`
|
||||||
|
""")
|
||||||
|
cnt.append(f"""sum(if(dateDiff('day',a.reg_date,b.visit_date)={i},1,0)) as cnt{i}""")
|
||||||
keep_str = ','.join(keep)
|
keep_str = ','.join(keep)
|
||||||
cnt_str = ','.join(cnt)
|
cnt_str = ','.join(cnt)
|
||||||
|
|
||||||
@ -797,7 +808,7 @@ group by date) reg left join
|
|||||||
(select a.reg_date,
|
(select a.reg_date,
|
||||||
{cnt_str}
|
{cnt_str}
|
||||||
from (select date as reg_date, visit from {self.game}.event where `#event_name` = start_event and addHours(`#event_time`, {self.zone_time}) >= start_data and addHours(`#event_time`, {self.zone_time}) <= end_data and {where_a} group by reg_date, visit) a
|
from (select date as reg_date, visit from {self.game}.event where `#event_name` = start_event and addHours(`#event_time`, {self.zone_time}) >= start_data and addHours(`#event_time`, {self.zone_time}) <= end_data and {where_a} group by reg_date, visit) a
|
||||||
left join (select date as visit_date, visit from {self.game}.event where retuen_visit and addHours(`#event_time`, {self.zone_time}) >= start_data and addHours(`#event_time`, {self.zone_time}) <= end_data and {where_b} group by visit_date, visit) b on
|
left join (select date as visit_date, visit from {self.game}.event where retuen_visit and addHours(`#event_time`, {self.zone_time}) >= start_data group by visit_date, visit) b on
|
||||||
a.visit = b.visit
|
a.visit = b.visit
|
||||||
group by a.reg_date) log on reg.date=log.reg_date
|
group by a.reg_date) log on reg.date=log.reg_date
|
||||||
"""
|
"""
|
||||||
@ -806,6 +817,7 @@ group by a.reg_date) log on reg.date=log.reg_date
|
|||||||
'sql': sql,
|
'sql': sql,
|
||||||
'date_range': self.date_range,
|
'date_range': self.date_range,
|
||||||
'unit_num': self.unit_num,
|
'unit_num': self.unit_num,
|
||||||
|
'retention_n':retention_n,
|
||||||
'filter_item_type': filter_item_type,
|
'filter_item_type': filter_item_type,
|
||||||
'filter_item': filter_item,
|
'filter_item': filter_item,
|
||||||
'time_particle': self.time_particle,
|
'time_particle': self.time_particle,
|
||||||
|
Loading…
Reference in New Issue
Block a user