留存时长修复 (retention duration fix)

wuaho 2021-10-27 13:40:06 +08:00
parent 0cadeb283a
commit ad231cb82a
2 changed files with 51 additions and 31 deletions


@@ -287,46 +287,54 @@ async def retention_model(request: Request,
res = await analysis.retention_model_sql2()
sql = res['sql']
df = await ckdb.query_dataframe(sql)
if len(df) == 0:
if df.empty:
return schemas.Msg(code=0, msg='无数据', data=None)
title = f'用户数'
date_range = res['date_range']
unit_num = res['unit_num']
retention_n = res['retention_n']
filter_item_type = res['filter_item_type']
filter_item = res['filter_item']
df.set_index('reg_date', inplace=True)
for i in set(date_range) - set(df.index):
df.loc[i] = 0
df.sort_index(inplace=True)
days = [i for i in range(1, unit_num )]
summary_values = {}
today = datetime.datetime.today().date()
for d in set(res['date_range']) - set(df.index):
df.loc[d] = 0
for date, value in df.T.items():
tmp = summary_values.setdefault(date.strftime('%Y-%m-%d'), dict())
tmp['d0'] = int(value.cnt0)
df.sort_index(inplace=True)
summary_values = {'均值': {}}
max_retention_n = 1
avg = {}
avgo = {}
for date, v in df.T.items():
tmp = summary_values.setdefault(date, dict())
tmp['d0'] = int(v.cnt0)
tmp['p'] = []
tmp['n'] = []
tmp['p_outflow'] = []
tmp['n_outflow'] = []
for i in range(1, (today - date).days):
if i >= unit_num:
break
p = float(getattr(value, f'p{i + 1}'))
n = int(getattr(value, f'cnt{i + 1}'))
p_outflow = round(100 - p, 2)
n_outflow = value.cnt0 - n
tmp['p'].append(p)
tmp['n'].append(n)
tmp['p_outflow'].append(p_outflow)
tmp['n_outflow'].append(n_outflow)
for i in retention_n:
n = (pd.Timestamp.now().date() - date).days
if i > n:
continue
max_retention_n = i if i > max_retention_n else max_retention_n
avg[i] = avg.setdefault(i, 0) + v[f'cnt{i}']
avgo[i] = avgo.setdefault(i, 0) + v[f'on{i}']
tmp['p'].append(v[f'p{i}'])
tmp['n'].append(v[f'cnt{i}'])
tmp['p_outflow'].append(v[f'op{i}'])
tmp['n_outflow'].append(v[f'on{i}'])
tmp = summary_values['均值']
tmp['d0'] = int(df['cnt0'].sum())
tmp['p'] = (pd.Series(avg) * 100 / tmp['d0']).round(2).tolist()
tmp['n'] = pd.Series(avg).values.tolist()
tmp['p_outflow'] = (pd.Series(avgo) * 100 / tmp['d0']).round(2).tolist()
tmp['n_outflow'] = pd.Series(avgo).values.tolist()
title = ['日期', '用户数', '次留', *[f'{i}' for i in retention_n[1:] if i <= max_retention_n]]
resp = {
'summary_values': summary_values,
# 'values': values,
'days': days,
'date_range': [d.strftime('%Y-%m-%d') for d in date_range][:unit_num + 1],
'date_range': [d.strftime('%Y-%m-%d') for d in date_range],
'title': title,
'filter_item_type': filter_item_type,
'filter_item': filter_item,
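
Editor's note: for orientation, below is a minimal, self-contained sketch of the new per-date aggregation loop in the hunk above, run on a tiny synthetic frame. The cnt{i}/p{i}/on{i}/op{i} column names mirror what the reworked SQL is assumed to return, and the '均值' (average) key is rendered as "avg" here purely for illustration.

```python
import pandas as pd

# Synthetic stand-in for the query result: one row per registration date, with
# cohort size cnt0 and, per offset i, retained count cnt{i}, retention % p{i},
# outflow count on{i} and outflow % op{i}.
retention_n = [1, 2]
df = pd.DataFrame(
    {"cnt0": [100, 80],
     "cnt1": [40, 30], "p1": [40.0, 37.5], "on1": [60, 50], "op1": [60.0, 62.5],
     "cnt2": [20, 10], "p2": [20.0, 12.5], "on2": [80, 70], "op2": [80.0, 87.5]},
    index=pd.to_datetime(["2021-10-01", "2021-10-02"]).date,
)

summary_values = {"avg": {}}   # the endpoint uses the key '均值' here
avg, avgo = {}, {}
for date, v in df.T.items():   # one Series per registration date
    row = summary_values.setdefault(
        date, {"d0": int(v.cnt0), "p": [], "n": [], "p_outflow": [], "n_outflow": []})
    for i in retention_n:
        if i > (pd.Timestamp.now().date() - date).days:
            continue           # cohort not old enough yet for day-i retention
        avg[i] = avg.get(i, 0) + v[f"cnt{i}"]
        avgo[i] = avgo.get(i, 0) + v[f"on{i}"]
        row["p"].append(v[f"p{i}"])
        row["n"].append(v[f"cnt{i}"])
        row["p_outflow"].append(v[f"op{i}"])
        row["n_outflow"].append(v[f"on{i}"])

# Overall row: sums per offset, expressed against the total cohort size.
d0 = int(df["cnt0"].sum())
summary_values["avg"] = {
    "d0": d0,
    "p": (pd.Series(avg) * 100 / d0).round(2).tolist(),
    "n": pd.Series(avg).tolist(),
    "p_outflow": (pd.Series(avgo) * 100 / d0).round(2).tolist(),
    "n_outflow": pd.Series(avgo).tolist(),
}
```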
@@ -757,7 +765,6 @@ async def scatter_model(
resp['label'].append(f'[{quota_interval_arr[i]},{v})')
bins.append(v)
# 这是整体的
for key, tmp_df in df.groupby('date'):
bins_s = pd.cut(tmp_df['values'], bins=bins,
@@ -784,7 +791,8 @@ async def scatter_model(
title = '.'.join(key[1:])
date = key[0]
resp['list'][date.strftime('%Y-%m-%d')][title] = {'n': bins_s.to_list(), 'total': total,
'p': round((bins_s * 100 / total).fillna(0), 2).to_list(),
'p': round((bins_s * 100 / total).fillna(0),
2).to_list(),
'title': title
}
return schemas.Msg(code=0, msg='ok', data=resp)
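
Editor's note: the two scatter_model hunks above build '[a,b)' labels from quota boundaries, then count each date group's values per bin and convert the counts to percentages. A hypothetical standalone sketch of that flow (the loop header and the pd.cut keyword arguments are outside the hunks, so enumerate() and right=False are assumptions chosen to match the '[a,b)' labels):

```python
import pandas as pd

# Build left-closed/right-open labels and bin edges from made-up boundaries.
quota_interval_arr = [0, 5, 10, 20]
labels, bins = [], [quota_interval_arr[0]]
for i, v in enumerate(quota_interval_arr[1:]):
    labels.append(f'[{quota_interval_arr[i]},{v})')
    bins.append(v)

# Count one date group's synthetic values per bin and express each bin as a
# percentage of the group total, filling empty bins with 0.
values = pd.Series([1, 3, 4, 7, 8, 15])
bins_s = pd.cut(values, bins=bins, right=False).value_counts(sort=False)
total = int(values.count())

n = bins_s.to_list()                                       # [3, 2, 1]
p = round((bins_s * 100 / total).fillna(0), 2).to_list()   # [50.0, 33.33, 16.67]
print(labels, n, p)
```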


@@ -772,10 +772,21 @@ ORDER BY values desc"""
days = (arrow.get(self.end_date).date() - arrow.get(self.start_date).date()).days
keep = []
cnt = []
for i in range(1, days + 1):
retention_n = [*[k for k in range(1, 31)], 45, 60, 90, 120, 180, 240, 300, 360]
"""
cnt0-cnt1 as on1,
round(on1 * 100 / cnt0, 2) as `op1`,
"""
for i in retention_n:
keep.append(
f"""cnt{i + 1},round(cnt{i + 1} * 100 / cnt0, 2) as `p{i + 1}`""")
cnt.append(f"""sum(if(dateDiff('day',a.reg_date,b.visit_date)={i},1,0)) as cnt{i + 1}""")
f"""cnt{i},
round(cnt{i} * 100 / cnt0, 2) as `p{i}`,
cnt0-cnt{i} as on{i},
round(on{i} * 100 / cnt0, 2) as `op{i}`
""")
cnt.append(f"""sum(if(dateDiff('day',a.reg_date,b.visit_date)={i},1,0)) as cnt{i}""")
keep_str = ','.join(keep)
cnt_str = ','.join(cnt)
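
Editor's note: the new on{i}/op{i} columns complement the existing retained count and retention percentage. A small worked example of what the generated expressions evaluate to for one offset (assuming cnt0 is the day-0 cohort size and cnt7 the users seen again 7 days later):

```python
# Worked example of the generated columns for offset i = 7.
cnt0, cnt7 = 100, 40

p7 = round(cnt7 * 100 / cnt0, 2)   # p7  -> 40.0  day-7 retention %
on7 = cnt0 - cnt7                  # on7 -> 60    day-7 outflow (churned users)
op7 = round(on7 * 100 / cnt0, 2)   # op7 -> 60.0  day-7 outflow %
```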
@@ -797,7 +808,7 @@ group by date) reg left join
(select a.reg_date,
{cnt_str}
from (select date as reg_date, visit from {self.game}.event where `#event_name` = start_event and addHours(`#event_time`, {self.zone_time}) >= start_data and addHours(`#event_time`, {self.zone_time}) <= end_data and {where_a} group by reg_date, visit) a
left join (select date as visit_date, visit from {self.game}.event where retuen_visit and addHours(`#event_time`, {self.zone_time}) >= start_data and addHours(`#event_time`, {self.zone_time}) <= end_data and {where_b} group by visit_date, visit) b on
left join (select date as visit_date, visit from {self.game}.event where retuen_visit and addHours(`#event_time`, {self.zone_time}) >= start_data group by visit_date, visit) b on
a.visit = b.visit
group by a.reg_date) log on reg.date=log.reg_date
"""
@@ -806,6 +817,7 @@ group by a.reg_date) log on reg.date=log.reg_date
'sql': sql,
'date_range': self.date_range,
'unit_num': self.unit_num,
'retention_n':retention_n,
'filter_item_type': filter_item_type,
'filter_item': filter_item,
'time_particle': self.time_particle,
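
Editor's note: after this change the SQL builder also hands the offset list back to the endpoint, which iterates over it instead of a plain day range. Roughly, the returned dict now has this shape (illustrative placeholder values only, not real output):

```python
# Illustrative shape of the dict retention_model_sql2() returns to retention_model().
res = {
    "sql": "...generated ClickHouse query...",
    "date_range": ["2021-10-01", "2021-10-02"],   # the real code returns date objects
    "unit_num": 7,
    "retention_n": [*range(1, 31), 45, 60, 90, 120, 180, 240, 300, 360],
    "filter_item_type": "...",
    "filter_item": "...",
    "time_particle": "day",
}
```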