diff --git a/api/api_v1/endpoints/query.py b/api/api_v1/endpoints/query.py index 0111c0e..5a8eee2 100644 --- a/api/api_v1/endpoints/query.py +++ b/api/api_v1/endpoints/query.py @@ -287,46 +287,54 @@ async def retention_model(request: Request, res = await analysis.retention_model_sql2() sql = res['sql'] df = await ckdb.query_dataframe(sql) - if len(df) == 0: + if df.empty: return schemas.Msg(code=0, msg='无数据', data=None) - title = f'用户数' date_range = res['date_range'] unit_num = res['unit_num'] + retention_n = res['retention_n'] filter_item_type = res['filter_item_type'] filter_item = res['filter_item'] df.set_index('reg_date', inplace=True) - for i in set(date_range) - set(df.index): - df.loc[i] = 0 - df.sort_index(inplace=True) - days = [i for i in range(1, unit_num )] - summary_values = {} - today = datetime.datetime.today().date() + for d in set(res['date_range']) - set(df.index): + df.loc[d] = 0 - for date, value in df.T.items(): - tmp = summary_values.setdefault(date.strftime('%Y-%m-%d'), dict()) - tmp['d0'] = int(value.cnt0) + df.sort_index(inplace=True) + summary_values = {'均值': {}} + max_retention_n = 1 + avg = {} + avgo = {} + for date, v in df.T.items(): + tmp = summary_values.setdefault(date, dict()) + tmp['d0'] = int(v.cnt0) tmp['p'] = [] tmp['n'] = [] tmp['p_outflow'] = [] tmp['n_outflow'] = [] - for i in range(1, (today - date).days): - if i >= unit_num: - break - p = float(getattr(value, f'p{i + 1}')) - n = int(getattr(value, f'cnt{i + 1}')) - p_outflow = round(100 - p, 2) - n_outflow = value.cnt0 - n - tmp['p'].append(p) - tmp['n'].append(n) - tmp['p_outflow'].append(p_outflow) - tmp['n_outflow'].append(n_outflow) + for i in retention_n: + n = (pd.Timestamp.now().date() - date).days + if i > n: + continue + max_retention_n = i if i > max_retention_n else max_retention_n + avg[i] = avg.setdefault(i, 0) + v[f'cnt{i}'] + avgo[i] = avgo.setdefault(i, 0) + v[f'on{i}'] + tmp['p'].append(v[f'p{i}']) + tmp['n'].append(v[f'cnt{i}']) + tmp['p_outflow'].append(v[f'op{i}']) + tmp['n_outflow'].append(v[f'on{i}']) + tmp = summary_values['均值'] + tmp['d0'] = int(df['cnt0'].sum()) + tmp['p'] = (pd.Series(avg) * 100 / tmp['d0']).round(2).tolist() + tmp['n'] = pd.Series(avg).values.tolist() + tmp['p_outflow'] = (pd.Series(avgo) * 100 / tmp['d0']).round(2).tolist() + tmp['n_outflow'] = pd.Series(avgo).values.tolist() + + title = ['日期', '用户数', '次留', *[f'{i}留' for i in retention_n[1:] if i <= max_retention_n]] resp = { 'summary_values': summary_values, # 'values': values, - 'days': days, - 'date_range': [d.strftime('%Y-%m-%d') for d in date_range][:unit_num + 1], + 'date_range': [d.strftime('%Y-%m-%d') for d in date_range], 'title': title, 'filter_item_type': filter_item_type, 'filter_item': filter_item, @@ -757,7 +765,6 @@ async def scatter_model( resp['label'].append(f'[{quota_interval_arr[i]},{v})') bins.append(v) - # 这是整体的 for key, tmp_df in df.groupby('date'): bins_s = pd.cut(tmp_df['values'], bins=bins, @@ -784,7 +791,8 @@ async def scatter_model( title = '.'.join(key[1:]) date = key[0] resp['list'][date.strftime('%Y-%m-%d')][title] = {'n': bins_s.to_list(), 'total': total, - 'p': round((bins_s * 100 / total).fillna(0), 2).to_list(), + 'p': round((bins_s * 100 / total).fillna(0), + 2).to_list(), 'title': title } return schemas.Msg(code=0, msg='ok', data=resp) @@ -801,7 +809,7 @@ async def scatter_model( for key, tmp_df in df.groupby(['date']): total = len(tmp_df) if res['time_particle'] == 'total': - dt='合计' + dt = '合计' else: dt = key.strftime('%Y-%m-%d') labels_dict = {} diff --git a/models/behavior_analysis.py b/models/behavior_analysis.py index 214608d..bee1221 100644 --- a/models/behavior_analysis.py +++ b/models/behavior_analysis.py @@ -772,10 +772,21 @@ ORDER BY values desc""" days = (arrow.get(self.end_date).date() - arrow.get(self.start_date).date()).days keep = [] cnt = [] - for i in range(1, days + 1): + retention_n = [*[k for k in range(1, 31)], 45, 60, 90, 120, 180, 240, 300, 360] + + """ + cnt0-cnt1 as on1, + round(on1 * 100 / cnt0, 2) as `0p1`, + """ + + for i in retention_n: keep.append( - f"""cnt{i + 1},round(cnt{i + 1} * 100 / cnt0, 2) as `p{i + 1}`""") - cnt.append(f"""sum(if(dateDiff('day',a.reg_date,b.visit_date)={i},1,0)) as cnt{i + 1}""") + f"""cnt{i}, + round(cnt{i} * 100 / cnt0, 2) as `p{i}`, + cnt0-cnt{i} as on{i}, + round(on{i} * 100 / cnt0, 2) as `op{i}` + """) + cnt.append(f"""sum(if(dateDiff('day',a.reg_date,b.visit_date)={i},1,0)) as cnt{i}""") keep_str = ','.join(keep) cnt_str = ','.join(cnt) @@ -797,7 +808,7 @@ group by date) reg left join (select a.reg_date, {cnt_str} from (select date as reg_date, visit from {self.game}.event where `#event_name` = start_event and addHours(`#event_time`, {self.zone_time}) >= start_data and addHours(`#event_time`, {self.zone_time}) <= end_data and {where_a} group by reg_date, visit) a - left join (select date as visit_date, visit from {self.game}.event where retuen_visit and addHours(`#event_time`, {self.zone_time}) >= start_data and addHours(`#event_time`, {self.zone_time}) <= end_data and {where_b} group by visit_date, visit) b on + left join (select date as visit_date, visit from {self.game}.event where retuen_visit and addHours(`#event_time`, {self.zone_time}) >= start_data group by visit_date, visit) b on a.visit = b.visit group by a.reg_date) log on reg.date=log.reg_date """ @@ -806,6 +817,7 @@ group by a.reg_date) log on reg.date=log.reg_date 'sql': sql, 'date_range': self.date_range, 'unit_num': self.unit_num, + 'retention_n':retention_n, 'filter_item_type': filter_item_type, 'filter_item': filter_item, 'time_particle': self.time_particle,