留存分析分组自定义
This commit is contained in:
parent
f2be717f3e
commit
27501bda49
@ -576,8 +576,51 @@ async def retention_model(request: Request,
|
||||
filter_item_type = res['filter_item_type'] # all
|
||||
filter_item = res['filter_item'] # 列表 0,1,3,7,14,21,30
|
||||
# 映射对应中文返回给前端展示
|
||||
groupby_list=analysis.event_view.get('groupBy')
|
||||
groupby_list = analysis.event_view.get('groupBy')
|
||||
groupby = [i['columnName'] for i in groupby_list if i['tableType'] != 'user_label']
|
||||
true_group = [] # 定义分组实际选择
|
||||
for g_data in groupby_list:
|
||||
data_type = g_data['data_type']
|
||||
|
||||
# 不是int类型
|
||||
if data_type != "int":
|
||||
true_group.append("str")
|
||||
continue
|
||||
|
||||
# 自定义区间
|
||||
if g_data['intervalType'] == 'user_defined':
|
||||
int_range = analysis.event_view.get('groupBy')[0]['quotaIntervalArr']
|
||||
chk_range = []
|
||||
for index, value in enumerate(int_range):
|
||||
# 开头
|
||||
if index == 0:
|
||||
chk_range.append(['-', value])
|
||||
# 至少有两个数
|
||||
if len(int_range) >= 2:
|
||||
chk_range.append([value, int_range[index + 1]])
|
||||
continue
|
||||
# 结尾
|
||||
if index + 1 >= len(int_range):
|
||||
chk_range.append([value, '+'])
|
||||
continue
|
||||
# 中间
|
||||
chk_range.append([value, int_range[index + 1]])
|
||||
true_group.append(chk_range)
|
||||
|
||||
# 默认区间
|
||||
elif g_data['intervalType'] == 'def':
|
||||
zidai = []
|
||||
max_v = int(df[g_data['columnName']].max())
|
||||
min_v = int(df[g_data['columnName']].min())
|
||||
interval = (max_v - min_v) // 10 or 1
|
||||
for i in range(min_v, max_v, interval):
|
||||
zidai.append([i, i + interval])
|
||||
true_group.append(zidai)
|
||||
|
||||
# 离散数字
|
||||
else:
|
||||
true_group.append('discrete')
|
||||
|
||||
if len(groupby_list) == 1:
|
||||
max_v = int(df[groupby_list[0]['columnName']].max())
|
||||
min_v = int(df[groupby_list[0]['columnName']].min())
|
||||
@ -592,7 +635,7 @@ async def retention_model(request: Request,
|
||||
for k, v in chinese.items():
|
||||
# 开始映射
|
||||
df.loc[df['svrindex'] == k, 'svrindex'] = v
|
||||
times=df['reg_date'][0]
|
||||
times = df['reg_date'][0]
|
||||
df.set_index(groupby, inplace=True)
|
||||
# for d in set(res['date_range']) - set(df.index):
|
||||
# df.loc[d] = 0
|
||||
@ -676,65 +719,82 @@ async def retention_model(request: Request,
|
||||
tmp['p_outflow'].append(n)
|
||||
tmp['n_outflow'].append(rd['o_cntn'])
|
||||
# 如果分组项是int类型按选择的分组
|
||||
|
||||
# 默认区间
|
||||
if analysis.event_view.get('groupBy')[0]['intervalType'] == 'def':
|
||||
if '均值' in summary_valuess:
|
||||
summary_valuess.pop('均值')
|
||||
interval = (max_v - min_v) // 10 or 1
|
||||
lens = len(summary_valuess[max_v]['n'])
|
||||
ress = {}
|
||||
for i in range(min_v, max_v, interval):
|
||||
d0 = 0
|
||||
n1 = []
|
||||
n_outflow1 = []
|
||||
for k, v in summary_valuess.items():
|
||||
if k >= i and k < i + interval:
|
||||
d0 += v['d0']
|
||||
n1.append(v['n'])
|
||||
n_outflow1.append(v['n_outflow'])
|
||||
if len(n1) > 0:
|
||||
re_dict = {}
|
||||
n = np.sum([ii for ii in n1], axis=0).tolist()
|
||||
n_outflow = np.sum([iii for iii in n_outflow1], axis=0).tolist()
|
||||
p = [round(nu*100 / d0, 2) for nu in n]
|
||||
p_outflow = [round(num*100 / d0, 2) for num in n_outflow]
|
||||
re_dict['d0'] = d0
|
||||
re_dict['n'] = n
|
||||
re_dict['n_outflow'] = n_outflow
|
||||
re_dict['p'] = p
|
||||
re_dict['p_outflow'] = p_outflow
|
||||
ress[f"[{i},{i + interval})"] = re_dict
|
||||
else:
|
||||
re_dict = {'d0': 0}
|
||||
n = []
|
||||
n_outflow = []
|
||||
p = []
|
||||
p_outflow = []
|
||||
for cishu in range(0, lens):
|
||||
n.append(0)
|
||||
n_outflow.append(0)
|
||||
p.append(0)
|
||||
p_outflow.append(0)
|
||||
re_dict['n'] = n
|
||||
re_dict['n_outflow'] = n_outflow
|
||||
re_dict['p'] = p
|
||||
re_dict['p_outflow'] = p_outflow
|
||||
ress[f"[{i},{i + interval})"] = re_dict
|
||||
summary_valuess=ress
|
||||
# 自定义区间
|
||||
elif analysis.event_view.get('groupBy')[0]['intervalType'] == 'user_defined':
|
||||
pass
|
||||
# 次留数
|
||||
if "['均值']" in summary_valuess:
|
||||
summary_valuess.pop("['均值']")
|
||||
new_summary_valuess = {}
|
||||
for group_key, group_data in summary_valuess.items():
|
||||
key_list = eval(group_key)
|
||||
true_key = [] # 重新定义后的分组
|
||||
for index, value in enumerate(key_list):
|
||||
|
||||
true_group_index = true_group[index]
|
||||
# 默认区间或者自定义区间
|
||||
if isinstance(true_group_index, list):
|
||||
for defined_list in true_group_index:
|
||||
defined_list_max = defined_list[1]
|
||||
defined_list_min = defined_list[0]
|
||||
if defined_list_min == '-':
|
||||
if value < defined_list_max:
|
||||
true_key.append(defined_list)
|
||||
break
|
||||
else:
|
||||
continue
|
||||
if defined_list_max == '+':
|
||||
if value >= defined_list_min:
|
||||
true_key.append(defined_list)
|
||||
break
|
||||
else:
|
||||
continue
|
||||
|
||||
if defined_list_min <= value < defined_list_max:
|
||||
true_key.append(defined_list)
|
||||
break
|
||||
continue
|
||||
continue
|
||||
|
||||
# 分组是字符串或者离散直接取这个值的str类型
|
||||
if true_group_index in ['str', 'discrete']:
|
||||
true_key.append(str(value))
|
||||
continue
|
||||
|
||||
# 这个分组不存在:
|
||||
if str(true_key) not in new_summary_valuess:
|
||||
new_summary_valuess[str(true_key)] = group_data
|
||||
continue
|
||||
|
||||
# 这个分组已存在
|
||||
# d0相加
|
||||
new_summary_valuess[str(true_key)]['d0'] += group_data['d0']
|
||||
|
||||
# n相加
|
||||
n_list = new_summary_valuess[str(true_key)]['n']
|
||||
n_list1 = group_data['n']
|
||||
sum_n_lst = [x + y for x, y in zip(n_list, n_list1)]
|
||||
new_summary_valuess[str(true_key)]['n'] = sum_n_lst
|
||||
|
||||
# n_outflow相加
|
||||
n_outflow_list = new_summary_valuess[str(true_key)]['n_outflow']
|
||||
n_outflow_list1 = group_data['n_outflow']
|
||||
sum_n_ourflow_lst = [x + y for x, y in zip(n_outflow_list, n_outflow_list1)]
|
||||
new_summary_valuess[str(true_key)]['n_outflow'] = sum_n_ourflow_lst
|
||||
|
||||
# 计算概率
|
||||
for key1, value1 in new_summary_valuess.items():
|
||||
new_summary_valuess[key1]['p'] = [round(i / value1['d0'], 2) for i in value1['n']]
|
||||
new_summary_valuess[key1]['p_outflow'] = [round(i1 / value1['d0'], 2) for i1 in value1['n_outflow']]
|
||||
|
||||
title = ['分组项', '用户数', '次留', *[f'{i + 1}留' for i in retention_n[1:]]]
|
||||
|
||||
# 未到达的日期需要补齐-
|
||||
retention_length = len(retention_n)
|
||||
for _, items in summary_valuess.items():
|
||||
for _, items in new_summary_valuess.items():
|
||||
for key in ['p', 'n', 'p_outflow', 'n_outflow']:
|
||||
items[key].extend(['-'] * (retention_length - len(items[key])))
|
||||
|
||||
resp = {
|
||||
'summary_values': summary_valuess,
|
||||
'summary_values': new_summary_valuess,
|
||||
# 'values': values,
|
||||
'date_range': [d.strftime('%Y-%m-%d') for d in date_range],
|
||||
'title': title,
|
||||
@ -1324,7 +1384,7 @@ async def scatter_model(
|
||||
# 这是整体的
|
||||
for key, tmp_df in df.groupby('date'):
|
||||
bins_s = pd.cut(tmp_df['values'], bins=bins,
|
||||
right=False,include_lowest=True).value_counts()
|
||||
right=False, include_lowest=True).value_counts()
|
||||
bins_s.sort_index(inplace=True)
|
||||
total = int(bins_s.sum())
|
||||
if res['time_particle'] == 'total':
|
||||
@ -1583,8 +1643,8 @@ async def scatter_model(
|
||||
# if 'time' not in groupby:
|
||||
resp['list'][str(key)] = dict()
|
||||
resp['list'][str(key)] = {'n': bins_s.to_list(), 'total': total,
|
||||
'p': [str(i) + '%' for i in p],
|
||||
'title': '总体'}
|
||||
'p': [str(i) + '%' for i in p],
|
||||
'title': '总体'}
|
||||
# else:
|
||||
# resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = dict()
|
||||
# resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = {'n': bins_s.to_list(), 'total': total,
|
||||
@ -1675,7 +1735,7 @@ async def scatter_model(
|
||||
list_p.append(number_str)
|
||||
|
||||
resp['list'][str(dt)] = {'n': [labels_dict01.get(i, 0) for i in labels], 'total': total,
|
||||
'p': list_p}
|
||||
'p': list_p}
|
||||
else:
|
||||
list_p = []
|
||||
for i in labels:
|
||||
@ -1683,7 +1743,7 @@ async def scatter_model(
|
||||
number_str = str(number_int) + '%'
|
||||
list_p.append(number_str)
|
||||
resp['list'][str(dt)] = {'n': [labels_dict.get(i, 0) for i in labels], 'total': total,
|
||||
'p': list_p}
|
||||
'p': list_p}
|
||||
# resp['list'][dt] = {'总体': {'n': [labels_dict.get(i, 0) for i in labels], 'total': total,
|
||||
# 'p': [round(labels_dict.get(i, 0) * 100 / total, 2) for i in labels]}}
|
||||
if where == "step_id" and event_type == "guide":
|
||||
@ -1985,17 +2045,17 @@ async def user_property_model(
|
||||
if data_in.user_arrt_type == 'datetime':
|
||||
sql = f"""select `#account_id`,`{ziduan}` from {game}.`user` WHERE addHours(`{ziduan}`, 8) >= '{data_in.start_time}'
|
||||
and addHours(`{ziduan}`, 8) <= '{data_in.end_time}' ORDER BY `#reg_time` LIMIT 10 OFFSET {(
|
||||
data_in.pages - 1) * 10}"""
|
||||
data_in.pages - 1) * 10}"""
|
||||
# 如果查询'#account_id',则不多余返回一个account_id
|
||||
elif ziduan == '#account_id':
|
||||
sql = f"""select `{ziduan}`,name from {game}.`user` WHERE `{ziduan}` {tiaojian} '{data_in.condition}' ORDER BY `#reg_time` LIMIT 10 OFFSET {(
|
||||
data_in.pages - 1) * 10} """
|
||||
data_in.pages - 1) * 10} """
|
||||
elif data_in.user_arrt_type == 'int':
|
||||
sql = f"""select `#account_id`,`{ziduan}` from {game}.`user` WHERE `{ziduan}` {tiaojian} {data_in.condition} ORDER BY `#reg_time` LIMIT 10 OFFSET {(
|
||||
data_in.pages - 1) * 10}"""
|
||||
data_in.pages - 1) * 10}"""
|
||||
else:
|
||||
sql = f"""select `#account_id`,`{ziduan}` from `{game}`.`user` WHERE `{ziduan}` {tiaojian} '{data}' ORDER BY `#reg_time` LIMIT 10 OFFSET {(
|
||||
data_in.pages - 1) * 10}"""
|
||||
data_in.pages - 1) * 10}"""
|
||||
# 查询数据
|
||||
try:
|
||||
df = await ckdb.query_dataframe(sql)
|
||||
|
Loading…
Reference in New Issue
Block a user