留存分析分组自定义

This commit is contained in:
李宗振 2022-08-02 16:02:49 +08:00
parent f2be717f3e
commit 27501bda49

View File

@@ -578,6 +578,49 @@ async def retention_model(request: Request,
# 映射对应中文返回给前端展示
groupby_list = analysis.event_view.get('groupBy')
groupby = [i['columnName'] for i in groupby_list if i['tableType'] != 'user_label']
true_group = [] # 定义分组实际选择
for g_data in groupby_list:
data_type = g_data['data_type']
# 不是int类型
if data_type != "int":
true_group.append("str")
continue
# 自定义区间
if g_data['intervalType'] == 'user_defined':
int_range = analysis.event_view.get('groupBy')[0]['quotaIntervalArr']
chk_range = []
for index, value in enumerate(int_range):
# 开头
if index == 0:
chk_range.append(['-', value])
# 只有两个数
if len(int_range) >= 2:
chk_range.append([value, int_range[index + 1]])
continue
# 结尾
if index + 1 >= len(int_range):
chk_range.append([value, '+'])
continue
# 中间
chk_range.append([value, int_range[index + 1]])
true_group.append(chk_range)
# 默认区间
elif g_data['intervalType'] == 'def':
zidai = []
max_v = int(df[g_data['columnName']].max())
min_v = int(df[g_data['columnName']].min())
interval = (max_v - min_v) // 10 or 1
for i in range(min_v, max_v, interval):
zidai.append([i, i + interval])
true_group.append(zidai)
# 离散数字
else:
true_group.append('discrete')
if len(groupby_list) == 1:
max_v = int(df[groupby_list[0]['columnName']].max())
min_v = int(df[groupby_list[0]['columnName']].min())
@@ -676,65 +719,82 @@ async def retention_model(request: Request,
tmp['p_outflow'].append(n)
tmp['n_outflow'].append(rd['o_cntn'])
# 如果分组项是int类型按选择的分组
# 默认区间
if analysis.event_view.get('groupBy')[0]['intervalType'] == 'def':
if '均值' in summary_valuess:
summary_valuess.pop('均值')
interval = (max_v - min_v) // 10 or 1
lens = len(summary_valuess[max_v]['n'])
ress = {}
for i in range(min_v, max_v, interval):
d0 = 0
n1 = []
n_outflow1 = []
for k, v in summary_valuess.items():
if k >= i and k < i + interval:
d0 += v['d0']
n1.append(v['n'])
n_outflow1.append(v['n_outflow'])
if len(n1) > 0:
re_dict = {}
n = np.sum([ii for ii in n1], axis=0).tolist()
n_outflow = np.sum([iii for iii in n_outflow1], axis=0).tolist()
p = [round(nu*100 / d0, 2) for nu in n]
p_outflow = [round(num*100 / d0, 2) for num in n_outflow]
re_dict['d0'] = d0
re_dict['n'] = n
re_dict['n_outflow'] = n_outflow
re_dict['p'] = p
re_dict['p_outflow'] = p_outflow
ress[f"[{i},{i + interval})"] = re_dict
if "['均值']" in summary_valuess:
summary_valuess.pop("['均值']")
new_summary_valuess = {}
for group_key, group_data in summary_valuess.items():
key_list = eval(group_key)
true_key = [] # 重新定义后的分组
for index, value in enumerate(key_list):
true_group_index = true_group[index]
# 默认区间或者自定义区间
if isinstance(true_group_index, list):
for defined_list in true_group_index:
defined_list_max = defined_list[1]
defined_list_min = defined_list[0]
if defined_list_min == '-':
if value < defined_list_max:
true_key.append(defined_list)
break
else:
re_dict = {'d0': 0}
n = []
n_outflow = []
p = []
p_outflow = []
for cishu in range(0, lens):
n.append(0)
n_outflow.append(0)
p.append(0)
p_outflow.append(0)
re_dict['n'] = n
re_dict['n_outflow'] = n_outflow
re_dict['p'] = p
re_dict['p_outflow'] = p_outflow
ress[f"[{i},{i + interval})"] = re_dict
summary_valuess=ress
# 自定义区间
elif analysis.event_view.get('groupBy')[0]['intervalType'] == 'user_defined':
pass
# 次留数
continue
if defined_list_max == '+':
if value >= defined_list_min:
true_key.append(defined_list)
break
else:
continue
if defined_list_min <= value < defined_list_max:
true_key.append(defined_list)
break
continue
continue
# 分组是字符串或者离散直接取这个值得str类型
if true_group_index in ['str', 'discrete']:
true_key.append(str(value))
continue
# 这个分组不存在:
if str(true_key) not in new_summary_valuess:
new_summary_valuess[str(true_key)] = group_data
continue
# 这个分组已存在
# d0相加
new_summary_valuess[str(true_key)]['d0'] += group_data['d0']
# n相加
n_list = new_summary_valuess[str(true_key)]['n']
n_list1 = group_data['n']
sum_n_lst = [x + y for x, y in zip(n_list, n_list1)]
new_summary_valuess[str(true_key)]['n'] = sum_n_lst
# n_outflow相加
n_outflow_list = new_summary_valuess[str(true_key)]['n_outflow']
n_outflow_list1 = group_data['n_outflow']
sum_n_ourflow_lst = [x + y for x, y in zip(n_outflow_list, n_outflow_list1)]
new_summary_valuess[str(true_key)]['n_outflow'] = sum_n_ourflow_lst
# 计算概率
for key1, value1 in new_summary_valuess.items():
new_summary_valuess[key1]['p'] = [round(i / value1['d0'], 2) for i in value1['n']]
new_summary_valuess[key1]['p_outflow'] = [round(i1 / value1['d0'], 2) for i1 in value1['n_outflow']]
title = ['分组项', '用户数', '次留', *[f'{i + 1}' for i in retention_n[1:]]]
# 未到达的日期需要补齐-
retention_length = len(retention_n)
for _, items in summary_valuess.items():
for _, items in new_summary_valuess.items():
for key in ['p', 'n', 'p_outflow', 'n_outflow']:
items[key].extend(['-'] * (retention_length - len(items[key])))
resp = {
'summary_values': summary_valuess,
'summary_values': new_summary_valuess,
# 'values': values,
'date_range': [d.strftime('%Y-%m-%d') for d in date_range],
'title': title,