Retention analysis: user-defined grouping (留存分析分组自定义)
parent f2be717f3e
commit 27501bda49
@@ -576,8 +576,51 @@ async def retention_model(request: Request,
     filter_item_type = res['filter_item_type']  # all
     filter_item = res['filter_item']  # list: 0,1,3,7,14,21,30
     # map the values to Chinese display names for the frontend
-    groupby_list=analysis.event_view.get('groupBy')
+    groupby_list = analysis.event_view.get('groupBy')
     groupby = [i['columnName'] for i in groupby_list if i['tableType'] != 'user_label']
+    true_group = []  # the grouping actually applied to each group-by column
+    for g_data in groupby_list:
+        data_type = g_data['data_type']
+
+        # not an int column
+        if data_type != "int":
+            true_group.append("str")
+            continue
+
+        # user-defined intervals
+        if g_data['intervalType'] == 'user_defined':
+            int_range = analysis.event_view.get('groupBy')[0]['quotaIntervalArr']
+            chk_range = []
+            for index, value in enumerate(int_range):
+                # first boundary
+                if index == 0:
+                    chk_range.append(['-', value])
+                    # there are at least two boundaries
+                    if len(int_range) >= 2:
+                        chk_range.append([value, int_range[index + 1]])
+                    continue
+                # last boundary
+                if index + 1 >= len(int_range):
+                    chk_range.append([value, '+'])
+                    continue
+                # middle boundaries
+                chk_range.append([value, int_range[index + 1]])
+            true_group.append(chk_range)
+
+        # default intervals
+        elif g_data['intervalType'] == 'def':
+            zidai = []
+            max_v = int(df[g_data['columnName']].max())
+            min_v = int(df[g_data['columnName']].min())
+            interval = (max_v - min_v) // 10 or 1
+            for i in range(min_v, max_v, interval):
+                zidai.append([i, i + interval])
+            true_group.append(zidai)
+
+        # discrete numbers
+        else:
+            true_group.append('discrete')
+
     if len(groupby_list) == 1:
         max_v = int(df[groupby_list[0]['columnName']].max())
         min_v = int(df[groupby_list[0]['columnName']].min())
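A quick sketch of the shapes this loop produces. Assuming a hypothetical view with one user-defined int column whose quotaIntervalArr is [0, 5, 10] plus one string column (both invented for illustration), the boundaries expand into open-ended edge buckets plus one bucket per adjacent pair:

    # per-column entries appended to true_group:
    #   int column, user_defined: [['-', 0], [0, 5], [5, 10], [10, '+']]
    #   string column:            'str'
    true_group = [[['-', 0], [0, 5], [5, 10], [10, '+']], 'str']

Note that int_range is always read from groupBy[0], so if several int columns used user-defined intervals they would all reuse the first column's quotaIntervalArr.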
@@ -592,7 +635,7 @@ async def retention_model(request: Request,
     for k, v in chinese.items():
         # apply the mapping
         df.loc[df['svrindex'] == k, 'svrindex'] = v
-    times=df['reg_date'][0]
+    times = df['reg_date'][0]
     df.set_index(groupby, inplace=True)
     # for d in set(res['date_range']) - set(df.index):
     #     df.loc[d] = 0
@@ -676,65 +719,82 @@ async def retention_model(request: Request,
             tmp['p_outflow'].append(n)
             tmp['n_outflow'].append(rd['o_cntn'])
     # if the group-by column is int, bucket it by the grouping selected above
-    # default intervals
-    if analysis.event_view.get('groupBy')[0]['intervalType'] == 'def':
-        summary_valuess.pop('均值')
-        interval = (max_v - min_v) // 10 or 1
-        lens = len(summary_valuess[max_v]['n'])
-        ress = {}
-        for i in range(min_v, max_v, interval):
-            d0 = 0
-            n1 = []
-            n_outflow1 = []
-            for k, v in summary_valuess.items():
-                if k >= i and k < i + interval:
-                    d0 += v['d0']
-                    n1.append(v['n'])
-                    n_outflow1.append(v['n_outflow'])
-            if len(n1) > 0:
-                re_dict = {}
-                n = np.sum([ii for ii in n1], axis=0).tolist()
-                n_outflow = np.sum([iii for iii in n_outflow1], axis=0).tolist()
-                p = [round(nu*100 / d0, 2) for nu in n]
-                p_outflow = [round(num*100 / d0, 2) for num in n_outflow]
-                re_dict['d0'] = d0
-                re_dict['n'] = n
-                re_dict['n_outflow'] = n_outflow
-                re_dict['p'] = p
-                re_dict['p_outflow'] = p_outflow
-                ress[f"[{i},{i + interval})"] = re_dict
-            else:
-                re_dict = {'d0': 0}
-                n = []
-                n_outflow = []
-                p = []
-                p_outflow = []
-                for cishu in range(0, lens):
-                    n.append(0)
-                    n_outflow.append(0)
-                    p.append(0)
-                    p_outflow.append(0)
-                re_dict['n'] = n
-                re_dict['n_outflow'] = n_outflow
-                re_dict['p'] = p
-                re_dict['p_outflow'] = p_outflow
-                ress[f"[{i},{i + interval})"] = re_dict
-        summary_valuess=ress
-    # user-defined intervals
-    elif analysis.event_view.get('groupBy')[0]['intervalType'] == 'user_defined':
-        pass
-    # next-day retention count
+    if '均值' in summary_valuess:
+        summary_valuess.pop('均值')
+    if "['均值']" in summary_valuess:
+        summary_valuess.pop("['均值']")
+    new_summary_valuess = {}
+    for group_key, group_data in summary_valuess.items():
+        key_list = eval(group_key)
+        true_key = []  # the re-derived group key
+        for index, value in enumerate(key_list):
+            true_group_index = true_group[index]
+            # default or user-defined intervals
+            if isinstance(true_group_index, list):
+                for defined_list in true_group_index:
+                    defined_list_max = defined_list[1]
+                    defined_list_min = defined_list[0]
+                    if defined_list_min == '-':
+                        if value < defined_list_max:
+                            true_key.append(defined_list)
+                            break
+                        else:
+                            continue
+                    if defined_list_max == '+':
+                        if value >= defined_list_min:
+                            true_key.append(defined_list)
+                            break
+                        else:
+                            continue
+
+                    if defined_list_min <= value < defined_list_max:
+                        true_key.append(defined_list)
+                        break
+                    continue
+                continue
+
+            # the group value is a string or discrete: take str(value) directly
+            if true_group_index in ['str', 'discrete']:
+                true_key.append(str(value))
+                continue
+
+        # this group key does not exist yet:
+        if str(true_key) not in new_summary_valuess:
+            new_summary_valuess[str(true_key)] = group_data
+            continue
+
+        # this group key already exists
+        # add up d0
+        new_summary_valuess[str(true_key)]['d0'] += group_data['d0']
+
+        # add up n
+        n_list = new_summary_valuess[str(true_key)]['n']
+        n_list1 = group_data['n']
+        sum_n_lst = [x + y for x, y in zip(n_list, n_list1)]
+        new_summary_valuess[str(true_key)]['n'] = sum_n_lst
+
+        # add up n_outflow
+        n_outflow_list = new_summary_valuess[str(true_key)]['n_outflow']
+        n_outflow_list1 = group_data['n_outflow']
+        sum_n_ourflow_lst = [x + y for x, y in zip(n_outflow_list, n_outflow_list1)]
+        new_summary_valuess[str(true_key)]['n_outflow'] = sum_n_ourflow_lst
+
+    # compute the rates
+    for key1, value1 in new_summary_valuess.items():
+        new_summary_valuess[key1]['p'] = [round(i / value1['d0'], 2) for i in value1['n']]
+        new_summary_valuess[key1]['p_outflow'] = [round(i1 / value1['d0'], 2) for i1 in value1['n_outflow']]
+
     title = ['分组项', '用户数', '次留', *[f'{i + 1}留' for i in retention_n[1:]]]
 
     # dates not yet reached are padded with '-'
     retention_length = len(retention_n)
-    for _, items in summary_valuess.items():
+    for _, items in new_summary_valuess.items():
         for key in ['p', 'n', 'p_outflow', 'n_outflow']:
             items[key].extend(['-'] * (retention_length - len(items[key])))
 
     resp = {
-        'summary_values': summary_valuess,
+        'summary_values': new_summary_valuess,
         # 'values': values,
         'date_range': [d.strftime('%Y-%m-%d') for d in date_range],
         'title': title,
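A rough sketch of what the regrouping above does to a single summary entry, under invented inputs: the stringified key "[3, 'br']" and the true_group below are hypothetical, and ast.literal_eval stands in for the eval used in the commit:

    import ast

    true_group = [[['-', 0], [0, 5], [5, 10], [10, '+']], 'str']
    group_key = "[3, 'br']"  # stringified per-group values
    true_key = []
    for index, value in enumerate(ast.literal_eval(group_key)):
        spec = true_group[index]
        if isinstance(spec, list):  # interval column: find the enclosing bucket
            for lo, hi in spec:
                if (lo == '-' and value < hi) or (hi == '+' and value >= lo) \
                        or (lo != '-' and hi != '+' and lo <= value < hi):
                    true_key.append([lo, hi])
                    break
        else:  # 'str' / 'discrete' column
            true_key.append(str(value))

    print(true_key)  # [[0, 5], 'br']; entries sharing str(true_key) get d0/n/n_outflow summed

One behavioral detail visible in the diff itself: the removed code reported p as a percentage (n*100/d0), while the new code rounds the raw ratio n/d0.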
@@ -1324,7 +1384,7 @@ async def scatter_model(
     # this is the overall series
     for key, tmp_df in df.groupby('date'):
         bins_s = pd.cut(tmp_df['values'], bins=bins,
-                        right=False,include_lowest=True).value_counts()
+                        right=False, include_lowest=True).value_counts()
         bins_s.sort_index(inplace=True)
         total = int(bins_s.sum())
         if res['time_particle'] == 'total':
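For reference, a minimal self-contained run of the pd.cut(...).value_counts() pattern used above; the sample values and bin edges are invented:

    import pandas as pd

    values = pd.Series([0, 1, 4, 5, 9, 10])
    bins = [0, 5, 10, 15]
    # right=False makes the bins left-closed/right-open: [0,5), [5,10), [10,15)
    bins_s = pd.cut(values, bins=bins, right=False, include_lowest=True).value_counts()
    bins_s.sort_index(inplace=True)
    print(bins_s.to_list())   # [3, 2, 1]
    print(int(bins_s.sum()))  # 6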
@@ -1583,8 +1643,8 @@ async def scatter_model(
     # if 'time' not in groupby:
     resp['list'][str(key)] = dict()
     resp['list'][str(key)] = {'n': bins_s.to_list(), 'total': total,
                               'p': [str(i) + '%' for i in p],
                               'title': '总体'}
     # else:
     #     resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = dict()
     #     resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = {'n': bins_s.to_list(), 'total': total,
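Each key of resp['list'] ends up mapping to a dict of the shape sketched below (counts and percentages invented; '总体' is the "overall" series title):

    entry = {
        'n': [3, 2, 1],                       # bins_s.to_list()
        'total': 6,                           # int(bins_s.sum())
        'p': ['50.0%', '33.33%', '16.67%'],   # [str(i) + '%' for i in p]
        'title': '总体',
    }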
@@ -1675,7 +1735,7 @@ async def scatter_model(
                 list_p.append(number_str)
 
             resp['list'][str(dt)] = {'n': [labels_dict01.get(i, 0) for i in labels], 'total': total,
                                      'p': list_p}
         else:
             list_p = []
             for i in labels:
@@ -1683,7 +1743,7 @@ async def scatter_model(
                 number_str = str(number_int) + '%'
                 list_p.append(number_str)
             resp['list'][str(dt)] = {'n': [labels_dict.get(i, 0) for i in labels], 'total': total,
                                      'p': list_p}
             # resp['list'][dt] = {'总体': {'n': [labels_dict.get(i, 0) for i in labels], 'total': total,
             #                              'p': [round(labels_dict.get(i, 0) * 100 / total, 2) for i in labels]}}
     if where == "step_id" and event_type == "guide":
@@ -1985,17 +2045,17 @@ async def user_property_model(
     if data_in.user_arrt_type == 'datetime':
         sql = f"""select `#account_id`,`{ziduan}` from {game}.`user` WHERE addHours(`{ziduan}`, 8) >= '{data_in.start_time}'
         and addHours(`{ziduan}`, 8) <= '{data_in.end_time}' ORDER BY `#reg_time` LIMIT 10 OFFSET {(
                 data_in.pages - 1) * 10}"""
     # when querying '#account_id' itself, don't also return a redundant account_id column
     elif ziduan == '#account_id':
         sql = f"""select `{ziduan}`,name from {game}.`user` WHERE `{ziduan}` {tiaojian} '{data_in.condition}' ORDER BY `#reg_time` LIMIT 10 OFFSET {(
                 data_in.pages - 1) * 10} """
     elif data_in.user_arrt_type == 'int':
         sql = f"""select `#account_id`,`{ziduan}` from {game}.`user` WHERE `{ziduan}` {tiaojian} {data_in.condition} ORDER BY `#reg_time` LIMIT 10 OFFSET {(
                 data_in.pages - 1) * 10}"""
     else:
         sql = f"""select `#account_id`,`{ziduan}` from `{game}`.`user` WHERE `{ziduan}` {tiaojian} '{data}' ORDER BY `#reg_time` LIMIT 10 OFFSET {(
                 data_in.pages - 1) * 10}"""
     # run the query
     try:
         df = await ckdb.query_dataframe(sql)
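All four branches page with LIMIT 10 OFFSET (data_in.pages - 1) * 10. A minimal sketch of that offset arithmetic, with a hypothetical helper name:

    def page_clause(pages: int, page_size: int = 10) -> str:
        # page 1 -> OFFSET 0, page 2 -> OFFSET 10, ...
        return f"LIMIT {page_size} OFFSET {(pages - 1) * page_size}"

    print(page_clause(3))  # LIMIT 10 OFFSET 20

Since ziduan, tiaojian, and data_in.condition are interpolated straight into the SQL f-strings, bound query parameters would be the safer pattern if any of them can carry user input.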