Event analysis: label group-by items

This commit is contained in:
李宗振 2022-06-10 17:19:41 +08:00
parent 68186cc5fa
commit e6735e8dfa
2 changed files with 78 additions and 29 deletions

View File

@@ -188,7 +188,7 @@ async def event_model(
return schemas.Msg(code=-9, msg='报表配置参数异常')
res = []
is_hide = []
group_label = {}
for idx, item in enumerate(sqls):  # enumerate so we also get each index
if item.get('is_show') == False:
is_hide.append(idx)
@@ -373,6 +373,7 @@ async def event_model(
# q['eventNameDisplay']=item['event_name_display']
res.append(q)
group_label = item['group_label']
# Sort by total sum
for item in res:
try:
@@ -388,6 +389,20 @@ async def event_model(
sort_key = np.argsort(np.array(item['sum']))[::-1]  # argsort gives the indices that sort 'sum' ascending; reverse them for descending order
if item.get('groups'):
item['groups'] = np.array(item['groups'])[sort_key].tolist()
groups = []
for gitem in item['groups']:
gb = []
if '(' in gitem:
gitem = gitem.strip('(').strip(')').replace(' ', '').replace("'", '')
true_list = gitem.split(',')
for gstr in true_list:
gb.append(gstr)
# Label group-by items exist
if group_label:
for name, idx in group_label.items():
gb.insert(idx, name)
groups.append(str(gb))
item['groups'] = groups
item['values'] = np.array(item['values'])[sort_key].tolist()
item['sum'] = np.array(item['sum'])[sort_key].tolist()
item['avg'] = np.array(item['avg'])[sort_key].tolist()
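
In isolation, the reordering and label re-insertion above boil down to the following minimal sketch (standalone, with made-up data; 'vip_level' stands in for whatever columnDesc the label carried):

import numpy as np

item = {'groups': ["('cn', 'ios')", "('us', 'android')"], 'sum': [10, 42]}
group_label = {'vip_level': 0}  # hypothetical label desc -> original group-by index

sort_key = np.argsort(np.array(item['sum']))[::-1]           # largest sum first
item['groups'] = np.array(item['groups'])[sort_key].tolist()
groups = []
for gitem in item['groups']:
    gb = []
    if '(' in gitem:
        gb = gitem.strip('(').strip(')').replace(' ', '').replace("'", '').split(',')
    for name, idx in group_label.items():
        gb.insert(idx, name)                                  # put the label name back at its slot
    groups.append(str(gb))
item['groups'] = groups  # ["['vip_level', 'us', 'android']", "['vip_level', 'cn', 'ios']"]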

View File

@@ -115,6 +115,7 @@ class BehaviorAnalysis:
self.user_tbl = None
self.event_tbl = None
self.data_in = data_in
self.group_label = {}
self.event_view = dict()
self.events = [dict()]
@@ -166,8 +167,8 @@ class BehaviorAnalysis:
self.zone_time = self._get_zone_time()
self.time_particle = self._get_time_particle_size()
self.start_date, self.end_date, self.date_range = self._get_date_range()
self.global_filters = self._get_global_filters()
self.groupby = self._get_group_by()
self.global_filters = self._get_global_filters()
self.unit_num = self._get_unit_num()
self.global_relation = self.event_view.get('relation', 'and')
@@ -184,8 +185,29 @@ class BehaviorAnalysis:
return self.event_view.get('unitNum')
def _get_group_by(self):
return [getattr(self.event_tbl.c, item['columnName']) for item in self.event_view.get('groupBy', [])]
res = []
# Check whether any group-by conditions exist
groupBy = self.event_view.get('groupBy', [])
if not groupBy:
return []
for idx, item in enumerate(groupBy):
# If this item is a user label
if item['data_type'] == 'user_label':
item.update({
'comparator': "in",
'comparator_name': ""
})
# Record it in the grouping-label map
self.group_label.update({item['columnDesc']: idx})
# Append it to the filts of every event in events
if self.events:
for i in self.events:
i['filts'].append(item)
continue
# Not a label: add it to the group-by columns
res.append(getattr(self.event_tbl.c, item['columnName']))
return res
# return [getattr(self.event_tbl.c, item['columnName']) for item in self.event_view.get('groupBy', [])]
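
A standalone illustration of the new _get_group_by behaviour, simplified to use plain column names instead of SQLAlchemy columns and a made-up groupBy/events payload of the shape used above:

group_label = {}
events = [{'filts': []}, {'filts': []}]
groupBy = [
    {'columnName': 'svrindex', 'columnDesc': 'server', 'data_type': 'string'},
    {'columnName': 'pay_label', 'columnDesc': 'pay label', 'data_type': 'user_label'},
]

res = []
for idx, item in enumerate(groupBy):
    if item['data_type'] == 'user_label':
        item.update({'comparator': 'in', 'comparator_name': ''})
        group_label[item['columnDesc']] = idx   # remember which group-by slot the label occupied
        for e in events:
            e['filts'].append(item)             # the label is applied as a filter on every event
        continue
    res.append(item['columnName'])              # ordinary columns stay in the SQL GROUP BY

# res == ['svrindex'], group_label == {'pay label': 1}, and both events now filter on pay_label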
def _get_zone_time(self):
return int(self.event_view.get('zone_time', 8))
@@ -466,22 +488,30 @@ class BehaviorAnalysis:
'values')]
else:
selectd = select_exprs + [
func.round(getattr(func, analysis)(getattr(self.event_tbl.c, event['event_attr_id'])), 2).label(
func.round(getattr(func, analysis)(getattr(self.event_tbl.c, event['event_attr_id'])),
2).label(
'values')]
else:
operator_val = int(operator_)
operator = event['operator']  # arithmetic operator symbol
if analysis == 'total_count':
selectd = select_exprs + [settings.ARITHMETIC[operator](func.count(),operator_val).label('values')]
selectd = select_exprs + [
settings.ARITHMETIC[operator](func.count(), operator_val).label('values')]
elif analysis == 'touch_user_count':
selectd = select_exprs + [settings.ARITHMETIC[operator](func.count(sa.distinct(e_account_id_col)),operator_val).label('values')]
selectd = select_exprs + [
settings.ARITHMETIC[operator](func.count(sa.distinct(e_account_id_col)),
operator_val).label('values')]
elif analysis == 'touch_user_avg':
selectd = select_exprs + [
settings.ARITHMETIC[operator](func.round((func.count() / func.count(sa.distinct(e_account_id_col))), 2),operator_val).label(
settings.ARITHMETIC[operator](
func.round((func.count() / func.count(sa.distinct(e_account_id_col))), 2),
operator_val).label(
'values')]
else:
selectd = select_exprs + [
settings.ARITHMETIC[operator](func.round(getattr(func, analysis)(getattr(self.event_tbl.c, event['event_attr_id'])), 2),operator_val).label(
settings.ARITHMETIC[operator](
func.round(getattr(func, analysis)(getattr(self.event_tbl.c, event['event_attr_id'])),
2), operator_val).label(
'values')]
if user_filter:
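
settings.ARITHMETIC is not shown in this diff; it is presumably a map from an operator string to a binary callable that SQLAlchemy column expressions accept. A rough equivalent, as an assumption rather than the project's actual definition:

import operator
from sqlalchemy import func

ARITHMETIC = {'+': operator.add, '-': operator.sub, '*': operator.mul, '/': operator.truediv}

# e.g. total_count with operator '*' and operand 100 builds: count() * 100 AS values
expr = ARITHMETIC['*'](func.count(), 100).label('values')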
@@ -518,7 +548,8 @@ WHERE addHours({game}.event."#event_time", 8) >= '{stat_date}' AND addHours({gam
GROUP BY toDate(addHours({game}.event."#event_time", 8))) as bb on aa.date = bb.date ORDER by date
"""
# Pull out "new paying user count" (device-based) as a special case
if event.get('event_attr') == '触发用户数' and ['is_new_device', 'orderid']== [i['columnName'] for i in event.get('filts')]:
if event.get('event_attr') == '触发用户数' and ['is_new_device', 'orderid'] == [i['columnName'] for i in
event.get('filts')]:
stat_date = self.start_date
end_date = self.end_date
game = self.game
@@ -534,6 +565,7 @@ addHours("#event_time", 8) >= '{stat_date}' AND addHours("#event_time", 8) <= '{
GROUP BY toDate(addHours("#event_time", 8))"""
sqls.append({'sql': sql,
'group_label': self.group_label,
'groupby': [i.key for i in self.groupby],
'date_range': self.date_range,
'event_name': event_name_display or event_name,
@@ -546,6 +578,7 @@ GROUP BY toDate(addHours("#event_time", 8))"""
})
return sqls
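
For reference, each dict appended to sqls now carries the label map next to the group-by keys, roughly like this (only fields visible in this diff are listed; values are illustrative):

{
    'sql': '<generated ClickHouse SQL>',
    'group_label': {'pay label': 1},    # columnDesc -> original group-by index
    'groupby': ['svrindex'],            # keys of the remaining SQL group-by columns
    'date_range': ['2022-06-01', '2022-06-10'],
    'event_name': 'login',
    # ... other fields unchanged
}

event_model in the first file consumes this: it reads item['group_label'] and re-inserts the label names into each group key after sorting.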
# Used in both the funnel analysis and event analysis models
async def funnel_model_sql(self):
"""
@@ -842,7 +875,8 @@ ORDER BY values desc"""
days = (arrow.get(self.end_date).date() - arrow.get(self.start_date).date()).days
keep = []
cnt = []
retention_n = [*[k for k in range(1, 60)], 70-1, 75-1, 80-1, 85-1, 90-1, 95-1, 100-1, 110-1, 120-1, 150-1, 180-1, 210-1, 240-1, 270-1, 300-1,
retention_n = [*[k for k in range(1, 60)], 70 - 1, 75 - 1, 80 - 1, 85 - 1, 90 - 1, 95 - 1, 100 - 1, 110 - 1,
120 - 1, 150 - 1, 180 - 1, 210 - 1, 240 - 1, 270 - 1, 300 - 1,
360 - 1]
"""