事件分析标签分组项
This commit is contained in:
parent
68186cc5fa
commit
e6735e8dfa
@ -188,7 +188,7 @@ async def event_model(
|
|||||||
return schemas.Msg(code=-9, msg='报表配置参数异常')
|
return schemas.Msg(code=-9, msg='报表配置参数异常')
|
||||||
res = []
|
res = []
|
||||||
is_hide = []
|
is_hide = []
|
||||||
|
group_label = {}
|
||||||
for idx, item in enumerate(sqls): #列出索引下标
|
for idx, item in enumerate(sqls): #列出索引下标
|
||||||
if item.get('is_show') == False:
|
if item.get('is_show') == False:
|
||||||
is_hide.append(idx)
|
is_hide.append(idx)
|
||||||
@ -373,6 +373,7 @@ async def event_model(
|
|||||||
|
|
||||||
# q['eventNameDisplay']=item['event_name_display']
|
# q['eventNameDisplay']=item['event_name_display']
|
||||||
res.append(q)
|
res.append(q)
|
||||||
|
group_label = item['group_label']
|
||||||
# 按总和排序
|
# 按总和排序
|
||||||
for item in res:
|
for item in res:
|
||||||
try:
|
try:
|
||||||
@ -388,6 +389,20 @@ async def event_model(
|
|||||||
sort_key = np.argsort(np.array(item['sum']))[::-1]#将sum中的元素从小到大排列后的结果,提取其对应的索引,然后倒着输出到变量之中
|
sort_key = np.argsort(np.array(item['sum']))[::-1]#将sum中的元素从小到大排列后的结果,提取其对应的索引,然后倒着输出到变量之中
|
||||||
if item.get('groups'):
|
if item.get('groups'):
|
||||||
item['groups'] = np.array(item['groups'])[sort_key].tolist()
|
item['groups'] = np.array(item['groups'])[sort_key].tolist()
|
||||||
|
groups = []
|
||||||
|
for gitem in item['groups']:
|
||||||
|
gb = []
|
||||||
|
if '(' in gitem:
|
||||||
|
gitem = gitem.strip('(').strip(')').replace(' ', '').replace("'", '')
|
||||||
|
true_list = gitem.split(',')
|
||||||
|
for gstr in true_list:
|
||||||
|
gb.append(gstr)
|
||||||
|
# 存在标签分组项
|
||||||
|
if group_label:
|
||||||
|
for name, idx in group_label.items():
|
||||||
|
gb.insert(idx, name)
|
||||||
|
groups.append(str(gb))
|
||||||
|
item['groups'] = groups
|
||||||
item['values'] = np.array(item['values'])[sort_key].tolist()
|
item['values'] = np.array(item['values'])[sort_key].tolist()
|
||||||
item['sum'] = np.array(item['sum'])[sort_key].tolist()
|
item['sum'] = np.array(item['sum'])[sort_key].tolist()
|
||||||
item['avg'] = np.array(item['avg'])[sort_key].tolist()
|
item['avg'] = np.array(item['avg'])[sort_key].tolist()
|
||||||
|
@ -115,6 +115,7 @@ class BehaviorAnalysis:
|
|||||||
self.user_tbl = None
|
self.user_tbl = None
|
||||||
self.event_tbl = None
|
self.event_tbl = None
|
||||||
self.data_in = data_in
|
self.data_in = data_in
|
||||||
|
self.group_label = {}
|
||||||
self.event_view = dict()
|
self.event_view = dict()
|
||||||
self.events = [dict()]
|
self.events = [dict()]
|
||||||
|
|
||||||
@ -166,8 +167,8 @@ class BehaviorAnalysis:
|
|||||||
self.zone_time = self._get_zone_time()
|
self.zone_time = self._get_zone_time()
|
||||||
self.time_particle = self._get_time_particle_size()
|
self.time_particle = self._get_time_particle_size()
|
||||||
self.start_date, self.end_date, self.date_range = self._get_date_range()
|
self.start_date, self.end_date, self.date_range = self._get_date_range()
|
||||||
self.global_filters = self._get_global_filters()
|
|
||||||
self.groupby = self._get_group_by()
|
self.groupby = self._get_group_by()
|
||||||
|
self.global_filters = self._get_global_filters()
|
||||||
self.unit_num = self._get_unit_num()
|
self.unit_num = self._get_unit_num()
|
||||||
self.global_relation = self.event_view.get('relation', 'and')
|
self.global_relation = self.event_view.get('relation', 'and')
|
||||||
|
|
||||||
@ -184,8 +185,29 @@ class BehaviorAnalysis:
|
|||||||
return self.event_view.get('unitNum')
|
return self.event_view.get('unitNum')
|
||||||
|
|
||||||
def _get_group_by(self):
    """Build the list of group-by columns from ``event_view['groupBy']``.

    Entries whose ``data_type`` is ``'user_label'`` are not turned into
    group-by columns. Instead each such entry is:

    * updated in place with ``comparator='in'`` and ``comparator_name='是'``,
    * recorded in ``self.group_label`` as ``{columnDesc: position}``, where
      ``position`` is the entry's index in the ``groupBy`` list,
    * appended to the ``filts`` list of every event in ``self.events``.

    Every other entry is resolved to ``getattr(self.event_tbl.c, columnName)``.

    Returns:
        list: the resolved column objects, in ``groupBy`` order; an empty
        list when ``groupBy`` is missing or empty.
    """
    group_by_items = self.event_view.get('groupBy', [])
    if not group_by_items:
        return []

    columns = []
    for position, item in enumerate(group_by_items):
        # .get(): entries that carry only 'columnName' (all that the previous
        # one-line implementation required) must not raise KeyError here.
        if item.get('data_type') != 'user_label':
            # Ordinary attribute: group by the event-table column.
            columns.append(getattr(self.event_tbl.c, item['columnName']))
            continue

        # Label entry: it acts as a filter condition, not a group-by column.
        item.update({
            'comparator': "in",
            'comparator_name': "是"
        })
        # Remember where the label sits among the group-by entries.
        self.group_label.update({item['columnDesc']: position})
        # Attach the label condition to every event's filter list.
        # setdefault(): an event may have no 'filts' key yet (the constructor
        # default for self.events is a list holding one empty dict).
        for event in self.events or []:
            event.setdefault('filts', []).append(item)
    return columns
|
||||||
|
|
||||||
def _get_zone_time(self):
|
def _get_zone_time(self):
|
||||||
return int(self.event_view.get('zone_time', 8))
|
return int(self.event_view.get('zone_time', 8))
|
||||||
@ -385,8 +407,8 @@ class BehaviorAnalysis:
|
|||||||
sqls = []
|
sqls = []
|
||||||
event_time_col = getattr(self.event_tbl.c, '#event_time')
|
event_time_col = getattr(self.event_tbl.c, '#event_time')
|
||||||
for event in self.events:
|
for event in self.events:
|
||||||
operator_ = event.get('operator_val','')
|
operator_ = event.get('operator_val', '')
|
||||||
#排头显示名
|
# 排头显示名
|
||||||
event_name_display = event.get('eventNameDisplay')
|
event_name_display = event.get('eventNameDisplay')
|
||||||
is_show = event.get('is_show', True)
|
is_show = event.get('is_show', True)
|
||||||
|
|
||||||
@ -408,7 +430,7 @@ class BehaviorAnalysis:
|
|||||||
|
|
||||||
if event.get('customType') == 'formula':
|
if event.get('customType') == 'formula':
|
||||||
if event.get('customEvent'):
|
if event.get('customEvent'):
|
||||||
#组合公式的内容
|
# 组合公式的内容
|
||||||
formula = event.get('customEvent')
|
formula = event.get('customEvent')
|
||||||
custom = CustomEvent(self.event_tbl, formula, format).parse()
|
custom = CustomEvent(self.event_tbl, formula, format).parse()
|
||||||
event_name = custom['event_name']
|
event_name = custom['event_name']
|
||||||
@ -466,22 +488,30 @@ class BehaviorAnalysis:
|
|||||||
'values')]
|
'values')]
|
||||||
else:
|
else:
|
||||||
selectd = select_exprs + [
|
selectd = select_exprs + [
|
||||||
func.round(getattr(func, analysis)(getattr(self.event_tbl.c, event['event_attr_id'])), 2).label(
|
func.round(getattr(func, analysis)(getattr(self.event_tbl.c, event['event_attr_id'])),
|
||||||
|
2).label(
|
||||||
'values')]
|
'values')]
|
||||||
else:
|
else:
|
||||||
operator_val=int(operator_)
|
operator_val = int(operator_)
|
||||||
operator=event['operator'] #运算符号
|
operator = event['operator'] # 运算符号
|
||||||
if analysis == 'total_count':
|
if analysis == 'total_count':
|
||||||
selectd = select_exprs + [settings.ARITHMETIC[operator](func.count(),operator_val).label('values')]
|
selectd = select_exprs + [
|
||||||
|
settings.ARITHMETIC[operator](func.count(), operator_val).label('values')]
|
||||||
elif analysis == 'touch_user_count':
|
elif analysis == 'touch_user_count':
|
||||||
selectd = select_exprs + [settings.ARITHMETIC[operator](func.count(sa.distinct(e_account_id_col)),operator_val).label('values')]
|
selectd = select_exprs + [
|
||||||
|
settings.ARITHMETIC[operator](func.count(sa.distinct(e_account_id_col)),
|
||||||
|
operator_val).label('values')]
|
||||||
elif analysis == 'touch_user_avg':
|
elif analysis == 'touch_user_avg':
|
||||||
selectd = select_exprs + [
|
selectd = select_exprs + [
|
||||||
settings.ARITHMETIC[operator](func.round((func.count() / func.count(sa.distinct(e_account_id_col))), 2),operator_val).label(
|
settings.ARITHMETIC[operator](
|
||||||
|
func.round((func.count() / func.count(sa.distinct(e_account_id_col))), 2),
|
||||||
|
operator_val).label(
|
||||||
'values')]
|
'values')]
|
||||||
else:
|
else:
|
||||||
selectd = select_exprs + [
|
selectd = select_exprs + [
|
||||||
settings.ARITHMETIC[operator](func.round(getattr(func, analysis)(getattr(self.event_tbl.c, event['event_attr_id'])), 2),operator_val).label(
|
settings.ARITHMETIC[operator](
|
||||||
|
func.round(getattr(func, analysis)(getattr(self.event_tbl.c, event['event_attr_id'])),
|
||||||
|
2), operator_val).label(
|
||||||
'values')]
|
'values')]
|
||||||
|
|
||||||
if user_filter:
|
if user_filter:
|
||||||
@ -503,10 +533,10 @@ class BehaviorAnalysis:
|
|||||||
print(sql)
|
print(sql)
|
||||||
# 单独付费率的拿出来
|
# 单独付费率的拿出来
|
||||||
if event.get('customEvent') == 'pay.touch_user_count/login.touch_user_count':
|
if event.get('customEvent') == 'pay.touch_user_count/login.touch_user_count':
|
||||||
stat_date=self.start_date
|
stat_date = self.start_date
|
||||||
end_date=self.end_date
|
end_date = self.end_date
|
||||||
game=self.game
|
game = self.game
|
||||||
sql=f"""
|
sql = f"""
|
||||||
select aa.date as date,round((a/b)*100,2) as values from
|
select aa.date as date,round((a/b)*100,2) as values from
|
||||||
(select toDate(addHours({game}.event."#event_time", 8)) AS date,uniqCombined(if({game}.event."#event_name" = 'pay', {game}.event."#account_id", NULL)) as a from {game}.event
|
(select toDate(addHours({game}.event."#event_time", 8)) AS date,uniqCombined(if({game}.event."#event_name" = 'pay', {game}.event."#account_id", NULL)) as a from {game}.event
|
||||||
WHERE addHours({game}.event."#event_time", 8) >= '{stat_date}' AND addHours({game}.event."#event_time", 8) <= '{end_date}'
|
WHERE addHours({game}.event."#event_time", 8) >= '{stat_date}' AND addHours({game}.event."#event_time", 8) <= '{end_date}'
|
||||||
@ -518,11 +548,12 @@ WHERE addHours({game}.event."#event_time", 8) >= '{stat_date}' AND addHours({gam
|
|||||||
GROUP BY toDate(addHours({game}.event."#event_time", 8))) as bb on aa.date = bb.date ORDER by date
|
GROUP BY toDate(addHours({game}.event."#event_time", 8))) as bb on aa.date = bb.date ORDER by date
|
||||||
"""
|
"""
|
||||||
# 单独把新增付费人数(以设备为维度)拿出来
|
# 单独把新增付费人数(以设备为维度)拿出来
|
||||||
if event.get('event_attr') == '触发用户数' and ['is_new_device', 'orderid']== [i['columnName'] for i in event.get('filts')]:
|
if event.get('event_attr') == '触发用户数' and ['is_new_device', 'orderid'] == [i['columnName'] for i in
|
||||||
stat_date=self.start_date
|
event.get('filts')]:
|
||||||
end_date=self.end_date
|
stat_date = self.start_date
|
||||||
game=self.game
|
end_date = self.end_date
|
||||||
sql=f"""SELECT toDate(addHours("#event_time", 8)) as date,
|
game = self.game
|
||||||
|
sql = f"""SELECT toDate(addHours("#event_time", 8)) as date,
|
||||||
round(uniqExact("#distinct_id"), 2) AS values FROM
|
round(uniqExact("#distinct_id"), 2) AS values FROM
|
||||||
(SELECT toDate(addHours("#event_time", 8)) as date,"#event_time",`#event_name`,`#distinct_id`,`#account_id` from {game}.event WHERE
|
(SELECT toDate(addHours("#event_time", 8)) as date,"#event_time",`#event_name`,`#distinct_id`,`#account_id` from {game}.event WHERE
|
||||||
addHours("#event_time", 8) >= '{stat_date}' AND addHours("#event_time", 8) <= '{end_date}'
|
addHours("#event_time", 8) >= '{stat_date}' AND addHours("#event_time", 8) <= '{end_date}'
|
||||||
@ -534,6 +565,7 @@ addHours("#event_time", 8) >= '{stat_date}' AND addHours("#event_time", 8) <= '{
|
|||||||
GROUP BY toDate(addHours("#event_time", 8))"""
|
GROUP BY toDate(addHours("#event_time", 8))"""
|
||||||
|
|
||||||
sqls.append({'sql': sql,
|
sqls.append({'sql': sql,
|
||||||
|
'group_label':self.group_label,
|
||||||
'groupby': [i.key for i in self.groupby],
|
'groupby': [i.key for i in self.groupby],
|
||||||
'date_range': self.date_range,
|
'date_range': self.date_range,
|
||||||
'event_name': event_name_display or event_name,
|
'event_name': event_name_display or event_name,
|
||||||
@ -546,7 +578,8 @@ GROUP BY toDate(addHours("#event_time", 8))"""
|
|||||||
})
|
})
|
||||||
|
|
||||||
return sqls
|
return sqls
|
||||||
#在漏斗分析,事件分析模型里面都有用到这块
|
|
||||||
|
# 在漏斗分析,事件分析模型里面都有用到这块
|
||||||
async def funnel_model_sql(self):
|
async def funnel_model_sql(self):
|
||||||
"""
|
"""
|
||||||
SELECT level, count(*) AS values
|
SELECT level, count(*) AS values
|
||||||
@ -614,7 +647,7 @@ ORDER BY level
|
|||||||
event = self.events[0]
|
event = self.events[0]
|
||||||
event_name = event['eventName']
|
event_name = event['eventName']
|
||||||
analysis = event['analysis']
|
analysis = event['analysis']
|
||||||
if analysis in ['list_distinct',"set_distinct","ele_distinct"]:
|
if analysis in ['list_distinct', "set_distinct", "ele_distinct"]:
|
||||||
analysis = 'max'
|
analysis = 'max'
|
||||||
e_account_id_col = getattr(self.event_tbl.c, '#account_id').label('uid')
|
e_account_id_col = getattr(self.event_tbl.c, '#account_id').label('uid')
|
||||||
u_account_id_col = getattr(self.user_tbl.c, '#account_id')
|
u_account_id_col = getattr(self.user_tbl.c, '#account_id')
|
||||||
@ -680,9 +713,9 @@ ORDER BY level
|
|||||||
qry = sa.select(event_date_col, e_account_id_col,
|
qry = sa.select(event_date_col, e_account_id_col,
|
||||||
settings.CK_FUNC[analysis](event_attr_col).label('values')) \
|
settings.CK_FUNC[analysis](event_attr_col).label('values')) \
|
||||||
.where(and_(*where)) \
|
.where(and_(*where)) \
|
||||||
.group_by(event_date_col,e_account_id_col)
|
.group_by(event_date_col, e_account_id_col)
|
||||||
sql = str(qry.compile(compile_kwargs={"literal_binds": True}))
|
sql = str(qry.compile(compile_kwargs={"literal_binds": True}))
|
||||||
columnName = event.get('label_id','')
|
columnName = event.get('label_id', '')
|
||||||
if columnName != '':
|
if columnName != '':
|
||||||
sql = sql.replace('SELECT', f'SELECT {columnName},', 1)
|
sql = sql.replace('SELECT', f'SELECT {columnName},', 1)
|
||||||
sql += f',{columnName}'
|
sql += f',{columnName}'
|
||||||
@ -842,8 +875,9 @@ ORDER BY values desc"""
|
|||||||
days = (arrow.get(self.end_date).date() - arrow.get(self.start_date).date()).days
|
days = (arrow.get(self.end_date).date() - arrow.get(self.start_date).date()).days
|
||||||
keep = []
|
keep = []
|
||||||
cnt = []
|
cnt = []
|
||||||
retention_n = [*[k for k in range(1, 60)], 70-1, 75-1, 80-1, 85-1, 90-1, 95-1, 100-1, 110-1, 120-1, 150-1, 180-1, 210-1, 240-1, 270-1, 300-1,
|
retention_n = [*[k for k in range(1, 60)], 70 - 1, 75 - 1, 80 - 1, 85 - 1, 90 - 1, 95 - 1, 100 - 1, 110 - 1,
|
||||||
360-1]
|
120 - 1, 150 - 1, 180 - 1, 210 - 1, 240 - 1, 270 - 1, 300 - 1,
|
||||||
|
360 - 1]
|
||||||
|
|
||||||
"""
|
"""
|
||||||
cnt0-cnt1 as on1,
|
cnt0-cnt1 as on1,
|
||||||
|
Loading…
Reference in New Issue
Block a user