From e6735e8dfa5f5dcce6ff979b786b0c293c81eac4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=80=C3=AE=C3=97=C3=9A=C3=95=C3=B1?= Date: Fri, 10 Jun 2022 17:19:41 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BA=8B=E4=BB=B6=E5=88=86=E6=9E=90=E6=A0=87?= =?UTF-8?q?=E7=AD=BE=E5=88=86=E7=BB=84=E9=A1=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/api_v1/endpoints/query.py | 17 ++++++- models/behavior_analysis.py | 90 ++++++++++++++++++++++++----------- 2 files changed, 78 insertions(+), 29 deletions(-) diff --git a/api/api_v1/endpoints/query.py b/api/api_v1/endpoints/query.py index b7133c3..6b5e3c1 100644 --- a/api/api_v1/endpoints/query.py +++ b/api/api_v1/endpoints/query.py @@ -188,7 +188,7 @@ async def event_model( return schemas.Msg(code=-9, msg='报表配置参数异常') res = [] is_hide = [] - + group_label = {} for idx, item in enumerate(sqls): #列出索引下标 if item.get('is_show') == False: is_hide.append(idx) @@ -373,6 +373,7 @@ async def event_model( # q['eventNameDisplay']=item['event_name_display'] res.append(q) + group_label = item['group_label'] # 按总和排序 for item in res: try: @@ -388,6 +389,20 @@ async def event_model( sort_key = np.argsort(np.array(item['sum']))[::-1]#将sum中的元素从小到大排列后的结果,提取其对应的索引,然后倒着输出到变量之中 if item.get('groups'): item['groups'] = np.array(item['groups'])[sort_key].tolist() + groups = [] + for gitem in item['groups']: + gb = [] + if '(' in gitem: + gitem = gitem.strip('(').strip(')').replace(' ', '').replace("'", '') + true_list = gitem.split(',') + for gstr in true_list: + gb.append(gstr) + # 存在标签分组项 + if group_label: + for name, idx in group_label.items(): + gb.insert(idx, name) + groups.append(str(gb)) + item['groups'] = groups item['values'] = np.array(item['values'])[sort_key].tolist() item['sum'] = np.array(item['sum'])[sort_key].tolist() item['avg'] = np.array(item['avg'])[sort_key].tolist() diff --git a/models/behavior_analysis.py b/models/behavior_analysis.py index 18284b8..673c507 100644 --- a/models/behavior_analysis.py +++ b/models/behavior_analysis.py @@ -115,6 +115,7 @@ class BehaviorAnalysis: self.user_tbl = None self.event_tbl = None self.data_in = data_in + self.group_label = {} self.event_view = dict() self.events = [dict()] @@ -166,8 +167,8 @@ class BehaviorAnalysis: self.zone_time = self._get_zone_time() self.time_particle = self._get_time_particle_size() self.start_date, self.end_date, self.date_range = self._get_date_range() - self.global_filters = self._get_global_filters() self.groupby = self._get_group_by() + self.global_filters = self._get_global_filters() self.unit_num = self._get_unit_num() self.global_relation = self.event_view.get('relation', 'and') @@ -184,8 +185,29 @@ class BehaviorAnalysis: return self.event_view.get('unitNum') def _get_group_by(self): - - return [getattr(self.event_tbl.c, item['columnName']) for item in self.event_view.get('groupBy', [])] + res = [] + # 存在删选条件 + groupBy = self.event_view.get('groupBy', []) + if not groupBy: + return [] + for idx, item in enumerate(groupBy): + # 如果是标签 + if item['data_type'] == 'user_label': + item.update({ + 'comparator': "in", + 'comparator_name': "是" + }) + # 加入分组标签 + self.group_label.update({item['columnDesc']: idx}) + # 加入events中每个event的filts条件中 + if self.events: + for i in self.events: + i['filts'].append(item) + continue + # 不是标签加入分组项中 + res.append(getattr(self.event_tbl.c, item['columnName'])) + return res + # return [getattr(self.event_tbl.c, item['columnName']) for item in self.event_view.get('groupBy', [])] def _get_zone_time(self): return int(self.event_view.get('zone_time', 8)) @@ -385,8 +407,8 @@ class BehaviorAnalysis: sqls = [] event_time_col = getattr(self.event_tbl.c, '#event_time') for event in self.events: - operator_ = event.get('operator_val','') - #排头显示名 + operator_ = event.get('operator_val', '') + # 排头显示名 event_name_display = event.get('eventNameDisplay') is_show = event.get('is_show', True) @@ -408,7 +430,7 @@ class BehaviorAnalysis: if event.get('customType') == 'formula': if event.get('customEvent'): - #组合公式的内容 + # 组合公式的内容 formula = event.get('customEvent') custom = CustomEvent(self.event_tbl, formula, format).parse() event_name = custom['event_name'] @@ -466,22 +488,30 @@ class BehaviorAnalysis: 'values')] else: selectd = select_exprs + [ - func.round(getattr(func, analysis)(getattr(self.event_tbl.c, event['event_attr_id'])), 2).label( + func.round(getattr(func, analysis)(getattr(self.event_tbl.c, event['event_attr_id'])), + 2).label( 'values')] else: - operator_val=int(operator_) - operator=event['operator'] #运算符号 + operator_val = int(operator_) + operator = event['operator'] # 运算符号 if analysis == 'total_count': - selectd = select_exprs + [settings.ARITHMETIC[operator](func.count(),operator_val).label('values')] + selectd = select_exprs + [ + settings.ARITHMETIC[operator](func.count(), operator_val).label('values')] elif analysis == 'touch_user_count': - selectd = select_exprs + [settings.ARITHMETIC[operator](func.count(sa.distinct(e_account_id_col)),operator_val).label('values')] + selectd = select_exprs + [ + settings.ARITHMETIC[operator](func.count(sa.distinct(e_account_id_col)), + operator_val).label('values')] elif analysis == 'touch_user_avg': selectd = select_exprs + [ - settings.ARITHMETIC[operator](func.round((func.count() / func.count(sa.distinct(e_account_id_col))), 2),operator_val).label( + settings.ARITHMETIC[operator]( + func.round((func.count() / func.count(sa.distinct(e_account_id_col))), 2), + operator_val).label( 'values')] else: selectd = select_exprs + [ - settings.ARITHMETIC[operator](func.round(getattr(func, analysis)(getattr(self.event_tbl.c, event['event_attr_id'])), 2),operator_val).label( + settings.ARITHMETIC[operator]( + func.round(getattr(func, analysis)(getattr(self.event_tbl.c, event['event_attr_id'])), + 2), operator_val).label( 'values')] if user_filter: @@ -503,10 +533,10 @@ class BehaviorAnalysis: print(sql) # 单独付费率的拿出来 if event.get('customEvent') == 'pay.touch_user_count/login.touch_user_count': - stat_date=self.start_date - end_date=self.end_date - game=self.game - sql=f""" + stat_date = self.start_date + end_date = self.end_date + game = self.game + sql = f""" select aa.date as date,round((a/b)*100,2) as values from (select toDate(addHours({game}.event."#event_time", 8)) AS date,uniqCombined(if({game}.event."#event_name" = 'pay', {game}.event."#account_id", NULL)) as a from {game}.event WHERE addHours({game}.event."#event_time", 8) >= '{stat_date}' AND addHours({game}.event."#event_time", 8) <= '{end_date}' @@ -518,11 +548,12 @@ WHERE addHours({game}.event."#event_time", 8) >= '{stat_date}' AND addHours({gam GROUP BY toDate(addHours({game}.event."#event_time", 8))) as bb on aa.date = bb.date ORDER by date """ # 单独把新增付费人数(以设备为维度)拿出来 - if event.get('event_attr') == '触发用户数' and ['is_new_device', 'orderid']== [i['columnName'] for i in event.get('filts')]: - stat_date=self.start_date - end_date=self.end_date - game=self.game - sql=f"""SELECT toDate(addHours("#event_time", 8)) as date, + if event.get('event_attr') == '触发用户数' and ['is_new_device', 'orderid'] == [i['columnName'] for i in + event.get('filts')]: + stat_date = self.start_date + end_date = self.end_date + game = self.game + sql = f"""SELECT toDate(addHours("#event_time", 8)) as date, round(uniqExact("#distinct_id"), 2) AS values FROM (SELECT toDate(addHours("#event_time", 8)) as date,"#event_time",`#event_name`,`#distinct_id`,`#account_id` from {game}.event WHERE addHours("#event_time", 8) >= '{stat_date}' AND addHours("#event_time", 8) <= '{end_date}' @@ -534,6 +565,7 @@ addHours("#event_time", 8) >= '{stat_date}' AND addHours("#event_time", 8) <= '{ GROUP BY toDate(addHours("#event_time", 8))""" sqls.append({'sql': sql, + 'group_label':self.group_label, 'groupby': [i.key for i in self.groupby], 'date_range': self.date_range, 'event_name': event_name_display or event_name, @@ -546,7 +578,8 @@ GROUP BY toDate(addHours("#event_time", 8))""" }) return sqls - #在漏斗分析,事件分析模型里面都有用到这块 + + # 在漏斗分析,事件分析模型里面都有用到这块 async def funnel_model_sql(self): """ SELECT level, count(*) AS values @@ -614,7 +647,7 @@ ORDER BY level event = self.events[0] event_name = event['eventName'] analysis = event['analysis'] - if analysis in ['list_distinct',"set_distinct","ele_distinct"]: + if analysis in ['list_distinct', "set_distinct", "ele_distinct"]: analysis = 'max' e_account_id_col = getattr(self.event_tbl.c, '#account_id').label('uid') u_account_id_col = getattr(self.user_tbl.c, '#account_id') @@ -680,9 +713,9 @@ ORDER BY level qry = sa.select(event_date_col, e_account_id_col, settings.CK_FUNC[analysis](event_attr_col).label('values')) \ .where(and_(*where)) \ - .group_by(event_date_col,e_account_id_col) + .group_by(event_date_col, e_account_id_col) sql = str(qry.compile(compile_kwargs={"literal_binds": True})) - columnName = event.get('label_id','') + columnName = event.get('label_id', '') if columnName != '': sql = sql.replace('SELECT', f'SELECT {columnName},', 1) sql += f',{columnName}' @@ -842,8 +875,9 @@ ORDER BY values desc""" days = (arrow.get(self.end_date).date() - arrow.get(self.start_date).date()).days keep = [] cnt = [] - retention_n = [*[k for k in range(1, 60)], 70-1, 75-1, 80-1, 85-1, 90-1, 95-1, 100-1, 110-1, 120-1, 150-1, 180-1, 210-1, 240-1, 270-1, 300-1, - 360-1] + retention_n = [*[k for k in range(1, 60)], 70 - 1, 75 - 1, 80 - 1, 85 - 1, 90 - 1, 95 - 1, 100 - 1, 110 - 1, + 120 - 1, 150 - 1, 180 - 1, 210 - 1, 240 - 1, 270 - 1, 300 - 1, + 360 - 1] """ cnt0-cnt1 as on1,