xbackend/models/x_analysis.py
李伟 317aa592a3 1.修改底层生成SQL代码
2.新增游戏埋点,并给配置新的表
2022-07-07 10:38:59 +08:00

209 lines
8.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from typing import Tuple
import arrow
import sqlalchemy as sa
import json
from fastapi import Depends
import pandas as pd
from sqlalchemy import func, or_, and_, not_, MetaData
import crud
import schemas
from core.config import settings
from db import get_database
from db.redisdb import get_redis_pool, RedisDrive
from models.user_label import UserClusterDef
from utils import get_event
class XAnalysis:
def __init__(self, data_in: schemas.CkQuery, game: str):
self.data_in = data_in
self.game = game
self.event_view = dict()
self.events = []
self.global_filters = []
self.account_filters = []
self.global_relation = 'and'
self.date_range = []
self.ext_filters = (self.data_in.ext_filter.get('filts', []), self.data_in.ext_filter.get('relation', 'and'))
self.start_date = None
self.end_date = None
self.data_where=[]
self.event_tbl=None
self.zone_time:int = 0
def _get_global_filters(self):
return self.event_view.get('filts') or [] #获取event_view字典里面filts的值或返回空列表
async def init(self, *args, **kwargs):
if self.data_in.report_id:
db = get_database()
report = await crud.report.get(db, id=self.data_in.report_id)
self.event_view = report['query']['eventView']
self.events = report['query']['events']
try:
e_days = self.event_view['e_days']
s_days = self.event_view['s_days']
except:
# 兼容以前的
e_days, s_days = self.event_view['recentDay'].split('-')
# self.event_view['endTime'] = arrow.get().shift(days=-int(e_days)+1).strftime('%Y-%m-%d 23:59:59')
# self.event_view['startTime'] = arrow.get().shift(days=-int(s_days)+1).strftime('%Y-%m-%d 00:00:00')
self.event_view['endTime'] = arrow.get().shift(days=-int(e_days)).strftime('%Y-%m-%d 23:59:59')
self.event_view['startTime'] = arrow.get().shift(days=-int(s_days)).strftime('%Y-%m-%d 00:00:00')
else:
self.event_view = self.data_in.eventView
self.events = self.data_in.events
for d in pd.date_range(self.event_view['startTime'], self.event_view['endTime'], freq='D', tz='UTC'):
self.date_range.append(d.date())
self.global_filters = self._get_global_filters()
self.global_relation = self.event_view.get('relation', 'and')
# 用户自带过滤
if 'data_where' in kwargs:
self.account_filters = kwargs['data_where'].get(self.game, [])
def handler_filts(self, *filters):
"""
:param filters: (filts:list,relation:str)
:param g_f:
:param relation:
:return:
"""
event_filters = []
for filter in filters:
filts = filter[0]
relation = filter[1]
event_filter = []
for item in filts:
where = event_filter
col = sa.Column(item['columnName'])
comparator = item['comparator']
ftv = item['ftv']
if comparator == '==':
if len(ftv) > 1:
where.append(or_(*[col == v for v in ftv]))
else:
where.append(col == ftv[0])
elif comparator == '>=':
where.append(col >= ftv[0])
elif comparator == '<=':
where.append(col <= ftv[0])
elif comparator == '>':
where.append(col > ftv[0])
elif comparator == '<':
where.append(col < ftv[0])
elif comparator == 'is not null':
where.append(col.isnot(None))
elif comparator == 'is null':
where.append(col.is_(None))
elif comparator == 'like':
where.append(col.like(f'%{ftv[0]}%'))
elif comparator == 'not like':
where.append(col.notlike(f'%{ftv[0]}%'))
elif comparator == 'in':
where.append(col.in_(ftv))
elif comparator == '!=':
where.append(col != ftv[0])
if relation == 'and':
if event_filter:
event_filters.append(and_(*event_filter))
else:
if event_filter:
event_filters.append(or_(*event_filter))
return event_filters
def ltv_model_sql(self):
# ltv的生成sql
days = (arrow.get(self.event_view['endTime']).date() - arrow.get(self.event_view['startTime']).date()).days
quota = self.event_view['quota']
select_ltv = []
sumpay = []
sum_money = []
# for i in range(1, days + 2):
ltv_n = [*[k for k in range(1, 61)], 70, 75, 80, 85, 90, 95, 100, 110, 120, 150, 180, 210, 240, 270, 300, 360]
for i in ltv_n:
# select_ltv.append(func.round(sa.Column(f'sumpay_{i}') / sa.Column('cnt1'), 2).label(f'LTV{i}'))
select_ltv.append(
f"if(dateDiff('day', reg.date, now())<{i - 1}, '-',toString(round(sumpay_{i} / cnt1, 2))) AS LTV{i}")
sumpay.append(f"sum(if(dateDiff('day', a.date, b.date) < {i}, money, 0)) as sumpay_{i}")
sum_money.append(f"sumpay_{i}")
# qry = sa.select(*select_ltv)
# select_ltv_str = str(qry.compile(compile_kwargs={"literal_binds": True}))
# select_ltv_str = select_ltv_str.split('SELECT ')[1]
sumpay_str = ','.join(sumpay)
select_ltv_str = ','.join(select_ltv)
sum_money_str = ','.join(sum_money)
where = [
sa.Column('date') >= self.event_view['startTime'].split(' ')[0],
sa.Column('date') <= self.event_view['endTime'].split(' ')[0]
]
if quota == '#distinct_id':
where.append(sa.Column('is_new_device') == 1)
qry = sa.select().where(*where)
sql = str(qry.compile(compile_kwargs={"literal_binds": True}))
where_str = sql.split('WHERE ')[1]
where_order = self.handler_filts((self.global_filters, self.global_relation)) #global_relation就是 and
where_order_str = 1
if where_order:
qry = sa.select().where(*where_order)
sql = str(qry.compile(compile_kwargs={"literal_binds": True}))
where_order_str = sql.split('WHERE ')[1]
where_account = self.handler_filts((self.account_filters, 'and'), self.ext_filters)
where_account_str = 1
if where_account:
qry = sa.select().where(*where_account)
sql = str(qry.compile(compile_kwargs={"literal_binds": True}))
where_account_str = sql.split('WHERE ')[1]
sql = f"""SELECT reg.date as date,
cnt1,
{select_ltv_str},
{sum_money_str}
FROM (SELECT toDate(addHours(`#event_time`, `#zone_offset`)) as date, uniqExact(`{quota}`) cnt1
FROM {self.game}.event
where `#event_name` = 'create_account'
AND {where_str} AND {where_account_str}
GROUP BY toDate(addHours(`#event_time`, `#zone_offset`))) as reg
left join
(select a.date,
{sumpay_str}
from (SELECT toDate(addHours(`#event_time`, `#zone_offset`)) as date, `{quota}`
FROM {self.game}.event
where `#event_name` = 'create_account'
AND {where_str} AND {where_account_str} ) as a
left join (select `{quota}`, unitPrice/100 as money, toDate(addHours(`#event_time`, `#zone_offset`)) as date
from {self.game}.event
where `#event_name` = 'pay' and {where_order_str} AND {where_account_str}) b
on a.`{quota}` = b.`{quota}`
group by a.date) log on reg.date = log.date
order by date
"""
print(sql)
return {'sql': sql, 'quota': quota,
'start_date': self.event_view['startTime'][:10],
'end_date': self.event_view['endTime'][:10],
'date_range': self.date_range,
'ltv_n': ltv_n
}