from typing import Tuple

import arrow
import sqlalchemy as sa
import json

from fastapi import Depends
import pandas as pd

from sqlalchemy import func, or_, and_, not_, MetaData

import crud
import schemas
from core.config import settings
from db import get_database
from db.redisdb import get_redis_pool, RedisDrive
from models.user_label import UserClusterDef
from utils import get_event


class XAnalysis:
    """Build the LTV report query for one game from a ``schemas.CkQuery``.

    Typical use: construct, ``await init(...)`` to load the event view and
    filters, then call :meth:`ltv_model_sql` to obtain the SQL text.
    """

    def __init__(self, data_in: schemas.CkQuery, game: str):
        """Store the request and initialise empty query state.

        :param data_in: the query request; ``ext_filter``, ``report_id``,
            ``eventView`` and ``events`` are read from it.
        :param game: game identifier; used as the database name in the
            generated SQL and as the key into ``data_where`` in :meth:`init`.
        """
        self.data_in = data_in
        self.game = game
        self.event_view = dict()
        self.events = []
        self.global_filters = []
        self.account_filters = []
        self.global_relation = 'and'
        self.date_range = []
        # (filters, relation) pair in the shape handler_filts() expects.
        self.ext_filters = (
            self.data_in.ext_filter.get('filts', []),
            self.data_in.ext_filter.get('relation', 'and'),
        )
        self.start_date = None
        self.end_date = None
        self.data_where = []
        self.event_tbl = None
        self.zone_time: int = 0

    def _get_global_filters(self):
        """Return the ``filts`` entry of the event view, or an empty list."""
        return self.event_view.get('filts') or []

    async def init(self, *args, **kwargs):
        """Load the event view, date range and filters for this query.

        When ``data_in.report_id`` is set, the saved report is fetched and its
        start/end times are recomputed relative to the current date; otherwise
        the event view and events are taken directly from ``data_in``.

        :param kwargs: optional ``data_where`` mapping of game -> list of
            account-level filters; the entry for ``self.game`` is used.
        """
        if self.data_in.report_id:
            db = get_database()
            report = await crud.report.get(db, id=self.data_in.report_id)
            self.event_view = report['query']['eventView']
            self.events = report['query']['events']
            try:
                e_days = self.event_view['e_days']
                s_days = self.event_view['s_days']
            except KeyError:
                # Backward compatibility: older reports store both offsets in
                # a single 'recentDay' string separated by '-'.
                e_days, s_days = self.event_view['recentDay'].split('-')
            now = arrow.get()
            self.event_view['endTime'] = now.shift(days=-int(e_days)).strftime('%Y-%m-%d 23:59:59')
            self.event_view['startTime'] = now.shift(days=-int(s_days)).strftime('%Y-%m-%d 00:00:00')
        else:
            self.event_view = self.data_in.eventView
            self.events = self.data_in.events

        # Rebuilt (not appended to) so that calling init() again does not
        # accumulate duplicate dates.
        self.date_range = [
            d.date()
            for d in pd.date_range(self.event_view['startTime'], self.event_view['endTime'],
                                   freq='D', tz='UTC')
        ]

        self.global_filters = self._get_global_filters()
        self.global_relation = self.event_view.get('relation', 'and')

        # Filters attached to the requesting user, keyed by game.
        if 'data_where' in kwargs:
            self.account_filters = kwargs['data_where'].get(self.game, [])

    @staticmethod
    def _build_condition(col, comparator, ftv):
        """Translate one (column, comparator, values) triple into a SQLAlchemy
        condition; return ``None`` for an unrecognised comparator."""
        if comparator == '==':
            if len(ftv) > 1:
                return or_(*[col == v for v in ftv])
            return col == ftv[0]
        if comparator == '>=':
            return col >= ftv[0]
        if comparator == '<=':
            return col <= ftv[0]
        if comparator == '>':
            return col > ftv[0]
        if comparator == '<':
            return col < ftv[0]
        if comparator == 'is not null':
            return col.isnot(None)
        if comparator == 'is null':
            return col.is_(None)
        if comparator == 'like':
            return col.like(f'%{ftv[0]}%')
        if comparator == 'not like':
            return col.notlike(f'%{ftv[0]}%')
        if comparator == 'in':
            return col.in_(ftv)
        if comparator == '!=':
            return col != ftv[0]
        return None

    def handler_filts(self, *filters):
        """Convert filter groups into SQLAlchemy conditions.

        :param filters: any number of ``(filts, relation)`` pairs, where
            ``filts`` is a list of dicts with keys ``columnName``,
            ``comparator`` and ``ftv`` (list of values), and ``relation`` is
            ``'and'`` or anything else (treated as "or").
        :return: a list with one combined condition per non-empty group.
            Items with an unrecognised comparator are ignored.
        :raises IndexError: if a value-taking comparator has an empty ``ftv``.
        """
        event_filters = []
        for filts, relation in ((group[0], group[1]) for group in filters):
            conditions = []
            for item in filts:
                col = sa.Column(item['columnName'])
                condition = self._build_condition(col, item['comparator'], item['ftv'])
                # Identity check: SQLAlchemy clauses must not be truth-tested.
                if condition is not None:
                    conditions.append(condition)

            if conditions:
                combine = and_ if relation == 'and' else or_
                event_filters.append(combine(*conditions))

        return event_filters

    @staticmethod
    def _where_clause(conditions) -> str:
        """Render *conditions* (AND-ed together) as a parenthesised SQL boolean
        expression with literal values inlined; ``'1'`` when there are none."""
        if not conditions:
            return '1'
        stmt = sa.select().where(*conditions)
        compiled = str(stmt.compile(compile_kwargs={"literal_binds": True}))
        # maxsplit=1: a literal value may itself contain the text 'WHERE '.
        # Parentheses: a lone OR-group compiles without them and would otherwise
        # change meaning once spliced between ANDs in the outer query.
        return f"({compiled.split('WHERE ', 1)[1]})"

    def ltv_model_sql(self):
        """Build the SQL text for the LTV report.

        For each registration date the query computes ``cnt1`` (distinct
        ``quota`` values on ``create_account`` events), ``sumpay_N`` (sum of
        ``unitPrice/100`` over ``pay`` events fewer than N days after
        registration) and ``LTVN = round(sumpay_N / cnt1, 2)``, rendered as
        ``'-'`` when the registration date is fewer than N-1 days before now.

        :return: dict with keys ``sql``, ``quota``, ``start_date``,
            ``end_date``, ``date_range`` and ``ltv_n`` (the list of N values).
        """
        quota = self.event_view['quota']
        ltv_n = [*range(1, 61), 70, 75, 80, 85, 90, 95, 100, 110, 120, 150, 180, 210, 240, 270, 300, 360]

        select_ltv_str = ','.join(
            f"if(dateDiff('day', reg.date, now())<{i - 1}, '-',toString(round(sumpay_{i} / cnt1, 2))) AS LTV{i}"
            for i in ltv_n
        )
        sumpay_str = ','.join(
            f"sum(if(dateDiff('day', a.date, b.date) < {i}, money, 0)) as sumpay_{i}"
            for i in ltv_n
        )
        sum_money_str = ','.join(f"sumpay_{i}" for i in ltv_n)

        # Registration window: only the date part of start/end time is used.
        where = [
            sa.Column('date') >= self.event_view['startTime'].split(' ')[0],
            sa.Column('date') <= self.event_view['endTime'].split(' ')[0],
        ]
        if quota == '#distinct_id':
            where.append(sa.Column('is_new_device') == 1)
        where_str = self._where_clause(where)

        # Global filters from the event view apply to the pay events.
        where_order_str = self._where_clause(
            self.handler_filts((self.global_filters, self.global_relation)))

        # Account-level and extension filters apply to every sub-query.
        where_account_str = self._where_clause(
            self.handler_filts((self.account_filters, 'and'), self.ext_filters))

        # NOTE(review): `quota` and `self.game` are interpolated as identifiers
        # without escaping; confirm that callers validate them upstream.
        sql = f"""SELECT reg.date as date,
       cnt1,
       {select_ltv_str},
       {sum_money_str}
FROM (SELECT toDate(addHours(`#event_time`, `#zone_offset`)) as date, uniqExact(`{quota}`) cnt1
      FROM {self.game}.event
      where `#event_name` = 'create_account'
        AND {where_str} AND {where_account_str}
      GROUP BY toDate(addHours(`#event_time`, `#zone_offset`))) as reg
         left join
     (select a.date,
             {sumpay_str}
      from (SELECT toDate(addHours(`#event_time`, `#zone_offset`)) as date, `{quota}`
            FROM {self.game}.event
            where `#event_name` = 'create_account'
              AND {where_str} AND {where_account_str} ) as a
               left join (select `{quota}`, unitPrice/100 as money, toDate(addHours(`#event_time`, `#zone_offset`)) as date
                          from {self.game}.event
                          where `#event_name` = 'pay' and {where_order_str} AND {where_account_str}) b
                         on a.`{quota}` = b.`{quota}`
      group by a.date) log on reg.date = log.date
order by date
"""
        print(sql)
        return {
            'sql': sql,
            'quota': quota,
            'start_date': self.event_view['startTime'][:10],
            'end_date': self.event_view['endTime'][:10],
            'date_range': self.date_range,
            'ltv_n': ltv_n,
        }