diff --git a/api/api_v1/endpoints/query.py b/api/api_v1/endpoints/query.py index 4c336a3..cf4f9ef 100644 --- a/api/api_v1/endpoints/query.py +++ b/api/api_v1/endpoints/query.py @@ -1,4 +1,5 @@ import pandas as pd +import numpy as np from fastapi import APIRouter, Depends, Request from motor.motor_asyncio import AsyncIOMotorDatabase @@ -275,5 +276,69 @@ async def funnel_model( _ = date_data.setdefault(key[0].strftime('%Y-%m-%d'), {}) _[key[1]] = tmp - resp = {'list': data_list, 'date_data': date_data, 'title': groupby + cond_level} + resp = {'list': data_list, 'date_data': date_data, 'title': groupby + cond_level, 'level': cond_level + } return schemas.Msg(code=0, msg='ok', data=resp) + + +@router.post("/scatter_model_sql") +async def scatter_model_sql( + request: Request, + game: str, + analysis: BehaviorAnalysis = Depends(BehaviorAnalysis), + current_user: schemas.UserDB = Depends(deps.get_current_user) +) -> schemas.Msg: + """分布分析 sql""" + await analysis.init() + data = analysis.scatter_model_sql() + return schemas.Msg(code=0, msg='ok', data=[data]) + + +@router.post("/scatter_model") +async def scatter_model_sql( + request: Request, + game: str, + ckdb: CKDrive = Depends(get_ck_db), + db: AsyncIOMotorDatabase = Depends(get_database), + analysis: BehaviorAnalysis = Depends(BehaviorAnalysis), + current_user: schemas.UserDB = Depends(deps.get_current_user) +) -> schemas.Msg: + """分布分析 模型""" + await analysis.init() + res = analysis.scatter_model_sql() + sql = res['sql'] + df = await ckdb.query_dataframe(sql) + interval_type = res['interval_type'] + analysis = res['analysis'] + quota_interval_arr = res['quota_interval_arr'] + if analysis != 'number_of_days': + max_v = int(df['values'].max()) + min_v = int(df['values'].min()) + interval = (max_v - min_v) // 10 + resp = {} + if not quota_interval_arr: + resp['label'] = [f'[{i},{i + interval})' for i in range(min_v, max_v, interval)] + bins = [i for i in range(min_v, max_v + interval, interval)] + else: + quota_interval_arr = [-float('inf')] + quota_interval_arr+[float('inf')] + resp['label'] = [] + bins = [quota_interval_arr[0]] + for i, v in enumerate(quota_interval_arr[1:]): + resp['label'].append(f'[{quota_interval_arr[i]},{v})') + bins.append(v) + + for key, tmp_df in df.groupby('date'): + bins_s = pd.cut(tmp_df['values'], bins=bins, + right=False).value_counts() + bins_s.sort_index(inplace=True) + total = int(bins_s.sum()) + resp[key.strftime('%Y-%m-%d')] = {'n': bins_s.to_list(), 'total': total, + 'p': round(bins_s * 100 / total, 2).to_list()} + return schemas.Msg(code=0, msg='ok', data=resp) + + if interval_type == 'def' and analysis == 'number_of_days': + resp = {} + for key, tmp_df in df.groupby('date'): + total = int(tmp_df['values'].sum()) + resp[key.strftime('%Y-%m-%d')] = {'n': total, 'total': total, 'p': 100} + return schemas.Msg(code=0, msg='ok', data=resp) diff --git a/api/api_v1/endpoints/test.py b/api/api_v1/endpoints/test.py index 854d2af..ee0d415 100644 --- a/api/api_v1/endpoints/test.py +++ b/api/api_v1/endpoints/test.py @@ -20,7 +20,7 @@ async def test( current_user: schemas.UserDB = Depends(deps.get_current_user), analysis: BehaviorAnalysis = Depends(BehaviorAnalysis)) -> schemas.Msg: await analysis.init() - query = analysis.funnel_model_sql() + query = analysis.scatter_model_sql() data = { 'game': game, 'analysis': analysis.game, diff --git a/core/config.py b/core/config.py index 01c2ad2..65f7535 100644 --- a/core/config.py +++ b/core/config.py @@ -11,12 +11,8 @@ class Settings(BaseSettings): BACKEND_CORS_ORIGINS: List[str] = ['*'] - - CASBIN_COLL: str = 'casbin_rule' - - SUPERUSER_EMAIL: str = '15392746632@qq.com' SUPERUSER_PASSWORD: str = '123456' SUPERUSER_NAME: str = 'root' @@ -74,6 +70,15 @@ class Settings(BaseSettings): "Nullable(Float)": 'float', "Float": 'float', } + CK_FUNC = { + 'sum': lambda x: func.sum(x), + 'avg': lambda x: func.round(func.avg(x), 2), + 'median': lambda x: func.median(x), + 'max': lambda x: func.max(x), + 'min': lambda x: func.min(x), + 'distinct_count': lambda x: func.count(func.distinct_count(x)), + } + CK_OPERATOR = { 'int': [{ 'id': 'sum', diff --git a/models/behavior_analysis.py b/models/behavior_analysis.py index 727400c..d9dd590 100644 --- a/models/behavior_analysis.py +++ b/models/behavior_analysis.py @@ -280,3 +280,49 @@ ORDER BY level 'date_range': self.date_range, 'cond_level': cond_level } + + def scatter_model_sql(self): + event = self.events[0] + event_name = event['eventName'] + analysis = event['analysis'] + e_account_id_col = getattr(self.event_tbl.c, '#account_id') + event_name_col = getattr(self.event_tbl.c, '#event_name') + event_time_col = getattr(self.event_tbl.c, '#event_time').label('date') + event_date_col = settings.TIME_GRAIN_EXPRESSIONS[self.time_particle](event_time_col, self.zone_time) + + quota_interval_arr = event.get('quotaIntervalArr') + + where = [ + event_date_col >= self.start_date, + event_date_col <= self.end_date, + event_name_col == event_name + + ] + if analysis in ['times', 'number_of_days', 'number_of_hours']: + + qry = sa.select(event_date_col, func.count().label('values')) \ + .where(and_(*where)) \ + .group_by(event_date_col, e_account_id_col) + + sql = str(qry.compile(compile_kwargs={"literal_binds": True})) + print(sql) + return { + 'sql': sql, + 'interval_type': event['intervalType'], + 'analysis': analysis, + 'quota_interval_arr': quota_interval_arr + } + elif event.get('quota'): + event_attr_col = getattr(self.event_tbl.c, event['quota']) + + qry = sa.select(event_date_col, settings.CK_FUNC[analysis](event_attr_col).label('values')) \ + .where(and_(*where)) \ + .group_by(event_date_col, e_account_id_col) + sql = str(qry.compile(compile_kwargs={"literal_binds": True})) + print(sql) + return { + 'sql': sql, + 'interval_type': event['intervalType'], + 'analysis': analysis, + 'quota_interval_arr': quota_interval_arr + }