This commit is contained in:
wuaho 2021-06-18 11:32:04 +08:00
parent 749ff54ddf
commit 9aeb897e4a
4 changed files with 122 additions and 6 deletions

View File

@ -1,4 +1,5 @@
import pandas as pd
import numpy as np
from fastapi import APIRouter, Depends, Request
from motor.motor_asyncio import AsyncIOMotorDatabase
@ -275,5 +276,69 @@ async def funnel_model(
_ = date_data.setdefault(key[0].strftime('%Y-%m-%d'), {}) _ = date_data.setdefault(key[0].strftime('%Y-%m-%d'), {})
_[key[1]] = tmp _[key[1]] = tmp
resp = {'list': data_list, 'date_data': date_data, 'title': groupby + cond_level} resp = {'list': data_list, 'date_data': date_data, 'title': groupby + cond_level, 'level': cond_level
}
return schemas.Msg(code=0, msg='ok', data=resp) return schemas.Msg(code=0, msg='ok', data=resp)
@router.post("/scatter_model_sql")
async def scatter_model_sql(
request: Request,
game: str,
analysis: BehaviorAnalysis = Depends(BehaviorAnalysis),
current_user: schemas.UserDB = Depends(deps.get_current_user)
) -> schemas.Msg:
"""分布分析 sql"""
await analysis.init()
data = analysis.scatter_model_sql()
return schemas.Msg(code=0, msg='ok', data=[data])
@router.post("/scatter_model")
async def scatter_model_sql(
request: Request,
game: str,
ckdb: CKDrive = Depends(get_ck_db),
db: AsyncIOMotorDatabase = Depends(get_database),
analysis: BehaviorAnalysis = Depends(BehaviorAnalysis),
current_user: schemas.UserDB = Depends(deps.get_current_user)
) -> schemas.Msg:
"""分布分析 模型"""
await analysis.init()
res = analysis.scatter_model_sql()
sql = res['sql']
df = await ckdb.query_dataframe(sql)
interval_type = res['interval_type']
analysis = res['analysis']
quota_interval_arr = res['quota_interval_arr']
if analysis != 'number_of_days':
max_v = int(df['values'].max())
min_v = int(df['values'].min())
interval = (max_v - min_v) // 10
resp = {}
if not quota_interval_arr:
resp['label'] = [f'[{i},{i + interval})' for i in range(min_v, max_v, interval)]
bins = [i for i in range(min_v, max_v + interval, interval)]
else:
quota_interval_arr = [-float('inf')] + quota_interval_arr+[float('inf')]
resp['label'] = []
bins = [quota_interval_arr[0]]
for i, v in enumerate(quota_interval_arr[1:]):
resp['label'].append(f'[{quota_interval_arr[i]},{v})')
bins.append(v)
for key, tmp_df in df.groupby('date'):
bins_s = pd.cut(tmp_df['values'], bins=bins,
right=False).value_counts()
bins_s.sort_index(inplace=True)
total = int(bins_s.sum())
resp[key.strftime('%Y-%m-%d')] = {'n': bins_s.to_list(), 'total': total,
'p': round(bins_s * 100 / total, 2).to_list()}
return schemas.Msg(code=0, msg='ok', data=resp)
if interval_type == 'def' and analysis == 'number_of_days':
resp = {}
for key, tmp_df in df.groupby('date'):
total = int(tmp_df['values'].sum())
resp[key.strftime('%Y-%m-%d')] = {'n': total, 'total': total, 'p': 100}
return schemas.Msg(code=0, msg='ok', data=resp)

View File

@ -20,7 +20,7 @@ async def test(
current_user: schemas.UserDB = Depends(deps.get_current_user), current_user: schemas.UserDB = Depends(deps.get_current_user),
analysis: BehaviorAnalysis = Depends(BehaviorAnalysis)) -> schemas.Msg: analysis: BehaviorAnalysis = Depends(BehaviorAnalysis)) -> schemas.Msg:
await analysis.init() await analysis.init()
query = analysis.funnel_model_sql() query = analysis.scatter_model_sql()
data = { data = {
'game': game, 'game': game,
'analysis': analysis.game, 'analysis': analysis.game,

View File

@ -11,12 +11,8 @@ class Settings(BaseSettings):
BACKEND_CORS_ORIGINS: List[str] = ['*'] BACKEND_CORS_ORIGINS: List[str] = ['*']
CASBIN_COLL: str = 'casbin_rule' CASBIN_COLL: str = 'casbin_rule'
SUPERUSER_EMAIL: str = '15392746632@qq.com' SUPERUSER_EMAIL: str = '15392746632@qq.com'
SUPERUSER_PASSWORD: str = '123456' SUPERUSER_PASSWORD: str = '123456'
SUPERUSER_NAME: str = 'root' SUPERUSER_NAME: str = 'root'
@ -74,6 +70,15 @@ class Settings(BaseSettings):
"Nullable(Float)": 'float', "Nullable(Float)": 'float',
"Float": 'float', } "Float": 'float', }
# Aggregator id -> SQLAlchemy expression factory, used when compiling the
# ClickHouse quota aggregation for scatter-model queries.
CK_FUNC = {
    'sum': lambda x: func.sum(x),
    'avg': lambda x: func.round(func.avg(x), 2),
    'median': lambda x: func.median(x),
    'max': lambda x: func.max(x),
    'min': lambda x: func.min(x),
    # Fix: ClickHouse has no `distinct_count()` function, so the original
    # `func.count(func.distinct_count(x))` emitted invalid SQL
    # (`count(distinct_count(x))`). `count(distinct(x))` is valid ClickHouse.
    'distinct_count': lambda x: func.count(func.distinct(x)),
}
CK_OPERATOR = { CK_OPERATOR = {
'int': [{ 'int': [{
'id': 'sum', 'id': 'sum',

View File

@ -280,3 +280,49 @@ ORDER BY level
'date_range': self.date_range, 'date_range': self.date_range,
'cond_level': cond_level 'cond_level': cond_level
} }
def scatter_model_sql(self):
    """Build the SQL for the distribution ('scatter') analysis model.

    Uses only the first configured event. Returns a dict with:
        sql: compiled SELECT (literal binds) grouped by date bucket and account
        interval_type: the event's `intervalType` setting
        analysis: the aggregation kind requested for the event
        quota_interval_arr: user-supplied interval edges (may be None)

    NOTE(review): when `analysis` is not a count-style kind and the event
    has no 'quota', this returns None — confirm callers tolerate that.
    """
    event = self.events[0]
    event_name = event['eventName']
    analysis = event['analysis']
    e_account_id_col = getattr(self.event_tbl.c, '#account_id')
    event_name_col = getattr(self.event_tbl.c, '#event_name')
    event_time_col = getattr(self.event_tbl.c, '#event_time').label('date')
    event_date_col = settings.TIME_GRAIN_EXPRESSIONS[self.time_particle](event_time_col, self.zone_time)
    quota_interval_arr = event.get('quotaIntervalArr')
    where = [
        event_date_col >= self.start_date,
        event_date_col <= self.end_date,
        event_name_col == event_name
    ]
    # Pick the value expression per analysis kind; compile/return once
    # instead of duplicating the whole tail in each branch.
    if analysis in ('times', 'number_of_days', 'number_of_hours'):
        value_col = func.count().label('values')
    elif event.get('quota'):
        event_attr_col = getattr(self.event_tbl.c, event['quota'])
        value_col = settings.CK_FUNC[analysis](event_attr_col).label('values')
    else:
        return None  # same as the original implicit fall-through
    qry = sa.select(event_date_col, value_col) \
        .where(and_(*where)) \
        .group_by(event_date_col, e_account_id_col)
    sql = str(qry.compile(compile_kwargs={"literal_binds": True}))
    print(sql)  # kept: existing debug output behavior
    return {
        'sql': sql,
        'interval_type': event['intervalType'],
        'analysis': analysis,
        'quota_interval_arr': quota_interval_arr
    }