1.新增分布分析分组显示数据

This commit is contained in:
李伟 2022-04-25 17:15:52 +08:00
parent aced6844d3
commit a26f3035bb
3 changed files with 107 additions and 19 deletions

View File

@ -30,11 +30,11 @@ async def event_list(
#获取事件名 #获取事件名
try: try:
event_list = await ckdb.distinct(game, 'event', '#event_name') event_list = await ckdb.distinct(game, 'event', '#event_name')
# 获取事件量
event_count = await ckdb.yesterday_event_count(game)
event_meta = await crud.event_mana.find_many(db, {'game': game}) or {}
except Exception as e: except Exception as e:
return schemas.Msg(code=-9, msg='查无数据', data='') return schemas.Msg(code=-9, msg='查无数据', data='')
#获取事件量
event_count = await ckdb.yesterday_event_count(game)
event_meta = await crud.event_mana.find_many(db, {'game':game}) or {}
if event_meta: if event_meta:
event_meta = pd.DataFrame(event_meta).set_index('event_name').fillna('').T.to_dict() event_meta = pd.DataFrame(event_meta).set_index('event_name').fillna('').T.to_dict()

View File

@ -990,14 +990,14 @@ async def scatter_model(
#转换数据类型为int #转换数据类型为int
df['values'] = df['values'].astype(int) df['values'] = df['values'].astype(int)
interval_type = res['interval_type'] interval_type = res['interval_type']
analysis = res['analysis'] analysi = res['analysis']
groupby = res['groupby'] groupby = res['groupby']
quota_interval_arr = res['quota_interval_arr'] quota_interval_arr = res['quota_interval_arr']
# 兼容合计的 # 兼容合计的
if res['time_particle'] == 'total': if res['time_particle'] == 'total':
df['date'] = '合计' df['date'] = '合计'
if analysis != 'number_of_days' and interval_type != 'discrete': if analysi != 'number_of_days' and interval_type != 'discrete':
max_v = int(df['values'].max()) max_v = int(df['values'].max())
min_v = int(df['values'].min()) min_v = int(df['values'].min())
interval = (max_v - min_v) // 10 or 1 interval = (max_v - min_v) // 10 or 1
@ -1035,19 +1035,19 @@ async def scatter_model(
'p': round(bins_s * 100 / total, 2).to_list(), 'p': round(bins_s * 100 / total, 2).to_list(),
'title': '总体'} 'title': '总体'}
# 分组的 # 分组的
if groupby: # if groupby:
for key, tmp_df in df.groupby(['date', *groupby]): # for key, tmp_df in df.groupby(['date', *groupby]):
bins_s = pd.cut(tmp_df['values'], bins=bins, # bins_s = pd.cut(tmp_df['values'], bins=bins,
right=False).value_counts() # right=False).value_counts()
bins_s.sort_index(inplace=True) # bins_s.sort_index(inplace=True)
total = int(bins_s.sum()) # total = int(bins_s.sum())
title = '.'.join(key[1:]) # title = '.'.join(key[1:])
date = key[0] # date = key[0]
resp['list'][date.strftime('%Y-%m-%d')][title] = {'n': bins_s.to_list(), 'total': total, # resp['list'][date.strftime('%Y-%m-%d')][title] = {'n': bins_s.to_list(), 'total': total,
'p': round((bins_s * 100 / total).fillna(0), # 'p': round((bins_s * 100 / total).fillna(0),
2).to_list(), # 2).to_list(),
'title': title # 'title': title
} # }
return schemas.Msg(code=0, msg='ok', data=resp) return schemas.Msg(code=0, msg='ok', data=resp)
# elif analysis == 'number_of_days': # elif analysis == 'number_of_days':
@ -1122,7 +1122,91 @@ async def scatter_model(
# resp['list'][key.strftime('%Y-%m-%d')]['总体'] = {'n': bins_s.to_list(), 'total': total, # resp['list'][key.strftime('%Y-%m-%d')]['总体'] = {'n': bins_s.to_list(), 'total': total,
# 'p': round(bins_s * 100 / total, 2).to_list(), # 'p': round(bins_s * 100 / total, 2).to_list(),
# 'title': '总体'} # 'title': '总体'}
@router.post("/scatter_model_details")
async def scatter_model(
    request: Request,
    game: str,
    ckdb: CKDrive = Depends(get_ck_db),
    db: AsyncIOMotorDatabase = Depends(get_database),
    analysis: BehaviorAnalysis = Depends(BehaviorAnalysis),
    current_user: schemas.UserDB = Depends(deps.get_current_user)
) -> schemas.Msg:
    """Distribution (scatter) analysis broken down by the LAST group-by column.

    Builds the scatter-model SQL, rewrites its date column into
    ``max(`<groupBy[-1].columnName>`) as va``, bins the ``values`` column with
    ``pd.cut`` and returns, per distinct ``va`` group, the bin counts ``n``,
    the group ``total`` and the percentage list ``p``.

    Returns code -9 when the report config is invalid, the query is empty,
    or no group-by column was configured; code 0 with the payload otherwise.

    NOTE(review): this handler reuses the name ``scatter_model`` — it appears
    to shadow the earlier ``/scatter_model`` handler defined above in this
    module; presumably FastAPI's decorator registration still works, but the
    duplicate name should be confirmed/renamed.
    """
    await analysis.init(data_where=current_user.data_where)
    try:
        res = await analysis.scatter_model_sql()
    except Exception as e:
        # Any failure while building the SQL is reported as a bad report config.
        return schemas.Msg(code=-9, msg='报表配置参数异常')
    sql=res['sql']
    # Only proceed when at least one group-by item is configured; the last
    # group-by column becomes the grouping key ("va") of the result.
    if analysis.event_view['groupBy'] != []:
        columnName=analysis.event_view['groupBy'][-1]['columnName']
        # Rewrite the generated SQL: replace the date projection with the
        # group-by column aliased as "va", and drop the date from GROUP BY.
        # NOTE(review): this string surgery assumes the exact alias text
        # produced by scatter_model_sql(); verify it stays in sync.
        sql=sql.replace(f'toDate(addHours({game}.event."#event_time", 8)) AS date', f'max(`{columnName}`) as va', 1)
        sql=sql.replace(f'toDate(addHours({game}.event."#event_time", 8)),','',1)
        df = await ckdb.query_dataframe(sql)
        if df.empty:
            return schemas.Msg(code=-9, msg='无数据', data=None)
        df.fillna(0, inplace=True)
        # Convert the metric column to int so min/max/binning work on integers.
        df['values'] = df['values'].astype(int)
        interval_type = res['interval_type']
        # NOTE(review): "analysi" looks like a typo for "analysis"; kept as-is
        # (it is a local name only) but worth renaming.
        analysi = res['analysis']
        groupby = res['groupby']
        quota_interval_arr = res['quota_interval_arr']
        # Compatibility with the "total" time granularity: collapse all dates.
        if res['time_particle'] == 'total':
            df['date'] = '合计'
        # Derive ~10 equal-width bins from the observed value range.
        # NOTE(review): when this condition is False AND quota_interval_arr is
        # empty, `interval`/`min_v`/`max_v` below are unbound -> NameError.
        # Confirm whether that combination can occur in practice.
        if analysi != 'number_of_days' and interval_type != 'discrete':
            max_v = int(df['values'].max())
            min_v = int(df['values'].min())
            interval = (max_v - min_v) // 10 or 1
        resp = {'list': dict(),
                'start_date': res['start_date'],
                'end_date': res['end_date'],
                'time_particle': res['time_particle'],
                # "biaotou" (表头) = table header: name of the group-by column.
                'biaotou':columnName
                }
        if not quota_interval_arr:
            # Auto-generated half-open bins [i, i+interval).
            resp['label'] = [f'[{i},{i + interval})' for i in range(min_v, max_v, interval)]
            bins = [i for i in range(min_v, max_v + interval, interval)]
        else:
            # User-supplied bin edges, padded with -inf/+inf catch-alls.
            quota_interval_arr = [-float('inf')] + quota_interval_arr + [float('inf')]
            resp['label'] = []
            bins = [quota_interval_arr[0]]
            for i, v in enumerate(quota_interval_arr[1:]):
                resp['label'].append(f'[{quota_interval_arr[i]},{v})')
                bins.append(v)
        # Per-group binning: one result entry per distinct value of "va".
        for key, tmp_df in df.groupby('va'):
            bins_s = pd.cut(tmp_df['values'], bins=bins,
                            right=False).value_counts()
            bins_s.sort_index(inplace=True)
            total = int(bins_s.sum())
            if res['time_particle'] == 'total':
                # NOTE(review): this overwrites '合计' on every loop iteration,
                # so only the last group survives — confirm if intentional.
                resp['list']['合计'] = dict()
                resp['list']['合计']['总体'] = {'n': bins_s.to_list(), 'total': total,
                                              'p': round(bins_s * 100 / total, 2).to_list(),
                                              'title': '总体'}
            else:
                # Replace NaN percentages (total == 0 bins) with 0.
                p=round(bins_s * 100 / total, 2).to_list()
                for i in range(len(p)):
                    if str(p[i]) == 'nan':
                        p[i] = 0
                if 'time' not in columnName:
                    # Plain group key: use it directly as the dict key.
                    resp['list'][key] = dict()
                    resp['list'][key] = {'n': bins_s.to_list(), 'total': total,
                                         'p': p,
                                         'title': '总体'}
                else:
                    # Time-typed group column: assumes `key` is a datetime-like
                    # object — TODO confirm against the ClickHouse column type.
                    resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = dict()
                    resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = {'n': bins_s.to_list(), 'total': total,
                                                                      'p': p,
                                                                      'title': '总体'}
        return schemas.Msg(code=0, msg='ok', data=resp)
    else:
        # No group-by configured: this endpoint requires a grouping column.
        return schemas.Msg(code=-9, msg='没有添加分组项', data='')
@router.post("/trace_model_sql") @router.post("/trace_model_sql")
async def trace_model_sql( async def trace_model_sql(

View File

@ -651,10 +651,14 @@ ORDER BY level
.where(and_(*where)) \ .where(and_(*where)) \
.group_by(*self.groupby, e_account_id_col) .group_by(*self.groupby, e_account_id_col)
else: else:
# qry = sa.select(event_date_col, e_account_id_col,
# settings.CK_FUNC[analysis](event_attr_col).label('values')) \
# .where(and_(*where)) \
# .group_by(event_date_col, *self.groupby, e_account_id_col)
qry = sa.select(event_date_col, e_account_id_col, qry = sa.select(event_date_col, e_account_id_col,
settings.CK_FUNC[analysis](event_attr_col).label('values')) \ settings.CK_FUNC[analysis](event_attr_col).label('values')) \
.where(and_(*where)) \ .where(and_(*where)) \
.group_by(event_date_col, *self.groupby, e_account_id_col) .group_by(event_date_col,e_account_id_col)
sql = str(qry.compile(compile_kwargs={"literal_binds": True})) sql = str(qry.compile(compile_kwargs={"literal_binds": True}))
print(sql) print(sql)
return { return {