Export distribution analysis (导出分布)

commit 7ae11c4b90
parent c08d244708
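This change reworks the /scatter_model_export endpoint: instead of streaming the raw query result, it now buckets the values server-side and writes a two-sheet Excel file. For orientation, the resp dict assembled in the hunks below has roughly this shape (a sketch inferred from the code; the dates, labels, and numbers are illustrative, only the field names come from the diff):

    # Illustrative only -- field names come from the diff below, values are made up.
    resp = {
        'start_date': '2021-06-01',
        'end_date': '2021-06-07',
        'time_particle': 'P1D',                      # illustrative value
        'label': ['[0,4)', '[4,8)', '[8,12)'],       # bucket labels
        'list': {
            '2021-06-01': {
                '总体': {                             # the "overall" row
                    'n': [12, 7, 3],                 # count per bucket
                    'total': 22,
                    'p': [54.55, 31.82, 13.64],      # percentage per bucket
                    'title': '总体',
                },
                # plus one entry per groupby combination, keyed by its title
            },
        },
    }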
@@ -587,6 +587,7 @@ async def scatter_model_sql(
     data = analysis.scatter_model_sql()
     return schemas.Msg(code=0, msg='ok', data=[data])
 
+
 @router.post("/scatter_model_export")
 async def retention_model_export(request: Request,
                                  game: str,
@@ -596,19 +597,103 @@ async def retention_model_export(request: Request,
                                  ):
     """分布分析 数据导出 (distribution analysis data export)"""
     await analysis.init(data_where=current_user.data_where)
-    data = analysis.scatter_model_sql()
+    res = analysis.scatter_model_sql()
     file_name = quote(f'分布分析.xlsx')
     mime = mimetypes.guess_type(file_name)[0]
-    sql = data['sql']
+    sql = res['sql']
     df = await ckdb.query_dataframe(sql)
-    df_to_stream = DfToStream((df, '分布分析'))
+    interval_type = res['interval_type']
+    analysis = res['analysis']
+    groupby = res['groupby']
+    quota_interval_arr = res['quota_interval_arr']
+    if analysis != 'number_of_days' and interval_type != 'discrete':
+        max_v = int(df['values'].max())
+        min_v = int(df['values'].min())
+        interval = (max_v - min_v) // 10 or 1
+        resp = {'list': dict(),
+                'start_date': res['start_date'],
+                'end_date': res['end_date'],
+                'time_particle': res['time_particle']
+                }
+
+        if not quota_interval_arr:
+            resp['label'] = [f'[{i},{i + interval})' for i in range(min_v, max_v, interval)]
+            bins = [i for i in range(min_v, max_v + interval, interval)]
+        else:
+            quota_interval_arr = [-float('inf')] + quota_interval_arr + [float('inf')]
+            resp['label'] = []
+            bins = [quota_interval_arr[0]]
+            for i, v in enumerate(quota_interval_arr[1:]):
+                resp['label'].append(f'[{quota_interval_arr[i]},{v})')
+                bins.append(v)
+
+        # 这是整体的 (overall buckets)
+        for key, tmp_df in df.groupby('date'):
+            bins_s = pd.cut(tmp_df['values'], bins=bins,
+                            right=False).value_counts()
+            bins_s.sort_index(inplace=True)
+            total = int(bins_s.sum())
+            resp['list'][key.strftime('%Y-%m-%d')] = dict()
+            resp['list'][key.strftime('%Y-%m-%d')]['总体'] = {'n': bins_s.to_list(), 'total': total,
+                                                            'p': round(bins_s * 100 / total, 2).to_list(),
+                                                            'title': '总体'}
+        # 分组的 (per-group buckets)
+        # Define the export frame up front so it exists even when no groupby
+        # is requested (otherwise the DfToStream call below raises NameError).
+        export_df = pd.DataFrame(columns=resp['label'])
+        if groupby:
+            for key, tmp_df in df.groupby(['date', *groupby]):
+                bins_s = pd.cut(tmp_df['values'], bins=bins,
+                                right=False).value_counts()
+                bins_s.sort_index(inplace=True)
+                total = int(bins_s.sum())
+                title = '.'.join(key[1:])
+                date = key[0]
+                resp['list'][date.strftime('%Y-%m-%d')][title] = {'n': bins_s.to_list(), 'total': total,
+                                                                  'p': round(bins_s * 100 / total, 2).to_list(),
+                                                                  'title': title
+                                                                  }
+
+                export_df.loc[(date.strftime('%Y-%m-%d'), title)] = bins_s.to_list()
+
+        df_to_stream = DfToStream((export_df, '分布分析'), (df, '分布分析原始数据'), index=True)
+        with df_to_stream as d:
+            export = d.to_stream()
+        return StreamingResponse(export, media_type=mime,
+                                 headers={'Content-Disposition': f'filename="{file_name}"'})
+
+    # elif analysis == 'number_of_days':
+    else:
+        resp = {'list': {}, 'label': [],
+                'start_date': res['start_date'],
+                'end_date': res['end_date'],
+                'time_particle': res['time_particle']
+                }
+        total_dict = {}
+        labels = [str(i) for i in sorted(df['values'].unique())]
+        resp['label'] = labels
+        for key, tmp_df in df.groupby(['date']):
+            total = len(tmp_df)
+            dt = key.strftime('%Y-%m-%d')
+            labels_dict = {}
+            for key2, tmp_df2 in tmp_df.groupby('values'):
+                label = str(key2)
+                n = len(tmp_df2)
+                labels_dict[label] = n
+
+            resp['list'][dt] = {'总体': {'n': [labels_dict.get(i, 0) for i in labels], 'total': total,
+                                        'p': [round(labels_dict.get(i, 0) * 100 / total, 2) for i in labels]}}
+
+        export_df = pd.DataFrame(columns=resp['label'])
+        for d, v in resp['list'].items():
+            export_df.loc[d] = v['总体']['n']
+
+        df_to_stream = DfToStream((export_df, '分布分析'), (df, '分布分析原始数据'), index=True)
     with df_to_stream as d:
         export = d.to_stream()
     return StreamingResponse(export, media_type=mime, headers={'Content-Disposition': f'filename="{file_name}"'})
 
 
 
 @router.post("/scatter_model")
 async def scatter_model(
         request: Request,
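The core of the continuous branch above is pandas' pd.cut with half-open intervals. A minimal standalone sketch of the same bucketing, using hypothetical values in place of the endpoint's df['values']:

    import pandas as pd

    # Hypothetical metric values, standing in for df['values'] in the endpoint above.
    values = pd.Series([1, 3, 7, 12, 18, 25, 31, 44])

    min_v, max_v = int(values.min()), int(values.max())
    interval = (max_v - min_v) // 10 or 1            # `or 1` guards against a zero-width interval
    bins = list(range(min_v, max_v + interval, interval))
    labels = [f'[{i},{i + interval})' for i in range(min_v, max_v, interval)]

    # right=False makes every bucket half-open, [low, high), matching the labels.
    bins_s = pd.cut(values, bins=bins, right=False).value_counts()
    bins_s.sort_index(inplace=True)

    total = int(bins_s.sum())
    print(labels)                                     # ['[1,5)', '[5,9)', ...]
    print(bins_s.to_list())                           # count per bucket
    print(round(bins_s * 100 / total, 2).to_list())   # percentage per bucket

When quota_interval_arr is supplied, the same pd.cut call runs against user-defined edges padded with ±inf, which is why the first and last labels can read [-inf,x) and [y,inf).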
@@ -22,5 +22,5 @@ class DfToStream:
         for item in self.dfs:
             df = item[0]
             sheet_name = item[1]
-            df.to_excel(self.writer, encoding='utf-8', sheet_name=sheet_name, index=False)
+            df.to_excel(self.writer, encoding='utf-8', sheet_name=sheet_name, index=self.index)
         return self.output
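The one-word change above (index=self.index) is what lets the export endpoint keep the (date, group) row index on export_df instead of always dropping it. DfToStream is only partially visible in this diff; a minimal sketch of how such a helper could be shaped (the class name, constructor, and context-manager plumbing here are assumptions, not the project's actual code):

    import io

    import pandas as pd


    class DfToStreamSketch:
        """Rough stand-in for DfToStream; the real constructor is not shown in this diff.

        Takes (dataframe, sheet_name) tuples plus an `index` flag and writes
        every frame into a single in-memory Excel workbook.
        """

        def __init__(self, *dfs, index=False):
            self.dfs = dfs
            self.index = index
            self.output = io.BytesIO()
            self.writer = None

        def __enter__(self):
            # Requires the xlsxwriter package.
            self.writer = pd.ExcelWriter(self.output, engine='xlsxwriter')
            return self

        def __exit__(self, exc_type, exc, tb):
            # Closing the writer flushes the workbook into the buffer;
            # rewind so the response can stream it from the start.
            self.writer.close()
            self.output.seek(0)

        def to_stream(self):
            for df, sheet_name in self.dfs:
                # index=self.index mirrors the change in the hunk above: it
                # keeps the (date, group) row index that export_df carries.
                df.to_excel(self.writer, sheet_name=sheet_name, index=self.index)
            return self.output

Usage mirrors the endpoint:

    # with DfToStreamSketch((export_df, '分布分析'), (df, '分布分析原始数据'), index=True) as d:
    #     export = d.to_stream()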