1.新增分布分析下载功能

2.优化新增付费人数计算公式
This commit is contained in:
李伟 2022-04-28 15:44:44 +08:00
parent 44a001ba22
commit 51d899c9d1
3 changed files with 159 additions and 16 deletions

View File

@ -21,7 +21,7 @@ from db.redisdb import get_redis_pool, RedisDrive
from models.behavior_analysis import BehaviorAnalysis, CombinationEvent
from models.user_analysis import UserAnalysis
from models.x_analysis import XAnalysis
from utils import DfToStream, getEveryDay
from utils import DfToStream, getEveryDay, Download_xlsx,jiange_insert,create_df,create_neidf
router = APIRouter()
@ -1036,7 +1036,7 @@ async def scatter_model(
}
if not quota_interval_arr:
resp['label'] = [f'[{i},{i + interval})' for i in range(min_v, max_v, interval)]
resp['label'] = [f'[{i},{i + interval})' for i in range(min_v, max_v, interval)]
bins = [i for i in range(min_v, max_v + interval, interval)]
else:
quota_interval_arr = [-float('inf')] + quota_interval_arr + [float('inf')]
@ -1054,13 +1054,15 @@ async def scatter_model(
total = int(bins_s.sum())
if res['time_particle'] == 'total':
resp['list']['合计'] = dict()
p = list(round(bins_s * 100 / total, 2).to_list())
resp['list']['合计']['总体'] = {'n': bins_s.to_list(), 'total': total,
'p': round(bins_s * 100 / total, 2).to_list(),
'p': [str(i)+'%' for i in p],
'title': '总体'}
else:
p=list(round(bins_s * 100 / total, 2).to_list())
resp['list'][key.strftime('%Y-%m-%d')] = dict()
resp['list'][key.strftime('%Y-%m-%d')]['总体'] = {'n': bins_s.to_list(), 'total': total,
'p': round(bins_s * 100 / total, 2).to_list(),
'p':[str(i)+'%' for i in p],
'title': '总体'}
# 分组的
# if groupby:
@ -1076,6 +1078,11 @@ async def scatter_model(
# 2).to_list(),
# 'title': title
# }
download=analysis.event_view.get('download','')
if download == 1:
creat_df = create_df(resp)
Download=Download_xlsx(creat_df, '分布分析')
return Download
return schemas.Msg(code=0, msg='ok', data=resp)
# elif analysis == 'number_of_days':
@ -1137,9 +1144,12 @@ async def scatter_model(
df = await ckdb.query_dataframe(sql)
for i in range(len(df)):
resp['list'][str(df['date'][i])]['总体']['total']=int(df['values'][i])
#兼容下载功能
download=analysis.event_view.get('download','')
if download == 1:
creat_df=create_df(resp)
Download=Download_xlsx(creat_df,'分布分析')
return Download
return schemas.Msg(code=0, msg='ok', data=resp)
# bins_s = pd.cut(tmp_df['values'], bins=bins,
@ -1206,16 +1216,19 @@ async def scatter_model(
for i, v in enumerate(quota_interval_arr[1:]):
resp['label'].append(f'[{quota_interval_arr[i]},{v})')
bins.append(v)
if 'float' in str(df.dtypes['va']):
df['va'] = df['va'].astype(int)
# 这是分组的
for key, tmp_df in df.groupby('va'):
bins_s = pd.cut(tmp_df['values'], bins=bins,
right=False).value_counts()
bins_s.sort_index(inplace=True)
total = int(bins_s.sum())
if res['time_particle'] == 'total':
resp['list']['合计'] = dict()
resp['list']['合计']['总体'] = {'n': bins_s.to_list(), 'total': total,
resp['list']['合计'] = {'n': bins_s.to_list(), 'total': total,
'p': round(bins_s * 100 / total, 2).to_list(),
'title': '总体'}
else:
@ -1226,14 +1239,19 @@ async def scatter_model(
if 'time' not in columnName:
resp['list'][key] = dict()
resp['list'][key] = {'n': bins_s.to_list(), 'total': total,
'p': p,
'p': [str(i)+'%' for i in p],
'title': '总体'}
else:
resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = dict()
resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = {'n': bins_s.to_list(), 'total': total,
'p': p,
'p': [str(i)+'%' for i in p],
'title': '总体'}
# 兼容下载功能
download = analysis.event_view.get('download', '')
if download == 1:
create_df = create_neidf(resp,columnName)
Download=Download_xlsx(create_df, '分布分析')
return Download
return schemas.Msg(code=0, msg='ok', data=resp)
else:
resp = {'list': {}, 'label': [],
@ -1295,8 +1313,13 @@ async def scatter_model(
sql = f"""SELECT toDate(addHours({game}.event."#event_time", 8)) AS date, count(DISTINCT {game}.event."#account_id") AS values FROM {game}.event WHERE addHours({game}.event."#event_time", 8) >= '{start_date}' AND addHours({game}.event."#event_time", 8) <= '{end_date}' AND {game}.event."#event_name" = 'create_account' GROUP BY toDate(addHours({game}.event."#event_time", 8)) ORDER BY date"""
df = await ckdb.query_dataframe(sql)
for i in range(len(df)):
resp['list'][str(df['date'][i])]['总体']['total'] = int(df['values'][i])
resp['list'][str(df['date'][i])]['total'] = int(df['values'][i])
# 兼容下载功能
download = analysis.event_view.get('download', '')
if download == 1:
create_df = create_neidf(resp,columnName)
Download=Download_xlsx(create_df, '分布分析')
return Download
return schemas.Msg(code=0, msg='ok', data=resp)
else:
return schemas.Msg(code=-9, msg='没有添加分组项', data='')

View File

@ -383,6 +383,7 @@ class BehaviorAnalysis:
event_time_col = getattr(self.event_tbl.c, '#event_time')
for event in self.events:
operator_ = event.get('operator_val','')
#排头显示名
event_name_display = event.get('eventNameDisplay')
is_show = event.get('is_show', True)
@ -513,6 +514,22 @@ FROM {game}.event
WHERE addHours({game}.event."#event_time", 8) >= '{stat_date}' AND addHours({game}.event."#event_time", 8) <= '{end_date}'
GROUP BY toDate(addHours({game}.event."#event_time", 8))) as bb on aa.date = bb.date ORDER by date
"""
# 单独把新增付费人数(以设备为维度)拿出来
if event.get('event_attr') == '触发用户数' and ['is_new_device', 'orderid']== [i['columnName'] for i in event.get('filts')]:
stat_date=self.start_date
end_date=self.end_date
game=self.game
sql=f"""SELECT toDate(addHours("#event_time", 8)) as date,
round(uniqExact("#distinct_id"), 2) AS values FROM
(SELECT toDate(addHours("#event_time", 8)) as date,"#event_time",`#event_name`,`#distinct_id`,`#account_id` from {game}.event WHERE
addHours("#event_time", 8) >= '{stat_date}' AND addHours("#event_time", 8) <= '{end_date}'
and `#event_name` = 'pay' and orderid NOT LIKE '%GM%') a
inner join
(SELECT toDate(addHours("#event_time", 8)) as date,"#event_time",is_new_device,`#distinct_id`,`#event_name`,`#account_id` from {game}.event WHERE
addHours("#event_time", 8) >= '{stat_date}' AND addHours("#event_time", 8) <= '{end_date}' and
`#event_name` = 'create_account' and is_new_device = 1) b on a.`#distinct_id`= b.`#distinct_id` and a.date = b.date
GROUP BY toDate(addHours("#event_time", 8))"""
sqls.append({'sql': sql,
'groupby': [i.key for i in self.groupby],
'date_range': self.date_range,

View File

@ -1,7 +1,7 @@
import random
import time
import datetime
import pandas as pd
def get_uid():
    """Return a hexadecimal id string derived from the current time plus a random offset.

    The value is ``time.time()`` scaled by 10**7 and truncated to an int, plus a
    random integer in [0, 10000], rendered in hex without the ``0x`` prefix.
    """
    scaled_now = int(time.time() * 10 ** 7)
    jitter = random.randint(0, 10000)
    hex_text = hex(scaled_now + jitter)
    # Drop the leading "0x" that hex() always emits.
    return hex_text[2:]
@ -46,4 +46,107 @@ def getEveryDay(begin_date,end_date):
date_list.append(date_str)
begin_date += datetime.timedelta(days=1)
return date_list
#print(getEveryDay('2016-01-01','2017-05-11'))
#print(getEveryDay('2016-01-01','2017-05-11'))
def Download_xlsx(df, name):
    """
    Wrap a DataFrame in a streaming xlsx download response.

    :param df: data to export; passed to ``DfToStream`` as the pair ``(df, name)``
    :param name: file name without extension (presumably also used by
        ``DfToStream`` as the sheet title - confirm against its implementation)
    :return: ``StreamingResponse`` whose Content-Disposition header carries the
        URL-quoted ``<name>.xlsx``
    """
    # Imports stay local to the function, as in the original implementation.
    import mimetypes
    from urllib.parse import quote

    from fastapi.responses import StreamingResponse
    from utils import DfToStream

    # Percent-encode the name so non-ASCII file names survive in the header.
    quoted_name = quote(f'{name}.xlsx')
    content_type = mimetypes.guess_type(quoted_name)[0]

    with DfToStream((df, name)) as writer:
        payload = writer.to_stream()

    response_headers = {'Content-Disposition': f'filename="{quoted_name}"'}
    return StreamingResponse(payload, media_type=content_type, headers=response_headers)
def jiange_insert(list_date):
    """
    Insert a '-' placeholder after every element of the list, in place.

    :param list_date: list to modify; it is mutated and also returned
    :return: the same list object, e.g. ['a', 'b'] -> ['a', '-', 'b', '-']
    """
    # Slice assignment keeps the caller's list object while replacing its contents.
    list_date[:] = [cell for item in list_date for cell in (item, '-')]
    return list_date
def create_df(resp):
    """
    Build the DataFrame used by the distribution-analysis download (overall view).

    Every key of ``resp['list']`` produces two consecutive rows:

    * a count row: the key, the ``total`` value and one ``n`` entry per label;
    * a share row: '-' in the two leading columns and one ``p`` entry per label.

    :param resp: response dict with ``resp['label']`` (list of interval labels)
        and ``resp['list'][key]['总体']`` holding ``n``, ``p`` and ``total``
    :return: DataFrame with columns ``['事件发生时间', '总人数', *labels]``
    :raises IndexError: if ``n`` or ``p`` has fewer entries than there are labels
    """
    # Copy the labels: prepending the header columns must not alter resp['label'],
    # otherwise a second call on the same resp would see duplicated headers.
    labels = list(resp['label'])
    rows = []
    for day, groups in resp['list'].items():
        overall = groups['总体']

        count_row = {'事件发生时间': day, '总人数': overall['total']}
        share_row = {'事件发生时间': '-', '总人数': '-'}
        # Index explicitly so a too-short n/p list fails loudly instead of
        # silently leaving cells empty.
        for idx, label in enumerate(labels):
            count_row[label] = overall['n'][idx]
            share_row[label] = overall['p'][idx]

        rows.append(count_row)
        rows.append(share_row)

    return pd.DataFrame(data=rows, columns=['事件发生时间', '总人数'] + labels)
def create_neidf(resp, columnName):
    """
    Build the DataFrame used by the distribution-analysis download (grouped view).

    Every key of ``resp['list']`` produces two consecutive rows:

    * a count row: the key, the ``total`` value and one ``n`` entry per label;
    * a share row: '-' in the two leading columns and one ``p`` entry per label.

    :param resp: response dict with ``resp['label']`` (list of interval labels)
        and ``resp['list'][key]`` holding ``n``, ``p`` and ``total`` directly
    :param columnName: header of the first column, which holds the keys
    :return: DataFrame with columns ``[columnName, '全部用户数', *labels]``
    :raises IndexError: if ``n`` or ``p`` has fewer entries than there are labels
    """
    # Copy the labels: prepending the header columns must not alter resp['label'],
    # otherwise a second call on the same resp would see duplicated headers.
    labels = list(resp['label'])
    rows = []
    for group_key, stats in resp['list'].items():
        count_row = {columnName: group_key, '全部用户数': stats['total']}
        share_row = {columnName: '-', '全部用户数': '-'}
        # Index explicitly so a too-short n/p list fails loudly instead of
        # silently leaving cells empty.
        for idx, label in enumerate(labels):
            count_row[label] = stats['n'][idx]
            share_row[label] = stats['p'][idx]

        rows.append(count_row)
        rows.append(share_row)

    return pd.DataFrame(data=rows, columns=[columnName, '全部用户数'] + labels)