1.新增分布分析下载功能
2.优化新增付费人数计算公式
This commit is contained in:
parent
44a001ba22
commit
51d899c9d1
@ -21,7 +21,7 @@ from db.redisdb import get_redis_pool, RedisDrive
|
||||
from models.behavior_analysis import BehaviorAnalysis, CombinationEvent
|
||||
from models.user_analysis import UserAnalysis
|
||||
from models.x_analysis import XAnalysis
|
||||
from utils import DfToStream, getEveryDay
|
||||
from utils import DfToStream, getEveryDay, Download_xlsx,jiange_insert,create_df,create_neidf
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@ -1036,7 +1036,7 @@ async def scatter_model(
|
||||
}
|
||||
|
||||
if not quota_interval_arr:
|
||||
resp['label'] = [f'[{i},{i + interval})' for i in range(min_v, max_v, interval)]
|
||||
resp['label'] = [f'[{i},{i + interval})' for i in range(min_v, max_v, interval)]
|
||||
bins = [i for i in range(min_v, max_v + interval, interval)]
|
||||
else:
|
||||
quota_interval_arr = [-float('inf')] + quota_interval_arr + [float('inf')]
|
||||
@ -1054,13 +1054,15 @@ async def scatter_model(
|
||||
total = int(bins_s.sum())
|
||||
if res['time_particle'] == 'total':
|
||||
resp['list']['合计'] = dict()
|
||||
p = list(round(bins_s * 100 / total, 2).to_list())
|
||||
resp['list']['合计']['总体'] = {'n': bins_s.to_list(), 'total': total,
|
||||
'p': round(bins_s * 100 / total, 2).to_list(),
|
||||
'p': [str(i)+'%' for i in p],
|
||||
'title': '总体'}
|
||||
else:
|
||||
p=list(round(bins_s * 100 / total, 2).to_list())
|
||||
resp['list'][key.strftime('%Y-%m-%d')] = dict()
|
||||
resp['list'][key.strftime('%Y-%m-%d')]['总体'] = {'n': bins_s.to_list(), 'total': total,
|
||||
'p': round(bins_s * 100 / total, 2).to_list(),
|
||||
'p':[str(i)+'%' for i in p],
|
||||
'title': '总体'}
|
||||
# 分组的
|
||||
# if groupby:
|
||||
@ -1076,6 +1078,11 @@ async def scatter_model(
|
||||
# 2).to_list(),
|
||||
# 'title': title
|
||||
# }
|
||||
download=analysis.event_view.get('download','')
|
||||
if download == 1:
|
||||
creat_df = create_df(resp)
|
||||
Download=Download_xlsx(creat_df, '分布分析')
|
||||
return Download
|
||||
return schemas.Msg(code=0, msg='ok', data=resp)
|
||||
|
||||
# elif analysis == 'number_of_days':
|
||||
@ -1137,9 +1144,12 @@ async def scatter_model(
|
||||
df = await ckdb.query_dataframe(sql)
|
||||
for i in range(len(df)):
|
||||
resp['list'][str(df['date'][i])]['总体']['total']=int(df['values'][i])
|
||||
|
||||
|
||||
|
||||
#兼容下载功能
|
||||
download=analysis.event_view.get('download','')
|
||||
if download == 1:
|
||||
creat_df=create_df(resp)
|
||||
Download=Download_xlsx(creat_df,'分布分析')
|
||||
return Download
|
||||
return schemas.Msg(code=0, msg='ok', data=resp)
|
||||
|
||||
# bins_s = pd.cut(tmp_df['values'], bins=bins,
|
||||
@ -1206,16 +1216,19 @@ async def scatter_model(
|
||||
for i, v in enumerate(quota_interval_arr[1:]):
|
||||
resp['label'].append(f'[{quota_interval_arr[i]},{v})')
|
||||
bins.append(v)
|
||||
|
||||
if 'float' in str(df.dtypes['va']):
|
||||
df['va'] = df['va'].astype(int)
|
||||
# 这是分组的
|
||||
for key, tmp_df in df.groupby('va'):
|
||||
|
||||
bins_s = pd.cut(tmp_df['values'], bins=bins,
|
||||
right=False).value_counts()
|
||||
bins_s.sort_index(inplace=True)
|
||||
total = int(bins_s.sum())
|
||||
if res['time_particle'] == 'total':
|
||||
resp['list']['合计'] = dict()
|
||||
resp['list']['合计']['总体'] = {'n': bins_s.to_list(), 'total': total,
|
||||
|
||||
resp['list']['合计'] = {'n': bins_s.to_list(), 'total': total,
|
||||
'p': round(bins_s * 100 / total, 2).to_list(),
|
||||
'title': '总体'}
|
||||
else:
|
||||
@ -1226,14 +1239,19 @@ async def scatter_model(
|
||||
if 'time' not in columnName:
|
||||
resp['list'][key] = dict()
|
||||
resp['list'][key] = {'n': bins_s.to_list(), 'total': total,
|
||||
'p': p,
|
||||
'p': [str(i)+'%' for i in p],
|
||||
'title': '总体'}
|
||||
else:
|
||||
resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = dict()
|
||||
resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = {'n': bins_s.to_list(), 'total': total,
|
||||
'p': p,
|
||||
'p': [str(i)+'%' for i in p],
|
||||
'title': '总体'}
|
||||
|
||||
# 兼容下载功能
|
||||
download = analysis.event_view.get('download', '')
|
||||
if download == 1:
|
||||
create_df = create_neidf(resp,columnName)
|
||||
Download=Download_xlsx(create_df, '分布分析')
|
||||
return Download
|
||||
return schemas.Msg(code=0, msg='ok', data=resp)
|
||||
else:
|
||||
resp = {'list': {}, 'label': [],
|
||||
@ -1295,8 +1313,13 @@ async def scatter_model(
|
||||
sql = f"""SELECT toDate(addHours({game}.event."#event_time", 8)) AS date, count(DISTINCT {game}.event."#account_id") AS values FROM {game}.event WHERE addHours({game}.event."#event_time", 8) >= '{start_date}' AND addHours({game}.event."#event_time", 8) <= '{end_date}' AND {game}.event."#event_name" = 'create_account' GROUP BY toDate(addHours({game}.event."#event_time", 8)) ORDER BY date"""
|
||||
df = await ckdb.query_dataframe(sql)
|
||||
for i in range(len(df)):
|
||||
resp['list'][str(df['date'][i])]['总体']['total'] = int(df['values'][i])
|
||||
|
||||
resp['list'][str(df['date'][i])]['total'] = int(df['values'][i])
|
||||
# 兼容下载功能
|
||||
download = analysis.event_view.get('download', '')
|
||||
if download == 1:
|
||||
create_df = create_neidf(resp,columnName)
|
||||
Download=Download_xlsx(create_df, '分布分析')
|
||||
return Download
|
||||
return schemas.Msg(code=0, msg='ok', data=resp)
|
||||
else:
|
||||
return schemas.Msg(code=-9, msg='没有添加分组项', data='')
|
||||
|
@ -383,6 +383,7 @@ class BehaviorAnalysis:
|
||||
event_time_col = getattr(self.event_tbl.c, '#event_time')
|
||||
for event in self.events:
|
||||
operator_ = event.get('operator_val','')
|
||||
#排头显示名
|
||||
event_name_display = event.get('eventNameDisplay')
|
||||
is_show = event.get('is_show', True)
|
||||
|
||||
@ -513,6 +514,22 @@ FROM {game}.event
|
||||
WHERE addHours({game}.event."#event_time", 8) >= '{stat_date}' AND addHours({game}.event."#event_time", 8) <= '{end_date}'
|
||||
GROUP BY toDate(addHours({game}.event."#event_time", 8))) as bb on aa.date = bb.date ORDER by date
|
||||
"""
|
||||
# 单独把新增付费人数(以设备为维度)拿出来
|
||||
if event.get('event_attr') == '触发用户数' and ['is_new_device', 'orderid']== [i['columnName'] for i in event.get('filts')]:
|
||||
stat_date=self.start_date
|
||||
end_date=self.end_date
|
||||
game=self.game
|
||||
sql=f"""SELECT toDate(addHours("#event_time", 8)) as date,
|
||||
round(uniqExact("#distinct_id"), 2) AS values FROM
|
||||
(SELECT toDate(addHours("#event_time", 8)) as date,"#event_time",`#event_name`,`#distinct_id`,`#account_id` from {game}.event WHERE
|
||||
addHours("#event_time", 8) >= '{stat_date}' AND addHours("#event_time", 8) <= '{end_date}'
|
||||
and `#event_name` = 'pay' and orderid NOT LIKE '%GM%') a
|
||||
inner join
|
||||
(SELECT toDate(addHours("#event_time", 8)) as date,"#event_time",is_new_device,`#distinct_id`,`#event_name`,`#account_id` from {game}.event WHERE
|
||||
addHours("#event_time", 8) >= '{stat_date}' AND addHours("#event_time", 8) <= '{end_date}' and
|
||||
`#event_name` = 'create_account' and is_new_device = 1) b on a.`#distinct_id`= b.`#distinct_id` and a.date = b.date
|
||||
GROUP BY toDate(addHours("#event_time", 8))"""
|
||||
|
||||
sqls.append({'sql': sql,
|
||||
'groupby': [i.key for i in self.groupby],
|
||||
'date_range': self.date_range,
|
||||
|
107
utils/func.py
107
utils/func.py
@ -1,7 +1,7 @@
|
||||
import random
|
||||
import time
|
||||
import datetime
|
||||
|
||||
import pandas as pd
|
||||
def get_uid():
    """Return a hex-string id derived from the current time plus a small random offset."""
    stamp = int(time.time() * 10 ** 7)
    salt = random.randint(0, 10000)
    # format(..., 'x') is equivalent to hex(...)[2:] for non-negative ints
    return format(stamp + salt, 'x')
|
||||
|
||||
@ -46,4 +46,107 @@ def getEveryDay(begin_date,end_date):
|
||||
date_list.append(date_str)
|
||||
begin_date += datetime.timedelta(days=1)
|
||||
return date_list
|
||||
#print(getEveryDay('2016-01-01','2017-05-11'))
|
||||
#print(getEveryDay('2016-01-01','2017-05-11'))
|
||||
def Download_xlsx(df, name):
    """Stream *df* back to the client as an xlsx attachment.

    :param df: pandas DataFrame to export
    :param name: sheet title and (URL-quoted) file name, without extension
    :return: fastapi StreamingResponse carrying the xlsx bytes
    """
    import mimetypes
    from urllib.parse import quote

    from fastapi.responses import StreamingResponse

    from utils import DfToStream

    quoted_name = quote(f'{name}.xlsx')
    media_type = mimetypes.guess_type(quoted_name)[0]
    # DfToStream renders the DataFrame into an in-memory xlsx stream
    with DfToStream((df, name)) as builder:
        excel_stream = builder.to_stream()
    return StreamingResponse(
        excel_stream,
        media_type=media_type,
        headers={'Content-Disposition': f'filename="{quoted_name}"'},
    )
|
||||
|
||||
def jiange_insert(list_date):
    """Insert a '-' placeholder after every original element of *list_date*.

    The list is modified in place (one '-' per original element, landing at
    the odd indices) and also returned for convenience; an n-element input
    becomes a 2n-element list.

    :param list_date: list to interleave (mutated in place)
    :return: the same list object, now interleaved with '-'
    """
    original_len = len(list_date)
    # after k insertions the next '-' belongs at index 2k + 1
    for pos in range(1, 2 * original_len, 2):
        list_date.insert(pos, '-')
    return list_date
|
||||
|
||||
def create_df(resp):
    """Build the DataFrame for the distribution-analysis (overall) xlsx download.

    For every date key in ``resp['list']`` two rows are produced: one with
    the raw interval counts (``n``) plus the day's total, and one placeholder
    row ('-') carrying the matching percentage strings (``p``).

    Fix over the previous version: ``resp['label']`` is copied instead of
    being mutated in place (the old code did ``columns.insert(0, …)`` on the
    aliased list), so the caller's ``resp`` is left untouched.

    :param resp: scatter-model response with 'label' (interval names) and
                 'list' mapping date -> {'总体': {'n', 'total', 'p', ...}}
    :return: pandas.DataFrame ready to be written to xlsx
    """
    labels = list(resp['label'])  # copy — do not mutate the caller's resp
    rows = []
    for day, day_data in resp['list'].items():
        stats = day_data['总体']
        # row 1: the counts for this day
        count_row = {'事件发生时间': day, '总人数': stats['total']}
        for idx, label in enumerate(labels):
            count_row[label] = stats['n'][idx]
        rows.append(count_row)
        # row 2: the matching percentage strings, marked with '-'
        percent_row = {'事件发生时间': '-', '总人数': '-'}
        for idx, label in enumerate(labels):
            percent_row[label] = stats['p'][idx]
        rows.append(percent_row)
    columns = ['事件发生时间', '总人数'] + labels
    return pd.DataFrame(data=rows, columns=columns)
|
||||
def create_neidf(resp, columnName):
    """Build the DataFrame for the grouped distribution-analysis xlsx download.

    For every group key in ``resp['list']`` two rows are produced: one with
    the raw interval counts (``n``) plus the group's total, and one
    placeholder row ('-') carrying the matching percentage strings (``p``).

    Fix over the previous version: ``resp['label']`` is copied instead of
    being mutated in place (the old code did ``columns.insert(0, …)`` on the
    aliased list), so the caller's ``resp`` is left untouched.

    :param resp: scatter-model response with 'label' (interval names) and
                 'list' mapping group value -> {'n', 'total', 'p', ...}
    :param columnName: header for the group-value column
    :return: pandas.DataFrame ready to be written to xlsx
    """
    labels = list(resp['label'])  # copy — do not mutate the caller's resp
    rows = []
    for group, stats in resp['list'].items():
        # row 1: the counts for this group
        count_row = {columnName: group, '全部用户数': stats['total']}
        for idx, label in enumerate(labels):
            count_row[label] = stats['n'][idx]
        rows.append(count_row)
        # row 2: the matching percentage strings, marked with '-'
        percent_row = {columnName: '-', '全部用户数': '-'}
        for idx, label in enumerate(labels):
            percent_row[label] = stats['p'][idx]
        rows.append(percent_row)
    columns = [columnName, '全部用户数'] + labels
    return pd.DataFrame(data=rows, columns=columns)
|
Loading…
Reference in New Issue
Block a user