1.新增分布分析下载功能
2.优化新增付费人数计算公式
This commit is contained in:
parent
44a001ba22
commit
51d899c9d1
@ -21,7 +21,7 @@ from db.redisdb import get_redis_pool, RedisDrive
|
|||||||
from models.behavior_analysis import BehaviorAnalysis, CombinationEvent
|
from models.behavior_analysis import BehaviorAnalysis, CombinationEvent
|
||||||
from models.user_analysis import UserAnalysis
|
from models.user_analysis import UserAnalysis
|
||||||
from models.x_analysis import XAnalysis
|
from models.x_analysis import XAnalysis
|
||||||
from utils import DfToStream, getEveryDay
|
from utils import DfToStream, getEveryDay, Download_xlsx,jiange_insert,create_df,create_neidf
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
@ -1036,7 +1036,7 @@ async def scatter_model(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if not quota_interval_arr:
|
if not quota_interval_arr:
|
||||||
resp['label'] = [f'[{i},{i + interval})' for i in range(min_v, max_v, interval)]
|
resp['label'] = [f'[{i},{i + interval})' for i in range(min_v, max_v, interval)]
|
||||||
bins = [i for i in range(min_v, max_v + interval, interval)]
|
bins = [i for i in range(min_v, max_v + interval, interval)]
|
||||||
else:
|
else:
|
||||||
quota_interval_arr = [-float('inf')] + quota_interval_arr + [float('inf')]
|
quota_interval_arr = [-float('inf')] + quota_interval_arr + [float('inf')]
|
||||||
@ -1054,13 +1054,15 @@ async def scatter_model(
|
|||||||
total = int(bins_s.sum())
|
total = int(bins_s.sum())
|
||||||
if res['time_particle'] == 'total':
|
if res['time_particle'] == 'total':
|
||||||
resp['list']['合计'] = dict()
|
resp['list']['合计'] = dict()
|
||||||
|
p = list(round(bins_s * 100 / total, 2).to_list())
|
||||||
resp['list']['合计']['总体'] = {'n': bins_s.to_list(), 'total': total,
|
resp['list']['合计']['总体'] = {'n': bins_s.to_list(), 'total': total,
|
||||||
'p': round(bins_s * 100 / total, 2).to_list(),
|
'p': [str(i)+'%' for i in p],
|
||||||
'title': '总体'}
|
'title': '总体'}
|
||||||
else:
|
else:
|
||||||
|
p=list(round(bins_s * 100 / total, 2).to_list())
|
||||||
resp['list'][key.strftime('%Y-%m-%d')] = dict()
|
resp['list'][key.strftime('%Y-%m-%d')] = dict()
|
||||||
resp['list'][key.strftime('%Y-%m-%d')]['总体'] = {'n': bins_s.to_list(), 'total': total,
|
resp['list'][key.strftime('%Y-%m-%d')]['总体'] = {'n': bins_s.to_list(), 'total': total,
|
||||||
'p': round(bins_s * 100 / total, 2).to_list(),
|
'p':[str(i)+'%' for i in p],
|
||||||
'title': '总体'}
|
'title': '总体'}
|
||||||
# 分组的
|
# 分组的
|
||||||
# if groupby:
|
# if groupby:
|
||||||
@ -1076,6 +1078,11 @@ async def scatter_model(
|
|||||||
# 2).to_list(),
|
# 2).to_list(),
|
||||||
# 'title': title
|
# 'title': title
|
||||||
# }
|
# }
|
||||||
|
download=analysis.event_view.get('download','')
|
||||||
|
if download == 1:
|
||||||
|
creat_df = create_df(resp)
|
||||||
|
Download=Download_xlsx(creat_df, '分布分析')
|
||||||
|
return Download
|
||||||
return schemas.Msg(code=0, msg='ok', data=resp)
|
return schemas.Msg(code=0, msg='ok', data=resp)
|
||||||
|
|
||||||
# elif analysis == 'number_of_days':
|
# elif analysis == 'number_of_days':
|
||||||
@ -1137,9 +1144,12 @@ async def scatter_model(
|
|||||||
df = await ckdb.query_dataframe(sql)
|
df = await ckdb.query_dataframe(sql)
|
||||||
for i in range(len(df)):
|
for i in range(len(df)):
|
||||||
resp['list'][str(df['date'][i])]['总体']['total']=int(df['values'][i])
|
resp['list'][str(df['date'][i])]['总体']['total']=int(df['values'][i])
|
||||||
|
#兼容下载功能
|
||||||
|
download=analysis.event_view.get('download','')
|
||||||
|
if download == 1:
|
||||||
|
creat_df=create_df(resp)
|
||||||
|
Download=Download_xlsx(creat_df,'分布分析')
|
||||||
|
return Download
|
||||||
return schemas.Msg(code=0, msg='ok', data=resp)
|
return schemas.Msg(code=0, msg='ok', data=resp)
|
||||||
|
|
||||||
# bins_s = pd.cut(tmp_df['values'], bins=bins,
|
# bins_s = pd.cut(tmp_df['values'], bins=bins,
|
||||||
@ -1206,16 +1216,19 @@ async def scatter_model(
|
|||||||
for i, v in enumerate(quota_interval_arr[1:]):
|
for i, v in enumerate(quota_interval_arr[1:]):
|
||||||
resp['label'].append(f'[{quota_interval_arr[i]},{v})')
|
resp['label'].append(f'[{quota_interval_arr[i]},{v})')
|
||||||
bins.append(v)
|
bins.append(v)
|
||||||
|
if 'float' in str(df.dtypes['va']):
|
||||||
|
df['va'] = df['va'].astype(int)
|
||||||
# 这是分组的
|
# 这是分组的
|
||||||
for key, tmp_df in df.groupby('va'):
|
for key, tmp_df in df.groupby('va'):
|
||||||
|
|
||||||
bins_s = pd.cut(tmp_df['values'], bins=bins,
|
bins_s = pd.cut(tmp_df['values'], bins=bins,
|
||||||
right=False).value_counts()
|
right=False).value_counts()
|
||||||
bins_s.sort_index(inplace=True)
|
bins_s.sort_index(inplace=True)
|
||||||
total = int(bins_s.sum())
|
total = int(bins_s.sum())
|
||||||
if res['time_particle'] == 'total':
|
if res['time_particle'] == 'total':
|
||||||
resp['list']['合计'] = dict()
|
resp['list']['合计'] = dict()
|
||||||
resp['list']['合计']['总体'] = {'n': bins_s.to_list(), 'total': total,
|
|
||||||
|
resp['list']['合计'] = {'n': bins_s.to_list(), 'total': total,
|
||||||
'p': round(bins_s * 100 / total, 2).to_list(),
|
'p': round(bins_s * 100 / total, 2).to_list(),
|
||||||
'title': '总体'}
|
'title': '总体'}
|
||||||
else:
|
else:
|
||||||
@ -1226,14 +1239,19 @@ async def scatter_model(
|
|||||||
if 'time' not in columnName:
|
if 'time' not in columnName:
|
||||||
resp['list'][key] = dict()
|
resp['list'][key] = dict()
|
||||||
resp['list'][key] = {'n': bins_s.to_list(), 'total': total,
|
resp['list'][key] = {'n': bins_s.to_list(), 'total': total,
|
||||||
'p': p,
|
'p': [str(i)+'%' for i in p],
|
||||||
'title': '总体'}
|
'title': '总体'}
|
||||||
else:
|
else:
|
||||||
resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = dict()
|
resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = dict()
|
||||||
resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = {'n': bins_s.to_list(), 'total': total,
|
resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = {'n': bins_s.to_list(), 'total': total,
|
||||||
'p': p,
|
'p': [str(i)+'%' for i in p],
|
||||||
'title': '总体'}
|
'title': '总体'}
|
||||||
|
# 兼容下载功能
|
||||||
|
download = analysis.event_view.get('download', '')
|
||||||
|
if download == 1:
|
||||||
|
create_df = create_neidf(resp,columnName)
|
||||||
|
Download=Download_xlsx(create_df, '分布分析')
|
||||||
|
return Download
|
||||||
return schemas.Msg(code=0, msg='ok', data=resp)
|
return schemas.Msg(code=0, msg='ok', data=resp)
|
||||||
else:
|
else:
|
||||||
resp = {'list': {}, 'label': [],
|
resp = {'list': {}, 'label': [],
|
||||||
@ -1295,8 +1313,13 @@ async def scatter_model(
|
|||||||
sql = f"""SELECT toDate(addHours({game}.event."#event_time", 8)) AS date, count(DISTINCT {game}.event."#account_id") AS values FROM {game}.event WHERE addHours({game}.event."#event_time", 8) >= '{start_date}' AND addHours({game}.event."#event_time", 8) <= '{end_date}' AND {game}.event."#event_name" = 'create_account' GROUP BY toDate(addHours({game}.event."#event_time", 8)) ORDER BY date"""
|
sql = f"""SELECT toDate(addHours({game}.event."#event_time", 8)) AS date, count(DISTINCT {game}.event."#account_id") AS values FROM {game}.event WHERE addHours({game}.event."#event_time", 8) >= '{start_date}' AND addHours({game}.event."#event_time", 8) <= '{end_date}' AND {game}.event."#event_name" = 'create_account' GROUP BY toDate(addHours({game}.event."#event_time", 8)) ORDER BY date"""
|
||||||
df = await ckdb.query_dataframe(sql)
|
df = await ckdb.query_dataframe(sql)
|
||||||
for i in range(len(df)):
|
for i in range(len(df)):
|
||||||
resp['list'][str(df['date'][i])]['总体']['total'] = int(df['values'][i])
|
resp['list'][str(df['date'][i])]['total'] = int(df['values'][i])
|
||||||
|
# 兼容下载功能
|
||||||
|
download = analysis.event_view.get('download', '')
|
||||||
|
if download == 1:
|
||||||
|
create_df = create_neidf(resp,columnName)
|
||||||
|
Download=Download_xlsx(create_df, '分布分析')
|
||||||
|
return Download
|
||||||
return schemas.Msg(code=0, msg='ok', data=resp)
|
return schemas.Msg(code=0, msg='ok', data=resp)
|
||||||
else:
|
else:
|
||||||
return schemas.Msg(code=-9, msg='没有添加分组项', data='')
|
return schemas.Msg(code=-9, msg='没有添加分组项', data='')
|
||||||
|
@ -383,6 +383,7 @@ class BehaviorAnalysis:
|
|||||||
event_time_col = getattr(self.event_tbl.c, '#event_time')
|
event_time_col = getattr(self.event_tbl.c, '#event_time')
|
||||||
for event in self.events:
|
for event in self.events:
|
||||||
operator_ = event.get('operator_val','')
|
operator_ = event.get('operator_val','')
|
||||||
|
#排头显示名
|
||||||
event_name_display = event.get('eventNameDisplay')
|
event_name_display = event.get('eventNameDisplay')
|
||||||
is_show = event.get('is_show', True)
|
is_show = event.get('is_show', True)
|
||||||
|
|
||||||
@ -513,6 +514,22 @@ FROM {game}.event
|
|||||||
WHERE addHours({game}.event."#event_time", 8) >= '{stat_date}' AND addHours({game}.event."#event_time", 8) <= '{end_date}'
|
WHERE addHours({game}.event."#event_time", 8) >= '{stat_date}' AND addHours({game}.event."#event_time", 8) <= '{end_date}'
|
||||||
GROUP BY toDate(addHours({game}.event."#event_time", 8))) as bb on aa.date = bb.date ORDER by date
|
GROUP BY toDate(addHours({game}.event."#event_time", 8))) as bb on aa.date = bb.date ORDER by date
|
||||||
"""
|
"""
|
||||||
|
# 单独把新增付费人数(以设备为维度)拿出来
|
||||||
|
if event.get('event_attr') == '触发用户数' and ['is_new_device', 'orderid']== [i['columnName'] for i in event.get('filts')]:
|
||||||
|
stat_date=self.start_date
|
||||||
|
end_date=self.end_date
|
||||||
|
game=self.game
|
||||||
|
sql=f"""SELECT toDate(addHours("#event_time", 8)) as date,
|
||||||
|
round(uniqExact("#distinct_id"), 2) AS values FROM
|
||||||
|
(SELECT toDate(addHours("#event_time", 8)) as date,"#event_time",`#event_name`,`#distinct_id`,`#account_id` from {game}.event WHERE
|
||||||
|
addHours("#event_time", 8) >= '{stat_date}' AND addHours("#event_time", 8) <= '{end_date}'
|
||||||
|
and `#event_name` = 'pay' and orderid NOT LIKE '%GM%') a
|
||||||
|
inner join
|
||||||
|
(SELECT toDate(addHours("#event_time", 8)) as date,"#event_time",is_new_device,`#distinct_id`,`#event_name`,`#account_id` from {game}.event WHERE
|
||||||
|
addHours("#event_time", 8) >= '{stat_date}' AND addHours("#event_time", 8) <= '{end_date}' and
|
||||||
|
`#event_name` = 'create_account' and is_new_device = 1) b on a.`#distinct_id`= b.`#distinct_id` and a.date = b.date
|
||||||
|
GROUP BY toDate(addHours("#event_time", 8))"""
|
||||||
|
|
||||||
sqls.append({'sql': sql,
|
sqls.append({'sql': sql,
|
||||||
'groupby': [i.key for i in self.groupby],
|
'groupby': [i.key for i in self.groupby],
|
||||||
'date_range': self.date_range,
|
'date_range': self.date_range,
|
||||||
|
105
utils/func.py
105
utils/func.py
@ -1,7 +1,7 @@
|
|||||||
import random
|
import random
|
||||||
import time
|
import time
|
||||||
import datetime
|
import datetime
|
||||||
|
import pandas as pd
|
||||||
def get_uid():
|
def get_uid():
|
||||||
return hex(int(time.time() * 10 ** 7) + random.randint(0, 10000))[2:]
|
return hex(int(time.time() * 10 ** 7) + random.randint(0, 10000))[2:]
|
||||||
|
|
||||||
@ -47,3 +47,106 @@ def getEveryDay(begin_date,end_date):
|
|||||||
begin_date += datetime.timedelta(days=1)
|
begin_date += datetime.timedelta(days=1)
|
||||||
return date_list
|
return date_list
|
||||||
#print(getEveryDay('2016-01-01','2017-05-11'))
|
#print(getEveryDay('2016-01-01','2017-05-11'))
|
||||||
|
def Download_xlsx(df, name):
    """Wrap a DataFrame in a streaming .xlsx download response.

    :param df: DataFrame to export; passed to ``DfToStream`` together with ``name``.
    :param name: file name without extension. It is also handed to ``DfToStream``
        alongside the frame (presumably used as the sheet name -- confirm
        against ``DfToStream``).
    :return: ``fastapi.responses.StreamingResponse`` carrying the exported stream.
    """
    # Imports stay function-scoped, as in the original implementation.
    import mimetypes
    from urllib.parse import quote

    from fastapi.responses import StreamingResponse
    from utils import DfToStream

    # Percent-encode the name so non-ASCII characters are legal in a header value.
    file_name = quote(f'{name}.xlsx')

    # guess_type() returns None when the host has no mapping for .xlsx;
    # fall back to the standard OOXML spreadsheet type instead of sending no type.
    mime = (mimetypes.guess_type(file_name)[0]
            or 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')

    with DfToStream((df, name)) as stream:
        export = stream.to_stream()

    # A Content-Disposition value must start with a disposition type; the
    # original header carried only the filename parameter.
    headers = {'Content-Disposition': f'attachment; filename="{file_name}"'}
    return StreamingResponse(export, media_type=mime, headers=headers)
|
||||||
|
|
||||||
|
def jiange_insert(list_date):
    """Insert a ``'-'`` placeholder after every element of ``list_date``.

    The list is modified in place (callers rely on this and may ignore the
    return value), and the same list object is returned for convenience.

    :param list_date: list to interleave with ``'-'`` placeholders.
    :return: the same list object, e.g. ``['a', 'b']`` -> ``['a', '-', 'b', '-']``.
    """
    # One slice assignment rebuilds the content in O(n) while keeping the
    # caller's list object; repeated list.insert() calls were O(n^2).
    list_date[:] = [entry for item in list_date for entry in (item, '-')]
    return list_date
|
||||||
|
|
||||||
|
def create_df(resp):
    """Build the export DataFrame for the distribution-analysis download.

    For every key of ``resp['list']`` two rows are produced from the entry
    stored under ``'总体'``: first a row with the key, the ``'total'`` value and
    the per-interval values from ``'n'``, then a row whose two leading cells
    are ``'-'`` and whose interval cells come from ``'p'``.

    :param resp: response dict with ``'label'`` (interval labels) and
        ``'list'`` (mapping key -> ``{'总体': {'n', 'p', 'total', ...}}``).
        It is not modified.
    :return: ``pandas.DataFrame`` with columns
        ``['事件发生时间', '总人数', *resp['label']]``.
    :raises IndexError: if an ``'n'`` or ``'p'`` list is shorter than ``resp['label']``.
    """
    # Work on a copy: the original aliased resp['label'] and prepended the two
    # header columns to it, silently corrupting the caller's response dict.
    labels = list(resp['label'])

    rows = []
    for day, groups in resp['list'].items():
        stats = groups['总体']
        counts, percents = stats['n'], stats['p']

        # Row 1: the key itself with the absolute numbers.
        count_row = {'事件发生时间': day, '总人数': stats['total']}
        count_row.update((label, counts[pos]) for pos, label in enumerate(labels))
        rows.append(count_row)

        # Row 2: the matching percentage row, marked with '-' placeholders.
        percent_row = {'事件发生时间': '-', '总人数': '-'}
        percent_row.update((label, percents[pos]) for pos, label in enumerate(labels))
        rows.append(percent_row)

    return pd.DataFrame(data=rows, columns=['事件发生时间', '总人数', *labels])
|
||||||
|
def create_neidf(resp, columnName):
    """Build the export DataFrame for the grouped distribution-analysis download.

    For every key of ``resp['list']`` two rows are produced from the entry
    stored directly under that key: first a row with the key, the ``'total'``
    value and the per-interval values from ``'n'``, then a row whose two
    leading cells are ``'-'`` and whose interval cells come from ``'p'``.

    :param resp: response dict with ``'label'`` (interval labels) and
        ``'list'`` (mapping key -> ``{'n', 'p', 'total', ...}``).
        It is not modified.
    :param columnName: header used for the first column, which holds the keys
        of ``resp['list']``.
    :return: ``pandas.DataFrame`` with columns
        ``[columnName, '全部用户数', *resp['label']]``.
    :raises IndexError: if an ``'n'`` or ``'p'`` list is shorter than ``resp['label']``.
    """
    # Work on a copy: the original aliased resp['label'] and prepended the two
    # header columns to it, silently corrupting the caller's response dict.
    labels = list(resp['label'])

    rows = []
    for group_key, stats in resp['list'].items():
        counts, percents = stats['n'], stats['p']

        # Row 1: the group key with the absolute numbers.
        count_row = {columnName: group_key, '全部用户数': stats['total']}
        count_row.update((label, counts[pos]) for pos, label in enumerate(labels))
        rows.append(count_row)

        # Row 2: the matching percentage row, marked with '-' placeholders.
        percent_row = {columnName: '-', '全部用户数': '-'}
        percent_row.update((label, percents[pos]) for pos, label in enumerate(labels))
        rows.append(percent_row)

    return pd.DataFrame(data=rows, columns=[columnName, '全部用户数', *labels])
|
Loading…
Reference in New Issue
Block a user