From 51d899c9d18364f6bd27383a589247650641d805 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=E4=BC=9F?= <250213850@qq.com>
Date: Thu, 28 Apr 2022 15:44:44 +0800
Subject: [PATCH] =?UTF-8?q?1.=E6=96=B0=E5=A2=9E=E5=88=86=E5=B8=83=E5=88=86?=
 =?UTF-8?q?=E6=9E=90=E4=B8=8B=E8=BD=BD=E5=8A=9F=E8=83=BD=202.=E4=BC=98?=
 =?UTF-8?q?=E5=8C=96=E6=96=B0=E5=A2=9E=E4=BB=98=E8=B4=B9=E4=BA=BA=E6=95=B0?=
 =?UTF-8?q?=E8=AE=A1=E7=AE=97=E5=85=AC=E5=BC=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 api/api_v1/endpoints/query.py |  51 +++++++++++-----
 models/behavior_analysis.py   |  17 ++++++
 utils/func.py                 | 107 +++++++++++++++++++++++++++++++++-
 3 files changed, 159 insertions(+), 16 deletions(-)

diff --git a/api/api_v1/endpoints/query.py b/api/api_v1/endpoints/query.py
index 3ea8ede..ab67d70 100644
--- a/api/api_v1/endpoints/query.py
+++ b/api/api_v1/endpoints/query.py
@@ -21,7 +21,7 @@ from db.redisdb import get_redis_pool, RedisDrive
 from models.behavior_analysis import BehaviorAnalysis, CombinationEvent
 from models.user_analysis import UserAnalysis
 from models.x_analysis import XAnalysis
-from utils import DfToStream, getEveryDay
+from utils import DfToStream, getEveryDay, Download_xlsx,jiange_insert,create_df,create_neidf
 
 router = APIRouter()
 
@@ -1036,7 +1036,7 @@ async def scatter_model(
                 }
 
         if not quota_interval_arr:
-            resp['label'] = [f'[{i},{i + interval})' for i in range(min_v, max_v, interval)]
+            resp['label'] = [f'[{i},{i + interval})' for i in range(min_v,  max_v, interval)]
             bins = [i for i in range(min_v, max_v + interval, interval)]
         else:
             quota_interval_arr = [-float('inf')] + quota_interval_arr + [float('inf')]
@@ -1054,13 +1054,15 @@ async def scatter_model(
             total = int(bins_s.sum())
             if res['time_particle'] == 'total':
                 resp['list']['合计'] = dict()
+                p = list(round(bins_s * 100 / total, 2).to_list())
                 resp['list']['合计']['总体'] = {'n': bins_s.to_list(), 'total': total,
-                                            'p': round(bins_s * 100 / total, 2).to_list(),
+                                            'p': [str(i)+'%' for i in p],
                                             'title': '总体'}
             else:
+                p=list(round(bins_s * 100 / total, 2).to_list())
                 resp['list'][key.strftime('%Y-%m-%d')] = dict()
                 resp['list'][key.strftime('%Y-%m-%d')]['总体'] = {'n': bins_s.to_list(), 'total': total,
-                                                                'p': round(bins_s * 100 / total, 2).to_list(),
+                                                                'p':[str(i)+'%' for i in p],
                                                                 'title': '总体'}
         # 分组的
         # if groupby:
@@ -1076,6 +1078,11 @@ async def scatter_model(
         #                                                                      2).to_list(),
         #                                                           'title': title
         #                                                           }
+        download=analysis.event_view.get('download','')
+        if download == 1:
+            creat_df = create_df(resp)
+            Download=Download_xlsx(creat_df, '分布分析')
+            return Download
         return schemas.Msg(code=0, msg='ok', data=resp)
 
     # elif analysis == 'number_of_days':
@@ -1137,9 +1144,12 @@ async def scatter_model(
             df = await ckdb.query_dataframe(sql)
             for i in range(len(df)):
                 resp['list'][str(df['date'][i])]['总体']['total']=int(df['values'][i])
-
-
-
+        #兼容下载功能
+        download=analysis.event_view.get('download','')
+        if download == 1:
+            creat_df=create_df(resp)
+            Download=Download_xlsx(creat_df,'分布分析')
+            return Download
         return schemas.Msg(code=0, msg='ok', data=resp)
 
         # bins_s = pd.cut(tmp_df['values'], bins=bins,
@@ -1206,16 +1216,19 @@ async def scatter_model(
                 for i, v in enumerate(quota_interval_arr[1:]):
                     resp['label'].append(f'[{quota_interval_arr[i]},{v})')
                     bins.append(v)
-
+            if 'float' in str(df.dtypes['va']):
+                df['va'] = df['va'].astype(int)
             # 这是分组的
             for key, tmp_df in df.groupby('va'):
+
                 bins_s = pd.cut(tmp_df['values'], bins=bins,
                                 right=False).value_counts()
                 bins_s.sort_index(inplace=True)
                 total = int(bins_s.sum())
                 if res['time_particle'] == 'total':
                     resp['list']['合计'] = dict()
-                    resp['list']['合计']['总体'] = {'n': bins_s.to_list(), 'total': total,
+
+                    resp['list']['合计'] = {'n': bins_s.to_list(), 'total': total,
                                                 'p': round(bins_s * 100 / total, 2).to_list(),
                                                 'title': '总体'}
                 else:
@@ -1226,14 +1239,19 @@ async def scatter_model(
                     if 'time' not in columnName:
                         resp['list'][key] = dict()
                         resp['list'][key] = {'n': bins_s.to_list(), 'total': total,
-                                                    'p': p,
+                                                    'p': [str(i)+'%' for i in p],
                                                     'title': '总体'}
                     else:
                         resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = dict()
                         resp['list'][key.strftime('%Y-%m-%d %H:%M:%S')] = {'n': bins_s.to_list(), 'total': total,
-                                                                        'p': p,
+                                                                        'p': [str(i)+'%' for i in p],
                                                                         'title': '总体'}
-
+            # 兼容下载功能
+            download = analysis.event_view.get('download', '')
+            if download == 1:
+                create_df = create_neidf(resp,columnName)
+                Download=Download_xlsx(create_df, '分布分析')
+                return Download
             return schemas.Msg(code=0, msg='ok', data=resp)
         else:
             resp = {'list': {}, 'label': [],
@@ -1295,8 +1313,13 @@ async def scatter_model(
                 sql = f"""SELECT toDate(addHours({game}.event."#event_time", 8)) AS date, count(DISTINCT {game}.event."#account_id") AS values FROM {game}.event WHERE addHours({game}.event."#event_time", 8) >= '{start_date}' AND addHours({game}.event."#event_time", 8) <= '{end_date}' AND {game}.event."#event_name" = 'create_account' GROUP BY toDate(addHours({game}.event."#event_time", 8)) ORDER BY date"""
                 df = await ckdb.query_dataframe(sql)
                 for i in range(len(df)):
-                    resp['list'][str(df['date'][i])]['总体']['total'] = int(df['values'][i])
-
+                    resp['list'][str(df['date'][i])]['total'] = int(df['values'][i])
+            # 兼容下载功能
+            download = analysis.event_view.get('download', '')
+            if download == 1:
+                create_df = create_neidf(resp,columnName)
+                Download=Download_xlsx(create_df, '分布分析')
+                return Download
             return schemas.Msg(code=0, msg='ok', data=resp)
     else:
         return schemas.Msg(code=-9, msg='没有添加分组项', data='')
diff --git a/models/behavior_analysis.py b/models/behavior_analysis.py
index fc4b05d..29b6a57 100644
--- a/models/behavior_analysis.py
+++ b/models/behavior_analysis.py
@@ -383,6 +383,7 @@ class BehaviorAnalysis:
         event_time_col = getattr(self.event_tbl.c, '#event_time')
         for event in self.events:
             operator_ = event.get('operator_val','')
+            #排头显示名
             event_name_display = event.get('eventNameDisplay')
             is_show = event.get('is_show', True)
 
@@ -513,6 +514,22 @@ FROM {game}.event
 WHERE addHours({game}.event."#event_time", 8) >= '{stat_date}' AND addHours({game}.event."#event_time", 8) <= '{end_date}' 
 GROUP BY toDate(addHours({game}.event."#event_time", 8))) as bb on aa.date = bb.date ORDER by date
                 """
+            # 单独把新增付费人数（以设备为维度）拿出来
+            if event.get('event_attr') == '触发用户数' and ['is_new_device', 'orderid']== [i['columnName'] for i in event.get('filts')]:
+                stat_date=self.start_date
+                end_date=self.end_date
+                game=self.game
+                sql=f"""SELECT toDate(addHours("#event_time", 8)) as date,
+round(uniqExact("#distinct_id"), 2) AS values FROM
+(SELECT toDate(addHours("#event_time", 8)) as date,"#event_time",`#event_name`,`#distinct_id`,`#account_id` from {game}.event WHERE 
+addHours("#event_time", 8) >= '{stat_date}' AND addHours("#event_time", 8) <= '{end_date}'
+and `#event_name` = 'pay' and orderid NOT LIKE '%GM%') a
+inner join
+(SELECT toDate(addHours("#event_time", 8)) as date,"#event_time",is_new_device,`#distinct_id`,`#event_name`,`#account_id` from {game}.event WHERE 
+addHours("#event_time", 8) >= '{stat_date}' AND addHours("#event_time", 8) <= '{end_date}' and
+`#event_name` = 'create_account' and is_new_device = 1) b on a.`#distinct_id`= b.`#distinct_id` and a.date = b.date
+GROUP BY toDate(addHours("#event_time", 8))"""
+
             sqls.append({'sql': sql,
                          'groupby': [i.key for i in self.groupby],
                          'date_range': self.date_range,
diff --git a/utils/func.py b/utils/func.py
index 5f001ad..8dee789 100644
--- a/utils/func.py
+++ b/utils/func.py
@@ -1,7 +1,7 @@
 import random
 import time
 import datetime
-
+import pandas as pd
 def get_uid():
     return hex(int(time.time() * 10 ** 7) + random.randint(0, 10000))[2:]
 
@@ -46,4 +46,107 @@ def getEveryDay(begin_date,end_date):
         date_list.append(date_str)
         begin_date += datetime.timedelta(days=1)
     return date_list
-#print(getEveryDay('2016-01-01','2017-05-11'))
\ No newline at end of file
+#print(getEveryDay('2016-01-01','2017-05-11'))
+def Download_xlsx(df, name):
+    """Return a StreamingResponse that serves ``df`` as the file ``<name>.xlsx``."""
+    import mimetypes
+    from urllib.parse import quote
+    from fastapi.responses import StreamingResponse
+    from utils import DfToStream  # project-local helper, imported at call time
+
+    file_name = quote(f'{name}.xlsx')  # percent-encoded, so non-ASCII names stay header-safe
+    # guess_type() yields None when the MIME table has no .xlsx entry, which
+    # would leave the response without a media type; fall back to the xlsx type.
+    mime = (mimetypes.guess_type(file_name)[0]
+            or 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
+    with DfToStream((df, name)) as d:  # presumably (frame, sheet name) pairs -- confirm
+        export = d.to_stream()
+    return StreamingResponse(export, media_type=mime,
+                             headers={'Content-Disposition': f'filename="{file_name}"'})
+
+def jiange_insert(list_date):
+    """
+    Insert a '-' placeholder after every element of ``list_date``, in place.
+    :param list_date: list to interleave; it is modified in place
+    :return: the same list object, e.g. ['a', 'b'] -> ['a', '-', 'b', '-']
+    """
+    interleaved = []
+    for item in list_date:
+        interleaved += [item, '-']
+    list_date[:] = interleaved
+    return list_date
+
+def create_df(resp):
+    """
+    Build the DataFrame exported by the distribution-analysis download
+    for responses whose entries are nested under the '总体' key.
+
+    Every key of ``resp['list']`` yields two consecutive rows: a count row
+    (the key, 'total' and the per-interval 'n' values) followed by a
+    percentage row ('-', '-' and the per-interval 'p' values).
+
+    :param resp: dict with 'label' (list of interval labels) and 'list'
+                 (key -> {'总体': {'n': [...], 'p': [...], 'total': ...}})
+    :return: pandas.DataFrame with columns
+             ['事件发生时间', '总人数', *resp['label']]
+    """
+    # Work on a copy of the labels: inserting the two leading column names
+    # into resp['label'] itself would silently modify the caller's response.
+    labels = list(resp['label'])
+    rows = []
+    for day, groups in resp['list'].items():
+        stats = groups['总体']
+        # Row 1: absolute counts for this key.
+        count_row = {'事件发生时间': day, '总人数': stats['total']}
+        # Row 2: percentages; the key and total cells are placeholders.
+        percent_row = {'事件发生时间': '-', '总人数': '-'}
+        # Index explicitly so a too-short 'n'/'p' list still raises IndexError.
+        for idx, label in enumerate(labels):
+            count_row[label] = stats['n'][idx]
+            percent_row[label] = stats['p'][idx]
+
+        rows.append(count_row)
+        rows.append(percent_row)
+
+    columns = ['事件发生时间', '总人数'] + labels
+    df = pd.DataFrame(data=rows, columns=columns)
+    return df
+
+
+def create_neidf(resp, columnName):
+    """
+    Build the DataFrame exported by the distribution-analysis download
+    for responses whose entries sit directly under each ``resp['list']`` key.
+
+    Every key of ``resp['list']`` yields two consecutive rows: a count row
+    (the key, 'total' and the per-interval 'n' values) followed by a
+    percentage row ('-', '-' and the per-interval 'p' values).
+
+    :param resp: dict with 'label' (list of interval labels) and 'list'
+                 (key -> {'n': [...], 'p': [...], 'total': ...})
+    :param columnName: header of the first column, which holds the keys
+    :return: pandas.DataFrame with columns
+             [columnName, '全部用户数', *resp['label']]
+    """
+    # Work on a copy of the labels: inserting the two leading column names
+    # into resp['label'] itself would silently modify the caller's response.
+    labels = list(resp['label'])
+    rows = []
+    for group_key, stats in resp['list'].items():
+        # Row 1: absolute counts for this key.
+        count_row = {columnName: group_key, '全部用户数': stats['total']}
+        # Row 2: percentages; the key and total cells are placeholders.
+        percent_row = {columnName: '-', '全部用户数': '-'}
+        # Index explicitly so a too-short 'n'/'p' list still raises IndexError.
+        for idx, label in enumerate(labels):
+            count_row[label] = stats['n'][idx]
+            percent_row[label] = stats['p'][idx]
+
+        rows.append(count_row)
+        rows.append(percent_row)
+
+    # columns= pins the column order: key column, total, then one column
+    # per interval label.
+    columns = [columnName, '全部用户数'] + labels
+    df = pd.DataFrame(data=rows, columns=columns)
+    return df
\ No newline at end of file