From 701aa6f54538c222d855060c92c9b52a980c0783 Mon Sep 17 00:00:00 2001 From: kf_wuhao <15392746632@qq.com> Date: Wed, 23 Dec 2020 15:06:44 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E7=9B=AE=E5=BD=95=E7=BB=93?= =?UTF-8?q?=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.json | 5 ++++ db/__init__.py | 1 - main.py | 4 ++- model/__init__.py | 2 ++ model/field_type.py | 20 +++++++++++++++ {db => model}/model.py | 16 +----------- task/first_recharge.py | 9 ++++--- task/repair_gunfu.py | 58 ++++++++++++++++++++++++++++++++++++++++++ task/summary_func.py | 8 +++--- utils/__init__.py | 1 - utils/field_type.py | 4 --- 11 files changed, 98 insertions(+), 30 deletions(-) create mode 100644 model/__init__.py create mode 100644 model/field_type.py rename {db => model}/model.py (79%) create mode 100644 task/repair_gunfu.py delete mode 100644 utils/field_type.py diff --git a/config.json b/config.json index 22aff81..eea850a 100644 --- a/config.json +++ b/config.json @@ -8,5 +8,10 @@ "source_coll": "paylist", "dest_coll": "user", "task_name": "first_recharge" + }, + "repair_gunfu": { + "source_coll": "user", + "dest_coll": "user", + "task_name": "repair_gunfu" } } \ No newline at end of file diff --git a/db/__init__.py b/db/__init__.py index 40bb84b..ca088a9 100644 --- a/db/__init__.py +++ b/db/__init__.py @@ -1,6 +1,5 @@ import pymongo -from .model import GBaseModel from settings import settings diff --git a/main.py b/main.py index 0ffdea9..fd5efb3 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,5 @@ import json +import os import sys from multiprocessing import Pool @@ -25,10 +26,11 @@ def run_task(kwargs): if __name__ == '__main__': # eg: summary_func 0 0 # eg: first_recharge 0 0 + # eg: repair_gunfu 0 0 task_name, st, et = sys.argv[1:] st, et = int(st), int(et) game_list = get_game() - with open('config.json', 'r', encoding='utf8') as f: + with open(os.path.join(settings.ROOT_DIR, 'config.json'), 'r', encoding='utf8') as f: task_conf = json.load(f) params = [] for item in game_list: diff --git a/model/__init__.py b/model/__init__.py new file mode 100644 index 0000000..9883b4a --- /dev/null +++ b/model/__init__.py @@ -0,0 +1,2 @@ +from .field_type import (IntStr, IntFloat, MdbObjectId) +from .model import (GBaseModel, ) diff --git a/model/field_type.py b/model/field_type.py new file mode 100644 index 0000000..2df5e3c --- /dev/null +++ b/model/field_type.py @@ -0,0 +1,20 @@ +from typing import TypeVar +from bson.objectid import ObjectId + +IntStr = TypeVar('IntStr', int, str) +IntFloat = TypeVar('IntFloat', int, float) + + +class MdbObjectId(ObjectId): + @classmethod + def __get_validators__(cls): + yield cls.validate + + @classmethod + def validate(cls, v): + try: + res = ObjectId(v) + except: + raise TypeError('不能装换为 ObjectId') + else: + return res diff --git a/db/model.py b/model/model.py similarity index 79% rename from db/model.py rename to model/model.py index 733a4f1..2cd221c 100644 --- a/db/model.py +++ b/model/model.py @@ -1,20 +1,6 @@ from pydantic import BaseModel, Field -from bson.objectid import ObjectId - -class MdbObjectId(ObjectId): - @classmethod - def __get_validators__(cls): - yield cls.validate - - @classmethod - def validate(cls, v): - try: - res = ObjectId(v) - except: - raise TypeError('不能装换为 ObjectId') - else: - return res +from model.field_type import MdbObjectId class GBaseModel(BaseModel): diff --git a/task/first_recharge.py b/task/first_recharge.py index 20c8622..e1bcfe2 100644 --- a/task/first_recharge.py +++ b/task/first_recharge.py @@ -1,9 +1,10 @@ from pymongo import UpdateOne -from pydantic import BaseModel, Field, validator +from pydantic import BaseModel, Field import pandas as pd from .task import Task from utils import * +from model import IntStr, IntFloat class FirstRecharge(Task): @@ -27,10 +28,10 @@ class FirstRecharge(Task): def cleaning(self, cursor_list): for cursor in cursor_list: # type:dict - for event_coll, ts in cursor.items(): # type:str,dict + for source_coll, ts in cursor.items(): # type:str,dict if ts['cursor_st'] == ts['cursor_et']: continue - logger.info(f'开始处理{self.game_name} 处理 {event_coll} 游标 {ts}') + logger.info(f'开始处理{self.game_name} 处理 {source_coll} 游标 {ts}') where = { '_event_time': { '$gte': ts['cursor_st'], @@ -40,7 +41,7 @@ class FirstRecharge(Task): projection = self.Model.get_fields() bulk_data = [] - for item in self.local_db[event_coll].find(where, projection): + for item in self.local_db[source_coll].find(where, projection): try: item['cdate'] = int(pd.Timestamp(ts['cursor_st'], unit='s', tz=self.timezone) \ .normalize().timestamp()) diff --git a/task/repair_gunfu.py b/task/repair_gunfu.py new file mode 100644 index 0000000..50916c6 --- /dev/null +++ b/task/repair_gunfu.py @@ -0,0 +1,58 @@ +import pymongo +from pymongo import UpdateOne +from pydantic import BaseModel, Field + +from .task import Task +from utils import * +from model import MdbObjectId + + +class RepairGunfu(Task): + """ + 补充滚服 + """ + + class Model(BaseModel): + id: MdbObjectId = Field(None, title='_id', alias='_id') + device_id: str = Field(..., title='设备id', alias='_device_id') + + @classmethod + def get_fields(cls): + return [v.alias for v in cls.__fields__.values()] + + def cleaning(self, cursor_list): + for cursor in cursor_list: # type:dict + for source_coll, ts in cursor.items(): # type:str,dict + if ts['cursor_st'] == ts['cursor_et']: + continue + logger.info(f'开始处理{self.game_name} 处理 {source_coll} 游标 {ts}') + where = { + 'role_create_time': { + '$gte': ts['cursor_st'], + '$lt': ts['cursor_et'], + }, + 'gunfu_num': {'$exists': False} + } + + projection = self.Model.get_fields() + bulk_data = [] + for item in self.local_db[source_coll].find(where, projection).sort('role_create_time', + pymongo.ASCENDING): + try: + # 新角色 + model = self.Model(**item) + device_id = model.device_id + # 查找该设备所有滚服角色 + role_cnt = self.local_db[source_coll].count( + {'_device_id': device_id, 'gunfu_num': {'$exists': True}}) + + bulk_data.append( + UpdateOne({'_id': model.id}, + {'$set': {'gunfu_num': role_cnt + 1}}, upsert=True)) + except Exception as e: + logger.error(f'msg:{e}') + # pass + if bulk_data: + self.remote_db[self.dest_coll].bulk_write(bulk_data, ordered=False) + self.local_db[self.dest_coll].bulk_write(bulk_data, ordered=False) + self.set_cursor(cursor_st=ts['cursor_st'], cursor_et=ts['cursor_et']) diff --git a/task/summary_func.py b/task/summary_func.py index d8d94b8..f43c17b 100644 --- a/task/summary_func.py +++ b/task/summary_func.py @@ -4,7 +4,7 @@ import pandas as pd from .task import Task from utils import * -from db import GBaseModel +from model import GBaseModel class SummaryFunc(Task): @@ -21,10 +21,10 @@ class SummaryFunc(Task): def cleaning(self, cursor_list): for cursor in cursor_list: # type:dict - for event_coll, ts in cursor.items(): # type:str,dict + for source_coll, ts in cursor.items(): # type:str,dict if ts['cursor_st'] == ts['cursor_et']: continue - logger.info(f'开始处理{self.game_name} 处理 {event_coll} 游标 {ts}') + logger.info(f'开始处理{self.game_name} 处理 {source_coll} 游标 {ts}') where = { '_event_name': 'Func', '_event_time': { @@ -35,7 +35,7 @@ class SummaryFunc(Task): projection = self.Model.get_fields() bulk_data = [] - for item in self.local_db[event_coll].find(where, projection): + for item in self.local_db[source_coll].find(where, projection): try: item['cdate'] = int(pd.Timestamp(item['_event_time'], unit='s', tz=self.timezone) \ .normalize().timestamp()) diff --git a/utils/__init__.py b/utils/__init__.py index 34150bf..80011e8 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -1,5 +1,4 @@ from loguru import logger -from .field_type import * logger.add('/data/log/data_cleaning/log.log', format="{time} {level} {name}:{line} {message}", level="INFO", rotation="100 MB", retention='7 days', diff --git a/utils/field_type.py b/utils/field_type.py deleted file mode 100644 index 0dc3e1e..0000000 --- a/utils/field_type.py +++ /dev/null @@ -1,4 +0,0 @@ -from typing import TypeVar - -IntStr = TypeVar('IntStr', int, str) -IntFloat = TypeVar('IntFloat', int, float)