From 4934372b42ebc02e19a1417169efa6f0c4d6fad6 Mon Sep 17 00:00:00 2001
From: wuhao <15392746632@qq.com>
Date: Fri, 11 Dec 2020 10:51:07 +0800
Subject: [PATCH] 'init'

---
 .gitignore           |   2 +-
 db/__init__.py       |  16 +++++
 db/model.py          |  99 +++++++++++++++++++++++++++
 main.py              |  51 ++++++++++++++
 settings.py          |  25 +++++++
 task/__init__.py     |   0
 task/summary_func.py |  50 ++++++++++++++
 task/task.py         | 157 +++++++++++++++++++++++++++++++++++++++++++
 utils/__init__.py    |   5 ++
 9 files changed, 404 insertions(+), 1 deletion(-)
 create mode 100644 db/__init__.py
 create mode 100644 db/model.py
 create mode 100644 main.py
 create mode 100644 settings.py
 create mode 100644 task/__init__.py
 create mode 100644 task/summary_func.py
 create mode 100644 task/task.py
 create mode 100644 utils/__init__.py

diff --git a/.gitignore b/.gitignore
index 13d1490..927f0b2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -128,4 +128,4 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
-
+.idea/
diff --git a/db/__init__.py b/db/__init__.py
new file mode 100644
index 0000000..40bb84b
--- /dev/null
+++ b/db/__init__.py
@@ -0,0 +1,16 @@
+import pymongo
+
+from .model import GBaseModel
+from settings import settings
+
+
+def get_local_db(db_name):
+    """Return database ``db_name`` from a new client on the local MongoDB URI."""
+    db_client = pymongo.MongoClient(settings.local_mongo_uri)
+    return db_client[db_name]
+
+
+def get_remote_db(db_name):
+    """Return database ``db_name`` from a new client on the remote MongoDB URI."""
+    db_client = pymongo.MongoClient(settings.remote_mongo_uri)
+    return db_client[db_name]
diff --git a/db/model.py b/db/model.py
new file mode 100644
index 0000000..d6c0f07
--- /dev/null
+++ b/db/model.py
@@ -0,0 +1,99 @@
+from typing import Union, Any
+
+from pydantic import BaseModel, Field
+from bson.objectid import ObjectId
+
+
+class MdbObjectId(ObjectId):
+    """``ObjectId`` subclass usable as a pydantic field type."""
+
+    @classmethod
+    def __get_validators__(cls):
+        # pydantic hook: yields the validators to run for this type.
+        yield cls.validate
+
+    @classmethod
+    def validate(cls, v):
+        """Return ``v`` unchanged if it is an ``ObjectId``, else raise ``TypeError``."""
+        if not isinstance(v, ObjectId):
+            raise TypeError('ObjectId required')
+        return v
+
+
+class GBaseModel(BaseModel):
+    """
+    Base model for event documents.
+
+    Stored field names start with ``_``, which clashes with protected-attribute
+    naming, so the ``_`` prefix is turned into a suffix on input (``_id`` ->
+    ``id_``) and restored on output by ``dict``.
+    """
+    id_: MdbObjectId = Field(..., title="id")
+    platform_: str = Field(None, title="平台")
+    channel_name_: str = Field(None, title="channel")
+    owner_name_: str = Field(None, title="owner")
+    channel_uid_: str = Field(None, title="channel_uid")
+    device_id_: str = Field(None, title='device_id')
+    district_server_id_: int = Field(None, title="区服id")
+    game_role_id_: str = Field(None, title="角色id")
+    event_time_: int = Field(..., title="事件时间")
+    role_create_time: int = Field(None, title="角色创建时间")
+    role_level: int = Field(None, title="角色等级")
+    role_vip: int = Field(None, title="角色vip等级")
+
+    def __init__(self, **data: Any):
+        """Build the model from a document keyed by its stored field names."""
+        # A 24-character string ``_id`` is converted to an ObjectId.
+        if isinstance(data.get('_id'), str) and len(data['_id']) == 24:
+            data['_id'] = ObjectId(data['_id'])
+
+        # Move a leading ``_`` to the end of the key: ``_id`` -> ``id_``.
+        new_data = {(k[1:] + k[0] if k.startswith('_') else k): v
+                    for k, v in data.items()}
+
+        super().__init__(**new_data)
+
+    def dict(
+            self,
+            *,
+            include: Union['AbstractSetIntStr', 'MappingIntStrAny'] = None,
+            exclude: Union['AbstractSetIntStr', 'MappingIntStrAny'] = None,
+            by_alias: bool = False,
+            skip_defaults: bool = None,
+            exclude_unset: bool = False,
+            exclude_defaults: bool = False,
+            exclude_none: bool = False,
+    ) -> 'DictStrAny':
+        """
+        Export the model as a dict keyed by the stored field names.
+
+        The keyword arguments are forwarded to ``BaseModel.dict``; a trailing
+        ``_`` on a top-level key is then moved back to the front
+        (``id_`` -> ``_id``).
+        """
+        data = super().dict(
+            include=include,
+            exclude=exclude,
+            by_alias=by_alias,
+            skip_defaults=skip_defaults,
+            exclude_unset=exclude_unset,
+            exclude_defaults=exclude_defaults,
+            exclude_none=exclude_none,
+        )
+
+        return {(k[-1] + k[:-1] if k.endswith('_') else k): v
+                for k, v in data.items()}
+
+    class Config:
+        arbitrary_types_allowed = True
+
+    @classmethod
+    def get_fields(cls):
+        """Return the stored field names of the model (``id_`` -> ``_id``)."""
+        return [k[-1] + k[:-1] if k.endswith('_') else k for k in cls.__fields__]
+
+
+if __name__ == '__main__':
+    obj = GBaseModel(_id="5fd0f4812de17aeba6c1a374", role_level='2', aaa=123, _platform=13566, _event_time=1607655067)
+    print(obj.dict())
+    print(obj.role_level)
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..8f50d67
--- /dev/null
+++ b/main.py
@@ -0,0 +1,51 @@
+import sys
+from multiprocessing import Pool
+
+from importlib import import_module
+
+from db import *
+
+
+def get_game() -> list:
+    """Return all documents of the ``game`` collection in ``admin_game``."""
+    local_db = get_local_db('admin_game')
+    games = list(local_db['game'].find())
+    return games
+
+
+def run_task(kwargs):
+    """
+    Import ``task.<task_name>``, instantiate its task class and run it.
+
+    The class name is the capitalized, underscore-free form of the module
+    name, e.g. ``summary_func`` -> ``SummaryFunc``.
+
+    :param kwargs: task parameters; ``task_name`` selects the module and the
+        whole dict is passed to the task constructor.
+    """
+    module_name = kwargs.get('task_name')
+    # Use this call's own task name: the global ``task_name`` is only defined
+    # when the module runs as ``__main__``.
+    class_name = ''.join([s.capitalize() for s in module_name.split('_')])
+    module = import_module(f'.{module_name}', package='task')
+    c_obj = getattr(module, class_name)
+    obj = c_obj(**kwargs)
+    obj.run()
+
+
+if __name__ == '__main__':
+    # eg: summary_func 0 0
+    task_name, st, et = sys.argv[1:]
+    st, et = int(st), int(et)
+    game_list = get_game()
+    params = [{'game_name': item['id_name'],
+               'task_name': task_name,
+               'timezone': item.get('timezone', 'Asia/Shanghai'),
+               'st': st,
+               'et': et
+               }
+              for item in game_list]
+    # Pool(0) raises ValueError, so only start workers when there are games.
+    if params:
+        with Pool(len(params)) as p:
+            p.map(run_task, params)
diff --git a/settings.py b/settings.py
new file mode 100644
index 0000000..b32f891
--- /dev/null
+++ b/settings.py
@@ -0,0 +1,25 @@
+import os
+
+
+class Config:
+    # NOTE(review): these default URIs embed credentials and are committed to
+    # version control. Prefer LOCAL_MONGO_URI / REMOTE_MONGO_URI and rotate the
+    # exposed passwords.
+    local_mongo_uri = os.environ.get(
+        'LOCAL_MONGO_URI',
+        'mongodb://root:iamciniao@127.0.0.1:27017/?authSource=admin&readPreference=primary&ssl=false')
+
+    remote_mongo_uri = os.environ.get(
+        'REMOTE_MONGO_URI',
+        'mongodb://root:Legu2020@dds-hp35c58764c35aa41188-pub.mongodb.huhehaote.rds.aliyuncs.com:3717,dds-hp35c58764c35aa42908-pub.mongodb.huhehaote.rds.aliyuncs.com:3717/admin?replicaSet=mgset-421510732')
+
+
+class Production(Config):
+    DB_PREFIX = 'game'
+
+
+class Debug(Config):
+    DB_PREFIX = 'debug'
+
+
+settings = Production
diff --git a/task/__init__.py b/task/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/task/summary_func.py b/task/summary_func.py
new file mode 100644
index 0000000..0c0299a
--- /dev/null
+++ b/task/summary_func.py
@@ -0,0 +1,50 @@
+from pymongo import UpdateOne
+from pydantic import Field
+
+from .task import Task
+from utils import *
+from db import GBaseModel
+
+
+class SummaryFunc(Task):
+    """
+    Feature ("Func" event) analysis.
+    """
+
+    class Model(GBaseModel):
+        prize: list = Field(None, title='奖励')
+        need: list = Field(None, title='消耗')
+        ftype: str = Field(..., title='功能')
+        data: dict = Field(None, title='功能数据')
+
+    def cleaning(self, cursor_list):
+        """
+        Upsert the ``Func`` events of each time window into the remote database.
+
+        :param cursor_list: list of ``{collection: {'cursor_st': .., 'cursor_et': ..}}``
+        """
+        for cursor in cursor_list:  # type:dict
+            for event_coll, ts in cursor.items():  # type:str,dict
+                logger.info(f'开始处理{self.game_name} 处理 {event_coll} ...')
+                where = {
+                    '_event_name': 'Func',
+                    '_event_time': {
+                        '$gte': ts['cursor_st'],
+                        '$lt': ts['cursor_et'],
+                    }
+                }
+
+                projection = self.Model.get_fields()
+                bulk_data = []
+                for item in self.local_db[event_coll].find(where, projection):
+                    try:
+                        model = self.Model(**item)
+                        data = model.dict()
+                        bulk_data.append(UpdateOne({'_id': data['_id']}, {'$set': data}, upsert=True))
+                    except Exception as e:
+                        # Best effort: log and skip documents that fail.
+                        logger.error(e)
+                # bulk_write raises InvalidOperation on an empty request list.
+                if bulk_data:
+                    self.remote_db[self.task_name].bulk_write(bulk_data, ordered=False)
+                self.set_cursor(cursor_st=ts['cursor_st'], cursor_et=ts['cursor_et'])
diff --git a/task/task.py b/task/task.py
new file mode 100644
index 0000000..3a7eeb3
--- /dev/null
+++ b/task/task.py
@@ -0,0 +1,157 @@
+import abc
+import time
+
+import pandas as pd
+import numpy as np
+
+from db import *
+from utils import *
+
+"""
+The cursor range can be large, so it is split into 30-minute blocks.
+Unordered bulk operations (bulk_write) are used to improve write performance.
+"""
+
+
+class Task(metaclass=abc.ABCMeta):
+    """Base class of a cleaning task; subclasses implement ``cleaning``."""
+
+    def __init__(self, *args, **kwargs):
+        self.game_name = kwargs.get('game_name')
+        self.game_db = f'game_{self.game_name}'
+        self.cursor_st = kwargs.get('st')
+        self.cursor_et = kwargs.get('et')
+        self.timezone = kwargs.get('timezone')
+        self.task_name = kwargs.get("task_name")
+        self.local_db = get_local_db(self.game_db)
+        self.remote_db = get_remote_db(self.game_db)
+        self.task_coll = self.local_db['task2']
+        self.task_where = {
+            'name': self.task_name
+        }
+        self.task_info = self.get_task_info()
+
+    def get_task_info(self):
+        """Return this task's state document from ``task2``, or ``{}`` if none."""
+        task_info = self.task_coll.find_one(self.task_where) or {}
+        return task_info
+
+    def check_run(self) -> bool:
+        """Return True when the task is not marked as running."""
+        is_run = self.task_info.get('is_run')
+        last_ts = self.task_info.get('run_ts', 0)
+        time_out = self.task_info.get('time_out', 86400)
+        if not is_run:
+            # First run, or the previous run has finished.
+            return True
+        if int(time.time()) - last_ts > time_out:
+            # The previous run exceeded its time-out.
+            # todo: DingTalk notification
+            # NOTE(review): a timed-out run still blocks new runs until
+            # ``is_run`` is reset.
+            logger.info('钉钉通知')
+        else:
+            # Still running and within the time-out.
+            logger.info('正在运行没超时')
+        return False
+
+    def set_run_ts(self):
+        """Store the current time as ``run_ts`` in the task document."""
+        self.task_coll.update_one(self.task_where, {
+            '$set': {'run_ts': int(time.time())}
+        }, upsert=True)
+
+    def get_cursor(self):
+        """
+        Resolve the cursor range when it was not set manually.
+
+        The start continues from the ``cursor_et`` stored in the task document;
+        on the first run it is today's midnight in the task's timezone. The end
+        defaults to the current time.
+        :return:
+        """
+        if not self.cursor_st:
+            self.cursor_st = self.task_info.get('cursor_et')
+
+        if not self.cursor_st:
+            self.cursor_st = int(pd.Timestamp(time.time(), unit='s', tz=self.timezone).normalize().timestamp())
+
+        if not self.cursor_et:
+            self.cursor_et = int(time.time())
+
+    def set_cursor(self, **kwargs):
+        """
+        Persist the cursor once a block has been processed.
+
+        :param kwargs: only ``cursor_st`` and ``cursor_et`` are accepted.
+        :raises ValueError: if any other key is passed.
+        :return: None
+        """
+        # Reject every key outside the allowed set (the former proper-superset
+        # test let unknown keys through unless both cursor keys were present).
+        if not set(kwargs) <= {'cursor_et', 'cursor_st'}:
+            raise ValueError('设置游标不合理')
+        self.task_coll.update_one(self.task_where, {
+            '$set': kwargs}, upsert=True)
+
+    def get_event_coll(self) -> list:
+        """
+        Return the collections to process for the cursor range, in 30-minute blocks.
+
+        A block that crosses midnight is split between the two daily collections.
+        :return: [{'event_2020-12-10': {'cursor_st': 1607608848, 'cursor_et': 1607610648}}, {'event_2020-12-10': {'cursor_st': 1607610648, 'cursor_et': 1607610791}}]
+        """
+        date_index = pd.date_range(pd.Timestamp(self.cursor_st, unit='s', tz=self.timezone),
+                                   pd.Timestamp(self.cursor_et, unit='s', tz=self.timezone), freq='30min')
+        df = pd.DataFrame(index=date_index)
+        df['st'] = df.index
+        df['et'] = np.append(df.index[1:], [pd.Timestamp(self.cursor_et, unit='s', tz=self.timezone)])
+        df['event_coll_s'] = df['st'].apply(lambda x: f'event_{x.date().strftime("%Y-%m-%d")}')
+        df['event_coll_e'] = df['et'].apply(lambda x: f'event_{x.date().strftime("%Y-%m-%d")}')
+        cursor_list = []
+        for k, item in df.T.items():
+            data = {}
+            if item['event_coll_s'] != item['event_coll_e']:
+                data[item['event_coll_s']] = {
+                    'cursor_st': int(item['st'].timestamp()),
+                    'cursor_et': int(item['et'].normalize().timestamp()),
+                }
+                data[item['event_coll_e']] = {
+                    'cursor_st': int(item['et'].normalize().timestamp()),
+                    'cursor_et': int(item['et'].timestamp()),
+                }
+            else:
+                data[item['event_coll_s']] = {
+                    'cursor_st': int(item['st'].timestamp()),
+                    'cursor_et': int(item['et'].timestamp()),
+                }
+
+            cursor_list.append(data)
+        return cursor_list
+
+    def set_run_status(self, status: bool):
+        """
+        Store the running flag ``is_run`` in the task document.
+        :param status:
+        :return:
+        """
+        self.task_coll.update_one(self.task_where, {'$set': {'is_run': status}}, upsert=True)
+
+    @abc.abstractmethod
+    def cleaning(self, cursor_list):
+        pass
+
+    def run(self):
+        """Run the task once; return '运行中...' without running if ``check_run`` fails."""
+        if not self.check_run():
+            return '运行中...'
+        self.set_run_ts()
+        self.set_run_status(True)
+        try:
+            self.get_cursor()
+            cursor_list = self.get_event_coll()
+            self.cleaning(cursor_list)
+        finally:
+            # Always clear the flag; a failed run would otherwise leave
+            # ``is_run`` set and block every later run.
+            self.set_run_status(False)
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..80011e8
--- /dev/null
+++ b/utils/__init__.py
@@ -0,0 +1,5 @@
+from loguru import logger
+
+logger.add('/data/log/data_cleaning/log.log', format="{time} {level} {name}:{line} {message}", level="INFO",
+           rotation="100 MB", retention='7 days',
+           enqueue=True)