'inti'
This commit is contained in:
parent
a37ea1e69b
commit
4934372b42
2
.gitignore
vendored
2
.gitignore
vendored
@ -128,4 +128,4 @@ dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
.idea/
|
||||
|
14
db/__init__.py
Normal file
14
db/__init__.py
Normal file
@ -0,0 +1,14 @@
|
||||
import pymongo
|
||||
|
||||
from .model import GBaseModel
|
||||
from settings import settings
|
||||
|
||||
|
||||
def get_local_db(db_name):
    """Return the database called ``db_name`` on the local MongoDB server.

    A fresh ``MongoClient`` is created from ``settings.local_mongo_uri`` on
    every call.

    :param db_name: name of the database to open
    :return: the database handle obtained by indexing the client
    """
    return pymongo.MongoClient(settings.local_mongo_uri)[db_name]
|
||||
|
||||
|
||||
def get_remote_db(db_name):
    """Return the database called ``db_name`` on the remote MongoDB deployment.

    A fresh ``MongoClient`` is created from ``settings.remote_mongo_uri`` on
    every call.

    :param db_name: name of the database to open
    :return: the database handle obtained by indexing the client
    """
    return pymongo.MongoClient(settings.remote_mongo_uri)[db_name]
|
89
db/model.py
Normal file
89
db/model.py
Normal file
@ -0,0 +1,89 @@
|
||||
from typing import Union, Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from bson.objectid import ObjectId
|
||||
|
||||
|
||||
class MdbObjectId(ObjectId):
    """``ObjectId`` subclass that plugs into pydantic's custom-type validation."""

    @classmethod
    def __get_validators__(cls):
        """Yield the validators pydantic should apply to fields of this type."""
        yield cls.validate

    @classmethod
    def validate(cls, v):
        """Return ``v`` unchanged if it is an ``ObjectId``.

        :raises TypeError: if ``v`` is anything other than an ``ObjectId``
        """
        if isinstance(v, ObjectId):
            return v
        raise TypeError('ObjectId required')
|
||||
|
||||
|
||||
class GBaseModel(BaseModel):
    """Base model for event documents whose keys start with an underscore.

    Field names beginning with ``_`` clash with protected-name handling, so the
    leading underscore is moved to the end of the key when data comes in
    (``_id`` -> ``id_``) and moved back to the front by :meth:`dict` and
    :meth:`get_fields`.
    """
    # The title used to be a copy of the platform_ title; it describes the id.
    id_: MdbObjectId = Field(..., title="id")
    platform_: str = Field(None, title="平台")
    channel_name_: str = Field(None, title="channel")
    owner_name_: str = Field(None, title="owner")
    channel_uid_: str = Field(None, title="channel_uid")
    device_id_: str = Field(None, title='device_id')
    district_server_id_: int = Field(None, title="区服id")
    game_role_id_: str = Field(None, title="角色id")
    event_time_: int = Field(..., title="事件时间")
    role_create_time: int = Field(None, title="角色创建时间")
    role_level: int = Field(None, title="角色等级")
    role_vip: int = Field(None, title="角色vip等级")

    def __init__(self, **data: Any):
        """Build the model from a raw document.

        A string ``_id`` that is a valid ObjectId representation is converted
        to ``ObjectId``; any other string is left as-is and rejected by the
        field validator instead of raising from the ``ObjectId`` constructor.
        Every key starting with ``_`` has that character moved to the end
        before the data is handed to pydantic.
        """
        raw_id = data.get('_id')
        if isinstance(raw_id, str) and ObjectId.is_valid(raw_id):
            data['_id'] = ObjectId(raw_id)

        renamed = {
            (key[1:] + key[0] if key.startswith('_') else key): value
            for key, value in data.items()  # type: str, Any
        }
        super().__init__(**renamed)

    def dict(
            self,
            *,
            include: Union['AbstractSetIntStr', 'MappingIntStrAny'] = None,
            exclude: Union['AbstractSetIntStr', 'MappingIntStrAny'] = None,
            by_alias: bool = False,
            skip_defaults: bool = None,
            exclude_unset: bool = False,
            exclude_defaults: bool = False,
            exclude_none: bool = False,
    ) -> 'DictStrAny':
        """Return the model as a dict with underscore prefixes restored.

        All keyword arguments are forwarded to ``BaseModel.dict`` (they were
        previously accepted but silently ignored). ``include`` / ``exclude``
        refer to the model's own field names, e.g. ``id_``. In the result every
        key ending with ``_`` has that character moved back to the front.
        """
        data = super().dict(
            include=include,
            exclude=exclude,
            by_alias=by_alias,
            skip_defaults=skip_defaults,
            exclude_unset=exclude_unset,
            exclude_defaults=exclude_defaults,
            exclude_none=exclude_none,
        )
        return {
            (key[-1] + key[:-1] if key.endswith('_') else key): value
            for key, value in data.items()  # type: str, Any
        }

    class Config:
        arbitrary_types_allowed = True

    @classmethod
    def get_fields(cls):
        """Return the declared field names with underscore prefixes restored.

        :return: list of names, e.g. ``id_`` is reported as ``_id``
        """
        return [
            name[-1] + name[:-1] if name.endswith('_') else name
            for name in cls.__fields__
        ]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test. `_event_time` maps to the required `event_time_`
    # field, so it has to be supplied or construction fails validation.
    # `aaa` is not a declared field; `_platform` is passed as an int on purpose.
    obj = GBaseModel(
        _id="5fd0f4812de17aeba6c1a374",
        _event_time=1607608848,
        role_level='2',
        aaa=123,
        _platform=13566,
    )
    print(obj.dict())
    print(obj.role_level)
|
37
main.py
Normal file
37
main.py
Normal file
@ -0,0 +1,37 @@
|
||||
import sys
|
||||
from multiprocessing import Pool
|
||||
|
||||
from importlib import import_module
|
||||
|
||||
from db import *
|
||||
|
||||
|
||||
def get_game() -> list:
    """Return every document of the ``game`` collection in the local ``admin_game`` database."""
    game_coll = get_local_db('admin_game')['game']
    return list(game_coll.find())
|
||||
|
||||
|
||||
def run_task(kwargs):
    """Import ``task.<task_name>``, instantiate its task class and run it.

    The class name is the CamelCase form of the module name, e.g.
    ``summary_func`` -> ``SummaryFunc``.

    :param kwargs: dict holding at least ``task_name``; the whole dict is
        passed to the task class as keyword arguments
    :return: None
    """
    module_name = kwargs.get('task_name')
    # Derive the class name from this call's own argument. The previous code
    # read the module-level ``task_name``, which only exists in a worker that
    # was forked from a ``__main__`` that had already defined it.
    class_name = ''.join(part.capitalize() for part in module_name.split('_'))
    module = import_module(f'.{module_name}', package='task')
    task_cls = getattr(module, class_name)
    task_cls(**kwargs).run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # eg: summary_func 0 0
    if len(sys.argv) != 4:
        sys.exit('usage: main.py <task_name> <st> <et>')
    task_name, st, et = sys.argv[1:]
    st, et = int(st), int(et)

    game_list = get_game()
    # One parameter dict per game; each becomes the kwargs of one task run.
    params = [
        {
            'game_name': item['id_name'],
            'task_name': task_name,
            'timezone': item.get('timezone', 'Asia/Shanghai'),
            'st': st,
            'et': et,
        }
        for item in game_list
    ]

    # Pool() rejects a size of 0, so only fan out when there is work to do.
    if params:
        with Pool(len(params)) as p:
            p.map(run_task, params)
|
15
settings.py
Normal file
15
settings.py
Normal file
@ -0,0 +1,15 @@
|
||||
import os


class Config:
    """MongoDB connection settings shared by every environment.

    Each URI can be overridden with an environment variable
    (``LOCAL_MONGO_URI`` / ``REMOTE_MONGO_URI``); when the variable is unset
    the previous hard-coded value is used, so existing deployments keep
    working unchanged.
    """
    # NOTE(review): the fallback values embed credentials in source control.
    # Prefer supplying the URIs through the environment, and consider rotating
    # these passwords and removing the defaults.
    local_mongo_uri = os.environ.get(
        'LOCAL_MONGO_URI',
        'mongodb://root:iamciniao@127.0.0.1:27017/?authSource=admin&readPreference=primary&ssl=false',
    )

    remote_mongo_uri = os.environ.get(
        'REMOTE_MONGO_URI',
        'mongodb://root:Legu2020@dds-hp35c58764c35aa41188-pub.mongodb.huhehaote.rds.aliyuncs.com:3717,dds-hp35c58764c35aa42908-pub.mongodb.huhehaote.rds.aliyuncs.com:3717/admin?replicaSet=mgset-421510732',
    )
|
||||
|
||||
|
||||
class Production(Config):
    """Production settings: inherits the Mongo URIs and sets the ``game`` prefix."""

    DB_PREFIX = 'game'
|
||||
|
||||
|
||||
class Debug(Config):
    """Debug settings: inherits the Mongo URIs and sets the ``debug`` prefix."""

    DB_PREFIX = 'debug'
|
||||
|
||||
|
||||
# Active configuration class; db/__init__.py reads its Mongo URIs from this name.
settings = Production
|
0
task/__init__.py
Normal file
0
task/__init__.py
Normal file
42
task/summary_func.py
Normal file
42
task/summary_func.py
Normal file
@ -0,0 +1,42 @@
|
||||
from pymongo import UpdateOne
|
||||
from pydantic import Field
|
||||
|
||||
from .task import Task
|
||||
from utils import *
|
||||
from db import GBaseModel
|
||||
|
||||
|
||||
class SummaryFunc(Task):
    """Feature ("Func" event) analysis task.

    Copies ``Func`` events from the local event collections into the remote
    collection named after the task, one cursor window at a time.
    """

    class Model(GBaseModel):
        prize: list = Field(None, title='奖励')
        need: list = Field(None, title='消耗')
        ftype: str = Field(..., title='功能')
        data: dict = Field(None, title='功能数据')

    def cleaning(self, cursor_list):
        """Validate and upsert the ``Func`` events of every cursor window.

        :param cursor_list: list of dicts mapping an event collection name to
            ``{'cursor_st': ..., 'cursor_et': ...}``
        :return: None

        Documents that fail model validation are logged and skipped. After
        each window the task cursor is stored through ``set_cursor``.
        """
        # The projection depends only on the model, so build it once.
        projection = self.Model.get_fields()

        for cursor in cursor_list:  # type: dict
            for event_coll, ts in cursor.items():  # type: str, dict
                logger.info(f'开始处理{self.game_name} 处理 {event_coll} ...')
                where = {
                    '_event_name': 'Func',
                    '_event_time': {
                        '$gte': ts['cursor_st'],
                        '$lt': ts['cursor_et'],
                    }
                }

                bulk_data = []
                for item in self.local_db[event_coll].find(where, projection):
                    try:
                        data = self.Model(**item).dict()
                    except Exception as e:
                        # Best effort: one bad document must not stop the window.
                        logger.error(e)
                    else:
                        bulk_data.append(UpdateOne({'_id': data['_id']}, {'$set': data}, upsert=True))

                # bulk_write raises on an empty request list, which would abort
                # the run before the cursor for an empty window is saved.
                if bulk_data:
                    self.remote_db[self.task_name].bulk_write(bulk_data, ordered=False)
                self.set_cursor(cursor_st=ts['cursor_st'], cursor_et=ts['cursor_et'])
|
139
task/task.py
Normal file
139
task/task.py
Normal file
@ -0,0 +1,139 @@
|
||||
import abc
|
||||
import time
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
from db import *
|
||||
from utils import *
|
||||
|
||||
""""
|
||||
考虑游标过大,切分为30分钟块写入
|
||||
采用批量无序操作提高写性能 bulk_write
|
||||
"""
|
||||
|
||||
|
||||
class Task(metaclass=abc.ABCMeta):
    """Base class for incremental cleaning tasks driven by a time cursor.

    Run state (``is_run``, ``run_ts``) and cursor are kept in the ``task2``
    collection of the game's local database, in the document whose ``name``
    is the task name. Subclasses implement :meth:`cleaning`.
    """

    def __init__(self, *args, **kwargs):
        """Read the task parameters from ``kwargs`` and open both databases.

        Keys read: ``game_name``, ``st``, ``et``, ``timezone``, ``task_name``.
        """
        self.game_name = kwargs.get('game_name')
        self.game_db = f'game_{self.game_name}'
        self.cursor_st = kwargs.get('st')
        self.cursor_et = kwargs.get('et')
        self.timezone = kwargs.get('timezone')
        self.task_name = kwargs.get("task_name")
        self.local_db = get_local_db(self.game_db)
        self.remote_db = get_remote_db(self.game_db)
        self.task_coll = self.local_db['task2']
        self.task_where = {
            'name': self.task_name
        }
        self.task_info = self.get_task_info()

    def get_task_info(self):
        """Return the stored task document, or ``{}`` when there is none."""
        return self.task_coll.find_one(self.task_where) or {}

    def check_run(self) -> bool:
        """Tell whether the task may start now.

        :return: True when the stored ``is_run`` flag is falsy (this includes
            the very first run); False while the flag is set, whether or not
            the previous run has exceeded ``time_out`` seconds.
        """
        if not self.task_info.get('is_run'):
            return True

        last_ts = self.task_info.get('run_ts', 0)
        time_out = self.task_info.get('time_out', 86400)
        if int(time.time()) - last_ts > time_out:
            # Previous run timed out.
            # TODO: send a DingTalk notification.
            # NOTE(review): a timed-out task still reports False, so it stays
            # blocked until is_run is reset - confirm this is intended.
            logger.info('钉钉通知')
        else:
            # Still running and not yet timed out.
            logger.info('正在运行没超时')
        return False

    def set_run_ts(self):
        """Store the current time as ``run_ts`` in the task document."""
        self.task_coll.update_one(self.task_where, {
            '$set': {'run_ts': int(time.time())}
        }, upsert=True)

    def get_cursor(self):
        """Fill in ``cursor_st`` / ``cursor_et`` when they were not given.

        Without a manual start the task resumes from the stored ``cursor_et``;
        on the first run it starts at midnight of the current day in the
        task's timezone. Without a manual end the current time is used.
        """
        if not self.cursor_st:
            self.cursor_st = self.task_info.get('cursor_et')

        if not self.cursor_st:
            self.cursor_st = int(pd.Timestamp(time.time(), unit='s', tz=self.timezone).normalize().timestamp())

        if not self.cursor_et:
            self.cursor_et = int(time.time())

    def set_cursor(self, **kwargs):
        """Persist the cursor once a window has been processed.

        :param kwargs: any of ``cursor_st`` and ``cursor_et``
        :raises ValueError: if any other key is passed
        :return: None
        """
        # The previous proper-superset test (``>``) only fired when both valid
        # keys were present alongside an extra one; reject every unknown key.
        unexpected = set(kwargs) - {'cursor_et', 'cursor_st'}
        if unexpected:
            raise ValueError('设置游标不合理')
        self.task_coll.update_one(self.task_where, {
            '$set': kwargs}, upsert=True)

    def get_event_coll(self) -> list:
        """Split the cursor range into 30-minute windows per event collection.

        A window that crosses midnight (in the task's timezone) is split at
        midnight so each part maps to the collection of its own day. An empty
        range (``cursor_st`` after ``cursor_et``) yields an empty list.

        :return: [{'event_2020-12-10': {'cursor_st': 1607608848, 'cursor_et': 1607610648}}, {'event_2020-12-10': {'cursor_st': 1607610648, 'cursor_et': 1607610791}}]
        """
        range_end = pd.Timestamp(self.cursor_et, unit='s', tz=self.timezone)
        # '30min' is the non-deprecated spelling of the '30T' alias.
        window_starts = pd.date_range(pd.Timestamp(self.cursor_st, unit='s', tz=self.timezone),
                                      range_end, freq='30min')
        # Each window ends where the next begins; the last ends at cursor_et.
        window_ends = list(window_starts[1:]) + [range_end]

        cursor_list = []
        for st, et in zip(window_starts, window_ends):
            coll_s = f'event_{st.strftime("%Y-%m-%d")}'
            coll_e = f'event_{et.strftime("%Y-%m-%d")}'
            data = {}
            if coll_s != coll_e:
                midnight = int(et.normalize().timestamp())
                data[coll_s] = {
                    'cursor_st': int(st.timestamp()),
                    'cursor_et': midnight,
                }
                data[coll_e] = {
                    'cursor_st': midnight,
                    'cursor_et': int(et.timestamp()),
                }
            else:
                data[coll_s] = {
                    'cursor_st': int(st.timestamp()),
                    'cursor_et': int(et.timestamp()),
                }
            cursor_list.append(data)
        return cursor_list

    def set_run_status(self, status: bool):
        """Store the running flag.

        :param status: value written to ``is_run``
        :return: None
        """
        self.task_coll.update_one(self.task_where, {'$set': {'is_run': status}}, upsert=True)

    @abc.abstractmethod
    def cleaning(self, cursor_list):
        """Process the windows produced by :meth:`get_event_coll`."""
        pass

    def run(self):
        """Run the task once if it is not already marked as running.

        :return: ``'运行中...'`` when the run is skipped, otherwise None
        """
        if not self.check_run():
            return '运行中...'
        self.set_run_ts()
        self.set_run_status(True)
        try:
            self.get_cursor()
            cursor_list = self.get_event_coll()
            self.cleaning(cursor_list)
        finally:
            # Always clear the flag; otherwise one failed run leaves is_run set
            # and check_run refuses every later run.
            self.set_run_status(False)
|
5
utils/__init__.py
Normal file
5
utils/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
from loguru import logger
|
||||
|
||||
# Register a file sink on the shared loguru logger when this package is imported.
logger.add(
    '/data/log/data_cleaning/log.log',
    format="{time} {level} {name}:{line} {message}",
    level="INFO",
    rotation="100 MB",
    retention='7 days',
    enqueue=True,
)
|
Loading…
Reference in New Issue
Block a user