Compare commits
4 Commits
ff0dffb36f
...
0db503885b
Author | SHA1 | Date | |
---|---|---|---|
0db503885b | |||
c0b829dfb4 | |||
a6e9845062 | |||
9912567004 |
12
config.json
Normal file
12
config.json
Normal file
@ -0,0 +1,12 @@
|
||||
{
|
||||
"summary_func": {
|
||||
"source_coll": "event",
|
||||
"dest_coll": "summary_func",
|
||||
"task_name": "summary_func"
|
||||
},
|
||||
"first_recharge": {
|
||||
"source_coll": "paylist",
|
||||
"dest_coll": "user",
|
||||
"task_name": "first_recharge"
|
||||
}
|
||||
}
|
87
db/model.py
87
db/model.py
@ -1,5 +1,3 @@
|
||||
from typing import Union, Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from bson.objectid import ObjectId
|
||||
|
||||
@ -11,79 +9,36 @@ class MdbObjectId(ObjectId):
|
||||
|
||||
@classmethod
|
||||
def validate(cls, v):
|
||||
if not isinstance(v, ObjectId):
|
||||
raise TypeError('ObjectId required')
|
||||
return v
|
||||
try:
|
||||
res = ObjectId(v)
|
||||
except:
|
||||
raise TypeError('不能装换为 ObjectId')
|
||||
else:
|
||||
return res
|
||||
|
||||
|
||||
class GBaseModel(BaseModel):
|
||||
"""
|
||||
字段名与保护变量命名冲突,将 _ 前缀变为后缀,读出再还原
|
||||
"""
|
||||
id_: MdbObjectId = Field(..., title="平台")
|
||||
platform_: str = Field(None, title="平台")
|
||||
channel_name_: str = Field(None, title="channel")
|
||||
owner_name_: str = Field(None, title="owner")
|
||||
channel_uid_: str = Field(None, title="channel_uid")
|
||||
device_id_: str = Field(None, title='device_id')
|
||||
district_server_id_: int = Field(None, title="区服id")
|
||||
game_role_id_: str = Field(None, title="角色id")
|
||||
event_time_: int = Field(..., title="事件时间")
|
||||
id: MdbObjectId = Field(..., title="平台", alias='_id')
|
||||
platform: str = Field(None, title="平台", alias='_platform')
|
||||
channel_name: str = Field(None, title="channel", alias='_channel_name')
|
||||
owner_name: str = Field(None, title="owner", alias='_owner_name')
|
||||
channel_uid: str = Field(None, title="channel_uid", alias='_channel_uid')
|
||||
device_id: str = Field(None, title='device_id', alias='_device_id')
|
||||
district_server_id: int = Field(None, title="区服id", alias='_district_server_id')
|
||||
game_role_id: str = Field(None, title="角色id", alias='_game_role_id')
|
||||
event_time: int = Field(..., title="事件时间", alias='_event_time')
|
||||
role_create_time: int = Field(None, title="角色创建时间")
|
||||
role_level: int = Field(None, title="角色等级")
|
||||
role_vip: int = Field(None, title="角色vip等级")
|
||||
|
||||
def __init__(self, **data: Any):
|
||||
if isinstance(data.get('_id'), str) and len(data['_id']) == 24:
|
||||
data['_id'] = ObjectId(data['_id'])
|
||||
|
||||
new_data = {}
|
||||
for k, v in data.items(): # type:str,Any
|
||||
if k.startswith('_'):
|
||||
new_k = k[1:] + k[0]
|
||||
new_data[new_k] = v
|
||||
else:
|
||||
new_data[k] = v
|
||||
|
||||
super().__init__(**new_data)
|
||||
|
||||
def dict(
|
||||
self,
|
||||
*,
|
||||
include: Union['AbstractSetIntStr', 'MappingIntStrAny'] = None,
|
||||
exclude: Union['AbstractSetIntStr', 'MappingIntStrAny'] = None,
|
||||
by_alias: bool = False,
|
||||
skip_defaults: bool = None,
|
||||
exclude_unset: bool = False,
|
||||
exclude_defaults: bool = False,
|
||||
exclude_none: bool = False,
|
||||
) -> 'DictStrAny':
|
||||
data = super().dict()
|
||||
|
||||
new_data = {}
|
||||
for k, v in data.items(): # type:str,Any
|
||||
if k.endswith('_'):
|
||||
new_k = k[-1] + k[:-1]
|
||||
new_data[new_k] = v
|
||||
else:
|
||||
new_data[k] = v
|
||||
return new_data
|
||||
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
role_stage: int = Field(None, title="关卡")
|
||||
|
||||
@classmethod
|
||||
def get_fields(cls):
|
||||
fields = []
|
||||
for k in cls.__fields__:
|
||||
if k.endswith('_'):
|
||||
fields.append(k[-1] + k[:-1])
|
||||
else:
|
||||
fields.append(k)
|
||||
return fields
|
||||
return [v.alias for v in cls.__fields__.values()]
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
obj = GBaseModel(_id="5fd0f4812de17aeba6c1a374", role_level='2', aaa=123, _platform=13566)
|
||||
print(obj.dict())
|
||||
print(obj.role_level)
|
||||
obj = GBaseModel(_id="5fd0f4812de17aeba6c1a373", role_level='2', aaa=123, _platform=13566, _event_time=123456789)
|
||||
print(GBaseModel.get_fields())
|
||||
print(obj.dict(by_alias=True))
|
||||
|
15
main.py
15
main.py
@ -1,3 +1,4 @@
|
||||
import json
|
||||
import sys
|
||||
from multiprocessing import Pool
|
||||
|
||||
@ -14,7 +15,7 @@ def get_game() -> list:
|
||||
|
||||
def run_task(kwargs):
|
||||
module_name = kwargs.get('task_name')
|
||||
class_name = ''.join([s.capitalize() for s in task_name.split('_')])
|
||||
class_name = ''.join([s.capitalize() for s in module_name.split('_')])
|
||||
module = import_module(f'.{module_name}', package='task')
|
||||
c_obj = getattr(module, class_name)
|
||||
obj = c_obj(**kwargs)
|
||||
@ -23,15 +24,21 @@ def run_task(kwargs):
|
||||
|
||||
if __name__ == '__main__':
|
||||
# eg: summary_func 0 0
|
||||
# eg: first_recharge 0 0
|
||||
task_name, st, et = sys.argv[1:]
|
||||
st, et = int(st), int(et)
|
||||
game_list = get_game()
|
||||
params = [{'game_name': item['id_name'],
|
||||
'task_name': task_name,
|
||||
with open('config.json', 'r', encoding='utf8') as f:
|
||||
task_conf = json.load(f)
|
||||
params = []
|
||||
for item in game_list:
|
||||
p = {'game_name': item['id_name'],
|
||||
'timezone': item.get('timezone', 'Asia/Shanghai'),
|
||||
'st': st,
|
||||
'et': et
|
||||
}
|
||||
for item in game_list]
|
||||
p.update(task_conf[task_name])
|
||||
params.append(p)
|
||||
|
||||
with Pool(len(game_list)) as p:
|
||||
p.map(run_task, params)
|
||||
|
59
task/first_recharge.py
Normal file
59
task/first_recharge.py
Normal file
@ -0,0 +1,59 @@
|
||||
from pymongo import UpdateOne
|
||||
from pydantic import BaseModel, Field, validator
|
||||
import pandas as pd
|
||||
|
||||
from .task import Task
|
||||
from utils import *
|
||||
|
||||
|
||||
class FirstRecharge(Task):
    """Record a role's first recharge (payment) on its destination document.

    For every cursor window, payment rows are read from the local source
    collection, validated through ``Model`` and converted into updates that
    set ``is_recharge`` only on documents that do not carry that key yet.
    """

    class Model(BaseModel):
        """Fields kept from a single payment record."""

        role_level: int = Field(None, title='角色等级')
        role_vip: int = Field(None, title='vip等级')
        role_stage: IntStr = Field(None, title='关卡')
        money: IntFloat = Field(..., title='金额')
        game_role_id: str = Field(..., title='角色id', alias='_game_role_id')
        orderid: str = Field(..., title='订单号')
        proid: str = Field(..., title='计费点')
        cdate: int = Field(..., title='当天0点')

        @classmethod
        def get_fields(cls):
            """Return the alias (stored key name) of every model field."""
            return [v.alias for v in cls.__fields__.values()]

    def cleaning(self, cursor_list):
        """Process every cursor window and write first-recharge updates.

        :param cursor_list: list of ``{collection_name: {'cursor_st': int,
            'cursor_et': int}}`` mappings; windows whose start equals their
            end are skipped.

        Rows that fail validation are logged and skipped; the stored cursor
        is advanced after each processed window.
        """
        for cursor in cursor_list:  # type:dict
            for event_coll, ts in cursor.items():  # type:str,dict
                if ts['cursor_st'] == ts['cursor_et']:
                    # Empty window: nothing to read, cursor is left untouched.
                    continue
                logger.info(f'开始处理{self.game_name} 处理 {event_coll} 游标 {ts}')
                where = {
                    '_event_time': {
                        '$gte': ts['cursor_st'],
                        '$lt': ts['cursor_et'],
                    }
                }
                projection = self.Model.get_fields()

                # Midnight (in the task timezone) of the day the window starts.
                # It depends only on the window, not on the row, so it is
                # computed once here instead of once per row. A bad timezone
                # now raises instead of being logged for each row.
                cdate = int(pd.Timestamp(ts['cursor_st'], unit='s', tz=self.timezone)
                            .normalize().timestamp())

                bulk_data = []
                for item in self.local_db[event_coll].find(where, projection):
                    try:
                        item['cdate'] = cdate
                        model = self.Model(**item)
                        data = model.dict(by_alias=True)
                        _game_role_id = data.pop('_game_role_id')
                        # The `$exists: False` filter makes the update a no-op
                        # once a role already has a recorded recharge.
                        # NOTE(review): rows are read without a sort and written
                        # with ordered=False, so when one role pays several
                        # times inside the same window the recorded payment is
                        # presumably not guaranteed to be the earliest - confirm.
                        bulk_data.append(
                            UpdateOne({'_game_role_id': _game_role_id, 'is_recharge': {'$exists': False}},
                                      {'$set': {'is_recharge': data}}))
                    except Exception as e:
                        # Best effort: a bad row is logged and skipped.
                        logger.error(f'msg:{e}')

                # bulk_write is only called with a non-empty request list.
                if bulk_data:
                    self.remote_db[self.dest_coll].bulk_write(bulk_data, ordered=False)
                    self.local_db[self.dest_coll].bulk_write(bulk_data, ordered=False)
                self.set_cursor(cursor_st=ts['cursor_st'], cursor_et=ts['cursor_et'])
|
@ -40,10 +40,11 @@ class SummaryFunc(Task):
|
||||
item['cdate'] = int(pd.Timestamp(item['_event_time'], unit='s', tz=self.timezone) \
|
||||
.normalize().timestamp())
|
||||
model = self.Model(**item)
|
||||
data = model.dict()
|
||||
data = model.dict(by_alias=True)
|
||||
bulk_data.append(UpdateOne({'_id': data['_id']}, {'$set': data}, upsert=True))
|
||||
except Exception as e:
|
||||
logger.error(f'ftype {item["ftype"]} msg:{e}')
|
||||
# pass
|
||||
self.remote_db[self.task_name].bulk_write(bulk_data, ordered=False)
|
||||
if bulk_data:
|
||||
self.remote_db[self.dest_coll].bulk_write(bulk_data, ordered=False)
|
||||
self.set_cursor(cursor_st=ts['cursor_st'], cursor_et=ts['cursor_et'])
|
||||
|
35
task/task.py
35
task/task.py
@ -17,6 +17,8 @@ class Task(metaclass=abc.ABCMeta):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.game_name = kwargs.get('game_name')
|
||||
self.game_db = f'game_{self.game_name}'
|
||||
self.source_coll = kwargs.get('source_coll')
|
||||
self.dest_coll = kwargs.get('dest_coll')
|
||||
self.cursor_st = kwargs.get('st')
|
||||
self.cursor_et = kwargs.get('et')
|
||||
self.timezone = kwargs.get('timezone')
|
||||
@ -83,16 +85,31 @@ class Task(metaclass=abc.ABCMeta):
|
||||
self.task_coll.update_one(self.task_where, {
|
||||
'$set': kwargs}, upsert=True)
|
||||
|
||||
def get_event_coll(self) -> list:
|
||||
"""
|
||||
根据游标时间戳 返回要处理的集合
|
||||
:return: [{'event_2020-12-10': {'cursor_st': 1607608848, 'cursor_et': 1607610648}}, {'event_2020-12-10': {'cursor_st': 1607610648, 'cursor_et': 1607610791}}]
|
||||
"""
|
||||
def generate_cursor_time(self):
    """Split the cursor range into consecutive windows of at most 30 minutes.

    Reads ``self.cursor_st`` and ``self.cursor_et`` (epoch seconds) and
    ``self.timezone``.

    :return: DataFrame indexed by window start with two timezone-aware
        columns: ``st`` (window start) and ``et`` (window end). Each window
        ends where the next one starts; the last window ends at
        ``cursor_et``. When ``cursor_et`` lands exactly on a 30-minute step
        the last row has ``st == et``.
    """
    start = pd.Timestamp(self.cursor_st, unit='s', tz=self.timezone)
    end = pd.Timestamp(self.cursor_et, unit='s', tz=self.timezone)
    # '30min' is the canonical spelling; the 'T' alias is deprecated in
    # recent pandas releases.
    starts = pd.date_range(start, end, freq='30min')
    df = pd.DataFrame(index=starts)
    df['st'] = df.index
    # Shift the starts by one position and close the last window with `end`.
    df['et'] = list(starts[1:]) + [end]
    return df
|
||||
|
||||
def get_single_coll(self) -> list:
    """Build the cursor windows for a task that reads one fixed collection.

    Every window produced by ``self.generate_cursor_time()`` is keyed by
    ``self.source_coll``.

    :return: list of ``{source_coll: {'cursor_st': int, 'cursor_et': int}}``
        with both bounds as epoch seconds, in window order.
    """
    df = self.generate_cursor_time()
    # Walk the two columns in lockstep instead of transposing the frame.
    return [
        {self.source_coll: {
            'cursor_st': int(st.timestamp()),
            'cursor_et': int(et.timestamp()),
        }}
        for st, et in zip(df['st'], df['et'])
    ]
|
||||
|
||||
def get_event_coll(self) -> list:
|
||||
"""
|
||||
根据游标时间戳 返回要处理的集合
|
||||
:return: [{'event_2020-12-10': {'cursor_st': 1607608848, 'cursor_et': 1607610648}}, {'event_2020-12-10': {'cursor_st': 1607610648, 'cursor_et': 1607610791}}]
|
||||
"""
|
||||
df = self.generate_cursor_time()
|
||||
df['event_coll_s'] = df['st'].apply(lambda x: f'event_{x.date().strftime("%Y-%m-%d")}')
|
||||
df['event_coll_e'] = df['et'].apply(lambda x: f'event_{x.date().strftime("%Y-%m-%d")}')
|
||||
cursor_list = []
|
||||
@ -116,6 +133,12 @@ class Task(metaclass=abc.ABCMeta):
|
||||
cursor_list.append(data)
|
||||
return cursor_list
|
||||
|
||||
def get_source_coll(self) -> list:
    """Return the cursor windows for the configured source collection.

    A ``source_coll`` of ``'event'`` is resolved through
    ``get_event_coll()``; any other value goes through
    ``get_single_coll()``.
    """
    build_windows = self.get_event_coll if self.source_coll == 'event' else self.get_single_coll
    return build_windows()
|
||||
|
||||
def set_run_status(self, status: bool):
|
||||
"""
|
||||
设置运行状态
|
||||
@ -134,6 +157,6 @@ class Task(metaclass=abc.ABCMeta):
|
||||
self.set_run_ts()
|
||||
self.set_run_status(True)
|
||||
self.get_cursor()
|
||||
cursor_list = self.get_event_coll()
|
||||
cursor_list = self.get_source_coll()
|
||||
self.cleaning(cursor_list)
|
||||
self.set_run_status(False)
|
||||
|
@ -1,4 +1,5 @@
|
||||
from loguru import logger
|
||||
from .field_type import *
|
||||
|
||||
logger.add('/data/log/data_cleaning/log.log', format="{time} {level} {name}:{line} {message}", level="INFO",
|
||||
rotation="100 MB", retention='7 days',
|
||||
|
4
utils/field_type.py
Normal file
4
utils/field_type.py
Normal file
@ -0,0 +1,4 @@
|
||||
from typing import TypeVar
|
||||
|
||||
IntStr = TypeVar('IntStr', int, str)
|
||||
IntFloat = TypeVar('IntFloat', int, float)
|
Loading…
Reference in New Issue
Block a user