data_cleaning/task/summary_assets.py
2021-01-18 10:52:48 +08:00

84 lines
3.7 KiB
Python

import traceback
from typing import List, Dict
from pymongo import UpdateOne
from pydantic import Field
import pandas as pd
from .task import Task
from utils import *
from model import BaseModel, MdbObjectId, IntStr
class SummaryAssets(Task):
    """
    Resource production and consumption.

    Cleaning task that copies ``res`` events touching the configured
    resources from the source collections into ``self.dest_coll``, in both
    the local and the remote database.
    """

    class Model(BaseModel):
        # Validated shape of one cleaned record. Fields declared with an
        # ``alias`` are populated from (and dumped back to) the
        # underscore-prefixed key of the source document.

        # Required identity / routing fields.
        id: MdbObjectId = Field(..., title="id", alias='_id')
        channel_name: str = Field(..., min_length=1, title="channel", alias='_channel_name')
        owner_name: str = Field(..., min_length=1, title="owner", alias='_owner_name')
        channel_uid: str = Field(..., min_length=1, title="channel_uid", alias='_channel_uid')
        device_id: str = Field(..., min_length=1, title='device_id', alias='_device_id')
        district_server_id: int = Field(..., title="区服id", alias='_district_server_id')
        game_role_id: str = Field(..., min_length=1, title="角色id", alias='_game_role_id')
        event_time: int = Field(..., title="事件时间", alias='_event_time')

        # Optional role state and the produced (prize) / consumed (need) lists.
        role_level: int = Field(None, title="角色等级")
        role_vip: int = Field(None, title="角色vip等级")
        role_stage: IntStr = Field(None, title="关卡")
        prize: List[Dict] = Field(None, title='奖励')
        need: List[Dict] = Field(None, title='消耗')

        # Required fields; ``cdate`` is computed by ``cleaning`` before validation.
        cdate: int = Field(..., title='当天0点')
        function: str = Field(..., title='功能')
        function_detail: str = Field(..., title='功能详细')

    def cleaning(self, cursor_list):
        """
        Clean ``res`` events for every source collection window in *cursor_list*.

        :param cursor_list: iterable of dicts, each mapping a source collection
            name to a dict that carries ``cursor_st`` and ``cursor_et``.
        :return: ``None``. Returns early (after sending an alert and logging a
            warning) when no ``assets_filter`` configuration is stored in the
            local ``attr`` collection.

        For each window whose start and end differ, matching documents are
        validated through ``Model`` and upserted by ``_id`` into
        ``self.dest_coll`` of the local database and then the remote database;
        afterwards the cursor is recorded with ``set_cursor``. A document that
        fails processing is reported and skipped.
        """
        # Look up which resources should be cleaned.
        attr_doc = self.local_db['attr'].find_one({'pname': 'assets_filter'}) or dict()
        assets_filter = attr_doc.get('data')
        if not assets_filter:
            msg = f'{self.game_name} 请先设置要分析的资源'
            ddsend_msg(msg)
            logger.warning(msg)
            return

        # Collect the distinct ``a`` and ``t`` values of the configured entries.
        a_values, t_values = set(), set()
        for entry in assets_filter:
            a_values.add(entry['a'])
            t_values.add(entry['t'])
        a_list = list(a_values)
        t_list = list(t_values)

        for cursor in cursor_list:
            # Each cursor maps a source collection name to its cursor window.
            for source_coll, ts in cursor.items():
                # An empty window has nothing to process.
                if ts['cursor_st'] == ts['cursor_et']:
                    continue
                logger.info(f'开始处理{self.game_name} 处理 {source_coll} 游标 {ts}')

                # NOTE(review): without $elemMatch the ``a`` and ``t`` conditions
                # may be satisfied by different array elements - confirm that
                # this coarse pre-filter is intended.
                prize_match = {'prize.a': {'$in': a_list}, 'prize.t': {'$in': t_list}}
                need_match = {'need.a': {'$in': a_list}, 'need.t': {'$in': t_list}}
                where = {
                    '$or': [prize_match, need_match],
                    '_event_name': 'res',
                    # Half-open range: start inclusive, end exclusive.
                    '_ut': {
                        '$gte': ts['cursor_st'],
                        '$lt': ts['cursor_et'],
                    },
                }
                # Presumably the projection of the fields declared on Model;
                # verify against BaseModel.get_fields.
                projection = self.Model.get_fields()
                source_docs = self.local_db[source_coll].find(where, projection)

                bulk_data = []
                for item in source_docs:
                    try:
                        # Midnight of the event's day in the task timezone,
                        # expressed as epoch seconds.
                        event_ts = pd.Timestamp(item['_event_time'], unit='s', tz=self.timezone)
                        item['cdate'] = int(event_ts.normalize().timestamp())
                        data = self.Model(**item).dict(by_alias=True)
                        upsert_op = UpdateOne({'_id': data['_id']}, {'$set': data}, upsert=True)
                        bulk_data.append(upsert_op)
                    except Exception as e:
                        # Best effort per document: report the failure and move on.
                        msg = traceback.format_exc()
                        ddsend_msg(f'{self.game_name}.{source_coll}字段异常 {msg}')
                        logger.error(repr(e))

                if bulk_data:
                    # Local first, then remote; unordered so one failing
                    # operation does not stop the remaining ones.
                    self.local_db[self.dest_coll].bulk_write(bulk_data, ordered=False)
                    self.remote_db[self.dest_coll].bulk_write(bulk_data, ordered=False)

                self.set_cursor(cursor_st=ts['cursor_st'], cursor_et=ts['cursor_et'])