data_cleaning/task/summary_shopbuy.py
2021-02-01 18:33:39 +08:00

70 lines
2.8 KiB
Python

import traceback
from typing import List
from pymongo import UpdateOne
from pydantic import Field
import pandas as pd
from .task import Task
from utils import *
from model import GBaseModel, IntStr, MdbObjectId
class SummaryShopbuy(Task):
    """Cleaning task that summarises shop purchase events.

    For every cursor window, events with ``_event_name == 'Shop'`` and
    ``act == 'buy'`` are read from the local source collection, the first
    ``need`` and ``prize`` entries are flattened into top-level fields, and
    the validated rows are upserted by ``_id`` into ``dest_coll`` on both the
    local and the remote database.
    """

    class Model(GBaseModel):
        """Schema of one summarised purchase row."""

        id: MdbObjectId = Field(..., title="id", alias='_id')
        # Epoch seconds of midnight (in the task timezone) of the event's day.
        cdate: int = Field(..., title='当天0点')
        # Reward list as stored on the source event.
        prize: List[dict] = Field(None, title='奖励')
        # Cost list as stored on the source event.
        need: List[dict] = Field(None, title='消耗')
        # Player nickname.
        user_name: str = Field(None, title='昵称')
        # Shop id.
        stype: IntStr = Field(None, title='商店id')
        # The six fields below are copied from need[0] / prize[0]
        # (sub-keys 'a', 'n', 't') by the cleaning step.
        needa: str = Field(...)
        needn: int = Field(...)
        needt: str = Field(...)
        prizea: str = Field(...)
        prizen: int = Field(...)
        prizet: str = Field(...)

    def _to_upsert(self, doc):
        """Convert one raw event document into an ``UpdateOne`` upsert.

        Mutates ``doc`` in place (adds ``cdate`` and the flattened
        ``need*`` / ``prize*`` keys), validates it through ``Model`` and
        returns an upsert keyed on ``_id``. Any lookup or validation error
        propagates to the caller.
        """
        day_start = pd.Timestamp(
            doc['_event_time'], unit='s', tz=self.timezone
        ).normalize()
        doc['cdate'] = int(day_start.timestamp())

        # Same fill order as the flat assignments this replaces:
        # need a/t/n first, then prize a/t/n.
        for list_key in ('need', 'prize'):
            first_entry = doc[list_key][0]
            for sub_key in ('a', 't', 'n'):
                doc[list_key + sub_key] = first_entry[sub_key]

        row = self.Model(**doc).dict(by_alias=True)
        return UpdateOne({'_id': row['_id']}, {'$set': row}, upsert=True)

    def cleaning(self, cursor_list):
        """Process every non-empty cursor window in ``cursor_list``.

        Args:
            cursor_list: iterable of dicts, each mapping a source collection
                name to a dict with ``cursor_st`` and ``cursor_et``. The pair
                is used as the ``_event_time`` range ``[cursor_st, cursor_et)``.

        Documents that fail flattening or validation are reported through
        ``ddsend_msg`` and ``logger.error`` and then skipped.
        """
        for window_map in cursor_list:
            for source_coll, ts in window_map.items():
                window_start = ts['cursor_st']
                window_end = ts['cursor_et']
                if window_start == window_end:
                    # Empty window: nothing to read.
                    continue

                logger.info(f'开始处理{self.game_name} 处理 {source_coll} 游标 {ts}')
                query = {
                    '_event_name': 'Shop',
                    "act": "buy",
                    '_event_time': {'$gte': window_start, '$lt': window_end},
                }
                fields = self.Model.get_fields()

                upserts = []
                for doc in self.local_db[source_coll].find(query, fields):
                    try:
                        upserts.append(self._to_upsert(doc))
                    except Exception as exc:
                        # Best effort: alert with the full traceback, log, and
                        # carry on with the next document.
                        ddsend_msg(
                            f'{self.game_name}.{source_coll}字段异常 {traceback.format_exc()}'
                        )
                        logger.error(repr(exc))

                if upserts:
                    dest = self.dest_coll
                    self.local_db[dest].bulk_write(upserts, ordered=False)
                    self.remote_db[dest].bulk_write(upserts, ordered=False)

                # NOTE(review): nesting reconstructed, since the source had lost
                # its indentation. Assumed the cursor is advanced for every
                # processed window, including ones with no matching rows; confirm.
                self.set_cursor(cursor_st=window_start, cursor_et=window_end)