data_cleaning/task/summary_login.py
2020-12-26 17:01:32 +08:00

65 lines
2.9 KiB
Python

from pymongo import UpdateOne
from pydantic import Field
import pandas as pd
from .task import Task
from utils import *
from model import GBaseModel
class SummaryLogin(Task):
    """Daily active roles.

    For each cursor window, finds the roles that appear in the source
    collection within the window and are not yet recorded for that day in
    ``dest_coll``, then upserts one document per ``(cdate, _game_role_id)``
    into both the local and the remote destination collection.
    """

    class Model(GBaseModel):
        # Fields copied from the source document into the daily record.
        # The aliases are the underscore-prefixed keys used when the model is
        # built from a source document and dumped with ``by_alias=True``.
        cdate: int = Field(..., title='当天0点')
        first_device_id: str = Field(None, title='第一次登录设备id', alias='_first_device_id')
        manufacturer: str = Field(None, title='设备品牌', alias='_manufacturer')
        model: str = Field(None, title='型号', alias='_model')
        os_version: str = Field(None, title='系统版本', alias='_os_version')
        app_name: str = Field(None, title='游戏版本', alias='_app_name')
        screen_width: int = Field(None, title='屏幕宽', alias='_screen_width')
        screen_height: int = Field(None, title='屏幕高', alias='_screen_height')

    def cleaning(self, cursor_list):
        """Record newly active roles for every cursor window.

        :param cursor_list: iterable of dicts; each maps a source collection
            name to a window dict holding ``cursor_st`` and ``cursor_et``.
            The window is matched against ``_ut`` as ``[cursor_st, cursor_et)``
            and ``cursor_st`` is read as epoch seconds to derive ``cdate``.
        """
        for cursor in cursor_list:
            for source_coll, ts in cursor.items():
                # An empty window has nothing to process.
                if ts['cursor_st'] == ts['cursor_et']:
                    continue
                logger.info(f'开始处理{self.game_name} 处理 {source_coll} 游标 {ts}')

                where = {
                    '_ut': {
                        '$gte': ts['cursor_st'],
                        '$lt': ts['cursor_et'],
                    }
                }
                projection = self.Model.get_fields()

                # Roles active inside the current window.
                role_list = self.local_db[source_coll].distinct('_game_role_id', where)

                # Midnight (in the task's timezone) of the day the window starts on.
                # NOTE(review): derived from cursor_st only, so a window that
                # crosses midnight is attributed entirely to its start day.
                cdate = int(
                    pd.Timestamp(ts['cursor_st'], unit='s', tz=self.timezone)
                    .normalize()
                    .timestamp()
                )

                # Roles already recorded as active for that day.
                exists_role_list = self.local_db[self.dest_coll].distinct(
                    '_game_role_id', {'cdate': cdate})

                # Roles that still need a record.
                role_set = set(role_list) - set(exists_role_list)

                # One upsert per role. Keyed by role id so that several source
                # documents of the same role collapse into a single operation
                # (the last document in cursor order wins) instead of racing
                # each other inside an unordered bulk write.
                updates = {}
                if role_set:
                    # Restrict the lookup to the same window that produced
                    # role_set; without it every historical document of each
                    # role would be read and turned into an upsert.
                    query = dict(where)
                    query['_game_role_id'] = {'$in': list(role_set)}
                    for item in self.local_db[source_coll].find(query, projection):
                        try:
                            item['cdate'] = cdate
                            model = self.Model(**item)
                            data = model.dict(by_alias=True)
                            # The source document's _id must not be written
                            # into the destination; tolerate its absence.
                            data.pop('_id', None)
                            role_id = data['_game_role_id']
                            updates[role_id] = UpdateOne(
                                {'cdate': cdate, '_game_role_id': role_id},
                                {'$set': data},
                                upsert=True,
                            )
                        except Exception as e:
                            # Best effort: a malformed document is logged and
                            # skipped so it cannot block the whole window.
                            logger.error(f'msg:{e}')

                bulk_data = list(updates.values())
                if bulk_data:
                    # NOTE(review): if the remote write fails after the local
                    # one succeeded, a retry will see these roles as already
                    # recorded locally and never resend them — confirm whether
                    # the remote write should go first.
                    self.local_db[self.dest_coll].bulk_write(bulk_data, ordered=False)
                    self.remote_db[self.dest_coll].bulk_write(bulk_data, ordered=False)

                # NOTE(review): the original indentation was lost; the cursor is
                # advanced once per window whether or not anything was written
                # — confirm this matches the intended nesting.
                self.set_cursor(cursor_st=ts['cursor_st'], cursor_et=ts['cursor_et'])