数据清理
This commit is contained in:
parent
16ddfda32f
commit
c6b50804d3
40
clear_up.py
Normal file
40
clear_up.py
Normal file
@ -0,0 +1,40 @@
|
||||
import json
|
||||
|
||||
from settings import settings
|
||||
|
||||
# Name of the game; it selects both the server-list query parameter and the
# ClickHouse database (`<game>.event`) used by the rest of this script.
game = 'zhengba'

# HTTP endpoint that returns the server list for `game`.
server_list_url = f'http://gametools.legu.cc/?app=api&act=getServerList&game={game}'
|
||||
|
||||
import pandas as pd
|
||||
from clickhouse_driver import Client
|
||||
|
||||
# ClickHouse connection built from the project settings.
client = Client(**settings.CK_CONFIG)

# Hosts whose servers are left out of the valid-server list.
# NOTE(review): presumably test/internal machines -- confirm with the owner
# of the server list.
excluded_hosts = ['119.3.89.14', '119.3.105.109']

# Load the server list and keep only servers that are not on an excluded host.
df = pd.read_json(server_list_url)
df = df[~df['hostname'].isin(excluded_hosts)]

# Remaining server ids as strings; used later to build the SQL filter on
# `svrindex`.
serverid = tuple(str(i) for i in df['serverid'].to_list())
|
||||
|
||||
# An empty server list would make the filter below match every row that has a
# `svrindex`, so refuse to continue instead of building a query from it.
if not serverid:
    raise SystemExit('server list is empty; refusing to build the clean-up query')

# Render the IN list explicitly rather than formatting a Python tuple into the
# SQL: a one-element tuple formats as ('1',) with a trailing comma, and repr()
# switches to double quotes when a value contains a single quote.
server_id_list = ', '.join(
    "'" + str(sid).replace('\\', '\\\\').replace("'", "\\'") + "'"
    for sid in serverid
)

# Select event rows that were reported from Windows or whose `svrindex` is not
# in the valid server list, keeping only groups that occur exactly once
# (n = 1), at most 2000 per run.
sql = f"""select `#account_id`, `#event_time`, `#event_name`,`#os`,`svrindex`, count() as n
from {game}.event
where
lower(`#os`) = 'windows'
or svrindex not in ({server_id_list})
group by `#account_id`, `#event_time`, `#event_name`,`#os`,`svrindex`
having n = 1 limit 2000"""
|
||||
|
||||
# Run the selection. With columnar=True the result is one sequence per column;
# with_column_types=True additionally returns the (name, type) pair of every
# column.
data, columns = client.execute(
    sql, columnar=True, with_column_types=True
)

# Nothing matched: there is nothing to clean up, so stop successfully.
# SystemExit is raised directly because the bare `exit` name is only injected
# by the `site` module and is not guaranteed to exist.
if not data:
    raise SystemExit(0)

# Rebuild a DataFrame keyed by column name from the columnar result.
data_df = pd.DataFrame(
    {col[0]: d for d, col in zip(data, columns)}
)
|
||||
|
||||
# The helper count column is not part of the event table.
data_df = data_df.drop(columns='n')

# Every selected row is written back with sign = -1.
# NOTE(review): presumably this cancels the original row in a collapsing-style
# table engine -- confirm against the `event` table definition.
data_df['sign'] = -1

# Serialise timestamps as 'YYYY-MM-DD HH:MM:SS' strings so json.dumps can
# encode them.
data_df['#event_time'] = data_df['#event_time'].apply(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'))

# Build one INSERT statement with the rows inlined, one JSON object per line.
insert_sql = f'INSERT INTO {game}.event FORMAT JSONEachRow '
insert_sql = insert_sql + '\n'.join(json.dumps(item) for item in data_df.T.to_dict().values())

client.execute(insert_sql)
|
Loading…
Reference in New Issue
Block a user