数据清理
This commit is contained in:
parent
16ddfda32f
commit
c6b50804d3
40
clear_up.py
Normal file
40
clear_up.py
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
|
from settings import settings
|
||||||
|
|
||||||
|
# Game identifier. It is sent as the `game` query parameter of the server-list
# API and is also the database name used in the ClickHouse statements of this
# script.
game = 'zhengba'
# Server-list endpoint for `game`; the response is read as JSON further down.
server_list_url = (
    'http://gametools.legu.cc/?app=api&act=getServerList'
    f'&game={game}'
)
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from clickhouse_driver import Client
|
||||||
|
|
||||||
|
# ClickHouse client configured from the project settings.
client = Client(**settings.CK_CONFIG)

# Download the server list and discard the entries whose hostname is one of
# the addresses below.
# NOTE(review): the reason these two hosts are excluded is not visible in this
# script — confirm with the owner.
excluded_hosts = ['119.3.89.14', '119.3.105.109']
df = pd.read_json(server_list_url)
df = df.loc[~df['hostname'].isin(excluded_hosts)]

# Remaining server ids, converted to strings; used to build the `svrindex`
# filter of the cleanup query.
serverid = tuple(map(str, df['serverid'].to_list()))
|
||||||
|
|
||||||
|
# Render the known server ids as an explicit SQL literal list instead of
# interpolating Python's tuple repr: repr() emits a trailing comma for a single
# element, `()` for no elements, and switches to double quotes (an identifier
# in ClickHouse) when a value contains a single quote.
if not serverid:
    # With no known servers every row satisfies `not in`, so the query would
    # select events from the whole table. Stop instead of building it.
    raise SystemExit('server list is empty; refusing to build the cleanup query')
_serverid_sql = ', '.join(
    "'{}'".format(str(sid).replace('\\', '\\\\').replace("'", "\\'"))
    for sid in serverid
)

# Select events whose (account, time, name, os, server index) combination
# occurs exactly once and that either were reported with os "windows"
# (case-insensitive) or carry a server index missing from the server list.
# At most 2000 groups are returned per run.
sql = f"""select `#account_id`, `#event_time`, `#event_name`,`#os`,`svrindex`, count() as n
from {game}.event
where
lower(`#os`) = 'windows'
or svrindex not in ({_serverid_sql})
group by `#account_id`, `#event_time`, `#event_name`,`#os`,`svrindex`
having n = 1 limit 2000"""
|
||||||
|
|
||||||
|
# Run the selection query. With `columnar=True` and `with_column_types=True`
# the driver returns a pair: the result values grouped per column, and a list
# of (column name, column type) tuples.
data, columns = client.execute(
    sql, columnar=True, with_column_types=True
)
if not data:
    # Nothing matched, so there is nothing to insert. SystemExit is raised
    # directly because the `exit()` helper is injected by the `site` module
    # for interactive sessions and is not guaranteed to exist in every
    # interpreter setup (e.g. `python -S`).
    raise SystemExit(0)
|
||||||
|
# Rebuild a DataFrame from the columnar result: each entry of `columns` starts
# with the column name, and `data` holds one sequence of values per column.
column_names = [col[0] for col in columns]
data_df = pd.DataFrame(dict(zip(column_names, data)))

# The per-group count was only needed for the `having` filter of the query.
data_df = data_df.drop(columns='n')
# Every selected row is written back with sign = -1.
# NOTE(review): presumably this cancels the original rows in a collapsing
# table engine — confirm against the table definition.
data_df['sign'] = -1
# Serialise timestamps as plain strings so that json.dumps can encode them.
time_format = '%Y-%m-%d %H:%M:%S'
data_df['#event_time'] = data_df['#event_time'].map(
    lambda ts: ts.strftime(time_format)
)

# One JSON object per row, newline separated, appended to the INSERT statement.
rows = data_df.to_dict(orient='records')
payload = '\n'.join(json.dumps(row) for row in rows)
insert_sql = f'INSERT INTO {game}.event FORMAT JSONEachRow ' + payload

client.execute(insert_sql)
|
Loading…
Reference in New Issue
Block a user