first commit

This commit is contained in:
wuaho 2021-09-22 18:03:33 +08:00
commit 9170fefbe9
24 changed files with 641 additions and 0 deletions

132
.gitignore vendored Normal file
View File

@ -0,0 +1,132 @@
# ---> Python
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
.idea

0
apis/__init__.py Normal file
View File

7
apis/api.py Normal file
View File

@ -0,0 +1,7 @@
from fastapi import APIRouter
from .check_data import controller as check_data_api
# Top-level router of the API; app.py mounts it under settings.API_V1_STR.
api_router = APIRouter()
# Expose the routes of the check_data controller under the '/check' prefix.
api_router.include_router(check_data_api.router, prefix='/check')

View File

View File

@ -0,0 +1,33 @@
from fastapi import APIRouter, Request
import schemas
from apis.check_data import service
from db.mongo import check_template_coll
# Router that collects the check-data endpoints declared below.
router = APIRouter()
@router.post("/check")
async def check(request: Request,
                data_in: schemas.CheckData,
                game: str,
                db_name: str = 'debug'
                ) -> schemas.Msg:
    """Run a data check and return its report.

    The request body is handed to ``service.check_data`` together with the
    ``game`` and ``db_name`` query parameters; whatever the service returns is
    wrapped in a ``Msg`` with ``code=0`` and ``msg='ok'``.
    """
    report = await service.check_data(db_name, game, data_in)
    response = schemas.Msg(code=0, msg='ok', data=report)
    return response
@router.post("/save")
async def save(request: Request,
               data_in: schemas.AddTemplate,
               game: str,
               db_name: str = 'debug'
               ) -> schemas.Msg:
    """Store a check template and acknowledge it.

    Delegates to ``service.save_template`` and wraps its return value in a
    ``Msg`` with ``code=0`` and ``msg='ok'``.
    """
    saved = await service.save_template(data_in, game, db_name)
    response = schemas.Msg(code=0, msg='ok', data=saved)
    return response
@router.get('/template')
async def template(request: Request) -> schemas.Msg:
    """Return the stored check templates selected by the query string.

    Each query parameter is passed to ``service.get_template`` as one entry of
    the lookup filter. Parameters whose name starts with ``$`` are dropped:
    the filter is built from untrusted input, and a name such as ``$where``
    would otherwise be handed to MongoDB as a top-level query operator.

    Returns:
        A ``Msg`` with ``code=0``, ``msg='ok'`` and the service result as
        ``data``.
    """
    query = {
        key: value
        for key, value in dict(request.query_params).items()
        if not key.startswith('$')
    }
    data = await service.get_template(query)
    return schemas.Msg(code=0, msg='ok', data=data)

View File

@ -0,0 +1,89 @@
# coding:utf-8
import copy
import re
from collections import namedtuple
from ipaddress import IPv4Address
import numpy as np
import clickhouse_driver
import schemas
from core import settings
from db import ck_client
from db.mongo import check_template_coll
# Names of the value types a check may ask for, and the Python class each one
# is tested against with isinstance() in check_data.
Type = namedtuple('Type', 'string integer array ipv4')
type_map = Type(str, np.number, list, IPv4Address)
def _quote_identifier(name) -> str:
    """Return ``name`` as a backtick-quoted ClickHouse identifier."""
    escaped = str(name).replace('\\', '\\\\').replace('`', '\\`')
    return f'`{escaped}`'


def _quote_literal(value) -> str:
    """Return ``value`` as a single-quoted ClickHouse string literal."""
    escaped = str(value).replace('\\', '\\\\').replace("'", "\\'")
    return f"'{escaped}'"


def _build_check_sql(db, game, event_name, fields, where, limit) -> str:
    """Build the query that fetches the newest ``limit`` matching events."""
    columns = ','.join(_quote_identifier(field) for field in fields)
    conditions = [
        f"game={_quote_literal(game)}",
        f"`#event_name`={_quote_literal(event_name)}",
    ]
    conditions.extend(
        f"{_quote_identifier(key)}={_quote_literal(value)}"
        for key, value in where.items()
    )
    return (
        f"select {columns} from {_quote_identifier(db)}.event"
        f" where {' and '.join(conditions)}"
        f" order by `#event_time` desc"
        f" limit {int(limit)}"
    )


async def check_data(db, game, data_in: schemas.CheckData):
    """Check the newest stored event of a kind against the expected types.

    The fields to verify are ``data_in.props`` merged with
    ``settings.DEFAULT_FIELD`` (the defaults win on a name clash). The latest
    events matching ``game``, ``data_in.event_name`` and ``data_in.where`` are
    read from ``<db>.event`` and the first returned row is type-checked.

    Every value that ends up in the SQL text comes from the request, so
    identifiers and literals are quoted and escaped before interpolation.

    Args:
        db: Name of the ClickHouse database holding the ``event`` table.
        game: Value the ``game`` column must equal.
        data_in: Event name, uniqueness flag, expected field types and extra
            equality filters.

    Returns:
        ``{'fail_list': [...], 'pass_list': [...]}`` where each entry is an
        HTML ``<p>`` snippet describing one finding.
    """
    # Work on a copy so the request model is not modified by the merge.
    check_type = copy.deepcopy(data_in.props)
    check_type.update(settings.DEFAULT_FIELD)

    sql = _build_check_sql(db, game, data_in.event_name, check_type,
                           data_in.where, limit=10)
    print(sql)

    fail_list = []
    pass_list = []
    report = {'fail_list': fail_list, 'pass_list': pass_list}

    try:
        df = await ck_client.query_dataframe(sql)
    except clickhouse_driver.errors.ServerException as e:
        # Code 47 is handled as "a selected column does not exist"; the column
        # name is pulled out of the server message when it has the usual form.
        if e.code == 47:
            msg = re.match(r"""DB::Exception: Missing columns: '(.*)' while processing query""", e.message)
            field = '未知'
            if msg:
                field = msg.group(1)
            fail_list.append(f'<p style="color:red;font-size:17px;">数据库不存在字段-> {field}</p>')
        else:
            fail_list.append('<p style="color:red;font-size:17px;">数据库查询未知错误</p>')
        return report

    if df.empty:
        fail_list.append('<p style="color:blue;font-size:17px;">根据过滤条件未查到任何数据也有可能是数据未及时入库。3分钟后还没查到说明存在问题</p>')
        return report

    if data_in.is_unique and len(df) > 1:
        fail_list.append('<p style="color:yellow;font-size:17px;">警告:记录数大于一条</p>')

    for k, t in check_type.items():
        # Only the names declared on the namedtuple are valid type names; a
        # plain getattr() would also resolve tuple methods such as 'count'.
        if not (isinstance(t, str) and t in type_map._fields):
            fail_list.append(f'<p style="color:red;font-size:17px;">错误:字段{k} 未知的期望类型{t}</p>')
            continue
        value = df[k][0]
        if not isinstance(value, getattr(type_map, t)):
            fail_list.append(f'<p style="color:red;font-size:17px;">错误:字段{k} 期望{t}类型,得到{type(value)}</p>')
        else:
            pass_list.append(f'<p style="color:green;font-size:17px;">通过:字段{k} 是期望的类型</p>')
    return report
async def save_template(data_in: schemas.AddTemplate,
                        game: str,
                        db_name: str = 'debug'):
    """Create or overwrite the check template that has ``data_in.title``.

    The document is matched by title only (upsert); ``game``, ``db_name`` and
    the full ``data_in.dict()`` (stored under ``'check'``) are set on it.

    Returns:
        Always ``True``.
    """
    selector = {'title': data_in.title}
    changes = {'$set': {'game': game, 'db_name': db_name, 'check': data_in.dict()}}
    await check_template_coll.update_one(selector, changes, upsert=True)
    return True
async def get_template(*args, **kwargs):
    """Fetch check templates and group them by their ``game`` value.

    All arguments are forwarded unchanged to ``check_template_coll.find``.

    Returns:
        A dict mapping each ``game`` to the list of matching documents. The
        ``_id`` of every document is converted to ``str``: the controller
        returns these documents in a JSON response, and a raw BSON ObjectId
        cannot be JSON-encoded.
    """
    doc_group = {}
    async for doc in check_template_coll.find(*args, **kwargs):
        # '_id' may be absent when the caller passes a projection excluding it.
        if '_id' in doc:
            doc['_id'] = str(doc['_id'])
        doc_group.setdefault(doc['game'], []).append(doc)
    return doc_group

3
apis/deps.py Normal file
View File

@ -0,0 +1,3 @@
# coding:utf-8

16
app.py Normal file
View File

@ -0,0 +1,16 @@
from fastapi import FastAPI, Request
from starlette.middleware.cors import CORSMiddleware
from apis.api import api_router
from core import settings
# ASGI application object; main.py serves it as 'app:app'.
app = FastAPI(title=settings.PROJECT_NAME)
# Mount every API route under the versioned prefix (settings.API_V1_STR).
app.include_router(api_router, prefix=settings.API_V1_STR)
# CORS: every origin, method and header is accepted, with credentials allowed.
# NOTE(review): a wildcard origin combined with allow_credentials=True is very
# permissive — confirm this is intended outside of development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=['*'],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

1
core/__init__.py Normal file
View File

@ -0,0 +1 @@
from .config import settings

135
core/config.py Normal file
View File

@ -0,0 +1,135 @@
from __future__ import annotations
import json
from typing import Optional, Union
from pydantic import BaseSettings, Field
class GlobalConfig(BaseSettings):
    """Global configurations.

    Base class of the per-environment settings (DevConfig, ProdConfig). It is
    also instantiated on its own at module level to read ENV_STATE.
    """
    # This variable will be loaded from the .env file. However, if there is a
    # shell environment variable having the same name, that will take precedence.
    # the class Field is necessary while defining the global variables
    ENV_STATE: Optional[str] = Field('dev', env="ENV_STATE")
    # NOTE(review): the attribute is spelled KAHKA_HOST but is filled from the
    # CK_HOST environment variable — confirm which service this host refers to.
    KAHKA_HOST: Optional[str] = Field('127.0.0.1', env="CK_HOST")
    # environment specific configs
    # CK_USERNAME: Optional[str] = None
    # API_PASSWORD: Optional[str] = None
    # Used as the FastAPI title and as the URL prefix of the API router (app.py).
    PROJECT_NAME: str = 'CHECK_DATA'
    API_V1_STR: str = '/v1'

    class Config:
        """Loads the dotenv file."""
        env_file: str = ".env"

    # Expected type name of the properties shared by all events. check_data
    # merges this mapping over the fields requested by the caller, so these
    # fields are verified on every check. The type names are those of
    # `type_map` in apis/check_data/service.py.
    # NOTE(review): 'maxmapid' and 'mapid' are declared 'string' here while
    # sql/create_event.sql declares both columns Nullable(UInt16) — confirm
    # which side is correct.
    DEFAULT_FIELD: dict = {
        '#ip': 'ipv4',
        '#country': 'string',
        '#province': 'string',
        '#city': 'string',
        '#os': 'string',
        '#device_id': 'string',
        '#screen_height': 'integer',
        '#screen_width': 'integer',
        '#device_model': 'string',
        '#app_version': 'string',
        '#bundle_id': 'string',
        'app_name': 'string',
        'game_version': 'string',
        '#os_version': 'string',
        '#network_type': 'string',
        '#carrier': 'string',
        '#manufacturer': 'string',
        '#app_id': 'string',
        '#account_id': 'string',
        '#distinct_id': 'string',
        'binduid': 'string',
        'channel': 'string',
        'owner_name': 'string',
        'role_name': 'string',
        'exp': 'integer',
        'zhanli': 'integer',
        'maxmapid': 'string',
        'mapid': 'string',
        'ghid': 'string',
        'rmbmoney': 'integer',
        'jinbi': 'integer',
        'svrindex': 'string',
        'lv': 'integer',
        'vip': 'integer',
        'game': 'string',
        # 'unitPrice': 'integer',
        # 'money': 'string',
        # 'isdangrishouci': 'integer',
        # 'islishishouci': 'integer',
        # 'is_today_reg': 'integer',
        # 'orderid': 'string',
        # 'proid': 'string',
        #
        # 'step_id': 'integer',
        # 'step_group': 'integer',
        # 'guide_start_time': 'integer',
        #
        # 'online_ts': 'integer'
    }
class DevConfig(GlobalConfig):
    """Development configurations."""
    # ClickHouse connection settings; db/ck.py passes them as keyword
    # arguments to CKDrive.connected_pool.
    # NOTE(review): host and credentials are hard-coded in source — consider
    # supplying them through the environment / .env file instead.
    CK_CONFIG = {'host': '139.159.159.3',
                 'port': 9654,
                 'user': 'legu',
                 'password': 'gncPASUwpYrc'
                 }

    class Config:
        env_prefix: str = "DEV_"

    # MongoDB connection settings.
    # NOTE(review): the password is hard-coded in source as well.
    MDB_HOST: str = '10.0.0.7'
    MDB_PORT: int = 27017
    MDB_USER: str = 'root'
    MDB_PASSWORD: str = 'iamciniao'
    MDB_DB: str = 'xdata'
    # Connection string read by db/mongo.py.
    # NOTE(review): this f-string is evaluated once, while the class body runs,
    # from the literal defaults above; overriding MDB_* through the environment
    # presumably does not change DATABASE_URI — confirm.
    DATABASE_URI = f'mongodb://{MDB_USER}:{MDB_PASSWORD}@{MDB_HOST}:{MDB_PORT}/admin'
class ProdConfig(GlobalConfig):
    """Production configurations."""
    # ClickHouse connection settings (same values as DevConfig.CK_CONFIG).
    # NOTE(review): host and credentials are hard-coded in source.
    # NOTE(review): no MDB_* / DATABASE_URI is declared on this class, while
    # db/mongo.py reads settings.DATABASE_URI — confirm how production is
    # expected to obtain it.
    CK_CONFIG = {'host': '139.159.159.3',
                 'port': 9654,
                 'user': 'legu',
                 'password': 'gncPASUwpYrc'
                 }

    class Config:
        env_prefix: str = "PROD_"
class FactoryConfig:
    """Returns a config instance depending on the ENV_STATE variable."""

    def __init__(self, env_state: Optional[str]) -> None:
        """Remember the environment name (``"dev"`` or ``"prod"``)."""
        self.env_state = env_state

    def __call__(self) -> Union[DevConfig, ProdConfig]:
        """Instantiate the settings class that matches ``env_state``.

        Raises:
            ValueError: If ``env_state`` is neither ``"dev"`` nor ``"prod"``.
                Returning ``None`` here would only surface later as an
                unrelated ``AttributeError`` on the module-level settings.
        """
        if self.env_state == "dev":
            return DevConfig()
        if self.env_state == "prod":
            return ProdConfig()
        raise ValueError(
            f"Unsupported ENV_STATE {self.env_state!r}; expected 'dev' or 'prod'"
        )
# Settings object shared by the rest of the project (re-exported by
# core/__init__.py): ENV_STATE is read through GlobalConfig, then the matching
# configuration class is instantiated by FactoryConfig.
settings = FactoryConfig(GlobalConfig().ENV_STATE)()
# print(config.KAHKA_HOST)
# print(config.__repr__())

1
db/__init__.py Normal file
View File

@ -0,0 +1 @@
from .ck import ck_client

5
db/ck.py Normal file
View File

@ -0,0 +1,5 @@
from core import settings
from utils.ck_drive import CKDrive
# Shared ClickHouse driver, re-exported as db.ck_client.
ck_client: CKDrive = CKDrive()
# Create the client pool at import time from the active settings' CK_CONFIG.
ck_client.connected_pool(**settings.CK_CONFIG)

7
db/mongo.py Normal file
View File

@ -0,0 +1,7 @@
import motor.motor_asyncio
from core import settings
# Async MongoDB client built at import time from settings.DATABASE_URI.
client = motor.motor_asyncio.AsyncIOMotorClient(settings.DATABASE_URI)
# Collection holding the saved check templates.
# NOTE(review): the database name 'xdata' is hard-coded here although the
# settings also define MDB_DB = 'xdata' — confirm whether it should be used.
check_template_coll = client.xdata.get_collection('check_template')

4
main.py Normal file
View File

@ -0,0 +1,4 @@
import uvicorn
if __name__ == '__main__':
    # Serve the `app` object of app.py on all interfaces, port 7997, with
    # auto-reload and debug switched on.
    # NOTE(review): reload/debug look like development settings, and newer
    # uvicorn releases reportedly no longer accept `debug` — confirm against
    # the pinned uvicorn version.
    uvicorn.run(app='app:app', host="0.0.0.0", port=7997, reload=True, debug=True)

2
schemas/__init__.py Normal file
View File

@ -0,0 +1,2 @@
from .msg import Msg
from .check_data import *

9
schemas/base.py Normal file
View File

@ -0,0 +1,9 @@
from pydantic import BaseModel
class DefaultModel(BaseModel):
    # NOTE(review): the indentation of this file was lost; the fields below are
    # assumed to belong to this class (which would make `pass` redundant) —
    # confirm against the original file.
    pass
    # Optional string properties, all defaulting to None. The same four names
    # appear in settings.DEFAULT_FIELD.
    binduid: str = None
    channel: str = None
    owner_name: str = None
    role_name: str = None

13
schemas/check_data.py Normal file
View File

@ -0,0 +1,13 @@
from pydantic import BaseModel
class CheckData(BaseModel):
    """Request body describing one data check."""
    # Value the `#event_name` column must equal.
    event_name: str
    # When true, finding more than one matching record is reported as a warning.
    is_unique: bool
    # Field name -> expected type name for the fields to verify.
    props: dict
    # Extra column -> value equality filters; none by default.
    where: dict = {}
class AddTemplate(BaseModel):
    """Request body for saving a check definition as a named template."""
    # The check definition to store.
    check: CheckData
    # Template title; save_template uses it as the lookup key when upserting.
    title: str

9
schemas/msg.py Normal file
View File

@ -0,0 +1,9 @@
from typing import Any
from pydantic import BaseModel
class Msg(BaseModel):
    """Response envelope returned by the API endpoints."""
    # Status code; the controllers in this project send 0 together with msg='ok'.
    code: int
    # Short status text.
    msg: str
    # Endpoint-specific payload.
    data: Any

65
sql/create_event.sql Normal file
View File

@ -0,0 +1,65 @@
-- (Re)creates the debug.event table.
-- NOTE: the existing table, including its data, is dropped first.
drop table if exists debug.event;
create table debug.event
(
    -- properties common to every event
    `#ip`            Nullable(IPv4),
    `#country`       Nullable(String),
    `#province`      Nullable(String),
    `#city`          Nullable(String),
    `#os`            Nullable(String),
    `#device_id`     Nullable(String),
    `#screen_height` Nullable(UInt16),
    `#screen_width`  Nullable(UInt16),
    `#device_model`  Nullable(String),
    `#app_version`   Nullable(String),
    `#bundle_id`     Nullable(String),
    app_name         Nullable(String),
    game_version     Nullable(String),
    `#os_version`    Nullable(String),
    `#network_type`  Nullable(String),
    `#carrier`       Nullable(String),
    `#manufacturer`  Nullable(String),
    `#app_id`        Nullable(String),
    `#account_id`    String,
    `#distinct_id`   Nullable(String),
    binduid          Nullable(String),
    channel          Nullable(String),
    owner_name       String default '',
    role_name        Nullable(String),
    exp              Nullable(UInt64),
    zhanli           Nullable(UInt64),
    maxmapid         Nullable(UInt16),
    mapid            Nullable(UInt16),
    ghid             Nullable(String),
    rmbmoney         Nullable(UInt64),
    jinbi            Nullable(UInt64),
    svrindex         Nullable(String),
    lv               Nullable(UInt16),
    vip              Nullable(UInt16),
    game             Nullable(String),
    `#zone_offset`   Int8 default 8,
    `#event_time`    DateTime('UTC'),
    `#event_name`    String,
    `#server_time`   DateTime('UTC') default now(),
    ----- pay event
    unitPrice        Nullable(UInt32),
    money            Nullable(String),
    islishishouci    Nullable(UInt8),
    isdangrishouci   Nullable(UInt8),
    is_today_reg     Nullable(UInt8),
    orderid          Nullable(String),
    proid            Nullable(String),
    --- guide
    step_id          Nullable(UInt16),
    step_group       Nullable(UInt16),
    guide_start_time Nullable(UInt32),
    --- ping
    online_ts        Nullable(UInt16)
) engine = ReplacingMergeTree PARTITION BY toYYYYMMDD(`#event_time`)
      ORDER BY (`owner_name`,`#event_name`, `#event_time`, `#account_id`)
      SETTINGS index_granularity = 8192;

0
test/__init__.py Normal file
View File

36
test/test.ipynb Normal file
View File

@ -0,0 +1,36 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

13
test/test_ck.py Normal file
View File

@ -0,0 +1,13 @@
# coding:utf-8
import asyncio
async def run():
    """Smoke check: list the ClickHouse databases and print the result."""
    # Imported here rather than at module level: importing `db` builds the
    # ClickHouse client pool as a side effect.
    from db import ck_client
    databases = await ck_client.query_dataframe('show databases')
    print(databases)
if __name__ == '__main__':
    # Run the smoke check on a fresh event loop when executed as a script.
    asyncio.run(run())

0
utils/__init__.py Normal file
View File

61
utils/ck_drive.py Normal file
View File

@ -0,0 +1,61 @@
# coding:utf-8
import asyncio
import threading
from aioch import Client
import pandas as pd
__all__ = ('CKDrive',)
class CKDrive:
    """Singleton wrapper around a small pool of aioch ClickHouse clients.

    Every ``CKDrive()`` call returns the same object. ``connected_pool`` fills
    the pool once; each query borrows a client from the pool and puts it back
    afterwards, waiting for a free client when the pool is empty.
    """

    _instance_lock = threading.Lock()
    # Idle clients. Kept on the class, so it is shared by the single instance.
    connect_pool = set()
    # Seconds to wait between two checks for a free client.
    retry_interval = 1

    def __init__(self, pool_size=1, *args, **kwargs):
        """Accept and ignore all arguments; see ``connected_pool``."""
        pass

    def __new__(cls, *args, **kwargs):
        """Return the one shared instance, creating it on first use."""
        # Checked again under the lock so two threads cannot both create it.
        if not hasattr(CKDrive, "_instance"):
            with CKDrive._instance_lock:
                if not hasattr(CKDrive, "_instance"):
                    CKDrive._instance = object.__new__(cls)
        return CKDrive._instance

    def connected_pool(self, pool_size=1, *args, **kwargs):
        """Create ``pool_size`` clients unless the pool already has some.

        Args:
            pool_size: Number of clients to create.
            *args, **kwargs: Forwarded to the ``Client`` constructor.
        """
        if self.connect_pool:
            return
        for _ in range(pool_size):
            self.connect_pool.add(Client(*args, **kwargs))

    async def __execute(self, *args, typ_cnt=5, **kwargs):
        """Run ``client.execute(*args, **kwargs)`` on a borrowed client.

        While the pool is empty, waits ``retry_interval`` seconds and checks
        again, at most ``typ_cnt + 1`` times. The query is executed exactly
        once, after a client has become available.

        Raises:
            Exception: If no client became available in time.
        """
        attempts_left = typ_cnt
        while not self.connect_pool:
            if attempts_left < 0:
                raise Exception('连接池耗尽')
            await asyncio.sleep(self.retry_interval)
            attempts_left -= 1

        client = self.connect_pool.pop()
        try:
            return await client.execute(*args, **kwargs)
        finally:
            # Hand the client back whether the query succeeded or failed.
            self.connect_pool.add(client)

    async def execute(self, sql) -> dict:
        """Run ``sql`` and return the rows as ``{row_index: {column: value}}``."""
        df = await self.query_dataframe(sql)
        return df.T.to_dict()

    async def query_dataframe(self, sql) -> pd.DataFrame:
        """Run ``sql`` and return the result as a DataFrame, one column per result column."""
        # Columnar result: `data` holds one sequence per column and `columns`
        # the matching (name, type) pairs.
        data, columns = await self.__execute(sql, with_column_types=True, columnar=True)
        return pd.DataFrame({col[0]: d for d, col in zip(data, columns)})