pointapi/utils/ta_sdk.py
2021-03-31 11:58:40 +08:00

931 lines
33 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# encoding:utf-8
from __future__ import unicode_literals
import datetime
import gzip
import json
import os
import random
import re
import threading
import time
import uuid
import requests
from kafka import KafkaProducer
from requests import ConnectionError
try:
import queue
from urllib.parse import urlparse
except ImportError:
import Queue as queue
from urlparse import urlparse
try:
    # Python 2: ``basestring`` is the common base of ``str`` and ``unicode``.
    _STRING_TYPES = (basestring,)
except NameError:
    # Python 3: ``basestring`` is gone and ``str`` is the text type.
    _STRING_TYPES = (str,)


def is_str(s):
    """Return True when ``s`` is a string for the running interpreter."""
    return isinstance(s, _STRING_TYPES)
try:
    # Python 2 has a separate arbitrary-precision ``long`` type.
    _INTEGER_TYPES = (int, long)
except NameError:
    # Python 3: ``int`` covers every integer.
    _INTEGER_TYPES = (int,)


def is_int(n):
    """Return True when ``n`` is an integer for the running interpreter."""
    return isinstance(n, _INTEGER_TYPES)
# Log rotation modes accepted by LoggingConsumer (``rotate_mode`` argument).
try:
    from enum import Enum
except ImportError:
    # Interpreters without the ``enum`` module get a plain constants holder.
    class ROTATE_MODE(object):
        DAILY = 0
        HOURLY = 1
else:
    ROTATE_MODE = Enum('ROTATE_MODE', ('DAILY', 'HOURLY'))
class TGAException(Exception):
    """Base class of every exception raised by this SDK."""


class TGAIllegalDataException(TGAException):
    """Data format error.

    Raised by the SDK when the data being sent is malformed; callers should
    catch and handle it.
    """


class TGANetworkException(TGAException):
    """Network error.

    Raised by the SDK when data cannot be sent because of a network failure
    or another unforeseen problem; callers should catch and handle it.
    """
__version__ = '1.6.0'


class TGAnalytics(object):
    """Key object used to send event data and user-property data.

    Every public method builds one record, validates its properties and
    hands the JSON-encoded record to the consumer given at construction.
    """

    # A valid property key is an optional leading '#', one letter, then
    # letters, digits or underscores. The alternation is wrapped in one group
    # so that BOTH anchors apply to BOTH branches; without the group the '$'
    # only bound the second branch and any key merely starting with '#x'
    # (for example '#abc-def') was accepted.
    __NAME_PATTERN = re.compile(r"^((#[a-z][a-z0-9_]{0,49})|([a-z][a-z0-9_]{0,50}))$", re.I)

    def __init__(self, consumer, enable_uuid=False):
        """Create a TGAnalytics instance.

        TGAnalytics must be used together with a consumer, one of:
        - LoggingConsumer: writes batches to local files, paired with LogBus
        - BatchConsumer: sends batches to the TA server synchronously
        - AsyncBatchConsumer: sends batches to the TA server from a worker thread
        - DebugConsumer: sends records one by one with strict server-side validation

        Args:
            consumer: the consumer that receives every record (must provide
                ``add``, ``flush`` and ``close``)
            enable_uuid: when True, a ``#uuid`` is generated for each record
                that does not carry one in its properties
        """
        self.__consumer = consumer
        self.__enableUuid = enable_uuid
        self.__super_properties = {}
        self.clear_super_properties()

    @property
    def consumer(self):
        """Return the underlying consumer.

        Exposed so callers can reach consumer attributes, e.g. to change the
        kafka ``topic_name``.
        """
        return self.__consumer

    def user_set(self, distinct_id=None, account_id=None, properties=None):
        """Set user properties, overwriting existing values.

        Args:
            distinct_id: visitor ID
            account_id: account ID
            properties: dict of user properties
        """
        self.__add(distinct_id=distinct_id, account_id=account_id, send_type='user_set', properties_add=properties)

    def user_unset(self, distinct_id=None, account_id=None, properties=None):
        """Remove user properties from a user.

        Args:
            distinct_id: visitor ID
            account_id: account ID
            properties: dict of properties, or a list of property names
                (a list is turned into a dict mapping each name to 0)
        """
        if isinstance(properties, list):
            properties = dict((key, 0) for key in properties)
        self.__add(distinct_id=distinct_id, account_id=account_id, send_type='user_unset', properties_add=properties)

    def user_setOnce(self, distinct_id=None, account_id=None, properties=None):
        """Set user properties without overwriting values that already exist.

        Args:
            distinct_id: visitor ID
            account_id: account ID
            properties: dict of user properties
        """
        self.__add(distinct_id=distinct_id, account_id=account_id, send_type='user_setOnce', properties_add=properties)

    def user_add(self, distinct_id=None, account_id=None, properties=None):
        """Accumulate numeric user properties (negative values subtract).

        Args:
            distinct_id: visitor ID
            account_id: account ID
            properties: dict of numeric user properties

        Raises:
            TGAIllegalDataException: when a non-'#' property is not a number
        """
        self.__add(distinct_id=distinct_id, account_id=account_id, send_type='user_add', properties_add=properties)

    def user_append(self, distinct_id=None, account_id=None, properties=None):
        """Append to one or more collection-typed user properties.

        Args:
            distinct_id: visitor ID
            account_id: account ID
            properties: dict of collections to append
        """
        self.__add(distinct_id=distinct_id, account_id=account_id, send_type='user_append', properties_add=properties)

    def user_del(self, distinct_id=None, account_id=None):
        """Delete a user.

        Args:
            distinct_id: visitor ID
            account_id: account ID
        """
        self.__add(distinct_id=distinct_id, account_id=account_id, send_type='user_del')

    def track(self, distinct_id=None, account_id=None, event_name=None, properties=None):
        """Send an event.

        Args:
            distinct_id: visitor ID
            account_id: account ID
            event_name: event name (must be a string)
            properties: dict of event properties; they override super
                properties that share the same key

        Raises:
            TGAIllegalDataException: when the data is malformed
        """
        self.__send_event('track', distinct_id, account_id, event_name, None, properties)

    def track_update(self, distinct_id=None, account_id=None, event_name=None, event_id=None, properties=None):
        """Send an updatable event.

        Args:
            distinct_id: visitor ID
            account_id: account ID
            event_name: event name (must be a string)
            event_id: unique ID of the event
            properties: dict of event properties

        Raises:
            TGAIllegalDataException: when the data is malformed
        """
        self.__send_event('track_update', distinct_id, account_id, event_name, event_id, properties)

    def track_overwrite(self, distinct_id=None, account_id=None, event_name=None, event_id=None, properties=None):
        """Send an event that fully overwrites a previous one.

        Args:
            distinct_id: visitor ID
            account_id: account ID
            event_name: event name (must be a string)
            event_id: unique ID of the event
            properties: dict of event properties

        Raises:
            TGAIllegalDataException: when the data is malformed
        """
        self.__send_event('track_overwrite', distinct_id, account_id, event_name, event_id, properties)

    def flush(self):
        """Ask the consumer to submit its data immediately."""
        self.__consumer.flush()

    def close(self):
        """Close the consumer; call before exiting to avoid losing buffered data."""
        self.__consumer.close()

    def _public_track_add(self, event_name):
        """Validate ``event_name`` and return the base properties of an event.

        Returns:
            A new dict holding the SDK identification keys plus the current
            super properties.

        Raises:
            TGAIllegalDataException: when ``event_name`` is not a string
        """
        if not is_str(event_name):
            raise TGAIllegalDataException('a string type event_name is required for track')
        all_properties = {
            '#lib': 'tga_python_sdk',
            '#lib_version': __version__,
        }
        all_properties.update(self.__super_properties)
        return all_properties

    def __send_event(self, send_type, distinct_id, account_id, event_name, event_id, properties):
        """Shared implementation of track / track_update / track_overwrite."""
        all_properties = self._public_track_add(event_name)
        if properties:
            all_properties.update(properties)
        self.__add(distinct_id=distinct_id, account_id=account_id, send_type=send_type, event_name=event_name,
                   event_id=event_id, properties_add=all_properties)

    def __add(self, distinct_id, account_id, send_type, event_name=None, event_id=None, properties_add=None):
        """Build one record and pass its JSON encoding to the consumer.

        Raises:
            TGAException: when both ``distinct_id`` and ``account_id`` are None
            TGAIllegalDataException: when a property fails validation
        """
        if distinct_id is None and account_id is None:
            raise TGAException("Distinct_id and account_id must be set at least one")

        # Work on a copy so the caller's dict is never modified.
        properties = properties_add.copy() if properties_add else {}
        data = {
            '#type': send_type
        }

        # These preset keys live at the top level of the record, not inside
        # 'properties'.
        for header in ('#ip', '#first_check_id'):
            if header in properties:
                data[header] = properties.pop(header)
        # Only the standard UUID format xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx is supported.
        if '#uuid' in properties:
            data['#uuid'] = str(properties.pop('#uuid'))
        elif self.__enableUuid:
            data['#uuid'] = str(uuid.uuid1())
        if '#app_id' in properties:
            data['#app_id'] = properties.pop('#app_id')

        # Validation guarantees '#time' exists and is already a formatted string.
        self.__assert_properties(send_type, properties)
        data['#time'] = properties.pop('#time')
        data['properties'] = properties

        if event_name is not None:
            data['#event_name'] = event_name
        if event_id is not None:
            data['#event_id'] = event_id
        if distinct_id is not None:
            data['#distinct_id'] = distinct_id
        if account_id is not None:
            data['#account_id'] = account_id
        self.__consumer.add(json.dumps(data))

    @staticmethod
    def __format_time(value):
        """Return the wire format of a datetime/date; other values unchanged."""
        if isinstance(value, datetime.datetime):
            # Millisecond precision: drop the last three microsecond digits.
            return value.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
        if isinstance(value, datetime.date):
            return value.strftime('%Y-%m-%d')
        return value

    def __assert_properties(self, action_type, properties):
        """Validate ``properties`` in place and make them JSON serializable.

        Adds ``#time`` (current local time) when missing, checks keys and
        value types, and converts datetime/date values to strings.

        Raises:
            TGAIllegalDataException: when a key or value is not acceptable
        """
        if properties is None:
            return

        if '#time' not in properties:
            properties['#time'] = datetime.datetime.now()
        elif not isinstance(properties['#time'], (datetime.datetime, datetime.date)):
            raise TGAIllegalDataException('Value of #time should be datetime.datetime or datetime.date')

        is_user_add = 'user_add' == action_type.lower()
        # Iterate over a snapshot because values are replaced during the loop.
        for key, value in list(properties.items()):
            if not is_str(key):
                raise TGAIllegalDataException("Property key must be a str. [key=%s]" % str(key))
            if value is None:
                continue
            if not self.__NAME_PATTERN.match(key):
                raise TGAIllegalDataException(
                    "type[%s] property key must be a valid variable name. [key=%s]" % (action_type, str(key)))
            if not (is_str(value) or is_int(value)
                    or isinstance(value, (float, bool, datetime.datetime, datetime.date, list))):
                raise TGAIllegalDataException(
                    "property value must be a str/int/float/bool/datetime/date/list. [value=%s]" % type(value))
            if is_user_add and not self.__number(value) and not key.startswith('#'):
                raise TGAIllegalDataException('user_add properties must be number type')

            if isinstance(value, list):
                # Build a new list: the properties dict is only a shallow
                # copy, so converting in place would rewrite the caller's
                # list. date items are converted too, otherwise json.dumps
                # fails on them later.
                properties[key] = [self.__format_time(item) for item in value]
            else:
                properties[key] = self.__format_time(value)

    def __number(self, s):
        """Return True when ``s`` is an int or a float."""
        return is_int(s) or isinstance(s, float)

    def clear_super_properties(self):
        """Drop every super property, keeping only the SDK identification keys."""
        self.__super_properties = {
            '#lib': 'tga_python_sdk',
            '#lib_version': __version__,
        }

    def set_super_properties(self, super_properties):
        """Set super properties, i.e. properties attached to every event.

        When an event's own properties share a key with a super property,
        the event's value wins.

        Args:
            super_properties: dict of properties merged into the current set
        """
        self.__super_properties.update(super_properties)
# Platform-specific advisory file locking used by _TAFileLock.
if os.name == 'nt':
    import msvcrt

    def _lock(file_):
        """Lock the first byte of ``file_``; IOError becomes TGAException."""
        try:
            offset = file_.tell()
            # msvcrt.locking works from the current position, so go to byte 0.
            file_.seek(0)
            try:
                msvcrt.locking(file_.fileno(), msvcrt.LK_LOCK, 1)
            finally:
                # Put the file position back where it was.
                if offset:
                    file_.seek(offset)
        except IOError as err:
            raise TGAException(err)

    def _unlock(file_):
        """Unlock the first byte of ``file_``; IOError becomes TGAException."""
        try:
            offset = file_.tell()
            if offset:
                file_.seek(0)
            try:
                msvcrt.locking(file_.fileno(), msvcrt.LK_UNLCK, 1)
            finally:
                if offset:
                    file_.seek(offset)
        except IOError as err:
            raise TGAException(err)

elif os.name == 'posix':
    import fcntl

    def _lock(file_):
        """Take an exclusive flock on ``file_``; IOError becomes TGAException."""
        try:
            fcntl.flock(file_.fileno(), fcntl.LOCK_EX)
        except IOError as err:
            raise TGAException(err)

    def _unlock(file_):
        """Release the flock held on ``file_``."""
        fcntl.flock(file_.fileno(), fcntl.LOCK_UN)

else:
    raise TGAException("Python SDK is defined for NT and POSIX system.")
class _TAFileLock(object):
    """Context manager holding the platform file lock for a ``with`` block."""

    def __init__(self, file_handler):
        # Open file object handed to _lock / _unlock.
        self._file_handler = file_handler

    def __enter__(self):
        """Acquire the lock and return this object."""
        _lock(self._file_handler)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release the lock; exceptions from the block are not suppressed."""
        _unlock(self._file_handler)
class LoggingConsumer(object):
    """Write data in batches to local log files.

    Records are buffered and appended to files under the given directory.
    Point LogBus at the same directory to upload the data.
    """

    # One-slot queue used as a mutex shared by ALL LoggingConsumer instances:
    # taking the token locks, putting it back unlocks.
    _mutex = queue.Queue()
    _mutex.put(1)

    class _FileWriter(object):
        """Reference-counted writer shared by consumers targeting one file."""

        # filename -> live writer
        _writers = {}
        # One-slot queue used as a mutex around _writers and the ref counts.
        _writeMutex = queue.Queue()
        _writeMutex.put(1)

        @classmethod
        def instance(cls, filename):
            """Return the writer for ``filename``, creating or re-using it."""
            cls._writeMutex.get(block=True, timeout=None)
            try:
                writer = cls._writers.get(filename)
                if writer is None:
                    writer = cls(filename)
                    cls._writers[filename] = writer
                else:
                    writer._count += 1
                return writer
            finally:
                cls._writeMutex.put(1)

        def __init__(self, filename):
            self._filename = filename
            self._file = open(self._filename, 'a')
            # Number of consumers currently using this writer.
            self._count = 1

        def close(self):
            """Drop one reference; close the file when none remain."""
            LoggingConsumer._FileWriter._writeMutex.get(block=True, timeout=None)
            try:
                self._count -= 1
                if self._count == 0:
                    self._file.close()
                    del LoggingConsumer._FileWriter._writers[self._filename]
            finally:
                LoggingConsumer._FileWriter._writeMutex.put(1)

        def is_valid(self, filename):
            """Return True when this writer targets ``filename``."""
            return self._filename == filename

        def write(self, messages):
            """Append each message on its own line while holding the file lock."""
            with _TAFileLock(self._file):
                for message in messages:
                    self._file.write(message)
                    self._file.write('\n')
                self._file.flush()

    @classmethod
    def construct_filename(cls, directory, date_suffix, file_size, file_prefix):
        """Return the path of the log file that should currently be written.

        Args:
            directory: log directory, expected to end with a separator
            date_suffix: formatted date (or date-hour) of the current period
            file_size: size limit in MB; a value <= 0 disables size splitting
            file_prefix: optional file name prefix

        Returns:
            ``directory`` + ``[prefix.]log.<date_suffix>``; with a size limit
            the name also ends in ``_<n>``, n being the first index whose
            file is missing or still below the limit.
        """
        if file_prefix is not None:
            filename = file_prefix + ".log." + date_suffix
        else:
            filename = "log." + date_suffix
        if file_size <= 0:
            return directory + filename

        count = 0
        file_path = directory + filename + "_" + str(count)
        while os.path.exists(file_path) and cls.file_size_out(file_path, file_size):
            count += 1
            file_path = directory + filename + "_" + str(count)
        return file_path

    @classmethod
    def file_size_out(cls, file_path, file_size):
        """Return True when the file is at least ``file_size`` MB large."""
        size_mb = os.path.getsize(file_path) / float(1024 * 1024)
        return size_mb >= file_size

    @classmethod
    def unlock_logging_consumer(cls):
        """Release the class-wide mutex."""
        cls._mutex.put(1)

    @classmethod
    def lock_logging_consumer(cls):
        """Acquire the class-wide mutex, blocking until it is available."""
        cls._mutex.get(block=True, timeout=None)

    def __init__(self, log_directory, log_size=0, buffer_size=8192, rotate_mode=ROTATE_MODE.DAILY, file_prefix=None):
        """Create a LoggingConsumer writing into ``log_directory``.

        Args:
            log_directory: directory where log files are stored (created
                when it does not exist)
            log_size: size of a single log file in MB; <= 0 means unlimited
            buffer_size: flush threshold, default 8192. NOTE: the code
                compares the NUMBER of buffered messages against this value,
                not a byte count.
            rotate_mode: ROTATE_MODE.DAILY (default) or ROTATE_MODE.HOURLY
            file_prefix: optional prefix of the log file name
        """
        if not os.path.exists(log_directory):
            os.makedirs(log_directory)
        self.log_directory = log_directory
        # strftime pattern that names the current rotation period.
        self.sdf = '%Y-%m-%d-%H' if rotate_mode == ROTATE_MODE.HOURLY else '%Y-%m-%d'
        self.suffix = datetime.datetime.now().strftime(self.sdf)
        self._fileSize = log_size
        if not self.log_directory.endswith("/"):
            self.log_directory = self.log_directory + "/"
        self._buffer = []
        self._buffer_size = buffer_size
        self._file_prefix = file_prefix

        self.lock_logging_consumer()
        try:
            filename = LoggingConsumer.construct_filename(self.log_directory, self.suffix, self._fileSize,
                                                          self._file_prefix)
            self._writer = LoggingConsumer._FileWriter.instance(filename)
        finally:
            # Always hand the mutex back: it is shared by every instance, so
            # a failure here must not block all other consumers forever.
            self.unlock_logging_consumer()

    def _refresh_writer(self):
        """Switch to the writer of the current target file when it changed."""
        filename = LoggingConsumer.construct_filename(self.log_directory, self.suffix, self._fileSize,
                                                      self._file_prefix)
        if not self._writer.is_valid(filename):
            self._writer.close()
            self._writer = LoggingConsumer._FileWriter.instance(filename)

    def add(self, msg):
        """Buffer ``msg``; write the buffer out once it exceeds the threshold."""
        self.lock_logging_consumer()
        try:
            messages = None
            self._buffer.append(msg)
            if len(self._buffer) > self._buffer_size:
                messages = self._buffer
                date_suffix = datetime.datetime.now().strftime(self.sdf)
                if self.suffix != date_suffix:
                    self.suffix = date_suffix
                self._refresh_writer()
                self._buffer = []
            if messages:
                self._writer.write(messages)
        finally:
            # Release even when the write fails, otherwise every later call
            # on any LoggingConsumer would block forever.
            self.unlock_logging_consumer()

    def flush_with_close(self, is_close):
        """Write out the buffer and optionally release the writer.

        Args:
            is_close: when True the writer reference is closed afterwards
        """
        self.lock_logging_consumer()
        try:
            messages = None
            if len(self._buffer) > 0:
                messages = self._buffer
                self._refresh_writer()
                self._buffer = []
            if messages:
                self._writer.write(messages)
            if is_close:
                self._writer.close()
        finally:
            self.unlock_logging_consumer()

    def flush(self):
        """Write buffered messages to the log file now."""
        self.flush_with_close(False)

    def close(self):
        """Write buffered messages and release the file writer."""
        self.flush_with_close(True)
class BatchConsumer(object):
    """Send data to the TA server synchronously and in batches.

    Needs only the receiver URL and the APP ID, no transfer tool. Data can
    be lost when the network is unstable, so it is not recommended for
    production use.

    In this class an upload is attempted from ``add`` once the number of
    pending records reaches ``batch`` (default 20) or when earlier batches
    are still cached, and from explicit ``flush`` / ``close`` calls.
    """

    # Class-level re-entrant locks, shared by every BatchConsumer instance.
    _batchlock = threading.RLock()
    _cachelock = threading.RLock()

    def __init__(self, server_uri, appid, batch=20, timeout=30000, interval=3, compress=True, maxCacheSize=50):
        """Create a BatchConsumer.

        Args:
            server_uri: URL of the server
            appid: APP ID of the project
            batch: number of records that triggers an upload, default 20, capped at 200
            timeout: request timeout in milliseconds, default 30000 ms
            interval: maximum interval between uploads in seconds, default 3
                (stored only; not read elsewhere in this class)
            compress: whether the request body is gzip-compressed
            maxCacheSize: stored only; not read elsewhere in this class
        """
        self.__interval = interval
        self.__batch = min(batch, 200)
        # Records waiting to be grouped into a batch.
        self.__message_channel = []
        self.__maxCacheSize = maxCacheSize
        # Batches (lists of records) waiting to be sent, oldest first.
        self.__cache_buffer = []
        self.__last_flush = time.time()
        endpoint = urlparse(server_uri)._replace(path='/sync_server').geturl()
        self.__http_service = _HttpServices(endpoint, appid, timeout)
        self.__http_service.compress = compress

    def add(self, msg):
        """Queue ``msg`` and upload when a batch is ready or one is cached."""
        with self._batchlock:
            self.__message_channel.append(msg)
        if self.__cache_buffer or len(self.__message_channel) >= self.__batch:
            self.flush_once()

    def flush(self, throw_exception=True):
        """Upload until nothing is pending, skipping batches rejected as illegal."""
        while self.__cache_buffer or self.__message_channel:
            try:
                self.flush_once(throw_exception)
            except TGAIllegalDataException:
                continue

    def flush_once(self, throw_exception=True):
        """Upload the oldest cached batch.

        Args:
            throw_exception: when False, network and data errors are swallowed

        Raises:
            TGANetworkException: sending failed (the batch stays cached)
            TGAIllegalDataException: the server rejected the batch (it is dropped)
        """
        if not self.__message_channel and not self.__cache_buffer:
            return
        with self._cachelock:
            try:
                with self._batchlock:
                    # Re-check under the lock: another thread may have flushed.
                    if not self.__message_channel and not self.__cache_buffer:
                        return
                    if not self.__cache_buffer or len(self.__message_channel) >= self.__batch:
                        self.__cache_buffer.append(self.__message_channel)
                        self.__message_channel = []
                pending = self.__cache_buffer[0]
                payload = '[' + ','.join(pending) + ']'
                self.__http_service.send(payload, str(len(pending)))
                self.__last_flush = time.time()
                self.__cache_buffer = self.__cache_buffer[1:]
            except TGANetworkException:
                # Keep the batch cached so a later flush can retry it.
                if throw_exception:
                    raise
            except TGAIllegalDataException:
                # The batch can never succeed; drop it.
                self.__cache_buffer = self.__cache_buffer[1:]
                if throw_exception:
                    raise

    def close(self):
        """Upload everything that is still pending."""
        self.flush()
class AsyncBatchConsumer(object):
    """Send data to the TA server asynchronously and in batches.

    A dedicated daemon thread uploads the data. An upload happens when either
    1. more than ``flush_size`` records are queued (default 20), or
    2. ``interval`` seconds have passed since the last attempt (default 3).
    """

    def __init__(self, server_uri, appid, interval=3, flush_size=20, queue_size=100000):
        """Create an AsyncBatchConsumer and start its upload thread.

        Args:
            server_uri: URL of the server
            appid: APP ID of the project
            interval: maximum interval between uploads in seconds, default 3
            flush_size: queue threshold; exceeding it triggers an immediate upload
            queue_size: capacity of the buffering queue
        """
        server_url = urlparse(server_uri)
        self.__http_service = _HttpServices(server_url._replace(path='/sync_server').geturl(), appid, 30000)
        self.__batch = flush_size
        self.__queue = queue.Queue(queue_size)

        # Start the upload thread.
        self.__flushing_thread = self._AsyncFlushThread(self, interval)
        self.__flushing_thread.daemon = True
        self.__flushing_thread.start()

    def add(self, msg):
        """Queue ``msg`` without blocking.

        Raises:
            TGANetworkException: when the queue is full
        """
        try:
            self.__queue.put_nowait(msg)
        except queue.Full as e:
            raise TGANetworkException(e)

        if self.__queue.qsize() > self.__batch:
            self.flush()

    def flush(self):
        """Wake the upload thread so that it sends a batch now."""
        self.__flushing_thread.flush()

    def close(self):
        """Stop the upload thread, then send what is left synchronously."""
        self.__flushing_thread.stop()
        while not self.__queue.empty():
            self._perform_request()

    def _perform_request(self):
        """Send one batch synchronously.

        Internal use only; users should not call this method.

        Returns:
            True when a batch was sent. None when the queue was empty or the
            batch could not be delivered; in the latter case the batch is
            dropped.
        """
        flush_buffer = []
        while len(flush_buffer) < self.__batch:
            try:
                flush_buffer.append(str(self.__queue.get_nowait()))
            except queue.Empty:
                break

        if flush_buffer:
            for _ in range(3):  # retry up to 3 times on network errors
                try:
                    self.__http_service.send('[' + ','.join(flush_buffer) + ']', str(len(flush_buffer)))
                    return True
                except TGANetworkException:
                    pass
                except TGAIllegalDataException:
                    # Retrying rejected data cannot help.
                    break

    class _AsyncFlushThread(threading.Thread):
        """Worker thread that periodically uploads the consumer's queue."""

        def __init__(self, consumer, interval):
            threading.Thread.__init__(self)
            self._consumer = consumer
            self._interval = interval

            self._stop_event = threading.Event()
            self._finished_event = threading.Event()
            self._flush_event = threading.Event()

        def flush(self):
            """Request an immediate upload."""
            self._flush_event.set()

        def stop(self):
            """Ask the thread to stop and wait until its loop has ended.

            Call this on exit so that the thread finishes cleanly.
            """
            self._stop_event.set()
            self._finished_event.wait()

        def run(self):
            """Upload on every flush request or every ``interval`` seconds."""
            try:
                while True:
                    # Continue when a flush was requested or after waiting
                    # for ``_interval`` seconds.
                    self._flush_event.wait(self._interval)
                    # Clear BEFORE sending: a flush requested while the
                    # request is in flight stays set and triggers the next
                    # round instead of being wiped out.
                    self._flush_event.clear()
                    self._consumer._perform_request()

                    # Leave the loop once a stop was requested.
                    if self._stop_event.is_set():
                        break
            finally:
                # Always signal completion, even when the loop dies from an
                # unexpected exception; otherwise stop() would wait forever.
                self._finished_event.set()
def _gzip_string(data):
    """Return ``data`` gzip-compressed.

    Uses ``gzip.compress``; when that raises AttributeError (the function is
    absent on Python 2) the payload is compressed through a ``GzipFile``
    writing into an in-memory ``StringIO`` buffer instead.
    """
    try:
        compressed = gzip.compress(data)
    except AttributeError:
        import StringIO
        sink = StringIO.StringIO()
        writer = gzip.GzipFile(fileobj=sink, mode="w")
        writer.write(data)
        writer.close()
        compressed = sink.getvalue()
    return compressed
class _HttpServices(object):
    """Internal helper that performs the network requests.

    Holds the receiver URL and the project APP ID and uploads data to the
    receiver. The body is gzip-compressed by default.
    """

    def __init__(self, server_uri, appid, timeout=30000):
        """
        Args:
            server_uri: full URL the data is posted to
            appid: APP ID of the project
            timeout: request timeout in milliseconds (None disables it)
        """
        self.url = server_uri
        self.appid = appid
        self.timeout = timeout
        self.compress = True

    def _timeout_seconds(self):
        """Return the timeout in seconds, the unit Requests expects.

        ``self.timeout`` is kept in milliseconds because that is the unit the
        consumers document; passing it to Requests unchanged would turn
        30000 ms into 30000 seconds.
        """
        if self.timeout is None:
            return None
        return self.timeout / 1000.0

    def send(self, data, length):
        """Post data to the server using Requests.

        Args:
            data: text payload to send
            length: number of records in the payload, sent as a header

        Returns:
            True when the server answers with result code 0.

        Raises:
            TGAIllegalDataException: the server returned a non-zero result code
            TGANetworkException: the request failed or the HTTP status was not 200
        """
        headers = {'appid': self.appid, 'TA-Integration-Type': 'python-sdk', 'TA-Integration-Version': __version__,
                   'TA-Integration-Count': length}
        if self.compress:
            compress_type = 'gzip'
            payload = _gzip_string(data.encode("utf-8"))
        else:
            compress_type = 'none'
            payload = data.encode("utf-8")
        headers['compress'] = compress_type

        try:
            response = requests.post(self.url, data=payload, headers=headers, timeout=self._timeout_seconds())
        except requests.exceptions.RequestException as e:
            # Covers connection errors AND timeouts, so callers that handle
            # TGANetworkException (retry loops, flush) see every transport
            # failure instead of a raw Requests exception.
            time.sleep(0.5)
            raise TGANetworkException("Data transmission failed due to " + repr(e))

        if response.status_code != 200:
            raise TGANetworkException("Unexpected Http status code " + str(response.status_code))
        responseData = json.loads(response.text)
        if responseData["code"] == 0:
            return True
        raise TGAIllegalDataException("Unexpected result code: " + str(responseData["code"]))
class DebugConsumer(object):
    """Send records one at a time, synchronously, to the receiver.

    The request goes to the ``/data_debug`` endpoint. When the server
    reports a non-zero ``errorLevel`` the response text is printed.
    Intended for debugging tracking data.
    """

    def __init__(self, server_uri, appid, timeout=30000, write_data=True):
        """Create a DebugConsumer.

        Args:
            server_uri: URL of the server
            appid: APP ID of the project
            timeout: request timeout in milliseconds, default 30000 ms
            write_data: when False the request is sent with ``dryRun`` = 1
        """
        server_url = urlparse(server_uri)
        debug_url = server_url._replace(path='/data_debug')
        self.__server_uri = debug_url.geturl()
        self.__appid = appid
        self.__timeout = timeout
        self.__writer_data = write_data

    def _timeout_seconds(self):
        """Return the timeout in seconds, the unit Requests expects.

        The constructor takes milliseconds; passing that value to Requests
        unchanged would turn 30000 ms into 30000 seconds.
        """
        if self.__timeout is None:
            return None
        return self.__timeout / 1000.0

    def add(self, msg):
        """Send one record.

        Returns:
            True when the server reports ``errorLevel`` 0; otherwise the
            response text is printed and None is returned.

        Raises:
            TGANetworkException: the request failed or the HTTP status was not 200
        """
        dry_run = 0 if self.__writer_data else 1
        form = {'source': 'server', 'appid': self.__appid, 'data': msg, 'dryRun': dry_run}
        try:
            response = requests.post(self.__server_uri, data=form, timeout=self._timeout_seconds())
        except requests.exceptions.RequestException as e:
            # Covers connection errors and timeouts alike.
            time.sleep(0.5)
            raise TGANetworkException("Data transmission failed due to " + repr(e))

        if response.status_code != 200:
            raise TGANetworkException("Unexpected http status code: " + str(response.status_code))
        responseData = json.loads(response.text)
        if responseData["errorLevel"] == 0:
            return True
        print("Unexpected result : \n %s" % response.text)

    def flush(self, throw_exception=True):
        """Nothing to do: every record is sent immediately by ``add``."""
        pass

    def close(self):
        """Nothing to do: no resources are held between calls."""
        pass
class ToKafka(object):
    """Consumer that sends records to kafka.

    To avoid a metadata query per record, the partition is picked at random
    from a fixed number of partitions (16 by default).
    """

    def __init__(self, conf, partition_count=16):
        """
        Args:
            conf: dict of keyword arguments passed to ``KafkaProducer``
            partition_count: number of partitions of the target topic;
                records go to a random partition in ``[0, partition_count)``
        """
        self.__topic_name = None
        self.__partition_count = partition_count
        self.__producer = KafkaProducer(**conf)

    @property
    def topic_name(self):
        """Topic that ``add`` sends to; must be set before sending."""
        return self.__topic_name

    @topic_name.setter
    def topic_name(self, topic_name):
        self.__topic_name = topic_name

    def add(self, msg):
        """Hand ``msg`` to the producer; failures are printed, not raised."""
        try:
            partition = random.randrange(self.__partition_count)
            self.__producer.send(self.__topic_name, msg, partition=partition)
        except Exception as e:
            # Best effort by design: a failed send must not break the caller.
            print(e)

    def flush(self, throw_exception=True):
        """Block until the records buffered by the producer have been sent.

        ``KafkaProducer.send`` is asynchronous, so without this call records
        may still sit in the producer's buffer.

        Args:
            throw_exception: when False, errors raised while flushing are
                printed instead of propagated
        """
        try:
            self.__producer.flush()
        except Exception as e:
            if throw_exception:
                raise
            print(e)

    def close(self):
        """Close the producer, which waits for pending records to be sent."""
        self.__producer.close()