pip install protobuf
# 编写格式文件 toutiao_dmp.proto
syntax = "proto2";
package toutiao.dmp;
option java_outer_classname = "DmpDataProto";
// Container message: holds a repeated list of IdItem entries in field 1.
// The Python example in this note serializes one DmpData per output line.
message DmpData {
repeated IdItem idList = 1;
}
// One identifier entry: a typed ID string with an optional timestamp and tags.
message IdItem {
// Optional unsigned 32-bit timestamp. The unit and epoch are not stated in
// this schema -- TODO confirm against the consumer's documentation.
optional uint32 timestamp = 1;
// Required: the kind of identifier carried in `id` (see DataType below).
required DataType dataType = 2;
// Required: the identifier value itself.
required string id = 3;
// Zero or more string tags attached to this identifier.
repeated string tags = 4;
// Identifier kinds. Value 3 is not assigned in this enum.
// The *_MD5 / *_SHA256 names presumably denote hashed forms of the
// corresponding identifier -- confirm with the schema owner.
enum DataType {
IMEI = 0;
IDFA = 1;
UID = 2;
IMEI_MD5 = 4;
IDFA_MD5 = 5;
MOBILE_HASH_SHA256 = 6;
OAID = 7;
OAID_MD5 = 8;
}
}
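# Install the protoc compiler locally (see the official Protocol Buffers installation documentation for platform-specific steps)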
#执行命令生成包文件
/usr/local/protoc4/bin/protoc --python_out=. toutiao_dmp.proto
#python 生成protobuf文件示例代码
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import time
import base64
import toutiao_dmp_pb2
import sys
import zipfile
import os
# 转化生成pb2格式文件
def generate_valid_file(file_path, pb2_file_name, device_type, batch_size=100):
    """Convert a text file of device IDs into a base64 protobuf file and zip it.

    Reads ``file_path`` one line at a time (one ID per line), packs the IDs
    into ``DmpData`` messages of at most ``batch_size`` entries, and writes
    each message to ``pb2_file_name`` as a single base64-encoded line. The
    output file is then compressed into ``pb2_file_name + ".zip"``.

    Args:
        file_path: Path of the input text file, one device ID per line.
            Surrounding whitespace (including ``\\r`` from CRLF files) is
            removed and blank lines are skipped.
        pb2_file_name: Path of the output file. The archive is written next
            to it with a ``.zip`` suffix and contains the file under its
            base name only.
        device_type: Name of an ``IdItem.DataType`` value such as ``"IMEI"``
            or ``"OAID"``. Names that are not defined in the enum fall back
            to ``IDFA``.
        batch_size: Maximum number of IDs per output line. Defaults to 100.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    # Resolve the enum once; it is the same for every line of the input.
    data_type = _resolve_data_type(device_type)

    dmp_data = toutiao_dmp_pb2.DmpData()
    pending = 0  # number of IDs collected in the current, unwritten batch
    # Context managers close both files even if reading or writing fails.
    with open(file_path, "r") as source_file, \
            open(pb2_file_name, "wb") as target_file:
        for line in source_file:
            device_id = line.strip()
            if not device_id:
                # A blank line carries no ID; `id` is a required field.
                continue
            id_item = dmp_data.idList.add()
            id_item.dataType = data_type
            id_item.id = device_id
            pending += 1
            if pending >= batch_size:
                _write_batch(target_file, dmp_data)
                dmp_data = toutiao_dmp_pb2.DmpData()
                pending = 0
        # Flush the last partial batch. Skipping an empty batch avoids a
        # trailing blank line when the ID count is a multiple of batch_size
        # or the input is empty.
        if pending:
            _write_batch(target_file, dmp_data)
    print("pb2 file done:" + pb2_file_name)

    # Pack the result into a zip archive next to the output file.
    zip_file_name = pb2_file_name + ".zip"
    with zipfile.ZipFile(zip_file_name, "w", zipfile.ZIP_DEFLATED) as archive:
        # Store only the base name so the archive has no directory prefix.
        archive.write(pb2_file_name, os.path.basename(pb2_file_name))
    print("zip file done:" + zip_file_name)


def _resolve_data_type(device_type):
    """Return the IdItem.DataType number for a name, defaulting to IDFA."""
    try:
        return toutiao_dmp_pb2.IdItem.DataType.Value(device_type)
    except ValueError:
        # Unknown names keep the historical fallback.
        return toutiao_dmp_pb2.IdItem.IDFA


def _write_batch(target_file, dmp_data):
    """Write one DmpData message to target_file as a base64-encoded line."""
    target_file.write(base64.b64encode(dmp_data.SerializeToString()))
    target_file.write(b"\n")
if __name__ == '__main__':
    # Command line: <input txt file> <output pb2 file> <device type>
    if len(sys.argv) < 4:
        print("参数不全!")
        # Exit with a non-zero status so callers can detect the failure.
        # sys.exit is used because the bare exit() helper is only installed
        # by the site module and is not guaranteed to exist.
        sys.exit(1)
    # Extra arguments beyond the first three are ignored.
    txt_file, pb2_file, device_type = sys.argv[1:4]
    generate_valid_file(txt_file, pb2_file, device_type)