
YOLOv4 Model Deployment

Depending on the application scenario, several ways to deploy YOLOv4 are provided here.

  • opencv
  • onnx
  • TensorRT
  • triton server

1. opencv

The original Darknet model can be loaded directly with OpenCV, which provides some hardware acceleration for YOLO. The loading code is as follows:

import numpy as np
import cv2
import time
import os
import requests
from datetime import datetime


class ObjectRecognition(object):
    def __init__(self,
                 label_path,
                 config_path,
                 weights_path,
                 upload_url,
                 model_name='fire',
                 confidence_thre=0.5,
                 nms_thre=0.3,
                 alert_label=None):
        self.alert_label = alert_label if alert_label else []
        self.confidence_thre = confidence_thre
        self.nms_thre = nms_thre
        self.upload_url = upload_url.rstrip('/') + "/"
        self.model_name = model_name
        self.net = self.load_model(config_path, weights_path)

        # Load the class label file
        self.LABELS = open(label_path).read().strip().split("\n")
        self.nclass = len(self.LABELS)
        # Get the names of the YOLO output layers
        ln = self.net.getLayerNames()
        self.ln = [ln[i[0] - 1] for i in self.net.getUnconnectedOutLayers()]

    def upload_image(self, img):
        file_name = "recognized_{}.jpg".format(int(time.time()))
        img_dir = "/".join([self.model_name, datetime.now().strftime("%Y%m")])
        data = cv2.imencode(".jpg", img)[1].tobytes()
        files = [
            ('file', (file_name, data, 'image/png'))
        ]

        result = requests.post(self.upload_url + img_dir, files=files)
        if result.status_code == 200:
            path = img_dir + "/" + file_name
        else:
            path = ""
        return path

    def load_model(self, config_path, weights_path):
        # Load the model config and weights files
        net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
        return net

    def detect(self, img):
        s = time.time()
        # Assign a random color to each class for its bounding boxes
        np.random.seed(42)
        COLORS = np.random.randint(0, 255, size=(self.nclass, 3), dtype='uint8')
        # Get the image dimensions
        (H, W) = img.shape[:2]

        # Build a blob from the image, set the input size, then run one
        # forward pass of the YOLO network to get bounding boxes and scores
        blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (416, 416), swapRB=True, crop=False)

        self.net.setInput(blob)

        layerOutputs = self.net.forward(self.ln)

        end = time.time()
        # Initialize bounding boxes, confidences (probabilities) and class IDs
        boxes = []
        confidences = []
        classIDs = []
        # Iterate over the output layers (three in total)
        for output in layerOutputs:
            # Iterate over each detection
            for detection in output:
                # Extract the class ID and confidence
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]
                # Keep only boxes whose confidence exceeds the threshold
                if confidence > self.confidence_thre:
                    # Scale the box coordinates back to the original image; note that
                    # YOLO returns the box center plus the box width and height
                    box = detection[0:4] * np.array([W, H, W, H])
                    (centerX, centerY, width, height) = box.astype("int")
                    # Compute the top-left corner of the box
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))
                    # Record the box, confidence (probability) and class ID
                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    classIDs.append(classID)

        # Use non-maximum suppression to drop weak, overlapping boxes
        idxs = cv2.dnn.NMSBoxes(boxes, confidences, self.confidence_thre, self.nms_thre)
        loc = []
        flag = False
        # Make sure at least one box remains
        if len(idxs) > 0:

            # Iterate over the kept boxes
            for i in idxs.flatten():

                if classIDs[i] in self.alert_label:
                    flag = True

                # Extract the box coordinates
                (x, y) = (boxes[i][0], boxes[i][1])
                (w, h) = (boxes[i][2], boxes[i][3])
                # Draw the box and put the class label and confidence at its top-left corner
                color = [int(c) for c in COLORS[classIDs[i]]]
                cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
                text = '{}: {:.3f}'.format(self.LABELS[classIDs[i]], confidences[i])
                (text_w, text_h), baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
                cv2.rectangle(img, (x, y - text_h - baseline), (x + text_w, y), color, -1)
                cv2.putText(img, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
                loc.append([x, y, w, h])
            m = time.time()

            if flag:
                # Upload to the cloud file server
                img_path = self.upload_image(img)
            else:
                img_path = ""
            e = time.time()
            print("detect time", m - s, "upload time", e - m, "total time", e - s)
            return img_path, flag
        else:
            return "", flag

2. onnx

For environments with a GPU, the Darknet model can be uniformly converted to ONNX format with a conversion tool:

The converted ONNX file can be loaded for inference as follows:

import numpy as np
import cv2
import time
import onnxruntime
import requests
from datetime import datetime
import math
import urllib


class ObjectRecognition(object):
    def __init__(self,
                 label_path,
                 onnx_path,
                 upload_url='http://192.168.0.15:8080/ai/upload/',
                 model_name='fire',
                 confidence_thre=0.5,
                 nms_thre=0.3,
                 alert_label=None):
        self.alert_label = alert_label if alert_label else []

        # Load the class label file
        self.class_names = open(label_path).read().strip().split("\n")

        self.confidence_thre = confidence_thre
        self.nms_thre = nms_thre
        self.upload_url = upload_url.rstrip('/') + "/"
        self.model_name = model_name
        self.session = onnxruntime.InferenceSession(onnx_path)

    def upload_image(self, img):
        file_name = "recognized_{}.jpg".format(int(time.time()))
        img_dir = "/".join([self.model_name, datetime.now().strftime("%Y%m")])
        data = cv2.imencode(".jpg", img)[1].tobytes()
        files = [
            ('file', (file_name, data, 'image/png'))
        ]

        result = requests.post(self.upload_url + img_dir, files=files)
        if result.status_code == 200:
            path = img_dir + "/" + file_name
        else:
            path = ""
        return path

    def detect(self, img):
        IN_IMAGE_H = self.session.get_inputs()[0].shape[2]
        IN_IMAGE_W = self.session.get_inputs()[0].shape[3]

        # Input
        resized = cv2.resize(img, (IN_IMAGE_W, IN_IMAGE_H), interpolation=cv2.INTER_LINEAR)
        img_in = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
        img_in = np.transpose(img_in, (2, 0, 1)).astype(np.float32)
        img_in = np.expand_dims(img_in, axis=0)
        img_in /= 255.0
        print("Shape of the network input: ", img_in.shape)

        # Compute
        input_name = self.session.get_inputs()[0].name

        outputs = self.session.run(None, {input_name: img_in})

        boxes = self.post_processing(0.4, 0.6, outputs)

        img_path, flag = self.plot_boxes(img, boxes[0])
        return img_path, flag

    def post_processing(self, conf_thresh, nms_thresh, output):
        # [batch, num, 1, 4]
        box_array = output[0]
        # [batch, num, num_classes]
        confs = output[1]

        t1 = time.time()

        if type(box_array).__name__ != 'ndarray':
            box_array = box_array.cpu().detach().numpy()
            confs = confs.cpu().detach().numpy()

        num_classes = confs.shape[2]

        # [batch, num, 4]
        box_array = box_array[:, :, 0]

        # [batch, num, num_classes] --> [batch, num]
        max_conf = np.max(confs, axis=2)
        max_id = np.argmax(confs, axis=2)

        t2 = time.time()

        bboxes_batch = []
        for i in range(box_array.shape[0]):

            argwhere = max_conf[i] > conf_thresh
            l_box_array = box_array[i, argwhere, :]
            l_max_conf = max_conf[i, argwhere]
            l_max_id = max_id[i, argwhere]

            bboxes = []
            # nms for each class
            for j in range(num_classes):

                cls_argwhere = l_max_id == j
                ll_box_array = l_box_array[cls_argwhere, :]
                ll_max_conf = l_max_conf[cls_argwhere]
                ll_max_id = l_max_id[cls_argwhere]

                keep = self.nms_cpu(ll_box_array, ll_max_conf, nms_thresh)

                if (keep.size > 0):
                    ll_box_array = ll_box_array[keep, :]
                    ll_max_conf = ll_max_conf[keep]
                    ll_max_id = ll_max_id[keep]

                    for k in range(ll_box_array.shape[0]):
                        bboxes.append(
                            [ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], ll_box_array[k, 3],
                             ll_max_conf[k],
                             ll_max_conf[k], ll_max_id[k]])

            bboxes_batch.append(bboxes)

        t3 = time.time()
        return bboxes_batch

    def nms_cpu(self, boxes, confs, nms_thresh=0.5, min_mode=False):
        # print(boxes.shape)
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]

        areas = (x2 - x1) * (y2 - y1)
        order = confs.argsort()[::-1]

        keep = []
        while order.size > 0:
            idx_self = order[0]
            idx_other = order[1:]

            keep.append(idx_self)

            xx1 = np.maximum(x1[idx_self], x1[idx_other])
            yy1 = np.maximum(y1[idx_self], y1[idx_other])
            xx2 = np.minimum(x2[idx_self], x2[idx_other])
            yy2 = np.minimum(y2[idx_self], y2[idx_other])

            w = np.maximum(0.0, xx2 - xx1)
            h = np.maximum(0.0, yy2 - yy1)
            inter = w * h

            if min_mode:
                over = inter / np.minimum(areas[order[0]], areas[order[1:]])
            else:
                over = inter / (areas[order[0]] + areas[order[1:]] - inter)

            inds = np.where(over <= nms_thresh)[0]
            order = order[inds + 1]

        return np.array(keep)

    def plot_boxes(self, img, boxes):
        COLORS = np.random.randint(0, 255, size=(len(self.class_names), 3), dtype='uint8')

        width = img.shape[1]
        height = img.shape[0]
        flag = False
        for i in range(len(boxes)):

            box = boxes[i]
            x1 = int(box[0] * width)
            y1 = int(box[1] * height)
            x2 = int(box[2] * width)
            y2 = int(box[3] * height)

            if len(box) >= 7 and self.class_names:
                cls_conf = box[5]
                cls_id = box[6]

                if cls_id in self.alert_label:
                    flag = True

                color = [int(c) for c in COLORS[cls_id]]
                text = '{}: {:.3f}'.format(self.class_names[cls_id], cls_conf)
                img = cv2.putText(img, text, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
                img = cv2.rectangle(img, (x1, y1), (x2, y2), color, 1)

        # cv2.imwrite('predictions_onnx.jpg', img)
        if flag:
            # Upload to the cloud file server
            img_path = self.upload_image(img)
        else:
            img_path = ""
        return img_path, flag

3. triton server

For the details of the Triton deployment, see: NVIDIA Triton server deployment and testing.

4. TensorRT

The converter shipped with TensorRT makes it easy to turn an ONNX model into a TensorRT engine.

/usr/src/tensorrt/bin/trtexec --onnx=/sdk/sunshine/trained_model/yolov4/onnx/helmet/yolov4_helmet_1_3_608_608_static.onnx --explicitBatch --saveEngine=yolov4_helmet_1_3_608_608_static.trt

To convert to half precision, simply append the --fp16 flag to this command.

The TensorRT engine can then be loaded and used for inference as follows:

import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import requests
import time
from datetime import datetime
import cv2
import urllib
import numpy as np

TRT_LOGGER = trt.Logger()


class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()


class TRTModelPredict:
    def __init__(self, engine_path, shape=(608, 608)):
        shape = (1, 3, shape[0], shape[1])
        self.engine = self.get_engine(engine_path)
        self.context = self.engine.create_execution_context()

        self.buffers = self.allocate_buffers(self.engine, 1)
        self.context.set_binding_shape(0, shape)

    def allocate_buffers(self, engine, batch_size):
        inputs = []
        outputs = []
        bindings = []
        stream = cuda.Stream()
        for binding in engine:

            size = trt.volume(engine.get_binding_shape(binding)) * batch_size
            dims = engine.get_binding_shape(binding)

            # in case batch dimension is -1 (dynamic)
            if dims[0] < 0:
                size *= -1

            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            bindings.append(int(device_mem))
            # Append to the appropriate list.
            if engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))
        return inputs, outputs, bindings, stream

    def get_engine(self, engine_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_path))
        with open(engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())

    def do_inference(self, img_in):

        inputs, outputs, bindings, stream = self.buffers
        inputs[0].host = img_in
        for i in range(2):
            # Transfer input data to the GPU.
            [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
            # Run inference.
            self.context.execute_async(bindings=bindings, stream_handle=stream.handle)
            # Transfer predictions back from the GPU.
            [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
            # Synchronize the stream
            stream.synchronize()
        # Return only the host outputs.
        return [out.host for out in outputs]


class Hat(TRTModelPredict):
    def __init__(self, label_path, engine_path, shape=(608, 608), upload_url='http://192.168.0.15:8080/ai/upload/',
                 model_name='fire', alert_label=None):
        self.shape = shape
        self.alert_label = alert_label if alert_label else []
        self.class_names = open(label_path).read().strip().split("\n")
        self.model_name = model_name
        self.upload_url = upload_url.rstrip('/') + "/"
        super(Hat, self).__init__(engine_path, shape)

    def pre_process(self, image_src):
        resized = cv2.resize(image_src, self.shape, interpolation=cv2.INTER_LINEAR)
        img_in = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
        img_in = np.transpose(img_in, (2, 0, 1)).astype(np.float32)
        img_in = np.expand_dims(img_in, axis=0)
        img_in /= 255.0
        img_in = np.ascontiguousarray(img_in)
        return img_in

    def post_processing(self, conf_thresh, nms_thresh, output):
        box_array = output[0]
        # [batch, num, num_classes]
        confs = output[1]

        if type(box_array).__name__ != 'ndarray':
            box_array = box_array.cpu().detach().numpy()
            confs = confs.cpu().detach().numpy()

        num_classes = confs.shape[2]

        # [batch, num, 4]
        box_array = box_array[:, :, 0]

        # [batch, num, num_classes] --> [batch, num]
        max_conf = np.max(confs, axis=2)
        max_id = np.argmax(confs, axis=2)

        bboxes_batch = []
        for i in range(box_array.shape[0]):

            argwhere = max_conf[i] > conf_thresh
            l_box_array = box_array[i, argwhere, :]
            l_max_conf = max_conf[i, argwhere]
            l_max_id = max_id[i, argwhere]

            bboxes = []
            # nms for each class
            for j in range(num_classes):

                cls_argwhere = l_max_id == j
                ll_box_array = l_box_array[cls_argwhere, :]
                ll_max_conf = l_max_conf[cls_argwhere]
                ll_max_id = l_max_id[cls_argwhere]

                keep = self.nms_cpu(ll_box_array, ll_max_conf, nms_thresh)

                if (keep.size > 0):
                    ll_box_array = ll_box_array[keep, :]
                    ll_max_conf = ll_max_conf[keep]
                    ll_max_id = ll_max_id[keep]

                    for k in range(ll_box_array.shape[0]):
                        bboxes.append(
                            [ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], ll_box_array[k, 3],
                             ll_max_conf[k],
                             ll_max_conf[k], ll_max_id[k]])

            bboxes_batch.append(bboxes)

        return bboxes_batch

    def nms_cpu(self, boxes, confs, nms_thresh=0.5, min_mode=False):
        # print(boxes.shape)
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]

        areas = (x2 - x1) * (y2 - y1)
        order = confs.argsort()[::-1]

        keep = []
        while order.size > 0:
            idx_self = order[0]
            idx_other = order[1:]

            keep.append(idx_self)

            xx1 = np.maximum(x1[idx_self], x1[idx_other])
            yy1 = np.maximum(y1[idx_self], y1[idx_other])
            xx2 = np.minimum(x2[idx_self], x2[idx_other])
            yy2 = np.minimum(y2[idx_self], y2[idx_other])

            w = np.maximum(0.0, xx2 - xx1)
            h = np.maximum(0.0, yy2 - yy1)
            inter = w * h

            if min_mode:
                over = inter / np.minimum(areas[order[0]], areas[order[1:]])
            else:
                over = inter / (areas[order[0]] + areas[order[1:]] - inter)

            inds = np.where(over <= nms_thresh)[0]
            order = order[inds + 1]

        return np.array(keep)

    def detect(self, img):
        img_in = self.pre_process(img)
        trt_outputs = self.do_inference(img_in)
        trt_outputs[0] = trt_outputs[0].reshape(1, -1, 1, 4)
        trt_outputs[1] = trt_outputs[1].reshape(1, -1, len(self.class_names))

        boxes = self.post_processing(0.4, 0.6, trt_outputs)
        return self.plot_boxes(img, boxes[0])

    def plot_boxes(self, img, boxes):
        COLORS = np.random.randint(0, 255, size=(len(self.class_names), 3), dtype='uint8')

        width = img.shape[1]
        height = img.shape[0]
        flag = False
        for i in range(len(boxes)):

            box = boxes[i]
            x1 = int(box[0] * width)
            y1 = int(box[1] * height)
            x2 = int(box[2] * width)
            y2 = int(box[3] * height)

            if len(box) >= 7 and self.class_names:
                cls_conf = box[5]
                cls_id = box[6]

                if cls_id in self.alert_label:
                    flag = True

                color = [int(c) for c in COLORS[cls_id]]
                text = '{}: {:.3f}'.format(self.class_names[cls_id], cls_conf)
                img = cv2.putText(img, text, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
                img = cv2.rectangle(img, (x1, y1), (x2, y2), color, 1)

        # cv2.imwrite('predictions_onnx.jpg', img)
        if flag:
            # Upload to the cloud file server
            img_path = self.upload_image(img)
        else:
            img_path = ""
        return img_path, flag

    def upload_image(self, img):
        file_name = "recognized_{}.jpg".format(int(time.time()))
        img_dir = "/".join([self.model_name, datetime.now().strftime("%Y%m")])
        data = cv2.imencode(".jpg", img)[1].tobytes()
        files = [
            ('file', (file_name, data, 'image/png'))
        ]

        result = requests.post(self.upload_url + img_dir, files=files)
        if result.status_code == 200:
            path = img_dir + "/" + file_name
        else:
            path = ""
        return path

Performance comparison

Using the helmet-detection model, the three inference approaches were compared; the results are as follows.

Format     Inference time (s)     Confidence   Notes
opencv     0.3789951801300049     0.533
onnx       0.44134068489074707    0.425
TensorRT   0.12151765823364258    0.423

Surprisingly, OpenCV is even faster than ONNX here, which seems absurd. On closer inspection, the Jetson has onnxruntime installed rather than onnxruntime-gpu, so the initial suspicion is that GPU acceleration was never actually used.
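A quick way to confirm this suspicion is to ask onnxruntime which device and execution providers it actually uses; a minimal sketch (the helmet ONNX file from above is used only as an example path):

import onnxruntime

print(onnxruntime.get_device())  # prints 'CPU' if the GPU build is not installed
session = onnxruntime.InferenceSession("yolov4_helmet_1_3_608_608_static.onnx")
print(session.get_providers())   # e.g. ['CPUExecutionProvider'] when CUDA is unavailable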

Server environment:

NVIDIA Jetson AGX Xavier - Jetpack 4.5.1 [L4T 32.5.1]

  • torch ==1.9.0
  • onnx == 1.10.2
  • pycuda==2020.1
  • cv2==4.1.1
  • tensorrt=7.1.3.0
  • python 3.6.9

1. pytorch2onnx

import torch
from torchvision import models


def transform_to_onnx(model, shape, onnx_file_name, input_names=["input"], output_names=['boxes', 'confs']):
    batch_size = shape[0]
    dynamic = False
    if batch_size <= 0:
        dynamic = True
    if dynamic:
        x = torch.randn(shape, requires_grad=True)
        dynamic_axes = {name: {0: "batch_size"} for name in input_names + output_names}
        # Export the model
        print('Export the onnx model ...')
        torch.onnx.export(model,
                          x,
                          onnx_file_name,
                          export_params=True,
                          input_names=input_names, output_names=output_names,
                          dynamic_axes=dynamic_axes
                          )
        print('Onnx model exporting done')
        return onnx_file_name

    else:
        x = torch.randn(shape, requires_grad=True)
        # Export the model
        print('Export the onnx model ...')
        torch.onnx.export(model,
                          x,
                          onnx_file_name,
                          export_params=True,
                          do_constant_folding=True,
                          input_names=input_names, output_names=output_names,
                          dynamic_axes=None)

        print('Onnx model exporting done')

Test it with a ResNet model:

model = models.resnet50(pretrained=True)
transform_to_onnx(model,
                  shape=(1, 3, 224, 224),
                  onnx_file_name="resnet50.onnx",
                  input_names=["input"],
                  output_names=["output"]
                  )

resnet50.onnx is generated successfully in the current directory, and the conversion is correct.
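As an optional sanity check (a sketch only, assuming the onnx and onnxruntime packages are installed), the exported file can be validated and run once:

import onnx
import onnxruntime
import numpy as np

model = onnx.load("resnet50.onnx")
onnx.checker.check_model(model)  # raises an exception if the graph is malformed

sess = onnxruntime.InferenceSession("resnet50.onnx")
dummy = np.random.randn(1, 3, 224, 224).astype(np.float32)
out = sess.run(None, {"input": dummy})[0]
print(out.shape)                 # expected: (1, 1000) for resnet50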

2. Visualizing the ONNX model

The netron tool can be used to visualize ONNX models.

python -m pip install netron
netron -h
usage: netron [-h] [-v] [-b] [-p PORT] [--host HOST] [--log] [MODEL_FILE]

Viewer for neural network, deep learning and machine learning models.

positional arguments:
  MODEL_FILE            model file to serve

optional arguments:
  -h, --help            show this help message and exit
  -v, --version         print version
  -b, --browse          launch web browser
  -p PORT, --port PORT  port to serve
  --host HOST           host to serve
  --log                 log details to console

Start netron:

netron --host 0.0.0.0 --port 8080

Open the ONNX file to inspect the model's parameters in detail.

image-20211209153618873

3. onnx2trt

The tool bundled with TensorRT makes the ONNX-to-TRT conversion straightforward.

/usr/src/tensorrt/bin/trtexec --onnx=resnet50.onnx --explicitBatch --saveEngine=resnet50.trt --fp16

The command can also be put on the PATH:

ln -s /usr/src/tensorrt/bin/trtexec /usr/bin/trtexec

4. Test

Build a generic TensorRT model-loading utility, as follows:

# author: sunshine
# datetime: 2021/12/9 2:39 PM

import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit  # important: creates the CUDA context


TRT_LOGGER = trt.Logger()


class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()


class TRTModelPredict:
    def __init__(self, engine_path, shape=(608, 608)):
        shape = (1, 3, shape[0], shape[1])
        self.engine = self.get_engine(engine_path)
        self.context = self.engine.create_execution_context()

        self.buffers = self.allocate_buffers(self.engine, 1)
        self.context.set_binding_shape(0, shape)

    def allocate_buffers(self, engine, batch_size):
        inputs = []
        outputs = []
        bindings = []
        stream = cuda.Stream()
        for binding in engine:

            size = trt.volume(engine.get_binding_shape(binding)) * batch_size
            dims = engine.get_binding_shape(binding)

            # in case batch dimension is -1 (dynamic)
            if dims[0] < 0:
                size *= -1

            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            bindings.append(int(device_mem))
            # Append to the appropriate list.
            if engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))
        return inputs, outputs, bindings, stream

    def get_engine(self, engine_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_path))
        with open(engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())

    def do_inference(self, img_in):

        inputs, outputs, bindings, stream = self.buffers
        inputs[0].host = img_in
        for i in range(2):
            # Transfer input data to the GPU.
            [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
            # Run inference.
            self.context.execute_async(bindings=bindings, stream_handle=stream.handle)
            # Transfer predictions back from the GPU.
            [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
            # Synchronize the stream
            stream.synchronize()
        # Return only the host outputs.
        return [out.host for out in outputs]

Taking helmet detection as an example, subclass TRTModelPredict and implement the data pre- and post-processing, and model prediction is complete.
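For a simpler illustration, here is a minimal sketch (not the author's helmet code) that subclasses TRTModelPredict for the resnet50.trt engine built in the previous step, using naive preprocessing (no ImageNet mean/std normalization):

import cv2
import numpy as np


class Resnet50TRT(TRTModelPredict):
    def __init__(self, engine_path, shape=(224, 224)):
        self.shape = shape
        super(Resnet50TRT, self).__init__(engine_path, shape)

    def pre_process(self, image):
        # resize, BGR->RGB, scale to [0, 1], HWC -> 1x3xHxW
        resized = cv2.resize(image, self.shape, interpolation=cv2.INTER_LINEAR)
        img_in = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
        img_in = np.transpose(img_in, (2, 0, 1))[None]
        return np.ascontiguousarray(img_in)

    def detect(self, image):
        logits = self.do_inference(self.pre_process(image))[0]
        return int(np.argmax(logits))  # predicted class index


# usage sketch:
# model = Resnet50TRT("resnet50.trt")
# print(model.detect(cv2.imread("test.jpg")))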

The result is as follows:

image-20211209174956684

Full code: https://github.com/fushengwuyu/torch2tensorrt_demos

NeMo Study Notes

1. Key points

  1. All arguments must be passed by kwargs only for typed methods

    With NeMo, every function call must pass its arguments by keyword, for example:

    f1(name='zhangsan', age=12)  # correct
    f1('zhangsan', 12)  # wrong: raises the error in the title above
  2. The nlp module is unavailable

    No long story: upgrading from Python 3.6.9 to Python 3.8 fixes it.

  3. Exporting an ONNX model

    model.export('xx.onnx', onnx_opset_version=12)  # the default opset 13 is not supported

1. triton server

  1. Install the Triton server

    • x86

      1. Pull the image

        docker pull nvcr.io/nvidia/tritonserver:21.07-py3
      2. Run

        docker run -it --rm -p8000:8000 -p8001:8001 -p8002:8002 -v /home/sunshine/infer/:/models nvcr.io/nvidia/tritonserver:21.07-py3 tritonserver --model-repository=/models --strict-model-config=false
    • jetson

      NVIDIA has not yet released a Triton container that supports Jetson, so it has to be installed manually.

      Reference: https://github.com/triton-inference-server/server/blob/main/docs/jetson.md

      1. Download the release package matching your JetPack version and extract it to /opt/tritonserver

        Download: https://github.com/triton-inference-server/server/releases

        Note: if it is extracted to another directory, startup fails with missing shared-library errors; you could add the required libraries to the search path, but extracting to /opt/tritonserver is simplest.

      2. Install the dependencies

        apt-get update && \
        apt-get install -y --no-install-recommends \
        software-properties-common \
        autoconf \
        automake \
        build-essential \
        cmake \
        git \
        libb64-dev \
        libre2-dev \
        libssl-dev \
        libtool \
        libboost-dev \
        libcurl4-openssl-dev \
        libopenblas-dev \
        rapidjson-dev \
        patchelf \
        zlib1g-dev
      3. Start the service

        bin/tritonserver --model-repository=/sdk/python/triton/models
  2. Create the model repository

    The model repository layout is as follows:

    <model-repository-path>/
      <model-name>/
        [config.pbtxt]             # optional for TensorRT, TensorFlow saved-model and ONNX models; required for all other model types
        [<output-labels-file> ...]
        <version>/                 # 1, 2, 3, ...
          <model-definition-file>  # model.onnx, model.pb, ...
        <version>/
          <model-definition-file>
        ...
      <model-name>/
        [config.pbtxt]
        [<output-labels-file> ...]
        <version>/
          <model-definition-file>
        <version>/
          <model-definition-file>
        ...
      ...

    See the official docs for more on model configuration: https://github.com/triton-inference-server/server/blob/main/docs/model_configuration.md#model-configuration

    Taking text classification as an example, the model repository is laid out as:

    [root@bg1 sunshine]# tree infer/
    infer/
    ├── densenet_onnx              # model name
    │   ├── 1                      # version
    │   │   └── model.onnx         # model file, named model.xxx by default
    │   ├── config.pbtxt           # config file
    │   └── densenet_labels.txt    # class labels, included or not depending on the config
    └── text_class
        ├── 1
        │   └── model.onnx
        └── config.pbtxt

    4 directories, 5 files
  3. Add the config file

    The config file defaults to config.pbtxt inside the model directory. Its format is not JSON and must strictly follow the format given in the official docs, otherwise loading fails.

    Again taking text classification as an example:

    platform: "onnxruntime_onnx"
    max_batch_size : 0
    input [
      {
        name: "token_type_ids"
        data_type: TYPE_INT64
        format: FORMAT_NONE
        dims: [-1,-1]
      },
      {
        name: "attention_mask"
        data_type: TYPE_INT64
        format: FORMAT_NONE
        dims: [-1,-1]
      },
      {
        name: "input_ids"
        data_type: TYPE_INT64
        format: FORMAT_NONE
        dims: [-1,-1]
      }
    ]
    output: [
      {
        name: "logits"
        data_type: TYPE_FP32
        dims: [-1,2]
        label_filename: ""
      }
    ]

    If the model is a TensorRT, TensorFlow saved-model or ONNX model, config.pbtxt can be omitted; just pass --strict-model-config=false when starting the server.

    The effective configuration can then be fetched with:

    curl 192.168.0.15:8000/v2/models/text_class/config
    {
      "name": "text_class",
      "platform": "onnxruntime_onnx",
      "backend": "onnxruntime",
      "version_policy": {
        "latest": {
          "num_versions": 1
        }
      },
      "max_batch_size": 1,
      "input": [
        {
          "name": "token_type_ids",
          "data_type": "TYPE_INT64",
          "format": "FORMAT_NONE",
          "dims": [
            -1
          ],
          "is_shape_tensor": false,
          "allow_ragged_batch": false
        },
        {
          "name": "attention_mask",
          "data_type": "TYPE_INT64",
          "format": "FORMAT_NONE",
          "dims": [
            -1
          ],
          "is_shape_tensor": false,
          "allow_ragged_batch": false
        },
        {
          "name": "input_ids",
          "data_type": "TYPE_INT64",
          "format": "FORMAT_NONE",
          "dims": [
            -1
          ],
          "is_shape_tensor": false,
          "allow_ragged_batch": false
        }
      ],
      "output": [
        {
          "name": "logits",
          "data_type": "TYPE_FP32",
          "dims": [
            2
          ],
          "label_filename": "",
          "is_shape_tensor": false
        }
      ],
      "batch_input": [],
      "batch_output": [],
      "optimization": {
        "priority": "PRIORITY_DEFAULT",
        "input_pinned_memory": {
          "enable": true
        },
        "output_pinned_memory": {
          "enable": true
        },
        "gather_kernel_buffer_threshold": 0,
        "eager_batching": false
      },
      "instance_group": [
        {
          "name": "text_class",
          "kind": "KIND_CPU",
          "count": 1,
          "gpus": [],
          "secondary_devices": [],
          "profile": [],
          "passive": false,
          "host_policy": ""
        }
      ],
      "default_model_filename": "model.onnx",
      "cc_model_filenames": {},
      "metric_tags": {},
      "parameters": {},
      "model_warmup": []
    }
  4. Launch the service

    • From the command line

      tritonserver --model-repository=/models --strict-model-config=false
    • With a Docker container

      docker run -it --rm -p8000:8000 -p8001:8001 -p8002:8002 -v /home/sunshine/infer/:/models nvcr.io/nvidia/tritonserver:21.07-py3 tritonserver --model-repository=/models --strict-model-config=false

      After the service starts, it prints information like the following:

      =============================
      == Triton Inference Server ==
      =============================

      NVIDIA Release 21.07 (build 24810355)

      Copyright (c) 2018-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.

      Various files include modifications (c) NVIDIA CORPORATION. All rights reserved.

      ...

      I0819 02:54:33.978409 1 onnxruntime.cc:2072] TRITONBACKEND_ModelInstanceInitialize: text_class (CPU device 0)
      I0819 02:54:36.500230 1 onnxruntime.cc:2072] TRITONBACKEND_ModelInstanceInitialize: densenet_onnx (CPU device 0)
      I0819 02:54:36.501075 1 model_repository_manager.cc:1212] successfully loaded 'text_class' version 1
      I0819 02:54:37.042157 1 model_repository_manager.cc:1212] successfully loaded 'densenet_onnx' version 1
      I0819 02:54:37.042560 1 server.cc:504]
      +------------------+------+
      | Repository Agent | Path |
      +------------------+------+
      +------------------+------+

      I0819 02:54:37.042936 1 server.cc:543]
      +-------------+-----------------------------------------------------------------+--------+
      | Backend | Path | Config |
      +-------------+-----------------------------------------------------------------+--------+
      | tensorrt | <built-in> | {} |
      | pytorch | /opt/tritonserver/backends/pytorch/libtriton_pytorch.so | {} |
      | tensorflow | /opt/tritonserver/backends/tensorflow1/libtriton_tensorflow1.so | {} |
      | onnxruntime | /opt/tritonserver/backends/onnxruntime/libtriton_onnxruntime.so | {} |
      | openvino | /opt/tritonserver/backends/openvino/libtriton_openvino.so | {} |
      +-------------+-----------------------------------------------------------------+--------+

      I0819 02:54:37.043287 1 server.cc:586]
      +---------------+---------+--------+
      | Model | Version | Status |
      +---------------+---------+--------+
      | densenet_onnx | 1 | READY |
      | text_class | 1 | READY |
      +---------------+---------+--------+

      I0819 02:54:37.043677 1 tritonserver.cc:1718]
      +----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
      | Option | Value |
      +----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
      | server_id | triton |
      | server_version | 2.12.0 |
      | server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data |
      | | statistics |
      | model_repository_path[0] | /models |
      | model_control_mode | MODE_NONE |
      | strict_model_config | 0 |
      | pinned_memory_pool_byte_size | 268435456 |
      | min_supported_compute_capability | 6.0 |
      | strict_readiness | 1 |
      | exit_timeout | 30 |
      +----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

      I0819 02:54:37.046408 1 grpc_server.cc:4072] Started GRPCInferenceService at 0.0.0.0:8001
      I0819 02:54:37.046930 1 http_server.cc:2795] Started HTTPService at 0.0.0.0:8000
      I0819 02:54:37.090841 1 sagemaker_server.cc:134] Started Sagemaker HTTPService at 0.0.0.0:8080
      I0819 02:54:37.134294 1 http_server.cc:162] Started Metrics Service at 0.0.0.0:8002

    • Verify

      [root@bg1 ~]# curl -v localhost:8000/v2/health/ready

      * About to connect() to localhost port 8000 (#0)
      * Trying ::1...
      * Connected to localhost (::1) port 8000 (#0)
      > GET /v2/health/ready HTTP/1.1
      > User-Agent: curl/7.29.0
      > Host: localhost:8000
      > Accept: */*
      >
      < HTTP/1.1 200 OK
      < Content-Length: 0
      < Content-Type: text/plain
      <
      * Connection #0 to host localhost left intact

  5. yolov4

2. triton client

Official site: https://github.com/triton-inference-server/client

  1. Local installation

    pip install nvidia-pyindex
    pip install tritonclient[http] # [all, http, grpc, utils]

    Dependencies required by the HTTP client:

    geventhttpclient>=1.4.4
    numpy>=1.19.1
    python-rapidjson>=0.9.1

    Dependencies for the other client types: https://github.com/triton-inference-server/client/tree/main/src/python/library/requirements

  2. Container

    • Pull the image

      docker pull nvcr.io/nvidia/tritonserver:21.07-py3-sdk
    • Start the container

      docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:21.07-py3-sdk
    • Run the example client

      /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
      Request 0, batch size 1
      Image '/workspace/images/mug.jpg':
      15.346230 (504) = COFFEE MUG
      13.224326 (968) = CUP
      10.422965 (505) = COFFEEPOT
  3. Calling the service from Python client code

    An HTTP text-classification request looks like this:

    # author: sunshine
    # datetime: 2021/8/19 11:01 AM
    import numpy as np

    import tritonclient.http as httpclient
    from attrdict import AttrDict
    from transformers import BertTokenizer
    from tritonclient.utils import triton_to_np_dtype


    def convert_http_metadata_config(_metadata, _config):
        _model_metadata = AttrDict(_metadata)
        _model_config = AttrDict(_config)

        return _model_metadata, _model_config


    def parse_model(model_metadata, model_config):
        """
        input_dtype,
        :param model_metadata:
        :param model_config:

        """
        input_metadata = model_metadata.inputs
        output_metadata = model_metadata.outputs
        max_batch_size = model_config.max_batch_size
        input_params = {i.name: i.datatype for i in input_metadata}
        output_names = [o.name for o in output_metadata]
        return max_batch_size, input_params, output_names


    def preprocess(text, input_param, output_names, max_len=128):
        client = httpclient

        inputs = tokenizer(text, padding='longest', max_length=max_len, truncation='longest_first')

        input_data = []
        names = ['input_ids', 'attention_mask', 'token_type_ids']

        for name in names:
            ndtype = triton_to_np_dtype(input_param[name])
            data = np.array(inputs[name]).astype(ndtype)

            data_t = client.InferInput(name, list(data.shape), input_param[name])
            data_t.set_data_from_numpy(data)

            input_data.append(data_t)

        outputs = [
            client.InferRequestedOutput(out_name) for out_name in output_names
        ]

        return input_data, outputs


    def postprocess(results, output_names):
        """
        Process the response results

        """
        logit_name = output_names[0]

        output = results.as_numpy(logit_name)
        pred = np.argmax(output, axis=-1)
        return pred


    if __name__ == '__main__':
        model_name = 'text_class'
        model_version = ''  # if empty, the latest version is selected
        url = "192.168.0.15:8000"
        bert_path = '/home/sunshine/pre_models/pytorch/bert-base-chinese'
        tokenizer = BertTokenizer.from_pretrained(bert_path)

        triton_client = httpclient.InferenceServerClient(url=url, verbose=False)

        model_metadata = triton_client.get_model_metadata(
            model_name=model_name, model_version=model_version)

        model_config = triton_client.get_model_config(
            model_name=model_name, model_version=model_version)

        model_metadata, model_config = convert_http_metadata_config(
            model_metadata, model_config)

        max_batch_size, input_params, output_names = parse_model(model_metadata, model_config)

        texts = ['今天天气真好', '我讨厌你']
        inputs, outputs = preprocess(texts, input_params, output_names)

        results = triton_client.infer(model_name,
                                      inputs,
                                      request_id=str(1),
                                      model_version=model_version,
                                      outputs=outputs)
        pred = postprocess(results, output_names)
        print(pred)

    output:

    [1 0]

The models chosen are nanodet_plus, FastestDet and yolo-fastestv2.

Here we standardize on the ONNX format and load it for inference in three ways:

  • onnxruntime
  • openvino
  • opencv

The model-loading code is as follows:

# author: sunshine
# datetime: 2022/8/17 2:53 PM
import onnxruntime
import cv2
from openvino.runtime import Core


class ONNXPredict:
    def __init__(self, onnx_path):
        self.session = onnxruntime.InferenceSession(onnx_path)

    def do_inference(self, img_in):
        # Compute
        input_name = self.session.get_inputs()[0].name
        outputs = self.session.run(None, {input_name: img_in})
        return outputs


class OpencvPredict:
    def __init__(self, onnx_path, w, h):
        self.net = cv2.dnn.readNet(onnx_path)
        self.w = w
        self.h = h

    def do_inference(self, img_in):
        blob = cv2.dnn.blobFromImage(img_in, 1 / 255.0, (self.w, self.h))
        self.net.setInput(blob)
        outs = self.net.forward(self.net.getUnconnectedOutLayersNames())
        return outs


class OpenvinoPredict:
    def __init__(self, engine_path):
        ie = Core()
        model = ie.read_model(engine_path)
        self.compiled_model = ie.compile_model(model=model, device_name='CPU')

    def do_inference(self, img_in):
        result_infer = self.compiled_model([img_in])
        return list(result_infer.values())

The full inference code for the three lightweight detectors is at: https://github.com/fushengwuyu/light_objectdetect
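As a rough illustration of how such timings can be collected (a sketch only: it assumes a model with a single 1x3xHxW float input in [0, 1]; the model path is a placeholder, and the real per-model pre/post-processing lives in the repository above):

import time
import cv2
import numpy as np


def prepare(img, w, h):
    # naive preprocessing: resize, BGR->RGB, scale to [0, 1], HWC -> 1x3xHxW
    resized = cv2.resize(img, (w, h))
    blob = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    return np.ascontiguousarray(np.transpose(blob, (2, 0, 1))[None])


img = cv2.imread("test.jpg")
w = h = 416                          # e.g. the nanodet_plus input resolution
onnx_file = "nanodet_plus.onnx"      # placeholder model path

backends = {
    "onnx": ONNXPredict(onnx_file),
    "openvino": OpenvinoPredict(onnx_file),
    "opencv": OpencvPredict(onnx_file, w, h),
}
for name, backend in backends.items():
    # OpencvPredict builds its own blob internally; the others take the NCHW tensor
    x = img if name == "opencv" else prepare(img, w, h)
    start = time.time()
    backend.do_inference(x)
    print(name, round(time.time() - start, 4))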

Inference time in seconds (tested on a local laptop, CPU: i7-7700HQ, 8 cores):

Model            onnx     openvino   opencv   Resolution
nanodet_plus     0.0842   0.0503     0.2394   416*416
FastestDet       0.0200   0.0182     0.0747   512*512
yolo-fastestv2   0.0125   0.01271    0.0380   352*352

Hardware resource usage (edge server: 6 cores, 2 GB RAM; it could not handle OpenVINO, so that backend was not tested):

Model            CPU (onnx)   memory (onnx)   CPU (opencv)   memory (opencv)
nanodet_plus     360%         6.3%            412%           6.7%
FastestDet       310%         4.7%            270%           4.7%
yolo-fastestv2   306%         5.4%            335%           5.7%

Inference time on the edge server (seconds):

Model            onnx     opencv
nanodet_plus     0.5430   0.6136
FastestDet       0.1653   0.2123
yolo-fastestv2   0.1653   0.1064

The final summary is as follows:

Model                 mAP    size   run time (onnx, s)   CPU    memory   model size
nanodet_plus-m-1.5x   34.1   416    0.5430               360%   6.3%     9.5M
FastestDet            25.3   352    0.1653               310%   4.7%     960K
yolo-fastestv2        24.1   352    0.1653               306%   5.4%     958K
nanodet_plus-m        30.4   416    0.3575               341%   6.0%     4.6M

1. Scientific paper long-abstract generation task, 1st place

https://blog.csdn.net/weixin_45839693/article/details/114791680

2. "Wanchuang Cup" Traditional Chinese Medicine Tianchi Big Data Competition, TCM literature question generation challenge: final 1st-place solution

https://github.com/kangyishuai/CHINESE-MEDICINE-QUESTION-GENERATION

3. 4th-place solution

https://tianchi.aliyun.com/forum/postDetail?spm=5176.12586969.1002.12.767a1ebbsqTpuN&postId=155051

4. 3rd-place solution

https://tianchi.aliyun.com/forum/postDetail?spm=5176.12586969.1002.9.767a1ebbxuIt2J&postId=155097

1. URL paths

  • Prefix

    Route addresses start with api, followed by the microservice name, the version, and a module name or resource address, for example:

    https://host:port/api/pdi_metadata/v1/dep

    pdi_metadata identifies the microservice.

    v1 is the version: the letter v followed by a number. dep is a module or resource address inside the microservice.

  • Resource address

    Path design must follow these conventions:

    • Resource names are all lowercase and readable; hyphens (-) or underscores (_) may be used as separators
    • Resource names follow the RESTful style: verbs are allowed only in algorithm resources, otherwise only nouns may be used
    • Path segments use the slash (/) to express hierarchy
    • If the same resource name appears in a resource address, a resource representation and a query, the name must be identical, i.e. consistently separated with underscores (_).

2. Response status codes

All API responses follow the HTTP specification; common HTTP status codes are listed below.

Status code   Description
1xx           Informational
2xx           Success
3xx           Redirection
4xx           Client error
5xx           Server error
  1. The request succeeds

    The response parameters are defined as follows:

    Parameter   Type     Required   Description
    code        int64               success status code
    data        obj                 the requested response data
  2. The request fails; detailed error information must be returned, with parameters defined as follows:

    Parameter   Type     Required   Description
    code        int64               business error code
    message     string              business error message, mapped one-to-one to code; it explains what the code means and should be short, abstract and general
    cause       string              the cause of this error, which may also hint to the caller how to resolve it; the same code may map to different causes
    detail      obj                 detailed error information for the caller to inspect and show to the end user

    Notes:

    • The first three digits are the standard HTTP status code, the middle three digits are a globally unique microservice identifier, and the last three digits are a custom code that should stay abstract and general.
    • For errors caused by the client calling the API incorrectly (invalid parameters, malformed JSON, etc.), the last three digits of code are 000 and the reason is explained in cause.
    • For errors that the client must distinguish in order to drive UI and feature logic, the last three digits of code identify the specific error, with the specifics given in detail.

3. Error code scheme

The PDI product will later be embedded into other AISHU products, so a dynamically configurable scheme can be adopted, allowing different products to use different error codes.

Common error codes:

  • 400XXX000: errors caused by the client calling the API incorrectly, such as invalid parameters or malformed JSON
  • 500XXX000: internal server error
  • 501XXX000: method not implemented

4. Flask exception handling

Flask handles exceptions internally by subclassing HTTPException. In the same way, we can define our own exception base class (inheriting from HTTPException) that specifies the error code to return, the requested URL, the error cause, and so on.

import json

from werkzeug.exceptions import HTTPException


class APIException(HTTPException):
    code = 500
    message = 'sorry, we made a mistake'
    error_code = 999
    cause = 'just a mistake'
    detail = None
    dump_json = False  # whether to dump the body to a JSON string: False for flask_restful.Resource, True for plain Flask views

    def __init__(self, msg=None, code=None, error_code=None, cause=None, header=None, detail=None):
        if code:
            self.code = code
        if error_code:
            self.error_code = error_code
        if msg:
            self.message = msg
        if cause:
            self.cause = cause
        if detail:
            self.detail = detail
        super(APIException, self).__init__(msg, None)

    def get_body(self, environ=None):
        body = dict(
            message=self.message,
            code=self.error_code,
            cause=self.cause
        )
        if self.detail:
            body.update({"detail": self.detail})

        data = json.dumps(body) if self.dump_json else body

        return data

    def get_headers(self, environ=None):
        """Get a list of headers."""
        return [('Content-Type', 'application/json')]

With the base class in place, derive concrete exception classes from it, freely defining status codes and the corresponding error messages; when such a condition occurs, raise the exception. For example:

class NotFound(APIException):
    code = 404
    message = 'the resource are not found'
    error_code = 1001

The derived classes defined so far are:

  • Success
  • DeleteSucess
  • UpdateSucess
  • ServerError
  • ParameterException
  • NotFound
  • AuthFailed
  • Forbidden

Usage in the application:

@input_bp.route('/input', methods=['POST'])
def input_m():
    d = request.get_json()

    if "file_id" not in d or "op_type" not in d:
        return ParameterException(msg='the file_id and op_type parameters must not be empty').get_body()
    pass

Although we can raise our own exception classes wherever we expect failures, not every exception can be anticipated. Parameter errors and the like can be foreseen and handled in advance, but logic bugs and other unforeseeable exceptions are not something we can handle case by case, so we also need to catch all exceptions globally.

# Global error handling (AOP style)
@app.errorhandler(Exception)
def framework_error(e):
    api_logger.error("error info: %s" % e)  # log the error
    if isinstance(e, APIException):
        return e
    if isinstance(e, HTTPException):
        code = e.code
        msg = e.description
        error_code = 1007
        return APIException(msg, code, error_code)
    else:
        if not app.config['DEBUG']:
            return ServerError().get_body()
        else:
            return e

With this in place, every exception raised in Flask is handled, which keeps the application robust.

5. Sanic exception handling

Sanic can be thought of as an upgraded Flask: it is coroutine-based, with higher concurrency and efficiency than Flask, and its usage is largely the same.

The differences from Flask are:

  • the base class inherits from HTTPResponse instead of HTTPException
  • the global error handling mechanism differs: Sanic implements it by registering an exception handler function.
class MyResponse(HTTPResponse):
    code = 500
    message = 'sorry, we made a mistake'
    error_code = 999
    cause = None
    detail = None

    def __init__(self, message=None, code=None, error_code=None, cause=None, detail=None):
        if code:
            self.code = code
        if error_code:
            self.error_code = error_code
        if message:
            self.message = message
        if cause is not None:
            self.cause = cause
        if detail is not None:
            self.detail = detail
        super(MyResponse, self).__init__(body=self.get_body(), status=self.code)

    def get_body(self):
        body = dict(
            message=self.message,
            code=self.error_code
        )

        if self.cause is not None:
            body.update(dict(cause=self.cause))
        if self.detail is not None:
            body.update({"detail": self.detail})

        return json_dumps(body)

The derived error classes are the same as in the Flask case.

class Success(MyResponse):
    code = 200
    message = 'OK'
    local_code = "000"
    error_code = 99999

Global exception handling:

async def server_error_handler(request, exception):
    logger.error(msg=traceback.format_exc())  # log the error
    return ServerError(message=repr(exception), cause=traceback.format_exc())


app.error_handler.add(Exception, server_error_handler)

When a system exception occurs, a ServerError is returned uniformly here. ServerError is a custom exception class derived from MyResponse.

class ServerError(MyResponse):
    code = 500
    message = 'something happen'
    local_code = "000"
    error_code = 99999

1. Introduction to sanicdb

SanicDB is a tool developed to make it convenient for Sanic, Python's asynchronous web framework, to work with MySQL (MariaDB); it is a lightweight wrapper around aiomysql.Pool. Since Sanic is an async-IO web framework, reading MySQL with async IO is what lets it realize its full efficiency.

Note: sanicdb can be used anywhere MySQL needs to be accessed with async IO, not only inside Sanic.

GitHub: https://github.com/veelion/sanicdb

Installation

pip install sanicdb

2. Initialization

class SanicDB:
    """A lightweight wrapper around aiomysql.Pool for easy to use
    """
    def __init__(self, host, database, user, password,
                 loop=None, sanic=None,
                 minsize=3, maxsize=5,
                 return_dict=True,
                 pool_recycle=7*3600,
                 autocommit=True,
                 charset="utf8mb4", **kwargs):
        '''
        kwargs: all parameters that aiomysql.connect() accept.
        '''
  • The first four parameters describe the MySQL database connection.

  • loop is the application's event loop; if it is None (the default), asyncio.get_event_loop() is called automatically to create one. Also, only one of sanic and loop needs to be supplied; when both are given, the database actually uses the loop from sanic, as can be seen in the initialization:

async def init_pool(self):
    if self.sanic:
        self.db_args['loop'] = self.sanic.loop
    self.pool = await aiomysql.create_pool(**self.db_args)
  • minsize: minimum number of connections
  • maxsize: maximum number of connections
  • return_dict: whether each returned record is a dict whose keys are the MySQL column names and whose values are the column values
  • pool_recycle: interval after which pool connections are recycled; MySQL's default idle timeout is 8 hours (our server is currently set to 24 hours)
  • autocommit: whether to commit automatically
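A minimal initialization sketch using the parameters above (host, database and credentials are placeholders):

from sanicdb import SanicDB

db = SanicDB('localhost', 'testdb', 'root', 'the_password',
             minsize=3, maxsize=5,
             return_dict=True,
             pool_recycle=7 * 3600,
             autocommit=True)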

3. Function definitions

sanicdb provides six functions.

  • query

    Query the database, as the name says.

    async def query(self, query, *parameters, **kwparameters):
        """Returns a row list for the given query and parameters."""
  • get

    Query the database and return only one row.

    async def get(self, query, *parameters, **kwparameters):
        """Returns the (singular) row returned by the given query.
        """
  • execute

    Execute a statement and return the affected row id (lastrowid).

    async def execute(self, query, *parameters, **kwparameters):
        """Executes the given query, returning the lastrowid from the query."""
  • table_has

    Check whether a table contains a record whose field equals a given value.

    async def table_has(self, table_name, field, value):
  • table_insert

    Insert a record into the database.

    async def table_insert(self, table_name, item, ignore_duplicated=True):
        '''item is a dict : key is mysql table field'''
  • table_update

    Update the database.

    async def table_update(self, table_name, updates,
                           field_where, value_where):
        '''updates is a dict of {field_update:value_update}'''

4. Examples

  1. A plain async MySQL example, following the test given on the project page


    import asyncio
    from sanicdb import SanicDB


    async def test(loop):
        db = SanicDB('localhost', 'testdb', 'root', 'the_password',
                     minsize=3, maxsize=5,
                     connect_timeout=5,
                     loop=loop)
        sql = 'Drop table test'
        await db.execute(sql)

        sql = """CREATE TABLE `test` (
        `id` int(8) NOT NULL AUTO_INCREMENT,
        `name` varchar(16) NOT NULL,
        `content` varchar(255) NOT NULL,
        PRIMARY KEY (`id`),
        UNIQUE KEY `name` (`name`)
        ) ENGINE=MyISAM ;"""
        await db.execute(sql)

        sql = 'select * from test where name = %s'
        data = await db.query(sql, 'abc')
        print('query():', data)

        sql += ' limit 1'
        d = await db.get(sql, 'abc')
        print('get():', d)

        sql = 'delete from test where name=%s'
        lastrowid = await db.execute(sql, 'xyz')
        print('execute(delete...):', lastrowid)
        sql = 'insert into test set name=%s, content=%s'
        lastrowid = await db.execute(sql, 'xyz', '456')
        print('execute(insert...):', lastrowid)

        ret = await db.table_has('test', 'name', 'abc')
        print('has(): ', ret)

        ret = await db.table_update('test', {'content': 'updated'},
                                    'name', 'abc')
        print('update():', ret)
        sql = 'select * from test where name = %s'
        data = await db.query(sql, 'abc')
        print('query():', data)

        item = {
            'name': 'abc',
            'content': '123'
        }
        i = 0
        while 1:
            i += 1
            if i % 2 == 0:
                lastid = await db.table_insert('test', item, ignore_duplicated=False)
            else:
                lastid = await db.table_insert('test', item)
            print('insert():', lastid)
            await asyncio.sleep(1)


    if __name__ == '__main__':
        loop = asyncio.get_event_loop()
        loop.run_until_complete(test(loop))
  2. Querying MySQL asynchronously inside a Sanic web app

    # author: sunshine
    # datetime: 2020/10/29 10:12
    from sanic import Sanic
    from sanic_cors import CORS
    from sanic.response import json
    from sanicdb import SanicDB

    app = Sanic(__name__)
    # enable cross-origin requests
    CORS(app)

    db = SanicDB(
        host='172.27.0.6',
        database='privacy',
        user='root',
        password='PdiCond$402875432',
        sanic=app,
        maxsize=18
    )


    @app.route('/simple', methods=['POST'])
    async def simple_query(request):
        """
        A simple query test against the database
        :param request:
        :return:
        """
        sql_str = 'select * from employee'
        num = 0
        data = await app.db.query(sql_str)
        num += len(data)
        return json({"查询条数": num})


    if __name__ == '__main__':
        app.run('0.0.0.0', port=5067)

  1. Inspect the container's startup script

    docker inspect $image_name

    image-20210622110807715

1. tensorflow.python.framework.errors_impl.FailedPreconditionError: Error while reading resource variable softmax/kernel from Container: localhost. This could mean that the variable was uninitialized. Not found: Resource localhost/softmax/kernel/N10tensorflow3VarE does not exist.

I recently ran into this error while deploying a Keras service with Flask. From the message it looks like a variable was not initialized; after searching around, I found a working solution:

creating a reference to the session that is used for loading the models and then to set it to be used by keras in each request

Create the session before loading the model, and set the same session whenever the model's predict is called, so the entire project uses one and the same session for the model. This solved the problem.

import tensorflow as tf
from tensorflow.python.keras.backend import set_session
from tensorflow.python.keras.models import load_model

tf_config = some_custom_config  # e.g. your tf session configuration
sess = tf.Session(config=tf_config)
graph = tf.get_default_graph()

# IMPORTANT: models have to be loaded AFTER SETTING THE SESSION for keras!
# Otherwise, their weights will be unavailable in the threads after the session there has been set
set_session(sess)
model = load_model(...)

When calling the model, set it up as follows:

global sess
global graph
with graph.as_default():
    set_session(sess)
    model.predict(...)

References:

https://github.com/tensorflow/tensorflow/issues/28287

https://www.datalearner.com/blog/1051572578207468

2. XLA_GPU

Reference: https://stackoverflow.com/questions/53696654/why-keras-does-not-see-my-gpu-while-tensorflow-does?r=SearchResults

Fix:

sudo chmod -R a+r /usr/local/cuda*