
YOLOv4 Model Deployment

Depending on the application scenario, several ways to deploy YOLOv4 are provided here.

  • opencv
  • onnx
  • TensorRT
  • triton server

1. opencv

The original Darknet model can be loaded directly with OpenCV, which provides some hardware acceleration for YOLO. The loading code is as follows:

import numpy as np
import cv2
import time
import os
import requests
from datetime import datetime


class ObjectRecognition(object):
    def __init__(self,
                 label_path,
                 config_path,
                 weights_path,
                 upload_url,
                 model_name='fire',
                 confidence_thre=0.5,
                 nms_thre=0.3,
                 alert_label=None):
        self.alert_label = alert_label if alert_label else []
        self.confidence_thre = confidence_thre
        self.nms_thre = nms_thre
        self.upload_url = upload_url.rstrip('/') + "/"
        self.model_name = model_name
        self.net = self.load_model(config_path, weights_path)

        # Load the class label file
        self.LABELS = open(label_path).read().strip().split("\n")
        self.nclass = len(self.LABELS)
        # Get the names of the YOLO output layers
        ln = self.net.getLayerNames()
        self.ln = [ln[i[0] - 1] for i in self.net.getUnconnectedOutLayers()]

    def upload_image(self, img):
        file_name = "recognized_{}.jpg".format(int(time.time()))
        img_dir = "/".join([self.model_name, datetime.now().strftime("%Y%m")])
        data = cv2.imencode(".jpg", img)[1].tobytes()
        files = [
            ('file', (file_name, data, 'image/png'))
        ]

        result = requests.post(self.upload_url + img_dir, files=files)
        if result.status_code == 200:
            path = img_dir + "/" + file_name
        else:
            path = ""
        return path

    def load_model(self, config_path, weights_path):
        # Load the model config and weights files
        net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
        return net

    def detect(self, img):
        s = time.time()
        # Assign a random color to each class for its bounding boxes
        np.random.seed(42)
        COLORS = np.random.randint(0, 255, size=(self.nclass, 3), dtype='uint8')
        # Get the image dimensions
        (H, W) = img.shape[:2]

        # Build a blob from the image, set the input size, then run one
        # forward pass of the YOLO network to get bounding boxes and scores
        blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (416, 416), swapRB=True, crop=False)

        self.net.setInput(blob)

        layerOutputs = self.net.forward(self.ln)

        end = time.time()
        # Initialize bounding boxes, confidences (probabilities) and class IDs
        boxes = []
        confidences = []
        classIDs = []
        # Iterate over the output layers (three in total)
        for output in layerOutputs:
            # Iterate over each detection
            for detection in output:
                # Extract the class ID and confidence
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]
                # Keep only boxes whose confidence exceeds the threshold
                if confidence > self.confidence_thre:
                    # Scale the box coordinates back to the original image; note that
                    # YOLO returns the box center plus the box width and height
                    box = detection[0:4] * np.array([W, H, W, H])
                    (centerX, centerY, width, height) = box.astype("int")
                    # Compute the top-left corner of the box
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))
                    # Record the box, confidence (probability) and class ID
                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    classIDs.append(classID)

        # Use non-maximum suppression to drop weak, overlapping boxes
        idxs = cv2.dnn.NMSBoxes(boxes, confidences, self.confidence_thre, self.nms_thre)
        loc = []
        flag = False
        # Make sure at least one box remains
        if len(idxs) > 0:

            # Iterate over the kept boxes
            for i in idxs.flatten():

                if classIDs[i] in self.alert_label:
                    flag = True

                # Extract the box coordinates
                (x, y) = (boxes[i][0], boxes[i][1])
                (w, h) = (boxes[i][2], boxes[i][3])
                # Draw the box and put the class label and confidence at its top-left corner
                color = [int(c) for c in COLORS[classIDs[i]]]
                cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
                text = '{}: {:.3f}'.format(self.LABELS[classIDs[i]], confidences[i])
                (text_w, text_h), baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
                cv2.rectangle(img, (x, y - text_h - baseline), (x + text_w, y), color, -1)
                cv2.putText(img, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
                loc.append([x, y, w, h])
            m = time.time()

            if flag:
                # Upload to the cloud file server
                img_path = self.upload_image(img)
            else:
                img_path = ""
            e = time.time()
            print("detect time", m - s, "upload time", e - m, "total time", e - s)
            return img_path, flag
        else:
            return "", flag

2. onnx

For environments with a GPU, the Darknet model can be uniformly converted to ONNX format with a conversion tool:

The converted ONNX file can be loaded for inference as follows:

import numpy as np
import cv2
import time
import onnxruntime
import requests
from datetime import datetime
import math
import urllib


class ObjectRecognition(object):
    def __init__(self,
                 label_path,
                 onnx_path,
                 upload_url='http://192.168.0.15:8080/ai/upload/',
                 model_name='fire',
                 confidence_thre=0.5,
                 nms_thre=0.3,
                 alert_label=None):
        self.alert_label = alert_label if alert_label else []

        # Load the class label file
        self.class_names = open(label_path).read().strip().split("\n")

        self.confidence_thre = confidence_thre
        self.nms_thre = nms_thre
        self.upload_url = upload_url.rstrip('/') + "/"
        self.model_name = model_name
        self.session = onnxruntime.InferenceSession(onnx_path)

    def upload_image(self, img):
        file_name = "recognized_{}.jpg".format(int(time.time()))
        img_dir = "/".join([self.model_name, datetime.now().strftime("%Y%m")])
        data = cv2.imencode(".jpg", img)[1].tobytes()
        files = [
            ('file', (file_name, data, 'image/png'))
        ]

        result = requests.post(self.upload_url + img_dir, files=files)
        if result.status_code == 200:
            path = img_dir + "/" + file_name
        else:
            path = ""
        return path

    def detect(self, img):
        IN_IMAGE_H = self.session.get_inputs()[0].shape[2]
        IN_IMAGE_W = self.session.get_inputs()[0].shape[3]

        # Input
        resized = cv2.resize(img, (IN_IMAGE_W, IN_IMAGE_H), interpolation=cv2.INTER_LINEAR)
        img_in = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
        img_in = np.transpose(img_in, (2, 0, 1)).astype(np.float32)
        img_in = np.expand_dims(img_in, axis=0)
        img_in /= 255.0
        print("Shape of the network input: ", img_in.shape)

        # Compute
        input_name = self.session.get_inputs()[0].name

        outputs = self.session.run(None, {input_name: img_in})

        boxes = self.post_processing(0.4, 0.6, outputs)

        img_path, flag = self.plot_boxes(img, boxes[0])
        return img_path, flag

    def post_processing(self, conf_thresh, nms_thresh, output):
        # [batch, num, 1, 4]
        box_array = output[0]
        # [batch, num, num_classes]
        confs = output[1]

        t1 = time.time()

        if type(box_array).__name__ != 'ndarray':
            box_array = box_array.cpu().detach().numpy()
            confs = confs.cpu().detach().numpy()

        num_classes = confs.shape[2]

        # [batch, num, 4]
        box_array = box_array[:, :, 0]

        # [batch, num, num_classes] --> [batch, num]
        max_conf = np.max(confs, axis=2)
        max_id = np.argmax(confs, axis=2)

        t2 = time.time()

        bboxes_batch = []
        for i in range(box_array.shape[0]):

            argwhere = max_conf[i] > conf_thresh
            l_box_array = box_array[i, argwhere, :]
            l_max_conf = max_conf[i, argwhere]
            l_max_id = max_id[i, argwhere]

            bboxes = []
            # nms for each class
            for j in range(num_classes):

                cls_argwhere = l_max_id == j
                ll_box_array = l_box_array[cls_argwhere, :]
                ll_max_conf = l_max_conf[cls_argwhere]
                ll_max_id = l_max_id[cls_argwhere]

                keep = self.nms_cpu(ll_box_array, ll_max_conf, nms_thresh)

                if (keep.size > 0):
                    ll_box_array = ll_box_array[keep, :]
                    ll_max_conf = ll_max_conf[keep]
                    ll_max_id = ll_max_id[keep]

                    for k in range(ll_box_array.shape[0]):
                        bboxes.append(
                            [ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], ll_box_array[k, 3],
                             ll_max_conf[k],
                             ll_max_conf[k], ll_max_id[k]])

            bboxes_batch.append(bboxes)

        t3 = time.time()
        return bboxes_batch

    def nms_cpu(self, boxes, confs, nms_thresh=0.5, min_mode=False):
        # print(boxes.shape)
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]

        areas = (x2 - x1) * (y2 - y1)
        order = confs.argsort()[::-1]

        keep = []
        while order.size > 0:
            idx_self = order[0]
            idx_other = order[1:]

            keep.append(idx_self)

            xx1 = np.maximum(x1[idx_self], x1[idx_other])
            yy1 = np.maximum(y1[idx_self], y1[idx_other])
            xx2 = np.minimum(x2[idx_self], x2[idx_other])
            yy2 = np.minimum(y2[idx_self], y2[idx_other])

            w = np.maximum(0.0, xx2 - xx1)
            h = np.maximum(0.0, yy2 - yy1)
            inter = w * h

            if min_mode:
                over = inter / np.minimum(areas[order[0]], areas[order[1:]])
            else:
                over = inter / (areas[order[0]] + areas[order[1:]] - inter)

            inds = np.where(over <= nms_thresh)[0]
            order = order[inds + 1]

        return np.array(keep)

    def plot_boxes(self, img, boxes):
        COLORS = np.random.randint(0, 255, size=(len(self.class_names), 3), dtype='uint8')

        width = img.shape[1]
        height = img.shape[0]
        flag = False
        for i in range(len(boxes)):

            box = boxes[i]
            x1 = int(box[0] * width)
            y1 = int(box[1] * height)
            x2 = int(box[2] * width)
            y2 = int(box[3] * height)

            if len(box) >= 7 and self.class_names:
                cls_conf = box[5]
                cls_id = box[6]

                if cls_id in self.alert_label:
                    flag = True

                color = [int(c) for c in COLORS[cls_id]]
                text = '{}: {:.3f}'.format(self.class_names[cls_id], cls_conf)
                img = cv2.putText(img, text, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
                img = cv2.rectangle(img, (x1, y1), (x2, y2), color, 1)

        # cv2.imwrite('predictions_onnx.jpg', img)
        if flag:
            # Upload to the cloud file server
            img_path = self.upload_image(img)
        else:
            img_path = ""
        return img_path, flag

3. triton server

For the details of the Triton deployment, see: NVIDIA Triton server deployment and testing.

4. TensorRT

The converter shipped with TensorRT makes it easy to turn an ONNX model into a TensorRT engine.

/usr/src/tensorrt/bin/trtexec --onnx=/sdk/sunshine/trained_model/yolov4/onnx/helmet/yolov4_helmet_1_3_608_608_static.onnx --explicitBatch --saveEngine=yolov4_helmet_1_3_608_608_static.trt

To convert to half precision, simply append the --fp16 flag to this command.

The TensorRT engine can then be loaded and used for inference as follows:

import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import requests
import time
from datetime import datetime
import cv2
import urllib
import numpy as np

TRT_LOGGER = trt.Logger()


class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()


class TRTModelPredict:
    def __init__(self, engine_path, shape=(608, 608)):
        shape = (1, 3, shape[0], shape[1])
        self.engine = self.get_engine(engine_path)
        self.context = self.engine.create_execution_context()

        self.buffers = self.allocate_buffers(self.engine, 1)
        self.context.set_binding_shape(0, shape)

    def allocate_buffers(self, engine, batch_size):
        inputs = []
        outputs = []
        bindings = []
        stream = cuda.Stream()
        for binding in engine:

            size = trt.volume(engine.get_binding_shape(binding)) * batch_size
            dims = engine.get_binding_shape(binding)

            # in case batch dimension is -1 (dynamic)
            if dims[0] < 0:
                size *= -1

            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            bindings.append(int(device_mem))
            # Append to the appropriate list.
            if engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))
        return inputs, outputs, bindings, stream

    def get_engine(self, engine_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_path))
        with open(engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())

    def do_inference(self, img_in):

        inputs, outputs, bindings, stream = self.buffers
        inputs[0].host = img_in
        for i in range(2):
            # Transfer input data to the GPU.
            [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
            # Run inference.
            self.context.execute_async(bindings=bindings, stream_handle=stream.handle)
            # Transfer predictions back from the GPU.
            [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
            # Synchronize the stream
            stream.synchronize()
        # Return only the host outputs.
        return [out.host for out in outputs]


class Hat(TRTModelPredict):
    def __init__(self, label_path, engine_path, shape=(608, 608), upload_url='http://192.168.0.15:8080/ai/upload/',
                 model_name='fire', alert_label=None):
        self.shape = shape
        self.alert_label = alert_label if alert_label else []
        self.class_names = open(label_path).read().strip().split("\n")
        self.model_name = model_name
        self.upload_url = upload_url.rstrip('/') + "/"
        super(Hat, self).__init__(engine_path, shape)

    def pre_process(self, image_src):
        resized = cv2.resize(image_src, self.shape, interpolation=cv2.INTER_LINEAR)
        img_in = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
        img_in = np.transpose(img_in, (2, 0, 1)).astype(np.float32)
        img_in = np.expand_dims(img_in, axis=0)
        img_in /= 255.0
        img_in = np.ascontiguousarray(img_in)
        return img_in

    def post_processing(self, conf_thresh, nms_thresh, output):
        box_array = output[0]
        # [batch, num, num_classes]
        confs = output[1]

        if type(box_array).__name__ != 'ndarray':
            box_array = box_array.cpu().detach().numpy()
            confs = confs.cpu().detach().numpy()

        num_classes = confs.shape[2]

        # [batch, num, 4]
        box_array = box_array[:, :, 0]

        # [batch, num, num_classes] --> [batch, num]
        max_conf = np.max(confs, axis=2)
        max_id = np.argmax(confs, axis=2)

        bboxes_batch = []
        for i in range(box_array.shape[0]):

            argwhere = max_conf[i] > conf_thresh
            l_box_array = box_array[i, argwhere, :]
            l_max_conf = max_conf[i, argwhere]
            l_max_id = max_id[i, argwhere]

            bboxes = []
            # nms for each class
            for j in range(num_classes):

                cls_argwhere = l_max_id == j
                ll_box_array = l_box_array[cls_argwhere, :]
                ll_max_conf = l_max_conf[cls_argwhere]
                ll_max_id = l_max_id[cls_argwhere]

                keep = self.nms_cpu(ll_box_array, ll_max_conf, nms_thresh)

                if (keep.size > 0):
                    ll_box_array = ll_box_array[keep, :]
                    ll_max_conf = ll_max_conf[keep]
                    ll_max_id = ll_max_id[keep]

                    for k in range(ll_box_array.shape[0]):
                        bboxes.append(
                            [ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], ll_box_array[k, 3],
                             ll_max_conf[k],
                             ll_max_conf[k], ll_max_id[k]])

            bboxes_batch.append(bboxes)

        return bboxes_batch

    def nms_cpu(self, boxes, confs, nms_thresh=0.5, min_mode=False):
        # print(boxes.shape)
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]

        areas = (x2 - x1) * (y2 - y1)
        order = confs.argsort()[::-1]

        keep = []
        while order.size > 0:
            idx_self = order[0]
            idx_other = order[1:]

            keep.append(idx_self)

            xx1 = np.maximum(x1[idx_self], x1[idx_other])
            yy1 = np.maximum(y1[idx_self], y1[idx_other])
            xx2 = np.minimum(x2[idx_self], x2[idx_other])
            yy2 = np.minimum(y2[idx_self], y2[idx_other])

            w = np.maximum(0.0, xx2 - xx1)
            h = np.maximum(0.0, yy2 - yy1)
            inter = w * h

            if min_mode:
                over = inter / np.minimum(areas[order[0]], areas[order[1:]])
            else:
                over = inter / (areas[order[0]] + areas[order[1:]] - inter)

            inds = np.where(over <= nms_thresh)[0]
            order = order[inds + 1]

        return np.array(keep)

    def detect(self, img):
        img_in = self.pre_process(img)
        trt_outputs = self.do_inference(img_in)
        trt_outputs[0] = trt_outputs[0].reshape(1, -1, 1, 4)
        trt_outputs[1] = trt_outputs[1].reshape(1, -1, len(self.class_names))

        boxes = self.post_processing(0.4, 0.6, trt_outputs)
        return self.plot_boxes(img, boxes[0])

    def plot_boxes(self, img, boxes):
        COLORS = np.random.randint(0, 255, size=(len(self.class_names), 3), dtype='uint8')

        width = img.shape[1]
        height = img.shape[0]
        flag = False
        for i in range(len(boxes)):

            box = boxes[i]
            x1 = int(box[0] * width)
            y1 = int(box[1] * height)
            x2 = int(box[2] * width)
            y2 = int(box[3] * height)

            if len(box) >= 7 and self.class_names:
                cls_conf = box[5]
                cls_id = box[6]

                if cls_id in self.alert_label:
                    flag = True

                color = [int(c) for c in COLORS[cls_id]]
                text = '{}: {:.3f}'.format(self.class_names[cls_id], cls_conf)
                img = cv2.putText(img, text, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
                img = cv2.rectangle(img, (x1, y1), (x2, y2), color, 1)

        # cv2.imwrite('predictions_onnx.jpg', img)
        if flag:
            # Upload to the cloud file server
            img_path = self.upload_image(img)
        else:
            img_path = ""
        return img_path, flag

    def upload_image(self, img):
        file_name = "recognized_{}.jpg".format(int(time.time()))
        img_dir = "/".join([self.model_name, datetime.now().strftime("%Y%m")])
        data = cv2.imencode(".jpg", img)[1].tobytes()
        files = [
            ('file', (file_name, data, 'image/png'))
        ]

        result = requests.post(self.upload_url + img_dir, files=files)
        if result.status_code == 200:
            path = img_dir + "/" + file_name
        else:
            path = ""
        return path

Performance comparison

Using the helmet-detection model, the three inference approaches were compared; the results are as follows.

Format     Inference time (s)     Confidence   Notes
opencv     0.3789951801300049     0.533
onnx       0.44134068489074707    0.425
TensorRT   0.12151765823364258    0.423

Surprisingly, OpenCV is even faster than ONNX here, which seems absurd. On closer inspection, the Jetson has onnxruntime installed rather than onnxruntime-gpu, so the initial suspicion is that GPU acceleration was never actually used.
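A quick way to confirm this suspicion is to ask onnxruntime which device and execution providers it actually uses; a minimal sketch (the helmet ONNX file from above is used only as an example path):

import onnxruntime

print(onnxruntime.get_device())  # prints 'CPU' if the GPU build is not installed
session = onnxruntime.InferenceSession("yolov4_helmet_1_3_608_608_static.onnx")
print(session.get_providers())   # e.g. ['CPUExecutionProvider'] when CUDA is unavailable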

Server environment:

NVIDIA Jetson AGX Xavier - Jetpack 4.5.1 [L4T 32.5.1]

  • torch ==1.9.0
  • onnx == 1.10.2
  • pycuda==2020.1
  • cv2==4.1.1
  • tensorrt=7.1.3.0
  • python 3.6.9

1. pytorch2onnx

import torch
from torchvision import models


def transform_to_onnx(model, shape, onnx_file_name, input_names=["input"], output_names=['boxes', 'confs']):
    batch_size = shape[0]
    dynamic = False
    if batch_size <= 0:
        dynamic = True
    if dynamic:
        x = torch.randn(shape, requires_grad=True)
        dynamic_axes = {name: {0: "batch_size"} for name in input_names + output_names}
        # Export the model
        print('Export the onnx model ...')
        torch.onnx.export(model,
                          x,
                          onnx_file_name,
                          export_params=True,
                          input_names=input_names, output_names=output_names,
                          dynamic_axes=dynamic_axes
                          )
        print('Onnx model exporting done')
        return onnx_file_name

    else:
        x = torch.randn(shape, requires_grad=True)
        # Export the model
        print('Export the onnx model ...')
        torch.onnx.export(model,
                          x,
                          onnx_file_name,
                          export_params=True,
                          do_constant_folding=True,
                          input_names=input_names, output_names=output_names,
                          dynamic_axes=None)

        print('Onnx model exporting done')

Test it with a ResNet model:

model = models.resnet50(pretrained=True)
transform_to_onnx(model,
                  shape=(1, 3, 224, 224),
                  onnx_file_name="resnet50.onnx",
                  input_names=["input"],
                  output_names=["output"]
                  )

resnet50.onnx is generated successfully in the current directory, and the conversion is correct.
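As an optional sanity check (a sketch only, assuming the onnx and onnxruntime packages are installed), the exported file can be validated and run once:

import onnx
import onnxruntime
import numpy as np

model = onnx.load("resnet50.onnx")
onnx.checker.check_model(model)  # raises an exception if the graph is malformed

sess = onnxruntime.InferenceSession("resnet50.onnx")
dummy = np.random.randn(1, 3, 224, 224).astype(np.float32)
out = sess.run(None, {"input": dummy})[0]
print(out.shape)                 # expected: (1, 1000) for resnet50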

2. Visualizing the ONNX model

The netron tool can be used to visualize ONNX models.

python -m pip install netron
netron -h
usage: netron [-h] [-v] [-b] [-p PORT] [--host HOST] [--log] [MODEL_FILE]

Viewer for neural network, deep learning and machine learning models.

positional arguments:
  MODEL_FILE            model file to serve

optional arguments:
  -h, --help            show this help message and exit
  -v, --version         print version
  -b, --browse          launch web browser
  -p PORT, --port PORT  port to serve
  --host HOST           host to serve
  --log                 log details to console

Start netron:

netron --host 0.0.0.0 --port 8080

Open the ONNX file to inspect the model's parameters in detail.

image-20211209153618873

3. onnx2trt

The tool bundled with TensorRT makes the ONNX-to-TRT conversion straightforward.

/usr/src/tensorrt/bin/trtexec --onnx=resnet50.onnx --explicitBatch --saveEngine=resnet50.trt --fp16

The command can also be put on the PATH:

ln -s /usr/src/tensorrt/bin/trtexec /usr/bin/trtexec

4. Test

Build a generic TensorRT model-loading utility, as follows:

# author: sunshine
# datetime: 2021/12/9 2:39 PM

import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit  # important: creates the CUDA context


TRT_LOGGER = trt.Logger()


class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()


class TRTModelPredict:
    def __init__(self, engine_path, shape=(608, 608)):
        shape = (1, 3, shape[0], shape[1])
        self.engine = self.get_engine(engine_path)
        self.context = self.engine.create_execution_context()

        self.buffers = self.allocate_buffers(self.engine, 1)
        self.context.set_binding_shape(0, shape)

    def allocate_buffers(self, engine, batch_size):
        inputs = []
        outputs = []
        bindings = []
        stream = cuda.Stream()
        for binding in engine:

            size = trt.volume(engine.get_binding_shape(binding)) * batch_size
            dims = engine.get_binding_shape(binding)

            # in case batch dimension is -1 (dynamic)
            if dims[0] < 0:
                size *= -1

            dtype = trt.nptype(engine.get_binding_dtype(binding))
            # Allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            # Append the device buffer to device bindings.
            bindings.append(int(device_mem))
            # Append to the appropriate list.
            if engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))
        return inputs, outputs, bindings, stream

    def get_engine(self, engine_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_path))
        with open(engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())

    def do_inference(self, img_in):

        inputs, outputs, bindings, stream = self.buffers
        inputs[0].host = img_in
        for i in range(2):
            # Transfer input data to the GPU.
            [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
            # Run inference.
            self.context.execute_async(bindings=bindings, stream_handle=stream.handle)
            # Transfer predictions back from the GPU.
            [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
            # Synchronize the stream
            stream.synchronize()
        # Return only the host outputs.
        return [out.host for out in outputs]

Taking helmet detection as an example, subclass TRTModelPredict and implement the data pre- and post-processing, and model prediction is complete.
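For a simpler illustration, here is a minimal sketch (not the author's helmet code) that subclasses TRTModelPredict for the resnet50.trt engine built in the previous step, using naive preprocessing (no ImageNet mean/std normalization):

import cv2
import numpy as np


class Resnet50TRT(TRTModelPredict):
    def __init__(self, engine_path, shape=(224, 224)):
        self.shape = shape
        super(Resnet50TRT, self).__init__(engine_path, shape)

    def pre_process(self, image):
        # resize, BGR->RGB, scale to [0, 1], HWC -> 1x3xHxW
        resized = cv2.resize(image, self.shape, interpolation=cv2.INTER_LINEAR)
        img_in = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
        img_in = np.transpose(img_in, (2, 0, 1))[None]
        return np.ascontiguousarray(img_in)

    def detect(self, image):
        logits = self.do_inference(self.pre_process(image))[0]
        return int(np.argmax(logits))  # predicted class index


# usage sketch:
# model = Resnet50TRT("resnet50.trt")
# print(model.detect(cv2.imread("test.jpg")))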

The result is as follows:

image-20211209174956684

Full code: https://github.com/fushengwuyu/torch2tensorrt_demos

NeMo Study Notes

1. Key points

  1. All arguments must be passed by kwargs only for typed methods

    With NeMo, every function call must pass its arguments by keyword, for example:

    f1(name='zhangsan', age=12)  # correct
    f1('zhangsan', 12)  # wrong: raises the error in the title above
  2. The nlp module is unavailable

    No long story: upgrading from Python 3.6.9 to Python 3.8 fixes it.

  3. Exporting an ONNX model

    model.export('xx.onnx', onnx_opset_version=12)  # the default opset 13 is not supported

1. triton server

  1. Install the Triton server

    • x86

      1. Pull the image

        docker pull nvcr.io/nvidia/tritonserver:21.07-py3
      2. Run

        docker run -it --rm -p8000:8000 -p8001:8001 -p8002:8002 -v /home/sunshine/infer/:/models nvcr.io/nvidia/tritonserver:21.07-py3 tritonserver --model-repository=/models --strict-model-config=false
    • jetson

      NVIDIA has not yet released a Triton container that supports Jetson, so it has to be installed manually.

      Reference: https://github.com/triton-inference-server/server/blob/main/docs/jetson.md

      1. Download the release package matching your JetPack version and extract it to /opt/tritonserver

        Download: https://github.com/triton-inference-server/server/releases

        Note: if it is extracted to another directory, startup fails with missing shared-library errors; you could add the required libraries to the search path, but extracting to /opt/tritonserver is simplest.

      2. Install the dependencies

        apt-get update && \
        apt-get install -y --no-install-recommends \
        software-properties-common \
        autoconf \
        automake \
        build-essential \
        cmake \
        git \
        libb64-dev \
        libre2-dev \
        libssl-dev \
        libtool \
        libboost-dev \
        libcurl4-openssl-dev \
        libopenblas-dev \
        rapidjson-dev \
        patchelf \
        zlib1g-dev
      3. Start the service

        bin/tritonserver --model-repository=/sdk/python/triton/models
  2. Create the model repository

    The model repository layout is as follows:

    <model-repository-path>/
      <model-name>/
        [config.pbtxt]             # optional for TensorRT, TensorFlow saved-model and ONNX models; required for all other model types
        [<output-labels-file> ...]
        <version>/                 # 1, 2, 3, ...
          <model-definition-file>  # model.onnx, model.pb, ...
        <version>/
          <model-definition-file>
        ...
      <model-name>/
        [config.pbtxt]
        [<output-labels-file> ...]
        <version>/
          <model-definition-file>
        <version>/
          <model-definition-file>
        ...
      ...

    See the official docs for more on model configuration: https://github.com/triton-inference-server/server/blob/main/docs/model_configuration.md#model-configuration

    Taking text classification as an example, the model repository is laid out as:

    [root@bg1 sunshine]# tree infer/
    infer/
    ├── densenet_onnx              # model name
    │   ├── 1                      # version
    │   │   └── model.onnx         # model file, named model.xxx by default
    │   ├── config.pbtxt           # config file
    │   └── densenet_labels.txt    # class labels, included or not depending on the config
    └── text_class
        ├── 1
        │   └── model.onnx
        └── config.pbtxt

    4 directories, 5 files
  3. Add the config file

    The config file defaults to config.pbtxt inside the model directory. Its format is not JSON and must strictly follow the format given in the official docs, otherwise loading fails.

    Again taking text classification as an example:

    platform: "onnxruntime_onnx"
    max_batch_size : 0
    input [
      {
        name: "token_type_ids"
        data_type: TYPE_INT64
        format: FORMAT_NONE
        dims: [-1,-1]
      },
      {
        name: "attention_mask"
        data_type: TYPE_INT64
        format: FORMAT_NONE
        dims: [-1,-1]
      },
      {
        name: "input_ids"
        data_type: TYPE_INT64
        format: FORMAT_NONE
        dims: [-1,-1]
      }
    ]
    output: [
      {
        name: "logits"
        data_type: TYPE_FP32
        dims: [-1,2]
        label_filename: ""
      }
    ]

    If the model is a TensorRT, TensorFlow saved-model or ONNX model, config.pbtxt can be omitted; just pass --strict-model-config=false when starting the server.

    The effective configuration can then be fetched with:

    curl 192.168.0.15:8000/v2/models/text_class/config
    {
      "name": "text_class",
      "platform": "onnxruntime_onnx",
      "backend": "onnxruntime",
      "version_policy": {
        "latest": {
          "num_versions": 1
        }
      },
      "max_batch_size": 1,
      "input": [
        {
          "name": "token_type_ids",
          "data_type": "TYPE_INT64",
          "format": "FORMAT_NONE",
          "dims": [
            -1
          ],
          "is_shape_tensor": false,
          "allow_ragged_batch": false
        },
        {
          "name": "attention_mask",
          "data_type": "TYPE_INT64",
          "format": "FORMAT_NONE",
          "dims": [
            -1
          ],
          "is_shape_tensor": false,
          "allow_ragged_batch": false
        },
        {
          "name": "input_ids",
          "data_type": "TYPE_INT64",
          "format": "FORMAT_NONE",
          "dims": [
            -1
          ],
          "is_shape_tensor": false,
          "allow_ragged_batch": false
        }
      ],
      "output": [
        {
          "name": "logits",
          "data_type": "TYPE_FP32",
          "dims": [
            2
          ],
          "label_filename": "",
          "is_shape_tensor": false
        }
      ],
      "batch_input": [],
      "batch_output": [],
      "optimization": {
        "priority": "PRIORITY_DEFAULT",
        "input_pinned_memory": {
          "enable": true
        },
        "output_pinned_memory": {
          "enable": true
        },
        "gather_kernel_buffer_threshold": 0,
        "eager_batching": false
      },
      "instance_group": [
        {
          "name": "text_class",
          "kind": "KIND_CPU",
          "count": 1,
          "gpus": [],
          "secondary_devices": [],
          "profile": [],
          "passive": false,
          "host_policy": ""
        }
      ],
      "default_model_filename": "model.onnx",
      "cc_model_filenames": {},
      "metric_tags": {},
      "parameters": {},
      "model_warmup": []
    }
  4. Launch the service

    • From the command line

      tritonserver --model-repository=/models --strict-model-config=false
    • With a Docker container

      docker run -it --rm -p8000:8000 -p8001:8001 -p8002:8002 -v /home/sunshine/infer/:/models nvcr.io/nvidia/tritonserver:21.07-py3 tritonserver --model-repository=/models --strict-model-config=false

      After the service starts, it prints information like the following:

      =============================
      == Triton Inference Server ==
      =============================

      NVIDIA Release 21.07 (build 24810355)

      Copyright (c) 2018-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.

      Various files include modifications (c) NVIDIA CORPORATION. All rights reserved.

      ...

      I0819 02:54:33.978409 1 onnxruntime.cc:2072] TRITONBACKEND_ModelInstanceInitialize: text_class (CPU device 0)
      I0819 02:54:36.500230 1 onnxruntime.cc:2072] TRITONBACKEND_ModelInstanceInitialize: densenet_onnx (CPU device 0)
      I0819 02:54:36.501075 1 model_repository_manager.cc:1212] successfully loaded 'text_class' version 1
      I0819 02:54:37.042157 1 model_repository_manager.cc:1212] successfully loaded 'densenet_onnx' version 1
      I0819 02:54:37.042560 1 server.cc:504]
      +------------------+------+
      | Repository Agent | Path |
      +------------------+------+
      +------------------+------+

      I0819 02:54:37.042936 1 server.cc:543]
      +-------------+-----------------------------------------------------------------+--------+
      | Backend | Path | Config |
      +-------------+-----------------------------------------------------------------+--------+
      | tensorrt | <built-in> | {} |
      | pytorch | /opt/tritonserver/backends/pytorch/libtriton_pytorch.so | {} |
      | tensorflow | /opt/tritonserver/backends/tensorflow1/libtriton_tensorflow1.so | {} |
      | onnxruntime | /opt/tritonserver/backends/onnxruntime/libtriton_onnxruntime.so | {} |
      | openvino | /opt/tritonserver/backends/openvino/libtriton_openvino.so | {} |
      +-------------+-----------------------------------------------------------------+--------+

      I0819 02:54:37.043287 1 server.cc:586]
      +---------------+---------+--------+
      | Model | Version | Status |
      +---------------+---------+--------+
      | densenet_onnx | 1 | READY |
      | text_class | 1 | READY |
      +---------------+---------+--------+

      I0819 02:54:37.043677 1 tritonserver.cc:1718]
      +----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
      | Option | Value |
      +----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
      | server_id | triton |
      | server_version | 2.12.0 |
      | server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data |
      | | statistics |
      | model_repository_path[0] | /models |
      | model_control_mode | MODE_NONE |
      | strict_model_config | 0 |
      | pinned_memory_pool_byte_size | 268435456 |
      | min_supported_compute_capability | 6.0 |
      | strict_readiness | 1 |
      | exit_timeout | 30 |
      +----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

      I0819 02:54:37.046408 1 grpc_server.cc:4072] Started GRPCInferenceService at 0.0.0.0:8001
      I0819 02:54:37.046930 1 http_server.cc:2795] Started HTTPService at 0.0.0.0:8000
      I0819 02:54:37.090841 1 sagemaker_server.cc:134] Started Sagemaker HTTPService at 0.0.0.0:8080
      I0819 02:54:37.134294 1 http_server.cc:162] Started Metrics Service at 0.0.0.0:8002

    • Verify

      [root@bg1 ~]# curl -v localhost:8000/v2/health/ready

      * About to connect() to localhost port 8000 (#0)
      * Trying ::1...
      * Connected to localhost (::1) port 8000 (#0)
      > GET /v2/health/ready HTTP/1.1
      > User-Agent: curl/7.29.0
      > Host: localhost:8000
      > Accept: */*
      >
      < HTTP/1.1 200 OK
      < Content-Length: 0
      < Content-Type: text/plain
      <
      * Connection #0 to host localhost left intact

  5. yolov4

2. triton client

Official site: https://github.com/triton-inference-server/client

  1. Local installation

    pip install nvidia-pyindex
    pip install tritonclient[http] # [all, http, grpc, utils]

    Dependencies required by the HTTP client:

    geventhttpclient>=1.4.4
    numpy>=1.19.1
    python-rapidjson>=0.9.1

    Dependencies for the other client types: https://github.com/triton-inference-server/client/tree/main/src/python/library/requirements

  2. Container

    • Pull the image

      docker pull nvcr.io/nvidia/tritonserver:21.07-py3-sdk
    • Start the container

      docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:21.07-py3-sdk
    • Run the example client

      /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
      Request 0, batch size 1
      Image '/workspace/images/mug.jpg':
      15.346230 (504) = COFFEE MUG
      13.224326 (968) = CUP
      10.422965 (505) = COFFEEPOT
  3. Calling the service from Python client code

    An HTTP text-classification request looks like this:

    # author: sunshine
    # datetime: 2021/8/19 11:01 AM
    import numpy as np

    import tritonclient.http as httpclient
    from attrdict import AttrDict
    from transformers import BertTokenizer
    from tritonclient.utils import triton_to_np_dtype


    def convert_http_metadata_config(_metadata, _config):
        _model_metadata = AttrDict(_metadata)
        _model_config = AttrDict(_config)

        return _model_metadata, _model_config


    def parse_model(model_metadata, model_config):
        """
        input_dtype,
        :param model_metadata:
        :param model_config:

        """
        input_metadata = model_metadata.inputs
        output_metadata = model_metadata.outputs
        max_batch_size = model_config.max_batch_size
        input_params = {i.name: i.datatype for i in input_metadata}
        output_names = [o.name for o in output_metadata]
        return max_batch_size, input_params, output_names


    def preprocess(text, input_param, output_names, max_len=128):
        client = httpclient

        inputs = tokenizer(text, padding='longest', max_length=max_len, truncation='longest_first')

        input_data = []
        names = ['input_ids', 'attention_mask', 'token_type_ids']

        for name in names:
            ndtype = triton_to_np_dtype(input_param[name])
            data = np.array(inputs[name]).astype(ndtype)

            data_t = client.InferInput(name, list(data.shape), input_param[name])
            data_t.set_data_from_numpy(data)

            input_data.append(data_t)

        outputs = [
            client.InferRequestedOutput(out_name) for out_name in output_names
        ]

        return input_data, outputs


    def postprocess(results, output_names):
        """
        Process the response results

        """
        logit_name = output_names[0]

        output = results.as_numpy(logit_name)
        pred = np.argmax(output, axis=-1)
        return pred


    if __name__ == '__main__':
        model_name = 'text_class'
        model_version = ''  # if empty, the latest version is selected
        url = "192.168.0.15:8000"
        bert_path = '/home/sunshine/pre_models/pytorch/bert-base-chinese'
        tokenizer = BertTokenizer.from_pretrained(bert_path)

        triton_client = httpclient.InferenceServerClient(url=url, verbose=False)

        model_metadata = triton_client.get_model_metadata(
            model_name=model_name, model_version=model_version)

        model_config = triton_client.get_model_config(
            model_name=model_name, model_version=model_version)

        model_metadata, model_config = convert_http_metadata_config(
            model_metadata, model_config)

        max_batch_size, input_params, output_names = parse_model(model_metadata, model_config)

        texts = ['今天天气真好', '我讨厌你']
        inputs, outputs = preprocess(texts, input_params, output_names)

        results = triton_client.infer(model_name,
                                      inputs,
                                      request_id=str(1),
                                      model_version=model_version,
                                      outputs=outputs)
        pred = postprocess(results, output_names)
        print(pred)

    output:

    [1 0]

The models chosen are nanodet_plus, FastestDet and yolo-fastestv2.

Here we standardize on the ONNX format and load it for inference in three ways:

  • onnxruntime
  • openvino
  • opencv

The model-loading code is as follows:

# author: sunshine
# datetime: 2022/8/17 2:53 PM
import onnxruntime
import cv2
from openvino.runtime import Core


class ONNXPredict:
    def __init__(self, onnx_path):
        self.session = onnxruntime.InferenceSession(onnx_path)

    def do_inference(self, img_in):
        # Compute
        input_name = self.session.get_inputs()[0].name
        outputs = self.session.run(None, {input_name: img_in})
        return outputs


class OpencvPredict:
    def __init__(self, onnx_path, w, h):
        self.net = cv2.dnn.readNet(onnx_path)
        self.w = w
        self.h = h

    def do_inference(self, img_in):
        blob = cv2.dnn.blobFromImage(img_in, 1 / 255.0, (self.w, self.h))
        self.net.setInput(blob)
        outs = self.net.forward(self.net.getUnconnectedOutLayersNames())
        return outs


class OpenvinoPredict:
    def __init__(self, engine_path):
        ie = Core()
        model = ie.read_model(engine_path)
        self.compiled_model = ie.compile_model(model=model, device_name='CPU')

    def do_inference(self, img_in):
        result_infer = self.compiled_model([img_in])
        return list(result_infer.values())

The full inference code for the three lightweight detectors is at: https://github.com/fushengwuyu/light_objectdetect
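As a rough illustration of how such timings can be collected (a sketch only: it assumes a model with a single 1x3xHxW float input in [0, 1]; the model path is a placeholder, and the real per-model pre/post-processing lives in the repository above):

import time
import cv2
import numpy as np


def prepare(img, w, h):
    # naive preprocessing: resize, BGR->RGB, scale to [0, 1], HWC -> 1x3xHxW
    resized = cv2.resize(img, (w, h))
    blob = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    return np.ascontiguousarray(np.transpose(blob, (2, 0, 1))[None])


img = cv2.imread("test.jpg")
w = h = 416                          # e.g. the nanodet_plus input resolution
onnx_file = "nanodet_plus.onnx"      # placeholder model path

backends = {
    "onnx": ONNXPredict(onnx_file),
    "openvino": OpenvinoPredict(onnx_file),
    "opencv": OpencvPredict(onnx_file, w, h),
}
for name, backend in backends.items():
    # OpencvPredict builds its own blob internally; the others take the NCHW tensor
    x = img if name == "opencv" else prepare(img, w, h)
    start = time.time()
    backend.do_inference(x)
    print(name, round(time.time() - start, 4))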

Inference time in seconds (tested on a local laptop, CPU: i7-7700HQ, 8 cores):

Model            onnx     openvino   opencv   Resolution
nanodet_plus     0.0842   0.0503     0.2394   416*416
FastestDet       0.0200   0.0182     0.0747   512*512
yolo-fastestv2   0.0125   0.01271    0.0380   352*352

Hardware resource usage (edge server: 6 cores, 2 GB RAM; it could not handle OpenVINO, so that backend was not tested):

Model            CPU (onnx)   memory (onnx)   CPU (opencv)   memory (opencv)
nanodet_plus     360%         6.3%            412%           6.7%
FastestDet       310%         4.7%            270%           4.7%
yolo-fastestv2   306%         5.4%            335%           5.7%

Inference time on the edge server (seconds):

Model            onnx     opencv
nanodet_plus     0.5430   0.6136
FastestDet       0.1653   0.2123
yolo-fastestv2   0.1653   0.1064

The final summary is as follows:

Model                 mAP    size   run time (onnx, s)   CPU    memory   model size
nanodet_plus-m-1.5x   34.1   416    0.5430               360%   6.3%     9.5M
FastestDet            25.3   352    0.1653               310%   4.7%     960K
yolo-fastestv2        24.1   352    0.1653               306%   5.4%     958K
nanodet_plus-m        30.4   416    0.3575               341%   6.0%     4.6M

1. Scientific paper long-abstract generation task, 1st place

https://blog.csdn.net/weixin_45839693/article/details/114791680

2. "Wanchuang Cup" Traditional Chinese Medicine Tianchi Big Data Competition, TCM literature question generation challenge: final 1st-place solution

https://github.com/kangyishuai/CHINESE-MEDICINE-QUESTION-GENERATION

3. 4th-place solution

https://tianchi.aliyun.com/forum/postDetail?spm=5176.12586969.1002.12.767a1ebbsqTpuN&postId=155051

4. 3rd-place solution

https://tianchi.aliyun.com/forum/postDetail?spm=5176.12586969.1002.9.767a1ebbxuIt2J&postId=155097

1. URL paths

  • Prefix

    Route addresses start with api, followed by the microservice name, the version, and a module name or resource address, for example:

    https://host:port/api/pdi_metadata/v1/dep

    pdi_metadata identifies the microservice.

    v1 is the version: the letter v followed by a number. dep is a module or resource address inside the microservice.

  • Resource address

    Path design must follow these conventions:

    • Resource names are all lowercase and readable; hyphens (-) or underscores (_) may be used as separators
    • Resource names follow the RESTful style: verbs are allowed only in algorithm resources, otherwise only nouns may be used
    • Path segments use the slash (/) to express hierarchy
    • If the same resource name appears in a resource address, a resource representation and a query, the name must be identical, i.e. consistently separated with underscores (_).

2. Response status codes

All API responses follow the HTTP specification; common HTTP status codes are listed below.

Status code   Description
1xx           Informational
2xx           Success
3xx           Redirection
4xx           Client error
5xx           Server error
  1. The request succeeds

    The response parameters are defined as follows:

    Parameter   Type     Required   Description
    code        int64               success status code
    data        obj                 the requested response data
  2. The request fails; detailed error information must be returned, with parameters defined as follows:

    Parameter   Type     Required   Description
    code        int64               business error code
    message     string              business error message, mapped one-to-one to code; it explains what the code means and should be short, abstract and general
    cause       string              the cause of this error, which may also hint to the caller how to resolve it; the same code may map to different causes
    detail      obj                 detailed error information for the caller to inspect and show to the end user

    Notes:

    • The first three digits are the standard HTTP status code, the middle three digits are a globally unique microservice identifier, and the last three digits are a custom code that should stay abstract and general.
    • For errors caused by the client calling the API incorrectly (invalid parameters, malformed JSON, etc.), the last three digits of code are 000 and the reason is explained in cause.
    • For errors that the client must distinguish in order to drive UI and feature logic, the last three digits of code identify the specific error, with the specifics given in detail.

3. Error code scheme

The PDI product will later be embedded into other AISHU products, so a dynamically configurable scheme can be adopted, allowing different products to use different error codes.

Common error codes:

  • 400XXX000: errors caused by the client calling the API incorrectly, such as invalid parameters or malformed JSON
  • 500XXX000: internal server error
  • 501XXX000: method not implemented

4. Flask exception handling

Flask handles exceptions internally by subclassing HTTPException. In the same way, we can define our own exception base class (inheriting from HTTPException) that specifies the error code to return, the requested URL, the error cause, and so on.

import json

from werkzeug.exceptions import HTTPException


class APIException(HTTPException):
    code = 500
    message = 'sorry, we made a mistake'
    error_code = 999
    cause = 'just a mistake'
    detail = None
    dump_json = False  # whether to dump the body to a JSON string: False for flask_restful.Resource, True for plain Flask views

    def __init__(self, msg=None, code=None, error_code=None, cause=None, header=None, detail=None):
        if code:
            self.code = code
        if error_code:
            self.error_code = error_code
        if msg:
            self.message = msg
        if cause:
            self.cause = cause
        if detail:
            self.detail = detail
        super(APIException, self).__init__(msg, None)

    def get_body(self, environ=None):
        body = dict(
            message=self.message,
            code=self.error_code,
            cause=self.cause
        )
        if self.detail:
            body.update({"detail": self.detail})

        data = json.dumps(body) if self.dump_json else body

        return data

    def get_headers(self, environ=None):
        """Get a list of headers."""
        return [('Content-Type', 'application/json')]

With the base class in place, derive concrete exception classes from it, freely defining status codes and the corresponding error messages; when such a condition occurs, raise the exception. For example:

class NotFound(APIException):
    code = 404
    message = 'the resource are not found'
    error_code = 1001

The derived classes defined so far are:

  • Success
  • DeleteSucess
  • UpdateSucess
  • ServerError
  • ParameterException
  • NotFound
  • AuthFailed
  • Forbidden

Usage in the application:

@input_bp.route('/input', methods=['POST'])
def input_m():
    d = request.get_json()

    if "file_id" not in d or "op_type" not in d:
        return ParameterException(msg='the file_id and op_type parameters must not be empty').get_body()
    pass

Although we can raise our own exception classes wherever we expect failures, not every exception can be anticipated. Parameter errors and the like can be foreseen and handled in advance, but logic bugs and other unforeseeable exceptions are not something we can handle case by case, so we also need to catch all exceptions globally.

# Global error handling (AOP style)
@app.errorhandler(Exception)
def framework_error(e):
    api_logger.error("error info: %s" % e)  # log the error
    if isinstance(e, APIException):
        return e
    if isinstance(e, HTTPException):
        code = e.code
        msg = e.description
        error_code = 1007
        return APIException(msg, code, error_code)
    else:
        if not app.config['DEBUG']:
            return ServerError().get_body()
        else:
            return e

With this in place, every exception raised in Flask is handled, which keeps the application robust.

5. Sanic exception handling

Sanic can be thought of as an upgraded Flask: it is coroutine-based, with higher concurrency and efficiency than Flask, and its usage is largely the same.

The differences from Flask are:

  • the base class inherits from HTTPResponse instead of HTTPException
  • the global error handling mechanism differs: Sanic implements it by registering an exception handler function.
class MyResponse(HTTPResponse):
    code = 500
    message = 'sorry, we made a mistake'
    error_code = 999
    cause = None
    detail = None

    def __init__(self, message=None, code=None, error_code=None, cause=None, detail=None):
        if code:
            self.code = code
        if error_code:
            self.error_code = error_code
        if message:
            self.message = message
        if cause is not None:
            self.cause = cause
        if detail is not None:
            self.detail = detail
        super(MyResponse, self).__init__(body=self.get_body(), status=self.code)

    def get_body(self):
        body = dict(
            message=self.message,
            code=self.error_code
        )

        if self.cause is not None:
            body.update(dict(cause=self.cause))
        if self.detail is not None:
            body.update({"detail": self.detail})

        return json_dumps(body)

The derived error classes are the same as in the Flask case.

class Success(MyResponse):
    code = 200
    message = 'OK'
    local_code = "000"
    error_code = 99999

Global exception handling:

async def server_error_handler(request, exception):
    logger.error(msg=traceback.format_exc())  # log the error
    return ServerError(message=repr(exception), cause=traceback.format_exc())


app.error_handler.add(Exception, server_error_handler)

When a system exception occurs, a ServerError is returned uniformly here. ServerError is a custom exception class derived from MyResponse.

class ServerError(MyResponse):
    code = 500
    message = 'something happen'
    local_code = "000"
    error_code = 99999

1. Introduction to sanicdb

SanicDB is a tool developed to make it convenient for Sanic, Python's asynchronous web framework, to work with MySQL (MariaDB); it is a lightweight wrapper around aiomysql.Pool. Since Sanic is an async-IO web framework, reading MySQL with async IO is what lets it realize its full efficiency.

Note: sanicdb can be used anywhere MySQL needs to be accessed with async IO, not only inside Sanic.

GitHub: https://github.com/veelion/sanicdb

Installation

pip install sanicdb

2. Initialization

class SanicDB:
    """A lightweight wrapper around aiomysql.Pool for easy to use
    """
    def __init__(self, host, database, user, password,
                 loop=None, sanic=None,
                 minsize=3, maxsize=5,
                 return_dict=True,
                 pool_recycle=7*3600,
                 autocommit=True,
                 charset="utf8mb4", **kwargs):
        '''
        kwargs: all parameters that aiomysql.connect() accept.
        '''
  • The first four parameters describe the MySQL database connection.

  • loop is the application's event loop; if it is None (the default), asyncio.get_event_loop() is called automatically to create one. Also, only one of sanic and loop needs to be supplied; when both are given, the database actually uses the loop from sanic, as can be seen in the initialization:

async def init_pool(self):
    if self.sanic:
        self.db_args['loop'] = self.sanic.loop
    self.pool = await aiomysql.create_pool(**self.db_args)
  • minsize: minimum number of connections
  • maxsize: maximum number of connections
  • return_dict: whether each returned record is a dict whose keys are the MySQL column names and whose values are the column values
  • pool_recycle: interval after which pool connections are recycled; MySQL's default idle timeout is 8 hours (our server is currently set to 24 hours)
  • autocommit: whether to commit automatically
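A minimal initialization sketch using the parameters above (host, database and credentials are placeholders):

from sanicdb import SanicDB

db = SanicDB('localhost', 'testdb', 'root', 'the_password',
             minsize=3, maxsize=5,
             return_dict=True,
             pool_recycle=7 * 3600,
             autocommit=True)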

3. Function definitions

sanicdb provides six functions.

  • query

    Query the database, as the name says.

    async def query(self, query, *parameters, **kwparameters):
        """Returns a row list for the given query and parameters."""
  • get

    Query the database and return only one row.

    async def get(self, query, *parameters, **kwparameters):
        """Returns the (singular) row returned by the given query.
        """
  • execute

    Execute a statement and return the affected row id (lastrowid).

    async def execute(self, query, *parameters, **kwparameters):
        """Executes the given query, returning the lastrowid from the query."""
  • table_has

    Check whether a table contains a record whose field equals a given value.

    async def table_has(self, table_name, field, value):
  • table_insert

    Insert a record into the database.

    async def table_insert(self, table_name, item, ignore_duplicated=True):
        '''item is a dict : key is mysql table field'''
  • table_update

    Update the database.

    async def table_update(self, table_name, updates,
                           field_where, value_where):
        '''updates is a dict of {field_update:value_update}'''

4. Examples

  1. A plain async MySQL example, following the test given on the project page


    import asyncio
    from sanicdb import SanicDB


    async def test(loop):
        db = SanicDB('localhost', 'testdb', 'root', 'the_password',
                     minsize=3, maxsize=5,
                     connect_timeout=5,
                     loop=loop)
        sql = 'Drop table test'
        await db.execute(sql)

        sql = """CREATE TABLE `test` (
        `id` int(8) NOT NULL AUTO_INCREMENT,
        `name` varchar(16) NOT NULL,
        `content` varchar(255) NOT NULL,
        PRIMARY KEY (`id`),
        UNIQUE KEY `name` (`name`)
        ) ENGINE=MyISAM ;"""
        await db.execute(sql)

        sql = 'select * from test where name = %s'
        data = await db.query(sql, 'abc')
        print('query():', data)

        sql += ' limit 1'
        d = await db.get(sql, 'abc')
        print('get():', d)

        sql = 'delete from test where name=%s'
        lastrowid = await db.execute(sql, 'xyz')
        print('execute(delete...):', lastrowid)
        sql = 'insert into test set name=%s, content=%s'
        lastrowid = await db.execute(sql, 'xyz', '456')
        print('execute(insert...):', lastrowid)

        ret = await db.table_has('test', 'name', 'abc')
        print('has(): ', ret)

        ret = await db.table_update('test', {'content': 'updated'},
                                    'name', 'abc')
        print('update():', ret)
        sql = 'select * from test where name = %s'
        data = await db.query(sql, 'abc')
        print('query():', data)

        item = {
            'name': 'abc',
            'content': '123'
        }
        i = 0
        while 1:
            i += 1
            if i % 2 == 0:
                lastid = await db.table_insert('test', item, ignore_duplicated=False)
            else:
                lastid = await db.table_insert('test', item)
            print('insert():', lastid)
            await asyncio.sleep(1)


    if __name__ == '__main__':
        loop = asyncio.get_event_loop()
        loop.run_until_complete(test(loop))
  2. Querying MySQL asynchronously inside a Sanic web app

    # author: sunshine
    # datetime: 2020/10/29 10:12
    from sanic import Sanic
    from sanic_cors import CORS
    from sanic.response import json
    from sanicdb import SanicDB

    app = Sanic(__name__)
    # enable cross-origin requests
    CORS(app)

    db = SanicDB(
        host='172.27.0.6',
        database='privacy',
        user='root',
        password='PdiCond$402875432',
        sanic=app,
        maxsize=18
    )


    @app.route('/simple', methods=['POST'])
    async def simple_query(request):
        """
        A simple query test against the database
        :param request:
        :return:
        """
        sql_str = 'select * from employee'
        num = 0
        data = await app.db.query(sql_str)
        num += len(data)
        return json({"查询条数": num})


    if __name__ == '__main__':
        app.run('0.0.0.0', port=5067)

  1. Inspect the container's startup script

    docker inspect $image_name

    image-20210622110807715

1. tensorflow.python.framework.errors_impl.FailedPreconditionError: Error while reading resource variable softmax/kernel from Container: localhost. This could mean that the variable was uninitialized. Not found: Resource localhost/softmax/kernel/N10tensorflow3VarE does not exist.

I recently ran into this error while deploying a Keras service with Flask. From the message it looks like a variable was not initialized; after searching around, I found a working solution:

creating a reference to the session that is used for loading the models and then to set it to be used by keras in each request

Create the session before loading the model, and set the same session whenever the model's predict is called, so the entire project uses one and the same session for the model. This solved the problem.

import tensorflow as tf
from tensorflow.python.keras.backend import set_session
from tensorflow.python.keras.models import load_model

tf_config = some_custom_config  # e.g. your tf session configuration
sess = tf.Session(config=tf_config)
graph = tf.get_default_graph()

# IMPORTANT: models have to be loaded AFTER SETTING THE SESSION for keras!
# Otherwise, their weights will be unavailable in the threads after the session there has been set
set_session(sess)
model = load_model(...)

When calling the model, set it up as follows:

global sess
global graph
with graph.as_default():
    set_session(sess)
    model.predict(...)

References:

https://github.com/tensorflow/tensorflow/issues/28287

https://www.datalearner.com/blog/1051572578207468

2. XLA_GPU

Reference: https://stackoverflow.com/questions/53696654/why-keras-does-not-see-my-gpu-while-tensorflow-does?r=SearchResults

Fix:

sudo chmod -R a+r /usr/local/cuda*