基于sanic的模型推理服务异步部署

通常来说，模型推理是一个耗时任务，如何在接收到模型推理请求之后不阻塞其他请求，就成为了模型部署过程中的一个难点。很容易想到使用异步方式来实现。接下来，使用sanic框架来演示如何实现模型的异步部署。

# author: sunshine
# datetime:2023/12/14 下午2:10
import asyncio
import time
import functools
from sanic import Sanic
from sanic.response import text
from apps.service.uie_predictor import load_uie_model

# Sanic application instance; the route decorators below register on it.
app = Sanic(__name__)
# Loaded once at import time and shared by every call to model_inference.
# NOTE(review): presumably returns a callable UIE predictor -- confirm in
# apps.service.uie_predictor.
uie = load_uie_model()

# Fixed sample sentence (Chinese) used as the demo inference input.
text1 = '如何演好自己的角色，请读《演员自我修养》《喜剧之王》周星驰崛起于穷困潦倒之中的独门秘笈'


@app.get('/f1')
async def f1(request):
    """Handle ``GET /f1``: run the blocking inference off the event loop.

    The sample sentence ``text1`` is handed to ``model_inference`` through
    ``async_decorate``; the awaited result is returned as a plain-text
    response.
    """
    # Pass the sample sentence, not sanic's ``text`` response helper.
    result = await async_decorate(model_inference, text1)
    return text(result)


def model_inference(text):
    """Run UIE extraction on ``text`` and return the result as a string.

    Args:
        text: Sentence to extract from. Inside this function the name
            shadows the imported sanic ``text`` helper; it is kept so that
            existing keyword callers continue to work.

    Returns:
        ``str()`` of whatever the ``uie`` callable returns.
    """
    schema = [{'人物': ['祖籍', '出生地', '民族', '毕业院校', '出生日期', '妻子'], "影视作品": ["上映日期", '导演', '制片人', '主演']}]
    uie.set_schema(schema)
    # Use the argument rather than the module-level sample sentence.
    result = uie(text)
    # Blocking 10 s pause; presumably simulates a slow inference step for
    # the demo -- confirm before removing.
    time.sleep(10)
    return str(result)


async def async_decorate(func, *args, **kwargs):
    """Run a blocking callable in the default executor and await its result.

    Args:
        func: Synchronous callable to execute off the event loop.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func(*args, **kwargs)`` returns.
    """
    # Inside a coroutine the running loop is the one we want;
    # get_running_loop() is the documented way to obtain it.
    loop = asyncio.get_running_loop()
    # run_in_executor() forwards positional arguments only, so bind
    # everything up front with functools.partial.
    bound_call = functools.partial(func, *args, **kwargs)
    # ``None`` selects the loop's default executor.
    return await loop.run_in_executor(None, bound_call)


@app.get('/f2')
def f2(request):
    """Handle ``GET /f2`` by replying with the constant plain-text body ``f2``.

    A lightweight route that can be requested while ``/f1`` is still
    running its inference.
    """
    reply = text('f2')
    return reply




if __name__ == '__main__':
    # Start the server only when run as a script: host 0.0.0.0, port 5555.
    app.run('0.0.0.0', port=5555)

最核心的在于使用loop.run_in_executor函数将同步函数交给执行器（默认是线程池）运行，从而把同步函数包装为可await的异步调用，事件循环在等待期间仍能处理其他请求。

更进一步，可以将异步包装函数封装为一个装饰器，简化使用。

# author: sunshine
# datetime:2023/12/14 下午2:10
import asyncio
import functools
import time

from sanic import Sanic
from sanic.response import text
from apps.service.uie_predictor import load_uie_model

# Sanic application instance; the route decorators below register on it.
app = Sanic(__name__)
# Loaded once at import time and shared by every call to model_inference.
# NOTE(review): presumably returns a callable UIE predictor -- confirm in
# apps.service.uie_predictor.
uie = load_uie_model()

# Fixed sample sentence (Chinese) used as the demo inference input.
text1 = '如何演好自己的角色，请读《演员自我修养》《喜剧之王》周星驰崛起于穷困潦倒之中的独门秘笈'


def async_decorator(func):
    """Turn a blocking function into a coroutine function.

    Each call to the returned wrapper runs ``func`` in the event loop's
    default executor and awaits the result, so the loop stays free while
    ``func`` runs.

    Args:
        func: Synchronous callable to wrap.

    Returns:
        A coroutine function with the same call signature as ``func``.
    """

    # Keep the wrapped function's __name__/__doc__ for debugging and
    # introspection.
    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        # Inside a coroutine the running loop is the one we want.
        loop = asyncio.get_running_loop()
        # run_in_executor() forwards positional arguments only, so bind
        # everything up front with functools.partial.
        bound_call = functools.partial(func, *args, **kwargs)
        # ``None`` selects the loop's default executor.
        return await loop.run_in_executor(None, bound_call)

    return wrapper


@app.get('/f1')
async def f1(request):
    """Handle ``GET /f1``: await inference on the sample sentence.

    ``model_inference`` is awaited with ``text1`` and its string result is
    returned as a plain-text response.
    """
    return text(await model_inference(text1))


@async_decorator
def model_inference(sentence):
    """Run UIE extraction on ``sentence`` and return the result as a string.

    Wrapped by ``async_decorator``, so callers ``await`` it.

    Args:
        sentence: Text handed to the ``uie`` callable.

    Returns:
        ``str()`` of whatever the ``uie`` callable returns.
    """
    person_fields = ['祖籍', '出生地', '民族', '毕业院校', '出生日期', '妻子']
    film_fields = ["上映日期", '导演', '制片人', '主演']
    uie.set_schema([{'人物': person_fields, "影视作品": film_fields}])
    extraction = uie(sentence)
    # Blocking 10 s pause; presumably simulates a slow inference step for
    # the demo.
    time.sleep(10)
    return str(extraction)


@app.get('/f2')
def f2(request):
    """Handle ``GET /f2`` by replying with the constant plain-text body ``f2``.

    A lightweight route that can be requested while ``/f1`` is still
    running its inference.
    """
    reply = text('f2')
    return reply


if __name__ == '__main__':
    # Start the server only when run as a script: host 0.0.0.0, port 5555.
    app.run('0.0.0.0', port=5555)

二者作用相当。