Turning a Vision-Understanding Multimodal Model into an API Service
Wrap inference for a vision-understanding multimodal model as an API service that accepts an image plus a text prompt and streams the reply:
import io
import torch
from PIL import Image
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import StreamingResponse
from transformers import AutoModel, AutoTokenizer

# Initialize the FastAPI application
app = FastAPI()

# Model path
model_path = "/data/models/minicpm3v-4b"

# Load the model and tokenizer (only once, at startup)
print("Loading model, please wait...")
model = AutoModel.from_pretrained(
    model_path,
    trust_remote_code=True,
    attn_implementation='eager',
    torch_dtype=torch.bfloat16
).eval().cuda()
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
print("Model loaded successfully.")


def generate_stream(image: Image.Image, prompt: str):
    """Yield the model's reply incrementally."""
    msgs = [{'role': 'user', 'content': [image, prompt]}]
    res = model.chat(
        image=None,
        msgs=msgs,
        tokenizer=tokenizer,
        sampling=True,
        stream=True
    )
    for new_text in res:
        yield new_text


@app.post("/chat")
async def chat(
    image: UploadFile = File(...),
    prompt: str = Form(...)
):
    # Read the uploaded file into a PIL image
    img_bytes = await image.read()
    img = Image.open(io.BytesIO(img_bytes)).convert('RGB')
    return StreamingResponse(generate_stream(img, prompt), media_type="text/plain")


# Start the service (assumes this file is saved as app.py, matching "app:app")
if __name__ == "__main__":
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=8900, reload=False)
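Because StreamingResponse iterates a synchronous generator in a worker thread, two overlapping requests would call model.chat on the same GPU-resident model at the same time. A minimal sketch of one way to guard against that, assuming a single-model, single-process deployment (the gen_lock name and the lock itself are additions, not part of the service above):

import threading

# Hypothetical hardening: one GPU model should not run two generations
# at once, so this lock queues overlapping requests end to end.
gen_lock = threading.Lock()

def generate_stream(image: Image.Image, prompt: str):
    """Yield the reply while holding the lock for the whole generation."""
    with gen_lock:
        msgs = [{'role': 'user', 'content': [image, prompt]}]
        res = model.chat(image=None, msgs=msgs, tokenizer=tokenizer,
                         sampling=True, stream=True)
        for new_text in res:
            yield new_text

Under real load you would move to a batching inference server; the lock just keeps this single-process demo from interleaving CUDA calls. The service can also be launched from the shell with uvicorn app:app --host 0.0.0.0 --port 8900.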
Test the service:
import requests

url = "http://localhost:8900/chat"

with open("image.png", "rb") as f:
    files = {
        "image": f,
        # (None, value) makes requests send "prompt" as a plain form field
        "prompt": (None, "Describe what you see in this image."),
    }
    with requests.post(url, files=files, stream=True) as r:
        r.encoding = "utf-8"
        # decode_unicode=True uses an incremental decoder, so a multi-byte
        # character split across two chunks is still decoded correctly
        for chunk in r.iter_content(chunk_size=None, decode_unicode=True):
            print(chunk, end="", flush=True)
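Since the reply arrives incrementally, a quick sanity check for streaming is to time the first chunk. The sketch below is illustrative (the first_token_at bookkeeping is an addition, not part of the original test script); it collects the full reply while recording time-to-first-chunk:

import time
import requests

url = "http://localhost:8900/chat"

with open("image.png", "rb") as f:
    files = {
        "image": f,
        "prompt": (None, "Describe what you see in this image."),
    }
    start = time.perf_counter()
    first_token_at = None  # seconds until the first streamed chunk arrives
    parts = []
    with requests.post(url, files=files, stream=True) as r:
        r.raise_for_status()
        for chunk in r.iter_content(chunk_size=None):
            if first_token_at is None:
                first_token_at = time.perf_counter() - start
            parts.append(chunk)

reply = b"".join(parts).decode("utf-8")
print(f"first chunk after {first_token_at:.2f}s, {len(reply)} chars total")

From a shell, curl -N -F "image=@image.png" -F "prompt=Describe what you see in this image." http://localhost:8900/chat produces the same unbuffered stream.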
Example output: