Turning a Vision-Understanding Multimodal Model into an API Service
Wrap inference for a vision-understanding multimodal model as an API service that accepts an image plus a text prompt and streams the reply:
import io
import torch
from PIL import Image
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import StreamingResponse
from transformers import AutoModel, AutoTokenizer

# Initialize the FastAPI application
app = FastAPI()

# Model path
model_path = "/data/models/minicpm3v-4b"

# Load the model and tokenizer (only once, at startup)
print("Loading model, please wait...")
model = AutoModel.from_pretrained(
    model_path,
    trust_remote_code=True,
    attn_implementation='eager',
    torch_dtype=torch.bfloat16
).eval().cuda()
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
print("Model loaded successfully.")


def generate_stream(image: Image.Image, prompt: str):
    """Yield the model's reply incrementally."""
    msgs = [{'role': 'user', 'content': [image, prompt]}]
    res = model.chat(
        image=None,
        msgs=msgs,
        tokenizer=tokenizer,
        sampling=True,
        stream=True
    )
    for new_text in res:
        yield new_text


@app.post("/chat")
async def chat(
    image: UploadFile = File(...),
    prompt: str = Form(...)
):
    # Read the uploaded file into a PIL image
    img_bytes = await image.read()
    img = Image.open(io.BytesIO(img_bytes)).convert('RGB')
    return StreamingResponse(generate_stream(img, prompt), media_type="text/plain")


# Start the service (assumes this file is saved as app.py, matching "app:app")
if __name__ == "__main__":
    import uvicorn
    uvicorn.run("app:app", host="0.0.0.0", port=8900, reload=False)
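Because StreamingResponse iterates a synchronous generator in a worker thread, two overlapping requests would call model.chat on the same GPU-resident model at the same time. A minimal sketch of one way to guard against that, assuming a single-model, single-process deployment (the gen_lock name and the lock itself are additions, not part of the service above):

import threading

# Hypothetical hardening: one GPU model should not run two generations
# at once, so this lock queues overlapping requests end to end.
gen_lock = threading.Lock()

def generate_stream(image: Image.Image, prompt: str):
    """Yield the reply while holding the lock for the whole generation."""
    with gen_lock:
        msgs = [{'role': 'user', 'content': [image, prompt]}]
        res = model.chat(image=None, msgs=msgs, tokenizer=tokenizer,
                         sampling=True, stream=True)
        for new_text in res:
            yield new_text

Under real load you would move to a batching inference server; the lock just keeps this single-process demo from interleaving CUDA calls. The service can also be launched from the shell with uvicorn app:app --host 0.0.0.0 --port 8900.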
Test the service:
import requests

url = "http://localhost:8900/chat"

with open("image.png", "rb") as f:
    files = {
        "image": f,
        # (None, value) makes requests send "prompt" as a plain form field
        "prompt": (None, "Describe what you see in this image."),
    }
    with requests.post(url, files=files, stream=True) as r:
        r.encoding = "utf-8"
        # decode_unicode=True uses an incremental decoder, so a multi-byte
        # character split across two chunks is still decoded correctly
        for chunk in r.iter_content(chunk_size=None, decode_unicode=True):
            print(chunk, end="", flush=True)
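Since the reply arrives incrementally, a quick sanity check for streaming is to time the first chunk. The sketch below is illustrative (the first_token_at bookkeeping is an addition, not part of the original test script); it collects the full reply while recording time-to-first-chunk:

import time
import requests

url = "http://localhost:8900/chat"

with open("image.png", "rb") as f:
    files = {
        "image": f,
        "prompt": (None, "Describe what you see in this image."),
    }
    start = time.perf_counter()
    first_token_at = None  # seconds until the first streamed chunk arrives
    parts = []
    with requests.post(url, files=files, stream=True) as r:
        r.raise_for_status()
        for chunk in r.iter_content(chunk_size=None):
            if first_token_at is None:
                first_token_at = time.perf_counter() - start
            parts.append(chunk)

reply = b"".join(parts).decode("utf-8")
print(f"first chunk after {first_token_at:.2f}s, {len(reply)} chars total")

From a shell, curl -N -F "image=@image.png" -F "prompt=Describe what you see in this image." http://localhost:8900/chat produces the same unbuffered stream.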
Example output: