OpenVINO教程(三):使用NNCF进行模型量化加速
本文接着上篇继续讲述OpenVINO,主要介绍如何通过NNCF来量化优化OpenVINO模型以提高模型性能
NNCF 提供了一套先进的神经网络推理优化算法,适用于 OpenVINO,并能在保持精度几乎不下降的情况下提升推理性能。我们将使用后训练量化(Post-Training Quantization, PTQ),即不需要再训练模型,只通过分析权重和数据分布将模型压缩为更小、更快的 8 位版本。要实现这个优化功能,我们需要完成以下两个步骤:
- 创建用于量化的数据集
- 运行 nncf.quantize 以获得优化的模型,并使用 openvino.save_model(代码中即 ov.save_model)函数序列化 OpenVINO IR 模型
我们下载coco数据集来作为量化的数据集,这个执行比较慢需要点时间
# COCO dataset resource paths.
DATA_URL = "http://images.cocodataset.org/zips/val2017.zip"
LABELS_URL = "https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels-segments.zip"
CFG_URL = "https://raw.githubusercontent.com/ultralytics/ultralytics/v8.1.0/ultralytics/cfg/datasets/coco.yaml"
DATA_PATH = OUT_DIR / "val2017.zip"
LABELS_PATH = OUT_DIR / "coco2017labels-segments.zip"
CFG_PATH = OUT_DIR / "coco.yaml"


def prepare_dataset() -> 'tuple[nncf.Dataset, object]':
    """Download/extract the COCO val2017 dataset and build the NNCF calibration dataset.

    Returns:
        A tuple ``(dataset, validator)`` where ``dataset`` is an ``nncf.Dataset``
        wrapping the validator's dataloader and ``validator`` is the ultralytics
        validator instance used to preprocess each sample.
    """
    # Download and unpack only on first run; the extracted label folder is the marker.
    if not (OUT_DIR / "coco/labels").exists():
        download_file_if_needed(DATA_URL, DATA_PATH.name, DATA_PATH.parent)
        download_file_if_needed(LABELS_URL, LABELS_PATH.name, LABELS_PATH.parent)
        download_file_if_needed(CFG_URL, CFG_PATH.name, CFG_PATH.parent)
        with ZipFile(LABELS_PATH, "r") as z:
            z.extractall(OUT_DIR)
        with ZipFile(DATA_PATH, "r") as z:
            z.extractall(OUT_DIR / "coco/images")

    args = get_cfg(cfg=DEFAULT_CFG)
    args.data = str(CFG_PATH)

    # Build the dataloader through ultralytics' validator machinery.
    det_model = build_ultralytics_model()
    validator_cls = det_model.task_map[det_model.task]["validator"]
    validator = validator_cls(args=args)
    validator.data = check_det_dataset(args.data)
    validator.stride = 32
    dataloader = validator.get_dataloader(OUT_DIR / "coco", 1)
    validator.class_map = coco80_to_coco91_class()
    validator.names = YOLO(PT_MODEL_PATH).to("cpu").model.names
    validator.nc = 80

    def transform_fn(data: Dict):
        # NNCF only needs the preprocessed image tensor, as a numpy array.
        return validator.preprocess(data)['img'].numpy()

    return nncf.Dataset(dataloader, transform_fn), validator
运行nncf.quantize以获得优化的模型,并序列化OpenVINO IR模型
nncf.quantize 函数为模型量化提供了一个接口。它需要传入 一个 OpenVINO 的模型实例和用于量化的数据集(称为量化集)。此外,还可以通过一些可选参数来配置量化过程,例如:
用于量化的数据样本数量(number of samples)、预设配置(preset)、忽略量化的子图或操作范围(ignored_scope)等。
def quantize_model(original_model: ov.Model, quant_dataset: nncf.Dataset) -> ov.Model:
    """Quantize an OpenVINO model to mixed INT8/float precision with NNCF and save it.

    Args:
        original_model: The FP OpenVINO model to quantize.
        quant_dataset: Calibration dataset produced by ``prepare_dataset``.

    Returns:
        The quantized ``ov.Model`` (also serialized to ``INT8_MODEL_PATH``).
    """
    # Detection-head module index: 22 when the model name contains 'v8', else 23.
    # Computed once instead of repeating the expression in every node name.
    head = 22 if 'v8' in MODEL_NAME else 23
    # Keep the post-processing concat subgraph un-quantized to preserve accuracy.
    ignored_scope = nncf.IgnoredScope(
        subgraphs=[
            nncf.Subgraph(
                inputs=[
                    f"__module.model.{head}/aten::cat/Concat",
                    f"__module.model.{head}/aten::cat/Concat_1",
                    f"__module.model.{head}/aten::cat/Concat_2",
                ],
                outputs=[f"__module.model.{head}/aten::cat/Concat_7"],
            )
        ]
    )
    quant_model = nncf.quantize(
        original_model,
        quant_dataset,
        preset=nncf.QuantizationPreset.MIXED,
        ignored_scope=ignored_scope,
    )
    ov.save_model(quant_model, str(INT8_MODEL_PATH))
    print(f"Quantized model saved to: {INT8_MODEL_PATH}")
    return quant_model
最后我们使用优化后的模型进行图片检测。下面是完整的优化过程和推理代码:
from pathlib import Path
from zipfile import ZipFile
from typing import Dictimport urllib.request
import tkinter as tk
from PIL import Image, ImageTkfrom ultralytics import YOLO
from ultralytics.utils import DEFAULT_CFG
from ultralytics.cfg import get_cfg
from ultralytics.data.converter import coco80_to_coco91_class
from ultralytics.data.utils import check_det_datasetimport openvino as ov
import nncf# ----------------------------- #
# 全局配置和路径定义
# ----------------------------- #
# Global configuration and paths
# ----------------------------- #

# Available model variants, lightest first; default to the smallest (yolo11n).
MODEL_VARIANTS = ["yolo11n", "yolo11s", "yolo11m", "yolo11l", "yolo11x"]
MODEL_NAME = MODEL_VARIANTS[0]

# Model artifact locations derived from the chosen variant.
PT_MODEL_PATH = f"{MODEL_NAME}.pt"
IR_MODEL_DIR = Path(f"{MODEL_NAME}_openvino_model")
IR_MODEL_PATH = IR_MODEL_DIR / f"{MODEL_NAME}.xml"
INT8_MODEL_PATH = Path(f"{MODEL_NAME}_openvino_int8_model/{MODEL_NAME}.xml")

# Test image and working directory.
IMAGE_PATH = Path("./coco_bike.jpg")
OUT_DIR = Path("./")

# COCO dataset resources used for calibration.
DATA_URL = "http://images.cocodataset.org/zips/val2017.zip"
LABELS_URL = "https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels-segments.zip"
CFG_URL = "https://raw.githubusercontent.com/ultralytics/ultralytics/v8.1.0/ultralytics/cfg/datasets/coco.yaml"
DATA_PATH = OUT_DIR / "val2017.zip"
LABELS_PATH = OUT_DIR / "coco2017labels-segments.zip"
CFG_PATH = OUT_DIR / "coco.yaml"

# ----------------------------- #
# Utility functions
# ----------------------------- #


def download_file_if_needed(url: str, filename: str, dest_dir: Path) -> Path:
    """Download *url* into ``dest_dir/filename``; skip the download if the file exists.

    Returns the path of the (existing or newly downloaded) file.
    """
    dest_dir.mkdir(parents=True, exist_ok=True)
    file_path = dest_dir / filename
    if not file_path.exists():
        # Bug fix: the original printed a literal "(unknown)" placeholder
        # instead of the URL actually being fetched.
        print(f"Downloading: {url}")
        urllib.request.urlretrieve(url, file_path)
    else:
        print(f"File already exists: {file_path}")
    return file_path


def prepare_test_image():
    """Ensure the test image exists; download it from the official host if missing."""
    if not IMAGE_PATH.exists():
        download_file_if_needed(
            "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_bike.jpg",
            IMAGE_PATH.name, IMAGE_PATH.parent)


def load_or_export_openvino_model() -> ov.CompiledModel:
    """Load (exporting the IR first if needed) the YOLO model compiled for CPU."""
    model = YOLO(PT_MODEL_PATH).to("cpu")
    if not IR_MODEL_PATH.exists():
        # Export writes the IR files next to the .pt model.
        model.export(format="openvino", dynamic=True, half=True)
    core = ov.Core()
    ir_model = core.read_model(IR_MODEL_PATH)
    return core.compile_model(ir_model, "CPU")


def build_ultralytics_model() -> YOLO:
    """Create the Ultralytics YOLO wrapper around the exported IR model (detect task)."""
    return YOLO(IR_MODEL_DIR, task="detect")


def prepare_dataset() -> 'tuple[nncf.Dataset, object]':
    """Download/extract the COCO val2017 dataset and build the NNCF calibration dataset.

    Returns:
        A tuple ``(dataset, validator)`` where ``dataset`` is an ``nncf.Dataset``
        wrapping the validator's dataloader and ``validator`` is the ultralytics
        validator instance used to preprocess each sample.
    """
    # Download and unpack only on first run; the extracted label folder is the marker.
    if not (OUT_DIR / "coco/labels").exists():
        download_file_if_needed(DATA_URL, DATA_PATH.name, DATA_PATH.parent)
        download_file_if_needed(LABELS_URL, LABELS_PATH.name, LABELS_PATH.parent)
        download_file_if_needed(CFG_URL, CFG_PATH.name, CFG_PATH.parent)
        with ZipFile(LABELS_PATH, "r") as z:
            z.extractall(OUT_DIR)
        with ZipFile(DATA_PATH, "r") as z:
            z.extractall(OUT_DIR / "coco/images")

    args = get_cfg(cfg=DEFAULT_CFG)
    args.data = str(CFG_PATH)

    # Build the dataloader through ultralytics' validator machinery.
    det_model = build_ultralytics_model()
    validator_cls = det_model.task_map[det_model.task]["validator"]
    validator = validator_cls(args=args)
    validator.data = check_det_dataset(args.data)
    validator.stride = 32
    dataloader = validator.get_dataloader(OUT_DIR / "coco", 1)
    validator.class_map = coco80_to_coco91_class()
    validator.names = YOLO(PT_MODEL_PATH).to("cpu").model.names
    validator.nc = 80

    def transform_fn(data: Dict):
        # NNCF only needs the preprocessed image tensor, as a numpy array.
        return validator.preprocess(data)['img'].numpy()

    return nncf.Dataset(dataloader, transform_fn), validator


def quantize_model(original_model: ov.Model, quant_dataset: nncf.Dataset) -> ov.Model:
    """Quantize an OpenVINO model to mixed INT8/float precision with NNCF and save it.

    Args:
        original_model: The FP OpenVINO model to quantize.
        quant_dataset: Calibration dataset produced by ``prepare_dataset``.

    Returns:
        The quantized ``ov.Model`` (also serialized to ``INT8_MODEL_PATH``).
    """
    # Detection-head module index: 22 when the model name contains 'v8', else 23.
    # Computed once instead of repeating the expression in every node name.
    head = 22 if 'v8' in MODEL_NAME else 23
    # Keep the post-processing concat subgraph un-quantized to preserve accuracy.
    ignored_scope = nncf.IgnoredScope(
        subgraphs=[
            nncf.Subgraph(
                inputs=[
                    f"__module.model.{head}/aten::cat/Concat",
                    f"__module.model.{head}/aten::cat/Concat_1",
                    f"__module.model.{head}/aten::cat/Concat_2",
                ],
                outputs=[f"__module.model.{head}/aten::cat/Concat_7"],
            )
        ]
    )
    quant_model = nncf.quantize(
        original_model,
        quant_dataset,
        preset=nncf.QuantizationPreset.MIXED,
        ignored_scope=ignored_scope,
    )
    ov.save_model(quant_model, str(INT8_MODEL_PATH))
    print(f"Quantized model saved to: {INT8_MODEL_PATH}")
    return quant_model


def predict_and_show_image(det_model: YOLO, compiled_model: ov.CompiledModel):
    """Detect objects in IMAGE_PATH with *compiled_model* and show the result in Tk.

    Args:
        det_model: Ultralytics YOLO wrapper supplying the predictor machinery.
        compiled_model: The compiled OpenVINO model (FP or INT8) to run inference with.
    """
    if det_model.predictor is None:
        # Lazily build the predictor the same way YOLO.__call__ would.
        config = {"conf": 0.25, "batch": 1, "save": False, "mode": "predict"}
        args = {**det_model.overrides, **config}
        det_model.predictor = det_model._smart_load("predictor")(overrides=args, _callbacks=det_model.callbacks)
        det_model.predictor.setup_model(model=det_model.model)
    # Swap in the supplied compiled model so inference runs through it.
    det_model.predictor.model.ov_compiled_model = compiled_model
    results = det_model(IMAGE_PATH)
    # plot() returns a BGR array; reverse the channel axis to get RGB for PIL.
    result_img = Image.fromarray(results[0].plot()[:, :, ::-1])
    root = tk.Tk()
    root.title("YOLOv11 (OpenVINO INT8) Detection Result")
    tk_img = ImageTk.PhotoImage(result_img)
    label = tk.Label(root, image=tk_img)
    label.pack()
    root.mainloop()


# ----------------------------- #
# Main execution flow
# ----------------------------- #


def main():
    """End-to-end flow: prepare assets, ensure FP and INT8 IR models, run inference."""
    # 1. Make sure the test image is available (downloaded on demand).
    prepare_test_image()

    # 2. Load or export the FP OpenVINO IR model. The return value was unused
    #    in the original; the call is kept for its side effect of exporting the
    #    IR files on first run, which the quantization step reads from disk.
    load_or_export_openvino_model()

    # 3. Ultralytics wrapper used for inference.
    det_model = build_ultralytics_model()

    # 4. Reuse an existing INT8 model if present, otherwise quantize now;
    #    compile once after either branch instead of in both.
    core = ov.Core()
    if INT8_MODEL_PATH.exists():
        quantized_model = core.read_model(INT8_MODEL_PATH)
    else:
        quant_dataset, _ = prepare_dataset()
        quantized_model = quantize_model(core.read_model(IR_MODEL_PATH), quant_dataset)
    compiled_int8_model = core.compile_model(quantized_model, "CPU")

    # 5. Run detection with the INT8 model and display the result.
    predict_and_show_image(det_model, compiled_int8_model)


if __name__ == "__main__":
    main()
下面是推理结果