当前位置: 首页 > news >正文

yolov8使用

导出 ONNX:`yolo export model=D:/zou/wjzz/workDoc/screwsDetech/runs/detect/train/weights/best.pt format=onnx opset=12 dynamic=True simplify=True`

内容解释
| 颜色 | 节点 | 作用 |
| --- | --- | --- |
| 蓝色 | Conv | 卷积层,提取特征 |
| 黑色 | Add / Mul | 做位置偏移、网格映射 |
| 绿色 | Gather / Shape | 从 shape 里提取宽、高等维度信息 |
| 红色 | Concat / Reshape | 拼接张量、变形输出 |
| 紫色 | Softmax / Transpose | 用于分类概率或格式变换 |

因为这部分(检测头之后的解码等后处理节点)放在 ONNX 里:

  • 很难理解 / 不好调试

  • 不利于优化和加速

  • 有时还不能使用 GPU 推理

所以很多人导出时会使用 `model.export(format="onnx", simplify=True, dynamic=False, nms=False)`,不把 NMS 等后处理导出到模型里,再用自己写的代码(像你用 Python 写的那样)做后处理。

✅ 什么是张量(Tensor)?

张量(Tensor)就是一个多维数组。 你可以把它看作是:

| 维度 | 举例 | 类比(在 NumPy 中) |
| --- | --- | --- |
| 标量(0维) | 3.14 | `np.array(3.14)` |
| 向量(1维) | [1, 2, 3] | `np.array([1, 2, 3])` |
| 矩阵(2维) | [[1,2],[3,4]] | `np.array([[1,2],[3,4]])` |
| 三维张量(3维) | 彩色图像(H, W, C) | `img[height][width][channel]` |
| 四维张量(4维) | 多张图片:[batch, channel, height, width] | `img[batch][channel][H][W]` |

所以你可以记住一句话: 张量 = “有维度的数组”,是用来装图片、特征图、模型输入输出的容器。

✅ NCHW 和 NHWC 是什么?

它们是 图像数据的维度排列方式,在模型输入输出中经常见到。

| 格式 | 含义 | 解释 | 举例(YOLO) |
| --- | --- | --- | --- |
| NCHW | Batch, Channels, Height, Width | PyTorch 默认格式 | [1, 3, 640, 640] |
| NHWC | Batch, Height, Width, Channels | TensorFlow 默认格式 | [1, 640, 640, 3] |

比如,一张 RGB 图片输入模型时:

  • NCHW:先通道、后高宽:[1, 3, 640, 640]

  • NHWC:先高宽、后通道:[1, 640, 640, 3]

假设你有 100 张图片要输入模型,但显卡的显存有限,不能一次处理 100 张,那就可以每次处理 16 张,这个 16 就是 Batch Size(N)。例如:

NCHW: (16, 3, 640, 640)

N = 16:一次处理 16 张图片

C = 3:每张图有 3 个通道(RGB)

H = 640:高度 640 像素

W = 640:宽度 640 像素

已经可以跑通的程序(尚未优化)

using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using OpenCvSharp;

/// <summary>
/// Minimal YOLOv8 ONNX demo: loads a model, runs it on one image, keeps every
/// prediction above a confidence threshold (no NMS) and draws the boxes.
/// </summary>
class Program
{
    // Side length of the square network input used by the resize and the tensor shape.
    private const int InputSize = 640;

    // Predictions whose score is not strictly above this value are discarded.
    private const float ConfThreshold = 0.25f;

    static void Main(string[] args)
    {
        // 1. Load the model. The session wraps native resources, so dispose it on exit.
        using var session = new InferenceSession(@"D:\zou\wjzz\workDoc\screwsDetech\runs\detect\train\weights\best.onnx");

        // 2. Read the image.
        string imgPath = @"D:\zou\wjzz\workDoc\screwsDetech\test_images\screw_keshang_bad_130.png";
        using Mat img = Cv2.ImRead(imgPath);
        if (img.Empty())
        {
            // Stop here with a clear message instead of failing later inside Resize.
            Console.Error.WriteLine($"Failed to read image: {imgPath}");
            return;
        }

        using Mat originalImg = img.Clone();
        int h0 = img.Rows;
        int w0 = img.Cols;

        // 3. Preprocess: resize to the network input size, BGR -> RGB, scale to [0, 1].
        using Mat imgResized = new Mat();
        Cv2.Resize(img, imgResized, new Size(InputSize, InputSize));
        Cv2.CvtColor(imgResized, imgResized, ColorConversionCodes.BGR2RGB);
        imgResized.ConvertTo(imgResized, MatType.CV_32FC3, 1.0 / 255);

        // 4. HWC -> CHW: each channel is written to its own contiguous plane
        //    of a 1 x 3 x InputSize x InputSize tensor.
        int planeSize = InputSize * InputSize;
        var inputData = new float[3 * planeSize];
        for (int y = 0; y < InputSize; y++)
        {
            for (int x = 0; x < InputSize; x++)
            {
                Vec3f pixel = imgResized.At<Vec3f>(y, x);
                int offset = y * InputSize + x;
                inputData[offset] = pixel.Item0;                 // R plane starts at 0
                inputData[planeSize + offset] = pixel.Item1;     // G plane starts at planeSize
                inputData[2 * planeSize + offset] = pixel.Item2; // B plane starts at 2 * planeSize
            }
        }

        var inputTensor = new DenseTensor<float>(inputData, new[] { 1, 3, InputSize, InputSize });

        // 5. Inference. The output tensor is only used while 'results' is alive.
        var inputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor(session.InputMetadata.Keys.First(), inputTensor)
        };
        using var results = session.Run(inputs);
        var output = results.First().AsTensor<float>();

        // 6. Check the output layout: expected (1, 5, N), e.g. (1, 5, 8400),
        //    where rows 0-3 are cx, cy, w, h and row 4 is the score.
        var dims = output.Dimensions;
        if (dims.Length != 3 || dims[1] < 5)
        {
            Console.Error.WriteLine("Unexpected model output shape; expected (1, 5, N).");
            return;
        }

        int numPreds = dims[2];

        // 7. Filter by confidence and convert centre/size boxes to corner boxes.
        float scaleX = (float)w0 / InputSize;
        float scaleY = (float)h0 / InputSize;
        var boxes = new List<Rect2d>();
        var scores = new List<float>();
        for (int i = 0; i < numPreds; i++)
        {
            float objness = output[0, 4, i];
            if (objness > ConfThreshold)
            {
                float cx = output[0, 0, i];
                float cy = output[0, 1, i];
                float w = output[0, 2, i];
                float h = output[0, 3, i];

                // Map the corners from network-input coordinates back to the original image.
                float x1 = (cx - w / 2) * scaleX;
                float y1 = (cy - h / 2) * scaleY;
                float x2 = (cx + w / 2) * scaleX;
                float y2 = (cy + h / 2) * scaleY;

                boxes.Add(new Rect2d(x1, y1, x2 - x1, y2 - y1));
                scores.Add(objness);
            }
        }

        // 8. Draw each box with its score.
        for (int i = 0; i < boxes.Count; i++)
        {
            var rect = boxes[i];
            Cv2.Rectangle(originalImg, new Point(rect.X, rect.Y), new Point(rect.X + rect.Width, rect.Y + rect.Height), Scalar.Green, 2);
            Cv2.PutText(originalImg, $"{scores[i]:0.00}", new Point((int)rect.X, (int)rect.Y - 10), HersheyFonts.HersheySimplex, 0.5, Scalar.Green, 1);
        }

        // 9. Show the result until a key is pressed.
        Cv2.ImShow("Result", originalImg);
        Cv2.WaitKey();
        Cv2.DestroyAllWindows();
    }
}

处理逻辑优化:使用 NMS(非极大值抑制)

using OpenCvSharp; using Microsoft.ML.OnnxRuntime; using Microsoft.ML.OnnxRuntime.Tensors; using System; using System.Collections.Generic; using System.Linq;

/// <summary>
/// YOLOv8 ONNX demo with non-maximum suppression: preprocesses one image,
/// runs the model, filters predictions by confidence, suppresses overlapping
/// boxes and draws the survivors.
/// </summary>
class Program
{
    static readonly int inputWidth = 640;
    static readonly int inputHeight = 640;
    static readonly float confThreshold = 0.25f;
    static readonly float iouThreshold = 0.45f;

    static readonly InferenceSession session = new InferenceSession(@"D:\zou\wjzz\workDoc\screwsDetech\runs\detect\train\weights\best.onnx");

    static void Main(string[] args)
    {
        string imgPath = @"D:\zou\wjzz\workDoc\screwsDetech\test_images\screw_keshang_bad_130.png";
        using Mat img = Cv2.ImRead(imgPath);
        if (img.Empty())
        {
            // Stop here with a clear message instead of failing later inside Resize.
            Console.Error.WriteLine($"Failed to read image: {imgPath}");
            return;
        }

        using Mat originalImg = img.Clone();

        int h0 = img.Rows;
        int w0 = img.Cols;

        var inputTensor = Preprocess(img);
        var output = Inference(inputTensor);
        var (boxes, scores) = Postprocess(output, w0, h0);
        var keepIdx = NMS(boxes, scores, iouThreshold);

        foreach (int i in keepIdx)
        {
            var box = boxes[i];
            Cv2.Rectangle(originalImg, new Point(box.X, box.Y), new Point(box.X + box.Width, box.Y + box.Height), Scalar.Green, 2);
            Cv2.PutText(originalImg, $"{scores[i]:0.00}", new Point((int)box.X, (int)box.Y - 10), HersheyFonts.HersheySimplex, 0.5, Scalar.Green, 1);
        }

        Cv2.ImShow("Result", originalImg);
        Cv2.WaitKey();
        Cv2.DestroyAllWindows();
    }

    /// <summary>
    /// Resizes the image to the network input size, converts BGR to RGB,
    /// scales pixel values by 1/255 and packs them into a 1x3xHxW tensor.
    /// </summary>
    /// <param name="img">Source image; it is not modified.</param>
    /// <returns>Input tensor in channel-first (CHW) order.</returns>
    static DenseTensor<float> Preprocess(Mat img)
    {
        using Mat resized = new Mat();
        Cv2.Resize(img, resized, new Size(inputWidth, inputHeight));
        Cv2.CvtColor(resized, resized, ColorConversionCodes.BGR2RGB);
        resized.ConvertTo(resized, MatType.CV_32FC3, 1.0 / 255);

        Mat[] channels = Cv2.Split(resized);
        try
        {
            int planeSize = inputHeight * inputWidth;
            float[] inputData = new float[3 * planeSize];
            for (int c = 0; c < 3; c++)
            {
                var indexer = channels[c].GetGenericIndexer<float>();
                for (int y = 0; y < inputHeight; y++)
                {
                    for (int x = 0; x < inputWidth; x++)
                    {
                        // Channel c occupies its own contiguous plane.
                        inputData[c * planeSize + y * inputWidth + x] = indexer[y, x];
                    }
                }
            }

            return new DenseTensor<float>(inputData, new[] { 1, 3, inputHeight, inputWidth });
        }
        finally
        {
            // Split allocates one Mat per channel; release them once copied.
            foreach (Mat channel in channels)
            {
                channel.Dispose();
            }
        }
    }

    /// <summary>
    /// Runs the model on the given input and returns its first output.
    /// </summary>
    /// <param name="inputTensor">Tensor bound to the model's first input name.</param>
    /// <returns>A copy of the first output tensor that stays valid after this call.</returns>
    static Tensor<float> Inference(DenseTensor<float> inputTensor)
    {
        var inputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor(session.InputMetadata.Keys.First(), inputTensor)
        };

        using var results = session.Run(inputs);

        // 'results' is disposed when this method returns, so hand back a copy
        // rather than a tensor that still refers to the disposed collection.
        return results.First().AsTensor<float>().ToDenseTensor();
    }

    /// <summary>
    /// Converts raw model output into boxes in original-image coordinates,
    /// dropping predictions whose score is below the confidence threshold.
    /// </summary>
    /// <param name="output">Model output; read as [0, row, i] with rows 0-3 = cx, cy, w, h and row 4 = score.</param>
    /// <param name="w0">Original image width in pixels.</param>
    /// <param name="h0">Original image height in pixels.</param>
    /// <returns>Parallel lists of boxes and scores.</returns>
    /// <exception cref="ArgumentException">The output is not rank 3 or has fewer than 5 rows.</exception>
    static (List<Rect2d>, List<float>) Postprocess(Tensor<float> output, int w0, int h0)
    {
        var dims = output.Dimensions;
        if (dims.Length != 3 || dims[1] < 5)
        {
            throw new ArgumentException("Expected an output tensor of shape (1, 5, N).", nameof(output));
        }

        int numPreds = dims[2];
        var boxes = new List<Rect2d>();
        var scores = new List<float>();

        for (int i = 0; i < numPreds; i++)
        {
            float obj = output[0, 4, i];
            if (obj < confThreshold)
            {
                continue;
            }

            float cx = output[0, 0, i];
            float cy = output[0, 1, i];
            float w = output[0, 2, i];
            float h = output[0, 3, i];

            // Centre/size -> corners, rescaled from network input to original image size.
            float x1 = (cx - w / 2) * w0 / inputWidth;
            float y1 = (cy - h / 2) * h0 / inputHeight;
            float x2 = (cx + w / 2) * w0 / inputWidth;
            float y2 = (cy + h / 2) * h0 / inputHeight;

            boxes.Add(new Rect2d(x1, y1, x2 - x1, y2 - y1));
            scores.Add(obj);
        }

        return (boxes, scores);
    }

    /// <summary>
    /// Greedy non-maximum suppression: visits boxes in descending score order
    /// and removes every later box whose IoU with a kept box exceeds the threshold.
    /// </summary>
    /// <param name="boxes">Candidate boxes.</param>
    /// <param name="scores">Score of each box, parallel to <paramref name="boxes"/>.</param>
    /// <param name="iouThreshold">IoU above which a lower-scored box is suppressed.</param>
    /// <returns>Indices into <paramref name="boxes"/> of the kept boxes, highest score first.</returns>
    static List<int> NMS(List<Rect2d> boxes, List<float> scores, float iouThreshold)
    {
        var indices = scores
            .Select((score, idx) => new { score, idx })
            .OrderByDescending(s => s.score)
            .Select(s => s.idx)
            .ToList();

        var keep = new List<int>();
        var removed = new bool[boxes.Count];

        for (int i = 0; i < indices.Count; i++)
        {
            int idx = indices[i];
            if (removed[idx])
            {
                continue;
            }

            keep.Add(idx);

            for (int j = i + 1; j < indices.Count; j++)
            {
                int idx2 = indices[j];
                if (removed[idx2])
                {
                    continue;
                }

                if (ComputeIOU(boxes[idx], boxes[idx2]) > iouThreshold)
                {
                    removed[idx2] = true;
                }
            }
        }

        return keep;
    }

    /// <summary>
    /// Computes intersection-over-union of two axis-aligned rectangles.
    /// </summary>
    /// <returns>IoU value, or 0 when the union area is not positive.</returns>
    static double ComputeIOU(Rect2d box1, Rect2d box2)
    {
        double xx1 = Math.Max(box1.X, box2.X);
        double yy1 = Math.Max(box1.Y, box2.Y);
        double xx2 = Math.Min(box1.X + box1.Width, box2.X + box2.Width);
        double yy2 = Math.Min(box1.Y + box1.Height, box2.Y + box2.Height);

        double interArea = Math.Max(0, xx2 - xx1) * Math.Max(0, yy2 - yy1);
        double unionArea = box1.Width * box1.Height + box2.Width * box2.Height - interArea;

        // Degenerate (zero-area) boxes would otherwise produce NaN from 0 / 0.
        if (unionArea <= 0)
        {
            return 0;
        }

        return interArea / unionArea;
    }
}

http://www.xdnf.cn/news/214723.html

相关文章:

  • 10 基于Gazebo和Rviz实现导航仿真,包括SLAM建图,地图服务,机器人定位,路径规划
  • BIM(建筑信息模型)与GIS(地理信息系统)的融合的技术框架、实现路径与应用场景
  • 【MCP Node.js SDK 全栈进阶指南】高级篇(2):MCP高性能服务优化
  • MCP 协议 ——AI 世界的 “USB-C 接口”:从认知到实践的全面指南
  • 源码角度分析 sync.map
  • Silvaco仿真中victory process的蒙特卡洛(Monte Carlo)离子注入
  • [4-06-09].第10节:自动配置- 分析@SpringBootApplication启动类
  • github使用记录
  • Redis分布式锁使用以及对接支付宝,paypal,strip跨境支付
  • 第十六届蓝桥杯大赛网安组--几道简单题的WP
  • HTTP协议重定向及交互
  • 运放参数汇总
  • mac word接入deepseek
  • LVGL -窗口操作
  • Linux/AndroidOS中进程间的通信线程间的同步 - 管道和FIFO
  • 【C++编程入门】:基本语法
  • Java 多线程基础:Thread 类详解
  • 云数据中心整体规划方案PPT(113页)
  • VIT(ICLR2021)
  • foc控制 - clarke变换和park变换
  • 【后端】【Docker】 Docker 动态代理 取消代理完整脚本合集(Ubuntu)
  • 内网服务器映射到公网上怎么做?网络将内网服务转换到公网上
  • 学习基本宠物美容
  • 零基础实现把知识库接到聆思CSK6大模型开发板上
  • 请简述一下什么是 Kotlin?它有哪些特性?
  • C++ 红黑树
  • 第14讲:科研图表的导出与排版艺术——高质量 PDF、TIFF 输出与投稿规范全攻略!
  • Java 基础--运算符全解析
  • Ubuntu搭建 Nginx以及Keepalived 实现 主备
  • ‘WebDriver‘ object has no attribute ‘find_element_by_class‘