当前位置：首页 > ai >正文

COCO数据集转YOLO数据集

ai 2025/6/22 22:54:16

一、COCO数据格式

# 转换前的数据格式
data_coco/
├── images/
│   ├── train2017/  # 训练集图片
│   └── val2017/    # 验证集图片
└── annotations/    # 原始COCO JSON文件

二、代码

import json
import os
import argparse


class COCO2YOLO:
    """Convert a COCO detection annotation file into per-image YOLO label files.

    One ``<image stem>.txt`` file is written to the output folder for every
    image that has at least one annotation. Each line has the form
    ``<class index> <center_x> <center_y> <width> <height>`` with the box
    values normalized by the image size and formatted with six decimals.
    The class index is the position of the category in the ``categories``
    list of the JSON file (not the raw COCO category id).
    """

    def __init__(self, json_file=None, output=None):
        """Load the annotation file and prepare the category lookup tables.

        :param json_file: path to the COCO annotation JSON file.
        :param output: folder that receives the YOLO ``.txt`` files; it is
            created when missing.
        :raises ValueError: if a path is not supplied or the JSON file does
            not exist.
        """
        # Backward compatibility: the constructor used to take no arguments
        # and read module-level ``json_file`` / ``output`` names instead.
        module_globals = globals()
        if json_file is None:
            json_file = module_globals.get('json_file')
        if output is None:
            output = module_globals.get('output')
        if json_file is None or output is None:
            raise ValueError("json_file and output must be provided")

        self._check_file_and_dir(json_file, output)
        self.output = output
        # Use a context manager so the file handle is closed deterministically.
        with open(json_file, 'r', encoding='utf-8') as f:
            self.labels = json.load(f)
        self.coco_id_name_map = self._categories()
        self.coco_name_list = list(self.coco_id_name_map.values())
        print("total images", len(self.labels['images']))
        print("total categories", len(self.labels['categories']))
        print("total labels", len(self.labels['annotations']))

    @staticmethod
    def _check_file_and_dir(file_path, dir_path):
        """Validate the input file and make sure the output folder exists.

        :param file_path: path of the annotation file that must exist.
        :param dir_path: output folder; created (with parents) when missing.
        :raises ValueError: if ``file_path`` does not exist.
        """
        if not os.path.exists(file_path):
            raise ValueError("file not found")
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(dir_path, exist_ok=True)

    def _categories(self):
        """Return a ``{category id: category name}`` dict from the annotation file."""
        return {cls['id']: cls['name'] for cls in self.labels['categories']}

    def _load_images_info(self):
        """Return ``{image id: (file name, width, height)}`` for every image entry.

        Only the final path component of ``file_name`` is kept, for both
        ``\\`` and ``/`` separators, so that every label file lands directly
        inside the output folder.
        """
        images_info = {}
        for image in self.labels['images']:
            file_name = os.path.basename(image['file_name'].replace('\\', '/'))
            images_info[image['id']] = (file_name, image['width'], image['height'])
        return images_info

    @staticmethod
    def _bbox_2_yolo(bbox, img_w, img_h):
        """Convert a COCO bounding box to normalized YOLO coordinates.

        :param bbox: COCO box in the form ``[x_min, y_min, width, height]``.
        :param img_w: image width.
        :param img_h: image height.
        :return: tuple ``(center_x, center_y, width, height)``, each divided
            by the matching image dimension.
        :raises ZeroDivisionError: if ``img_w`` or ``img_h`` is zero.
        """
        x_min, y_min, w, h = bbox[0], bbox[1], bbox[2], bbox[3]
        center_x = (x_min + w / 2) / img_w
        center_y = (y_min + h / 2) / img_h
        return center_x, center_y, w / img_w, h / img_h

    def _convert_anno(self, images_info):
        """Group the converted annotations by image id.

        :param images_info: mapping produced by :meth:`_load_images_info`.
        :return: ``{image id: [(image name, category id, yolo box), ...]}``.
        """
        anno_dict = {}
        skipped = 0
        for anno in self.labels['annotations']:
            image_id = anno['image_id']
            image_info = images_info.get(image_id)
            if image_info is None:
                # The annotation points at an image that is not listed in
                # 'images'; without its size the box cannot be normalized.
                skipped += 1
                continue
            image_name, img_w, img_h = image_info
            yolo_box = self._bbox_2_yolo(anno['bbox'], img_w, img_h)
            anno_dict.setdefault(image_id, []).append(
                (image_name, anno['category_id'], yolo_box))
        if skipped:
            print("skipped annotations without image info", skipped)
        return anno_dict

    def coco2yolo(self):
        """Run the full conversion: load image info, convert boxes, write files."""
        print("loading image info...")
        images_info = self._load_images_info()
        print("loading done, total images", len(images_info))
        print("start converting...")
        anno_dict = self._convert_anno(images_info)
        # Count annotations, not images, so the message matches its wording.
        print("converting done, total labels",
              sum(len(annos) for annos in anno_dict.values()))
        print("saving txt file...")
        self._save_txt(anno_dict)
        print("saving done")

    def _save_txt(self, anno_dict):
        """Write one YOLO label file per image into the output folder.

        :param anno_dict: mapping produced by :meth:`_convert_anno`.
        :raises ValueError: if an annotation uses a category id that is not
            declared in the ``categories`` list.
        """
        # Precompute name -> first position once instead of scanning the name
        # list for every annotation; setdefault keeps the first occurrence,
        # which is what list.index() returned.
        name_to_index = {}
        for index, name in enumerate(self.coco_name_list):
            name_to_index.setdefault(name, index)

        for annos in anno_dict.values():
            file_name = os.path.splitext(annos[0][0])[0] + ".txt"
            lines = []
            for _, coco_category_id, yolo_box in annos:
                cat_name = self.coco_id_name_map.get(coco_category_id)
                if cat_name not in name_to_index:
                    raise ValueError(
                        "unknown category id: {}".format(coco_category_id))
                box = ' '.join('{:.6f}'.format(value) for value in yolo_box)
                lines.append(str(name_to_index[cat_name]) + ' ' + box + '\n')
            with open(os.path.join(self.output, file_name), 'w',
                      encoding='utf-8') as f:
                f.writelines(lines)


def main():
    """Parse the command-line options and run the conversion."""
    parser = argparse.ArgumentParser(description='Test yolo data.')
    parser.add_argument('-i', help='JSON file', dest='json',
                        default='/home/data_coco/annotations/meter_coco_val.json')
    parser.add_argument('-o', help='path to output folder', dest='out',
                        default='/home/data_coco/labels/val2017')
    args = parser.parse_args()
    COCO2YOLO(args.json, args.out).coco2yolo()


if __name__ == '__main__':
    main()

三、YOLO数据格式

# 转换后的数据格式
data_coco/
├── images/
│   ├── train2017/  # 训练集图片
│   └── val2017/    # 验证集图片
├── labels/
│   ├── train2017/  # 训练集标签（YOLO格式）
│   └── val2017/    # 验证集标签（YOLO格式）
└── annotations/    # 原始COCO JSON文件

查看全文

http://www.xdnf.cn/news/13124.html