批量清洗与修改 YOLO 标签:删除与替换指定类别
在使用 YOLO 格式的数据进行训练或部署前,常常需要对标签文件进行清洗或修改。本文整理了两种常见场景的 Python 脚本:删除指定类别 和 修改某类为其他类,并支持自动打印检测到该类别的文件名,帮助你快速定位问题数据。
📁 标签格式说明(YOLO格式)
YOLO 标签通常为 .txt 文件,每个目标占一行,每行格式如下:
<类别ID> <x_center> <y_center> <width> <height>
场景一:删除类别为 10 的标注,并输出相关文件名
该脚本会递归扫描指定目录(含子目录)下的所有 .txt 标签文件,删除类别为 10 的标注行,并在发现该类别时输出文件名。注意:脚本会直接覆盖原文件,建议先备份数据。
import os


def process_txt_file(file_path):
    """Remove every class-10 annotation line from one YOLO label file.

    The class ID is the first whitespace-separated field of a line. It is
    compared as a whole token rather than as a string prefix, so lines for
    classes such as 100 or 105 are kept.

    The file is rewritten in place, and its path is printed, only when at
    least one line was removed; files without class 10 are left untouched.

    Args:
        file_path: Path of the label file to clean.
    """
    with open(file_path, 'r') as file:
        lines = file.readlines()

    kept_lines = []
    for line in lines:
        fields = line.split()
        # Exact match on the class-ID token; blank lines have no fields and
        # are always kept.
        if fields and fields[0] == "10":
            continue
        kept_lines.append(line)

    if len(kept_lines) == len(lines):
        return  # nothing removed, so there is nothing to report or rewrite

    print(f"⚠️ 检测到类别10: {file_path}")
    with open(file_path, 'w') as file:
        file.writelines(kept_lines)


def process_folder(folder_path):
    """Run process_txt_file on every ``.txt`` file below folder_path.

    The directory tree is walked recursively with ``os.walk``.

    Args:
        folder_path: Root directory containing the label files.
    """
    for root, _dirs, files in os.walk(folder_path):
        for name in files:
            if name.endswith(".txt"):
                process_txt_file(os.path.join(root, name))


# Change this to your own label directory
# NOTE: running this rewrites the label files under the folder in place,
# so work on a copy of the dataset if the originals must be kept.
folder_to_process = "/path/to/labels"
process_folder(folder_to_process)
场景二:将类别 11 修改为 10
有时我们需要将错误标注的类别进行更正,例如将所有类别为 11 的改为 10。
import os


def modify_yolo_labels(directory, old_class=11, new_class=10):
    """Relabel one class ID as another in every label file of a directory.

    Only ``.txt`` files directly inside ``directory`` are examined
    (subdirectories are not visited). A line is relabelled when its first
    whitespace-separated field equals ``str(old_class)``. A file is rewritten
    in place, and its path printed, only if at least one line changed; in a
    rewritten file every line is re-joined with single spaces.

    Args:
        directory: Directory that holds the YOLO label files.
        old_class: Class ID to look for.
        new_class: Class ID written in its place.
    """
    old_token = str(old_class)
    new_token = str(new_class)

    for filename in os.listdir(directory):
        if not filename.endswith(".txt"):
            continue

        file_path = os.path.join(directory, filename)
        with open(file_path, 'r') as f:
            lines = f.readlines()

        new_lines = []
        modified = False
        for line in lines:
            parts = line.strip().split()
            if parts and parts[0] == old_token:
                parts[0] = new_token
                modified = True
            new_lines.append(' '.join(parts) + '\n')

        if modified:
            # Report the classes actually used; the message used to be
            # hard-coded to "11" and "10" regardless of the arguments.
            print(f"修改类别{old_class}为{new_class}:{file_path}")
            with open(file_path, 'w') as f:
                f.writelines(new_lines)


# Replace with your own directory
# NOTE: files that contain the old class are overwritten in place, and only
# the top level of this directory is scanned (os.listdir is not recursive).
label_folder = "/path/to/labels"
modify_yolo_labels(label_folder)
以下是将“删除类别”和“替换类别”功能封装为一个 YOLO 标签清洗工具类的完整实现,便于在项目中调用或集成到数据清洗流程中。模块文件名为 yolo_label_cleaner.py:
import os


class YOLOLabelCleaner:
    """Batch-edit the class IDs stored in a tree of YOLO ``.txt`` label files.

    Both public methods walk ``folder_path`` recursively and rewrite the
    affected files in place.
    """

    def __init__(self, folder_path):
        """Remember the root directory that holds the label files."""
        self.folder_path = folder_path

    def _label_files(self):
        """Yield the path of every ``.txt`` file below ``self.folder_path``."""
        for root, _, files in os.walk(self.folder_path):
            for file in files:
                if file.endswith('.txt'):
                    yield os.path.join(root, file)

    def delete_class(self, target_class):
        """Delete every annotation line whose class ID is ``target_class``.

        The first whitespace-separated field of each line is compared as a
        whole token, so deleting class 1 does not also delete 10, 11 or 100.
        Files that contained the class are reported and rewritten; all other
        files are left untouched.
        """
        print(f"\n🧹 删除类别:{target_class}")
        target_token = str(target_class)

        for file_path in self._label_files():
            with open(file_path, 'r') as f:
                lines = f.readlines()

            filtered = []
            for line in lines:
                fields = line.split()
                # Exact token match; blank lines have no fields and are kept.
                if fields and fields[0] == target_token:
                    continue
                filtered.append(line)

            if len(filtered) != len(lines):
                print(f" 检测到类别{target_class}: {file_path}")
                with open(file_path, 'w') as f:
                    f.writelines(filtered)

    def replace_class(self, old_class, new_class):
        """Relabel every ``old_class`` annotation as ``new_class``.

        A file is reported and rewritten only if at least one line changed;
        in a rewritten file every line is re-joined with single spaces.
        """
        print(f"\n 替换类别:{old_class} ➜ {new_class}")
        old_token = str(old_class)
        new_token = str(new_class)

        for file_path in self._label_files():
            with open(file_path, 'r') as f:
                lines = f.readlines()

            new_lines = []
            modified = False
            for line in lines:
                parts = line.strip().split()
                if parts and parts[0] == old_token:
                    parts[0] = new_token
                    modified = True
                new_lines.append(' '.join(parts) + '\n')

            if modified:
                print(f"修改类别{old_class}为{new_class}: {file_path}")
                with open(file_path, 'w') as f:
                    f.writelines(new_lines)
使用示例
from yolo_label_cleaner import YOLOLabelCleaner

# Path to the label directory
label_dir = "/mnt/data/code/szl/dataset/self-driving/SelfDriving/test/labels"
cleaner = YOLOLabelCleaner(label_dir)

# Delete class 10
cleaner.delete_class(10)

# Replace class 11 with class 10
cleaner.replace_class(11, 10)
如果需要替换多类别,可以进行下面的修改:
import os


class YOLOLabelCleaner:
    """Batch-edit the class IDs stored in a tree of YOLO ``.txt`` label files.

    All public methods walk ``folder_path`` recursively and rewrite the
    affected files in place.
    """

    def __init__(self, folder_path):
        """Remember the root directory that holds the label files."""
        self.folder_path = folder_path

    def _label_files(self):
        """Yield the path of every ``.txt`` file below ``self.folder_path``."""
        for root, _, files in os.walk(self.folder_path):
            for file in files:
                if file.endswith('.txt'):
                    yield os.path.join(root, file)

    def delete_class(self, target_class):
        """Delete every annotation line whose class ID is ``target_class``.

        The first whitespace-separated field of each line is compared as a
        whole token, so deleting class 1 does not also delete 10, 11 or 100.
        Files that contained the class are reported and rewritten; all other
        files are left untouched.
        """
        print(f"\n 删除类别:{target_class}")
        target_token = str(target_class)

        for file_path in self._label_files():
            with open(file_path, 'r') as f:
                lines = f.readlines()

            filtered = []
            for line in lines:
                fields = line.split()
                # Exact token match; blank lines have no fields and are kept.
                if fields and fields[0] == target_token:
                    continue
                filtered.append(line)

            if len(filtered) != len(lines):
                print(f"检测到类别{target_class}: {file_path}")
                with open(file_path, 'w') as f:
                    f.writelines(filtered)

    def replace_class(self, old_class, new_class):
        """Relabel ``old_class`` as ``new_class`` via replace_classes_bulk."""
        print(f"\n替换类别:{old_class} ➜ {new_class}")
        self.replace_classes_bulk({old_class: new_class})

    def replace_classes_bulk(self, class_map: dict):
        """Replace several class IDs in a single pass over the label files.

        Args:
            class_map: Mapping ``{old_class: new_class}`` keyed by integer
                class ID. Each line is mapped at most once, so replacements
                are not chained (with ``{11: 10, 10: 9}`` an 11 becomes 10,
                not 9).

        Lines whose first field is not an integer (for example a
        ``classes.txt`` of class names stored next to the labels) are left
        unchanged instead of raising ``ValueError``. A file is reported and
        rewritten only if at least one line changed; in a rewritten file
        every line is re-joined with single spaces.
        """
        print(f"\n批量替换类别: {class_map}")

        for file_path in self._label_files():
            with open(file_path, 'r') as f:
                lines = f.readlines()

            modified = False
            new_lines = []
            for line in lines:
                parts = line.strip().split()
                if parts:
                    try:
                        class_id = int(parts[0])
                    except ValueError:
                        pass  # not a numeric class ID: keep the line as is
                    else:
                        if class_id in class_map:
                            parts[0] = str(class_map[class_id])
                            modified = True
                new_lines.append(' '.join(parts) + '\n')

            if modified:
                print(f"✅ 替换成功: {file_path}")
                with open(file_path, 'w') as f:
                    f.writelines(new_lines)
使用示例(批量替换)
from yolo_label_cleaner import YOLOLabelCleaner

# Initialize the cleaner with the label directory
label_dir = "/test/labels"
cleaner = YOLOLabelCleaner(label_dir)

# Replace several classes at once: 11 -> 10, 12 -> 9, 13 -> 8
class_map = {11: 10,12: 9,13: 8
}
cleaner.replace_classes_bulk(class_map)