rapidocr v3.3.0发布了
主要更新:
#476: 支持 PP-OCRv5 PyTorch 模型
#489 增加 to_json 格式
from rapidocr import RapidOCR

engine = RapidOCR()
img_url = "https://img1.baidu.com/it/u=3619974146,1266987475&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=516"
result = engine(img_url, return_word_box=True, return_single_char_box=True)
print(result)
result.vis("vis_result.jpg")
print(result.to_json())
结果示例:
[{'box': [[71.0, 363.0], [419.0, 363.0], [419.0, 382.0], [71.0, 382.0]], 'txt': '曲曲折折的荷塘上面,弥望的是田田的叶', 'score': 0.99052}, {'box': [[71.0, 392.0], [410.0, 392.0], [410.0, 412.0], [71.0, 412.0]], 'txt': '子。叶子出水很高,像亭亭的舞女的裙。', 'score': 0.99123}, {'box': [[185.0, 466.0], [317.0, 466.0], [317.0, 488.0], [185.0, 488.0]], 'txt': '—《荷塘月色》一', 'score': 0.89048}]
#498 修复指定 font_path 字体文件不生效的问题
#499 集成 PP-OCRv5 小语种模型,包括 Korean / Latin / Eslav
from rapidocr import EngineType, LangDet, LangRec, ModelType, RapidOCR
from rapidocr.utils.typings import OCRVersion

engine = RapidOCR(
    params={
        "Rec.lang_type": LangRec.ESLAV,  # KOREAN / LATIN / ESLAV
        "Rec.engine_type": EngineType.ONNXRUNTIME,
        "Rec.ocr_version": OCRVersion.PPOCRV5,
    }
)
img_path = "tests/test_files/eslav.jpg"
result = engine(img_path, use_det=False, use_cls=False, use_rec=True)
print(result)
result.vis("vis_result.jpg")