kangda/app/util/baiduOcr.py
2025-05-15 17:08:34 +08:00

79 lines
2.5 KiB
Python

import yaml
import os
from PIL import Image
from paddleocr import PaddleOCR, draw_ocr
class BadiduOcr():
    """Wrapper around a PaddleOCR engine configured from a YAML file.

    The YAML configuration is expected to provide at least the keys this
    class reads: ``det_model_dir``, ``rec_model_dir`` and (only when drawing
    results) ``image_save_path_ocr``.

    The class name spelling is kept as-is so existing imports keep working.
    """

    # Font handed to ``draw_ocr`` when the caller does not supply one.
    # It is best to download a font file yourself and pass it explicitly.
    DEFAULT_FONT_PATH = '/usr/share/fonts/truetype/teluguvijayam/PottiSreeramulu.ttf'

    def __init__(self, config_path="app/config/config.yaml"):
        """Load the configuration and build the OCR engine.

        Args:
            config_path: Path of the YAML configuration file.
        """
        self.config_path = config_path
        self.config = self._parse_config()
        self.ocr = self._init_ocr()

    def image_inference(self, img_path, show_result=False, draw_result=False):
        """Run OCR on one image and return the raw engine result.

        Args:
            img_path: Path of the image to recognise.
            show_result: When true, print every recognised line.
            draw_result: When true, save an annotated copy of the image under
                ``config["image_save_path_ocr"]`` using the input file name.

        Returns:
            Whatever ``self.ocr.ocr`` returned, unmodified.
        """
        result = self.ocr.ocr(img_path, cls=False)
        if show_result:
            # The result, or any page inside it, may be None (the other
            # methods of this class guard for that too), so skip those
            # instead of failing while iterating.
            for page in result or []:
                for line in page or []:
                    print(line)
        if draw_result:
            file_name = os.path.basename(img_path)
            self.draw_result(
                result,
                img_path,
                os.path.join(self.config["image_save_path_ocr"], file_name),
            )
        return result

    def draw_result(self, result, img_path, save_path, font_path=None):
        """Draw the first page of ``result`` onto the image and save it.

        Nothing is written when ``result`` is None, empty, or its first page
        is None.

        Args:
            result: OCR output as returned by ``image_inference``.
            img_path: Path of the image the result belongs to.
            save_path: Destination path of the annotated image.
            font_path: Font file used to render the text; falls back to
                ``DEFAULT_FONT_PATH`` when omitted.
        """
        if not result or result[0] is None:
            return
        page = result[0]
        boxes = [line[0] for line in page]
        txts = [line[1][0] for line in page]
        scores = [line[1][1] for line in page]
        # convert() returns an independent image, so the file handle can be
        # released as soon as the conversion is done.
        with Image.open(img_path) as source_image:
            image = source_image.convert('RGB')
        im_show = draw_ocr(image, boxes, txts, scores,
                           font_path=font_path or self.DEFAULT_FONT_PATH)
        Image.fromarray(im_show).save(save_path)

    def _init_ocr(self):
        """Build the PaddleOCR engine from the model paths in the config."""
        ocr = PaddleOCR(use_angle_cls=False,
                        lang="ch",
                        det_model_dir=self.config["det_model_dir"],
                        det_max_side_len=1920, det_db_score_mode="slow",
                        rec_model_dir=self.config["rec_model_dir"],
                        drop_score=0.2)
        return ocr

    def _parse_config(self):
        """Read ``self.config_path`` as UTF-8 YAML and return the parsed data."""
        with open(self.config_path, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f)
        return config

    def parse_result(self, result):
        """Split an OCR result into recognised texts and confidences.

        Only the first page is read (batch size is 1).

        Args:
            result: OCR output as returned by ``image_inference``.

        Returns:
            A ``(value, conf)`` tuple of two parallel lists: the recognised
            strings and their confidences rounded to two decimals. Both are
            empty when ``result`` is None, empty, or its first page is None.
        """
        value = list()
        conf = list()
        if not result or result[0] is None:
            return value, conf
        for line in result[0]:
            # line[1][0], line[1][1] --> ocr value and confidence
            value.append(line[1][0])
            conf.append(round(line[1][1], 2))
        return value, conf