338 lines
14 KiB
Python
338 lines
14 KiB
Python
|
# 调用 PaddleOCR-json.exe 的 Python Api
|
|||
|
# 项目主页:
|
|||
|
# https://github.com/hiroi-sora/PaddleOCR-json
|
|||
|
|
|||
|
import os
|
|||
|
import socket # 套接字
|
|||
|
import atexit # 退出处理
|
|||
|
import subprocess # 进程,管道
|
|||
|
import re # regex
|
|||
|
from json import loads as jsonLoads, dumps as jsonDumps
|
|||
|
from sys import platform as sysPlatform # popen静默模式
|
|||
|
from base64 import b64encode # base64 编码
|
|||
|
|
|||
|
|
|||
|
class PPOCR_pipe: # 调用OCR(管道模式)
|
|||
|
def __init__(self, exePath: str, modelsPath: str = None, argument: dict = None):
|
|||
|
"""初始化识别器(管道模式)。\n
|
|||
|
`exePath`: 识别器`PaddleOCR_json.exe`的路径。\n
|
|||
|
`modelsPath`: 识别库`models`文件夹的路径。若为None则默认识别库与识别器在同一目录下。\n
|
|||
|
`argument`: 启动参数,字典`{"键":值}`。参数说明见 https://github.com/hiroi-sora/PaddleOCR-json
|
|||
|
"""
|
|||
|
# 私有成员变量
|
|||
|
self.__ENABLE_CLIPBOARD = False
|
|||
|
|
|||
|
exePath = os.path.abspath(exePath)
|
|||
|
cwd = os.path.abspath(os.path.join(exePath, os.pardir)) # 获取exe父文件夹
|
|||
|
cmds = [exePath]
|
|||
|
# 处理启动参数
|
|||
|
if modelsPath is not None:
|
|||
|
if os.path.exists(modelsPath) and os.path.isdir(modelsPath):
|
|||
|
cmds += ["--models_path", os.path.abspath(modelsPath)]
|
|||
|
else:
|
|||
|
raise Exception(
|
|||
|
f"Input modelsPath doesn't exits or isn't a directory. modelsPath: [{modelsPath}]"
|
|||
|
)
|
|||
|
if isinstance(argument, dict):
|
|||
|
for key, value in argument.items():
|
|||
|
# Popen() 要求输入list里所有的元素都是 str 或 bytes
|
|||
|
if isinstance(value, bool):
|
|||
|
cmds += [f"--{key}={value}"] # 布尔参数必须键和值连在一起
|
|||
|
elif isinstance(value, str):
|
|||
|
cmds += [f"--{key}", value]
|
|||
|
else:
|
|||
|
cmds += [f"--{key}", str(value)]
|
|||
|
# 设置子进程启用静默模式,不显示控制台窗口
|
|||
|
self.ret = None
|
|||
|
startupinfo = None
|
|||
|
if "win32" in str(sysPlatform).lower():
|
|||
|
startupinfo = subprocess.STARTUPINFO()
|
|||
|
startupinfo.dwFlags = (
|
|||
|
subprocess.CREATE_NEW_CONSOLE | subprocess.STARTF_USESHOWWINDOW
|
|||
|
)
|
|||
|
startupinfo.wShowWindow = subprocess.SW_HIDE
|
|||
|
self.ret = subprocess.Popen( # 打开管道
|
|||
|
cmds,
|
|||
|
cwd=cwd,
|
|||
|
stdin=subprocess.PIPE,
|
|||
|
stdout=subprocess.PIPE,
|
|||
|
stderr=subprocess.DEVNULL, # 丢弃stderr的内容
|
|||
|
startupinfo=startupinfo, # 开启静默模式
|
|||
|
)
|
|||
|
# 启动子进程
|
|||
|
while True:
|
|||
|
if not self.ret.poll() == None: # 子进程已退出,初始化失败
|
|||
|
raise Exception(f"OCR init fail.")
|
|||
|
initStr = self.ret.stdout.readline().decode("utf-8", errors="ignore")
|
|||
|
if "OCR init completed." in initStr: # 初始化成功
|
|||
|
break
|
|||
|
elif "OCR clipboard enbaled." in initStr: # 检测到剪贴板已启用
|
|||
|
self.__ENABLE_CLIPBOARD = True
|
|||
|
atexit.register(self.exit) # 注册程序终止时执行强制停止子进程
|
|||
|
|
|||
|
def isClipboardEnabled(self) -> bool:
|
|||
|
return self.__ENABLE_CLIPBOARD
|
|||
|
|
|||
|
def getRunningMode(self) -> str:
|
|||
|
# 默认管道模式只能运行在本地
|
|||
|
return "local"
|
|||
|
|
|||
|
def runDict(self, writeDict: dict):
|
|||
|
"""传入指令字典,发送给引擎进程。\n
|
|||
|
`writeDict`: 指令字典。\n
|
|||
|
`return`: {"code": 识别码, "data": 内容列表或错误信息字符串}\n"""
|
|||
|
# 检查子进程
|
|||
|
if not self.ret:
|
|||
|
return {"code": 901, "data": f"引擎实例不存在。"}
|
|||
|
if not self.ret.poll() == None:
|
|||
|
return {"code": 902, "data": f"子进程已崩溃。"}
|
|||
|
# 输入信息
|
|||
|
writeStr = jsonDumps(writeDict, ensure_ascii=True, indent=None) + "\n"
|
|||
|
try:
|
|||
|
self.ret.stdin.write(writeStr.encode("utf-8"))
|
|||
|
self.ret.stdin.flush()
|
|||
|
except Exception as e:
|
|||
|
return {
|
|||
|
"code": 902,
|
|||
|
"data": f"向识别器进程传入指令失败,疑似子进程已崩溃。{e}",
|
|||
|
}
|
|||
|
# 获取返回值
|
|||
|
try:
|
|||
|
getStr = self.ret.stdout.readline().decode("utf-8", errors="ignore")
|
|||
|
except Exception as e:
|
|||
|
return {"code": 903, "data": f"读取识别器进程输出值失败。异常信息:[{e}]"}
|
|||
|
try:
|
|||
|
return jsonLoads(getStr)
|
|||
|
except Exception as e:
|
|||
|
return {
|
|||
|
"code": 904,
|
|||
|
"data": f"识别器输出值反序列化JSON失败。异常信息:[{e}]。原始内容:[{getStr}]",
|
|||
|
}
|
|||
|
|
|||
|
def run(self, imgPath: str):
|
|||
|
"""对一张本地图片进行文字识别。\n
|
|||
|
`exePath`: 图片路径。\n
|
|||
|
`return`: {"code": 识别码, "data": 内容列表或错误信息字符串}\n"""
|
|||
|
writeDict = {"image_path": imgPath}
|
|||
|
return self.runDict(writeDict)
|
|||
|
|
|||
|
def runClipboard(self):
|
|||
|
"""立刻对剪贴板第一位的图片进行文字识别。\n
|
|||
|
`return`: {"code": 识别码, "data": 内容列表或错误信息字符串}\n"""
|
|||
|
if self.__ENABLE_CLIPBOARD:
|
|||
|
return self.run("clipboard")
|
|||
|
else:
|
|||
|
raise Exception("剪贴板功能不存在或已禁用。")
|
|||
|
|
|||
|
def runBase64(self, imageBase64: str):
|
|||
|
"""对一张编码为base64字符串的图片进行文字识别。\n
|
|||
|
`imageBase64`: 图片base64字符串。\n
|
|||
|
`return`: {"code": 识别码, "data": 内容列表或错误信息字符串}\n"""
|
|||
|
writeDict = {"image_base64": imageBase64}
|
|||
|
return self.runDict(writeDict)
|
|||
|
|
|||
|
def runBytes(self, imageBytes):
|
|||
|
"""对一张图片的字节流信息进行文字识别。\n
|
|||
|
`imageBytes`: 图片字节流。\n
|
|||
|
`return`: {"code": 识别码, "data": 内容列表或错误信息字符串}\n"""
|
|||
|
imageBase64 = b64encode(imageBytes).decode("utf-8")
|
|||
|
return self.runBase64(imageBase64)
|
|||
|
|
|||
|
def exit(self):
|
|||
|
"""关闭引擎子进程"""
|
|||
|
if hasattr(self, "ret"):
|
|||
|
if not self.ret:
|
|||
|
return
|
|||
|
try:
|
|||
|
self.ret.kill() # 关闭子进程
|
|||
|
except Exception as e:
|
|||
|
print(f"[Error] ret.kill() {e}")
|
|||
|
self.ret = None
|
|||
|
atexit.unregister(self.exit) # 移除退出处理
|
|||
|
print("### PPOCR引擎子进程关闭!")
|
|||
|
|
|||
|
@staticmethod
|
|||
|
def printResult(res: dict):
|
|||
|
"""用于调试,格式化打印识别结果。\n
|
|||
|
`res`: OCR识别结果。"""
|
|||
|
|
|||
|
# 识别成功
|
|||
|
if res["code"] == 100:
|
|||
|
index = 1
|
|||
|
for line in res["data"]:
|
|||
|
print(
|
|||
|
f"{index}-置信度:{round(line['score'], 2)},文本:{line['text']}",
|
|||
|
end="\\n\n" if line.get("end", "") == "\n" else "\n",
|
|||
|
)
|
|||
|
index += 1
|
|||
|
elif res["code"] == 100:
|
|||
|
print("图片中未识别出文字。")
|
|||
|
else:
|
|||
|
print(f"图片识别失败。错误码:{res['code']},错误信息:{res['data']}")
|
|||
|
|
|||
|
def __del__(self):
|
|||
|
self.exit()
|
|||
|
|
|||
|
|
|||
|
class PPOCR_socket(PPOCR_pipe):
|
|||
|
"""调用OCR(套接字模式)"""
|
|||
|
|
|||
|
def __init__(self, exePath: str, modelsPath: str = None, argument: dict = None):
|
|||
|
"""初始化识别器(套接字模式)。\n
|
|||
|
`exePath`: 识别器`PaddleOCR_json.exe`的路径。\n
|
|||
|
`modelsPath`: 识别库`models`文件夹的路径。若为None则默认识别库与识别器在同一目录下。\n
|
|||
|
`argument`: 启动参数,字典`{"键":值}`。参数说明见 https://github.com/hiroi-sora/PaddleOCR-json
|
|||
|
"""
|
|||
|
# 处理参数
|
|||
|
if not argument:
|
|||
|
argument = {}
|
|||
|
if "port" not in argument:
|
|||
|
argument["port"] = 0 # 随机端口号
|
|||
|
if "addr" not in argument:
|
|||
|
argument["addr"] = "loopback" # 本地环回地址
|
|||
|
|
|||
|
# 处理输入的路径,可能为本地或远程路径
|
|||
|
self.__runningMode = self.__configureExePath(exePath)
|
|||
|
|
|||
|
# 如果为本地路径:使用 PPOCR_pipe 来开启本地引擎进程
|
|||
|
if self.__runningMode == "local":
|
|||
|
super().__init__(self.exePath, modelsPath, argument) # 父类构造函数
|
|||
|
self.__ENABLE_CLIPBOARD = super().isClipboardEnabled()
|
|||
|
# 再获取一行输出,检查是否成功启动服务器
|
|||
|
initStr = self.ret.stdout.readline().decode("utf-8", errors="ignore")
|
|||
|
if not self.ret.poll() == None: # 子进程已退出,初始化失败
|
|||
|
raise Exception(f"Socket init fail.")
|
|||
|
if "Socket init completed. " in initStr: # 初始化成功
|
|||
|
splits = initStr.split(":")
|
|||
|
self.ip = splits[0].split("Socket init completed. ")[1]
|
|||
|
self.port = int(splits[1]) # 提取端口号
|
|||
|
self.ret.stdout.close() # 关闭管道重定向,防止缓冲区填满导致堵塞
|
|||
|
print(f"套接字服务器初始化成功。{self.ip}:{self.port}")
|
|||
|
return
|
|||
|
|
|||
|
# 如果为远程路径:直接连接
|
|||
|
elif self.__runningMode == "remote":
|
|||
|
self.__ENABLE_CLIPBOARD = False
|
|||
|
# 发送一个空指令,检测远程服务器可用性
|
|||
|
testServer = self.runDict({})
|
|||
|
if testServer["code"] in [902, 903, 904]:
|
|||
|
raise Exception(f"Socket connection fail.")
|
|||
|
print(f"套接字服务器连接成功。{self.ip}:{self.port}")
|
|||
|
return
|
|||
|
|
|||
|
# 异常
|
|||
|
self.exit()
|
|||
|
raise Exception(f"Socket init fail.")
|
|||
|
|
|||
|
def isClipboardEnabled(self) -> bool:
|
|||
|
return self.__ENABLE_CLIPBOARD
|
|||
|
|
|||
|
def getRunningMode(self) -> str:
|
|||
|
return self.__runningMode
|
|||
|
|
|||
|
def runDict(self, writeDict: dict):
|
|||
|
"""传入指令字典,发送给引擎进程。\n
|
|||
|
`writeDict`: 指令字典。\n
|
|||
|
`return`: {"code": 识别码, "data": 内容列表或错误信息字符串}\n"""
|
|||
|
|
|||
|
# 仅在本地模式下检查引擎进程
|
|||
|
if self.__runningMode == "local":
|
|||
|
# 检查子进程
|
|||
|
if not self.ret.poll() == None:
|
|||
|
return {"code": 901, "data": f"子进程已崩溃。"}
|
|||
|
|
|||
|
# 通信
|
|||
|
writeStr = jsonDumps(writeDict, ensure_ascii=True, indent=None) + "\n"
|
|||
|
try:
|
|||
|
# 创建TCP连接
|
|||
|
clientSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|||
|
clientSocket.connect((self.ip, self.port))
|
|||
|
# 发送数据
|
|||
|
clientSocket.sendall(writeStr.encode())
|
|||
|
# 发送完所有数据,关闭我方套接字,之后只能从服务器读取数据
|
|||
|
clientSocket.shutdown(socket.SHUT_WR)
|
|||
|
# 接收数据
|
|||
|
resData = b""
|
|||
|
while True:
|
|||
|
chunk = clientSocket.recv(1024)
|
|||
|
if not chunk:
|
|||
|
break
|
|||
|
resData += chunk
|
|||
|
getStr = resData.decode()
|
|||
|
except ConnectionRefusedError:
|
|||
|
return {"code": 902, "data": "连接被拒绝"}
|
|||
|
except TimeoutError:
|
|||
|
return {"code": 903, "data": "连接超时"}
|
|||
|
except Exception as e:
|
|||
|
return {"code": 904, "data": f"网络错误:{e}"}
|
|||
|
finally:
|
|||
|
clientSocket.close() # 关闭连接
|
|||
|
# 反序列输出信息
|
|||
|
try:
|
|||
|
return jsonLoads(getStr)
|
|||
|
except Exception as e:
|
|||
|
return {
|
|||
|
"code": 905,
|
|||
|
"data": f"识别器输出值反序列化JSON失败。异常信息:[{e}]。原始内容:[{getStr}]",
|
|||
|
}
|
|||
|
|
|||
|
def exit(self):
|
|||
|
"""关闭引擎子进程"""
|
|||
|
# 仅在本地模式下关闭引擎进程
|
|||
|
if hasattr(self, "ret"):
|
|||
|
if self.__runningMode == "local":
|
|||
|
if not self.ret:
|
|||
|
return
|
|||
|
try:
|
|||
|
self.ret.kill() # 关闭子进程
|
|||
|
except Exception as e:
|
|||
|
print(f"[Error] ret.kill() {e}")
|
|||
|
self.ret = None
|
|||
|
|
|||
|
self.ip = None
|
|||
|
self.port = None
|
|||
|
atexit.unregister(self.exit) # 移除退出处理
|
|||
|
print("### PPOCR引擎子进程关闭!")
|
|||
|
|
|||
|
def __del__(self):
|
|||
|
self.exit()
|
|||
|
|
|||
|
def __configureExePath(self, exePath: str) -> str:
|
|||
|
"""处理识别器路径,自动区分本地路径和远程路径"""
|
|||
|
|
|||
|
pattern = r"remote://(.*):(\d+)"
|
|||
|
match = re.search(pattern, exePath)
|
|||
|
try:
|
|||
|
if match: # 远程模式
|
|||
|
self.ip = match.group(1)
|
|||
|
self.port = int(match.group(2))
|
|||
|
if self.ip == "any":
|
|||
|
self.ip = "0.0.0.0"
|
|||
|
elif self.ip == "loopback":
|
|||
|
self.ip = "127.0.0.1"
|
|||
|
return "remote"
|
|||
|
else: # 本地模式
|
|||
|
self.exePath = exePath
|
|||
|
return "local"
|
|||
|
except:
|
|||
|
return None
|
|||
|
|
|||
|
|
|||
|
def GetOcrApi(
|
|||
|
exePath: str, modelsPath: str = None, argument: dict = None, ipcMode: str = "pipe"
|
|||
|
):
|
|||
|
"""获取识别器API对象。\n
|
|||
|
`exePath`: 识别器`PaddleOCR_json.exe`的路径。\n
|
|||
|
`modelsPath`: 识别库`models`文件夹的路径。若为None则默认识别库与识别器在同一目录下。\n
|
|||
|
`argument`: 启动参数,字典`{"键":值}`。参数说明见 https://github.com/hiroi-sora/PaddleOCR-json\n
|
|||
|
`ipcMode`: 进程通信模式,可选值为套接字模式`socket` 或 管道模式`pipe`。用法上完全一致。
|
|||
|
"""
|
|||
|
if ipcMode == "socket":
|
|||
|
return PPOCR_socket(exePath, modelsPath, argument)
|
|||
|
elif ipcMode == "pipe":
|
|||
|
return PPOCR_pipe(exePath, modelsPath, argument)
|
|||
|
else:
|
|||
|
raise Exception(
|
|||
|
f'ipcMode可选值为 套接字模式"socket" 或 管道模式"pipe" ,不允许{ipcMode}。'
|
|||
|
)
|