# 2024-12-03 11:50:15 +08:00
#
# 338 lines
# 14 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# 调用 PaddleOCR-json.exe 的 Python Api
# 项目主页:
# https://github.com/hiroi-sora/PaddleOCR-json
import os
import socket # 套接字
import atexit # 退出处理
import subprocess # 进程,管道
import re # regex
from json import loads as jsonLoads, dumps as jsonDumps
from sys import platform as sysPlatform # popen静默模式
from base64 import b64encode # base64 编码
class PPOCR_pipe:
    """OCR client that drives a PaddleOCR-json engine over stdin/stdout pipes.

    The constructor launches the engine executable as a subprocess; every
    request is one JSON line written to the child's stdin, and every reply is
    one JSON line read back from its stdout.
    """

    def __init__(self, exePath: str, modelsPath: str = None, argument: dict = None):
        """Start the engine subprocess and block until it reports readiness.

        Args:
            exePath: Path to the engine executable (`PaddleOCR_json.exe`).
            modelsPath: Path to the `models` directory. When None, no
                `--models_path` flag is passed (the engine presumably falls
                back to the models folder next to the executable).
            argument: Extra launch arguments as `{name: value}`. See
                https://github.com/hiroi-sora/PaddleOCR-json

        Raises:
            Exception: If `modelsPath` is given but is not an existing
                directory, or if the subprocess exits before printing
                "OCR init completed.".
        """
        # Private flag: set when the engine announces clipboard support.
        self.__ENABLE_CLIPBOARD = False
        exePath = os.path.abspath(exePath)
        # Run the engine with its own folder as the working directory.
        cwd = os.path.abspath(os.path.join(exePath, os.pardir))
        cmds = [exePath]
        # Translate the launch options into command-line tokens.
        if modelsPath is not None:
            if os.path.exists(modelsPath) and os.path.isdir(modelsPath):
                cmds += ["--models_path", os.path.abspath(modelsPath)]
            else:
                raise Exception(
                    f"Input modelsPath doesn't exits or isn't a directory. modelsPath: [{modelsPath}]"
                )
        if isinstance(argument, dict):
            for key, value in argument.items():
                # Popen() requires every element of the list to be str or bytes.
                if isinstance(value, bool):
                    # Boolean options must be a single "--key=value" token.
                    cmds.append(f"--{key}={value}")
                else:
                    cmds += [f"--{key}", str(value)]
        # On Windows, start the child without showing a console window.
        self.ret = None
        startupinfo = None
        if "win32" in str(sysPlatform).lower():
            startupinfo = subprocess.STARTUPINFO()
            # NOTE(review): CREATE_NEW_CONSOLE is a process *creation flag*
            # (normally passed via Popen(creationflags=...)), not a STARTF_*
            # value. It is kept here unchanged to preserve existing behavior;
            # confirm on Windows whether it should be moved or dropped.
            startupinfo.dwFlags = (
                subprocess.CREATE_NEW_CONSOLE | subprocess.STARTF_USESHOWWINDOW
            )
            startupinfo.wShowWindow = subprocess.SW_HIDE
        self.ret = subprocess.Popen(
            cmds,
            cwd=cwd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,  # discard whatever the engine logs to stderr
            startupinfo=startupinfo,
        )
        # Read the start-up output line by line until the ready marker shows up.
        while True:
            if self.ret.poll() is not None:  # child already exited: init failed
                raise Exception("OCR init fail.")
            initStr = self.ret.stdout.readline().decode("utf-8", errors="ignore")
            if "OCR init completed." in initStr:
                break
            elif "OCR clipboard enbaled." in initStr:
                # The misspelling is part of the marker matched against the
                # engine's output; do not "fix" it.
                self.__ENABLE_CLIPBOARD = True
        # Make sure the child is killed when the interpreter exits.
        atexit.register(self.exit)

    def isClipboardEnabled(self) -> bool:
        """Return True if the engine reported clipboard support at start-up."""
        return self.__ENABLE_CLIPBOARD

    def getRunningMode(self) -> str:
        """Return the running mode; pipe mode always reports "local"."""
        return "local"

    def runDict(self, writeDict: dict):
        """Send one command dictionary to the engine and return its reply.

        Args:
            writeDict: Command dictionary; serialised as a single JSON line.

        Returns:
            The engine's reply parsed from JSON (documented by the original
            author as `{"code": ..., "data": ...}`), or a locally built error
            dict with code 901 (no engine instance), 902 (child exited or
            write failed), 903 (read failed) or 904 (reply is not valid JSON).
        """
        # Make sure there is a live child process to talk to.
        if not self.ret:
            return {"code": 901, "data": "引擎实例不存在。"}
        if self.ret.poll() is not None:
            return {"code": 902, "data": "子进程已崩溃。"}
        # Send the request as one line of ASCII-only JSON.
        writeStr = jsonDumps(writeDict, ensure_ascii=True, indent=None) + "\n"
        try:
            self.ret.stdin.write(writeStr.encode("utf-8"))
            self.ret.stdin.flush()
        except Exception as e:
            return {
                "code": 902,
                "data": f"向识别器进程传入指令失败,疑似子进程已崩溃。{e}",
            }
        # Read exactly one reply line.
        try:
            getStr = self.ret.stdout.readline().decode("utf-8", errors="ignore")
        except Exception as e:
            return {"code": 903, "data": f"读取识别器进程输出值失败。异常信息:[{e}]"}
        try:
            return jsonLoads(getStr)
        except Exception as e:
            return {
                "code": 904,
                "data": f"识别器输出值反序列化JSON失败。异常信息[{e}]。原始内容:[{getStr}]",
            }

    def run(self, imgPath: str):
        """Run OCR on a local image file.

        Args:
            imgPath: Path of the image, sent to the engine as `image_path`.

        Returns:
            The result dictionary produced by `runDict`.
        """
        return self.runDict({"image_path": imgPath})

    def runClipboard(self):
        """Run OCR on the image currently held in the clipboard.

        Returns:
            The result dictionary produced by `runDict`.

        Raises:
            Exception: If the engine did not report clipboard support.
        """
        if self.__ENABLE_CLIPBOARD:
            return self.run("clipboard")
        raise Exception("剪贴板功能不存在或已禁用。")

    def runBase64(self, imageBase64: str):
        """Run OCR on an image supplied as a base64 string.

        Args:
            imageBase64: Base64 text, sent to the engine as `image_base64`.

        Returns:
            The result dictionary produced by `runDict`.
        """
        return self.runDict({"image_base64": imageBase64})

    def runBytes(self, imageBytes):
        """Run OCR on raw image bytes.

        Args:
            imageBytes: Bytes-like image data; base64-encoded before sending.

        Returns:
            The result dictionary produced by `runDict`.
        """
        imageBase64 = b64encode(imageBytes).decode("utf-8")
        return self.runBase64(imageBase64)

    def exit(self):
        """Kill the engine subprocess and drop the atexit hook."""
        if hasattr(self, "ret"):
            if not self.ret:
                return  # nothing running (never started or already closed)
            try:
                self.ret.kill()
            except Exception as e:
                print(f"[Error] ret.kill() {e}")
        self.ret = None
        atexit.unregister(self.exit)  # the hook is no longer needed
        print("### PPOCR引擎子进程关闭")

    @staticmethod
    def printResult(res: dict):
        """Pretty-print an OCR result dictionary (debugging helper).

        Args:
            res: Result dictionary with `code` and `data` keys. Code 100 is
                treated as success (`data` is a list of items carrying
                `score` and `text`, optionally `end`); code 101 is treated as
                "no text found"; anything else is reported as a failure.
        """
        code = res["code"]
        if code == 100:
            # Recognition succeeded: one numbered line per text item.
            for index, line in enumerate(res["data"], start=1):
                print(
                    f"{index}-置信度:{round(line['score'], 2)},文本:{line['text']}",
                    # Show an explicit "\n" marker when the item ends a line.
                    end="\\n\n" if line.get("end", "") == "\n" else "\n",
                )
        elif code == 101:
            # The original tested 100 a second time here, which made this
            # branch unreachable; 101 is the engine's "no text" code.
            print("图片中未识别出文字。")
        else:
            print(f"图片识别失败。错误码:{res['code']},错误信息:{res['data']}")

    def __del__(self):
        """Kill the subprocess when the object is garbage collected."""
        self.exit()
class PPOCR_socket(PPOCR_pipe):
    """OCR client that talks to a PaddleOCR-json engine over a TCP socket.

    In "local" mode the engine is launched as a subprocess (through the
    parent class) and then contacted on the address it prints at start-up.
    In "remote" mode (`exePath` of the form `remote://host:port`) no process
    is started and requests go straight to that address.
    """

    def __init__(self, exePath: str, modelsPath: str = None, argument: dict = None):
        """Start (local) or connect to (remote) a socket-mode engine.

        Args:
            exePath: Path to the engine executable, or `remote://host:port`
                to use an already running server.
            modelsPath: Path to the `models` directory; forwarded to the
                parent constructor in local mode.
            argument: Extra launch arguments as `{name: value}`. `port`
                defaults to 0 and `addr` to "loopback" when absent. See
                https://github.com/hiroi-sora/PaddleOCR-json

        Raises:
            Exception: If the local engine fails to start its socket server,
                or the remote server does not answer a probe request.
        """
        # Work on a copy so the defaults below never leak into the caller's dict.
        argument = dict(argument) if argument else {}
        argument.setdefault("port", 0)  # 0 = let the engine choose a port
        argument.setdefault("addr", "loopback")  # local loopback address
        # Decide between a local executable path and a remote address.
        self.__runningMode = self.__configureExePath(exePath)
        if self.__runningMode == "local":
            # Local path: let PPOCR_pipe launch the engine process.
            super().__init__(self.exePath, modelsPath, argument)
            self.__ENABLE_CLIPBOARD = super().isClipboardEnabled()
            # One more output line tells us whether the socket server came up.
            initStr = self.ret.stdout.readline().decode("utf-8", errors="ignore")
            if self.ret.poll() is not None:  # child exited: init failed
                raise Exception("Socket init fail.")
            if "Socket init completed. " in initStr:
                # The line is split as "Socket init completed. <ip>:<port>".
                splits = initStr.split(":")
                self.ip = splits[0].split("Socket init completed. ")[1]
                self.port = int(splits[1])
                # Stop reading the pipe (original note: prevents blocking
                # once the pipe buffer fills up).
                self.ret.stdout.close()
                print(f"套接字服务器初始化成功。{self.ip}:{self.port}")
                return
        elif self.__runningMode == "remote":
            # Remote address: nothing to launch, just probe the server.
            self.__ENABLE_CLIPBOARD = False
            # An empty command is enough to test reachability.
            testServer = self.runDict({})
            if testServer["code"] in [902, 903, 904]:
                raise Exception("Socket connection fail.")
            print(f"套接字服务器连接成功。{self.ip}:{self.port}")
            return
        # Any other outcome is a failed initialisation.
        self.exit()
        raise Exception("Socket init fail.")

    def isClipboardEnabled(self) -> bool:
        """Return True if the local engine reported clipboard support."""
        return self.__ENABLE_CLIPBOARD

    def getRunningMode(self) -> str:
        """Return "local" or "remote", as decided from `exePath`."""
        return self.__runningMode

    def runDict(self, writeDict: dict):
        """Send one command dictionary to the engine over TCP.

        A new connection is opened per request: the JSON line is sent, the
        write side is shut down, and the reply is read until the peer closes.

        Args:
            writeDict: Command dictionary; serialised as a single JSON line.

        Returns:
            The engine's reply parsed from JSON, or a locally built error
            dict with code 901 (local engine missing or exited), 902
            (connection refused), 903 (timeout), 904 (other network error)
            or 905 (reply is not valid JSON).
        """
        # Only a locally launched engine has a process to check.
        if self.__runningMode == "local":
            if not getattr(self, "ret", None):
                # e.g. runDict() called after exit(); report instead of crashing.
                return {"code": 901, "data": "引擎实例不存在。"}
            if self.ret.poll() is not None:
                return {"code": 901, "data": "子进程已崩溃。"}
        writeStr = jsonDumps(writeDict, ensure_ascii=True, indent=None) + "\n"
        # Pre-bind so the `finally` clause is safe even if socket() itself fails.
        clientSocket = None
        try:
            clientSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            clientSocket.connect((self.ip, self.port))
            clientSocket.sendall(writeStr.encode())
            # Everything is sent: close our write side, then only read.
            clientSocket.shutdown(socket.SHUT_WR)
            chunks = []
            while True:
                chunk = clientSocket.recv(1024)
                if not chunk:  # peer closed the connection
                    break
                chunks.append(chunk)
            getStr = b"".join(chunks).decode()
        except ConnectionRefusedError:
            return {"code": 902, "data": "连接被拒绝"}
        except TimeoutError:
            return {"code": 903, "data": "连接超时"}
        except Exception as e:
            return {"code": 904, "data": f"网络错误:{e}"}
        finally:
            if clientSocket is not None:
                clientSocket.close()
        # Deserialize the reply.
        try:
            return jsonLoads(getStr)
        except Exception as e:
            return {
                "code": 905,
                "data": f"识别器输出值反序列化JSON失败。异常信息[{e}]。原始内容:[{getStr}]",
            }

    def exit(self):
        """Kill the local engine subprocess (if any) and clear the address."""
        # Only local mode owns a process that must be killed.
        if hasattr(self, "ret"):
            if self.__runningMode == "local":
                if not self.ret:
                    return  # nothing running (never started or already closed)
                try:
                    self.ret.kill()
                except Exception as e:
                    print(f"[Error] ret.kill() {e}")
        self.ret = None
        self.ip = None
        self.port = None
        atexit.unregister(self.exit)  # the hook is no longer needed
        print("### PPOCR引擎子进程关闭")

    def __del__(self):
        """Release the engine when the object is garbage collected."""
        self.exit()

    def __configureExePath(self, exePath: str) -> str:
        """Classify `exePath` as a remote address or a local executable path.

        A string containing `remote://<host>:<port>` selects remote mode and
        stores `self.ip` / `self.port`; the host aliases "any" and "loopback"
        map to "0.0.0.0" and "127.0.0.1". Anything else is stored in
        `self.exePath` and selects local mode.

        Returns:
            "remote" or "local"; None if the port cannot be converted to int.
        """
        pattern = r"remote://(.*):(\d+)"
        match = re.search(pattern, exePath)
        if not match:  # local mode
            self.exePath = exePath
            return "local"
        try:
            self.port = int(match.group(2))
        except ValueError:
            # Narrowed from a bare `except:`; the failure result is unchanged.
            return None
        self.ip = match.group(1)
        if self.ip == "any":
            self.ip = "0.0.0.0"
        elif self.ip == "loopback":
            self.ip = "127.0.0.1"
        return "remote"
def GetOcrApi(
    exePath: str, modelsPath: str = None, argument: dict = None, ipcMode: str = "pipe"
):
    """Create an OCR API object for the requested communication mode.

    Args:
        exePath: Path to the engine executable (`PaddleOCR_json.exe`).
        modelsPath: Path to the `models` directory, or None to pass no
            models path.
        argument: Extra launch arguments as `{name: value}`. See
            https://github.com/hiroi-sora/PaddleOCR-json
        ipcMode: "socket" for `PPOCR_socket` or "pipe" for `PPOCR_pipe`.

    Returns:
        A `PPOCR_socket` or `PPOCR_pipe` instance.

    Raises:
        ValueError: If `ipcMode` is neither "socket" nor "pipe". ValueError
            is a subclass of Exception, so callers that catch Exception
            keep working.
    """
    if ipcMode == "socket":
        return PPOCR_socket(exePath, modelsPath, argument)
    if ipcMode == "pipe":
        return PPOCR_pipe(exePath, modelsPath, argument)
    raise ValueError(
        f'ipcMode可选值为 套接字模式"socket" 或 管道模式"pipe" ,不允许{ipcMode}'
    )