# 338 lines
# 14 KiB
# Python
# Raw Normal View History
#
# 2024-12-03 11:50:15 +08:00
# 调用 PaddleOCR-json.exe 的 Python Api
# 项目主页:
# https://github.com/hiroi-sora/PaddleOCR-json
import os
import socket # 套接字
import atexit # 退出处理
import subprocess # 进程,管道
import re # regex
from json import loads as jsonLoads, dumps as jsonDumps
from sys import platform as sysPlatform # popen静默模式
from base64 import b64encode # base64 编码
class PPOCR_pipe:
    """Client for the PaddleOCR-json engine using pipe (stdin/stdout) mode.

    The engine executable is started as a child process. Each request is one
    JSON line written to its stdin, and each reply is one JSON line read from
    its stdout.
    """

    def __init__(self, exePath: str, modelsPath: str = None, argument: dict = None):
        """Start the engine process and block until it reports readiness.

        `exePath`: path to the engine executable (`PaddleOCR_json.exe`).
        `modelsPath`: path to the `models` directory. When None, no
            `--models_path` option is passed to the engine.
        `argument`: dict of extra start-up options, passed as `--key value`
            (booleans are passed as `--key=value`).

        Raises `Exception` when `modelsPath` is not an existing directory or
        when the engine process exits before finishing initialisation.
        """
        # Private flag, set when the engine announces clipboard support.
        self.__ENABLE_CLIPBOARD = False
        # Assigned before anything that can raise so that exit()/__del__ always
        # find the attribute, even on a partially constructed instance.
        self.ret = None

        exePath = os.path.abspath(exePath)
        # The engine runs with its own directory as working directory.
        cwd = os.path.abspath(os.path.join(exePath, os.pardir))
        cmds = [exePath]

        # Translate the start-up options into command-line arguments.
        if modelsPath is not None:
            # isdir() is False for missing paths, so it covers both checks.
            if not os.path.isdir(modelsPath):
                raise Exception(
                    f"Input modelsPath doesn't exits or isn't a directory. modelsPath: [{modelsPath}]"
                )
            cmds += ["--models_path", os.path.abspath(modelsPath)]
        if isinstance(argument, dict):
            for key, value in argument.items():
                # Popen() requires every list element to be str or bytes.
                if isinstance(value, bool):
                    # Boolean options must have key and value joined by "=".
                    cmds.append(f"--{key}={value}")
                else:
                    cmds += [f"--{key}", str(value)]

        # On Windows, ask for a hidden window so no console pops up.
        startupinfo = None
        if "win32" in str(sysPlatform).lower():
            startupinfo = subprocess.STARTUPINFO()
            # NOTE(review): CREATE_NEW_CONSOLE is a `creationflags` constant,
            # not a STARTUPINFO.dwFlags bit. It is kept unchanged here because
            # the effect of removing it could not be verified; confirm intent.
            startupinfo.dwFlags = (
                subprocess.CREATE_NEW_CONSOLE | subprocess.STARTF_USESHOWWINDOW
            )
            startupinfo.wShowWindow = subprocess.SW_HIDE

        self.ret = subprocess.Popen(
            cmds,
            cwd=cwd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,  # engine stderr output is discarded
            startupinfo=startupinfo,
        )

        # Read start-up lines until the engine reports that it is ready.
        while True:
            if self.ret.poll() is not None:  # process already exited
                raise Exception(f"OCR init fail.")
            initStr = self.ret.stdout.readline().decode("utf-8", errors="ignore")
            if "OCR init completed." in initStr:
                break
            elif "OCR clipboard enbaled." in initStr:
                # The spelling must match the text emitted by the engine.
                self.__ENABLE_CLIPBOARD = True

        # Make sure the child process is killed when the interpreter exits.
        atexit.register(self.exit)

    def isClipboardEnabled(self) -> bool:
        """Return True when the engine reported clipboard support at start-up."""
        return self.__ENABLE_CLIPBOARD

    def getRunningMode(self) -> str:
        """Return the running mode; pipe mode always runs locally."""
        return "local"

    def runDict(self, writeDict: dict):
        """Send a command dict to the engine and return its decoded reply.

        `writeDict`: command dict, serialised as a single JSON line.
        `return`: `{"code": ..., "data": ...}`. Codes 901-904 are produced by
            this method itself: 901 no engine instance, 902 process exited or
            write failed, 903 read failed, 904 reply is not valid JSON.
        """
        # Check the child process first.
        if not self.ret:
            return {"code": 901, "data": f"引擎实例不存在。"}
        if self.ret.poll() is not None:
            return {"code": 902, "data": f"子进程已崩溃。"}

        # Send the request; ensure_ascii keeps the line pure ASCII.
        writeStr = jsonDumps(writeDict, ensure_ascii=True, indent=None) + "\n"
        try:
            self.ret.stdin.write(writeStr.encode("utf-8"))
            self.ret.stdin.flush()
        except Exception as e:
            return {
                "code": 902,
                "data": f"向识别器进程传入指令失败,疑似子进程已崩溃。{e}",
            }

        # Read exactly one reply line.
        try:
            getStr = self.ret.stdout.readline().decode("utf-8", errors="ignore")
        except Exception as e:
            return {"code": 903, "data": f"读取识别器进程输出值失败。异常信息:[{e}]"}

        try:
            return jsonLoads(getStr)
        except Exception as e:
            return {
                "code": 904,
                "data": f"识别器输出值反序列化JSON失败。异常信息[{e}]。原始内容:[{getStr}]",
            }

    def run(self, imgPath: str):
        """Run OCR on a local image file.

        `imgPath`: path of the image.
        `return`: `{"code": ..., "data": ...}` as returned by `runDict`.
        """
        return self.runDict({"image_path": imgPath})

    def runClipboard(self):
        """Run OCR on the image currently in the clipboard.

        `return`: `{"code": ..., "data": ...}` as returned by `runDict`.
        Raises `Exception` when clipboard support is not enabled.
        """
        if not self.__ENABLE_CLIPBOARD:
            raise Exception("剪贴板功能不存在或已禁用。")
        return self.run("clipboard")

    def runBase64(self, imageBase64: str):
        """Run OCR on an image given as a base64 string.

        `imageBase64`: base64 text of the image.
        `return`: `{"code": ..., "data": ...}` as returned by `runDict`.
        """
        return self.runDict({"image_base64": imageBase64})

    def runBytes(self, imageBytes):
        """Run OCR on an image given as raw bytes.

        `imageBytes`: bytes of the image; they are base64-encoded and sent
            through `runBase64`.
        `return`: `{"code": ..., "data": ...}` as returned by `runDict`.
        """
        imageBase64 = b64encode(imageBytes).decode("utf-8")
        return self.runBase64(imageBase64)

    def exit(self):
        """Kill the engine child process and drop the atexit hook."""
        if hasattr(self, "ret"):
            if not self.ret:
                return  # already closed, or never started
            try:
                self.ret.kill()
            except Exception as e:
                print(f"[Error] ret.kill() {e}")
        self.ret = None
        atexit.unregister(self.exit)
        print("### PPOCR引擎子进程关闭")

    @staticmethod
    def printResult(res: dict):
        """Debug helper: pretty-print an OCR result dict.

        `res`: result dict with `code` and `data` keys. Code 100 prints one
            line per recognised text box, code 101 prints a "no text found"
            message, and any other code prints the error code and data.
        """
        code = res["code"]
        if code == 100:  # text was recognised
            for index, line in enumerate(res["data"], start=1):
                print(
                    f"{index}-置信度:{round(line['score'], 2)},文本:{line['text']}",
                    # Make a line-break terminator visible as a literal "\n".
                    end="\\n\n" if line.get("end", "") == "\n" else "\n",
                )
        elif code == 101:  # success, but the image contains no text
            print("图片中未识别出文字。")
        else:
            print(f"图片识别失败。错误码:{res['code']},错误信息:{res['data']}")

    def __del__(self):
        # Best-effort cleanup when the object is garbage collected.
        self.exit()
class PPOCR_socket(PPOCR_pipe):
    """Client for the PaddleOCR-json engine using TCP socket mode.

    In local mode the engine is started as a child process (through
    `PPOCR_pipe`) and then addressed over TCP. In remote mode
    (`exePath` of the form `remote://host:port`) no process is started and
    the instance only connects to an already running server.
    """

    def __init__(self, exePath: str, modelsPath: str = None, argument: dict = None):
        """Start (local) or connect to (remote) an OCR socket server.

        `exePath`: path to the engine executable, or `remote://host:port`.
        `modelsPath`: path to the `models` directory (local mode only). When
            None, no `--models_path` option is passed to the engine.
        `argument`: dict of extra start-up options. `port` defaults to 0 and
            `addr` defaults to `"loopback"` when not given.

        Raises `Exception` when the server cannot be started or reached.
        """
        # Work on a copy so the caller's dict is never modified.
        argument = dict(argument) if argument else {}
        argument.setdefault("port", 0)  # 0 = random port
        argument.setdefault("addr", "loopback")  # local loopback address

        # Decide between a local executable and a remote server.
        self.__runningMode = self.__configureExePath(exePath)

        if self.__runningMode == "local":
            # Reuse PPOCR_pipe to launch the engine process.
            super().__init__(self.exePath, modelsPath, argument)
            self.__ENABLE_CLIPBOARD = super().isClipboardEnabled()
            # One more output line tells whether the socket server came up.
            initStr = self.ret.stdout.readline().decode("utf-8", errors="ignore")
            if self.ret.poll() is not None:  # process already exited
                raise Exception(f"Socket init fail.")
            if "Socket init completed. " in initStr:
                # The line has the form "Socket init completed. <ip>:<port>".
                splits = initStr.split(":")
                self.ip = splits[0].split("Socket init completed. ")[1]
                self.port = int(splits[1])
                # Stop reading the pipe so a full buffer cannot block the engine.
                self.ret.stdout.close()
                print(f"套接字服务器初始化成功。{self.ip}:{self.port}")
                return
        elif self.__runningMode == "remote":
            self.__ENABLE_CLIPBOARD = False
            # Send an empty command to check that the server is reachable.
            testServer = self.runDict({})
            if testServer["code"] in [902, 903, 904]:
                raise Exception(f"Socket connection fail.")
            print(f"套接字服务器连接成功。{self.ip}:{self.port}")
            return

        # Neither branch succeeded: clean up and report failure.
        self.exit()
        raise Exception(f"Socket init fail.")

    def isClipboardEnabled(self) -> bool:
        """Return True when clipboard OCR is available (never in remote mode)."""
        return self.__ENABLE_CLIPBOARD

    def getRunningMode(self) -> str:
        """Return `"local"` or `"remote"`."""
        return self.__runningMode

    def runDict(self, writeDict: dict):
        """Send a command dict to the server over a fresh TCP connection.

        `writeDict`: command dict, serialised as a single JSON line.
        `return`: `{"code": ..., "data": ...}`. Codes 901-905 are produced by
            this method itself: 901 engine process missing or exited,
            902 connection refused, 903 timeout, 904 other network error,
            905 reply is not valid JSON.
        """
        # The engine process is only checked in local mode.
        if self.__runningMode == "local":
            proc = getattr(self, "ret", None)
            if not proc:
                # For example after exit(); avoids AttributeError on None.
                return {"code": 901, "data": f"引擎实例不存在。"}
            if proc.poll() is not None:
                return {"code": 901, "data": f"子进程已崩溃。"}

        writeStr = jsonDumps(writeDict, ensure_ascii=True, indent=None) + "\n"
        try:
            # The with-block closes the socket on every path and cannot touch
            # an unbound name if socket creation itself fails.
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as clientSocket:
                clientSocket.connect((self.ip, self.port))
                clientSocket.sendall(writeStr.encode())
                # Close our sending side; from here on we only read.
                clientSocket.shutdown(socket.SHUT_WR)
                chunks = []
                while True:
                    chunk = clientSocket.recv(1024)
                    if not chunk:  # server closed the connection
                        break
                    chunks.append(chunk)
            getStr = b"".join(chunks).decode()
        except ConnectionRefusedError:
            return {"code": 902, "data": "连接被拒绝"}
        except TimeoutError:
            return {"code": 903, "data": "连接超时"}
        except Exception as e:
            return {"code": 904, "data": f"网络错误:{e}"}

        # Decode the reply.
        try:
            return jsonLoads(getStr)
        except Exception as e:
            return {
                "code": 905,
                "data": f"识别器输出值反序列化JSON失败。异常信息[{e}]。原始内容:[{getStr}]",
            }

    def exit(self):
        """Kill the local engine process (if any) and reset connection state."""
        # `ret` only exists once the local engine start-up has begun.
        if hasattr(self, "ret"):
            if self.__runningMode == "local":
                if not self.ret:
                    return  # already closed
                try:
                    self.ret.kill()
                except Exception as e:
                    print(f"[Error] ret.kill() {e}")
            self.ret = None
        self.ip = None
        self.port = None
        atexit.unregister(self.exit)
        print("### PPOCR引擎子进程关闭")

    def __del__(self):
        # Best-effort cleanup when the object is garbage collected.
        self.exit()

    def __configureExePath(self, exePath: str) -> str:
        """Classify `exePath` as a local path or a `remote://host:port` address.

        Remote addresses set `self.ip` and `self.port`; the host aliases
        `any` and `loopback` map to `0.0.0.0` and `127.0.0.1`. Local paths are
        stored in `self.exePath`.

        `return`: `"remote"`, `"local"`, or None when the port cannot be
            converted to an int.
        """
        match = re.search(r"remote://(.*):(\d+)", exePath)
        if not match:  # local mode
            self.exePath = exePath
            return "local"
        try:
            port = int(match.group(2))
        except ValueError:
            return None
        host = match.group(1)
        self.ip = {"any": "0.0.0.0", "loopback": "127.0.0.1"}.get(host, host)
        self.port = port
        return "remote"
def GetOcrApi(
    exePath: str, modelsPath: str = None, argument: dict = None, ipcMode: str = "pipe"
):
    """Create and return an OCR API object for the requested IPC mode.

    `exePath`: path to the engine executable (`PaddleOCR_json.exe`).
    `modelsPath`: path to the `models` directory; passed through unchanged.
    `argument`: dict of start-up options; passed through unchanged.
    `ipcMode`: `"pipe"` for a `PPOCR_pipe` object or `"socket"` for a
        `PPOCR_socket` object.

    Raises `Exception` when `ipcMode` is neither `"socket"` nor `"pipe"`.
    """
    # Guard clauses: each supported mode returns right away.
    if ipcMode == "pipe":
        return PPOCR_pipe(exePath, modelsPath, argument)
    if ipcMode == "socket":
        return PPOCR_socket(exePath, modelsPath, argument)
    # Anything else is an unsupported mode.
    raise Exception(
        f'ipcMode可选值为 套接字模式"socket" 或 管道模式"pipe" ,不允许{ipcMode}'
    )