# File: zbparse/flask_app/PaddleOCR/python_api/PPOCR_visualize.py
# Repository viewer metadata: 160 lines, 6.3 KiB, Python; 2024-12-03 11:50:15 +08:00
# Visualize the results returned by PaddleOCR-json.
# Project homepage:
# https://github.com/hiroi-sora/PaddleOCR-json
from PIL import Image, ImageDraw, ImageFont
import math
class visualize:
    """Render PaddleOCR-json text blocks as transparent RGBA layers.

    The static ``create*`` methods each build one layer (bounding boxes,
    recognized text, reading-order numbers) as a PIL ``Image``. An instance
    bundles those layers with the source picture so that ``get`` / ``show`` /
    ``save`` can composite any combination of them.

    Every text block is a mapping with a ``"box"`` entry (four ``[x, y]``
    corner points; index 0 is used as the top-left corner and index 3 as the
    bottom-left corner) and a ``"text"`` entry.
    """

    # Default font: Microsoft YaHei at its usual Windows location. Written as
    # a raw string so the backslashes are not parsed as (invalid) escapes.
    _DEFAULT_TTF_PATH = r"C:\Windows\Fonts\msyh.ttc"

    # ============================ static methods ============================

    @staticmethod
    def _fontSize(box, ttfScale):
        """Return the font size for ``box``: its left-edge length times ``ttfScale``, rounded."""
        topLeft, bottomLeft = box[0], box[3]
        edge = math.hypot(topLeft[0] - bottomLeft[0], topLeft[1] - bottomLeft[1])
        return round(edge * ttfScale)

    @staticmethod
    def createBox(textBlocks, size, fill="#00500040", outline="#11ff22", width=6):
        """Create the bounding-box layer and return it as a PIL Image.

        Colors are hexadecimal strings, 6-digit RGB or 8-digit RGBA
        (for example ``#112233ff``).

        :param textBlocks: list of text blocks
        :param size: image size as ``(width, height)``
        :param fill: fill color of each box
        :param outline: outline color of each box
        :param width: outline thickness in pixels
        :return: RGBA image of ``size`` with one polygon per text block
        """
        img = Image.new("RGBA", size, 0)
        draw = ImageDraw.Draw(img)
        for tb in textBlocks:
            corners = [tuple(point) for point in tb["box"][:4]]
            draw.polygon(corners, fill=fill, outline=outline, width=width)
        return img

    @staticmethod
    def createText(
        textBlocks,
        size,
        ttfPath=_DEFAULT_TTF_PATH,
        ttfScale=0.9,
        fill="#ff0000",
    ):
        """Create the recognized-text layer and return it as a PIL Image.

        Each block's text is drawn at the block's top-left corner, with a font
        size derived from the length of the block's left edge.

        :param textBlocks: list of text blocks
        :param size: image size as ``(width, height)``
        :param ttfPath: path of the font file; loading fails if it does not
            exist (the default is Microsoft YaHei on Windows)
        :param ttfScale: overall font-size scale factor, should be close to 1
        :param fill: text color, hexadecimal 6-digit RGB or 8-digit RGBA
        :return: RGBA image of ``size`` containing the drawn text
        """
        img = Image.new("RGBA", size, 0)
        draw = ImageDraw.Draw(img)
        fontCache = {}  # font objects keyed by pixel size
        for tb in textBlocks:
            fontSize = visualize._fontSize(tb["box"], ttfScale)
            if fontSize < 1:
                # A degenerate box has no visible line height, and a font
                # cannot be created with a non-positive size; skip it.
                continue
            font = fontCache.get(fontSize)
            if font is None:
                font = fontCache[fontSize] = ImageFont.truetype(ttfPath, fontSize)
            draw.text(tuple(tb["box"][0]), tb["text"], font=font, fill=fill)
        return img

    @staticmethod
    def createOrder(
        textBlocks,
        size,
        ttfPath=_DEFAULT_TTF_PATH,
        ttfSize=50,
        fill="#2233ff",
        bg="#ffffffe0",
    ):
        """Create the order-number layer and return it as a PIL Image.

        Each block gets its 1-based index drawn at its top-left corner on top
        of a filled background rectangle.

        :param textBlocks: list of text blocks
        :param size: image size as ``(width, height)``
        :param ttfPath: path of the font file; loading fails if it does not
            exist (the default is Microsoft YaHei on Windows)
        :param ttfSize: font size
        :param fill: text color, hexadecimal 6-digit RGB or 8-digit RGBA
        :param bg: background color of the rectangle behind each number
        :return: RGBA image of ``size`` containing the numbered labels
        """
        img = Image.new("RGBA", size, 0)
        draw = ImageDraw.Draw(img)
        ttf = ImageFont.truetype(ttfPath, ttfSize)
        for number, tb in enumerate(textBlocks, start=1):
            label = f"{number}"
            xy = tuple(tb["box"][0])  # top-left corner
            # getbbox yields (left, top, right, bottom) relative to the text
            # origin; right/bottom are the extents to cover, padded by 10%.
            _, _, right, bottom = ttf.getbbox(label)
            lowerRight = (xy[0] + right * 1.1, xy[1] + bottom * 1.1)
            draw.rectangle((xy, lowerRight), fill=bg, width=0)  # background
            draw.text(xy, label, font=ttf, fill=fill)  # number
        return img

    @staticmethod
    def createContrast(img1, img2):
        """Place two images side by side and return the result as a PIL Image.

        ``img1`` goes on the left and ``img2`` directly to its right; the
        canvas is as tall as the taller of the two.
        """
        leftWidth = img1.size[0]
        canvasSize = (leftWidth + img2.size[0], max(img1.size[1], img2.size[1]))
        img = Image.new("RGBA", canvasSize, 0)
        img.paste(img1, (0, 0))
        img.paste(img2, (leftWidth, 0))
        return img

    @staticmethod
    def composite(img1, img2):
        """Overlay ``img2`` on top of ``img1`` (both RGBA PIL Images) and return the result."""
        return Image.alpha_composite(img1, img2)

    # ============================ shortcut interface ============================

    def __init__(self, textBlocks, imagePath, ttfPath=_DEFAULT_TTF_PATH):
        """Create a visualization object and pre-render all layers.

        :param textBlocks: list of text blocks, i.e. the data part of the
            OCR result
        :param imagePath: path of the corresponding image
        :param ttfPath: font file used for the text and order layers;
            defaults to the same font as ``createText`` / ``createOrder``
        """
        # Close the underlying file as soon as the pixels have been converted.
        with Image.open(imagePath) as source:
            self.imgSource = source.convert("RGBA")  # source image layer
        self.size = self.imgSource.size
        self.imgBox = self.createBox(textBlocks, self.size)  # box layer
        self.imgText = self.createText(textBlocks, self.size, ttfPath=ttfPath)  # text layer
        self.imgOrder = self.createOrder(textBlocks, self.size, ttfPath=ttfPath)  # order layer

    def get(self, isBox=True, isText=False, isOrder=False, isSource=True):
        """Return the composited visualization as a PIL Image.

        Layers are stacked bottom-up: source, boxes, text, order numbers.

        :param isBox: include the bounding-box layer when true
        :param isText: include the text layer when true
        :param isOrder: include the order-number layer when true
        :param isSource: include the source image when true; when false the
            result has a transparent background
        """
        img = Image.new("RGBA", self.size, 0)
        layers = (
            (isSource, self.imgSource),
            (isBox, self.imgBox),
            (isText, self.imgText),
            (isOrder, self.imgOrder),
        )
        for enabled, layer in layers:
            if layer and enabled:
                img = visualize.composite(img, layer)
        return img

    def show(self, isBox=True, isText=False, isOrder=False, isSource=True):
        """Display the composited visualization.

        The flags have the same meaning as in ``get``.
        """
        self.get(isBox, isText, isOrder, isSource).show()

    def save(self, path="", isBox=True, isText=False, isOrder=False, isSource=True):
        """Save the composited visualization to ``path``.

        :param path: destination file path
        :param isBox: include the bounding-box layer when true
        :param isText: include the text layer when true
        :param isOrder: include the order-number layer when true
        :param isSource: include the source image when true; when false the
            result has a transparent background
        """
        self.get(isBox, isText, isOrder, isSource).save(path)