zbparse/flask_app/PaddleOCR/python_api/PPOCR_visualize.py
2024-12-03 11:50:15 +08:00

160 lines
6.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# 将 PaddleOCR-json 结果可视化表现
# 项目主页:
# https://github.com/hiroi-sora/PaddleOCR-json
from PIL import Image, ImageDraw, ImageFont
import math
class visualize:
    """Render PaddleOCR-json recognition results as PIL image layers.

    The static ``create*`` helpers each build one transparent RGBA layer.
    An instance bundles the source image with its bounding-box, text and
    order-number layers so they can be composited, shown or saved.

    Every text block is a mapping with a ``"box"`` entry (four ``[x, y]``
    points; index 0 is used as the top-left corner and index 3 as the
    bottom-left corner) and, for the text layer, a ``"text"`` entry.
    """

    # ============================ Static methods ============================

    @staticmethod
    def createBox(textBlocks, size, fill="#00500040", outline="#11ff22", width=6):
        """Create the bounding-box layer and return it as a PIL Image.

        Colors are hexadecimal strings, either 6-digit RGB or 8-digit RGBA,
        for example ``#112233ff``.

        :param textBlocks: list of text blocks.
        :param size: image size as ``(width, height)``.
        :param fill: fill color of each box.
        :param outline: outline color of each box.
        :param width: outline thickness in pixels.
        :return: a new RGBA image of the given size.
        """
        img = Image.new("RGBA", size, 0)
        draw = ImageDraw.Draw(img)
        for tb in textBlocks:
            corners = tb["box"]
            # Exactly the first four points form the polygon.
            polygon = [tuple(corners[i]) for i in range(4)]
            draw.polygon(polygon, fill=fill, outline=outline, width=width)
        return img

    @staticmethod
    def createText(
        textBlocks,
        size,
        ttfPath=r"C:\Windows\Fonts\msyh.ttc",
        ttfScale=0.9,
        fill="#ff0000",
    ):
        """Create the recognized-text layer and return it as a PIL Image.

        Each block's text is drawn at the block's top-left corner with a
        font size derived from the length of the block's left edge.

        :param textBlocks: list of text blocks.
        :param size: image size as ``(width, height)``.
        :param ttfPath: path of the font file. Defaults to the Microsoft
            YaHei font in the Windows fonts directory; loading fails if the
            file does not exist.
        :param ttfScale: overall font-size scale factor, should be near 1.
        :param fill: text color, hexadecimal 6-digit RGB or 8-digit RGBA
            string such as ``#112233ff``.
        :return: a new RGBA image of the given size.
        """
        img = Image.new("RGBA", size, 0)
        draw = ImageDraw.Draw(img)
        fontCache = {}  # font objects keyed by pixel size
        for tb in textBlocks:
            topLeft = tuple(tb["box"][0])
            bottomLeft = tb["box"][3]
            # Line height = length of the left edge, scaled.
            fontSize = round(
                math.hypot(topLeft[0] - bottomLeft[0], topLeft[1] - bottomLeft[1])
                * ttfScale
            )
            if fontSize < 1:
                # A degenerate box has no drawable height, and a font cannot
                # be created with a non-positive size.
                continue
            font = fontCache.get(fontSize)
            if font is None:
                font = ImageFont.truetype(ttfPath, fontSize)
                fontCache[fontSize] = font
            draw.text(topLeft, tb["text"], font=font, fill=fill)
        return img

    @staticmethod
    def createOrder(
        textBlocks,
        size,
        ttfPath=r"C:\Windows\Fonts\msyh.ttc",
        ttfSize=50,
        fill="#2233ff",
        bg="#ffffffe0",
    ):
        """Create the order-number layer and return it as a PIL Image.

        Blocks are numbered from 1 in list order; each number is drawn at
        the block's top-left corner on top of a filled background rectangle.

        :param textBlocks: list of text blocks.
        :param size: image size as ``(width, height)``.
        :param ttfPath: path of the font file. Defaults to the Microsoft
            YaHei font in the Windows fonts directory; loading fails if the
            file does not exist.
        :param ttfSize: font size.
        :param fill: text color, hexadecimal 6-digit RGB or 8-digit RGBA
            string such as ``#112233ff``.
        :param bg: color of the rectangle drawn behind each number.
        :return: a new RGBA image of the given size.
        """
        img = Image.new("RGBA", size, 0)
        draw = ImageDraw.Draw(img)
        font = ImageFont.truetype(ttfPath, ttfSize)
        for number, tb in enumerate(textBlocks, start=1):
            label = f"{number}"
            origin = tuple(tb["box"][0])
            # Only the right/bottom extents of the text box are needed.
            _, _, w, h = font.getbbox(label)
            w *= 1.1  # small margin around the label
            h *= 1.1
            draw.rectangle(
                (origin, (origin[0] + w, origin[1] + h)), fill=bg, width=0
            )
            draw.text(origin, label, font=font, fill=fill)
        return img

    @staticmethod
    def createContrast(img1, img2):
        """Place two images side by side and return the combined PIL Image.

        ``img1`` is pasted on the left and ``img2`` immediately to its
        right; the canvas is as tall as the taller of the two.
        """
        width1 = img1.size[0]
        canvasSize = (width1 + img2.size[0], max(img1.size[1], img2.size[1]))
        canvas = Image.new("RGBA", canvasSize, 0)
        canvas.paste(img1, (0, 0))
        canvas.paste(img2, (width1, 0))
        return canvas

    @staticmethod
    def composite(img1, img2):
        """Alpha-composite ``img2`` over ``img1`` and return the result.

        Both arguments are PIL Image objects in RGBA mode.
        """
        return Image.alpha_composite(img1, img2)

    # ========================= Convenience interface =========================

    def __init__(self, textBlocks, imagePath):
        """Create a visualization object.

        :param textBlocks: list of text blocks, i.e. the ``data`` part of
            the OCR result.
        :param imagePath: path of the image the result belongs to.
        """
        # convert() returns an independent image, so the file handle can be
        # released as soon as the conversion is done.
        with Image.open(imagePath) as source:
            self.imgSource = source.convert("RGBA")  # source image layer
        self.size = self.imgSource.size
        self.imgBox = self.createBox(textBlocks, self.size)  # box layer
        self.imgText = self.createText(textBlocks, self.size)  # text layer
        self.imgOrder = self.createOrder(textBlocks, self.size)  # order layer

    def get(self, isBox=True, isText=False, isOrder=False, isSource=True):
        """Return the composited visualization as a PIL Image.

        Enabled layers are stacked in the order source, box, text, order.

        :param isBox: include the bounding-box layer when true.
        :param isText: include the text layer when true.
        :param isOrder: include the order-number layer when true.
        :param isSource: include the source image when true; when false the
            result has a transparent background.
        """
        img = Image.new("RGBA", self.size, 0)
        layers = (
            (isSource, self.imgSource),
            (isBox, self.imgBox),
            (isText, self.imgText),
            (isOrder, self.imgOrder),
        )
        for enabled, layer in layers:
            if enabled and layer is not None:
                img = visualize.composite(img, layer)
        return img

    def show(self, isBox=True, isText=False, isOrder=False, isSource=True):
        """Display the composited visualization.

        :param isBox: include the bounding-box layer when true.
        :param isText: include the text layer when true.
        :param isOrder: include the order-number layer when true.
        :param isSource: include the source image when true; when false the
            result has a transparent background.
        """
        self.get(isBox, isText, isOrder, isSource).show()

    def save(self, path="", isBox=True, isText=False, isOrder=False, isSource=True):
        """Save the composited visualization to a file.

        :param path: destination file path.
        :param isBox: include the bounding-box layer when true.
        :param isText: include the text layer when true.
        :param isOrder: include the order-number layer when true.
        :param isSource: include the source image when true; when false the
            result has a transparent background.
        """
        self.get(isBox, isText, isOrder, isSource).save(path)