zbparse/flask_app/PaddleOCR/python_api/PPOCR_visualize.py

# 将 PaddleOCR-json 结果可视化表现
# 项目主页：
# https://github.com/hiroi-sora/PaddleOCR-json
from PIL import Image, ImageDraw, ImageFont
import math


class visualize:
    """可视化"""

    # ================================ 静态方法 ================================

    @staticmethod
    def createBox(textBlocks, size, fill="#00500040", outline="#11ff22", width=6):
        """创建包围盒图层，返回PIL Image对象。\n
        :textBlocks: 文本块列表。\n
        :size: 图片尺寸。\n
        以下为可选字段：（颜色为十六进制6位RGB或8位RGBA字符串，如 #112233ff）\n
        :fill: 包围盒填充颜色。\n
        :outline: 包围盒轮廓颜色。\n
        :width: 包围盒轮廓粗细，像素。
        """
        img = Image.new("RGBA", size, 0)
        draw = ImageDraw.Draw(img)
        for tb in textBlocks:
            box = [
                tuple(tb["box"][0]),
                tuple(tb["box"][1]),
                tuple(tb["box"][2]),
                tuple(tb["box"][3]),
            ]
            draw.polygon(box, fill=fill, outline=outline, width=width)
        return img

    @staticmethod
    def createText(
        textBlocks,
        size,
        ttfPath="C:\Windows\Fonts\msyh.ttc",
        ttfScale=0.9,
        fill="#ff0000",
    ):
        """创建文字图层，返回PIL Image对象。\n
        :textBlocks: 文本块列表。\n
        :size: 图片尺寸。\n
        以下为可选字段：\n
        :ttfPath: 字体文件路径。默认为微软雅黑，若不存在此字体会报错。\n
        :ttfScale: 字体大小整体缩放系数，应在1附近。\n
        :fill: 文字颜色，十六进制6位RGB或8位RGBA字符串，如 #112233ff。\n
        """
        img = Image.new("RGBA", size, 0)
        draw = ImageDraw.Draw(img)
        ttfDict = {}  # 缓存不同大小的字体对象
        for tb in textBlocks:
            text = tb["text"]
            xy = tuple(tb["box"][0])  # 左上角坐标
            xy1 = tb["box"][3]  # 左下角坐标# 行高
            hight = round(
                math.sqrt(((xy[0] - xy1[0]) ** 2) + ((xy[1] - xy1[1]) ** 2)) * ttfScale
            )
            if hight not in ttfDict:
                ttfDict[hight] = ImageFont.truetype(ttfPath, hight)  # 创建新大小的字体
            draw.text(xy, text, font=ttfDict[hight], fill=fill)
        return img

    @staticmethod
    def createOrder(
        textBlocks,
        size,
        ttfPath="C:\Windows\Fonts\msyh.ttc",
        ttfSize=50,
        fill="#2233ff",
        bg="#ffffffe0",
    ):
        """创建序号图层，返回PIL Image对象。\n
        :textBlocks: 文本块列表。\n
        :size: 图片尺寸。\n
        以下为可选字段：\n
        :ttfPath: 字体文件路径。默认为微软雅黑，若不存在此字体会报错。\n
        :ttfSize: 字体大小。\n
        :fill: 文字颜色，十六进制6位RGB或8位RGBA字符串，如 #112233ff。\n
        """
        img = Image.new("RGBA", size, 0)
        draw = ImageDraw.Draw(img)
        ttf = ImageFont.truetype(ttfPath, ttfSize)  # 字体
        for index, tb in enumerate(textBlocks):
            text = f"{index+1}"
            xy = tuple(tb["box"][0])  # 左上角坐标
            x_, y_, w, h = ttf.getbbox(text)  # 获取宽高。只需要w和h
            w *= 1.1
            h *= 1.1
            draw.rectangle((xy, (xy[0] + w, xy[1] + h)), fill=bg, width=0)  # 背景矩形
            draw.text(xy, text, font=ttf, fill=fill)  # 文字
        return img

    @staticmethod
    def createContrast(img1, img2):
        """左右拼合两个图片，创建对比图层，返回PIL Image对象。"""
        size = (img1.size[0] + img2.size[0], max(img1.size[1], img2.size[1]))
        img = Image.new("RGBA", size, 0)
        img.paste(img1, (0, 0))
        img.paste(img2, (img1.size[0], 0))
        return img

    @staticmethod
    def composite(img1, img2):
        """传入两个PIL Image对象（RGBA格式），以img1为底，将img2叠加在其上
        返回生成的图片"""
        return Image.alpha_composite(img1, img2)

    # ================================ 快捷接口 ================================

    def __init__(self, textBlocks, imagePath):
        """创建可视化对象。\n
        :textBlocks: 文本块列表，即OCR返回的data部分\n
        :imagePath: 对应的图片路径。
        """
        self.imgSource = Image.open(imagePath).convert("RGBA")  # 原始图片图层
        self.size = self.imgSource.size
        self.imgBox = self.createBox(textBlocks, self.size)  # 包围盒图层
        self.imgText = self.createText(textBlocks, self.size)  # 文字图层
        self.imgOrder = self.createOrder(textBlocks, self.size)  # 序号图层

    def get(self, isBox=True, isText=False, isOrder=False, isSource=True):
        """返回合成可视化结果的PIL Image图像。\n
        :isBox: T时返回包围盒图层。\n
        :isText: T时返回文字图层。\n
        :isOrder: T时返回序号图层。\n
        :isSource: T时返回原图。F时返回透明背景的纯可视化结果。\n
        """
        img = Image.new("RGBA", self.size, 0)
        flags = (isSource, isBox, isText, isOrder)
        for index, im in enumerate(
            [self.imgSource, self.imgBox, self.imgText, self.imgOrder]
        ):
            if im and flags[index]:
                img = visualize.composite(img, im)
        return img

    def show(self, isBox=True, isText=False, isOrder=False, isSource=True):
        """显示可视化结果图像。\n
        :isBox: T时返回包围盒图层。\n
        :isText: T时返回文字图层。\n
        :isOrder: T时返回序号图层。\n
        :isSource: T时返回原图。F时返回透明背景的纯可视化结果。\n
        """
        img = self.get(isBox, isText, isOrder, isSource)
        img.show()

    def save(self, path="", isBox=True, isText=False, isOrder=False, isSource=True):
        """保存可视化结果图像。\n
        :path: 保存路径。\n
        :isBox: T时返回包围盒图层。\n
        :isText: T时返回文字图层。\n
        :isOrder: T时返回序号图层。\n
        :isSource: T时返回原图。F时返回透明背景的纯可视化结果。\n
        """
        img = self.get(isBox, isText, isOrder, isSource)
        img.save(path)