zbparse/flask_app/general/insert_del_pagemark.py

105 lines
4.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
from docx.opc.exceptions import PackageNotFoundError
from io import BytesIO
from PyPDF2 import PdfReader, PdfWriter
from docx import Document
from reportlab.pdfgen import canvas
from reportlab.lib.units import cm
import copy
# Copying the contents of input_pdf_path into invalid_added.pdf is the step that may raise; drawing the extra marker page almost never fails.
def _build_mark_page(page, mark_index):
    """Return a new PDF page sized like ``page`` that carries the index-mark text."""
    # Match the size of the page the marker follows.
    page_width = float(page.mediabox.width)
    page_height = float(page.mediabox.height)

    # Draw the marker into an in-memory, single-page PDF with reportlab.
    packet = BytesIO()
    c = canvas.Canvas(packet, pagesize=(page_width, page_height))
    x_position = 2.3 * cm
    # PDF coordinates start at the bottom-left corner, so the top margin is
    # subtracted from the page height.
    y_position = page_height - 0.5 * cm
    c.setFont("Helvetica", 12)
    c.drawString(x_position, y_position, f"[$$index_mark_{mark_index}$$]")
    c.save()

    # Load the drawn page back through PyPDF2.
    packet.seek(0)
    new_pdf = PdfReader(packet)
    # NOTE(review): the page is deep-copied before the buffer is emptied,
    # presumably so the copy no longer depends on the buffer - keep this order.
    mark_page = copy.deepcopy(new_pdf.pages[0])
    packet.truncate(0)
    packet.seek(0)
    return mark_page


def insert_mark(input_pdf_path):
    """Copy a PDF, inserting a marker page after every original page.

    The marker page that follows original page N (1-based) contains the text
    ``[$$index_mark_N$$]``. The result is saved as ``invalid_added.pdf`` in
    the same directory as the input.

    The output is first written to a temporary sibling file and only moved
    into place once writing has finished and the input has been closed. A
    failure therefore never leaves a truncated ``invalid_added.pdf`` behind,
    and an input that is itself named ``invalid_added.pdf`` is not truncated
    while it is still being read.

    Args:
        input_pdf_path: Path of the PDF file to process.

    Returns:
        The path of the generated ``invalid_added.pdf`` on success, or
        ``input_pdf_path`` unchanged if any error occurred (the error is
        printed, not raised).
    """
    temp_pdf_path = None
    try:
        # The output lives next to the input and is always named invalid_added.pdf.
        input_dir = os.path.dirname(input_pdf_path)
        output_pdf_path = os.path.join(input_dir, "invalid_added.pdf")
        temp_pdf_path = output_pdf_path + ".tmp"

        with open(input_pdf_path, 'rb') as file:
            pdf_reader = PdfReader(file)
            pdf_writer = PdfWriter()
            for page_num, page in enumerate(pdf_reader.pages, start=1):
                # Original page first, then its marker page.
                pdf_writer.add_page(page)
                pdf_writer.add_page(_build_mark_page(page, page_num))
            # Write while the input is still open: the writer may still need
            # to read page data from it.
            with open(temp_pdf_path, 'wb') as output_file:
                pdf_writer.write(output_file)

        # Publish the finished file only after everything succeeded.
        os.replace(temp_pdf_path, output_pdf_path)
        print("invalid_file added successfully!")
        return output_pdf_path
    except Exception as e:
        # Best effort by design: report the problem and fall back to the input.
        print(f"发生错误: {e}")
        if temp_pdf_path is not None:
            try:
                os.remove(temp_pdf_path)
            except OSError:
                # Nothing to clean up, or cleanup is not possible.
                pass
        return input_pdf_path
def delete_mark(docx_path):
    """Strip index-mark paragraphs from a .docx file and save a cleaned copy.

    Every paragraph whose text contains ``[$$index_mark_`` is removed. After
    a marker has been removed, the next paragraph whose XML contains
    ``w:sectPr`` (a section break) is removed as well. If the marker
    paragraph itself carries the section break, removing the marker already
    removes the break, so no further paragraph is removed for it.

    Args:
        docx_path: Path of the .docx file to clean.

    Returns:
        The path of the saved ``invalid_del.docx`` (same directory as the
        input), or ``""`` if the document could not be opened.
    """
    try:
        document = Document(docx_path)
    except KeyError as e:
        print(f"Error opening document: {e}")
        return ""
    except PackageNotFoundError as e:
        print(f"Invalid package: {e}")
        return ""

    # True while a marker has been removed but its section break has not.
    pending_section_break = False
    for para in document.paragraphs:
        element = para._element
        if "[$$index_mark_" in para.text:
            # Check for the section break before detaching the element: once
            # removed it has no parent, so it must not be removed a second time.
            carries_section_break = "w:sectPr" in element.xml
            element.getparent().remove(element)
            pending_section_break = not carries_section_break
        elif pending_section_break and "w:sectPr" in element.xml:
            # Remove the section break left over from the removed marker.
            element.getparent().remove(element)
            pending_section_break = False

    # Save the modified document next to the input file.
    dir_path = os.path.dirname(docx_path)
    new_file_path = os.path.join(dir_path, 'invalid_del.docx')
    document.save(new_file_path)
    return new_file_path
if __name__ == '__main__':
    # Manual smoke test against a local sample file.
    # Names avoid shadowing the built-in ``input``.
    # Alternative sample input:
    # input_pdf = r'C:\Users\Administrator\Desktop\fsdownload\0bb9cf31-280c-4d96-bc21-0871ee7fd6df\tmp\ztbfile.pdf'
    input_pdf = r'C:\Users\Administrator\Desktop\fsdownload\45645668-be5e-4124-b06a-6abaa9d87d86\ztbfile_invalid.pdf'
    output_pdf = insert_mark(input_pdf)
    # To exercise delete_mark instead:
    # doc_path = r'C:\Users\Administrator\Desktop\fsdownload\0bb9cf31-280c-4d96-bc21-0871ee7fd6df\tmp\invalid_added.docx'
    # res = delete_mark(doc_path)
    # if res:
    #     print(res)
    # else:
    #     print("No")