105 lines
4.0 KiB
Python
105 lines
4.0 KiB
Python
import os
|
||
from docx.opc.exceptions import PackageNotFoundError
|
||
from io import BytesIO
|
||
from PyPDF2 import PdfReader, PdfWriter
|
||
from docx import Document
|
||
from reportlab.pdfgen import canvas
|
||
from reportlab.lib.units import cm
|
||
import copy
|
||
|
||
def _build_mark_page(source_page, page_num):
    """Render a page sized like *source_page* that carries one index marker.

    Args:
        source_page: The PDF page whose media box gives the new page's size.
        page_num: Zero-based position of *source_page*; the marker text uses
            ``page_num + 1``.

    Returns:
        A deep copy of the rendered page, taken before the in-memory buffer
        it was read from is emptied.
    """
    width = float(source_page.mediabox.width)
    height = float(source_page.mediabox.height)

    # Draw the marker into an in-memory PDF with reportlab.
    buffer = BytesIO()
    sheet = canvas.Canvas(buffer, pagesize=(width, height))
    sheet.setFont("Helvetica", 12)
    # Positions are in points: 2.3 cm from the left edge, and 0.5 cm down
    # from the top edge (hence page height minus the top margin).
    sheet.drawString(2.3 * cm, height - 0.5 * cm, f"[$$index_mark_{page_num + 1}$$]")
    sheet.save()

    # Read the rendered page back and copy it before the buffer is cleared.
    buffer.seek(0)
    rendered = PdfReader(buffer)
    mark_page = copy.deepcopy(rendered.pages[0])
    buffer.truncate(0)
    buffer.seek(0)
    return mark_page


# Copying the pages of input_pdf_path into invalid_added.pdf is the step that
# may raise; drawing the additional marker page practically never does.
def insert_mark(input_pdf_path):
    """Write a copy of a PDF with a marker page inserted after every page.

    The copy is saved as ``invalid_added.pdf`` in the directory of
    *input_pdf_path*. The marker page that follows original page ``k``
    (1-based) contains the text ``[$$index_mark_k$$]``.

    Args:
        input_pdf_path: Path of the PDF to read.

    Returns:
        The path of ``invalid_added.pdf`` on success. If anything raises, the
        error is printed and *input_pdf_path* is returned unchanged.
    """
    try:
        # The output lives next to the input under a fixed file name.
        output_pdf_path = os.path.join(
            os.path.dirname(input_pdf_path), "invalid_added.pdf"
        )

        with open(input_pdf_path, 'rb') as source:
            reader = PdfReader(source)
            writer = PdfWriter()

            for page_num, page in enumerate(reader.pages):
                # Original page first, then its marker page.
                writer.add_page(page)
                writer.add_page(_build_mark_page(page, page_num))

            # Write while the source file is still open.
            with open(output_pdf_path, 'wb') as target:
                writer.write(target)

        print("invalid_file added successfully!")
        return output_pdf_path

    except Exception as e:
        # Best-effort: report the problem and fall back to the original file.
        print(f"发生错误: {e}")
        return input_pdf_path
|
||
|
||
|
||
def delete_mark(docx_path):
    """Remove index-marker paragraphs from a .docx file and save a cleaned copy.

    Every paragraph returned by ``Document.paragraphs`` whose text contains
    ``[$$index_mark_`` is removed. The section break that belongs to a marker
    is removed too: either the marker paragraph carries ``w:sectPr`` itself,
    or the first later paragraph whose XML contains ``w:sectPr`` is dropped.
    The result is saved as ``invalid_del.docx`` in the directory of
    *docx_path*.

    Args:
        docx_path: Path of the .docx file to clean.

    Returns:
        The path of the saved ``invalid_del.docx``, or ``""`` when opening
        the document raises ``KeyError`` or ``PackageNotFoundError``.
    """
    try:
        document = Document(docx_path)
    except KeyError as e:
        print(f"Error opening document: {e}")
        return ""
    except PackageNotFoundError as e:
        print(f"Invalid package: {e}")
        return ""

    # True while a removed marker is still waiting for its section break.
    awaiting_section_break = False
    for para in document.paragraphs:
        element = para._element
        if "[$$index_mark_" in para.text:
            # Check for the section break before detaching the element. A
            # detached element has no parent, so trying to remove it a second
            # time would raise AttributeError.
            has_section_break = "w:sectPr" in element.xml
            element.getparent().remove(element)  # drop the marker
            awaiting_section_break = not has_section_break
        elif awaiting_section_break and "w:sectPr" in element.xml:
            element.getparent().remove(element)  # drop the empty section break
            awaiting_section_break = False

    new_file_path = os.path.join(os.path.dirname(docx_path), 'invalid_del.docx')

    # Save the modified document.
    document.save(new_file_path)
    return new_file_path
|
||
|
||
if __name__ == '__main__':
    # Manual run against a local sample file. The variables are named so they
    # do not shadow the builtin ``input``.
    # Alternative sample:
    # pdf_path = r'C:\Users\Administrator\Desktop\fsdownload\0bb9cf31-280c-4d96-bc21-0871ee7fd6df\tmp\ztbfile.pdf'
    pdf_path = r'C:\Users\Administrator\Desktop\货物标\zbfiles\招标文件(107国道).pdf'
    marked_pdf_path = insert_mark(pdf_path)

    # Example of running delete_mark on a .docx afterwards:
    # doc_path = r'C:\Users\Administrator\Desktop\fsdownload\0bb9cf31-280c-4d96-bc21-0871ee7fd6df\tmp\invalid_added.docx'
    # res = delete_mark(doc_path)
    # if res:
    #     print(res)
    # else:
    #     print("No")