94 lines
3.4 KiB
Python
94 lines
3.4 KiB
Python
import os
|
||
import re
|
||
from io import BytesIO
|
||
from PyPDF2 import PdfReader, PdfWriter
|
||
from docx import Document
|
||
from reportlab.pdfgen import canvas
|
||
from reportlab.lib.units import cm
|
||
|
||
def insert_mark(input_pdf_path):
    """Interleave a marker page after every page of a PDF.

    Each page of ``input_pdf_path`` is copied to the output, immediately
    followed by a generated page of the same media-box size whose only
    content is the text ``[$$index_mark_N$$]``, where N is the 1-based
    number of the page it follows. The text is drawn in 12 pt Helvetica,
    2.3 cm from the left edge and 0.5 cm below the top edge.

    The result is written to ``invalid_added.pdf`` in the directory of the
    input file.

    :param input_pdf_path: path of the PDF file to read.
    :return: path of the written PDF, or ``""`` when any exception occurs
        (the error is printed instead of being propagated).
    """
    try:
        # Output lives next to the input and is always named invalid_added.pdf.
        marked_pdf_path = os.path.join(
            os.path.dirname(input_pdf_path), "invalid_added.pdf"
        )

        with open(input_pdf_path, 'rb') as source:
            reader = PdfReader(source)
            writer = PdfWriter()

            for page_num, source_page in enumerate(reader.pages):
                writer.add_page(source_page)

                # The marker page is sized like the page it follows.
                width = float(source_page.mediabox.width)
                height = float(source_page.mediabox.height)

                # Render the marker page into an in-memory, single-page PDF.
                buffer = BytesIO()
                marker_canvas = canvas.Canvas(buffer, pagesize=(width, height))
                marker_canvas.setFont("Helvetica", 12)
                # The vertical position is measured from the top edge:
                # page height minus the top margin.
                marker_canvas.drawString(
                    2.3 * cm,
                    height - 0.5 * cm,
                    f"[$$index_mark_{page_num + 1}$$]",
                )
                marker_canvas.save()

                # Rewind the buffer so it can be parsed back as a PDF.
                buffer.seek(0)
                marker_reader = PdfReader(buffer)
                writer.add_page(marker_reader.pages[0])

            # Written while the source file is still open, so page objects
            # that are resolved lazily can still be read from it.
            with open(marked_pdf_path, 'wb') as target:
                writer.write(target)

        print("invalid_file added successfully!")
        return marked_pdf_path

    except Exception as e:
        print(f"发生错误: {e}")
        return ""
|
||
|
||
|
||
def delete_mark(docx_path):
    """Remove all ``[$$index_mark_N$$]`` marker paragraphs from a docx file.

    Every body paragraph whose text contains ``[$$index_mark_`` is removed.
    After a marker has been removed, the next paragraph whose XML contains
    ``w:sectPr`` (the blank section break that follows the marker) is
    removed as well. The cleaned document is saved as ``invalid_del.docx``
    in the directory of the input file.

    :param docx_path: path of the docx file to clean.
    :return: path of the saved, cleaned docx file.
    """
    document = Document(docx_path)
    # True while a marker has been removed but its trailing section break
    # has not been found yet.
    pending_section_break = False

    for para in document.paragraphs:
        element = para._element

        if "[$$index_mark_" in para.text:
            # Check for the section break BEFORE detaching the paragraph:
            # once detached, the element has no parent, so trying to remove
            # it a second time would raise AttributeError.
            carries_section_break = "w:sectPr" in element.xml
            element.getparent().remove(element)
            # If the marker paragraph carried the break itself, the break is
            # already gone with it and nothing more needs to be removed.
            pending_section_break = not carries_section_break
        elif pending_section_break and "w:sectPr" in element.xml:
            element.getparent().remove(element)
            pending_section_break = False

    # Save next to the input file under a fixed name.
    new_file_path = os.path.join(os.path.dirname(docx_path), 'invalid_del.docx')
    document.save(new_file_path)
    return new_file_path
|
||
|
||
if __name__ == '__main__':
    # Manual run against local sample files; adjust the paths before use.
    # Named `input_pdf` rather than `input` so the builtin is not shadowed.
    input_pdf = r'C:\Users\Administrator\Desktop\fsdownload\1073c74f-02b5-463e-a129-23c790a3c872\ztbfile_tobidders_notice_part2.pdf'
    # NOTE(review): the previously disabled call here targeted
    # `add_blank_pages_v2`, which is not defined in the visible file;
    # `insert_mark(input_pdf)` looks like the current equivalent - confirm
    # before re-enabling.
    doc_path = r'C:\Users\Administrator\Desktop\fsdownload\1073c74f-02b5-463e-a129-23c790a3c872\invalid_added.docx'
    delete_mark(doc_path)