import os
import re
from io import BytesIO

from PyPDF2 import PdfReader, PdfWriter
from docx import Document
from reportlab.pdfgen import canvas
from reportlab.lib.units import cm

# Leading text of every index marker; written by insert_mark and searched
# for by delete_mark.
_MARK_PREFIX = "[$$index_mark_"


def _build_mark_page(page_width, page_height, index):
    """Return a one-page PDF page that contains only the marker for *index*.

    The page is rendered in memory with reportlab at the given size (in
    points) and read back with PyPDF2 so it can be appended to a PdfWriter.
    """
    buffer = BytesIO()
    sheet = canvas.Canvas(buffer, pagesize=(page_width, page_height))

    # reportlab's `cm` converts centimetres to points. The y coordinate is
    # measured from the bottom of the page, so subtract the top margin from
    # the page height to place the text near the top edge.
    x_position = 2.3 * cm
    y_position = page_height - 0.5 * cm

    sheet.setFont("Helvetica", 12)
    sheet.drawString(x_position, y_position, f"{_MARK_PREFIX}{index}$$]")
    sheet.save()

    buffer.seek(0)
    return PdfReader(buffer).pages[0]


def insert_mark(input_pdf_path: str) -> str:
    """Copy a PDF, appending a marker page after every original page.

    The copy is written as ``invalid_added.pdf`` in the same directory as
    the input. After original page N (1-based) an otherwise blank page of
    the same size is inserted, carrying the text ``[$$index_mark_N$$]``.

    Copying the input pages is the step most likely to fail (e.g. on a
    damaged PDF); drawing the marker pages normally does not.

    :param input_pdf_path: path of the PDF to copy.
    :return: path of the generated PDF on success; on any error the
        original ``input_pdf_path`` is returned unchanged (best effort).
    """
    try:
        # The output lives next to the input file.
        input_dir = os.path.dirname(input_pdf_path)
        output_pdf_path = os.path.join(input_dir, "invalid_added.pdf")

        with open(input_pdf_path, 'rb') as source_file:
            pdf_reader = PdfReader(source_file)
            pdf_writer = PdfWriter()

            for index, page in enumerate(pdf_reader.pages, start=1):
                pdf_writer.add_page(page)

                # The marker page mirrors the size of the page it follows.
                page_width = float(page.mediabox.width)
                page_height = float(page.mediabox.height)
                pdf_writer.add_page(
                    _build_mark_page(page_width, page_height, index)
                )

            # Write while the source file is still open.
            # NOTE(review): some PyPDF2 versions may read page data lazily
            # from the source stream - confirm before moving this out.
            with open(output_pdf_path, 'wb') as output_file:
                pdf_writer.write(output_file)

        print("invalid_file added successfully!")
        return output_pdf_path
    except Exception as e:
        # Deliberate best effort: fall back to the untouched input file.
        print(f"发生错误: {e}")
        return input_pdf_path


def delete_mark(docx_path: str) -> str:
    """Remove every index-marker paragraph from a docx document.

    Each paragraph whose text contains ``[$$index_mark_`` is deleted. The
    first paragraph after a marker that carries a section break
    (``w:sectPr``) is deleted as well, since it belongs to the blank marker
    page. The result is saved as ``invalid_del.docx`` next to the input.

    :param docx_path: path of the docx file to clean.
    :return: path of the saved, cleaned document.
    """
    document = Document(docx_path)
    pending_section_break = False

    # `document.paragraphs` is a freshly built list, so removing elements
    # from the underlying XML tree while iterating it is safe.
    for para in document.paragraphs:
        element = para._element

        if _MARK_PREFIX in para.text:
            # Check before detaching: if the marker paragraph itself holds
            # the section break, nothing further has to be removed.
            # Removing the same element a second time would fail because a
            # detached element has no parent.
            carries_break = "w:sectPr" in element.xml
            element.getparent().remove(element)
            pending_section_break = not carries_break
            continue

        if pending_section_break and "w:sectPr" in element.xml:
            # Drop the empty section break left behind by the marker page.
            element.getparent().remove(element)
            pending_section_break = False

    # Save next to the original document.
    dir_path = os.path.dirname(docx_path)
    new_file_path = os.path.join(dir_path, 'invalid_del.docx')
    document.save(new_file_path)
    return new_file_path


if __name__ == '__main__':
    input_pdf = r'C:\Users\Administrator\Desktop\文件解析问题\文件解析问题\baada43d-24f6-459d-8a81-219d130f20da\ztbfile.pdf'
    # input_pdf = r'C:\Users\Administrator\Desktop\new招标文件\output5\广水市公安局音视频监控系统设备采购项目_procurement.pdf'
    output_pdf = insert_mark(input_pdf)
    # doc_path = r'C:\Users\Administrator\Desktop\fsdownload\1073c74f-02b5-463e-a129-23c790a3c872\invalid_added.docx'
    # delete_mark(doc_path)