"""Extract a section of a .docx file into a sibling ``*_invalid`` document."""

import os
import re

from docx import Document

# Heading that marks where the copied section begins.
_BEGIN_PATTERN = re.compile(r'第[一二三四五六七八九十]+章\s*招标公告|第一卷')
# Heading that marks the last paragraph of the copied section.
# NOTE(review): the last two alternatives are identical as written; one of
# them was possibly meant to use a different (full-width) colon -- confirm.
_END_PATTERN = re.compile(r'第[一二三四五六七八九十]+章\s*合同|:清标报告|:清标报告')


def _invalid_copy_path(source_path):
    """Return ``<dir>/<name>_invalid<ext>`` located next to *source_path*."""
    folder = os.path.dirname(source_path)
    stem, ext = os.path.splitext(os.path.basename(source_path))
    return os.path.join(folder, stem + "_invalid" + ext)


def _last_match_index(paragraphs, pattern):
    """Return the index of the last paragraph matching *pattern*, or -1."""
    # Scan backwards so the search can stop at the first hit.
    for index in range(len(paragraphs) - 1, -1, -1):
        if pattern.search(paragraphs[index].text):
            return index
    return -1


def _copy_paragraph(new_doc, paragraph):
    """Append a copy of *paragraph* (text plus basic run formatting) to *new_doc*."""
    new_para = new_doc.add_paragraph(style=paragraph.style)
    for run in paragraph.runs:
        new_run = new_para.add_run(run.text)
        new_run.bold = run.bold
        new_run.italic = run.italic
        new_run.underline = run.underline
        if run.font.color:
            new_run.font.color.rgb = run.font.color.rgb
        new_run.font.size = run.font.size


def copy_docx(source_path: str) -> str:
    """Copy the section between the begin and end headings into a new .docx.

    The source document is scanned for the *last* paragraph matching the
    begin pattern. Paragraphs from that one onward are copied, up to and
    including the first paragraph that matches the end pattern (or to the
    end of the document when none matches). Only ``doc.paragraphs`` is
    traversed, and for each run only its text, bold, italic, underline,
    colour and font size are carried over.

    If no paragraph matches the begin pattern, nothing is copied and the
    new document is saved without any copied content.

    Args:
        source_path: Path of the .docx file to read.

    Returns:
        Path of the saved document: the source file name with ``_invalid``
        inserted before the extension, in the same directory.
    """
    doc = Document(source_path)
    destination_path = _invalid_copy_path(source_path)
    new_doc = Document()

    # Read the paragraph list once and reuse it for both the search and the copy.
    paragraphs = doc.paragraphs
    last_begin_index = _last_match_index(paragraphs, _BEGIN_PATTERN)

    if last_begin_index != -1:
        for paragraph in paragraphs[last_begin_index:]:
            _copy_paragraph(new_doc, paragraph)
            # The end-marker paragraph itself is copied before stopping.
            if _END_PATTERN.search(paragraph.text):
                break

    new_doc.save(destination_path)
    print("docx截取docx成功!")
    return destination_path


# Example invocation when run as a script.
if __name__ == '__main__':
    source_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\796f7bb3-2f7a-4332-b044-9d817a07861e\\ztbfile.docx"
    res = copy_docx(source_path)
    print(res)