51 lines
1.9 KiB
Python
51 lines
1.9 KiB
Python
from docx import Document
|
||
import re
|
||
import os
|
||
|
||
|
||
def copy_docx(source_path):
    """Copy one section of a .docx file into a new "*_invalid" document.

    The paragraphs of the source document are scanned for the LAST paragraph
    whose text matches the start pattern.  Every paragraph from that one
    onwards is copied into a freshly created document, up to and including
    the first paragraph whose text matches the end pattern (or to the end of
    the document if none matches).

    For each copied paragraph the paragraph style is passed through and, per
    run, the text, bold, italic, underline, RGB colour and font size are
    carried over.  Nothing else is copied.

    The result is saved in the same folder as the source, with ``_invalid``
    inserted before the file extension.  If no paragraph matches the start
    pattern, an empty document is still saved under that name.

    Args:
        source_path: Path of the .docx file to read.

    Returns:
        None.  The only effect is the file written next to ``source_path``.
    """
    doc = Document(source_path)  # open the source document
    # Fetch the paragraph list once; it is needed for both passes below.
    paragraphs = doc.paragraphs

    # Build "<folder>/<name>_invalid<ext>" from the source path.
    output_folder = os.path.dirname(source_path)
    original_file_name = os.path.basename(source_path)
    file_name_without_ext, file_ext = os.path.splitext(original_file_name)
    modified_file_name = file_name_without_ext + "_invalid" + file_ext
    destination_path = os.path.join(output_folder, modified_file_name)

    new_doc = Document()  # create the destination document

    # Section markers.
    # NOTE(review): the last two alternatives of end_pattern are identical as
    # written; possibly one was meant to use a different colon character.
    # Left unchanged -- confirm against the original file.
    begin_pattern = re.compile(r'第[一二三四五六七八九十]+章\s*招标公告|第一卷')
    end_pattern = re.compile(r'第[一二三四五六七八九十]+章\s*合同|:清标报告|:清标报告')

    # Pass 1: find the index of the last paragraph matching begin_pattern.
    last_begin_index = -1
    for i, paragraph in enumerate(paragraphs):
        if begin_pattern.search(paragraph.text):
            last_begin_index = i

    # Pass 2: copy from that paragraph until (and including) the first
    # paragraph matching end_pattern.
    if last_begin_index != -1:
        for paragraph in paragraphs[last_begin_index:]:
            new_para = new_doc.add_paragraph(style=paragraph.style)
            for run in paragraph.runs:
                new_run = new_para.add_run(run.text)
                new_run.bold = run.bold
                new_run.italic = run.italic
                new_run.underline = run.underline
                # Test the RGB value itself: the colour object is a proxy
                # whose truthiness does not say whether a colour is set.
                color = run.font.color
                rgb = color.rgb if color is not None else None
                if rgb is not None:
                    new_run.font.color.rgb = rgb
                new_run.font.size = run.font.size

            # The end check comes after the copy, so the paragraph that
            # carries the end marker is included in the output.
            if end_pattern.search(paragraph.text):
                break

    new_doc.save(destination_path)  # write the new document
|
||
|
||
|
||
# Script entry point: run the extraction on a fixed sample document.
if __name__ == '__main__':
    sample_docx = "C:\\Users\\Administrator\\Desktop\\货物标\\output\\zbtest13.docx"
    copy_docx(sample_docx)
|