2024-10-10 11:52:37 +08:00

99 lines
3.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import docx
from spire.doc import *
from spire.doc.common import *
def split_and_clean_docx(input_file, output_folder):
    """
    Split a Word document into one file per section, then strip the watermark.

    Every section of the source document is cloned into a fresh document that
    is saved as ``<n>.docx`` (``n`` is the 1-based section index) inside
    ``output_folder``. Once all sections are written, ``remove_watermark`` is
    run over that folder.

    Args:
        input_file (str): Path of the source Word document to split.
        output_folder (str): Directory that receives the per-section files;
            it is created when missing.
    """
    # exist_ok=True avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_folder, exist_ok=True)

    # Load the source document.
    with Document() as document:
        document.LoadFromFile(input_file)

        # Walk every section of the source document.
        for sec_index in range(document.Sections.Count):
            section = document.Sections[sec_index]

            # Build a standalone document holding only this section.
            with Document() as new_document:
                new_document.Sections.Add(section.Clone())

                # Copy themes and default styles from the source so the
                # split file keeps consistent formatting.
                document.CloneThemesTo(new_document)
                document.CloneDefaultStyleTo(new_document)

                output_file = os.path.join(output_folder, f"{sec_index + 1}.docx")
                new_document.SaveToFile(output_file, FileFormat.Docx2016)

    # Clean the watermark text out of everything that was just written.
    remove_watermark(output_folder)
    print("文档拆分并去除水印完成!")
def remove_watermark(output_folder):
    """
    Remove the Spire.Doc evaluation notice from every .docx file in a folder.

    Each matching file is opened via ``docx.Document``; the notice text is
    deleted from the runs of every paragraph in ``doc.paragraphs`` that
    contains it, and the file is saved back to the same path.

    Args:
        output_folder (str): Folder holding the documents to clean.
    """
    watermark_text = "Evaluation Warning: The document was created with Spire.Doc for Python."

    for file_name in os.listdir(output_folder):
        # Word's "~$name.docx" owner-lock files end in .docx but are not
        # real documents, so opening them would fail; skip them.
        if not file_name.endswith(".docx") or file_name.startswith("~$"):
            continue

        doc_path = os.path.join(output_folder, file_name)
        doc = docx.Document(doc_path)

        for paragraph in doc.paragraphs:
            if watermark_text not in paragraph.text:
                continue
            # Replace inside individual runs rather than the whole paragraph.
            # NOTE(review): a notice split across several runs is not matched
            # by this per-run check.
            for run in paragraph.runs:
                if watermark_text in run.text:
                    run.text = run.text.replace(watermark_text, "")

        # Write the cleaned document back over the original file.
        doc.save(doc_path)
import os
def test_split_and_clean_docx():
    """
    Manual smoke test for document splitting and watermark removal.

    Runs ``split_and_clean_docx`` on hard-coded paths and prints whether the
    output folder exists and whether it contains any entries.
    """
    # Replace with the path of your own source document.
    input_file = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfilesdocx\\2-招标文件.docx"
    # Folder that receives the split documents.
    output_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\zboutpub"

    # Run the split + watermark removal.
    split_and_clean_docx(input_file, output_folder)

    # Verify the output folder was produced.
    if not os.path.exists(output_folder):
        print("测试失败: 输出文件夹未生成。")
        return
    print("测试通过: 输出文件夹已生成。")

    # Verify that something was written into it.
    produced = os.listdir(output_folder)
    if not produced:
        print("测试失败: 没有生成任何文档。")
        return
    print(f"测试通过: 生成了 {len(produced)} 个文档。")
# Run the manual smoke test only when this file is executed as a script.
if __name__ == "__main__":
    test_split_and_clean_docx()