9.3

2024-09-03 09:36:18 +08:00 · 2024-09-03 09:36:18 +08:00 · ee16d5e4b4
commit ee16d5e4b4
parent 1d0211ce72
11 changed files with 194 additions and 76 deletions
--- a/flask_app/main/docx截取docx.py
+++ b/flask_app/main/docx截取docx.py
@ -0,0 +1,50 @@
+from docx import Document
+import re
+import os
+
+
+def copy_docx(source_path):
+    """Copy the tail section of a .docx file into a new "<name>_invalid" file.
+
+    The source paragraphs are scanned for the LAST one matching the begin
+    pattern (a "第X章 招标公告" chapter heading or "第一卷"). Paragraphs from
+    that one onward are copied run by run, up to and including the first
+    paragraph that matches the end pattern (a "第X章 合同" heading or
+    "清标报告" preceded by a colon). Only ``doc.paragraphs`` is iterated, and
+    only run text, bold, italic, underline, RGB colour and font size are
+    transferred.
+
+    The new document is saved next to the source file. When no begin marker
+    is found, the saved document contains no copied paragraphs.
+
+    Args:
+        source_path: Path of the source .docx file.
+
+    Returns:
+        The path the new document was saved to.
+    """
+    doc = Document(source_path)  # open the source document
+    output_folder = os.path.dirname(source_path)
+
+    # Build "<name>_invalid<ext>" beside the source file.
+    original_file_name = os.path.basename(source_path)
+    file_name_without_ext, file_ext = os.path.splitext(original_file_name)
+    modified_file_name = file_name_without_ext + "_invalid" + file_ext
+    destination_path = os.path.join(output_folder, modified_file_name)
+
+    new_doc = Document()  # create the destination document
+
+    # Markers delimiting the section to keep.
+    begin_pattern = re.compile(r'第[一二三四五六七八九十]+章\s*招标公告|第一卷')
+    end_pattern = re.compile(r'第[一二三四五六七八九十]+章\s*合同|：清标报告|:清标报告')
+
+    # Read the paragraph list once; the property is accessed by both passes.
+    paragraphs = doc.paragraphs
+
+    # Find the position of the last paragraph matching begin_pattern.
+    last_begin_index = -1
+    for i, paragraph in enumerate(paragraphs):
+        if begin_pattern.search(paragraph.text):
+            last_begin_index = i
+
+    # Copy from that paragraph until (and including) the end marker.
+    if last_begin_index != -1:
+        for paragraph in paragraphs[last_begin_index:]:
+            # NOTE(review): the style object belongs to the source document;
+            # confirm the style exists in the new document's template.
+            new_para = new_doc.add_paragraph(style=paragraph.style)
+            for run in paragraph.runs:
+                new_run = new_para.add_run(run.text)
+                new_run.bold = run.bold
+                new_run.italic = run.italic
+                new_run.underline = run.underline
+                # Test the colour value itself: the colour accessor object
+                # is truthy even when the run has no explicit colour.
+                rgb = run.font.color.rgb
+                if rgb is not None:
+                    new_run.font.color.rgb = rgb
+                new_run.font.size = run.font.size
+
+            if end_pattern.search(paragraph.text):
+                break
+
+    new_doc.save(destination_path)  # write the new document
+    return destination_path
+
+
+# Run the extraction on a sample file when executed as a script
+if __name__ == '__main__':
+    source_path = "C:\\Users\\Administrator\\Desktop\\货物标\\output\\zbtest13.docx"
+    copy_docx(source_path)
--- a/flask_app/main/format_change.py
+++ b/flask_app/main/format_change.py
@ -58,7 +58,7 @@ def docx2pdf(local_path_in):

 if __name__ == '__main__':
    # 替换为你的文件路径和API URL
-    local_path_in="C:\\Users\\Administrator\\Desktop\\招标文件\\zbtest16_invalid.docx"
+    local_path_in="C:\\Users\\Administrator\\Desktop\\招标文件\\output1\\test111.pdf"
    # pdf2docx(local_path_in)
    downloaded_file=docx2pdf(local_path_in)
    print(downloaded_file)
--- a/flask_app/main/ttt.py
+++ b/flask_app/main/ttt.py
@ -1,18 +1,35 @@
 import re
+import ast

-# 正则表达式
-pattern = re.compile(r'第[一二三四五六七八九十百千]+章.*?(?:项目|服务|商务).*?要求')
+def process_string_list(string_list):
+    """Parse the first "[...]" group found in a string into a Python list.
+
+    The text between the first "[" and the next "]" (with no line break in
+    between) is split on ASCII commas. Surrounding whitespace is stripped
+    and empty items are dropped. If every remaining item is a decimal
+    number the result is a list of ints; otherwise it is a list of the
+    stripped strings.
+
+    Args:
+        string_list: Text expected to contain a bracketed, comma-separated
+            list, e.g. "[1,2,3]".
+
+    Returns:
+        A list of ints or a list of strs. An empty list is returned when no
+        bracket group is found or the group holds no items.
+    """
+    # Non-greedy so that only the first bracket pair is captured.
+    match = re.search(r'\[(.*?)\]', string_list)
+    if not match:
+        return []
+
+    # Drop empty fragments before the numeric check so a trailing comma
+    # such as "[1,2,]" does not demote an all-numeric list to strings.
+    items = [item.strip() for item in match.group(1).split(',')]
+    items = [item for item in items if item]
+
+    # Build the result directly instead of formatting source text and
+    # evaluating it: quotes or backslashes inside an item can no longer
+    # break parsing or be reinterpreted as escapes.
+    if items and all(item.isdecimal() for item in items):
+        return [int(item) for item in items]
+    return items

-# 示例文本进行测试
-text = """
-第一章项目技术、服务及商务要求
-第二章 服务细节要求
-第三章 商务处理要求
-第四章 项目安排要求
-第五章 安全要求
-"""

-# 查找所有匹配
-matches = pattern.findall(text)
-for match in matches:
-    print(match)
+# Test code
+test_string = "[1,2,哈哈]"
+result = process_string_list(test_string)
+print(result)  # Expected output for this mixed input: ['1', '2', '哈哈']
--- a/flask_app/main/截取pdf.py
+++ b/flask_app/main/截取pdf.py
@ -9,8 +9,9 @@ def clean_page_numbers(text):
    # 删除结尾的页码
    cleaned_text = re.sub(r'\s+\d+\s*$', '', cleaned_text)
    # 删除形如 /129 的页码
-    cleaned_text = re.sub(r'\s*\/\s*\d+\s*', '', cleaned_text)
+    cleaned_text = re.sub(r'\s*\/\s*\d+\\s*', '', cleaned_text)
    return cleaned_text
+
 def extract_pages(pdf_path, output_folder, chapter_pattern, begin_page, end_phrase_pattern, output_suffix):
    # 打开PDF文件
    pdf_document = PdfReader(pdf_path)
@ -38,7 +39,6 @@ def extract_pages(pdf_path, output_folder, chapter_pattern, begin_page, end_phra
                    end_page = i
                    break

-
    # 确保找到了起始和结束页面
    if start_page is None or end_page is None:
        print(f"未找到起始或结束页在文件 {pdf_path} 中！")
@ -111,6 +111,14 @@ def truncate_pdf_main(input_path, output_folder, selection):
        ]
        output_suffix = "tobidders_notice"
    elif selection==4:
+        # 配置用于 "资格审查条件" 的正则表达式模式和短语
+        appendix_pattern = r'^附录(?:一)?[：:]|^附件(?:一)?[：:]|^附表(?:一)?[：:]'
+        pattern = re.compile(appendix_pattern)
+        begin_page = 5
+        end_phrases = [r'评标办法正文', r'评标办法', appendix_pattern]
+        output_suffix = "qualification"
+    elif selection == 5:
+        # 配置用于 "无效标" 的正则表达式模式和短语
        pattern = re.compile(r'第[一二三四五六七八九十]+章\s*招标公告|第一卷|招标编号:|招标编号：')
        begin_page = 0
        end_phrases = [
@ -118,13 +126,7 @@ def truncate_pdf_main(input_path, output_folder, selection):
            r'：清标报告',# 添加了新的匹配项
            r':清标报告'
        ]
-        output_suffix="invalid"
-    elif selection==5:
-        appendix_pattern = r'^附录(?:一)?[：:]|^附件(?:一)?[：:]|^附表(?:一)?[：:]'
-        pattern = re.compile(appendix_pattern)
-        begin_page=5
-        end_phrases = [r'评标办法正文', r'评标办法',appendix_pattern]
-        output_suffix="qualification"
+        output_suffix = "invalid"
    else:
        print("无效的选择")
        return None
@ -134,16 +136,15 @@ def truncate_pdf_main(input_path, output_folder, selection):

 def truncate_pdf_multiple(input_path, output_folder):
    truncate_files = []
-    for selection in range(1, 6):
+    for selection in range(1, 5):
        files = truncate_pdf_main(input_path, output_folder, selection)
        truncate_files.extend(files)
    return truncate_files

 if __name__ == "__main__":
-    input_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\zbtest5.pdf"
+    input_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\招标03.pdf"
    output_folder = "C:\\Users\\Administrator\\Desktop\\招标文件\\test"
-    truncate_pdf_multiple(input_path,output_folder)
-    # selection = 5  # 例如：1 - 投标人须知前附表, 2 - 评标办法, 3 - 投标人须知正文 4-招标公告-合同条款前
-    # generated_files = truncate_pdf_main(input_path, output_folder, selection)
+    # truncate_pdf_multiple(input_path,output_folder)
+    selection = 5  # 例如：1 - 投标人须知前附表, 2 - 评标办法, 3 - 投标人须知正文 4-资格审查条件
+    generated_files = truncate_pdf_main(input_path, output_folder, selection)
    # print("生成的文件:", generated_files)
-
--- a/flask_app/main/投标人须知正文条款提取成json文件.py
+++ b/flask_app/main/投标人须知正文条款提取成json文件.py
@ -33,7 +33,8 @@ def extract_section(text, start_keyword, end_phrases):

    end_index = len(text)
    for phrase in end_phrases:
-        match = re.search(phrase, text[start_index:])
+        # Use multiline mode with `re.MULTILINE`
+        match = re.search(phrase, text[start_index:], re.MULTILINE)   #Hello, world!\nWelcome to OpenAI. 在多行字符串多，要 re.MULTILINE以匹配每一行的开头，否则只会匹配字符串的开头。
        if match:
            end_index = start_index + match.start()
            break
@ -118,10 +119,8 @@ def convert_to_json(file_path, start_word, end_phrases):
        text = extract_text_from_pdf(file_path)
    else:
        raise ValueError("Unsupported file format")
-
    # 提取从 start_word 开始到 end_phrases 结束的内容
    text = extract_section(text, start_word, end_phrases)
-
    parsed_data = parse_text_by_heading(text)
    return parsed_data

@ -138,13 +137,13 @@ def convert_clause_to_json(input_path,output_folder):
    return output_path

 if __name__ == "__main__":
-    file_path = 'C:\\Users\\Administrator\\Desktop\\fsdownload\\temp6\\48e650ff-70eb-48df-874c-66c8abbcd89d\\ztbfile_tobidders_notice.pdf'
+    file_path = 'C:\\Users\\Administrator\\Desktop\\fsdownload\\temp6\\713022ff-27d9-43e3-9cc9-2752effbfd66\\ztbfile_tobidders_notice.pdf'
    start_word = "投标人须知正文"
    end_phrases = [
        r'^第[一二三四五六七八九十]+章\s+评标办法', r'^评标办法前附表', r'^附录：', r'^附录一：', r'^附件：', r'^附件一：',
        r'^附表：', r'^附表一：', r'^附录:', r'^附录一:', r'^附件:', r'^附件一:', r'^附表:', r'^附表一:',
    ]
-    output_folder = 'C:\\Users\\Administrator\\Desktop\\招标文件'
+    output_folder = 'C:\\Users\\Administrator\\Desktop\\fsdownload\\temp6\\713022ff-27d9-43e3-9cc9-2752effbfd66'
    try:
        output_path = convert_clause_to_json(file_path,output_folder)
        print(f"Final JSON result saved to: {output_path}")
--- a/flask_app/main/招标文件解析.py
+++ b/flask_app/main/招标文件解析.py
--- a/flask_app/main/无效标和废标和禁止投标整合.py
+++ b/flask_app/main/无效标和废标和禁止投标整合.py
@ -7,6 +7,7 @@ from flask_app.main.json_utils import combine_json_results, nest_json_under_key
 from flask_app.main.通义千问long import upload_file, qianwen_long
 from concurrent.futures import ThreadPoolExecutor
 from flask_app.main.禁止投标情形 import find_forbidden
+from 禁止投标情形 import process_string_list

 #如果当前段落有序号，则向下匹配直接遇到相同的序号样式
 #如果当前段落无序号，则向下匹配序号，把若干同类的序号都摘出来。
@ -85,7 +86,7 @@ def clean_dict_datas(extracted_contents, keywords,excludes):    #让正则表达
                if any(exclude in data for exclude in excludes):
                    continue  # 如果包含任何排除字符串，跳过这个数据
                # 去掉开头的序号，包括字母+数字的格式 以及括号+数字
-                pattern = r'^\s*([（(]\d+[)）]|[A-Za-z]?\d+(\.\d+)*(\s|\.|、)?)'
+                pattern = r'^\s*([（(]\d+[)）]|[A-Za-z]?\d+\s*(\.\s*\d+)*(\s|\.|、)?)'
                data = re.sub(pattern, '', data).strip()
                keyword_match = re.search(keywords, data)
                if keyword_match:
@ -106,14 +107,14 @@ def clean_dict_datas(extracted_contents, keywords,excludes):    #让正则表达
                all_texts1.append(cleaned_text)  # 将处理后的文本添加到结果列表

        else:
-            pattern = r'^\s*([（(]\d+[)）]|[A-Za-z]?\d+(\.\d+)*(\s|\.|、)?)'
+            pattern = r'^\s*([（(]\d+[)）]|[A-Za-z]?\d+\s*(\.\s*\d+)*(\s|\.|、)?)'
            data = re.sub(pattern, '', text_list[0]).strip()
            # 将修改后的第一个元素和剩余的元素连接起来
            text_list[0] = data  # 更新列表中的第一个元素
            joined_text = "\n".join(text_list)  # 如果列表中有多个元素，则连接它们
            all_texts2.append(joined_text)  # 将每个列表的内容添加到 all_texts 中

-    return all_texts1,all_texts2
+    return all_texts1,all_texts2        #all_texts1要额外用gpt   all_text2直接返回结果
 def find_sentences_with_keywords(data, keywords, follow_up_keywords):
    """递归查找并返回包含关键词的句子列表，并根据是否存在后续关键词分别存储到两个列表中。"""
    sentences1 = []  # 保存没有后续关键词的情况
@ -236,8 +237,9 @@ def handle_query(file_path, user_query, output_file, result_key, keywords, trunc
        print("starting qianwen-long...")
        qianwen_ans = qianwen_long(file_id, user_query)
        selected_contents = []
-        num_list = json.loads(qianwen_ans)
+        num_list = process_string_list(qianwen_ans)
        print(num_list)
+
        for index in num_list:
            if index - 1 < len(qianwen_txt):
                content = qianwen_txt[index - 1]  # 转换序号为索引（假设序号从1开始）
@ -253,14 +255,14 @@ def handle_query(file_path, user_query, output_file, result_key, keywords, trunc
        res = {result_key: ""}  # Set the response to empty if no contents were extracted
    return res

-def combine_find_invalid(file_path, output_dir, truncate_json_path,clause_path,truncate4):
+def combine_find_invalid(file_path, output_dir, truncate_json_path,clause_path,truncate3):
    print("starting无效标与废标...")
    queries = [
        (r'否\s*决|无\s*效\s*投\s*标|被\s*拒\s*绝|予\s*以\s*拒\s*绝|投\s*标\s*失\s*效|投\s*标\s*无\s*效',
-         "以上是从招标文件中摘取的内容，文本内之间的信息以'...............'分割，请你根据该内容回答：否决投标或拒绝投标或无效投标或投标失效的情况有哪些？文本中可能存在无关的信息，请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果，x为符合的信息的序号。",
+         "以上是从招标文件中摘取的内容，文本内之间的信息以'...............'分割，请你根据该内容回答：否决投标或拒绝投标或无效投标或投标失效的情况有哪些？文本中可能存在无关的信息，请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果，x为符合的信息的序号，若情况不存在，返回[]。",
         os.path.join(output_dir, "temp1.txt"), "否决和无效投标情形"),
        (r'废\s*标',
-         "以上是从招标文件中摘取的内容，文本内之间的信息以'...............'分割，请你根据该内容回答：废标项的情况有哪些？文本中可能存在无关的信息，请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果，x为符合的信息的序号。",
+         "以上是从招标文件中摘取的内容，文本内之间的信息以'...............'分割，请你根据该内容回答：废标项的情况有哪些？文本中可能存在无关的信息，请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果，x为符合的信息的序号，若情况不存在，返回[]。",
         os.path.join(output_dir, "temp2.txt"), "废标项")
    ]
    results = []
@ -277,7 +279,9 @@ def combine_find_invalid(file_path, output_dir, truncate_json_path,clause_path,t
        for future in futures:
            results.append(future.result())

-    forbidden_res = find_forbidden(truncate_json_path, clause_path, truncate4)
+    #禁止投标
+    print("starting不得存在的情形...")
+    forbidden_res = find_forbidden(truncate_json_path, clause_path, truncate3)
    results.append(forbidden_res)

    combined_dict = {}
@ -288,15 +292,15 @@ def combine_find_invalid(file_path, output_dir, truncate_json_path,clause_path,t
    return nest_json_under_key(combined_dict, "无效标与废标项")


-#TODO:1.运行时间约80s，如果成为短板需要优化多线程  2.没有提取评标办法前附表中的表格 3.提取表格时根据中文的句号分割 4.qianwen-long存在bug
+#TODO:1.运行时间约80s，如果成为短板需要优化多线程
 if __name__ == '__main__':
    start_time = time.time()
    truncate_json_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\truncate_output.json"
    clause_path="C:\\Users\\Administrator\\Desktop\\招标文件\\clause.json"
-    truncate4="C:\\Users\\Administrator\\Desktop\\招标文件\\zbtest12_qualification.pdf"
-    output_dir = "C:\\Users\\Administrator\\Desktop\\招标文件"
-    doc_path = 'C:\\Users\\Administrator\\Desktop\\招标文件\\招标03.docx'
-    results = combine_find_invalid(doc_path, output_dir,truncate_json_path,clause_path,truncate4)
+    truncate3="C:\\Users\\Administrator\\Desktop\\招标文件\\zbtest12_qualification.pdf"
+    output_dir = "C:\\Users\\Administrator\\Desktop\\货物标\\output"
+    doc_path = 'C:\\Users\\Administrator\\Desktop\\货物标\\output\\zbtest11_invalid.docx'
+    results = combine_find_invalid(doc_path, output_dir,truncate_json_path,clause_path,truncate3)
    end_time = time.time()
    print("Elapsed time:", str(end_time - start_time))
    print("Results:", results)
--- a/flask_app/main/禁止投标情形.py
+++ b/flask_app/main/禁止投标情形.py
@ -105,10 +105,16 @@ def process_string_list(string_list):
        # 获取匹配的内容，即方括号内的部分
        content_inside_brackets = match.group(1)
        if content_inside_brackets:  # 检查内容是否为空
-            # 将每个项目用引号包裹，并确保适当的空格和逗号处理
-            formatted_list = '[' + ', '.join(f"'{item.strip()}'" for item in content_inside_brackets.split(',') if item.strip()) + ']'
+            # 检查内容是否是数字列表
+            if all(item.strip().isdigit() for item in content_inside_brackets.split(',')):
+                # 如果是数字，不用加引号，直接保留数字
+                formatted_list = '[' + ', '.join(item.strip() for item in content_inside_brackets.split(',') if item.strip()) + ']'
+            else:
+                # 如果不全是数字，按字符串处理
+                formatted_list = '[' + ', '.join(f"'{item.strip()}'" for item in content_inside_brackets.split(',') if item.strip()) + ']'
        else:
            return []  # 直接返回空列表如果内容为空
+
        # 使用 ast.literal_eval 来解析格式化后的字符串
        try:
            actual_list = ast.literal_eval(formatted_list)
@ -123,18 +129,17 @@ def find_forbidden(truncate_json_path,clause_path,truncate4):    #投标人须
    # output_filename="merged.pdf"
    # paths=[truncate1,truncate4]
    # merged_filepath=merge_pdfs(paths,output_filename)        #暂时废弃，评分前附表中的在'否决投标'中摘录了。
+
    file_id=upload_file(truncate4)
-    #user_query_forbidden = "该招标文件规定的投标人不得存在的其他情形有哪些，请按json列表格式给我提供信息，键名为'不得存在的其他情形'，请你不要回答有关\"信誉要求\"的内容,若文件中未说明，请在键值中填'未知'。"
+    # user_query_forbidden = "该招标文件规定的投标人不得存在的其他情形有哪些，请按json列表格式给我提供信息，键名为'不得存在的其他情形'，请你不要回答有关\"信誉要求\"的内容,若文件中未说明，请在键值中填'未知'。"
    user_query_forbidden = "该招标文件规定的投标人不得存在的其他情形有哪些，请以列表给我提供信息，形如[xx,xx,...],请你不要回答有关\"信誉要求\"的内容,若原文未提及，返回[]。"
    qianwen_forbidden_str = qianwen_long(file_id, user_query_forbidden)
-    print(qianwen_forbidden_str)
-    actual_list=process_string_list(qianwen_forbidden_str)
-    print(actual_list)
+    actual_list=process_string_list(qianwen_forbidden_str)    #提取出字符串列表 ["xxx","xx"]

    includes = ["不得存在", "禁止投标"]
    forbidden_results = extract_and_format_from_paths([truncate_json_path, clause_path], includes)
    processed_results = extract_unique_items_from_texts(forbidden_results)
-    print(processed_results)
+    # print(processed_results)
    merged_forbidden_list = list(dict.fromkeys(actual_list + processed_results))
    forbidden_dict={'不得存在的其他情形':merged_forbidden_list}

@ -142,9 +147,9 @@ def find_forbidden(truncate_json_path,clause_path,truncate4):    #投标人须


 if __name__ == '__main__':
-    truncate_json_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\truncate_output.json"
-    clause_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\clause.json"
-    truncate4 = "C:\\Users\\Administrator\\Desktop\\招标文件\\zbtest12_qualification.pdf"
-    output_dir = "C:\\Users\\Administrator\\Desktop\\招标文件"
-    doc_path = 'C:\\Users\\Administrator\\Desktop\\招标文件\\招标03.docx'
+    truncate_json_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\temp7\\3abb6e16-19db-42ad-9504-53bf1072dfe7\\truncate_output.json"
+    clause_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\temp7\\3abb6e16-19db-42ad-9504-53bf1072dfe7\\clause.json"
+    truncate4 = "C:\\Users\\Administrator\\Desktop\\fsdownload\\temp7\\3abb6e16-19db-42ad-9504-53bf1072dfe7\\ztbfile_qualification.pdf"
+    output_dir = "C:\\Users\\Administrator\\Desktop\\fsdownload\\temp7\\3abb6e16-19db-42ad-9504-53bf1072dfe7"
+    doc_path = 'C:\\Users\\Administrator\\Desktop\\fsdownload\\temp7\\3abb6e16-19db-42ad-9504-53bf1072dfe7\\ztbfile.docx'
    find_forbidden(truncate_json_path,clause_path,truncate4)
--- a/flask_app/main/资格审查模块.py
+++ b/flask_app/main/资格审查模块.py
@ -8,7 +8,7 @@ from flask_app.main.资格评审 import process_qualification
 from flask_app.main.通义千问long import upload_file, qianwen_long


-def combine_review_standards(truncate1,truncate4,knowledge_name,truncate0_jsonpath,clause_path):   #评标办法前附表
+def combine_review_standards(truncate1,truncate3,knowledge_name,truncate0_jsonpath,clause_path):   #评标办法前附表
    # 形式评审、响应评审:千问
    print("starting形式响应评审...")
    file_id=upload_file(truncate1)    #评标办法前附表
@ -16,7 +16,7 @@ def combine_review_standards(truncate1,truncate4,knowledge_name,truncate0_jsonpa
    results = qianwen_long(file_id, user_query_1)
    original_dict_data = extract_content_from_json(results)
    qualification_review = original_dict_data.pop('资格评审标准', '默认值或None')
-    final_qualify_json=process_qualification(qualification_review,truncate4,knowledge_name)
+    final_qualify_json=process_qualification(qualification_review,truncate3,knowledge_name)
    form_response_dict=process_reviews(original_dict_data, knowledge_name, truncate0_jsonpath, clause_path)
    print("形式响应评审done")
    form_response_dict.update(final_qualify_json)
--- a/flask_app/main/转化格式/pydocx_p2d.py
+++ b/flask_app/main/转化格式/pydocx_p2d.py
--- a/flask_app/货物标/test.py
+++ b/flask_app/货物标/test.py
@ -0,0 +1,31 @@
+import os
+
+from docx import Document
+from docxcompose.composer import Composer
+
+
+def combine_docx(master, sub,index):
+    """Append the document at ``sub`` to the document at ``master``.
+
+    Nothing is done (and False is returned) when ``sub`` does not exist or
+    when either path does not end in ".docx". If ``master`` already exists,
+    a page break is added to it and ``sub`` is appended through a
+    ``Composer``; otherwise ``sub`` is loaded and saved under the
+    ``master`` path.
+
+    Args:
+        master: Path of the target .docx file; it may not exist yet.
+        sub: Path of the .docx file to merge in; it must exist.
+        index: Accepted for the caller's sake; the body never reads it.
+
+    Returns:
+        True once the merged document has been saved, False when the
+        preconditions above are not met.
+    """
+    # Existence of the file to merge is checked before the extensions.
+    sub_missing = not os.path.exists(sub)
+    if sub_missing or not (master.endswith('.docx') and sub.endswith('.docx')):
+        return False
+
+    if not os.path.exists(master):
+        # No master yet: the sub document becomes the master as-is.
+        merged = Document(sub)
+    else:
+        merged = Document(master)
+        merged.add_page_break()
+        Composer(merged).append(Document(sub))
+
+    merged.save(master)
+    return True
+
+
+if __name__ == '__main__':
+    master = 'C:\\Users\\Administrator\\Desktop\\招标文件\\output1\\test.docx'
+    sub = 'C:\\Users\\Administrator\\Desktop\\招标文件\\output1\\zbfile.docx'
+    index = 2  # Intended insert position (second element); NOTE(review): combine_docx never reads it
+    combine_docx(master, sub,index)