diff --git a/flask_app/main/docx截取docx.py b/flask_app/main/docx截取docx.py
index bce6e9b..79d9fe6 100644
--- a/flask_app/main/docx截取docx.py
+++ b/flask_app/main/docx截取docx.py
@@ -2,8 +2,6 @@ from docx import Document
 import re
 import os
 
-from flask import g
-
 
 def copy_docx(source_path):
     doc = Document(source_path)  # 打开源文档
@@ -45,7 +43,7 @@ def copy_docx(source_path):
                 break
 
     new_doc.save(destination_path)  # 保存新文档
-    g.logger.info("docx截取docx成功！")
+    print("docx截取docx成功！")
 
 
 # 调用函数
diff --git a/flask_app/main/download.py b/flask_app/main/download.py
index 32bbe92..8aaa520 100644
--- a/flask_app/main/download.py
+++ b/flask_app/main/download.py
@@ -1,7 +1,6 @@
 import requests
 import mimetypes
 
-from flask import g
 
 
 def download_file(url, local_filename):
@@ -29,13 +28,13 @@ def download_file(url, local_filename):
             else:
                 return full_filename,3
     except requests.HTTPError as e:
-        g.logger.error(f"download: HTTP Error: {e}")
+        print(f"download: HTTP Error: {e}")
         return None
     except requests.RequestException as e:
-        g.logger.error(f"download: Error downloading the file: {e}")
+        print(f"download: Error downloading the file: {e}")
         return None
     except Exception as e:
-        g.logger.error(f"download: An error occurred: {e}")
+        print(f"download: An error occurred: {e}")
         return None
 
 if __name__ == '__main__':
diff --git a/flask_app/main/format_change.py b/flask_app/main/format_change.py
index 38a3296..150f80e 100644
--- a/flask_app/main/format_change.py
+++ b/flask_app/main/format_change.py
@@ -1,7 +1,6 @@
 import json
 import os
 import requests
-from flask import g
 
 from flask_app.main.download import download_file
 
@@ -21,14 +20,14 @@ def upload_file(file_path, url):
 
     # 检查响应状态码
     if response.status_code == 200:
-        g.logger.info("format_change 文件上传成功")
+        print("format_change 文件上传成功")
         receive_file_response = response.content.decode('utf-8')
         receive_file_json = json.loads(receive_file_response)
         receive_file_url = receive_file_json["data"]
 
     else:
-        g.logger.info(f"format_change 文件上传失败，状态码: {response.status_code}")
-        g.logger.info(f"format_change {response.text}")
+        print(f"format_change 文件上传失败，状态码: {response.status_code}")
+        print(f"format_change {response.text}")
 
     return receive_file_url
 
@@ -46,7 +45,7 @@ def pdf2docx(local_path_in):
     filename, folder = get_filename_and_folder(local_path_in)  #输入输出在同一个文件夹
     local_path_out=os.path.join(folder,filename)     #输出文件名
     downloaded_filepath,file_type=download_file(receive_download_url, local_path_out)
-    g.logger.info("format_change p2d:have downloaded file to:",downloaded_filepath)
+    print(f"format_change p2d:have downloaded file to: {downloaded_filepath}")
     return downloaded_filepath
 
 def docx2pdf(local_path_in):
@@ -55,7 +54,7 @@ def docx2pdf(local_path_in):
     filename, folder = get_filename_and_folder(local_path_in)  # 输入输出在同一个文件夹
     local_path_out = os.path.join(folder, filename)  # 输出文件名
     downloaded_filepath,file_type = download_file(receive_download_url, local_path_out)
-    g.logger.info("format_change d2p:have downloaded file to:", downloaded_filepath)
+    print(f"format_change d2p:have downloaded file to: {downloaded_filepath}")
     return downloaded_filepath
 
 if __name__ == '__main__':
diff --git a/flask_app/main/json_utils.py b/flask_app/main/json_utils.py
index 6076d51..cb63f52 100644
--- a/flask_app/main/json_utils.py
+++ b/flask_app/main/json_utils.py
@@ -1,9 +1,6 @@
 import json
 import re
 
-from flask import g
-
-
 def extract_content_from_json(json_data):
     """提取 { 和 } 之间的内容，并将其解析为字典"""
     if not json_data.strip():
@@ -14,10 +11,10 @@ def extract_content_from_json(json_data):
             json_data = match.group(0)
             return json.loads(json_data)     #返回字典
         except json.JSONDecodeError as e:
-            g.logger.info(f"json_utils: extract_content_from_json: JSON decode error: {e}")
+            print(f"json_utils: extract_content_from_json: JSON decode error: {e}")
             return {}
     else:
-        g.logger.info("json_utils: extract_content_from_json: No valid JSON content found.")
+        print("json_utils: extract_content_from_json: No valid JSON content found.")
         return {}
 
 def clean_json_string(json_string):
@@ -66,18 +63,18 @@ def add_keys_to_json(target_dict, source_dict):
     dict: 更新后的字典。
     """
     if not target_dict:
-        g.logger.error("json_utils: Error: Target dictionary is empty.")
+        print("json_utils: Error: Target dictionary is empty.")
         return {}
 
     if len(target_dict) != 1:
-        g.logger.error("json_utils: Error: Target dictionary must contain exactly one top-level key.")
+        print("json_utils: Error: Target dictionary must contain exactly one top-level key.")
         return target_dict
 
     # 获取唯一的外层键
     target_key, existing_dict = next(iter(target_dict.items()))
 
     if not isinstance(existing_dict, dict):
-        g.logger.error(f"json_utils: Error: The value under the key '{target_key}' is not a dictionary.")
+        print(f"json_utils: Error: The value under the key '{target_key}' is not a dictionary.")
         return target_dict
 
     # 合并字典
@@ -95,7 +92,7 @@ def rename_outer_key(original_data,new_key):
 
     # 提取原始数据中的唯一外层值（假设只有一个外层键）
     if not original_data or not isinstance(original_data, dict):
-        g.logger.error("json_utils: Error: Invalid input or input is not a dictionary.")  # 如果输入无效或不是字典，则返回空字典
+        print("json_utils: Error: Invalid input or input is not a dictionary.")  # 如果输入无效或不是字典，则返回空字典
         return {}
 
     # 使用 next(iter(...)) 提取第一个键的值
diff --git a/flask_app/main/start_up.py b/flask_app/main/start_up.py
index 04fd137..956c8d4 100644
--- a/flask_app/main/start_up.py
+++ b/flask_app/main/start_up.py
@@ -1,6 +1,5 @@
 import logging
 import shutil
-import sys
 import time
 import uuid
 from datetime import datetime, timedelta
@@ -40,7 +39,8 @@ def before_request():
 def create_logger():
     unique_id = str(uuid.uuid4())
     g.unique_id = unique_id
-    output_folder = f"flask_app/static/output/{unique_id}"
+    # output_folder = f"flask_app/static/output/{unique_id}"
+    output_folder = f"C:/Users/Administrator/Desktop/货物标/zboutpub/{unique_id}"
     os.makedirs(output_folder, exist_ok=True)
     log_filename = "log.txt"
     log_path = os.path.join(output_folder, log_filename)
@@ -59,11 +59,12 @@ def create_logger():
 
 @app.route('/upload', methods=['POST'])
 def zbparse():
+    logger=g.logger
     file_url = validate_request()
     if isinstance(file_url, tuple):  # Check if the returned value is an error response
         return file_url
     try:
-        app.logger.info("starting parsing url:" + file_url)
+        logger.info("starting parsing url:" + file_url)
         final_json_path, output_folder = download_and_process_file(file_url)
         if not final_json_path:
             return jsonify({'error': 'File processing failed'}), 500
@@ -71,7 +72,7 @@ def zbparse():
         # remove_directory(output_folder)  # 然后删除文件夹
         return response  # 最后返回获取的响应
     except Exception as e:
-        app.logger.error('Exception occurred: ' + str(e))  # 使用全局 logger 记录
+        logger.error('Exception occurred: ' + str(e))  # 使用全局 logger 记录
         return jsonify({'error': str(e)}), 500
 
 
@@ -138,7 +139,8 @@ def validate_request():
 def download_and_process_file(file_url):
     logger = g.logger
     unique_id = g.unique_id
-    output_folder = f"flask_app/static/output/{unique_id}"  # 直接使用全局 unique_id 构建路径
+    # output_folder = f"flask_app/static/output/{unique_id}"  # 直接使用全局 unique_id 构建路径
+    output_folder = f"C:/Users/Administrator/Desktop/货物标/zboutpub/{unique_id}"
     filename = "ztbfile"
     downloaded_filename = os.path.join(output_folder, filename)
 
diff --git a/flask_app/main/table_content_extraction.py b/flask_app/main/table_content_extraction.py
index 81dd69e..82dfe48 100644
--- a/flask_app/main/table_content_extraction.py
+++ b/flask_app/main/table_content_extraction.py
@@ -3,7 +3,6 @@ import os
 from docx import Document
 import json
 
-from flask import g
 
 
 def read_tables_from_docx(file_path):
@@ -89,13 +88,13 @@ def save_data_to_json(data, output_folder):
     """将数据保存到JSON文件中."""
     with open(output_filepath, 'w', encoding='utf-8') as file:
         json.dump(data, file, ensure_ascii=False, indent=4)
-    g.logger.info(f"table_content_extraction: The data has been processed and saved to '{output_filepath}'.")
+    print(f"table_content_extraction: The data has been processed and saved to '{output_filepath}'.")
     return output_filepath
 
 
 def extract_tables_main(path, output_folder):
     if not os.path.exists(path):
-        g.logger.error(f"table_content_extraction: The specified file does not exist: {path}")
+        print(f"table_content_extraction: The specified file does not exist: {path}")
         return ""
     # 读取文档表格数据
     table_data = read_tables_from_docx(path)
diff --git a/flask_app/main/基础信息整合.py b/flask_app/main/基础信息整合.py
index 1f82ada..09cf8e9 100644
--- a/flask_app/main/基础信息整合.py
+++ b/flask_app/main/基础信息整合.py
@@ -1,7 +1,5 @@
 import json
 
-from flask import g
-
 from flask_app.main.json_utils import clean_json_string, nest_json_under_key,rename_outer_key, combine_json_results
 from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
 from flask_app.main.判断是否分包等 import judge_whether_main, read_questions_from_judge
@@ -32,9 +30,6 @@ def combine_basic_info(baseinfo_list):
         # 根据检测到的键动态调整 key_groups
     dynamic_key_handling(key_groups, relevant_keys_detected)
 
-    # 打印 key_groups 的内容检查它们是否被正确更新
-    # g.logger.info("Updated key_groups after dynamic handling:")
-
 
         # 使用合并后的字典创建最终输出
     for group_name, keys in key_groups.items():
@@ -82,8 +77,7 @@ def judge_consortium_bidding(baseinfo_list):
 def project_basic_info(knowledge_name,truncate0,output_folder,clause_path):    #投标人须知前附表
     # 调用大模型回答项目基础信息
     baseinfo_list = []
-    # baseinfo_file_path='../static/提示词/前两章提问总结.txt'
-    baseinfo_file_path = 'flask_app/static/提示词/前两章提问总结.txt'  # 替换为你的txt文件路径
+    baseinfo_file_path='flask_app/static/提示词/前两章提问总结.txt'
     questions = read_questions_from_file(baseinfo_file_path)
     res1 = multi_threading(questions, knowledge_name)
     for _, response in res1:  # _占位，代表ques;response[0]也是ques;response[1]是ans
@@ -91,13 +85,12 @@ def project_basic_info(knowledge_name,truncate0,output_folder,clause_path):    #
             if response and len(response) > 1:  # 检查response存在且有至少两个元素
                 baseinfo_list.append(response[1])
             else:
-                g.logger.error(f"基础信息整合： Warning: Missing or incomplete response data for query index {_}.")
+                print(f"基础信息整合： Warning: Missing or incomplete response data for query index {_}.")
         except Exception as e:
-            g.logger.error(f"基础信息整合： Error processing response for query index {_}: {e}")
+            print(f"基础信息整合： Error processing response for query index {_}: {e}")
     # 判断是否分包、是否需要递交投标保证金等
     chosen_numbers, merged = judge_whether_main(truncate0,output_folder)
     baseinfo_list.append(merged)
-    # judge_file_path = '../static/提示词/是否相关问题.txt'
     judge_file_path ='flask_app/static/提示词/是否相关问题.txt'
     judge_questions = read_questions_from_judge(judge_file_path, chosen_numbers)
 
@@ -109,7 +102,7 @@ def project_basic_info(knowledge_name,truncate0,output_folder,clause_path):    #
     file_id=upload_file(truncate0)
     res2 = multi_threading(judge_questions, "",file_id,2)                  #调用千问-long
     if not res2:
-        g.logger.error("基础信息整合： multi_threading errror!")
+        print("基础信息整合： multi_threading errror!")
     else:
         # 打印结果
         for question, response in res2:
diff --git a/flask_app/main/多线程提问.py b/flask_app/main/多线程提问.py
index 1b931e8..7a32b81 100644
--- a/flask_app/main/多线程提问.py
+++ b/flask_app/main/多线程提问.py
@@ -6,7 +6,6 @@ import concurrent.futures
 import time
 
 from dashscope import Assistants, Messages, Runs, Threads
-from flask import g
 from llama_index.indices.managed.dashscope import DashScopeCloudRetriever
 from flask_app.main.通义千问long import qianwen_long, upload_file
 prompt = """
@@ -118,10 +117,10 @@ def pure_assistant():
 
 def llm_call(question, knowledge_name,file_id, result_queue, ans_index, llm_type):
     if llm_type==1:
-        g.logger.info(f"rag_assistant! question:{question}")
+        print(f"rag_assistant! question:{question}")
         assistant = rag_assistant(knowledge_name)
     elif llm_type==2:
-        g.logger.info(f"qianwen_long! question:{question}")
+        print(f"qianwen_long! question:{question}")
         qianwen_res = qianwen_long(file_id,question)
         result_queue.put((ans_index,(question,qianwen_res)))
         return
@@ -131,7 +130,7 @@ def llm_call(question, knowledge_name,file_id, result_queue, ans_index, llm_type
     result_queue.put((ans_index, (question, ans)))  # 在队列中添加索引 (question, ans)
 
 def multi_threading(queries, knowledge_name="", file_id="",llm_type=1):
-    g.logger.info("多线程提问：starting multi_threading...")
+    print("多线程提问：starting multi_threading...")
     result_queue = queue.Queue()
 
     # 使用 ThreadPoolExecutor 管理线程
@@ -150,7 +149,7 @@ def multi_threading(queries, knowledge_name="", file_id="",llm_type=1):
             try:
                 future.result()  # 可以用来捕获异常或确认任务完成
             except Exception as exc:
-                g.logger.error(f"Query {index} generated an exception: {exc}")
+                print(f"Query {index} generated an exception: {exc}")
 
     # 从队列中获取所有结果并按索引排序
     results = [None] * len(queries)
diff --git a/flask_app/main/形式响应评审.py b/flask_app/main/形式响应评审.py
index bc7880a..339bb6e 100644
--- a/flask_app/main/形式响应评审.py
+++ b/flask_app/main/形式响应评审.py
@@ -3,8 +3,6 @@ import re
 import json
 import time
 
-from flask import g
-
 from flask_app.main.多线程提问 import multi_threading
 from flask_app.main.根据条款号整合json import process_and_merge_entries,process_and_merge2
 from flask_app.main.json_utils import extract_content_from_json
@@ -189,9 +187,9 @@ def process_reviews(original_dict_data,knowledge_name, truncate0_jsonpath,clause
                 temp = extract_content_from_json(response[1])
                 first_response_list.append(temp)
             else:
-                g.logger.error(f"形式响应评审：Warning: Missing or incomplete response data for query index {_}.")
+                print(f"形式响应评审：Warning: Missing or incomplete response data for query index {_}.")
         except Exception as e:
-            g.logger.error(f"形式响应评审：Error processing response for query index {_}: {e}")
+            print(f"形式响应评审：Error processing response for query index {_}: {e}")
 
     # Assume JSON file paths are defined or configured correctly
     # print(entries_with_numbers)      #[{'形式评审标准.多标段投标': '3.7.4（5）'}]
diff --git a/flask_app/main/截取pdf.py b/flask_app/main/截取pdf.py
index 083a9ab..55c82f7 100644
--- a/flask_app/main/截取pdf.py
+++ b/flask_app/main/截取pdf.py
@@ -2,43 +2,47 @@ from PyPDF2 import PdfReader, PdfWriter
 import re  # 导入正则表达式库
 import os  # 用于文件和文件夹操作
 
-from flask import g
+def clean_page_content(text, common_header):
+    """Return text with the shared header lines (common_header, newline-separated) and page-number residue removed."""
+    if common_header:  # nothing to strip when no shared header was detected
+        for header_line in common_header.split('\n'):
+            if header_line.strip():  # skip blank header lines
+                # Anchor at the start of the page but tolerate leading whitespace (extract_common_header() takes header lines from stripped text); each removal exposes the next header line.
+                text = re.sub(r'^\s*' + re.escape(header_line.strip()) + r'\n?', '', text, count=1)
 
+    # Page numbers such as "89/129": drop a leading number, then every "/129" fragment, then a trailing number, so a footer "89/129" does not leave "89" behind.
+    text = re.sub(r'^\s*\d+\s*(?=\D)', '', text)  # leading number, only when a non-digit follows
+    text = re.sub(r'\s*\/\s*\d+\s*', '', text)  # "/129"-style fragments, anywhere in the text
+    text = re.sub(r'\s+\d+\s*$', '', text)  # trailing number (possibly exposed by the previous pass)
 
+    return text
 def extract_common_header(pdf_path):
     pdf_document = PdfReader(pdf_path)
     headers = []
-    num_pages_to_read = 3  # 预读页数
+    start_page = 4  # zero-based index 4, i.e. sampling starts at the 5th page
+    num_pages_to_read = 3  # sample up to 3 consecutive pages
 
-    for i in range(min(num_pages_to_read, len(pdf_document.pages))):
+    # Sample pages from start_page on, clipped to the document length (the range is empty for short documents)
+    for i in range(start_page, min(start_page + num_pages_to_read, len(pdf_document.pages))):
         page = pdf_document.pages[i]
-        text = page.extract_text()
-        if text:  # 确保页面有文本内容
-            first_line = text.strip().split('\n')[0]
-            headers.append(first_line)
+        text = page.extract_text() or ""
+        if text:
+            # Keep only the first three lines of each page as header candidates
+            first_lines = text.strip().split('\n')[:3]
+            headers.append(first_lines)
 
     if len(headers) < 2:
         return ""  # 如果没有足够的页来比较，返回空字符串
 
-    # 使用set交集来找出公共部分
-    common_header = set(headers[0].split()).intersection(*[set(header.split()) for header in headers[1:]])
-    common_header = ' '.join(common_header)
-    return common_header
+    # Compare the sampled pages line by line (first lines together, second lines together, ...)
+    common_headers = []
+    for lines in zip(*headers):
+        # Words present in this line on every sampled page
+        common_line = set(lines[0].split()).intersection(*[set(line.split()) for line in lines[1:]])
+        if common_line:
+            common_headers.append(' '.join(w for w in dict.fromkeys(lines[0].split()) if w in common_line))  # keep the page's word order; joining the bare set is unordered
 
-
-def clean_page_content(text, common_header):
-    # 首先删除抬头公共部分
-    if common_header:  # 确保有公共抬头才进行替换
-        cleaned_text = text.replace(common_header, '', 1)  # 假设抬头出现在文本开头，只替换一次
-    else:
-        cleaned_text = text
-
-    # 删除页码
-    cleaned_text = re.sub(r'^\s*\d+\s*(?=\D)', '', cleaned_text)  # 删除开头的页码，仅当紧跟非数字字符时
-    cleaned_text = re.sub(r'\s+\d+\s*$', '', cleaned_text)  # 删除结尾的页码
-    cleaned_text = re.sub(r'\s*\/\s*\d+\s*', '', cleaned_text)  # 删除形如 /129 的页码
-
-    return cleaned_text
+    return '\n'.join(common_headers)
 
 
 def save_pages_to_new_pdf(pdf_path, output_folder, output_suffix, start_page, end_page):
@@ -62,16 +66,16 @@ def save_pages_to_new_pdf(pdf_path, output_folder, output_suffix, start_page, en
         with open(output_pdf_path, 'wb') as f:
             output_doc.write(f)
 
-        g.logger.info(f"已截取并保存页面从 {start_page + 1} 到 {end_page + 1} 为 {output_pdf_path}")
+        print(f"已截取并保存页面从 {start_page + 1} 到 {end_page + 1} 为 {output_pdf_path}")
     else:
-        g.logger.error("提供的页码范围无效。")
+        print("提供的页码范围无效。")
     return output_pdf_path
 
 
 def extract_pages_twice(pdf_path, output_folder, output_suffix):
     common_header = extract_common_header(pdf_path)
     last_begin_index = 0
-    begin_pattern = re.compile(r'第[一二三四五六七八九十]+章\s*招标公告|第一卷')
+    begin_pattern = re.compile(r'第[一二三四五六七八九十]+章\s*招标公告|第一卷|投标邀请书')
     pdf_document = PdfReader(pdf_path)
     for i, page in enumerate(pdf_document.pages):
         text = page.extract_text()
@@ -108,7 +112,7 @@ def extract_pages_twice(pdf_path, output_folder, output_suffix):
                         break  # 找到结束页后退出循环
 
         if start_page is None or end_page is None:
-            g.logger.error(f"twice: 未找到起始或结束页在文件 {pdf_path} 中！")
+            print(f"twice: 未找到起始或结束页在文件 {pdf_path} 中！")
             return ""
         else:
             return save_pages_to_new_pdf(pdf_path, output_folder, output_suffix, start_page, end_page)
@@ -157,7 +161,7 @@ def extract_pages(pdf_path, output_folder, begin_pattern, begin_page, end_patter
         if output_suffix == "qualification" or output_suffix =="invalid":
             return extract_pages_twice(pdf_path, output_folder, output_suffix)
         else:
-            g.logger.error(f"first: 未找到起始或结束页在文件 {pdf_path} 中！")
+            print(f"first: 未找到起始或结束页在文件 {pdf_path} 中！")
             return ""
     else:
         return save_pages_to_new_pdf(pdf_path, output_folder, output_suffix, start_page, end_page)
@@ -185,7 +189,7 @@ def process_input(input_path, output_folder, begin_pattern, begin_page, end_patt
         if output_pdf_path and os.path.isfile(output_pdf_path):
             return [output_pdf_path]  # 以列表形式返回，以保持一致性
     else:
-        g.logger.error("提供的路径既不是文件夹也不是PDF文件。")
+        print("提供的路径既不是文件夹也不是PDF文件。")
     return []
 
 
@@ -224,7 +228,7 @@ def truncate_pdf_main(input_path, output_folder, selection):
         output_suffix = "qualification"
     elif selection == 5:
         # 配置用于 "招标公告" 的正则表达式模式和短语
-        begin_pattern = re.compile(r'第[一二三四五六七八九十]+章\s*招标公告|第一卷')
+        begin_pattern = re.compile(r'第[一二三四五六七八九十]+章\s*招标公告|第一卷|投标邀请书')
         begin_page = 0
         end_pattern = re.compile(r'第[一二三四五六七八九十]+章\s*投标人须知', re.MULTILINE)
         output_suffix = "notice"
@@ -235,7 +239,7 @@ def truncate_pdf_main(input_path, output_folder, selection):
         end_pattern = re.compile(r'第[一二三四五六七八九十]+章\s*合同|[：:]清标报告|第二卷', re.MULTILINE)
         output_suffix = "invalid"
     else:
-        g.logger.error("无效的选择:请选择1-6")
+        print("无效的选择:请选择1-6")
         return None
 
     # Process the selected input
diff --git a/flask_app/main/投标人须知正文条款提取成json文件.py b/flask_app/main/投标人须知正文条款提取成json文件.py
index 9c27e72..0765ed5 100644
--- a/flask_app/main/投标人须知正文条款提取成json文件.py
+++ b/flask_app/main/投标人须知正文条款提取成json文件.py
@@ -4,8 +4,6 @@ import fitz
 import re
 import os
 
-from flask import g
-
 
 def extract_text_from_docx(file_path):
     doc = docx.Document(file_path)
@@ -129,7 +127,7 @@ def convert_to_json(file_path, start_word, end_phrases):
 
 def convert_clause_to_json(input_path,output_folder,type=1):
     if not os.path.exists(input_path):
-        g.logger.error(f"The specified file does not exist: {input_path}")
+        print(f"The specified file does not exist: {input_path}")
         return ""
     if type==1:
         start_word = "投标人须知正文"
diff --git a/flask_app/main/招标文件解析.py b/flask_app/main/招标文件解析.py
index a427932..c600725 100644
--- a/flask_app/main/招标文件解析.py
+++ b/flask_app/main/招标文件解析.py
@@ -4,8 +4,6 @@ import logging
 import os
 import time
 
-from flask import g
-
 from flask_app.main.截取pdf import truncate_pdf_multiple
 from flask_app.main.table_content_extraction import extract_tables_main
 from flask_app.main.知识库操作 import addfileToKnowledge, deleteKnowledge
@@ -20,16 +18,17 @@ from flask_app.main.商务标技术标整合 import combine_evaluation_standards
 from flask_app.main.format_change import pdf2docx, docx2pdf
 from flask_app.main.docx截取docx import copy_docx
 
-# def get_global_logger(unique_id):
-#     if unique_id is None:
-#         return logging.getLogger()  # 获取默认的日志器
-#     logger = logging.getLogger(unique_id)
-#     return logger
+def get_global_logger(unique_id):
+    """Return the root logger when unique_id is None, otherwise the logger named unique_id."""
+    if unique_id is not None:
+        return logging.getLogger(unique_id)
+    return logging.getLogger()  # no id given: fall back to the root logger
 
+logger=None
 
 # 可能有问题：pdf转docx导致打勾符号消失
 def preprocess_files(output_folder, downloaded_file_path, file_type, unique_id):
-    g.logger.info("starting 文件预处理...")
+    logger.info("starting 文件预处理...")
     # 根据文件类型处理文件路径
     if file_type == 1:  # docx
         docx_path = downloaded_file_path
@@ -39,7 +38,7 @@ def preprocess_files(output_folder, downloaded_file_path, file_type, unique_id):
         docx_path = pdf2docx(pdf_path)  # 将pdf转换为docx以供上传到知识库
     else:
         # 如果文件类型不是预期中的1或2，记录错误并返回None
-        g.logger.error("Unsupported file type provided. Preprocessing halted.")
+        logger.error("Unsupported file type provided. Preprocessing halted.")
         return None
 
     # 上传知识库
@@ -59,7 +58,7 @@ def preprocess_files(output_folder, downloaded_file_path, file_type, unique_id):
     truncate1 = truncate_files[1]     #评标办法前附表
     truncate3 = truncate_files[3]     #资格审查表
     clause_path = convert_clause_to_json(truncate_files[2], output_folder)  # 投标人须知正文条款pdf->json
-    g.logger.info("文件预处理done")
+    logger.info("文件预处理done")
 
     return {
         'input_file_path':downloaded_file_path,
@@ -94,59 +93,59 @@ def post_processing(data,includes):
     return result
 # 基本信息
 def fetch_project_basic_info(knowledge_name, truncate0, output_folder, clause_path):  # 投标人须知前附表
-    g.logger.info("starting基础信息...")
+    logger.info("starting基础信息...")  # `logger` is the module-level global: None until main_processing() rebinds it
     basic_res = project_basic_info(knowledge_name, truncate0, output_folder, clause_path)
-    g.logger.info("基础信息done")
+    logger.info("基础信息done")
     return basic_res
 
 
 # 形式、响应、资格评审
 def fetch_review_standards(truncate1, truncate3, knowledge_name, truncate0_jsonpath, clause_path,input_file,output_folder):
-    g.logger.info("starting资格审查...")
+    logger.info("starting资格审查...")  # `logger` is the module-level global: None until main_processing() rebinds it
     review_standards_res = combine_review_standards(truncate1, truncate3, knowledge_name, truncate0_jsonpath,
                                                     clause_path,input_file,output_folder)
-    g.logger.info("资格审查done")
+    logger.info("资格审查done")
     return review_standards_res
 
 
 # 评分细则
 def fetch_evaluation_standards(truncate1):  # 评标办法前附表
-    g.logger.info("starting商务标技术标...")
+    logger.info("starting商务标技术标...")  # `logger` is the module-level global: None until main_processing() rebinds it
     evaluation_standards_res = combine_evaluation_standards(truncate1)
-    g.logger.info("商务标技术标done")
+    logger.info("商务标技术标done")
     return evaluation_standards_res
 
 
 # 无效、废标项解析
 def fetch_invalid_requirements(invalid_docpath, output_folder, truncate0_jsonpath, clause_path, truncate3):
     # 废标项要求：千问
-    g.logger.info("starting无效标与废标...")
+    logger.info("starting无效标与废标...")  # `logger` is the module-level global: None until main_processing() rebinds it
     find_invalid_res = combine_find_invalid(invalid_docpath, output_folder, truncate0_jsonpath, clause_path, truncate3)
-    g.logger.info("无效标与废标done...")
+    logger.info("无效标与废标done...")
     return find_invalid_res
 
 
 # 投标文件要求
 def fetch_bidding_documents_requirements(clause_path):
-    g.logger.info("starting投标文件要求...")
+    logger.info("starting投标文件要求...")  # `logger` is the module-level global: None until main_processing() rebinds it
     fetch_bidding_documents_requirements_json = extract_from_notice(clause_path, 1)
     qualify_nested_res = nest_json_under_key(fetch_bidding_documents_requirements_json, "投标文件要求")
-    g.logger.info("投标文件要求done...")
+    logger.info("投标文件要求done...")
     return qualify_nested_res
 
 
 # 开评定标流程
 def fetch_bid_opening(clause_path):
-    g.logger.info("starting开评定标流程...")
+    logger.info("starting开评定标流程...")  # `logger` is the module-level global: None until main_processing() rebinds it
     fetch_bid_opening_json = extract_from_notice(clause_path, 2)
     qualify_nested_res = nest_json_under_key(fetch_bid_opening_json, "开评定标流程")
-    g.logger.info("开评定标流程done...")
+    logger.info("开评定标流程done...")
     return qualify_nested_res
 
 
 def main_processing(output_folder, downloaded_file_path, file_type, unique_id):  # file_type=1->docx  file_type=2->pdf
-    # global global_logger
-    # global_logger = get_global_logger(unique_id)
+    global logger
+    logger = get_global_logger(unique_id)
     # Preprocess files and get necessary data paths and knowledge index
     processed_data = preprocess_files(output_folder, downloaded_file_path, file_type, unique_id)
     if not processed_data:
@@ -180,7 +179,7 @@ def main_processing(output_folder, downloaded_file_path, file_type, unique_id):
                 result = futures[key].result()
                 comprehensive_responses.append(result)
             except Exception as exc:
-                g.logger.error(f"Error processing {key}: {exc}")
+                logger.error(f"Error processing {key}: {exc}")
     # 合并 JSON 结果
     combined_final_result = combine_json_results(comprehensive_responses)
     includes = ["基础信息", "资格审查", "商务标", "技术标", "无效标与废标项", "投标文件要求", "开评定标流程"]
@@ -190,7 +189,7 @@ def main_processing(output_folder, downloaded_file_path, file_type, unique_id):
     final_result_path = os.path.join(output_folder, "final_result.json")
     with open(final_result_path, 'w', encoding='utf-8') as file:
         json.dump(modified_json, file, ensure_ascii=False, indent=2)
-        g.logger.info("final_result.json has been saved")
+        logger.info("final_result.json has been saved")
     deleteKnowledge(processed_data['knowledge_index'])
     return final_result_path
 
diff --git a/flask_app/main/按页读取pdf.py b/flask_app/main/按页读取pdf.py
index 8c50dcf..6fe5ce0 100644
--- a/flask_app/main/按页读取pdf.py
+++ b/flask_app/main/按页读取pdf.py
@@ -27,5 +27,5 @@ def extract_text_by_page(file_path):
                 print(f"Page {page_num + 1} is empty or text could not be extracted.")
         return result
 if __name__ == '__main__':
-    file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\output\\ztb_20240903100337\\ztb_20240903100337_14-20.pdf"
+    file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\招标文件正文(1)(1).pdf"  # hard-coded absolute Windows path; only read when this module is run as a script
     extract_text_by_page(file_path)
diff --git a/flask_app/main/提取打勾符号.py b/flask_app/main/提取打勾符号.py
index 0ae12fa..625998a 100644
--- a/flask_app/main/提取打勾符号.py
+++ b/flask_app/main/提取打勾符号.py
@@ -2,9 +2,6 @@ import re
 import PyPDF2
 import json
 
-from flask import g
-
-
 def extract_key_value_pairs(text):
     # 更新正则表达式来包括对"团"的处理和行尾斜线
     pattern = r'\d+\.\d+\s*([\w\s（）\u4e00-\u9fff]+)[\x01\x02☑团]([\w\s\u4e00-\u9fff]+)'
@@ -75,7 +72,7 @@ def read_pdf_and_judge_main(file_path, output_json_path):
         with open(output_json_path, "w", encoding="utf-8") as json_file:
             json.dump(all_data, json_file, ensure_ascii=False, indent=4)
 
-        g.logger.info(f"da_gou signal: Data extraction complete and saved to '{output_json_path}'.")
+        print(f"da_gou signal: Data extraction complete and saved to '{output_json_path}'.")
 
 
 if __name__ == "__main__":
diff --git a/flask_app/main/无效标和废标和禁止投标整合.py b/flask_app/main/无效标和废标和禁止投标整合.py
index 87f3760..0f7fab2 100644
--- a/flask_app/main/无效标和废标和禁止投标整合.py
+++ b/flask_app/main/无效标和废标和禁止投标整合.py
@@ -3,9 +3,6 @@ import json
 import os.path
 import time
 import re
-
-from flask import g
-
 from flask_app.main.json_utils import combine_json_results, nest_json_under_key
 from flask_app.main.通义千问long import upload_file, qianwen_long
 from concurrent.futures import ThreadPoolExecutor
@@ -332,7 +329,7 @@ def combine_find_invalid(file_path, output_dir, truncate_json_path,clause_path,t
             results.append(future.result())
 
     #禁止投标
-    g.logger.info("starting不得存在的情形...")
+    print("starting不得存在的情形...")
     forbidden_res = find_forbidden(truncate_json_path, clause_path, truncate3)
     results.append(forbidden_res)
 
@@ -340,7 +337,7 @@ def combine_find_invalid(file_path, output_dir, truncate_json_path,clause_path,t
     for d in results:
         combined_dict.update(d)
 
-    g.logger.info("无效标与废标done...")
+    print("无效标与废标done...")
     return nest_json_under_key(combined_dict, "无效标与废标项")
 
 
diff --git a/flask_app/main/知识库操作.py b/flask_app/main/知识库操作.py
index 1606a7c..f5300d5 100644
--- a/flask_app/main/知识库操作.py
+++ b/flask_app/main/知识库操作.py
@@ -1,7 +1,5 @@
 import os
 import uuid
-
-from flask import g
 from llama_index.readers.dashscope.base import DashScopeParse
 from llama_index.readers.dashscope.utils import ResultType
 from llama_index.indices.managed.dashscope import DashScopeCloudIndex
@@ -16,7 +14,7 @@ def addfileToKnowledge(filepath,knowledge_name):
         knowledge_name,
         verbose=True,
     )
-    g.logger.info("knowledge created successfully!!!")
+    print("knowledge created successfully!!!")
     # index = DashScopeCloudIndex(knowledge_name)
     # index._insert(documents)
     # return index, documents
diff --git a/flask_app/main/禁止投标情形.py b/flask_app/main/禁止投标情形.py
index f16a552..4a7753d 100644
--- a/flask_app/main/禁止投标情形.py
+++ b/flask_app/main/禁止投标情形.py
@@ -4,7 +4,6 @@ import os
 import re
 
 from PyPDF2 import PdfWriter, PdfReader
-from flask import g
 
 from flask_app.main.通义千问long import upload_file, qianwen_long
 
@@ -55,9 +54,9 @@ def extract_and_format_from_paths(json_paths, includes, excludes):
                 # 将当前文件的结果添加到总结果列表
                 all_formatted_results.extend(formatted_results)
         except FileNotFoundError:
-            g.logger.error(f"禁止投标情形: Error: The file '{path}' does not exist.")
+            print(f"禁止投标情形: Error: The file '{path}' does not exist.")
         except json.JSONDecodeError:
-            g.logger.error(f"禁止投标情形: Error: The file '{path}' contains invalid JSON.")
+            print(f"禁止投标情形: Error: The file '{path}' contains invalid JSON.")
 
     return all_formatted_results
 
@@ -126,9 +125,9 @@ def merge_pdfs(paths, output_filename):
     if output_path:
         with open(output_path, 'wb') as out:
             pdf_writer.write(out)
-        g.logger.info(f"禁止投标情形: Merged PDF saved to {output_path}")
+        print(f"禁止投标情形: Merged PDF saved to {output_path}")
     else:
-        g.logger.error("禁止投标情形: No files to merge.")
+        print("禁止投标情形: No files to merge.")
     return output_path
 
 def process_string_list(string_list):
@@ -153,7 +152,7 @@ def process_string_list(string_list):
             actual_list = ast.literal_eval(formatted_list)
             return actual_list
         except SyntaxError as e:
-            g.logger.error(f"禁止投标情形: Error parsing list: {e}")
+            print(f"禁止投标情形: Error parsing list: {e}")
             return []
     else:
         # 如果没有匹配到内容，返回空列表
diff --git a/flask_app/main/读取docx.py b/flask_app/main/读取docx.py
index e037f52..4bf5e91 100644
--- a/flask_app/main/读取docx.py
+++ b/flask_app/main/读取docx.py
@@ -14,5 +14,5 @@ def read_docx(file_path):
 
 
 if __name__ == "__main__":
-    file_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\796f7bb3-2f7a-4332-b044-9d817a07861e\\ztbfile.docx"
+    file_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfilesdocx\\招标文件正文(1)(1).docx"
     read_docx(file_path)
diff --git a/flask_app/main/资格评审.py b/flask_app/main/资格评审.py
index 62adad0..5c27e95 100644
--- a/flask_app/main/资格评审.py
+++ b/flask_app/main/资格评审.py
@@ -2,9 +2,6 @@
 #资格审查中，首先排除'联合体投标'和'不得存在的情况',有'符合'等的，加入matching_keys列表，否则保留原字典
 import json
 import re
-
-from flask import g
-
 from flask_app.main.json_utils import clean_json_string, combine_json_results, add_keys_to_json, nest_json_under_key
 from flask_app.main.多线程提问 import multi_threading,read_questions_from_file
 from flask_app.main.通义千问long import upload_file
@@ -19,7 +16,7 @@ def merge_dictionaries_under_common_key(dicts, common_key):
             # 使用字典解包来合并字典
             merged_dict[common_key].update(d[common_key])
         else:
-            g.logger.error(f"资格评审： Warning: Dictionary does not contain the key {common_key}")
+            print(f"资格评审： Warning: Dictionary does not contain the key {common_key}")
 
     return merged_dict
 def generate_qual_question(matching_keys_list):    #这里假设资质、信誉与人员要求 要不都有、要不都没
@@ -73,15 +70,14 @@ def get_consortium_dict(knowledge_name):
             if response and len(response) > 1:  # 检查response存在且有至少两个元素
                 qualify_list.append(response[1])
             else:
-                g.logger.error(f"资格评审： Warning: Missing or incomplete response data for query index {_}.")
+                print(f"资格评审： Warning: Missing or incomplete response data for query index {_}.")
         except Exception as e:
-            g.logger.error(f"资格评审： Error processing response for query index {_}: {e}")
+            print(f"资格评审： Error processing response for query index {_}: {e}")
     consortium_dict = combine_json_results(qualify_list)
     return consortium_dict
 
 def get_all_dict(knowledge_name):
-    # qualification_review_file_path = '../static/提示词/资格评审.txt'  # 替换为你的txt文件路径
-    qualification_review_file_path='flask_app/static/提示词/资格评审问题.txt'
+    qualification_review_file_path='flask_app/static/提示词/资格评审.txt'
     questions = read_questions_from_file(qualification_review_file_path)
     qualification_list = []
     res1 = multi_threading(questions, knowledge_name)
@@ -90,9 +86,9 @@ def get_all_dict(knowledge_name):
             if response and len(response) > 1:  # 检查response存在且有至少两个元素
                 qualification_list.append(response[1])
             else:
-                g.logger.error(f"资格评审： Warning: Missing or incomplete response data for query index {_}.")
+                print(f"资格评审： Warning: Missing or incomplete response data for query index {_}.")
         except Exception as e:
-            g.logger.error(f"资格评审： Error processing response for query index {_}: {e}")
+            print(f"资格评审： Error processing response for query index {_}: {e}")
     qualification_combined_res = combine_json_results(qualification_list)
     return {'资格评审': qualification_combined_res}
 def process_qualification(qualification_review,truncate3,knowledge_name):
@@ -101,14 +97,14 @@ def process_qualification(qualification_review,truncate3,knowledge_name):
     if not matching_keys_list:             #此时要求全部写在评分办法前附表中，不需要额外提取。
         if not non_matching_dict:          #古法提取
             if truncate3!="":
-                g.logger.info("资格评审： type1")
+                print("资格评审： type1")
                 matching_keys_list=["资质条件","财务要求","业绩要求","信誉要求","其他要求"]
                 ques=generate_qual_question(matching_keys_list)
                 file_id2 = upload_file(truncate3)
                 results2 = multi_threading(ques, "", file_id2, 2)  # 资格评审表，调用qianwen-long
                 res_list = []
                 if not results2:
-                    g.logger.error("资格评审： 调用大模型未成功获取资格评审文件中的要求！")
+                    print("资格评审： 调用大模型未成功获取资格评审文件中的要求！")
                 else:
                     # 打印结果
                     for question, response in results2:
@@ -119,11 +115,11 @@ def process_qualification(qualification_review,truncate3,knowledge_name):
                 updated_qualify_json = add_keys_to_json(merged_dict, consortium_dict)  # 合并字典
                 return updated_qualify_json
             else:
-                g.logger.info("资格评审： type2")
+                print("资格评审： type2")
                 return get_all_dict(knowledge_name)
 
         else:
-            g.logger.info("资格评审： type3")
+            print("资格评审： type3")
             new_non_matching_json={'资格评审':non_matching_dict}
             substring = '联合体'
             found_key = any(substring in key for key in non_matching_dict.keys())   #没有联合体投标，则需生成，防止重复
@@ -135,18 +131,18 @@ def process_qualification(qualification_review,truncate3,knowledge_name):
                 return new_non_matching_json
 
     elif matching_keys_list and truncate3=="":    #这种情况是评分办法前附表中有要求，但是没有正确截取到'资格审查表'
-        g.logger.info("资格评审： type4")
+        print("资格评审： type4")
         final_qualification=get_all_dict(knowledge_name)
         final_qualify_json = add_keys_to_json(final_qualification, non_matching_dict)
         return final_qualify_json
     else:                                       #大多数情况
-        g.logger.info("资格评审： type5")
+        print("资格评审： type5")
         user_querys = generate_qual_question(matching_keys_list)  # 生成提问->‘附件：资格审查’
         file_id2 = upload_file(truncate3)
         results2 = multi_threading(user_querys, "", file_id2, 2)  # 资格评审表，调用qianwen-long
         res_list = []
         if not results2:
-            g.logger.error("资格评审： 调用大模型未成功获取资格评审文件中的要求！")
+            print("资格评审： 调用大模型未成功获取资格评审文件中的要求！")
         else:
             # 打印结果
             for question, response in results2:
diff --git a/flask_app/main/转化格式/pypandoc_d2p.py b/flask_app/main/转化格式/pypandoc_d2p.py
new file mode 100644
index 0000000..fd72e5b
--- /dev/null
+++ b/flask_app/main/转化格式/pypandoc_d2p.py
@@ -0,0 +1,9 @@
+import pypandoc
+
+def docx_to_pdf(docx_path, output_pdf_path):
+    """Convert the Word file at docx_path into a PDF written to output_pdf_path (via pypandoc)."""
+    output = pypandoc.convert_file(docx_path, 'pdf', outputfile=output_pdf_path)
+    assert output == ""  # convert_file returns an empty string when it writes to outputfile
+if __name__ == "__main__":  # sample conversion runs only as a script, never as an import side effect
+    docx_to_pdf('C:\\Users\\Administrator\\Desktop\\货物标\\zbfilesdocx\\招标文件正文(1)(1).docx',
+                'C:\\Users\\Administrator\\Desktop\\货物标\\zbfilesdocx\\output.pdf')
diff --git a/flask_app/货物标/extract_procurement_requirements.py b/flask_app/货物标/extract_procurement_requirements.py
deleted file mode 100644
index 32a0dcb..0000000
--- a/flask_app/货物标/extract_procurement_requirements.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import json
-
-from flask_app.货物标.货物标截取pdf import truncate_pdf_main
-from flask_app.main.format_change import docx2pdf, pdf2docx
-from flask_app.main.多线程提问 import multi_threading
-from flask_app.main.通义千问long import upload_file,qianwen_long
-from flask_app.main.json_utils import clean_json_string,combine_json_results
-
-def generate_key_paths(data, parent_key=''):
-    key_paths = []
-    for key, value in data.items():
-        current_key = f"{parent_key}.{key}" if parent_key else key
-        if isinstance(value, dict):
-            if value:  # 字典非空时，递归处理
-                key_paths.extend(generate_key_paths(value, current_key))
-            else:  # 字典为空时，直接添加键路径
-                key_paths.append(current_key)
-        else:
-            # 如果到达了末端，添加当前键路径
-            key_paths.append(current_key)
-    return key_paths
-
-#获取采购清单
-def fetch_purchasing_list(file_path,output_folder,file_type):
-    global pdf_path,docx_path
-    if file_type==1:
-        docx_path=file_path
-        pdf_path = docx2pdf(file_path)
-    elif file_type==2:
-        pdf_path=file_path
-        docx_path=pdf2docx(file_path)
-    technical_requirements=[]
-    truncate_path=truncate_pdf_main(pdf_path,output_folder,1)
-    user_query1="这是一份货物标中采购要求部分的内容，你需要摘取出需要采购的系统（货物），一个大系统（大项）中可能包含多个小系统（小项），你需要保留这种层次关系，给出货物名称，请以json格式返回，外层键名为\"采购需求\"，嵌套键名为对应的系统名称或货物名称，无需给出采购数量和单位，如有未知内容，在对应键值处填\"未知\"。"
-    file_id=upload_file(truncate_path[0])
-    res=qianwen_long(file_id,user_query1)
-    cleaned_res=clean_json_string(res)
-    keys_list=generate_key_paths(cleaned_res['采购需求'])    #提取需要采购的货物清单
-    user_query_template = "这是一份货物标中采购要求部分的内容，请你给出\"{}\"的具体型号参数要求，请以json格式返回结果，外层键名为\"{}\", 键值对中的键是你对该要求的总结，而值需要完全与原文保持一致，不可擅自总结删减。"
-    queries=[]
-    for key in keys_list:
-        # 替换 user_query2 中的 "网络硬盘录像机" 为当前 key
-        new_query = user_query_template.format(key, key)
-        print(new_query)
-        queries.append(new_query)
-    results=multi_threading(queries,"",file_id,2)
-    if not results:
-        print("errror!")
-    else:
-        # 打印结果
-        for question, response in results:
-            technical_requirements.append(response)
-    technical_requirements_combined_res=combine_json_results(technical_requirements)
-    json_string = json.dumps(technical_requirements_combined_res, ensure_ascii=False, indent=4)
-    print(json_string)
-if __name__ == "__main__":
-    output_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles"
-    file_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\磋商文件.doc"
-    fetch_purchasing_list(file_path,output_folder,1)
-
diff --git a/flask_app/货物标/find_zbfile.py b/flask_app/货物标/find_zbfile.py
index 2561397..c209b28 100644
--- a/flask_app/货物标/find_zbfile.py
+++ b/flask_app/货物标/find_zbfile.py
@@ -12,7 +12,9 @@ def find_and_copy_files(input_folder, output_folder):
     for root, dirs, files in os.walk(input_folder):
         for file in files:
             # 检查文件名是否包含“招标”或“竞争性”并且文件格式正确
-            if ('竞争性' in file or '招标' in file or '磋商' in file) and file.endswith(supported_formats):
+            if ('响应' not in file and '投标' not in file) and \
+                    ('竞争性' in file or '招标文件' in file or '磋商' in file) and \
+                    file.endswith(supported_formats):
                 # 构造完整的文件路径
                 file_path = os.path.join(root, file)
                 # 构造输出路径
diff --git a/flask_app/货物标/test.py b/flask_app/货物标/test.py
index cf8e7a1..14102b9 100644
--- a/flask_app/货物标/test.py
+++ b/flask_app/货物标/test.py
@@ -1,115 +1,33 @@
-import json
-import re
+def postprocess(data):
+    """Replace every dict value whose entries are all '/', '未知' or {} with the list of its keys; non-dict values are left as they are."""
+    for key, value in data.items():
+        if isinstance(value, dict) and all(v in ['/', '未知', {}] for v in value.values()):
+            data[key] = list(value.keys())
+    return data
 
+# Sample data
+data = {
+    "第一包.耗材": {
+        "服务器": "未知",
+        "台式计算机": "未知",
+        "便携式计算机": "/",
+        "信息安全设备": {},
+        "喷墨打印机": "/",
+        "激光打印机": "/",
+        "针式打印机": "/",
+        "液晶显示器": "/",
+        "扫描仪": "/",
+        "基础软件": "/",
+        "信息安全软件": "/",
+        "复印机": "/",
+        "投影仪": "/",
+        "多功能一体机": "/",
+        "触控一体机": "/",
+        "碎纸机": "/",
+        "复印纸": "/"
+    }
+}
 
-def extract_and_format_from_paths(json_paths, includes, excludes):
-    """
-    从多个 JSON 文件路径读取数据，提取包含特定关键词的内容，并按照格式要求合并。
-
-    参数:
-    json_paths (list): 包含多个 JSON 文件路径的列表。
-    includes (list): 包含要检查的关键词的列表。
-    excludes (list): 包含要排除的关键词的列表。
-
-    返回:
-    list: 包含所有文件中满足条件的格式化字符串列表。
-    """
-    all_formatted_results = []
-
-    # 遍历每个文件路径
-    for path in json_paths:
-        try:
-            with open(path, 'r', encoding='utf-8') as file:
-                # 加载 JSON 数据
-                json_data = json.load(file)
-                formatted_results = []
-
-                # 遍历 JSON 数据的每个键值对
-                for key, value in json_data.items():
-                    if isinstance(value, dict):
-                        # 如果值是字典，检查嵌套字典的每个键值对
-                        for sub_key, sub_value in value.items():
-                            if any(include in sub_key for include in includes):
-                                # 如果子值包含关键词，格式化并添加到结果列表
-                                formatted_results.append(f"{sub_value}")
-                    elif isinstance(value, str):  # clause
-                        # 检查是否包含任何 include 关键词
-                        for include in includes:
-                            if include in value:
-                                # 找到 include 之前的内容
-                                prefix = value.split(include)[0]
-                                # 检查 prefix 是否不包含任何 exclude 关键词
-                                if not any(exclude in prefix for exclude in excludes):
-                                    # 如果不包含任何 exclude 关键词，添加整个 value 到结果列表
-                                    if '\n' in value:
-                                        value = value.split('\n', 1)[-1]
-                                    formatted_results.append(value)
-                                    break  # 找到一个符合条件的就跳出循环
-
-                # 将当前文件的结果添加到总结果列表
-                all_formatted_results.extend(formatted_results)
-        except FileNotFoundError:
-            print(f"Error: The file '{path}' does not exist.")
-        except json.JSONDecodeError:
-            print(f"Error: The file '{path}' contains invalid JSON.")
-
-    return all_formatted_results
-
-
-def extract_unique_items_from_texts(texts):
-    # 更新正则表达式以包括更广泛的序号类型，包括中文序号
-    pattern = re.compile(r'(?:\d+\.|\（\d+\）|\(\d+\)|\d+\)|\①|\②|\③|\④|\⑤|\⑥|\⑦|\⑧|\⑨|\⑩|\⑪|\⑫)\s*')
-    intro_pattern = re.compile(r'^.*?[:：]')
-    punctuation_pattern = re.compile(r'[；。，、．.,:;!?！？]+$')
-    url_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
-    content_check_pattern = re.compile(r'[\u4e00-\u9fa5a-zA-Z0-9]{2,}')  # 检查是否含有至少2个连续的字母、数字或汉字
-
-    all_results = []
-    seen = set()
-
-    for text in texts:
-        # 去除文本中的制表符和换行符
-        text = text.replace('\t', '').replace('\n', '')
-
-        # 删除引导性的文本（直到冒号，但保留冒号后的内容）
-        text = intro_pattern.sub('', text)
-
-        # 替换URL为占位符，并保存URL以便后续还原
-        urls = []
-        def url_replacer(match):
-            urls.append(match.group(0))
-            return f"{{URL{len(urls)}}}"
-        text = url_pattern.sub(url_replacer, text)
-
-        # 使用数字和括号的模式分割文本
-        items = pattern.split(text)
-
-        for item in items:
-            cleaned_item = item.strip()
-            if cleaned_item:
-                # 进一步清理每个条目
-                cleaned_item = pattern.sub('', cleaned_item)
-                cleaned_item = punctuation_pattern.sub('', cleaned_item)
-                cleaned_item = cleaned_item.strip()
-
-                # 还原URL
-                for i, url in enumerate(urls, 1):
-                    cleaned_item = cleaned_item.replace(f"{{URL{i}}}", url)
-
-                # 添加未见过的独特条目，确保它包含足够的实质内容并长度大于3个字符
-                if cleaned_item and cleaned_item not in seen and len(cleaned_item) > 3 and content_check_pattern.search(cleaned_item):
-                    seen.add(cleaned_item)
-                    all_results.append(cleaned_item)
-
-    return all_results
-# 使用上面定义的函数
-truncate_json_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\796f7bb3-2f7a-4332-b044-9d817a07861e\\truncate_output.json"
-clause_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\796f7bb3-2f7a-4332-b044-9d817a07861e\\clause1.json"
-json_paths = [truncate_json_path,clause_path]  # 根据实际存放的路径来填写
-includes = ["不得存在","不得与","禁止投标","对投标人的纪律"]
-excludes=["招标","评标","定标"]
-# 调用函数
-results = extract_and_format_from_paths(json_paths, includes,excludes)
-print(results)
-res=extract_unique_items_from_texts(results)
-print(res)
+# Convert the dict
+converted_data = postprocess(data)
+print(converted_data)
diff --git a/flask_app/货物标/技术服务商务要求提取.py b/flask_app/货物标/技术服务商务要求提取.py
new file mode 100644
index 0000000..01562fb
--- /dev/null
+++ b/flask_app/货物标/技术服务商务要求提取.py
@@ -0,0 +1,128 @@
+# -*- encoding:utf-8 -*-
+import json
+import os
+
+from flask_app.main.多线程提问 import multi_threading
+from flask_app.main.通义千问long import upload_file, qianwen_long
+from flask_app.main.json_utils import clean_json_string, combine_json_results
+
+
+def generate_key_paths(data: dict, parent_key: str = '') -> list:
+    key_paths = []
+    # Returns dot-joined key paths for: empty dicts, dicts with no nested dict/list/"未知" value, non-empty lists and "未知" values; other scalars are skipped.
+    for key, value in data.items():
+        current_key = f"{parent_key}.{key}" if parent_key else key
+        if isinstance(value, dict):
+            if value:
+                # Does this dict hold any nested dict, list, or the string "未知"?
+                contains_dict_list_or_unknown = any(isinstance(v, (dict, list)) or v == "未知" for v in value.values())
+                if contains_dict_list_or_unknown:
+                    # Recurse to build the key paths below this key
+                    sub_paths = generate_key_paths(value, current_key)
+                    if sub_paths:
+                        # Descendants produced paths: use those instead of this key
+                        key_paths.extend(sub_paths)
+                    else:
+                        # The recursion produced nothing (e.g. the nested values were only empty lists): fall back to this key
+                        key_paths.append(current_key)
+                else:
+                    # No value is a dict, a list or "未知": record this key itself
+                    key_paths.append(current_key)
+            else:
+                # Empty dict: record the key path directly
+                key_paths.append(current_key)
+        elif isinstance(value, list):
+            # List value: record the path of the key that holds the list
+            if value:  # only when the list is non-empty
+                key_paths.append(current_key)
+        elif value == "未知":
+            # Value is the string "未知" (unknown): record the key path
+            key_paths.append(current_key)
+
+    return key_paths
+
+
+def get_technical_requirements(truncate_file: str) -> str:  # asks the model for the purchase list, then per-item requirements; returns a JSON string
+    user_query1 = "这是一份货物标中采购要求部分的内容，请告诉我需要采购的系统（货物），如果有采购清单，请直接根据清单上的货物名称给出结果，若没有采购清单，你要从文中摘取需要采购的系统（货物），采购需求中可能包含层次关系，如大系统中包含若干子系统，你需要保留这种层次关系，给出系统（货物）名称，请以json格式返回，外层键名为\"采购需求\"，嵌套键名为对应的系统名称或货物名称，需与原文保持一致，无需给出采购数量和单位，如有未知内容，在对应键值处填\"未知\"。"
+    file_id = upload_file(truncate_file)
+    res = qianwen_long(file_id, user_query1)
+    print(res)
+    cleaned_res = clean_json_string(res)  # presumably a dict parsed from the model output — verify against json_utils
+    keys_list = generate_key_paths(cleaned_res['采购需求'])  # dotted key paths to query; the lookup raises KeyError if '采购需求' is absent
+    user_query_template = "这是一份货物标中采购要求部分的内容，请你给出\"{}\"的技术参数（或采购要求）和数量，请以json格式返回结果，外层键名为\"{}\", 键值对中的键是你对该要求的总结，而值需要完全与原文保持一致，不可擅自总结删减。"
+    queries = []
+    for key in keys_list:
+        # Fill both placeholders of the template with the current key path
+        new_query = user_query_template.format(key, key)
+        print(new_query)
+        queries.append(new_query)
+    results = multi_threading(queries, "", file_id, 2)
+    technical_requirements = []
+    if not results:
+        print("errror!未获得大模型的回答！")
+    else:
+        # Collect the responses; the question of each (question, response) pair is not used
+        for question, response in results:
+            technical_requirements.append(response)
+    technical_requirements_combined_res = combine_json_results(technical_requirements)
+    """根据所有键是否已添加处理技术要求"""
+    # Merge the per-item answers back into the original requirement dict (modified in place)
+    combine_and_update_results(cleaned_res['采购需求'], technical_requirements_combined_res)
+    final_res = postprocess(cleaned_res['采购需求'])
+    print("更新后的采购需求处理完成.")
+
+    # Serialize the final result as an indented JSON string, keeping non-ASCII characters as they are
+    json_string = json.dumps(final_res, ensure_ascii=False, indent=4)
+    return json_string
+
+def combine_and_update_results(original_data: dict, updates: dict) -> dict:  # merges `updates` into `original_data` in place and returns it
+    def recursive_update(data, key, value):
+        # Walk a dot-separated key down the nested dicts, then set or merge the value at its last segment
+        keys = key.split('.')
+        for k in keys[:-1]:
+            data = data.setdefault(k, {})  # creates missing levels; assumes existing intermediate values are dicts — TODO confirm
+        if isinstance(value, dict) and isinstance(data.get(keys[-1], None), dict):
+            data[keys[-1]] = {**data.get(keys[-1], {}), **value}  # both sides are dicts: shallow merge, new entries win
+        else:
+            data[keys[-1]] = value  # otherwise the new value replaces whatever was there
+    for key, value in updates.items():
+        recursive_update(original_data, key, value)
+    return original_data
+
+def postprocess(data):
+    """Replace every dict value whose entries are all '/', '未知' or {} with the list of its keys; non-dict values are left as they are."""
+    for key, value in data.items():
+        if isinstance(value, dict) and all(v in ['/', '未知', {}] for v in value.values()):
+            data[key] = list(value.keys())
+    return data
+def test_all_files_in_folder(input_folder: str, output_folder: str) -> None:
+    # Batch driver: make sure the output folder exists first
+    if not os.path.exists(output_folder):
+        os.makedirs(output_folder)
+
+    # Go through every entry of the input folder
+    for filename in os.listdir(input_folder):
+        file_path = os.path.join(input_folder, filename)
+        # Only regular files are processed
+        if os.path.isfile(file_path):
+            print(f"处理文件: {file_path}")
+            # Run the extraction; a failure on one file is printed and does not stop the loop
+            try:
+                json_result = get_technical_requirements(file_path)
+                # Output path: same base name as the input, with a .json extension
+                output_file_path = os.path.join(output_folder, os.path.splitext(filename)[0] + '.json')
+                # Write the JSON string to the output file
+                with open(output_file_path, 'w', encoding='utf-8') as json_file:
+                    json_file.write(json_result)
+                print(f"结果已保存到: {output_file_path}")
+            except Exception as e:
+                print(f"处理文件 {file_path} 时出错: {e}")
+
+
+if __name__ == "__main__":
+    truncate_file="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\招标文件(107国道)_procurement.pdf"
+    res=get_technical_requirements(truncate_file)
+    print(res)
+    # input_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\output1"
+    # output_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\output3"
+    # test_all_files_in_folder(input_folder, output_folder)
diff --git a/flask_app/货物标/提取采购需求main.py b/flask_app/货物标/提取采购需求main.py
new file mode 100644
index 0000000..6cef1cd
--- /dev/null
+++ b/flask_app/货物标/提取采购需求main.py
@@ -0,0 +1,24 @@
+import json
+
+from flask_app.货物标.货物标截取pdf import truncate_pdf_main
+from flask_app.main.format_change import docx2pdf, pdf2docx
+
+
+def fetch_purchasing_list(file_path,output_folder,file_type):
+    """Cut the procurement section out of a tender file (file_type 1: Word input, 2: PDF input); return truncate_pdf_main's result, or None for any other file_type."""
+    if file_type==1:
+        docx_path=file_path  # not used yet; kept alongside pdf_path for the later extraction steps
+        pdf_path = docx2pdf(file_path)
+    elif file_type==2:
+        pdf_path=file_path
+        docx_path=pdf2docx(file_path)
+    else:
+        print("未传入指定格式的文件！")
+        return None
+    return truncate_pdf_main(pdf_path,output_folder,1)  # hand the result back instead of discarding it
+
+if __name__ == "__main__":
+    output_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\货物标output"
+    file_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\磋商文件.doc"
+    fetch_purchasing_list(file_path,output_folder,1)
+
diff --git a/flask_app/货物标/提示词/prompt1.txt b/flask_app/货物标/提示词/prompt1.txt
index b5f8b4a..844f660 100644
--- a/flask_app/货物标/提示词/prompt1.txt
+++ b/flask_app/货物标/提示词/prompt1.txt
@@ -1,10 +1,13 @@
 #这是一份货物标中采购要求部分的内容，你需要摘取出采购清单，一个大系统（大项）中可能包含多个小系统（小项），你需要保留这种层次关系，给出名称和数量和单位，请以json格式返回，外层键名为"采购需求"，如有未知内容，在对应键值处填"未知"。
 
-这是一份货物标中采购要求部分的内容，你需要摘取出需要采购的系统（货物），一个大系统（大项）中可能包含多个小系统（小项），小系统中也可能包含多个货物，你需要保留这种层次关系，给出货物名称，请以json格式返回，外层键名为"采购需求"，嵌套键名为对应的系统名称或货物名称，无需给出采购数量和单位，如有未知内容，在对应键值处填"未知"。
+#这是一份货物标中采购要求部分的内容，你需要摘取出需要采购的系统（货物），一个大系统（大项）中可能包含多个小系统（小项），小系统中也可能包含多个货物，你需要保留这种层次关系，给出货物名称，请以json格式返回，外层键名为"采购需求"，嵌套键名为对应的系统名称或货物名称，无需给出采购数量和单位，如有未知内容，在对应键值处填"未知"。
+
+"这是一份货物标中采购要求部分的内容，你需要摘取出需要采购的系统（货物），一个大系统（大项）中可能包含多个小系统（小项），你需要保留这种层次关系，给出系统（货物）名称，请以json格式返回，外层键名为\"采购需求\"，嵌套键名为对应的系统名称或货物名称，无需给出采购数量和单位，如有未知内容，在对应键值处填\"未知\"。"
+
 
 #这是一份货物标中采购要求部分的内容，请你给出所需的设备名称以及设备的具体型号参数要求，请以json格式返回结果，外层键名为采购要求。
-
-这是一份货物标中采购要求部分的内容，请你给出"网络硬盘录像机"的具体型号参数要求，请以json格式返回结果，外层键名为"网络硬盘录像机",键值对中的键是你对该要求的总结，而值需要完全与原文保持一致，不可擅自总结删减。
+这是一份货物标中采购要求部分的内容，请你给出\"{}\"的具体型号参数要求和数量，请以json格式返回结果，外层键名为\"{}\", 键值对中的键是你对该要求的总结，而值需要完全与原文保持一致，不可擅自总结删减。
+#这是一份货物标中采购要求部分的内容，请你给出"网络硬盘录像机"的具体型号参数要求，请以json格式返回结果，外层键名为"网络硬盘录像机",键值对中的键是你对该要求的总结，而值需要完全与原文保持一致，不可擅自总结删减。
 
 {
   "采购需求": {
@@ -76,5 +79,71 @@
     }
   }
 }
+{
+  "采购需求": {
+    "第一包": {
+      "办公电子设备": [
+        "服务器",
+        "台式计算机",
+        "便携式计算机",
+        "信息安全设备",
+        "喷墨打印机",
+        "激光打印机",
+        "针式打印机",
+        "液晶显示器",
+        "扫描仪",
+        "基础软件",
+        "信息安全软件",
+        "复印机",
+        "投影仪",
+        "多功能一体机",
+        "触控一体机",
+        "碎纸机"
+      ],
+      "软件": [
+        "基础软件",
+        "信息安全软件"
+      ],
+      "耗材": [
+        "复印纸"
+      ]
+    },
+    "第二包": {
+      "办公家电": [
+        "空调机"
+      ]
+    },
+    "第三包": {
+      "家具用具": [
+        "床类",
+        "台、桌类",
+        "椅凳类",
+        "沙发类",
+        "柜类",
+        "架类",
+        "屏风类",
+        "厨卫用具",
+        "组合家具",
+        "家用家具零配件",
+        "其他家具用具"
+      ]
+    },
+    "第四包": {
+      "印刷服务": "未知"
+    },
+    "第五包": {
+      "汽车维修和保养服务": "未知"
+    },
+    "第六包": {
+      "会计服务": "未知"
+    },
+    "第七包": {
+      "工程造价咨询服务": "未知"
+    },
+    "第八包": {
+      "机动车保险服务": "未知"
+    }
+  }
+}
 
 
diff --git a/flask_app/货物标/货物标截取pdf.py b/flask_app/货物标/货物标截取pdf.py
index 8ec3065..ff5e417 100644
--- a/flask_app/货物标/货物标截取pdf.py
+++ b/flask_app/货物标/货物标截取pdf.py
@@ -1,91 +1,179 @@
 from PyPDF2 import PdfReader, PdfWriter
 import re  # 导入正则表达式库
 import os  # 用于文件和文件夹操作
+from flask_app.main.format_change import docx2pdf
 
-def clean_page_numbers(text):
-    # 使用正则表达式删除页码
-    # 假设页码在文本的最开始，紧跟着文字且无空格分隔
-    cleaned_text = re.sub(r'^\s*\d+\s*(?=\D)', '', text)  # 删除开头的页码，仅当紧跟非数字字符时
-    # 删除结尾的页码
-    cleaned_text = re.sub(r'\s+\d+\s*$', '', cleaned_text)
-    # 删除形如 /129 的页码
-    cleaned_text = re.sub(r'\s*\/\s*\d+\s*', '', cleaned_text)
-    return cleaned_text
-def extract_pages(pdf_path, output_folder, begin_pattern, begin_page, end_pattern, output_suffix):
-    # 打开PDF文件
+
+def clean_page_content(text: str, common_header: str) -> str:
+    """Return `text` with the shared page header (if any) and page-number fragments removed."""
+    if common_header:  # skip header removal when no shared header was found
+        for header_line in common_header.split('\n'):
+            if header_line.strip():  # ignore blank header lines
+                # Drop the header line only where it starts the text ('^' without re.MULTILINE), at most once
+                text = re.sub(r'^' + re.escape(header_line.strip()) + r'\n?', '', text, count=1)
+
+    # Remove page numbers such as 89/129; the three substitutions together strip the whole "89/129"
+    text = re.sub(r'^\s*\d+\s*(?=\D)', '', text)  # leading number, only when a non-digit follows it
+    text = re.sub(r'\s+\d+\s*$', '', text)  # trailing number
+    text = re.sub(r'\s*\/\s*\d+\s*', '', text)  # every "/129"-style fragment, wherever it occurs
+
+    return text
+def extract_common_header(pdf_path):
     pdf_document = PdfReader(pdf_path)
-    start_page = None
-    end_page = None
-    # 遍历文档的每一页，查找开始和结束短语的位置
-    for i in range(len(pdf_document.pages)):
+    headers = []
+    total_pages = len(pdf_document.pages)
+    middle_page = total_pages // 2  # 计算中间页
+
+    # 确保中间页前后各一页（共3页），如果不足3页，则尽可能取足
+    start_page = max(0, middle_page - 1)
+    num_pages_to_read = 3
+
+    for i in range(start_page, min(start_page + num_pages_to_read, total_pages)):
         page = pdf_document.pages[i]
-        text = page.extract_text()
+        text = page.extract_text() or ""
         if text:
-            cleaned_text = clean_page_numbers(text)
-            if re.search(begin_pattern, cleaned_text) and i > begin_page:
-                start_page = i
-            if start_page is not None and re.search(end_pattern, cleaned_text) and i > (start_page+1):
-                end_page = i
-                break
-    # 确保找到了起始和结束页面
-    if start_page is None or end_page is None:
-        print(f"未找到起始或结束页在文件 {pdf_path} 中！")
-        return None
+            # 只取每页的前三行
+            first_lines = text.strip().split('\n')[:3]
+            headers.append(first_lines)
 
-    # 创建一个新的PDF文档保存截取的页面
-    base_file_name = os.path.splitext(os.path.basename(pdf_path))[0]  # Get the base name without extension
-    output_pdf_path = os.path.join(output_folder, f"{base_file_name}_{output_suffix}.pdf")
-    output_doc = PdfWriter()
+    if len(headers) < 2:
+        return ""  # 如果没有足够的页来比较，返回空字符串
 
-    # 添加需要的页面，从 start_page 开始，包括 end_page
-    for page_num in range(start_page, end_page + 1):
-        output_doc.add_page(pdf_document.pages[page_num])
-    # 保存新的PDF文件
-    with open(output_pdf_path, 'wb') as f:
-        output_doc.write(f)
+    # Build the shared header line by line: keep the words that appear on every sampled page
+    common_headers = []
+    for lines in zip(*headers):
+        # Words shared by this line across all sampled pages (a set, so it carries no order)
+        common_words = set(lines[0].split()).intersection(*[set(line.split()) for line in lines[1:]])
+        if common_words:
+            common_headers.append(' '.join(w for w in lines[0].split() if w in common_words))  # first page's word order: deterministic, and able to match the page text
 
-    print(f"已截取并保存页面从 {start_page} 到 {end_page} 为 {output_pdf_path}")
+    return '\n'.join(common_headers)
 
-    return output_pdf_path
+def is_pdf_or_doc(filename: str) -> bool:
+    """Return True when the file name ends in .pdf, .doc or .docx (case-insensitive)."""
+    return filename.lower().endswith(('.pdf', '.doc', '.docx'))
+
+def convert_to_pdf(file_path: str):
+    # Decide by extension only: .doc/.docx go through docx2pdf and its result is returned; anything else is returned unchanged
+    if file_path.lower().endswith(('.doc', '.docx')):
+        return docx2pdf(file_path)
+    return file_path
 
 def process_input(input_path, output_folder, begin_pattern, begin_page, end_pattern, output_suffix):
-    # 确保输出文件夹存在
     if not os.path.exists(output_folder):
         os.makedirs(output_folder)
+    generated_files = []
     if os.path.isdir(input_path):
-        generated_files = []
         # 遍历文件夹内的所有PDF文件
         for file in os.listdir(input_path):
-            if file.endswith(".pdf"):
-                pdf_path = os.path.join(input_path, file)
-                output_pdf_path = extract_pages(pdf_path, output_folder, begin_pattern, begin_page, end_pattern, output_suffix)
-                if output_pdf_path and os.path.isfile(output_pdf_path):
+            file_path = os.path.join(input_path, file)
+            if is_pdf_or_doc(file):
+                pdf_path = convert_to_pdf(file_path)
+                output_pdf_path = extract_pages(pdf_path, output_folder, begin_pattern, begin_page, end_pattern,
+                                                output_suffix)
+                if output_pdf_path:
                     generated_files.append(output_pdf_path)
-        return generated_files
     elif os.path.isfile(input_path) and input_path.endswith(".pdf"):
         # 处理单个PDF文件
         output_pdf_path = extract_pages(input_path, output_folder, begin_pattern, begin_page, end_pattern, output_suffix)
-        if output_pdf_path and os.path.isfile(output_pdf_path):
-            return [output_pdf_path]  # 以列表形式返回，以保持一致性
+        if output_pdf_path:
+            generated_files.append(output_pdf_path)
     else:
         print("提供的路径既不是文件夹也不是PDF文件。")
-    return []
+    return generated_files
+
+def extract_pages_twice(pdf_path: str, output_folder: str, output_suffix: str, common_header: str):  # second pass; extract_pages calls it for "procurement" when its own search finds no range
+    pdf_document = PdfReader(pdf_path)
+    begin_page=5  # a start match only counts on page indexes greater than this
+    start_page = None
+    end_page = None
+    # Pages mentioning "文件的构成" / "文件的组成" are never accepted as the start page
+    exclusion_pattern = re.compile(r'文件的构成|文件的组成')
+    if output_suffix == "procurement":  # any other suffix falls through and the function returns None implicitly
+        begin_pattern = re.compile(
+            r'^第[一二三四五六七八九十百千]+(?:章|部分).*?(?:服务|项目|商务).*?要求|'
+            r'^第[一二三四五六七八九十百千]+(?:章|部分).*?(?:采购|技术标准).*|'
+            r'^[一二三四五六七八九十百千]+、\s*采购清单',
+            re.MULTILINE
+        )
+        end_pattern = re.compile(
+            r'^第[一二三四五六七八九十百千]+(?:章|部分)\s*[\u4e00-\u9fff]+',
+            re.MULTILINE
+        )
+        for i in range(len(pdf_document.pages)):
+            page = pdf_document.pages[i]
+            text = page.extract_text() or ""
+            cleaned_text = clean_page_content(text,common_header)
+            if re.search(begin_pattern, cleaned_text) and i > begin_page and not re.search(exclusion_pattern, cleaned_text):
+                start_page = i  # no "start_page is None" guard: every later matching page overwrites the start
+            if start_page is not None and re.search(end_pattern, cleaned_text) and i > start_page:
+                end_page = i
+                break
+        if start_page is None or end_page is None:
+            print(f"twice: 未找到起始或结束页在文件 {pdf_path} 中！")
+            return ""  # NOTE(review): empty string here, while extract_pages returns None on failure
+        else:
+            return save_extracted_pages(pdf_document,start_page, end_page, pdf_path,output_folder, output_suffix)
+
+
+def extract_pages(pdf_path: str, output_folder: str, begin_pattern, begin_page: int, end_pattern, output_suffix: str):  # returns the saved file path, or None when nothing was extracted
+    try:
+        common_header = extract_common_header(pdf_path)  # shared header text, stripped from each page before matching
+        pdf_document = PdfReader(pdf_path)
+        start_page = None
+        end_page = None
+        for i in range(len(pdf_document.pages)):
+            page = pdf_document.pages[i]
+            text = page.extract_text() or ""
+            cleaned_text = clean_page_content(text,common_header)
+            if start_page is None and re.search(begin_pattern, cleaned_text) and i > begin_page:
+                start_page = i  # first match after begin_page wins
+            if start_page is not None and re.search(end_pattern, cleaned_text) and i > start_page:
+                end_page = i  # first end match on a page after the start page
+                break
+        if start_page is None or end_page is None:
+            if output_suffix == "procurement":
+                return extract_pages_twice(pdf_path, output_folder, output_suffix,common_header)  # retry with the second-pass patterns
+            else:
+                print(f"未找到起始或结束页在文件 {pdf_path} 中！")
+                return None
+
+        return save_extracted_pages(pdf_document, start_page, end_page, pdf_path, output_folder, output_suffix)
+    except Exception as e:  # any failure for this file is printed and reported as None
+        print(f"Error processing {pdf_path}: {e}")
+        return None
+
+def save_extracted_pages(pdf_document, start_page: int, end_page: int, pdf_path: str, output_folder: str, output_suffix: str) -> str:  # writes the page range to a new PDF and returns its path
+    base_file_name = os.path.splitext(os.path.basename(pdf_path))[0]  # source file name without its extension
+    output_pdf_path = os.path.join(output_folder, f"{base_file_name}_{output_suffix}.pdf")
+    output_doc = PdfWriter()
+    for page_num in range(start_page, end_page + 1):  # end_page itself is included in the output
+        output_doc.add_page(pdf_document.pages[page_num])
+    with open(output_pdf_path, 'wb') as f:
+        output_doc.write(f)
+    print(f"已截取并保存页面从 {start_page} 到 {end_page} 为 {output_pdf_path}")
+    return output_pdf_path
 
 
 def truncate_pdf_main(input_path, output_folder, selection):
     if selection == 1:
-        # Configure patterns and phrases for "第三章 项目技术、服务及商务要求"
-        begin_pattern = re.compile(r'第[一二三四五六七八九十百千]+章.*?(?:服务|项目|商务).*?要求|第[一二三四五六七八九十百千]+章.*?采购.*')
+        # Matches both "第x章" and "第x部分" headings. NOTE(review): no re.MULTILINE here (extract_pages_twice passes it), so '^' anchors only at the start of the page text — confirm intended
+        begin_pattern = re.compile(
+            r'^第[一二三四五六七八九十百千]+(?:章|部分).*?(?:服务|项目|商务).*?要求|'
+            r'^第[一二三四五六七八九十百千]+(?:章|部分).*?采购.*',
+            # r'^[一二三四五六七八九十百千]+、\s*采购清单',
+        )
         begin_page = 5
-        end_pattern = re.compile(r'第[一二三四五六七八九十百千]+章\s*(资格审查|评标方法|评审办法|评定办法)')
-        # 示例文本进行测试
-        output_suffix = "tobidders_notice_table"
+        end_pattern = re.compile(
+            r'^第[一二三四五六七八九十百千]+(?:章|部分)\s*[\u4e00-\u9fff]+'
+        )
+        output_suffix = "procurement"
 
     else:
         print("无效的选择")
         return None
 
-    # Process the selected input
+    # Run the extraction with the patterns chosen above
     return process_input(input_path, output_folder, begin_pattern, begin_page, end_pattern, output_suffix)
 
 def truncate_pdf_multiple(input_path, output_folder):
@@ -96,10 +184,10 @@ def truncate_pdf_multiple(input_path, output_folder):
     return truncate_files
 
 if __name__ == "__main__":
-    input_path = "C:\\Users\\Administrator\\WPSDrive\\292826853\\WPS云盘\\应用\\输出为PDF\\公安局音视频监控系统设备采购项目(定稿）_20240829133603.pdf"
+    input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\招标文件(107国道).pdf"
     output_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\output1"
-    # truncate_pdf_multiple(input_path,output_folder)
-    selection = 1  # 例如：1 - 投标人须知前附表, 2 - 评标办法, 3 - 投标人须知正文 4-招标公告-合同条款前
-    generated_files = truncate_pdf_main(input_path, output_folder, selection)
+    truncate_pdf_multiple(input_path,output_folder)
+    # selection = 1  # 例如：1 - 投标人须知前附表, 2 - 评标办法, 3 - 投标人须知正文 4-招标公告-合同条款前
+    # generated_files = truncate_pdf_main(input_path, output_folder, selection)
     # print("生成的文件:", generated_files)