From d06d6a145f749819dd92a41e3c1838a9f6fb024d Mon Sep 17 00:00:00 2001
From: zy123 <646228430@qq.com>
Date: Fri, 18 Oct 2024 16:05:18 +0800
Subject: [PATCH] =?UTF-8?q?10.18=E5=B0=8F=E8=A7=A3=E6=9E=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 flask_app/general/little_zbparse.py  |   4 +-
 flask_app/main/基础信息整合快速版.py | 172 +++++++++++++++++++++++++++
 flask_app/main/截取pdf.py            |   2 +-
 3 files changed, 175 insertions(+), 3 deletions(-)
 create mode 100644 flask_app/main/基础信息整合快速版.py

diff --git a/flask_app/general/little_zbparse.py b/flask_app/general/little_zbparse.py
index 3513a47..c32fe64 100644
--- a/flask_app/general/little_zbparse.py
+++ b/flask_app/general/little_zbparse.py
@@ -46,8 +46,8 @@ def little_parse_goods(output_folder, file_path):
     # 上传文件并获取文件 ID
     file_id = upload_file(baseinfo_file_path)
     # 注意：以下路径被硬编码，确保该路径存在并且正确
-    # baseinfo_prompt_file_path='flask_app/static/提示词/小解析基本信息货物标.txt'
-    baseinfo_prompt_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\小解析基本信息货物标.txt'
+    baseinfo_prompt_file_path='flask_app/static/提示词/小解析基本信息货物标.txt'
+    # baseinfo_prompt_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\小解析基本信息货物标.txt'
     # 从提示词文件中读取问题
     questions = read_questions_from_file(baseinfo_prompt_file_path)
     # 多线程处理问题，使用指定的处理模式（2 代表使用 qianwen-long）
diff --git a/flask_app/main/基础信息整合快速版.py b/flask_app/main/基础信息整合快速版.py
new file mode 100644
index 0000000..d510725
--- /dev/null
+++ b/flask_app/main/基础信息整合快速版.py
@@ -0,0 +1,172 @@
+import json
+
+from flask_app.main.json_utils import clean_json_string, nest_json_under_key,rename_outer_key
+from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
+from flask_app.main.判断是否分包等 import judge_whether_main, read_questions_from_judge
+from flask_app.main.多线程提问 import read_questions_from_file, multi_threading
+from flask_app.main.通义千问long import upload_file
+
+
+def aggregate_basic_info_engineering(baseinfo_list):
+    """
+    Merge basic-info dicts and regroup the merged fields by category.
+
+    Args:
+        baseinfo_list (list): Dicts to merge; a later dict overrides earlier keys.
+
+    Returns:
+        dict: Category name -> nested fields ("未提供" when absent), plus ungrouped keys.
+    """
+    groups = {
+        "招标人/代理信息": ["招标人", "招标人联系方式", "招标代理机构", "招标代理机构联系方式"],
+        "项目信息": ["项目名称", "招标编号", "项目概况", "招标范围", "招标控制价", "投标竞争下浮率"],
+        "关键时间/内容": [
+            "投标文件递交截止日期",
+            "投标文件递交方式",
+            "开标时间",
+            "开标地点",
+            "投标人要求澄清招标文件的截止时间",
+            "投标有效期",
+            "评标结果公示媒介",
+        ],
+        "保证金相关": ["质量保证金", "退还投标保证金"],
+        "其他信息": [
+            "重新招标、不再招标和终止招标",
+            "投标费用承担",
+            "招标代理服务费",
+            "是否退还投标文件",
+        ],
+    }
+
+    merged = {}
+    seen_keys = set()
+    # Fold every input dict into one mapping and remember each key seen.
+    for info in baseinfo_list:
+        merged.update(info)
+        seen_keys.update(info.keys())
+
+    # Extend the category lists with optional keys that were actually detected.
+    dynamic_key_handling(groups, seen_keys)
+
+    # Nest each category's fields under the category name.
+    for category, fields in groups.items():
+        nested = {}
+        for field in fields:
+            nested[field] = merged.get(field, "未提供")
+        merged[category] = nested
+        # Drop the flat copies so a field is not reported twice.
+        for field in fields:
+            merged.pop(field, None)
+
+    return merged
+
+def dynamic_key_handling(key_groups, detected_keys):
+    """Add detected optional keys to the matching group lists, in place.
+
+    Keys containing "保证金" join "保证金相关", placed right before "质量保证金"
+    when that entry exists (otherwise appended). Keys containing "联合体" or
+    "分包" are appended to "项目信息"; keys containing "踏勘现场", "投标预备会"
+    or "偏离" are appended to "其他信息". All other keys are ignored.
+    """
+    append_rules = (  # (substrings, target group), checked in this order
+        (("联合体", "分包"), "项目信息"),
+        (("踏勘现场", "投标预备会", "偏离"), "其他信息"),
+    )
+    for key in detected_keys:
+        if "保证金" in key:
+            deposit_keys = key_groups["保证金相关"]
+            if "质量保证金" not in deposit_keys:
+                deposit_keys.append(key)
+            elif key not in deposit_keys:  # avoid inserting a duplicate
+                deposit_keys.insert(deposit_keys.index("质量保证金"), key)
+            continue
+        for substrings, group_name in append_rules:
+            if any(part in key for part in substrings):
+                key_groups[group_name].append(key)
+                break
+
+def judge_consortium_bidding(baseinfo_list):
+    """Return True if any entry accepts consortium bids; strip that flag.
+
+    Every dict whose "是否接受联合体投标" value is "是" has the key removed in
+    place; entries holding any other value are left untouched.
+    """
+    flag_key = "是否接受联合体投标"
+    accepted, kept = False, []
+    for entry in baseinfo_list:
+        if flag_key in entry and entry[flag_key] == "是":
+            accepted = True
+            del entry[flag_key]
+        kept.append(entry)
+    baseinfo_list[:] = kept
+    return accepted
+def combine_basic_info(merged_baseinfo_path,truncate0, output_folder, clause_path):
+    """
+    Collect, merge and categorise the basic information of a tender document.
+
+    Args:
+        merged_baseinfo_path (str): File uploaded to answer the basic-info prompts.
+        truncate0 (str): File given to judge_whether_main and uploaded for the judge questions.
+        output_folder (str): Output folder passed on to judge_whether_main.
+        clause_path (str): Clause file passed to extract_from_notice.
+
+    Returns:
+        dict: {"基础信息": <aggregated basic-info dict>}.
+    """
+    baseinfo_list = []
+    baseinfo_file_path = 'flask_app/static/提示词/基本信息工程标.txt'
+    questions = read_questions_from_file(baseinfo_file_path)
+    # `knowledge_name` was never defined here (NameError); query the uploaded
+    # merged file with qianwen-long instead, the same way res2 is obtained below.
+    baseinfo_file_id = upload_file(merged_baseinfo_path)
+    res1 = multi_threading(questions, "", baseinfo_file_id, 2)
+
+    for index, response in res1 or []:
+        try:
+            if response:
+                baseinfo_list.append(clean_json_string(response))
+            else:
+                print(f"基础信息整合： Warning: Missing or incomplete response data for query index {index}.")
+        except Exception as e:
+            print(f"基础信息整合： Error processing response for query index {index}: {e}")
+
+    # Yes/no judgements (sub-contracting, bid bond, ...); chosen_numbers picks the follow-up questions.
+    chosen_numbers, merged = judge_whether_main(truncate0, output_folder)
+    baseinfo_list.append(merged)
+
+    judge_file_path = 'flask_app/static/提示词/是否相关问题.txt'
+    judge_questions = read_questions_from_judge(judge_file_path, chosen_numbers)
+    judge_consortium = judge_consortium_bidding(baseinfo_list)  # also strips the "是" flag
+
+    if judge_consortium:
+        judge_questions.append(
+            "该招标文件对于联合体投标的要求是怎样的，请按json格式给我提供信息，"
+            "外层键名为'联合体投标要求'，其中有一个嵌套键值对为：\"是否接受联合体投标\":\"是\""
+        )
+
+    file_id = upload_file(truncate0)
+    res2 = multi_threading(judge_questions, "", file_id, 2)  # mode 2: qianwen-long
+
+    if not res2:
+        print("基础信息整合： multi_threading error!")
+    else:
+        for question, response in res2:
+            baseinfo_list.append(clean_json_string(response))
+
+    rebidding_situation = extract_from_notice(clause_path, 3)  # taken from the bidder-instructions body
+    update_json = rename_outer_key(rebidding_situation, "重新招标、不再招标和终止招标")
+    baseinfo_list.append(update_json)
+    aggregated_baseinfo = aggregate_basic_info_engineering(baseinfo_list)  # a dict at this point
+    return {"基础信息": aggregated_baseinfo}
+
+if __name__ == "__main__":  # manual smoke run; raw strings avoid invalid "\A" / "\D" escapes
+    merged_baseinfo_path = r"C:\Users\Administrator\Desktop\招标文件\special_output\zbtest2_merged_baseinfo.pdf"
+    output_folder = r"C:\Users\Administrator\Desktop\fsdownload\3424b7cb-1f85-44b4-a432-44539b870405"
+    truncate0 = r"C:\Users\Administrator\Desktop\fsdownload\3424b7cb-1f85-44b4-a432-44539b870405\ztbfile_tobidders_notice_table.pdf"
+    clause_path = r"C:\Users\Administrator\Desktop\fsdownload\3424b7cb-1f85-44b4-a432-44539b870405\clause1.json"
+    res = combine_basic_info(merged_baseinfo_path, truncate0, output_folder, clause_path)
+    print(json.dumps(res, ensure_ascii=False, indent=4))
+
+
+
+
diff --git a/flask_app/main/截取pdf.py b/flask_app/main/截取pdf.py
index a61c532..f691c81 100644
--- a/flask_app/main/截取pdf.py
+++ b/flask_app/main/截取pdf.py
@@ -384,7 +384,7 @@ def truncate_pdf_specific_engineering(pdf_path, output_folder):
                     truncate_files.append(files)
 
         if truncate_files:
-            merged_output_path = os.path.join(output_folder, f"{base_file_name}_merged_specific.pdf")
+            merged_output_path = os.path.join(output_folder, f"{base_file_name}_merged_baseinfo.pdf")
             merge_selected_pdfs(output_folder, truncate_files, merged_output_path, base_file_name)
             truncate_files.append(merged_output_path)
             print(f"已生成合并文件: {merged_output_path}")