12.12 解决了clause_path为空开评定标失败的bug

2024-12-12 18:20:26 +08:00 · 2024-12-12 18:20:26 +08:00 · 528c4b38a1
commit 528c4b38a1
parent c77ada480c
2 changed files with 69 additions and 29 deletions
--- a/flask_app/general/商务技术评分提取.py
+++ b/flask_app/general/商务技术评分提取.py
@@ -423,13 +423,13 @@ def combine_evaluation_standards(evaluation_method_path,invalid_path,zb_type):
 if __name__ == "__main__":
    start_time=time.time()
    # truncate_file=r"C:\Users\Administrator\Desktop\招标文件-采购类\tmp2\2024-新疆-塔城地区公安局食药环分局快检实验室项目_evaluation_method.pdf"
-    evaluation_method_path = r'C:\Users\Administrator\Desktop\fsdownload\91399aa4-1ee8-447d-a05b-03cd8d15ced5\ztbfile_evaluation_method.pdf'
+    evaluation_method_path = r'C:\Users\Administrator\Desktop\fsdownload\aba81749-5986-4492-8b4b-16db9c69a09d\ztbfile_evaluation_method.pdf'
    invalid_path=r'C:\Users\Administrator\Desktop\fsdownload\91399aa4-1ee8-447d-a05b-03cd8d15ced5\ztbfile_invalid.pdf'
    # truncate_file = "C:\\Users\\Administrator\\Desktop\\货物标\\output2\\2-招标文件（统计局智能终端二次招标）_evaluation_method.pdf"
    # truncate_file="C:\\Users\\Administrator\\Desktop\\货物标\\output2\\广水市妇幼招标文件最新（W改）_evaluation_method.pdf"
    # truncate_file = "C:\\Users\\Administrator\\Desktop\\fsdownload\\2d481945-1f82-45a5-8e56-7fafea4a7793\\ztbfile_evaluation_method.pdf"
    # truncate_file="C:\\Users\\Administrator\\Desktop\\fsdownload\\ztbfile_evaluation_method.pdf"
-    res = combine_evaluation_standards(evaluation_method_path,invalid_path,1)
+    res = combine_evaluation_standards(evaluation_method_path,invalid_path,2)
    print(json.dumps(res, ensure_ascii=False, indent=4))
    end_time=time.time()
    print("elapsed time:"+str(end_time-start_time))
--- a/flask_app/工程标/投标人须知正文提取指定内容工程标.py
+++ b/flask_app/工程标/投标人须知正文提取指定内容工程标.py
@@ -68,35 +68,75 @@ post_process 函数尝试将长字符串按特定模式分割成块，每块至
 """

 # 读取JSON数据，提取内容，转换结构，并打印结果
-def extract_from_notice(merged_baseinfo_path,clause_path, type):
-    if type == 1:
-        target_values = ["投标","投标文件","响应文件"]
-    elif type == 2:
-        # target_values = ["开标", "评标", "定标","磋商程序","中标"]
-        target_values=["开标", "评标", "定标","评审","成交","合同","磋商程序", "中标", "程序", "步骤"]
-    elif type == 3:
-        target_values = ["重新招标、不再招标和终止招标","重新招标","重新采购", "不再招标", "不再采购","终止招标","终止采购"]
-    elif type == 4:
-        target_values = ["评标"]  # 测试
-    else:
-        raise ValueError(
-            "Invalid type specified. Use 1 for '投标文件, 投标' or 2 for '开标, 评标, 定标'or 3 for '重新招标'")
-    with open(clause_path, 'r', encoding='utf-8') as file:
-        data = json.load(file)
-        extracted_data = extract_between_sections(data, target_values) #先使用大章节'二、投标文件'这种筛选
-        if not extracted_data:
-            extracted_data = extract_json(data, target_values)  # 若没有，再使用'3.投标文件' 筛选
+def extract_from_notice(merged_baseinfo_path, clause_path, type):
+    """
+    从公告中提取特定类型的内容。
+
+    Args:
+        merged_baseinfo_path (str): 合并后的基础信息路径。
+        clause_path (str): 包含条款的JSON文件路径。
+        type (int): 提取的类型。
+            1 - ["投标", "投标文件", "响应文件"]
+            2 - ["开标", "评标", "定标", "评审", "成交", "合同", "磋商程序", "中标", "程序", "步骤"]
+            3 - ["重新招标、不再招标和终止招标", "重新招标", "重新采购", "不再招标", "不再采购", "终止招标", "终止采购"]
+            4 - ["评标"]   # 测试
+
+    Returns:
+        dict 或 str: 提取并处理后的数据，或在 `clause_path` 为空或发生错误时返回空字符串 `""`。
+    """
+    # 定义默认的返回结果
+    DEFAULT_RESULT = ""
+
+    # 映射 type 到 target_values
+    type_target_map = {
+        1: ["投标", "投标文件", "响应文件"],
+        2: ["开标", "评标", "定标", "评审", "成交", "合同", "磋商程序", "中标", "程序", "步骤"],
+        3: ["重新招标、不再招标和终止招标", "重新招标", "重新采购", "不再招标", "不再采购", "终止招标", "终止采购"],
+        4: ["评标"]  # 测试
+    }
+
+    # 获取对应 type 的 target_values
+    target_values = type_target_map.get(type)
+    if not target_values:
+        print(f"Error: Invalid type specified: {type}. Use 1, 2, 3, or 4.")
+        return DEFAULT_RESULT
+
+    try:
+        # 检查 clause_path 是否为空或仅包含空白字符
+        if clause_path and clause_path.strip():
+            with open(clause_path, 'r', encoding='utf-8') as file:
+                data = json.load(file)
+
+            # 先尝试使用大章节筛选
+            extracted_data = extract_between_sections(data, target_values)
+
            if not extracted_data:
-                final_result = get_requirements_with_gpt(merged_baseinfo_path, type)  # 万一都没，那就调用大模型
-                return final_result
-            final_result=extract_sections(extracted_data,target_values)  #后处理，生成键名
-            return final_result
+                # 如果大章节筛选失败，尝试使用另一种筛选方法
+                extracted_data = extract_json(data, target_values)
+
+                if not extracted_data:
+                    # 如果所有筛选方法均失败，调用回退函数
+                    final_result = get_requirements_with_gpt(merged_baseinfo_path, type)
+                    return final_result
+                else:
+                    # 后处理，生成键名
+                    final_result = extract_sections(extracted_data, target_values)
+                    return final_result
+            else:
+                # 合并键值对，启用结构化
+                extracted_data_concatenated = {
+                    section: concatenate_keys_values(content)
+                    for section, content in extracted_data.items()
+                }
+                return extracted_data_concatenated
        else:
-            extracted_data_concatenated = {
-                section: concatenate_keys_values(content)
-                for section, content in extracted_data.items()
-            }
-            return extracted_data_concatenated
+            # 如果 clause_path 为空，直接调用回退函数
+            final_result = get_requirements_with_gpt(merged_baseinfo_path, type)
+            return final_result
+
+    except Exception as e:
+        print(f"Error occurred while processing clause_path '{clause_path}': {e}")
+        return DEFAULT_RESULT
        # print(json.dumps(res, ensure_ascii=False, indent=4))
        # sorted_data = sort_clean_data_keys(extracted_data)  # 对输入的字典 data 的键进行预处理和排序
        # transformed_data = transform_json(sorted_data)