11.7 开评定标投标改为平级

2024-11-07 10:13:07 +08:00 · 2024-11-07 10:13:07 +08:00 · 23a31da9e9
commit 23a31da9e9
parent 21eeb87903
3 changed files with 62 additions and 12 deletions
--- a/flask_app/general/投标人须知正文提取指定内容.py
+++ b/flask_app/general/投标人须知正文提取指定内容.py
@@ -183,7 +183,7 @@ def process_nested_data(data):
        # 到达最内层，处理非字典和非列表的元素（字符串）
        return post_process(data)

-#生成无结构的数据
+#生成无结构的数据货物标
 def concatenate_keys_values(section_content):
    """
    将章节内容的键值对拼接成一个字符串列表，每个元素为 "key value"。
@@ -199,6 +199,53 @@ def concatenate_keys_values(section_content):
        concatenated.append(f"{key} {value}")
    return concatenated

+#生成无结构的数据工程标
+def extract_sections(data, target_values):
+    """
+    Collect, per target section, its numbered sub-items as "key value" strings.
+
+    A key whose value is in target_values is a section heading (e.g. "5." ->
+    "开标"); its sub-items are the keys that extend its number path by at least
+    one level ("5.1", "5.1.2"). Paths are compared as parsed integers, not as
+    string prefixes, so heading "5.1" does not absorb "5.10" or "5.11.3".
+    Sub-items of "定标" and "中标" are pooled, in key order, under the single
+    key "定标与中标" whenever either heading contributes at least one sub-item.
+
+    Args:
+        data (dict): Flat mapping of dotted numeric keys ("5.", "5.1") to values.
+        target_values (list): Section names to extract.
+
+    Returns:
+        dict: Section name -> list of "key value" strings in numeric key order;
+            a later heading with the same name replaces the earlier entry.
+
+    Raises:
+        ValueError: If a key has a non-numeric component (int() rejects it).
+    """
+    def parse(key):
+        # "5.1." -> (5, 1)
+        return tuple(int(part) for part in key.strip('.').split('.'))
+
+    # Parse each key once; numeric ordering puts "5.10" after "5.9"
+    ordered = sorted(((parse(key), key) for key in data), key=lambda pair: pair[0])
+    result = {}
+    merged_sections = []
+    for path, key in ordered:
+        section_name = data[key]
+        if section_name not in target_values:
+            continue
+        depth = len(path)
+        # Sub-items: strictly deeper keys that share this heading's number path
+        subitems = [f"{sub_key} {data[sub_key]}" for sub_path, sub_key in ordered
+                    if len(sub_path) > depth and sub_path[:depth] == path]
+        if section_name in ("定标", "中标"):
+            merged_sections.extend(subitems)
+        else:
+            result[section_name] = subitems
+    if merged_sections:
+        result["定标与中标"] = merged_sections
+    return result
+
 def get_requirements_with_gpt(merged_baseinfo_path, selection):
    """
    根据 selection 的值选择相应的用户查询，并调用大模型获取要求。
--- a/flask_app/main/投标人须知正文提取指定内容.py
+++ b/flask_app/main/投标人须知正文提取指定内容.py
@@ -1,6 +1,6 @@
 import json
 import re
-from flask_app.general.投标人须知正文提取指定内容 import process_nested_data, transform_json, get_requirements_with_gpt
+from flask_app.general.投标人须知正文提取指定内容 import process_nested_data, transform_json, get_requirements_with_gpt,extract_sections


 # 对于每个target_value元素，如果有完美匹配json_data中的键，那就加入这个完美匹配的键名，否则，把全部模糊匹配到的键名都加入
@@ -116,19 +116,22 @@ def extract_from_notice(invalid_path,clause_path, type):
        data = json.load(file)
        extracted_data = extract_json(data, target_values)  # 读取json
        # print(json.dumps(extracted_data,ensure_ascii=False,indent=4))
-        sorted_data = sort_clean_data_keys(extracted_data)  # 对输入的字典 data 的键进行预处理和排序
-        transformed_data = transform_json(sorted_data)
+        res=extract_sections(extracted_data,target_values)
+        print(json.dumps(res, ensure_ascii=False, indent=4))
+        # sorted_data = sort_clean_data_keys(extracted_data)  # 对输入的字典 data 的键进行预处理和排序
+        # transformed_data = transform_json(sorted_data)
        # print(json.dumps(transformed_data,ensure_ascii=False,indent=4))
-        final_result = process_nested_data(transformed_data)
-        if not final_result:
-            final_result = get_requirements_with_gpt(invalid_path, type)
-        return final_result
+        # final_result = process_nested_data(transformed_data)
+        # if not final_result:
+        #     final_result = get_requirements_with_gpt(invalid_path, type)
+        # return final_result

 if __name__ == "__main__":
    # file_path = 'C:\\Users\\Administrator\\Desktop\\fsdownload\\3bffaa84-2434-4bd0-a8ee-5c234ccd7fa0\\clause1.json'
-    file_path="C:\\Users\\Administrator\\Desktop\\招标文件\\special_output\\clause1.json"
+    invalid_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\4e5bc6c2-c2b8-4c0b-8e57-81a498b982f6\\ztbfile_invalid.pdf"
+    clause_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\4e5bc6c2-c2b8-4c0b-8e57-81a498b982f6\\clause1.json"
    try:
-        res = extract_from_notice(file_path, 2)  # 可以改变此处的 type 参数测试不同的场景
+        res = extract_from_notice(invalid_path,clause_path, 1)  # 可以改变此处的 type 参数测试不同的场景
        res2 = json.dumps(res, ensure_ascii=False, indent=4)
        print(res2)
    except ValueError as e:
--- a/flask_app/货物标/投标人须知正文提取指定内容货物标版.py
+++ b/flask_app/货物标/投标人须知正文提取指定内容货物标版.py
@@ -107,11 +107,11 @@ def extract_from_notice(merged_baseinfo_path,clause_path, type):
            final_result = get_requirements_with_gpt(merged_baseinfo_path, type)    #万一没用正则匹配到，那就调用大模型
            return final_result
        # print(json.dumps(extracted_data,ensure_ascii=False,indent=4))
-        extracted_data_concatenated = {section: concatenate_keys_values(content)
+        extracted_data_concatenated = {section: concatenate_keys_values(content)           #启用结构化就注释这三行
                                       for section, content in extracted_data.items()}

        return extracted_data_concatenated
-        # transformed_data = process_with_outer_key(extracted_data)
+        # transformed_data = process_with_outer_key(extracted_data)                       #取消注释这三行
        # final_result = process_nested_data(transformed_data)
        # return final_result