11.7 开评定标 投标改为平级
This commit is contained in:
parent
21eeb87903
commit
23a31da9e9
@ -183,7 +183,7 @@ def process_nested_data(data):
|
||||
# 到达最内层,处理非字典和非列表的元素(字符串)
|
||||
return post_process(data)
|
||||
|
||||
#生成无结构的数据
|
||||
#生成无结构的数据货物标
|
||||
def concatenate_keys_values(section_content):
|
||||
"""
|
||||
将章节内容的键值对拼接成一个字符串列表,每个元素为 "key value"。
|
||||
@ -199,6 +199,53 @@ def concatenate_keys_values(section_content):
|
||||
concatenated.append(f"{key} {value}")
|
||||
return concatenated
|
||||
|
||||
#生成无结构的数据工程标
|
||||
def extract_sections(data, target_values):
    """
    Collect the sub-clauses of every section whose title is in ``target_values``.

    ``data`` is a flat mapping from hierarchical clause numbers (e.g. "5.",
    "5.1", "5.1.2") to their text. A key whose value equals one of the
    target titles marks a section; every other key that lies underneath that
    clause number is reported as a ``"<key> <value>"`` string.

    Sections titled "定标" or "中标" are not reported under their own names:
    the sub-clauses of whichever of them are present are pooled, in clause
    order, under the single key "定标与中标".

    Args:
        data (dict): Flat mapping of clause-number strings to values.
        target_values (list): Section titles to extract.

    Returns:
        dict: Section title -> list of formatted sub-clause strings. A title
        that occurs more than once keeps the sub-clauses of its last
        occurrence (in clause order).
    """

    def _components(key):
        # "5.1." -> ("5", "1"); leading/trailing dots carry no hierarchy.
        return tuple(key.strip('.').split('.'))

    def _sort_key(parts):
        # Numeric components sort numerically (so "5.10" follows "5.9").
        # Non-numeric components no longer abort the whole extraction with
        # ValueError; they are ordered after numeric ones, alphabetically.
        ordered = []
        for part in parts:
            try:
                ordered.append((0, int(part), ""))
            except ValueError:
                ordered.append((1, 0, part))
        return ordered

    # Split every key once; reused for both sorting and ancestry checks.
    components = {key: _components(key) for key in data}
    sorted_keys = sorted(data, key=lambda k: _sort_key(components[k]))

    result = {}
    merged_sections = []

    for key in sorted_keys:
        section_name = data[key]
        if section_name not in target_values:
            continue

        section_parts = components[key]
        depth = len(section_parts)

        # A plain string prefix test is not enough: "5.10" starts with "5.1"
        # and "50.1" starts with "5", yet neither belongs to those sections.
        # Require the clause-number components to match as well.
        subitems = [
            f"{sub_key} {data[sub_key]}"
            for sub_key in sorted_keys
            if sub_key != key
            and sub_key.startswith(key)
            and components[sub_key][:depth] == section_parts
        ]

        # "定标" and "中标" are reported together under one merged heading.
        if section_name in ("定标", "中标"):
            merged_sections.extend(subitems)
        else:
            result[section_name] = subitems

    # Emit the merged heading when either section contributed sub-clauses.
    if merged_sections:
        result["定标与中标"] = merged_sections

    return result
|
||||
|
||||
def get_requirements_with_gpt(merged_baseinfo_path, selection):
|
||||
"""
|
||||
根据 selection 的值选择相应的用户查询,并调用大模型获取要求。
|
||||
|
@ -1,6 +1,6 @@
|
||||
import json
|
||||
import re
|
||||
from flask_app.general.投标人须知正文提取指定内容 import process_nested_data, transform_json, get_requirements_with_gpt
|
||||
from flask_app.general.投标人须知正文提取指定内容 import process_nested_data, transform_json, get_requirements_with_gpt,extract_sections
|
||||
|
||||
|
||||
# 对于每个target_value元素,如果有完美匹配json_data中的键,那就加入这个完美匹配的键名,否则,把全部模糊匹配到的键名都加入
|
||||
@ -116,19 +116,22 @@ def extract_from_notice(invalid_path,clause_path, type):
|
||||
data = json.load(file)
|
||||
extracted_data = extract_json(data, target_values) # 读取json
|
||||
# print(json.dumps(extracted_data,ensure_ascii=False,indent=4))
|
||||
sorted_data = sort_clean_data_keys(extracted_data) # 对输入的字典 data 的键进行预处理和排序
|
||||
transformed_data = transform_json(sorted_data)
|
||||
res=extract_sections(extracted_data,target_values)
|
||||
print(json.dumps(res, ensure_ascii=False, indent=4))
|
||||
# sorted_data = sort_clean_data_keys(extracted_data) # 对输入的字典 data 的键进行预处理和排序
|
||||
# transformed_data = transform_json(sorted_data)
|
||||
# print(json.dumps(transformed_data,ensure_ascii=False,indent=4))
|
||||
final_result = process_nested_data(transformed_data)
|
||||
if not final_result:
|
||||
final_result = get_requirements_with_gpt(invalid_path, type)
|
||||
return final_result
|
||||
# final_result = process_nested_data(transformed_data)
|
||||
# if not final_result:
|
||||
# final_result = get_requirements_with_gpt(invalid_path, type)
|
||||
# return final_result
|
||||
|
||||
if __name__ == "__main__":
|
||||
# file_path = 'C:\\Users\\Administrator\\Desktop\\fsdownload\\3bffaa84-2434-4bd0-a8ee-5c234ccd7fa0\\clause1.json'
|
||||
file_path="C:\\Users\\Administrator\\Desktop\\招标文件\\special_output\\clause1.json"
|
||||
invalid_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\4e5bc6c2-c2b8-4c0b-8e57-81a498b982f6\\ztbfile_invalid.pdf"
|
||||
clause_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\4e5bc6c2-c2b8-4c0b-8e57-81a498b982f6\\clause1.json"
|
||||
try:
|
||||
res = extract_from_notice(file_path, 2) # 可以改变此处的 type 参数测试不同的场景
|
||||
res = extract_from_notice(invalid_path,clause_path, 1) # 可以改变此处的 type 参数测试不同的场景
|
||||
res2 = json.dumps(res, ensure_ascii=False, indent=4)
|
||||
print(res2)
|
||||
except ValueError as e:
|
||||
|
@ -107,11 +107,11 @@ def extract_from_notice(merged_baseinfo_path,clause_path, type):
|
||||
final_result = get_requirements_with_gpt(merged_baseinfo_path, type) #万一没用正则匹配到,那就调用大模型
|
||||
return final_result
|
||||
# print(json.dumps(extracted_data,ensure_ascii=False,indent=4))
|
||||
extracted_data_concatenated = {section: concatenate_keys_values(content)
|
||||
extracted_data_concatenated = {section: concatenate_keys_values(content) #启用结构化就注释这三行
|
||||
for section, content in extracted_data.items()}
|
||||
|
||||
return extracted_data_concatenated
|
||||
# transformed_data = process_with_outer_key(extracted_data)
|
||||
# transformed_data = process_with_outer_key(extracted_data) #取消注释这三行
|
||||
# final_result = process_nested_data(transformed_data)
|
||||
# return final_result
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user