12.12 解决了 clause_path 为空时开评定标失败的 bug
This commit is contained in:
parent
e82233397f
commit
29064ed76b
@@ -68,35 +68,75 @@ post_process 函数尝试将长字符串按特定模式分割成块,每块至
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# 读取JSON数据,提取内容,转换结构,并打印结果
|
# 读取JSON数据,提取内容,转换结构,并打印结果
|
||||||
def extract_from_notice(merged_baseinfo_path,clause_path, type):
|
def extract_from_notice(merged_baseinfo_path, clause_path, type):
|
||||||
if type == 1:
|
"""
|
||||||
target_values = ["投标","投标文件","响应文件"]
|
从公告中提取特定类型的内容。
|
||||||
elif type == 2:
|
|
||||||
# target_values = ["开标", "评标", "定标","磋商程序","中标"]
|
Args:
|
||||||
target_values=["开标", "评标", "定标","评审","成交","合同","磋商程序", "中标", "程序", "步骤"]
|
merged_baseinfo_path (str): 合并后的基础信息路径。
|
||||||
elif type == 3:
|
clause_path (str): 包含条款的JSON文件路径。
|
||||||
target_values = ["重新招标、不再招标和终止招标","重新招标","重新采购", "不再招标", "不再采购","终止招标","终止采购"]
|
type (int): 提取的类型。
|
||||||
elif type == 4:
|
1 - ["投标", "投标文件", "响应文件"]
|
||||||
target_values = ["评标"] # 测试
|
2 - ["开标", "评标", "定标", "评审", "成交", "合同", "磋商程序", "中标", "程序", "步骤"]
|
||||||
else:
|
3 - ["重新招标、不再招标和终止招标", "重新招标", "重新采购", "不再招标", "不再采购", "终止招标", "终止采购"]
|
||||||
raise ValueError(
|
4 - ["评标"] # 测试
|
||||||
"Invalid type specified. Use 1 for '投标文件, 投标' or 2 for '开标, 评标, 定标'or 3 for '重新招标'")
|
|
||||||
|
Returns:
|
||||||
|
dict 或 str: 提取并处理后的数据,或在 `clause_path` 为空或发生错误时返回空字符串 `""`。
|
||||||
|
"""
|
||||||
|
# 定义默认的返回结果
|
||||||
|
DEFAULT_RESULT = ""
|
||||||
|
|
||||||
|
# 映射 type 到 target_values
|
||||||
|
type_target_map = {
|
||||||
|
1: ["投标", "投标文件", "响应文件"],
|
||||||
|
2: ["开标", "评标", "定标", "评审", "成交", "合同", "磋商程序", "中标", "程序", "步骤"],
|
||||||
|
3: ["重新招标、不再招标和终止招标", "重新招标", "重新采购", "不再招标", "不再采购", "终止招标", "终止采购"],
|
||||||
|
4: ["评标"] # 测试
|
||||||
|
}
|
||||||
|
|
||||||
|
# 获取对应 type 的 target_values
|
||||||
|
target_values = type_target_map.get(type)
|
||||||
|
if not target_values:
|
||||||
|
print(f"Error: Invalid type specified: {type}. Use 1, 2, 3, or 4.")
|
||||||
|
return DEFAULT_RESULT
|
||||||
|
|
||||||
|
try:
|
||||||
|
# 检查 clause_path 是否为空或仅包含空白字符
|
||||||
|
if clause_path and clause_path.strip():
|
||||||
with open(clause_path, 'r', encoding='utf-8') as file:
|
with open(clause_path, 'r', encoding='utf-8') as file:
|
||||||
data = json.load(file)
|
data = json.load(file)
|
||||||
extracted_data = extract_between_sections(data, target_values) #先使用大章节'二、投标文件'这种筛选
|
|
||||||
|
# 先尝试使用大章节筛选
|
||||||
|
extracted_data = extract_between_sections(data, target_values)
|
||||||
|
|
||||||
if not extracted_data:
|
if not extracted_data:
|
||||||
extracted_data = extract_json(data, target_values) # 若没有,再使用'3.投标文件' 筛选
|
# 如果大章节筛选失败,尝试使用另一种筛选方法
|
||||||
|
extracted_data = extract_json(data, target_values)
|
||||||
|
|
||||||
if not extracted_data:
|
if not extracted_data:
|
||||||
final_result = get_requirements_with_gpt(merged_baseinfo_path, type) # 万一都没,那就调用大模型
|
# 如果所有筛选方法均失败,调用回退函数
|
||||||
return final_result
|
final_result = get_requirements_with_gpt(merged_baseinfo_path, type)
|
||||||
final_result=extract_sections(extracted_data,target_values) #后处理,生成键名
|
|
||||||
return final_result
|
return final_result
|
||||||
else:
|
else:
|
||||||
|
# 后处理,生成键名
|
||||||
|
final_result = extract_sections(extracted_data, target_values)
|
||||||
|
return final_result
|
||||||
|
else:
|
||||||
|
# 合并键值对,启用结构化
|
||||||
extracted_data_concatenated = {
|
extracted_data_concatenated = {
|
||||||
section: concatenate_keys_values(content)
|
section: concatenate_keys_values(content)
|
||||||
for section, content in extracted_data.items()
|
for section, content in extracted_data.items()
|
||||||
}
|
}
|
||||||
return extracted_data_concatenated
|
return extracted_data_concatenated
|
||||||
|
else:
|
||||||
|
# 如果 clause_path 为空,直接调用回退函数
|
||||||
|
final_result = get_requirements_with_gpt(merged_baseinfo_path, type)
|
||||||
|
return final_result
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error occurred while processing clause_path '{clause_path}': {e}")
|
||||||
|
return DEFAULT_RESULT
|
||||||
# print(json.dumps(res, ensure_ascii=False, indent=4))
|
# print(json.dumps(res, ensure_ascii=False, indent=4))
|
||||||
# sorted_data = sort_clean_data_keys(extracted_data) # 对输入的字典 data 的键进行预处理和排序
|
# sorted_data = sort_clean_data_keys(extracted_data) # 对输入的字典 data 的键进行预处理和排序
|
||||||
# transformed_data = transform_json(sorted_data)
|
# transformed_data = transform_json(sorted_data)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user