zbparse/flask_app/old_version/基础信息整合_old.py

167 lines
7.1 KiB
Python
Raw Permalink Normal View History

2024-11-28 13:38:17 +08:00
import json
from flask_app.general.json_utils import clean_json_string
2024-12-06 14:40:22 +08:00
from flask_app.工程标.投标人须知正文提取指定内容工程标 import extract_from_notice
from flask_app.old_version.判断是否分包等_old import judge_whether_main, read_questions_from_judge
from flask_app.general.llm.多线程提问 import read_questions_from_file, multi_threading
from flask_app.general.llm.通义千问long import upload_file
2024-11-28 13:38:17 +08:00
from flask_app.general.通用功能函数 import judge_consortium_bidding
def aggregate_basic_info_engineering(baseinfo_list):
    """
    Merge a list of basic-info dicts and regroup the result into named groups.

    All dicts are merged into one (later entries overwrite earlier ones on
    key collisions). The merged keys are handed to dynamic_key_handling so it
    can extend the predefined groups, and the merged data is then split into
    those groups. Any predefined key without data is filled with "未提供";
    any merged key that belongs to no group is placed into "其他信息".

    Args:
    - baseinfo_list (list): list of dicts holding pieces of basic info.
      Entries that are not dicts are ignored.

    Returns:
    - dict: group name -> dict of the keys that belong to that group.
    """
    key_groups = {
        "招标人/代理信息": ["招标人", "招标人联系方式", "招标代理机构", "招标代理机构联系方式"],
        "项目信息": ["项目名称", "招标编号", "项目概况", "招标范围", "招标控制价", "投标竞争下浮率"],
        "关键时间/内容": [
            "投标文件递交截止日期",
            "投标文件递交方式",
            "开标时间",
            "开标地点",
            "投标人要求澄清招标文件的截止时间",
            "投标有效期",
            "评标结果公示媒介",
        ],
        "保证金相关": ["质量保证金", "退还投标保证金"],
        "其他信息": [
            "重新招标、不再招标和终止招标",
            "投标费用承担",
            "招标代理服务费",
            "是否退还投标文件",
        ],
    }

    # Merge every piece of basic info; a later dict wins on duplicate keys.
    merged = {}
    for baseinfo in baseinfo_list:
        # A non-dict entry cannot be merged, so skip it instead of letting it
        # abort the whole aggregation.
        if isinstance(baseinfo, dict):
            merged.update(baseinfo)

    # Pass the keys in insertion order (not as a set) so the position of the
    # dynamically added keys inside each group is the same on every run.
    dynamic_key_handling(key_groups, list(merged))

    # Build the lookup of grouped keys once instead of once per merged item.
    classified_keys = {key for keys in key_groups.values() for key in keys}

    # Nest the merged values under their groups, with a placeholder for
    # anything that was not provided.
    grouped = {
        group_name: {key: merged.get(key, "未提供") for key in keys}
        for group_name, keys in key_groups.items()
    }

    # Everything that belongs to no group ends up under "其他信息".
    grouped["其他信息"].update(
        {key: value for key, value in merged.items() if key not in classified_keys}
    )
    return grouped
def dynamic_key_handling(key_groups, detected_keys):
    """
    Extend the key groups in place with the dynamically detected keys.

    Each detected key is routed by the first keyword it contains:
    - "保证金": added to "保证金相关", directly before "质量保证金" when that
      entry exists, otherwise at the end of the group.
    - "联合体" / "分包": appended to "项目信息".
    - "踏勘现场" / "投标预备会" / "偏离": appended to "其他信息".
    Keys that contain none of the keywords are left untouched. A key that is
    already present in its target group is never added again, so duplicate
    input or repeated calls do not create duplicate entries.

    Args:
    - key_groups (dict): group name -> list of keys; mutated in place.
    - detected_keys (iterable): keys found in the parsed data. Non-string
      keys cannot contain a keyword and are skipped.

    Raises:
    - KeyError: if the target group of a matching key is missing from
      key_groups.
    """
    deposit_keyword = "保证金"
    deposit_group = "保证金相关"
    deposit_anchor = "质量保证金"
    # Ordered (keyword, target group) rules; the first keyword found wins.
    append_rules = (
        ("联合体", "项目信息"),
        ("分包", "项目信息"),
        ("踏勘现场", "其他信息"),
        ("投标预备会", "其他信息"),
        ("偏离", "其他信息"),
    )

    for key in detected_keys:
        if not isinstance(key, str):
            continue

        # Deposit-related keys take priority over every other rule.
        if deposit_keyword in key:
            group = key_groups[deposit_group]
            if key in group:
                continue  # already listed, never insert twice
            if deposit_anchor in group:
                group.insert(group.index(deposit_anchor), key)
            else:
                # No anchor to insert before, so fall back to the end.
                group.append(key)
            continue

        for keyword, group_name in append_rules:
            if keyword in key:
                group = key_groups[group_name]
                if key not in group:
                    group.append(key)
                break
def combine_basic_info(knowledge_name, truncate0, output_folder, clause_path):
    """
    Collect basic-info fragments from several sources and aggregate them.

    Args:
    - knowledge_name (str): passed to multi_threading together with the
      questions read from the basic-info prompt file.
    - truncate0 (str): file path handed to judge_whether_main and upload_file.
    - output_folder (str): output folder path handed to judge_whether_main.
    - clause_path (str): path handed to extract_from_notice.

    Returns:
    - dict: {"基础信息": <result of aggregate_basic_info_engineering>}.
    """
    prompt_path = 'flask_app/static/提示词/基本信息工程标.txt'
    collected = []

    # Round 1: ask the prompt-file questions against the knowledge base.
    first_round = multi_threading(read_questions_from_file(prompt_path), knowledge_name)
    for index, response in first_round:
        try:
            if not response or len(response) <= 1:
                print(f"基础信息整合: Warning: Missing or incomplete response data for query index {index}.")
                continue
            collected.append(clean_json_string(response[1]))
        except Exception as e:
            print(f"基础信息整合: Error processing response for query index {index}: {e}")

    # Yes/no style judgements (subcontracting, bid bond, ...) select which
    # follow-up questions get asked in round 2.
    chosen_numbers, merged = judge_whether_main(truncate0, output_folder)
    collected.append(merged)

    follow_up_path = 'flask_app/static/提示词/是否相关问题.txt'
    follow_up_questions = read_questions_from_judge(follow_up_path, chosen_numbers)

    # Add the consortium question only when the info gathered so far says a
    # consortium bid is accepted.
    if judge_consortium_bidding(collected):
        follow_up_questions.append(
            "该招标文件对于联合体投标的要求是怎样的请按json格式给我提供信息"
            "外层键名为'联合体投标要求',其中有一个嵌套键值对为:\"是否接受联合体投标\":\"\""
        )

    # Round 2: ask the follow-up questions against the uploaded file.
    file_id = upload_file(truncate0)
    second_round = multi_threading(follow_up_questions, "", file_id, 2)
    if second_round:
        for _question, response in second_round:
            collected.append(clean_json_string(response))
    else:
        print("基础信息整合: multi_threading error!")

    # The re-tendering clause is extracted from the notice body and stored
    # under a fixed outer key.
    # NOTE(review): rename_outer_key is not imported in the visible part of
    # this file - confirm where it is defined, otherwise this raises NameError.
    rebidding_situation = extract_from_notice(clause_path, 3)
    collected.append(rename_outer_key(rebidding_situation, "重新招标、不再招标和终止招标"))

    return {"基础信息": aggregate_basic_info_engineering(collected)}
if __name__ == "__main__":
    # Manual run of combine_basic_info against hard-coded local sample paths.
    knowledge_name = "ztb"
    # Raw strings keep every backslash literal; the previous literals relied
    # on invalid escape sequences such as "\A" and "\D", which newer Python
    # versions warn about. The resulting path values are unchanged.
    output_folder = r"C:\Users\Administrator\Desktop\fsdownload\3424b7cb-1f85-44b4-a432-44539b870405"
    truncate0 = rf"{output_folder}\ztbfile_tobidders_notice_table.pdf"
    clause_path = rf"{output_folder}\clause1.json"
    res = combine_basic_info(knowledge_name, truncate0, output_folder, clause_path)
    print(json.dumps(res, ensure_ascii=False, indent=4))