zbparse/flask_app/old_version/基础信息整合_old.py

167 lines
7.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
from flask_app.general.json_utils import clean_json_string
from flask_app.工程标.投标人须知正文提取指定内容工程标 import extract_from_notice
from flask_app.old_version.判断是否分包等_old import judge_whether_main, read_questions_from_judge
from flask_app.general.llm.多线程提问 import read_questions_from_file, multi_threading
from flask_app.general.llm.通义千问long import upload_file
from flask_app.general.通用功能函数 import judge_consortium_bidding
def aggregate_basic_info_engineering(baseinfo_list):
"""
将基础信息列表中的数据进行合并和分类。
参数:
- baseinfo_list (list): 包含多个基础信息的列表。
返回:
- dict: 合并和分类后的基础信息字典。
"""
key_groups = {
"招标人/代理信息": ["招标人", "招标人联系方式", "招标代理机构", "招标代理机构联系方式"],
"项目信息": ["项目名称", "招标编号", "项目概况", "招标范围", "招标控制价", "投标竞争下浮率"],
"关键时间/内容": [
"投标文件递交截止日期",
"投标文件递交方式",
"开标时间",
"开标地点",
"投标人要求澄清招标文件的截止时间",
"投标有效期",
"评标结果公示媒介"
],
"保证金相关": ["质量保证金", "退还投标保证金"],
"其他信息": [
"重新招标、不再招标和终止招标",
"投标费用承担",
"招标代理服务费",
"是否退还投标文件",
]
}
combined_data = {}
relevant_keys_detected = set()
# 合并所有基础信息并收集相关键
for baseinfo in baseinfo_list:
combined_data.update(baseinfo)
relevant_keys_detected.update(baseinfo.keys())
# 动态调整键组
dynamic_key_handling(key_groups, relevant_keys_detected)
# 创建一个副本以存储未分类的项目
unclassified_items = {k: v for k, v in combined_data.items() if k not in [item for sublist in key_groups.values() for item in sublist]}
# 按键组分类并嵌套
for group_name, keys in key_groups.items():
group_data = {key: combined_data.get(key, "未提供") for key in keys}
combined_data[group_name] = group_data
# 从 unclassified_items 中移除已分类的键
for key in keys:
unclassified_items.pop(key, None)
# 将剩余未分类的键值对添加到 "其他信息" 组
combined_data["其他信息"].update(unclassified_items)
# 移除顶层的未分类键值对
for key in list(combined_data.keys()):
if key not in key_groups:
del combined_data[key]
return combined_data
def dynamic_key_handling(key_groups, detected_keys):
# 检查和调整键组配置
for key in detected_keys:
# 处理“保证金相关”组,插到"质量保证金"前
if "保证金" in key:
group = key_groups["保证金相关"]
insert_before = "质量保证金"
if insert_before in group:
index = group.index(insert_before)
if key not in group: # 避免重复插入
group.insert(index, key)
else:
group.append(key) # 如果没有找到特定键,则追加到末尾
elif "联合体" in key:
key_groups["项目信息"].append(key)
elif "分包" in key:
key_groups["项目信息"].append(key)
elif "踏勘现场" in key:
key_groups["其他信息"].append(key)
elif "投标预备会" in key:
key_groups["其他信息"].append(key)
elif "偏离" in key:
key_groups["其他信息"].append(key)
def combine_basic_info(knowledge_name, truncate0, output_folder, clause_path):
"""
综合和处理基础信息,生成最终的基础信息字典。
参数:
- knowledge_name (str): 知识名称。
- truncate0 (str): 文件路径。
- output_folder (str): 输出文件夹路径。
- clause_path (str): 条款路径。
返回:
- dict: 综合后的基础信息。
"""
baseinfo_list = []
baseinfo_file_path = 'flask_app/static/提示词/基本信息工程标.txt'
# baseinfo_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\基本信息工程标.txt'
questions = read_questions_from_file(baseinfo_file_path)
res1 = multi_threading(questions, knowledge_name)
for index, response in res1:
try:
if response and len(response) > 1:
baseinfo_list.append(clean_json_string(response[1]))
else:
print(f"基础信息整合: Warning: Missing or incomplete response data for query index {index}.")
except Exception as e:
print(f"基础信息整合: Error processing response for query index {index}: {e}")
# 判断是否分包、是否需要递交投标保证金等
chosen_numbers, merged = judge_whether_main(truncate0, output_folder)
baseinfo_list.append(merged)
judge_file_path = 'flask_app/static/提示词/是否相关问题.txt'
# judge_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\是否相关问题.txt'
judge_questions = read_questions_from_judge(judge_file_path, chosen_numbers)
judge_consortium = judge_consortium_bidding(baseinfo_list) # 通过招标公告判断是否接受联合体投标
if judge_consortium:
judge_consortium_question = (
"该招标文件对于联合体投标的要求是怎样的请按json格式给我提供信息"
"外层键名为'联合体投标要求',其中有一个嵌套键值对为:\"是否接受联合体投标\":\"\""
)
judge_questions.append(judge_consortium_question)
file_id = upload_file(truncate0)
res2 = multi_threading(judge_questions, "", file_id, 2) # 调用千问-long
if not res2:
print("基础信息整合: multi_threading error!")
else:
for question, response in res2:
baseinfo_list.append(clean_json_string(response))
rebidding_situation = extract_from_notice(clause_path, 3) # "重新招标, 不再招标和终止招标"需从投标人须知正文提取
update_json = rename_outer_key(rebidding_situation, "重新招标、不再招标和终止招标")
baseinfo_list.append(update_json)
aggregated_baseinfo = aggregate_basic_info_engineering(baseinfo_list) # 现在是一个字典
return {"基础信息": aggregated_baseinfo}
if __name__ == "__main__":
knowledge_name = "ztb"
output_folder="C:\\Users\Administrator\Desktop\\fsdownload\\3424b7cb-1f85-44b4-a432-44539b870405"
truncate0="C:\\Users\Administrator\Desktop\\fsdownload\\3424b7cb-1f85-44b4-a432-44539b870405\\ztbfile_tobidders_notice_table.pdf"
clause_path="C:\\Users\Administrator\Desktop\\fsdownload\\3424b7cb-1f85-44b4-a432-44539b870405\\clause1.json"
res=combine_basic_info(knowledge_name,truncate0,output_folder,clause_path)
print(json.dumps(res,ensure_ascii=False,indent=4))