zbparse/flask_app/main/基础信息整合.py
2024-10-16 20:18:55 +08:00

174 lines
7.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
from flask_app.main.json_utils import clean_json_string, nest_json_under_key,rename_outer_key
from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
from flask_app.main.判断是否分包等 import judge_whether_main, read_questions_from_judge
from flask_app.main.多线程提问 import read_questions_from_file, multi_threading
from flask_app.main.通义千问long import upload_file
def aggregate_basic_info(baseinfo_list):
    """
    Merge a list of basic-info dicts and regroup their keys into categories.

    All dicts in ``baseinfo_list`` are merged (later entries overwrite earlier
    ones on key collisions).  The merged keys are then moved under the group
    they belong to; a group key that was never provided gets the placeholder
    value "未提供".  Keys that belong to no group stay at the top level.

    Args:
        baseinfo_list (list): Basic-info entries.  Only ``dict`` entries are
            used; anything else is skipped with a warning.

    Returns:
        dict: The merged data, with one nested dict per group.
    """
    key_groups = {
        "招标人/代理信息": ["招标人", "招标人联系方式", "招标代理机构", "招标代理机构联系方式"],
        "项目信息": ["项目名称", "招标编号", "项目概况", "招标范围", "招标控制价", "投标竞争下浮率"],
        "关键时间/内容": [
            "投标文件递交截止日期",
            "投标文件递交方式",
            "开标时间",
            "开标地点",
            "投标人要求澄清招标文件的截止时间",
            "投标有效期",
            "评标结果公示媒介",
        ],
        "保证金相关": ["质量保证金", "退还投标保证金"],
        "其他信息": [
            "重新招标、不再招标和终止招标",
            "投标费用承担",
            "招标代理服务费",
            "是否退还投标文件",
        ],
    }

    combined_data = {}
    # A dict is used as an insertion-ordered set: iterating a real ``set`` of
    # strings is subject to hash randomization, which made the position of
    # dynamically added keys differ from one run to the next.
    detected_keys = {}

    # Merge every entry and remember which keys were seen (first-seen order).
    for baseinfo in baseinfo_list:
        if not isinstance(baseinfo, dict):
            # A failed parse upstream must not abort the whole aggregation.
            print(f"基础信息整合: Warning: skipping non-dict basic info entry of type {type(baseinfo).__name__}.")
            continue
        combined_data.update(baseinfo)
        detected_keys.update(dict.fromkeys(baseinfo))

    # Let the detected keys extend the static group configuration.
    dynamic_key_handling(key_groups, list(detected_keys))

    # Nest every grouped key under its group and drop the top-level original.
    for group_name, keys in key_groups.items():
        combined_data[group_name] = {key: combined_data.get(key, "未提供") for key in keys}
        for key in keys:
            combined_data.pop(key, None)

    return combined_data
def dynamic_key_handling(key_groups, detected_keys):
    """
    Add dynamically detected keys to the matching key group, in place.

    Routing rules (first match wins, checked in this order):
      * key contains "保证金"  -> group "保证金相关", inserted right before
        "质量保证金" when that entry exists, otherwise appended;
      * key contains "联合体" or "分包" -> appended to "项目信息";
      * key contains "踏勘现场", "投标预备会" or "偏离" -> appended to "其他信息".

    A key that is already present in its target group is never added a second
    time, so calling this function repeatedly with the same input is a no-op.

    Args:
        key_groups (dict): Mapping of group name -> list of keys; mutated.
        detected_keys (iterable): Keys (strings) found in the collected data.

    Raises:
        KeyError: If a key matches a rule whose target group is missing from
            ``key_groups``.
    """
    deposit_group_name = "保证金相关"
    deposit_anchor = "质量保证金"
    # (keyword, target group) pairs for the plain "append" rules.
    keyword_routes = (
        ("联合体", "项目信息"),
        ("分包", "项目信息"),
        ("踏勘现场", "其他信息"),
        ("投标预备会", "其他信息"),
        ("偏离", "其他信息"),
    )

    for key in detected_keys:
        if "保证金" in key:
            group = key_groups[deposit_group_name]
            if key in group:
                continue  # already listed; avoid duplicates on every path
            if deposit_anchor in group:
                group.insert(group.index(deposit_anchor), key)
            else:
                # Anchor entry missing: fall back to appending at the end.
                group.append(key)
            continue

        for keyword, group_name in keyword_routes:
            if keyword in key:
                group = key_groups[group_name]
                if key not in group:
                    group.append(key)
                break  # first matching rule wins, like the original elif chain
def judge_consortium_bidding(baseinfo_list):
    """
    Report whether consortium bidding is accepted and strip the marker key.

    Every entry of ``baseinfo_list`` is passed through ``clean_json_string``.
    If any resulting dict carries "是否接受联合体投标" with the value "是", the
    function returns True.  The marker key is removed from every entry, and
    ``baseinfo_list`` is rewritten in place with the cleaned dicts.

    Args:
        baseinfo_list (list): Raw basic-info entries; replaced in place by
            their cleaned versions.

    Returns:
        bool: True when at least one entry states that consortium bidding is
        accepted, otherwise False.
    """
    updated_list = []
    accept_bidding = False
    for baseinfo in baseinfo_list:
        json_data = clean_json_string(baseinfo)
        # The flag means "accepted", so it must be set on the value "是";
        # comparing against an empty string never matched a positive answer.
        if json_data.get("是否接受联合体投标") == "是":
            accept_bidding = True
        # The marker is only needed for this decision; drop it from the data.
        json_data.pop("是否接受联合体投标", None)
        updated_list.append(json_data)
    # Slice assignment so the caller's list object sees the cleaned entries.
    baseinfo_list[:] = updated_list
    return accept_bidding
def combine_basic_info(knowledge_name, truncate0, output_folder, clause_path):
    """
    Gather basic information from several sources and aggregate it.

    Args:
        knowledge_name (str): Name passed to the first round of questions.
        truncate0 (str): File path handed to ``judge_whether_main`` and
            uploaded through ``upload_file``.
        output_folder (str): Output folder handed to ``judge_whether_main``.
        clause_path (str): Path handed to ``extract_from_notice``.

    Returns:
        dict: ``{"基础信息": <aggregated basic info>}``.
    """
    collected = []

    # Round 1: ask the summary questions and keep the second element of each
    # response.
    first_round_questions = read_questions_from_file('flask_app/static/提示词/前两章提问总结.txt')
    first_round_results = multi_threading(first_round_questions, knowledge_name)
    for index, response in first_round_results:
        try:
            if not response or len(response) <= 1:
                print(f"基础信息整合: Warning: Missing or incomplete response data for query index {index}.")
                continue
            collected.append(response[1])
        except Exception as e:
            print(f"基础信息整合: Error processing response for query index {index}: {e}")

    # Yes/no style judgements (subcontracting, bid deposit, ...).
    chosen_numbers, merged = judge_whether_main(truncate0, output_folder)
    collected.append(merged)

    # Round 2: follow-up questions selected by the judgements above.
    followup_questions = read_questions_from_judge('flask_app/static/提示词/是否相关问题.txt', chosen_numbers)
    if judge_consortium_bidding(collected):
        # NOTE(review): the example value in this prompt template is empty;
        # confirm that is intended.
        followup_questions.append(
            "该招标文件对于联合体投标的要求是怎样的请按json格式给我提供信息"
            "外层键名为'联合体投标要求',其中有一个嵌套键值对为:\"是否接受联合体投标\":\"\""
        )

    file_id = upload_file(truncate0)
    followup_results = multi_threading(followup_questions, "", file_id, 2)
    if followup_results:
        for _question, response in followup_results:
            collected.append(clean_json_string(response))
    else:
        print("基础信息整合: multi_threading error!")

    # The re-tender / termination clause comes from the notice body instead.
    rebidding_situation = extract_from_notice(clause_path, 3)
    collected.append(rename_outer_key(rebidding_situation, "重新招标、不再招标和终止招标"))

    return {"基础信息": aggregate_basic_info(collected)}
if __name__ == "__main__":
    # Manual run against a local sample directory (developer machine paths).
    knowledge_name = "ztb"
    # Raw strings: the previous literals contained "\A" and "\D", which are
    # invalid escape sequences (SyntaxWarning on Python 3.12+).  The resulting
    # path values are unchanged.
    output_folder = r"C:\Users\Administrator\Desktop\fsdownload\3424b7cb-1f85-44b4-a432-44539b870405"
    truncate0 = output_folder + r"\ztbfile_tobidders_notice_table.pdf"
    clause_path = output_folder + r"\clause1.json"
    res = combine_basic_info(knowledge_name, truncate0, output_folder, clause_path)
    print(json.dumps(res, ensure_ascii=False, indent=4))