2024-10-19 15:33:55 +08:00
|
|
|
|
import copy
|
2024-10-18 16:05:18 +08:00
|
|
|
|
import json
|
2024-10-19 12:53:25 +08:00
|
|
|
|
import time
|
2024-10-19 15:33:55 +08:00
|
|
|
|
import concurrent.futures
|
2024-10-22 10:06:22 +08:00
|
|
|
|
from flask_app.general.json_utils import clean_json_string, rename_outer_key
|
|
|
|
|
from flask_app.general.通用功能函数 import judge_consortium_bidding
|
2024-10-18 16:05:18 +08:00
|
|
|
|
from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
|
2024-10-22 10:06:22 +08:00
|
|
|
|
from flask_app.main.判断是否分包等 import read_questions_from_judge, merge_json_to_list
|
|
|
|
|
from flask_app.general.多线程提问 import read_questions_from_file, multi_threading
|
2024-10-30 16:56:05 +08:00
|
|
|
|
from flask_app.general.通义千问long import upload_file,qianwen_long
|
2024-10-18 16:05:18 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def aggregate_basic_info_engineering(baseinfo_list):
    """
    Merge a list of basic-info dicts and regroup the entries by category.

    All dicts in ``baseinfo_list`` are flattened into one mapping (on duplicate
    keys the later dict wins). The flattened keys are then distributed over the
    fixed groups defined below; ``dynamic_key_handling`` may extend those groups
    with keys that were actually detected. Keys that belong to no group are
    placed under "其他信息".

    Args:
        baseinfo_list (list): dicts holding basic-information key/value pairs.

    Returns:
        dict: one entry per group name, each mapping the group's keys to their
        values. A group key missing from the input gets the value "未提供".
    """
    key_groups = {
        "招标人/代理信息": ["招标人", "招标人联系方式", "招标代理机构", "招标代理机构联系方式"],
        "项目信息": ["项目名称", "招标编号", "项目概况", "招标范围", "招标控制价", "投标竞争下浮率"],
        "关键时间/内容": [
            "投标文件递交截止日期",
            "投标文件递交方式",
            "开标时间",
            "开标地点",
            "投标人要求澄清招标文件的截止时间",
            "投标有效期",
            "评标结果公示媒介",
        ],
        "保证金相关": ["质量保证金", "退还投标保证金"],
        "其他信息": [
            "重新招标、不再招标和终止招标",
            "投标费用承担",
            "招标代理服务费",
            "是否退还投标文件",
        ],
    }

    # Flatten every input dict into one mapping; later dicts override earlier ones.
    combined_data = {}
    for baseinfo in baseinfo_list:
        combined_data.update(baseinfo)

    # Pass the detected keys in first-seen order rather than as a set: set
    # iteration order depends on string-hash randomization, which made the
    # position of dynamically added keys inside a group differ between runs.
    dynamic_key_handling(key_groups, list(combined_data))

    # Build the lookup of classified keys once instead of once per item.
    classified_keys = {key for keys in key_groups.values() for key in keys}
    unclassified_items = {
        key: value for key, value in combined_data.items() if key not in classified_keys
    }

    # Nest the values under their group; absent keys are reported as "未提供".
    grouped = {
        group_name: {key: combined_data.get(key, "未提供") for key in keys}
        for group_name, keys in key_groups.items()
    }

    # Everything that matched no group ends up in the catch-all group.
    grouped["其他信息"].update(unclassified_items)
    return grouped
|
|
|
|
|
|
2024-10-30 11:11:57 +08:00
|
|
|
|
|
2024-10-18 16:05:18 +08:00
|
|
|
|
def dynamic_key_handling(key_groups, detected_keys):
    """
    Extend ``key_groups`` in place with detected keys that match known keywords.

    Routing rules (the first matching rule wins):
      * key contains "保证金"  -> group "保证金相关", inserted right before
        "质量保证金" when that entry exists, otherwise appended;
      * key contains "联合体" or "分包" -> appended to "项目信息";
      * key contains "踏勘现场", "投标预备会" or "偏离" -> appended to "其他信息".

    A key already present in its target group is never added a second time, so
    duplicate entries in ``detected_keys`` or repeated calls are harmless.

    Args:
        key_groups (dict): group name -> list of keys; the lists are mutated.
        detected_keys (iterable): keys found in the extracted data.

    Returns:
        None
    """
    insert_before = "质量保证金"
    # (keyword, target group) pairs, checked in order after the deposit rule.
    append_rules = (
        ("联合体", "项目信息"),
        ("分包", "项目信息"),
        ("踏勘现场", "其他信息"),
        ("投标预备会", "其他信息"),
        ("偏离", "其他信息"),
    )

    for key in detected_keys:
        if "保证金" in key:
            group = key_groups["保证金相关"]
            if key in group:
                continue  # already listed, avoid duplicates
            if insert_before in group:
                group.insert(group.index(insert_before), key)
            else:
                # Anchor entry missing: fall back to appending at the end.
                group.append(key)
            continue

        for keyword, group_name in append_rules:
            if keyword in key:
                group = key_groups[group_name]
                if key not in group:
                    group.append(key)
                break
|
|
|
|
|
|
|
|
|
|
|
2024-10-19 12:53:25 +08:00
|
|
|
|
def update_baseinfo_lists(baseinfo_list1, baseinfo_list2):
    """
    Refresh the values of ``baseinfo_list1`` with matching entries of ``baseinfo_list2``.

    Only keys that already occur somewhere in ``baseinfo_list1`` are updated;
    keys that appear solely in ``baseinfo_list2`` are ignored. The inputs are
    not modified.

    Args:
        baseinfo_list1 (list): dicts defining the structure of the result.
        baseinfo_list2 (list): dicts carrying replacement values.

    Returns:
        list: new dicts with the same keys, in the same order and grouping as
        ``baseinfo_list1``, holding the refreshed values.
    """
    # Flat view of every key known to the first list (later dicts win).
    latest = {}
    for entry in baseinfo_list1:
        latest.update(entry)

    # Overlay the second list, restricted to keys that are already known.
    for entry in baseinfo_list2:
        latest.update({name: val for name, val in entry.items() if name in latest})

    # Rebuild each original dict from the refreshed flat view.
    return [{name: latest[name] for name in entry} for entry in baseinfo_list1]
|
|
|
|
|
|
2024-10-30 11:11:57 +08:00
|
|
|
|
|
2024-10-29 20:40:14 +08:00
|
|
|
|
def process_judge_questions(judge_file_path, chosen_numbers, tobidders_notice_table, baseinfo_list1):
    """
    Ask the follow-up "judge" questions and append the answers to ``baseinfo_list1``.

    The questions are loaded with ``read_questions_from_judge``; when
    ``judge_consortium_bidding(baseinfo_list1)`` is truthy an extra question
    about consortium-bidding requirements is added. The questions are then
    asked against the uploaded ``tobidders_notice_table`` file.

    Args:
        judge_file_path: passed to ``read_questions_from_judge``.
        chosen_numbers: passed to ``read_questions_from_judge``.
        tobidders_notice_table: file handed to ``upload_file``.
        baseinfo_list1 (list): extended in place with one cleaned answer per
            returned (question, response) pair.

    Returns:
        None. Results are delivered by mutating ``baseinfo_list1``.
    """
    pending_questions = read_questions_from_judge(judge_file_path, chosen_numbers)

    if judge_consortium_bidding(baseinfo_list1):
        pending_questions.append(
            "该招标文件对于联合体投标的要求是怎样的,请按json格式给我提供信息,"
            "外层键名为'联合体投标要求',其中有一个嵌套键值对为:\"是否接受联合体投标\":\"是\""
        )

    uploaded_id = upload_file(tobidders_notice_table)
    answers = multi_threading(pending_questions, "", uploaded_id, 2)

    # Nothing came back: report it and leave baseinfo_list1 untouched.
    if not answers:
        print("基础信息整合: multi_threading error!")
        return

    for _, answer in answers:
        baseinfo_list1.append(clean_json_string(answer))
|
|
|
|
|
|
2024-10-30 11:11:57 +08:00
|
|
|
|
|
2024-10-29 20:40:14 +08:00
|
|
|
|
def process_baseinfo_list(baseinfo_list, tobidders_notice):
    """
    Re-ask every basic-info entry whose value is still unknown.

    An entry counts as unknown when its value equals "未知", or when it is a
    dict whose values are all "未知". One question is generated per unknown
    key and asked against the uploaded ``tobidders_notice`` file.

    Args:
        baseinfo_list (list): dicts of previously extracted basic information.
        tobidders_notice: file handed to ``upload_file``; only uploaded when
            at least one question was generated.

    Returns:
        list: the cleaned answers, or an empty list when there is nothing to
        ask or the query returned nothing.
    """

    def _is_unknown(val):
        # Plain "未知", or a dict made up exclusively of "未知" values.
        if val == "未知":
            return True
        return isinstance(val, dict) and all(inner == "未知" for inner in val.values())

    follow_ups = [
        (
            f"根据该招标文件中的信息,{name}的内容是怎样的?"
            f"请按json格式给我提供信息,键名是'{name}',"
            "若存在未知信息,在对应的键值中填'未知'。"
        )
        for entry in baseinfo_list
        for name, val in entry.items()
        if _is_unknown(val)
    ]

    # Guard clause: skip the upload entirely when nothing needs re-asking.
    if not follow_ups:
        return []

    notice_id = upload_file(tobidders_notice)
    answers = multi_threading(follow_ups, "", notice_id, 2)
    if not answers:
        return []
    return [clean_json_string(answer) for _, answer in answers]
|
|
|
|
|
|
2024-10-30 11:11:57 +08:00
|
|
|
|
|
2024-10-30 16:56:05 +08:00
|
|
|
|
def combine_basic_info(invalid_path, merged_baseinfo_path, tobidders_notice_table, tobidders_notice, clause_path):
    """
    Collect, complete and group the basic information of a tender document.

    Steps:
      1. Upload ``merged_baseinfo_path`` and ask the questions from the
         base-info prompt file plus one extra batch of yes/no questions.
      2. Pop the last cleaned answer and pass it to ``merge_json_to_list``,
         which yields ``chosen_numbers`` and a merged dict.
      3. Run ``process_judge_questions`` and ``process_baseinfo_list`` in
         parallel threads, then fold the re-asked values back in with
         ``update_baseinfo_lists``.
      4. Obtain the re-tender / no-further-tender / termination clauses via
         ``extract_from_notice(clause_path, 3)``; if that yields nothing, ask
         the model against ``invalid_path`` instead.
      5. Group everything with ``aggregate_basic_info_engineering``.

    Args:
        invalid_path: file uploaded for the fallback clause query in step 4;
            only used when ``extract_from_notice`` returns a falsy result.
        merged_baseinfo_path: file uploaded for the questions of step 1.
        tobidders_notice_table: file forwarded to ``process_judge_questions``.
        tobidders_notice: file forwarded to ``process_baseinfo_list``.
        clause_path: passed to ``extract_from_notice``.

    Returns:
        dict: ``{"基础信息": <grouped basic information>}``.

    Raises:
        IndexError: if the first question round produced no answers.
    """
    baseinfo_prompt_file_path = 'flask_app/static/提示词/基本信息工程标qianwen-long.txt'
    judge_file_path = 'flask_app/static/提示词/是否相关问题qianwen-long.txt'

    file_id1 = upload_file(merged_baseinfo_path)
    questions = read_questions_from_file(baseinfo_prompt_file_path)

    # Extra yes/no questions (site visit, pre-bid meeting, deviation, subcontracting,
    # deposits, agency fee). The opening quote before 是否允许分包 was missing in
    # the original prompt and is restored here so all key names are quoted alike.
    more_query = (
        "请你根据招标文件信息,回答以下问题:是否组织踏勘现场?是否召开投标预备会?是否允许偏离?"
        "是否退还投标文件?是否允许分包? 是否需要递交投标保证金?是否需要提交履约保证金(履约担保)?"
        "是否有招标代理服务费?请按json格式给我提供信息,键名分别为'是否组织踏勘现场','是否召开投标预备会',"
        "'是否允许偏离','是否退还投标文件','是否允许分包','是否递交投标保证金','是否提交履约保证金',"
        "'是否有招标代理服务费',键值仅限于'是','否','未知',若存在矛盾信息,请回答'未知'。"
    )
    questions.append(more_query)

    baseinfo_results = multi_threading(questions, "", file_id1, 2)
    baseinfo_list1 = [clean_json_string(res) for _, res in baseinfo_results] if baseinfo_results else []

    # Without any answer there is nothing to pop below; fail with the same
    # exception type as before but with a message that names the cause.
    if not baseinfo_list1:
        raise IndexError("combine_basic_info: the base-info question round returned no answers")

    # NOTE(review): assumes multi_threading returns results in question order,
    # so that the last answer belongs to more_query — confirm.
    chosen_numbers, merged = merge_json_to_list(baseinfo_list1.pop())

    # The copy is taken before `merged` is appended and is handed to the second
    # worker, because the first worker appends to baseinfo_list1 concurrently.
    baseinfo_list1_copy = copy.deepcopy(baseinfo_list1)
    baseinfo_list1.append(merged)

    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        future1 = executor.submit(
            process_judge_questions, judge_file_path, chosen_numbers, tobidders_notice_table, baseinfo_list1
        )
        future2 = executor.submit(process_baseinfo_list, baseinfo_list1_copy, tobidders_notice)

        # process_judge_questions mutates baseinfo_list1 and returns nothing;
        # result() is still called so that worker exceptions surface here.
        future1.result()
        baseinfo_list2 = future2.result()

    updated_list = update_baseinfo_lists(baseinfo_list1, baseinfo_list2)

    rebidding_situation = extract_from_notice(clause_path, 3)
    if rebidding_situation:
        update_json = rename_outer_key(rebidding_situation, "重新招标、不再招标和终止招标")
    else:
        # Extraction from the clause file yielded nothing: ask the model instead.
        user_query = (
            "\n"
            "该招标文件中重新招标、不再招标、终止招标的情况分别是什么?请以json格式返回给我结果,键名分别为'重新招标','不再招标','终止招标',键值应该完全与原文内容保持一致,不得擅自总结删减,如果原文中未提及相关内容,在键值中填'未知'。示例输出如下:\n"
            "{\n"
            '"重新招标":"有下列情形之一的,招标人将重新招标:(1)投标截止时间止,投标人少于3个的;(2)经评标委员会评审后否决所有投标的;",\n'
            '"不再招标":"重新招标后投标人仍少于3个或者所有投标被否决的,属于必须审批或核准的工程建设项目,经原审批或核准部门批准后不再进行招标。",\n'
            '"终止招标":"未知"\n'
            "}\n"
        )
        file_id = upload_file(invalid_path)
        res = clean_json_string(qianwen_long(file_id, user_query))
        update_json = rename_outer_key(res, "重新招标、不再招标和终止招标")

    updated_list.append(update_json)
    aggregated_baseinfo = aggregate_basic_info_engineering(updated_list)
    return {"基础信息": aggregated_baseinfo}
|
|
|
|
|
|
2024-10-30 11:11:57 +08:00
|
|
|
|
|
|
|
|
|
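# TODO: query without the bidder-notice body text first; if the answer is '未知' (unknown), ask again directly against the body text.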
|
2024-10-18 16:05:18 +08:00
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run against local sample files; prints the result and the elapsed time.
    start_time = time.time()

    # NOTE(review): combine_basic_info takes invalid_path as its first argument,
    # but this script never defined one and called it with only four arguments
    # (TypeError). The filename below is a placeholder — point it at the real file.
    invalid_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\special_output\\zbtest2_invalid.pdf"
    merged_baseinfo_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\special_output\\zbtest2_merged_baseinfo.pdf"
    tobidders_notice_table = "C:\\Users\\Administrator\\Desktop\\招标文件\\special_output\\zbtest2_tobidders_notice_table.pdf"
    tobidders_notice = "C:\\Users\\Administrator\\Desktop\\招标文件\\special_output\\zbtest2_tobidders_notice.pdf"
    clause_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\special_output\\clause1.json"

    res = combine_basic_info(
        invalid_path, merged_baseinfo_path, tobidders_notice_table, tobidders_notice, clause_path
    )
    print(json.dumps(res, ensure_ascii=False, indent=4))

    end_time = time.time()
    print("elapsed_time:" + str(end_time - start_time))
|