172 lines
8.2 KiB
Python
172 lines
8.2 KiB
Python
# -*- encoding:utf-8 -*-
|
||
import json
|
||
import threading
|
||
import time
|
||
|
||
from flask_app.main.json_utils import clean_json_string
|
||
from flask_app.main.基础信息整合 import judge_consortium_bidding
|
||
from flask_app.main.多线程提问 import read_questions_from_file, multi_threading
|
||
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||
from flask_app.main.判断是否分包等 import merge_json_to_list, read_questions_from_judge
|
||
from flask_app.货物标.提取采购需求main import fetch_procurement_reqs
|
||
|
||
|
||
def aggregate_basic_info(baseinfo_list):
    """
    Merge a list of basic-info dicts and regroup the result by category.

    All dicts in ``baseinfo_list`` are merged into one flat mapping (on a key
    collision the later dict wins).  The keys listed in each category are
    then moved out of the flat mapping into a nested dict stored under the
    category name; a listed key that was never provided gets the placeholder
    value "未提供".  Keys that belong to no category stay at the top level.

    Args:
        baseinfo_list (list): Dicts of basic information to merge.

    Returns:
        dict: The merged mapping with one nested dict per category.
    """
    key_groups = {
        "招标人/代理信息": ["招标人", "招标人联系方式", "招标代理机构", "招标代理机构联系方式","项目联系方式"],
        "项目信息": ["项目名称", "项目编号", "项目概况", "项目基本情况", "招标控制价", "投标竞争下浮率"],
        "采购要求": ["技术要求","商务要求","服务要求","其他要求"],
        "关键时间/内容": [
            "投标文件递交截止日期",
            "开标时间",
            "开标地点",
            "澄清招标文件的截止时间",
            "投标有效期",
            "信息公示媒介"
        ],
        "保证金相关": ["质量保证金"],
        "其他信息": [
            "投标费用承担",
            "招标代理服务费",
            "是否退还投标文件"
        ]
    }

    combined_data = {}
    # A dict is used as an insertion-ordered set.  Iterating a real set of
    # strings depends on hash randomization, which made the position of the
    # dynamically added keys (and so the output key order) vary between runs.
    relevant_keys_detected = {}

    # Merge every basic-info dict and remember each key in first-seen order.
    for baseinfo in baseinfo_list:
        combined_data.update(baseinfo)
        relevant_keys_detected.update(dict.fromkeys(baseinfo.keys()))

    # Let the detected keys extend the category lists (mutates key_groups).
    dynamic_key_handling(key_groups, list(relevant_keys_detected))

    # Move each category's keys into a nested dict under the category name.
    for group_name, keys in key_groups.items():
        group_data = {key: combined_data.get(key, "未提供") for key in keys}
        combined_data[group_name] = group_data
        # Drop the flat copies so a value does not appear twice in the output.
        for key in keys:
            combined_data.pop(key, None)

    return combined_data
|
||
|
||
def dynamic_key_handling(key_groups, detected_keys):
    """
    Extend the category lists in ``key_groups`` with dynamically detected keys.

    Each detected key is matched against the substring rules below; the first
    matching rule decides which category the key joins and where it goes:

    - a key containing "保证金" is inserted directly before "质量保证金";
    - a key containing "递交方式" or "递交地点" is inserted directly after
      "投标文件递交截止日期";
    - every other matching key is appended to the end of its category.

    If the anchor entry is missing from the category, the key is appended
    instead.  A key that is already present in its category is never added
    again, so repeated keys or repeated calls do not create duplicates.
    Keys matching no rule are ignored.

    Args:
        key_groups (dict): Category name -> list of keys; modified in place.
        detected_keys (iterable): Key names found in the merged data.

    Raises:
        KeyError: If a rule matches but its category is absent from
            ``key_groups``.
    """
    # (substrings to look for, target category, anchor entry, offset from anchor)
    # Rule order matters: only the first matching rule is applied to a key.
    rules = (
        (("保证金",), "保证金相关", "质量保证金", 0),
        (("联合体",), "项目信息", None, 0),
        (("分包",), "项目信息", None, 0),
        (("踏勘现场",), "其他信息", None, 0),
        (("投标预备会", "投标答疑会"), "其他信息", None, 0),
        (("偏离",), "其他信息", None, 0),
        (("递交方式", "递交地点"), "关键时间/内容", "投标文件递交截止日期", 1),
    )

    for key in detected_keys:
        for markers, group_name, anchor, offset in rules:
            if not any(marker in key for marker in markers):
                continue
            group = key_groups[group_name]
            if key not in group:  # uniform guard against duplicate entries
                if anchor is not None and anchor in group:
                    group.insert(group.index(anchor) + offset, key)
                else:
                    group.append(key)
            break
|
||
|
||
def get_base_info(baseinfo_file_path):
    """
    Ask the LLM for the basic information of an uploaded tender document.

    The document is uploaded once, then queried in two rounds through
    ``multi_threading``: first the questions from the basic-info prompt file
    plus one combined yes/no question, then the follow-up questions selected
    from the answer to that yes/no question (plus a consortium question when
    ``judge_consortium_bidding`` reports that consortium bids are accepted).

    Args:
        baseinfo_file_path: Path of the document to upload and query.

    Returns:
        list: The collected answers.  Empty when the first round returned no
        results; if only the second round fails, the first-round answers are
        still returned.
    """
    file_id = upload_file(baseinfo_file_path)
    # Kept in its own name: the original code overwrote the
    # ``baseinfo_file_path`` parameter with this prompt path.
    prompt_file_path = 'flask_app/static/提示词/基本信息货物标.txt'
    questions = read_questions_from_file(prompt_file_path)
    # NOTE(review): the key list inside this prompt has an unbalanced quote
    # (是否允许分包' has no opening quote).  The text is sent to the model as-is,
    # so it is left unchanged here -- confirm before correcting it.
    more_query = "请你根据招标文件信息,回答以下问题:是否组织踏勘现场?是否召开投标预备会(或投标答疑会)?是否退还投标文件?是否允许分包? 是否需要递交投标保证金(或磋商保证金)?是否需要提交履约保证金(或履约担保)?是否有招标代理服务费(或中标、成交服务费)?请按json格式给我提供信息,键名分别为'是否组织踏勘现场','是否召开投标预备会'(或'是否召开投标答疑会'),'是否退还投标文件',是否允许分包','是否递交投标保证金'(或'是否递交磋商保证金'),'是否提交履约保证金','是否有招标代理服务费',键值仅限于'是','否','未知',若存在矛盾信息,请回答'未知'。"
    questions.append(more_query)
    baseinfo_results = multi_threading(questions, "", file_id, 2)  # 1 = Bailian RAG, 2 = qianwen-long
    baseinfo_list = [res for _, res in baseinfo_results] if baseinfo_results else []
    if not baseinfo_list:
        # Without any answer there is nothing to pop below; the original
        # code raised IndexError on the empty list here.
        print("基础信息整合: multi_threading error!")
        return baseinfo_list

    # The yes/no question was appended last, so its answer is taken from the
    # end of the list (assumes multi_threading keeps question order -- TODO confirm).
    chosen_numbers, merged = merge_json_to_list(clean_json_string(baseinfo_list.pop()))
    baseinfo_list.append(merged)

    judge_file_path = 'flask_app/static/提示词/是否相关问题货物标.txt'
    judge_questions = read_questions_from_judge(judge_file_path, chosen_numbers)
    # Decide from the answers gathered so far whether consortium bids are accepted.
    judge_consortium = judge_consortium_bidding(baseinfo_list)

    if judge_consortium:
        judge_consortium_question = (
            "该招标文件对于联合体投标的要求是怎样的,请按json格式给我提供信息,"
            "外层键名为'联合体投标要求',其中有一个嵌套键值对为:\"是否接受联合体投标\":\"是\""
        )
        judge_questions.append(judge_consortium_question)

    res2 = multi_threading(judge_questions, "", file_id, 2)  # second round, qianwen-long
    if not res2:
        print("基础信息整合: multi_threading error!")
    else:
        for _, response in res2:
            baseinfo_list.append(clean_json_string(response))
    return baseinfo_list
|
||
|
||
def combine_basic_info(baseinfo_file_path, procurement_file_path):
    """
    Collect basic info and procurement requirements in parallel and group them.

    ``get_base_info`` and ``fetch_procurement_reqs`` each run in their own
    thread; the second thread is started one second after the first.  Once
    both have finished, the procurement requirements are appended to the
    basic-info answers and the combined list is passed to
    ``aggregate_basic_info``.

    Args:
        baseinfo_file_path: Path handed to ``get_base_info``.
        procurement_file_path: Path handed to ``fetch_procurement_reqs``.

    Returns:
        dict: ``{"基础信息": <aggregated basic info>}``.
    """
    # Slots the worker threads write into; the defaults are what remains if
    # a worker never stores a result.
    outcome = {"base": [], "procurement": {}}

    def _collect_base_info():
        outcome["base"] = get_base_info(baseinfo_file_path)

    def _collect_procurement_reqs():
        outcome["procurement"] = fetch_procurement_reqs(procurement_file_path)

    base_worker = threading.Thread(target=_collect_base_info)
    procurement_worker = threading.Thread(target=_collect_procurement_reqs)

    # Start the basic-info worker, wait one second, then start the other.
    base_worker.start()
    time.sleep(1)
    procurement_worker.start()

    # Block until both workers are done.
    for worker in (base_worker, procurement_worker):
        worker.join()

    # Basic-info answers first, the procurement dict as the last element.
    merged_inputs = [*outcome["base"], outcome["procurement"]]
    return {"基础信息": aggregate_basic_info(merged_inputs)}
|
||
|
||
|
||
if __name__ == "__main__":
    # Manual run against local sample files: prints the aggregated result as
    # indented JSON, followed by the wall-clock duration of the run.
    started_at = time.time()
    baseinfo_file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\truncate_all\\ztbfile_merged_baseinfo\\ztbfile_merged_baseinfo_3-31.pdf"
    procurement_file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zboutpub\\广水农商行门禁控制主机及基础验证设备采购项目——磋商文件(定稿)(三次)_procurement.pdf"
    res = combine_basic_info(baseinfo_file_path, procurement_file_path)
    print(json.dumps(res, ensure_ascii=False, indent=4))
    # NOTE(review): "elasped" is a typo for "elapsed"; the emitted text is
    # kept unchanged here.
    print(f"elasped time:{time.time() - started_at}")
|