# -*- encoding:utf-8 -*-
import json
import threading
import time
import concurrent.futures
from flask_app.general.json_utils import clean_json_string, rename_outer_key
from flask_app.general.通用功能函数 import judge_consortium_bidding, process_judge_questions
from flask_app.general.多线程提问 import read_questions_from_file, multi_threading
from flask_app.general.通义千问long import upload_file
from flask_app.main.判断是否分包等 import merge_json_to_list, read_questions_from_judge
from flask_app.货物标.投标人须知正文提取指定内容货物标版 import extract_from_notice
from flask_app.货物标.提取采购需求main import fetch_procurement_reqs


def aggregate_basic_info_goods(baseinfo_list):
    """
    Merge the dictionaries in ``baseinfo_list`` and nest them under fixed groups.

    Args:
        baseinfo_list (list): Dictionaries of basic information. Later entries
            overwrite earlier ones on key collisions. Entries that are not
            dictionaries are skipped.

    Returns:
        dict: One entry per group name. Every key listed for a group is
        present in that group's dictionary, with "未提供" as the value when no
        input supplied it. Keys that belong to no group are placed under
        "其他信息".
    """
    key_groups = {
        "招标人/代理信息": ["招标人", "招标人联系方式", "招标代理机构", "招标代理机构联系方式", "项目联系方式"],
        "项目信息": ["项目名称", "项目编号", "项目概况", "项目基本情况", "招标控制价", "投标竞争下浮率"],
        "采购要求": ["技术要求", "商务要求", "服务要求", "其他要求"],
        "关键时间/内容": [
            "投标文件递交截止日期",
            "开标时间",
            "开标地点",
            "澄清招标文件的截止时间",
            "投标有效期",
            "信息公示媒介",
        ],
        "保证金相关": ["质量保证金"],
        "其他信息": [
            "重新招标、不再招标和终止招标",
            "投标费用承担",
            "招标代理服务费",
            "是否退还投标文件",
        ],
    }

    # Flatten all inputs into one dictionary; a non-dict entry cannot be merged.
    combined_data = {}
    for baseinfo in baseinfo_list:
        if isinstance(baseinfo, dict):
            combined_data.update(baseinfo)

    # Extend the groups with keys that only show up at runtime. The keys are
    # passed in first-seen order (not as a set) so the layout of the result is
    # the same on every run.
    dynamic_key_handling(key_groups, list(combined_data))

    # Built once so the membership test below is O(1) per key.
    grouped_keys = {key for keys in key_groups.values() for key in keys}
    unclassified_items = {
        key: value for key, value in combined_data.items() if key not in grouped_keys
    }

    aggregated = {
        group_name: {key: combined_data.get(key, "未提供") for key in keys}
        for group_name, keys in key_groups.items()
    }
    # Whatever fits no group is kept under "其他信息" rather than dropped.
    aggregated["其他信息"].update(unclassified_items)
    return aggregated


def _insert_unique(group, key, anchor=None, after=False):
    """Add ``key`` to ``group`` once: next to ``anchor`` if present, else at the end."""
    if key in group:
        return
    if anchor in group:
        position = group.index(anchor) + (1 if after else 0)
        group.insert(position, key)
    else:
        group.append(key)


def dynamic_key_handling(key_groups, detected_keys):
    """
    Register detected keys in the matching group of ``key_groups`` (in place).

    A key is routed by substring match, first match wins:
      * "保证金"                          -> "保证金相关", before "质量保证金"
      * "联合体" / "分包"                 -> "项目信息"
      * "踏勘现场" / "投标预备会" / "投标答疑会" / "偏离" -> "其他信息"
      * "递交方式" / "递交地点"           -> "关键时间/内容", right after
        "投标文件递交截止日期"
    Keys matching none of these are left untouched. A key already present in
    its target group is never added twice.

    Args:
        key_groups (dict): Group name -> list of keys; the lists are mutated.
        detected_keys (iterable): Keys found in the merged basic information.
    """
    for key in detected_keys:
        if "保证金" in key:
            _insert_unique(key_groups["保证金相关"], key, anchor="质量保证金")
        elif "联合体" in key or "分包" in key:
            _insert_unique(key_groups["项目信息"], key)
        elif (
            "踏勘现场" in key
            or "投标预备会" in key
            or "投标答疑会" in key
            or "偏离" in key
        ):
            _insert_unique(key_groups["其他信息"], key)
        elif "递交方式" in key or "递交地点" in key:
            _insert_unique(
                key_groups["关键时间/内容"],
                key,
                anchor="投标文件递交截止日期",
                after=True,
            )


def get_base_info(merged_baseinfo_path, clause_path):
    """
    Collect the basic-information dictionaries for one tender document.

    Uploads ``merged_baseinfo_path``, asks the questions read from the prompt
    file plus one extra yes/no question, then runs the follow-up "judge"
    questions and the notice extraction concurrently.

    Args:
        merged_baseinfo_path: Path of the merged basic-info document.
        clause_path: Path handed to ``extract_from_notice``.

    Returns:
        list: The cleaned answers, ending with the entry renamed to
        "重新招标、不再招标和终止招标".
    """
    file_id = upload_file(merged_baseinfo_path)
    baseinfo_file_path = 'flask_app/static/提示词/基本信息货物标.txt'
    questions = read_questions_from_file(baseinfo_file_path)
    more_query = (
        "请你根据招标文件信息,回答以下问题:是否组织踏勘现场?是否召开投标预备会(或投标答疑会)?"
        "是否退还投标文件?是否允许分包? "
        "是否需要递交投标保证金(或磋商保证金)?是否需要提交履约保证金(或履约担保)?"
        "是否有招标代理服务费(或中标、成交服务费或采购代理服务费)?"
        "请按json格式给我提供信息,键名分别为'是否组织踏勘现场','是否召开投标预备会'(或'是否召开投标答疑会'),"
        "'是否退还投标文件','是否允许分包','是否递交投标保证金'(或'是否递交磋商保证金'),"
        "'是否提交履约保证金','是否有招标代理服务费',键值仅限于'是','否','未知',若存在矛盾信息,请回答'未知'。"
    )
    questions.append(more_query)

    # Last argument, per the original author's note: 1 = Bailian RAG, 2 = qianwen-long.
    baseinfo_results = multi_threading(questions, "", file_id, 2)
    baseinfo_list = (
        [clean_json_string(res) for _, res in baseinfo_results]
        if baseinfo_results
        else []
    )

    if baseinfo_list:
        # The last answer belongs to `more_query` (appended last above); it is
        # replaced by the merged form returned by merge_json_to_list.
        chosen_numbers, merged = merge_json_to_list(baseinfo_list.pop())
        baseinfo_list.append(merged)
    else:
        # No answers came back: popping from the empty list would raise
        # IndexError, so report it and continue with what can still be fetched.
        print("基础信息整合: multi_threading error!")
        # NOTE(review): assumes process_judge_questions accepts an empty
        # selection — TODO confirm.
        chosen_numbers = []

    judge_file_path = 'flask_app/static/提示词/是否相关问题货物标.txt'

    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
        judge_future = executor.submit(
            process_judge_questions,
            judge_file_path,
            chosen_numbers,
            merged_baseinfo_path,
            baseinfo_list,
        )
        notice_future = executor.submit(
            extract_from_notice, merged_baseinfo_path, clause_path, 3
        )

        # Per the original author's note, process_judge_questions modifies
        # baseinfo_list directly, so its return value is not used; result() is
        # still called to wait for it and surface any exception.
        judge_future.result()
        rebidding_situation = notice_future.result()

    update_json = rename_outer_key(rebidding_situation, "重新招标、不再招标和终止招标")
    baseinfo_list.append(update_json)
    return baseinfo_list


def combine_basic_info(merged_baseinfo_path, procurement_path, procurement_docpath, clause_path, invalid_path):
    """
    Build the final "基础信息" payload from basic info and procurement requirements.

    ``get_base_info`` and ``fetch_procurement_reqs`` run in two threads; the
    second one is started one second after the first.

    Note: an exception raised inside either thread is not re-raised here; the
    corresponding result then keeps its initial empty value.

    Args:
        merged_baseinfo_path: Passed to ``get_base_info``.
        procurement_path: Passed to ``fetch_procurement_reqs``.
        procurement_docpath: Passed to ``fetch_procurement_reqs``.
        clause_path: Passed to ``get_base_info``.
        invalid_path: Passed to ``fetch_procurement_reqs``.

    Returns:
        dict: ``{"基础信息": <aggregated dictionary>}``.
    """
    temp_list = []
    procurement_reqs = {}

    def get_base_info_thread():
        nonlocal temp_list
        temp_list = get_base_info(merged_baseinfo_path, clause_path)

    def fetch_procurement_reqs_thread():
        nonlocal procurement_reqs
        procurement_reqs = fetch_procurement_reqs(procurement_path, procurement_docpath, invalid_path)

    base_info_worker = threading.Thread(target=get_base_info_thread)
    base_info_worker.start()

    # The procurement thread is deliberately started one second later.
    time.sleep(1)
    procurement_worker = threading.Thread(target=fetch_procurement_reqs_thread)
    procurement_worker.start()

    base_info_worker.join()
    procurement_worker.join()

    # temp_list is a list of dictionaries, procurement_reqs a single dictionary.
    baseinfo_list = list(temp_list)
    baseinfo_list.append(procurement_reqs)

    aggregated_baseinfo = aggregate_basic_info_goods(baseinfo_list)
    return {"基础信息": aggregated_baseinfo}


if __name__ == "__main__":
    start_time = time.time()
    merged_baseinfo_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\bf225a5e-16d0-45c8-8c19-54a1a94cf3e2\\ztbfile_merged_baseinfo.pdf"
    procurement_file_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\bf225a5e-16d0-45c8-8c19-54a1a94cf3e2\\ztbfile_procurement.docx"
    clause_path = 'D:\\flask_project\\flask_app\\static\\output\\output1\\bf225a5e-16d0-45c8-8c19-54a1a94cf3e2\\clause1.json'
    # NOTE(review): the two paths below are placeholders added so that the call
    # matches combine_basic_info's five-parameter signature — point them at the
    # real files before running.
    procurement_docpath = procurement_file_path
    invalid_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\bf225a5e-16d0-45c8-8c19-54a1a94cf3e2\\ztbfile_invalid.pdf"
    res = combine_basic_info(
        merged_baseinfo_path,
        procurement_file_path,
        procurement_docpath=procurement_docpath,
        clause_path=clause_path,
        invalid_path=invalid_path,
    )
    print("------------------------------------")
    print(json.dumps(res, ensure_ascii=False, indent=4))
    end_time = time.time()
    print("elasped time:" + str(end_time - start_time))