import concurrent.futures
import copy
import json
import time

from flask_app.general.json_utils import clean_json_string, rename_outer_key
from flask_app.general.通用功能函数 import judge_consortium_bidding, process_judge_questions
from flask_app.main.投标人须知正文提取指定内容 import extract_from_notice
from flask_app.main.判断是否分包等 import read_questions_from_judge, merge_json_to_list
from flask_app.general.多线程提问 import read_questions_from_file, multi_threading
from flask_app.general.通义千问long import upload_file, qianwen_long


def aggregate_basic_info_engineering(baseinfo_list):
    """
    Merge a list of basic-info dicts and sort the merged keys into named groups.

    Args:
        baseinfo_list (list): Dicts of basic information. They are merged with
            ``dict.update``, so on a key collision the later dict wins.

    Returns:
        dict: One entry per group name, each mapping to a dict of that group's
        keys. A grouped key with no value in the merged data gets "未提供".
        Keys that belong to no group are added to the "其他信息" group.
    """
    key_groups = {
        "招标人/代理信息": ["招标人", "招标人联系方式", "招标代理机构", "招标代理机构联系方式"],
        "项目信息": ["项目名称", "招标编号", "项目概况", "招标范围", "招标控制价", "投标竞争下浮率"],
        "关键时间/内容": [
            "投标文件递交截止日期",
            "投标文件递交方式",
            "开标时间",
            "开标地点",
            "投标人要求澄清招标文件的截止时间",
            "投标有效期",
            "评标结果公示媒介",
        ],
        "保证金相关": ["退还投标保证金", "质量保证金"],
        "其他信息": [
            "重新招标、不再招标和终止招标",
            "投标费用承担",
            "招标代理服务费",
            "是否退还投标文件",
        ],
    }

    # Merge every basic-info dict into one flat mapping.
    combined_data = {}
    for baseinfo in baseinfo_list:
        combined_data.update(baseinfo)

    # Extend the groups with keys detected in the data. The keys are passed in
    # insertion order (not as a set) so that the position of dynamically added
    # keys in the output is the same on every run.
    dynamic_key_handling(key_groups, list(combined_data))

    # Build the lookup set once instead of re-flattening the groups per item.
    classified_keys = {key for keys in key_groups.values() for key in keys}
    unclassified_items = {
        key: value
        for key, value in combined_data.items()
        if key not in classified_keys
    }

    # Nest the merged values under their group names.
    aggregated = {
        group_name: {key: combined_data.get(key, "未提供") for key in keys}
        for group_name, keys in key_groups.items()
    }

    # Whatever was not claimed by any group ends up under "其他信息".
    aggregated["其他信息"].update(unclassified_items)
    return aggregated


def dynamic_key_handling(key_groups, detected_keys):
    """
    Add detected keys to the matching group lists of ``key_groups`` in place.

    A key is routed by the first keyword it contains, checked in this order:
    "保证金" -> "保证金相关" (inserted before "退还投标保证金" when that entry
    exists, otherwise appended); "联合体" / "分包" -> "项目信息";
    "踏勘现场" / "投标预备会" / "偏离" -> "其他信息". Keys containing none of the
    keywords are ignored, and a key already present in its group is not added
    a second time.

    Args:
        key_groups (dict): Group name -> list of keys; mutated in place.
        detected_keys (iterable): Keys found in the merged basic info.
    """
    deposit_anchor = "退还投标保证金"
    # (keyword, target group) pairs, in the same precedence as the original chain.
    keyword_routes = (
        ("联合体", "项目信息"),
        ("分包", "项目信息"),
        ("踏勘现场", "其他信息"),
        ("投标预备会", "其他信息"),
        ("偏离", "其他信息"),
    )

    for key in detected_keys:
        if "保证金" in key:
            group = key_groups["保证金相关"]
            if key in group:
                continue  # avoid inserting the same key twice
            if deposit_anchor in group:
                group.insert(group.index(deposit_anchor), key)
            else:
                # Anchor entry missing: fall back to appending at the end.
                group.append(key)
            continue

        for keyword, group_name in keyword_routes:
            if keyword in key:
                group = key_groups[group_name]
                if key not in group:
                    group.append(key)
                break


def update_baseinfo_lists(baseinfo_list1, baseinfo_list2):
    """
    Return a copy of ``baseinfo_list1`` with values overridden from ``baseinfo_list2``.

    Only keys that already exist somewhere in ``baseinfo_list1`` are updated;
    keys that appear only in ``baseinfo_list2`` are ignored. The returned list
    has the same per-item key layout as ``baseinfo_list1``.

    Args:
        baseinfo_list1 (list): List of dicts that defines the output structure.
        baseinfo_list2 (list): List of dicts holding replacement values.

    Returns:
        list: New list of new dicts; the inputs are not modified.
    """
    # Flatten list1 into a single lookup table.
    combined_dict = {}
    for item in baseinfo_list1:
        combined_dict.update(item)

    # Overlay values from list2, restricted to keys list1 already knows.
    for item in baseinfo_list2:
        for key, value in item.items():
            if key in combined_dict:
                combined_dict[key] = value

    # Rebuild list1's structure from the updated table.
    return [{key: combined_dict[key] for key in item} for item in baseinfo_list1]


# First ask without the bidder-notice body; for anything still "未知", ask the
# bidder-notice document directly.
def process_baseinfo_list(baseinfo_list, tobidders_notice):
    """
    Re-ask every field whose value is unknown, using the bidder-notice file.

    A field counts as unknown when its value is the string "未知", or a dict
    whose values are all "未知". One question is generated per unknown field;
    the file is uploaded only when at least one question exists.

    Args:
        baseinfo_list (list): List of dicts of already extracted basic info.
        tobidders_notice (str): Path of the file passed to ``upload_file``.

    Returns:
        list: ``clean_json_string`` applied to each answer, or an empty list
        when nothing needed asking or no answers came back.
    """
    questions_list = []
    for item in baseinfo_list:
        for key, value in item.items():
            is_unknown = value == "未知" or (
                isinstance(value, dict) and all(v == "未知" for v in value.values())
            )
            if not is_unknown:
                continue
            question = (
                f"根据该招标文件中的信息,{key}的内容是怎样的?请按json格式给我提供信息,键名是'{key}',若存在嵌套信息,嵌套内容键名以文件中对应字段命名(或是你对相应要求的总结),而对应键值需要与原文保持一致。注意:默认情况用普通键值对返回结果即可,键名为{key};若原文中未提及'{key}'相关内容,在键值中填'未知'。"
            )
            questions_list.append(question)

    if not questions_list:
        return []

    file_id = upload_file(tobidders_notice)
    baseinfo_results = multi_threading(questions_list, "", file_id, 2)
    if not baseinfo_results:
        return []
    return [clean_json_string(res) for _, res in baseinfo_results]


def combine_basic_info(merged_baseinfo_path, merged_baseinfo_path_more, tobidders_notice, clause_path):
    """
    Collect, refine and group the basic information of a tender document.

    Args:
        merged_baseinfo_path (str): File uploaded for the main round of
            questions; also passed to ``process_judge_questions``.
        merged_baseinfo_path_more (str): File passed to ``extract_from_notice``.
        tobidders_notice (str): File passed to ``merge_json_to_list`` and used
            by ``process_baseinfo_list`` to re-ask unknown fields.
        clause_path (str): Path passed to ``extract_from_notice``.

    Returns:
        dict: ``{"基础信息": <grouped basic info>}``.

    Raises:
        IndexError: If the main round of questions produced no answers.
    """
    baseinfo_prompt_file_path = r'D:\flask_project\flask_app\static\提示词\基本信息工程标qianwen-long.txt'
    # baseinfo_prompt_file_path = 'flask_app/static/提示词/基本信息工程标qianwen-long.txt'
    file_id1 = upload_file(merged_baseinfo_path)
    questions = read_questions_from_file(baseinfo_prompt_file_path)
    # NOTE(review): the key list in this prompt is missing the opening quote
    # before 是否允许分包. Left untouched because changing it alters the text
    # sent to the model — confirm before fixing.
    more_query = "请你根据招标文件信息,回答以下问题:是否组织踏勘现场?是否召开投标预备会?是否允许偏离?是否退还投标文件?是否允许分包? 是否需要递交投标保证金?是否需要提交履约保证金(履约担保)?是否有招标代理服务费?请按json格式给我提供信息,键名分别为'是否组织踏勘现场','是否召开投标预备会','是否允许偏离','是否退还投标文件',是否允许分包','是否递交投标保证金','是否提交履约保证金','是否有招标代理服务费',键值仅限于'是','否','未知',若存在矛盾信息,请回答'未知'。"
    questions.append(more_query)

    baseinfo_results = multi_threading(questions, "", file_id1, 2)
    baseinfo_list1 = [clean_json_string(res) for _, res in baseinfo_results] if baseinfo_results else []
    if not baseinfo_list1:
        # Previously this surfaced as a bare "pop from empty list"; the
        # exception type is kept so existing handlers still match.
        raise IndexError(
            "combine_basic_info: no answers were returned for the basic-info questions"
        )

    # The last answer is taken as the reply to ``more_query``.
    # Assumes multi_threading returns results in question order — TODO confirm.
    chosen_numbers, merged = merge_json_to_list(baseinfo_list1.pop(), tobidders_notice)
    # Snapshot taken before ``merged`` is appended; this is what gets re-asked.
    baseinfo_list1_copy = copy.deepcopy(baseinfo_list1)
    baseinfo_list1.append(merged)

    judge_file_path = r'D:\flask_project\flask_app\static\提示词\是否相关问题qianwen-long.txt'
    # judge_file_path = 'flask_app/static/提示词/是否相关问题qianwen-long.txt'

    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
        # Submit the three tasks.
        future1 = executor.submit(process_judge_questions, judge_file_path, chosen_numbers, merged_baseinfo_path,
                                  baseinfo_list1)
        future2 = executor.submit(process_baseinfo_list, baseinfo_list1_copy, tobidders_notice)  # asks tobidders_notice only
        future3 = executor.submit(extract_from_notice, merged_baseinfo_path_more, clause_path, 3)

        # Wait for all three tasks and collect their results.
        future1.result()  # per the original note, process_judge_questions updates baseinfo_list1 in place
        baseinfo_list2 = future2.result()
        rebidding_situation = future3.result()

    updated_list = update_baseinfo_lists(baseinfo_list1, baseinfo_list2)

    update_json = rename_outer_key(rebidding_situation, "重新招标、不再招标和终止招标")
    updated_list.append(update_json)
    aggregated_baseinfo = aggregate_basic_info_engineering(updated_list)
    return {"基础信息": aggregated_baseinfo}


if __name__ == "__main__":
    # Manual debug run against a fixed local output folder.
    start_time = time.time()
    merged_baseinfo_path = r"D:\flask_project\flask_app\static\output\output1\6f2010ee-d7cd-4787-a26a-2db8233d179a\ztbfile_merged_baseinfo.pdf"
    more = r"D:\flask_project\flask_app\static\output\output1\6f2010ee-d7cd-4787-a26a-2db8233d179a\merged_baseinfo_path_more.pdf"
    # output_folder="C:\\Users\\Administrator\\Desktop\\招标文件\\special_output"
    # Not used by this run; kept from the original script.
    tobidders_notice_table = r"D:\flask_project\flask_app\static\output\output1\6f2010ee-d7cd-4787-a26a-2db8233d179a\ztbfile_tobidders_notice_table.pdf"
    tobidders_notice = r"D:\flask_project\flask_app\static\output\output1\6f2010ee-d7cd-4787-a26a-2db8233d179a\ztbfile_tobidders_notice.pdf"
    clause_path = r"D:\flask_project\flask_app\static\output\output1\6f2010ee-d7cd-4787-a26a-2db8233d179a\clause1.json"
    res = combine_basic_info(merged_baseinfo_path, more, tobidders_notice, clause_path)
    print(json.dumps(res, ensure_ascii=False, indent=4))
    end_time = time.time()
    print("elapsed_time:" + str(end_time - start_time))