2024-10-19 15:33:55 +08:00
|
|
|
|
import copy
|
2024-10-18 16:05:18 +08:00
|
|
|
|
import json
|
2024-10-19 12:53:25 +08:00
|
|
|
|
import time
|
2024-10-19 15:33:55 +08:00
|
|
|
|
import concurrent.futures
|
2024-12-12 16:06:20 +08:00
|
|
|
|
from flask_app.general.json_utils import clean_json_string, add_outer_key
|
2025-01-02 15:35:38 +08:00
|
|
|
|
from flask_app.general.通用功能函数 import process_judge_questions, aggregate_basic_info
|
|
|
|
|
from flask_app.general.投标人须知正文提取指定内容 import extract_from_notice
|
2025-02-06 14:39:58 +08:00
|
|
|
|
from flask_app.old_version.判断是否分包等_old import merge_json_to_list
|
|
|
|
|
from flask_app.general.llm.多线程提问 import read_questions_from_file, multi_threading
|
2025-02-12 14:55:35 +08:00
|
|
|
|
from flask_app.general.llm.通义千问long import upload_file
|
2024-10-18 16:05:18 +08:00
|
|
|
|
|
2024-10-19 12:53:25 +08:00
|
|
|
|
def update_baseinfo_lists(baseinfo_list1, baseinfo_list2):
    """Overlay the values from ``baseinfo_list2`` onto ``baseinfo_list1``.

    Args:
        baseinfo_list1: List of dicts whose keys and grouping define the
            shape of the result.
        baseinfo_list2: List of dicts holding replacement values. A key is
            only applied when it already appears somewhere in
            ``baseinfo_list1``; any other key is ignored.

    Returns:
        A new list with one new dict per entry of ``baseinfo_list1``, in the
        same order and with the same keys, each key carrying its final
        value. The input lists themselves are not modified; values are
        reused by reference, not copied.
    """
    # Flatten every entry of the first list into a single lookup table.
    # When a key occurs in several entries, the last occurrence wins.
    merged_values = {}
    for entry in baseinfo_list1:
        merged_values.update(entry)

    # Apply replacements, but never introduce keys the first list lacks.
    for entry in baseinfo_list2:
        for name, replacement in entry.items():
            if name in merged_values:
                merged_values[name] = replacement

    # Rebuild the original grouping from the merged lookup table.
    return [
        {name: merged_values[name] for name in entry}
        for entry in baseinfo_list1
    ]
|
|
|
|
|
|
2024-11-12 14:44:57 +08:00
|
|
|
|
# First ask without the bidder-notice body text; if the answer is "未知" (unknown), ask again directly against the body text.
|
2025-01-03 17:36:23 +08:00
|
|
|
|
def process_baseinfo_list(baseinfo_list, merged_baseinfo_path):
    """Ask again about every field whose first-pass answer was "未知".

    A field counts as unknown when its value equals the string "未知", or
    when it is a dict whose values all equal "未知". One question is built
    per unknown field.

    Args:
        baseinfo_list: List of dicts mapping field names to extracted values.
        merged_baseinfo_path: Path handed to ``upload_file``; the returned id
            is forwarded to ``multi_threading`` together with the questions.

    Returns:
        A list with ``clean_json_string`` applied to the second element of
        each item returned by ``multi_threading``. Returns an empty list
        when no field is unknown (nothing is uploaded in that case) or when
        ``multi_threading`` returns a falsy result.
    """

    def _is_unknown(value):
        # Either the bare marker, or a dict made up solely of markers.
        if value == "未知":
            return True
        return isinstance(value, dict) and all(v == "未知" for v in value.values())

    questions_list = [
        f"根据该招标文件中的信息,{key}的内容是怎样的?请按json格式给我提供信息,键名是'{key}',若存在嵌套信息,嵌套内容键名以文件中对应字段命名(或是你对相应要求的总结),而对应键值需要与原文保持一致。注意:默认情况用普通键值对返回结果即可,外层键名为{key};若原文中未提及'{key}'相关内容,在键值中填'未知'。"
        for item in baseinfo_list
        for key, value in item.items()
        if _is_unknown(value)
    ]

    # Nothing to re-ask: skip the upload and the model calls entirely.
    if not questions_list:
        return []

    file_id = upload_file(merged_baseinfo_path)
    # NOTE(review): the meaning of the "" and 2 arguments is defined by
    # multi_threading, which is not visible here — confirm before changing.
    baseinfo_results = multi_threading(questions_list, "", file_id, 2)
    if not baseinfo_results:
        return []
    return [clean_json_string(res) for _, res in baseinfo_results]
|
|
|
|
|
|
2025-01-03 17:36:23 +08:00
|
|
|
|
def combine_basic_info(merged_baseinfo_path, merged_baseinfo_path_more, clause_path, invalid_path):
    """Collect, refine and aggregate the basic information of a tender file.

    Steps, in order:
      1. Upload ``merged_baseinfo_path`` and ask the questions read from the
         basic-info prompt file.
      2. Remove the last answer from the list and pass it to
         ``merge_json_to_list``, which yields ``chosen_numbers`` and a merged
         entry. The merged entry is appended back after a deep copy of the
         remaining answers has been taken.
      3. Run three tasks concurrently: the judge questions, a second round
         of questions for fields still marked unknown, and
         ``extract_from_notice``.
      4. Overlay the second-round answers, append the notice extraction
         under the key "重新招标、不再招标和终止招标", and aggregate.

    Args:
        merged_baseinfo_path (str): File path uploaded for both the first
            and the second round of questions.
        merged_baseinfo_path_more (str): File path passed to
            ``extract_from_notice``.
        clause_path (str): Clause path passed to ``extract_from_notice``.
        invalid_path (str): Path passed to ``process_judge_questions``.

    Returns:
        dict: ``{"基础信息": <result of aggregate_basic_info>}``.
    """
    baseinfo_prompt_file_path = 'flask_app/static/提示词/基本信息工程标.txt'
    judge_file_path = 'flask_app/static/提示词/是否相关问题.txt'

    file_id = upload_file(merged_baseinfo_path)
    questions = read_questions_from_file(baseinfo_prompt_file_path)
    baseinfo_results = multi_threading(questions, "", file_id, 2)
    if baseinfo_results:
        baseinfo_list1 = [clean_json_string(res) for _, res in baseinfo_results]
    else:
        baseinfo_list1 = []

    # NOTE(review): pop() raises IndexError when the first round produced no
    # results; confirm whether callers rely on that or need a fallback.
    last_answer = baseinfo_list1.pop()
    chosen_numbers, merged = merge_json_to_list(last_answer)

    # Snapshot taken BEFORE the merged entry is appended, so the second
    # round of questions does not cover the merged entry.
    baseinfo_list1_copy = copy.deepcopy(baseinfo_list1)
    baseinfo_list1.append(merged)

    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
        # Submit all three tasks up front so they can overlap.
        judge_future = executor.submit(
            process_judge_questions, judge_file_path, chosen_numbers, invalid_path, baseinfo_list1
        )
        # Second attempt at the fields that are still unknown.
        reask_future = executor.submit(process_baseinfo_list, baseinfo_list1_copy, merged_baseinfo_path)
        # NOTE(review): the meaning of the trailing 3 is defined by
        # extract_from_notice — confirm before changing.
        notice_future = executor.submit(extract_from_notice, merged_baseinfo_path_more, clause_path, 3)

        # Per the original author's note, process_judge_questions updates
        # baseinfo_list1 in place, so its return value is not used.
        judge_future.result()
        baseinfo_list2 = reask_future.result()
        rebidding_situation = notice_future.result()

    updated_baseinfo_list = update_baseinfo_lists(baseinfo_list1, baseinfo_list2)
    updated_baseinfo_list.append(
        add_outer_key(rebidding_situation, "重新招标、不再招标和终止招标")
    )
    return {"基础信息": aggregate_basic_info(updated_baseinfo_list)}
|
|
|
|
|
|
2024-10-30 11:11:57 +08:00
|
|
|
|
|
2024-10-18 16:05:18 +08:00
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run against a local sample document; prints the combined
    # result as JSON followed by the wall-clock time taken.
    start_time = time.time()

    # The same sample document serves as all three file inputs.
    merged_baseinfo_path = more = invalid_path = (
        r"C:\Users\Administrator\Desktop\工程\test\2022-广东-鹏华基金管理有限公司深圳深业上城办公室装修项目.docx"
    )
    # Not passed to combine_basic_info; kept from the original script.
    tobidders_notice_table = r"D:\flask_project\flask_app\static\output\output1\6f2010ee-d7cd-4787-a26a-2db8233d179a\ztbfile_tobidders_notice_table.pdf"
    clause_path = ""

    res = combine_basic_info(merged_baseinfo_path, more, clause_path, invalid_path)
    print(json.dumps(res, ensure_ascii=False, indent=4))

    end_time = time.time()
    print(f"elapsed_time:{end_time - start_time}")
|