zbparse/flask_app/工程标/基础信息整合工程标.py
2025-01-03 17:36:23 +08:00

110 lines
5.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import copy
import json
import time
import concurrent.futures
from flask_app.general.json_utils import clean_json_string, add_outer_key
from flask_app.general.通用功能函数 import process_judge_questions, aggregate_basic_info
from flask_app.general.投标人须知正文提取指定内容 import extract_from_notice
from flask_app.工程标.判断是否分包等 import merge_json_to_list
from flask_app.general.多线程提问 import read_questions_from_file, multi_threading
from flask_app.general.通义千问long import upload_file
def update_baseinfo_lists(baseinfo_list1, baseinfo_list2):
    """Overlay values from ``baseinfo_list2`` onto the dicts of ``baseinfo_list1``.

    Args:
        baseinfo_list1: list of dicts that defines the output structure
            (which keys live in which dict, and in what order).
        baseinfo_list2: list of dicts holding replacement values.

    Returns:
        A new list with one new dict per dict in ``baseinfo_list1``, carrying
        the same keys. A key's value is taken from ``baseinfo_list2`` when the
        key also occurs there; keys that occur only in ``baseinfo_list2`` are
        ignored. If a key occurs in several dicts, the last value seen wins
        and every dict containing that key receives it.
    """
    # Flatten all of baseinfo_list1 into a single key -> value lookup table.
    merged_values = {}
    for entry in baseinfo_list1:
        merged_values.update(entry)

    # Overwrite only keys that are already known; never introduce new ones.
    for entry in baseinfo_list2:
        merged_values.update(
            {name: value for name, value in entry.items() if name in merged_values}
        )

    # Rebuild the original per-dict layout from the lookup table.
    return [{name: merged_values[name] for name in entry} for entry in baseinfo_list1]
# The first round of questions is asked without the bidder-instructions body
# text; for answers that come back as "未知" (unknown), the question is asked
# again directly against that text. (Translated from the original note.)
def process_baseinfo_list(baseinfo_list, merged_baseinfo_path):
    """Re-ask the model about every field whose current value is unknown.

    A value counts as unknown when it equals "未知", or when it is a dict whose
    values all equal "未知" (an empty dict therefore also qualifies).

    Args:
        baseinfo_list: list of dicts mapping field names to extracted values.
        merged_baseinfo_path: path handed to ``upload_file``; the returned file
            id is what the follow-up questions are asked against.

    Returns:
        A list with one ``clean_json_string`` result per answer, or an empty
        list when there is nothing to ask or ``multi_threading`` returns a
        falsy result.
    """

    def _is_unknown(value):
        # Either the plain marker, or a nested dict made up only of markers.
        if value == "未知":
            return True
        return isinstance(value, dict) and all(v == "未知" for v in value.values())

    # One follow-up prompt per unknown field, in encounter order.
    questions_list = [
        f"根据该招标文件中的信息,{key}的内容是怎样的请按json格式给我提供信息键名是'{key}',若存在嵌套信息,嵌套内容键名以文件中对应字段命名(或是你对相应要求的总结),而对应键值需要与原文保持一致。注意:默认情况用普通键值对返回结果即可,外层键名为{key};若原文中未提及'{key}'相关内容,在键值中填'未知'"
        for item in baseinfo_list
        for key, value in item.items()
        if _is_unknown(value)
    ]

    # Nothing unknown: skip the upload and the model calls entirely.
    if not questions_list:
        return []

    file_id = upload_file(merged_baseinfo_path)
    baseinfo_results = multi_threading(questions_list, "", file_id, 2)
    if not baseinfo_results:
        return []

    # Each result is unpacked as a pair; only its second element is parsed.
    return [clean_json_string(res) for _, res in baseinfo_results]
def combine_basic_info(merged_baseinfo_path, merged_baseinfo_path_more, clause_path, invalid_path):
    """Collect, refine and aggregate the basic information of a tender document.

    Steps, as performed below:
      1. Upload ``merged_baseinfo_path`` and ask every question read from the
         basic-info prompt file.
      2. Pop the last answer and pass it to ``merge_json_to_list``, which
         yields ``chosen_numbers`` and a merged dict that is appended back.
      3. Run three tasks concurrently: the judge questions, a second round of
         questions for fields still unknown, and ``extract_from_notice``.
      4. Overlay the second-round answers, append the ``extract_from_notice``
         result under its own outer key, and aggregate everything.

    Args:
        merged_baseinfo_path: file uploaded for the first question round and
            also passed to ``process_baseinfo_list`` for the retry round.
        merged_baseinfo_path_more: file passed to ``extract_from_notice``.
        clause_path: clause path passed to ``extract_from_notice``.
        invalid_path: file passed to ``process_judge_questions``.

    Returns:
        dict: ``{"基础信息": <result of aggregate_basic_info>}``.

    Raises:
        IndexError: if the first question round produces no answers. The
            original code failed with the same exception type on ``pop()`` of
            an empty list; the type is kept so existing handlers still match.
    """
    # Prompt files are given as relative paths.
    # NOTE(review): presumably resolved against the process working directory — confirm.
    baseinfo_prompt_file_path = 'flask_app/static/提示词/基本信息工程标.txt'
    judge_file_path = 'flask_app/static/提示词/是否相关问题.txt'

    file_id = upload_file(merged_baseinfo_path)
    questions = read_questions_from_file(baseinfo_prompt_file_path)
    baseinfo_results = multi_threading(questions, "", file_id, 2)
    baseinfo_list1 = [clean_json_string(res) for _, res in baseinfo_results] if baseinfo_results else []

    # Fail with a meaningful message instead of the bare "pop from empty list".
    if not baseinfo_list1:
        raise IndexError(
            "combine_basic_info: no answers were returned for the basic-info "
            f"questions in {baseinfo_prompt_file_path!r}"
        )

    # The last answer is consumed by merge_json_to_list; its merged form is
    # appended back only after the snapshot for the retry round is taken, so
    # the retry round never sees the merged dict.
    chosen_numbers, merged = merge_json_to_list(baseinfo_list1.pop())
    baseinfo_list1_copy = copy.deepcopy(baseinfo_list1)
    baseinfo_list1.append(merged)

    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
        # Three independent tasks run side by side.
        future1 = executor.submit(process_judge_questions, judge_file_path, chosen_numbers, invalid_path, baseinfo_list1)
        # Ask again about fields that are still unknown; works on the deep copy
        # so it does not share dicts with the list handed to future1.
        future2 = executor.submit(process_baseinfo_list, baseinfo_list1_copy, merged_baseinfo_path)
        future3 = executor.submit(extract_from_notice, merged_baseinfo_path_more, clause_path, 3)

        # Per the original author's note, process_judge_questions updates
        # baseinfo_list1 in place, so its return value is not used; result()
        # is still called so that any exception from the task surfaces here.
        future1.result()
        baseinfo_list2 = future2.result()
        rebidding_situation = future3.result()

    updated_baseinfo_list = update_baseinfo_lists(baseinfo_list1, baseinfo_list2)
    update_json = add_outer_key(rebidding_situation, "重新招标、不再招标和终止招标")
    updated_baseinfo_list.append(update_json)
    aggregated_baseinfo = aggregate_basic_info(updated_baseinfo_list)
    return {"基础信息": aggregated_baseinfo}
if __name__ == "__main__":
    # Manual smoke run against a local sample tender document: the same file
    # is used for all three path arguments, and no clause file is supplied.
    # perf_counter() is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments during the run.
    start_time = time.perf_counter()
    sample_docx = r"C:\Users\Administrator\Desktop\工程\test\2022-广东-鹏华基金管理有限公司深圳深业上城办公室装修项目.docx"
    merged_baseinfo_path = sample_docx
    more = sample_docx
    clause_path = ""
    invalid_path = sample_docx
    res = combine_basic_info(merged_baseinfo_path, more, clause_path, invalid_path)
    print(json.dumps(res, ensure_ascii=False, indent=4))
    end_time = time.perf_counter()
    print("elapsed_time:" + str(end_time - start_time))