# Listing metadata: 80 lines, 4.4 KiB, Python
# -*- encoding:utf-8 -*-
|
||
import json
|
||
import threading
|
||
import time
|
||
import concurrent.futures
|
||
from flask_app.general.json_utils import clean_json_string, add_outer_key
|
||
from flask_app.general.通用功能函数 import process_judge_questions, aggregate_basic_info
|
||
from flask_app.general.llm.多线程提问 import read_questions_from_file, multi_threading
|
||
from flask_app.general.llm.通义千问long_plus import upload_file
|
||
from flask_app.old_version.判断是否分包等_old import merge_json_to_list
|
||
from flask_app.general.投标人须知正文提取指定内容 import extract_from_notice
|
||
from flask_app.货物标.提取采购需求main import fetch_procurement_reqs
|
||
|
||
|
||
def get_base_info(merged_baseinfo_path, clause_path, invalid_path):
    """Collect the basic-information entries for a goods tender document.

    Steps, in order:
      1. Upload ``merged_baseinfo_path`` and ask every question from the
         basic-info prompt file against the uploaded file.
      2. Clean each answer with ``clean_json_string``.
      3. Take the last answer (the yes/no style one) out of the list, run it
         through ``merge_json_to_list`` and put the merged result back.
      4. Concurrently run ``process_judge_questions`` (which updates the list
         in place) and ``extract_from_notice`` for the re-tendering clause.
      5. Append the re-tendering result wrapped under its outer key.

    Args:
        merged_baseinfo_path: Path of the document that is uploaded and also
            handed to ``extract_from_notice``.
        clause_path: Passed through to ``extract_from_notice``.
        invalid_path: Passed through to ``process_judge_questions``.

    Returns:
        The list of collected basic-information entries.
    """
    file_id = upload_file(merged_baseinfo_path)

    # Prompt files are addressed relative to the current working directory.
    baseinfo_file_path = 'flask_app/static/提示词/基本信息货物标.txt'
    questions = read_questions_from_file(baseinfo_file_path)

    # Mode 2 selects qianwen-long; mode 1 would select the Bailian RAG backend.
    baseinfo_results = multi_threading(questions, "", file_id, 2)
    baseinfo_list = [clean_json_string(res) for _, res in baseinfo_results] if baseinfo_results else []

    # The last answer is the yes/no one that drives the follow-up questions.
    # Guard the pop: with no answers at all, list.pop() would raise IndexError
    # even though the empty case is explicitly allowed for just above.
    chosen_numbers = []
    if baseinfo_list:
        chosen_numbers, merged = merge_json_to_list(baseinfo_list.pop())
        baseinfo_list.append(merged)
    # NOTE(review): assumes process_judge_questions accepts an empty
    # chosen_numbers — confirm against its implementation.

    judge_file_path = 'flask_app/static/提示词/是否相关问题.txt'

    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        # Run the follow-up questions and the re-tendering extraction side by side.
        judge_future = executor.submit(
            process_judge_questions, judge_file_path, chosen_numbers, invalid_path, baseinfo_list
        )
        rebidding_future = executor.submit(extract_from_notice, merged_baseinfo_path, clause_path, 3)

        # process_judge_questions mutates baseinfo_list directly, so its return
        # value is ignored; result() is still called to wait for it and to
        # surface any exception it raised.
        judge_future.result()
        rebidding_situation = rebidding_future.result()

    update_json = add_outer_key(rebidding_situation, "重新招标、不再招标和终止招标")
    baseinfo_list.append(update_json)
    return baseinfo_list
|
||
|
||
def combine_basic_info(merged_baseinfo_path, procurement_path, clause_path, invalid_path):
    """Build the aggregated basic-information section for a goods tender.

    Two worker threads run side by side: one gathers the general basic
    information via ``get_base_info``, the other extracts the procurement
    requirements via ``fetch_procurement_reqs``. The second worker is started
    one second after the first. Both results are then merged and passed to
    ``aggregate_basic_info`` with the ``'goods'`` tag.

    Args:
        merged_baseinfo_path: Forwarded to ``get_base_info``.
        procurement_path: Forwarded to ``fetch_procurement_reqs``.
        clause_path: Forwarded to ``get_base_info``.
        invalid_path: Forwarded to both workers.

    Returns:
        A dict with the single key ``"基础信息"`` holding the aggregated data.
    """
    # Shared slots the workers write into. The defaults stay in place if a
    # worker raises, because an exception inside a Thread does not propagate
    # to join().
    outcome = {"base": [], "procurement": {}}

    def _collect_base_info():
        # General basic-information extraction.
        outcome["base"] = get_base_info(merged_baseinfo_path, clause_path, invalid_path)

    def _collect_procurement_reqs():
        # Procurement-requirement extraction.
        outcome["procurement"] = fetch_procurement_reqs(procurement_path, invalid_path)

    base_worker = threading.Thread(target=_collect_base_info)
    procurement_worker = threading.Thread(target=_collect_procurement_reqs)

    base_worker.start()
    # Stagger the second worker by one second.
    time.sleep(1)
    procurement_worker.start()

    for worker in (base_worker, procurement_worker):
        worker.join()

    # The base-info result is a list of entries; the procurement result is a
    # single dict appended as the last entry.
    combined = list(outcome["base"])
    combined.append(outcome["procurement"])

    return {"基础信息": aggregate_basic_info(combined, 'goods')}
|
||
|
||
if __name__ == "__main__":
    # Manual run against a local sample document: the same .docx is supplied
    # as both the merged basic-info input and the invalid_path input, while the
    # procurement and clause paths are passed as empty strings.
    began = time.time()

    sample_docx = r"C:\Users\Administrator\Desktop\fsdownload\0c80edcc-cc86-4d53-8bd4-78a531446760\ztbfile.docx"
    merged_baseinfo_path = sample_docx
    # Sample clause file; it is not passed to the call below.
    clause_path = r'D:\flask_project\flask_app\static\output\output1\3783ce68-1839-4449-97e6-cd07749d8664\clause1.json'
    invalid_path = sample_docx

    res = combine_basic_info(merged_baseinfo_path, "", "", invalid_path)

    print("------------------------------------")
    pretty = json.dumps(res, ensure_ascii=False, indent=4)
    print(pretty)

    finished = time.time()
    print(f"elasped time:{finished - began}")
|