zbparse/flask_app/货物标/基础信息解析货物标版.py

84 lines
5.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- encoding:utf-8 -*-
import json
import threading
import time
import concurrent.futures
from flask_app.general.json_utils import clean_json_string, add_outer_key
from flask_app.general.通用功能函数 import judge_consortium_bidding, process_judge_questions, aggregate_basic_info
from flask_app.general.多线程提问 import read_questions_from_file, multi_threading
from flask_app.general.通义千问long import upload_file
from flask_app.工程标.判断是否分包等 import merge_json_to_list, read_questions_from_judge
from flask_app.货物标.投标人须知正文提取指定内容货物标版 import extract_from_notice
from flask_app.货物标.提取采购需求main import fetch_procurement_reqs
def get_base_info(merged_baseinfo_path, clause_path):
    """Collect the basic-information entries for a goods tender document.

    The merged base-info file is uploaded once; the resulting file id is then
    used to ask every question from the prompt file plus one extra yes/no
    query. The answer to that extra query (the last one) is passed through
    ``merge_json_to_list`` and its merged form replaces the raw answer. The
    judge questions and the notice extraction are then run concurrently.

    Args:
        merged_baseinfo_path: Path of the merged base-info document; it is
            uploaded and also handed to ``extract_from_notice``.
        clause_path: Clause file path forwarded to ``extract_from_notice``.

    Returns:
        The list of parsed answers, with the merged yes/no entry, whatever
        ``process_judge_questions`` added in place, and a final entry wrapped
        under the key "重新招标、不再招标和终止招标".
    """
    # Prompt files are given as relative paths, so they resolve against the
    # current working directory of the process.
    question_prompt_path = 'flask_app/static/提示词/基本信息货物标.txt'
    judge_prompt_path = 'flask_app/static/提示词/是否相关问题.txt'

    uploaded_id = upload_file(merged_baseinfo_path)

    prompts = read_questions_from_file(question_prompt_path)
    yes_no_query = "请你根据招标文件信息,回答以下问题:是否组织踏勘现场?是否召开投标预备会(或投标答疑会)?是否退还投标文件?是否允许分包? 是否需要递交投标保证金或磋商保证金是否需要提交履约保证金或履约担保是否有招标代理服务费或中标、成交服务费或采购代理服务费请按json格式给我提供信息键名分别为'是否组织踏勘现场','是否召开投标预备会'(或'是否召开投标答疑会','是否退还投标文件',是否允许分包','是否递交投标保证金'(或'是否递交磋商保证金','是否提交履约保证金','是否有招标代理服务费',键值仅限于'','','未知',若存在矛盾信息,请回答'未知'"
    # Appended last so that its answer is the final element popped below.
    prompts.append(yes_no_query)

    # Mode 1 selects the Bailian RAG backend, mode 2 selects qianwen-long.
    raw_answers = multi_threading(prompts, "", uploaded_id, 2)

    parsed_answers = []
    if raw_answers:
        for _, answer_text in raw_answers:
            parsed_answers.append(clean_json_string(answer_text))

    # NOTE(review): when no answers came back the list is empty and pop()
    # raises IndexError; callers currently rely on at least one answer.
    yes_no_answer = parsed_answers.pop()
    chosen_numbers, merged_answer = merge_json_to_list(yes_no_answer)
    parsed_answers.append(merged_answer)

    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as pool:
        judge_job = pool.submit(
            process_judge_questions,
            judge_prompt_path,
            chosen_numbers,
            uploaded_id,
            parsed_answers,
        )
        # NOTE(review): the meaning of the trailing 3 is defined by
        # extract_from_notice, which is not visible here — confirm.
        notice_job = pool.submit(
            extract_from_notice, merged_baseinfo_path, clause_path, 3
        )

        # process_judge_questions updates parsed_answers in place, so only
        # completion (and any raised exception) matters for this job.
        judge_job.result()
        rebidding_info = notice_job.result()

    parsed_answers.append(
        add_outer_key(rebidding_info, "重新招标、不再招标和终止招标")
    )
    return parsed_answers
def combine_basic_info(merged_baseinfo_path, procurement_path, clause_path, invalid_path):
    """Build the aggregated "基础信息" section from two parallel lookups.

    One worker thread gathers the base-info entries via ``get_base_info``;
    a second, started one second later, fetches the procurement
    requirements. Once both have finished, the procurement result is added
    after the base-info entries and the whole list is aggregated.

    Args:
        merged_baseinfo_path: Forwarded to ``get_base_info``.
        procurement_path: Forwarded to ``fetch_procurement_reqs``.
        clause_path: Forwarded to ``get_base_info``.
        invalid_path: Forwarded to ``fetch_procurement_reqs``.

    Returns:
        A dict with the single key "基础信息" holding the aggregated result.
    """
    # Defaults that remain in place if a worker never assigns its result.
    base_entries = []
    purchase_reqs = {}

    def _collect_base_info():
        nonlocal base_entries
        base_entries = get_base_info(merged_baseinfo_path, clause_path)

    def _collect_procurement_reqs():
        nonlocal purchase_reqs
        purchase_reqs = fetch_procurement_reqs(procurement_path, invalid_path)

    base_worker = threading.Thread(target=_collect_base_info)
    procurement_worker = threading.Thread(target=_collect_procurement_reqs)

    base_worker.start()
    # The procurement worker is deliberately started one second after the
    # base-info worker.
    time.sleep(1)
    procurement_worker.start()

    for worker in (base_worker, procurement_worker):
        worker.join()

    # Base-info entries first, then the procurement requirements.
    combined_entries = [*base_entries, purchase_reqs]
    return {"基础信息": aggregate_basic_info(combined_entries)}
if __name__ == "__main__":
    # Manual smoke run: calls get_base_info on a local sample document,
    # prints the result as JSON and reports the wall-clock duration.
    started_at = time.time()

    # Sample inputs on the developer machine.
    merged_baseinfo_path = r"D:\flask_project\flask_app\static\output\output1\eabefc28-142f-4bb5-b1be-e86e43bb87b5\invalid_del.docx"
    procurement_file_path = r"D:\flask_project\flask_app\static\output\output1\83ae3e35-9136-4402-a74f-01d7adfcbb73\invalid_added.docx"
    clause_path = 'D:\\flask_project\\flask_app\\static\\output\\output1\\bf225a5e-16d0-45c8-8c19-54a1a94cf3e2\\clause1.json'

    # procurement_file_path and clause_path are only needed when exercising
    # combine_basic_info, which now also requires an invalid_path argument.
    res = get_base_info(merged_baseinfo_path, "")

    print("------------------------------------")
    rendered = json.dumps(res, ensure_ascii=False, indent=4)
    print(rendered)

    elapsed = time.time() - started_at
    print("elasped time:" + str(elapsed))