2024-09-23 15:49:30 +08:00
|
|
|
|
import concurrent.futures
|
2024-09-13 15:03:55 +08:00
|
|
|
|
import json
|
2024-09-23 15:49:30 +08:00
|
|
|
|
import time
|
2024-11-23 15:38:52 +08:00
|
|
|
|
|
|
|
|
|
from flask_app.general.doubao import pdf2txt
|
2024-10-25 17:50:20 +08:00
|
|
|
|
from flask_app.货物标.技术参数要求提取 import get_technical_requirements
|
2024-10-22 10:06:22 +08:00
|
|
|
|
from flask_app.general.通义千问long import upload_file
|
2024-10-15 21:03:02 +08:00
|
|
|
|
from flask_app.货物标.商务服务其他要求提取 import get_business_requirements
|
2024-10-22 10:06:22 +08:00
|
|
|
|
|
|
|
|
|
|
2024-09-13 15:03:55 +08:00
|
|
|
|
#获取采购清单
|
2024-11-16 16:14:53 +08:00
|
|
|
|
def fetch_procurement_reqs(procurement_path, invalid_path):
    """Build the procurement-requirements dictionary for one procurement PDF.

    The technical and business extractors are run concurrently on a thread
    pool and their outputs are merged into a single dictionary.

    Args:
        procurement_path: Path of the procurement-section PDF. A falsy value
            (empty string, ``None``) short-circuits to the empty default.
        invalid_path: Forwarded unchanged to ``get_technical_requirements``
            as its second argument.

    Returns:
        On success, a dict whose "采购需求" entry is taken from the technical
        extractor's result (``{}`` when that key is absent), updated with
        every key/value pair returned by the business extractor.
        When ``procurement_path`` is falsy, or when any exception is raised
        during processing, a fresh dict holding the five default keys, each
        mapped to an empty string.
    """
    # Template of the fallback result; callers always receive a copy of it.
    empty_reqs = dict.fromkeys(
        ("采购需求", "技术要求", "商务要求", "服务要求", "其他要求"),
        "",
    )

    # Nothing to process without a procurement file.
    if not procurement_path:
        return dict(empty_reqs)

    try:
        # Plain-text extraction of the PDF; the result is handed to both
        # extractors as their last argument.
        processed_filepath = pdf2txt(procurement_path)

        with concurrent.futures.ThreadPoolExecutor() as pool:
            technical_job = pool.submit(
                get_technical_requirements,
                procurement_path,
                invalid_path,
                processed_filepath,
            )
            # Deliberate stagger between the two submissions, kept from the
            # earlier implementation.
            time.sleep(0.5)
            business_job = pool.submit(
                get_business_requirements,
                procurement_path,
                processed_filepath,
            )

            # Block until both workers are done (re-raises their exceptions).
            technical_result = technical_job.result()
            business_result = business_job.result()

        merged = {"采购需求": technical_result.get("采购需求", {})}
        # Keep whatever keys the business extractor produced (for example
        # "技术要求", "服务要求" or "技术、服务要求"); no default keys are
        # back-filled on the success path.
        merged.update(business_result)
        return merged

    except Exception as e:
        print(f"Error in fetch_procurement_reqs: {e}")
        # Best effort: fall back to the empty default instead of propagating.
        return dict(empty_reqs)
|
2024-09-13 15:03:55 +08:00
|
|
|
|
|
2024-11-23 15:38:52 +08:00
|
|
|
|
|
|
|
|
|
#TODO:技术要求可以在技术参数之后执行,把完整的技术参数输入,问大模型,除了上述内容还有哪些,这样的话把技术标和其他的区分开。
|
2024-09-13 15:03:55 +08:00
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: extract the requirements for one locally stored
    # tender document, print them as JSON and report the elapsed time.
    start_time = time.time()

    # Not referenced below; retained from the original script.
    output_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\货物标output"
    # Alternative sample input:
    # file_path="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\2-招标文件(2020年广水市中小学教师办公电脑系统及多媒体“班班通”设备采购安装项目)_procurement.pdf"
    procurement_path = r"C:\Users\Administrator\Desktop\fsdownload\5901b181-b55f-4107-9f30-c85d607b1fa0\ztbfile_procurement.pdf"
    # Not referenced below; retained from the original script.
    procurement_docpath = r"C:\Users\Administrator\Desktop\fsdownload\5901b181-b55f-4107-9f30-c85d607b1fa0"
    invalid_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\db79e9e0-830e-442c-8cb6-1d036215f8ff\\ztbfile.pdf"

    res = fetch_procurement_reqs(procurement_path, invalid_path)
    # ensure_ascii=False keeps the Chinese keys readable in the console.
    print(json.dumps(res, ensure_ascii=False, indent=4))

    end_time = time.time()
    elapsed = end_time - start_time
    print("耗时:" + str(elapsed))
|