2.17 新增子进程处理分段解析,可以解决内存泄漏问题

This commit is contained in:
zy123 2025-02-17 17:12:36 +08:00
parent ab2a9dfdb2
commit c59bfabc9c
2 changed files with 6 additions and 7 deletions

View File

@@ -3,7 +3,7 @@ import json
import multiprocessing
import os
import time
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
from docx import Document
@@ -239,7 +239,7 @@ def engineering_bid_main(output_folder, file_path, file_type, unique_id):
}
yield json.dumps(error_response, ensure_ascii=False)
return # 停止进一步处理
with concurrent.futures.ThreadPoolExecutor() as executor:
with ProcessPoolExecutor() as executor:
# 立即启动不依赖 knowledge_name 和 index 的任务
futures = {
'base_info': executor.submit(fetch_project_basic_info,processed_data['invalid_deleted_docx'] ,processed_data['merged_baseinfo_path'],processed_data['merged_baseinfo_path_more'],
@@ -256,7 +256,7 @@ def engineering_bid_main(output_folder, file_path, file_type, unique_id):
}
# 提前处理这些不依赖的任务,按完成顺序返回
for future in concurrent.futures.as_completed(futures.values()):
for future in as_completed(futures.values()):
key = next(k for k, v in futures.items() if v == future)
try:
result = future.result()

View File

@@ -10,8 +10,7 @@ from flask_app.general.通用功能函数 import get_global_logger
from flask_app.货物标.基础信息解析货物标版 import combine_basic_info
from flask_app.general.投标人须知正文提取指定内容 import extract_from_notice
from flask_app.general.截取pdf_main import truncate_pdf_multiple
from concurrent.futures import ThreadPoolExecutor
import concurrent.futures
from concurrent.futures import as_completed, ProcessPoolExecutor
from flask_app.general.投标人须知正文条款提取成json文件 import convert_clause_to_json
from flask_app.general.无效标和废标公共代码 import combine_find_invalid
from flask_app.货物标.资格审查main import combine_qualification_review
@@ -242,7 +241,7 @@ def goods_bid_main(output_folder, file_path, file_type, unique_id):
}
yield json.dumps(error_response, ensure_ascii=False)
return # 停止进一步处理
with concurrent.futures.ThreadPoolExecutor() as executor:
with ProcessPoolExecutor() as executor:
# 立即启动不依赖 knowledge_name 和 index 的任务
futures = {
'evaluation_standards': executor.submit(fetch_evaluation_standards,processed_data['invalid_deleted_docx'], #技术评分 商务评分
@@ -266,7 +265,7 @@ def goods_bid_main(output_folder, file_path, file_type, unique_id):
}
# 提前处理这些不依赖的任务,按完成顺序返回
for future in concurrent.futures.as_completed(futures.values()): #as_completed哪个先运行结束就先返回
for future in as_completed(futures.values()): #as_completed哪个先运行结束就先返回
key = next(k for k, v in futures.items() if v == future)
try:
result = future.result()