2.17 新增子进程处理文件切割,貌似可以解决内存泄漏问题

This commit is contained in:
zy123 2025-02-17 17:00:34 +08:00
parent 157a4d706b
commit ab2a9dfdb2
3 changed files with 24 additions and 11 deletions

View File

@ -7,8 +7,7 @@ import time
from flask_app.ConnectionLimiter import require_execution_timeout from flask_app.ConnectionLimiter import require_execution_timeout
from flask_app.general.format_change import download_file from flask_app.general.format_change import download_file
from flask_app.routes.utils import validate_and_setup_logger from flask_app.routes.utils import validate_and_setup_logger
- from flask_app.routes.货物标解析main import preprocess_files
+ from flask_app.routes.货物标解析main import preprocess_files, preprocess_file_main
test_process_bp = Blueprint('test_process', __name__) test_process_bp = Blueprint('test_process', __name__)
@test_process_bp.route('/test_process', methods=['POST']) @test_process_bp.route('/test_process', methods=['POST'])
@ -31,9 +30,7 @@ def process_file():
# print(file_path) # print(file_path)
# 调用预处理函数 # 调用预处理函数
start_time = time.time() start_time = time.time()
- with multiprocessing.Pool(processes=1) as pool:
-     # 调用 apply 或 apply_async 执行子进程任务
-     result = pool.apply(preprocess_files, args=(output_folder, file_path, 2, logger,))
+ result = preprocess_file_main(output_folder, file_path, file_type, logger)
# 处理结果 # 处理结果
if not result: if not result:

View File

@ -1,5 +1,6 @@
# -*- encoding:utf-8 -*- # -*- encoding:utf-8 -*-
import json import json
import multiprocessing
import os import os
import time import time
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
@ -21,8 +22,15 @@ from flask_app.工程标.资格审查模块main import combine_review_standards
from flask_app.general.商务技术评分提取 import combine_evaluation_standards from flask_app.general.商务技术评分提取 import combine_evaluation_standards
from flask_app.general.format_change import pdf2docx, docx2pdf from flask_app.general.format_change import pdf2docx, docx2pdf
# 创建全局线程池
executor = ThreadPoolExecutor() def preprocess_file_main(output_folder, file_path, file_type,logger):
# 这里是你原本处理请求的地方
with multiprocessing.Pool(processes=1) as pool:
result = pool.apply(
preprocess_files, # 你的实际执行函数
args=(output_folder, file_path, file_type, logger)
)
return result
def preprocess_files(output_folder, file_path, file_type,logger): def preprocess_files(output_folder, file_path, file_type,logger):
logger.info("starting 文件预处理...") logger.info("starting 文件预处理...")
start_time=time.time() start_time=time.time()
@ -221,12 +229,10 @@ def fetch_bid_opening(invalid_deleted_docx, merged_baseinfo_path_more, clause_pa
result = {"开评定标流程": {}} result = {"开评定标流程": {}}
return result return result
#分段返回
def engineering_bid_main(output_folder, file_path, file_type, unique_id): def engineering_bid_main(output_folder, file_path, file_type, unique_id):
logger = get_global_logger(unique_id) logger = get_global_logger(unique_id)
# 预处理文件,获取处理后的数据 # 预处理文件,获取处理后的数据
- processed_data = preprocess_files(output_folder, file_path, file_type,logger)
+ processed_data = preprocess_file_main(output_folder, file_path, file_type,logger)
if not processed_data: if not processed_data:
error_response = { error_response = {
'error': '文件预处理失败。请检查文件类型并重试。' 'error': '文件预处理失败。请检查文件类型并重试。'

View File

@ -1,4 +1,5 @@
import json import json
import multiprocessing
import time import time
from docx import Document from docx import Document
from flask_app.general.format_change import docx2pdf, pdf2docx from flask_app.general.format_change import docx2pdf, pdf2docx
@ -15,6 +16,15 @@ from flask_app.general.投标人须知正文条款提取成json文件 import con
from flask_app.general.无效标和废标公共代码 import combine_find_invalid from flask_app.general.无效标和废标公共代码 import combine_find_invalid
from flask_app.货物标.资格审查main import combine_qualification_review from flask_app.货物标.资格审查main import combine_qualification_review
from flask_app.general.商务技术评分提取 import combine_evaluation_standards from flask_app.general.商务技术评分提取 import combine_evaluation_standards
def preprocess_file_main(output_folder, file_path, file_type,logger):
# 这里是你原本处理请求的地方
with multiprocessing.Pool(processes=1) as pool:
result = pool.apply(
preprocess_files, # 你的实际执行函数
args=(output_folder, file_path, file_type, logger)
)
return result
def preprocess_files(output_folder, file_path, file_type,logger): def preprocess_files(output_folder, file_path, file_type,logger):
logger.info("starting 文件预处理...") logger.info("starting 文件预处理...")
start_time = time.time() start_time = time.time()
@ -225,7 +235,7 @@ def post_process_baseinfo(base_info,logger):
def goods_bid_main(output_folder, file_path, file_type, unique_id): def goods_bid_main(output_folder, file_path, file_type, unique_id):
logger = get_global_logger(unique_id) logger = get_global_logger(unique_id)
# 预处理文件,获取处理后的数据 # 预处理文件,获取处理后的数据
- processed_data = preprocess_files(output_folder, file_path, file_type,logger)
+ processed_data = preprocess_file_main(output_folder, file_path, file_type,logger)
if not processed_data: if not processed_data:
error_response = { error_response = {
'error': '文件预处理失败。请检查文件类型并重试。' 'error': '文件预处理失败。请检查文件类型并重试。'