2.19 判断是否为招标文件:创建进程池+热启动,解决内存泄漏问题的同时提升速度

This commit is contained in:
zy123 2025-02-19 11:37:22 +08:00
parent 6a2b1c2ffb
commit 4b378b27c2
5 changed files with 36 additions and 13 deletions

View File

@ -66,9 +66,9 @@ def create_logger(app, subfolder):
file_formatter = CSTFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(file_formatter)
logger.addHandler(file_handler)
# stream_handler = logging.StreamHandler()
# stream_handler.setFormatter(logging.Formatter('%(message)s'))
# logger.addHandler(stream_handler)
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(logging.Formatter('%(message)s'))
logger.addHandler(stream_handler)
logger.setLevel(logging.INFO) #Logger 只会处理大于或等于 INFO 级别的日志消息(例如 INFO、WARNING、ERROR、CRITICAL而 DEBUG 级别的消息会被忽略。
logger.propagate = False
g.logger = logger

View File

@ -1,9 +1,10 @@
import multiprocessing
import os
import threading
import time
from enum import Enum
from typing import Any
from flask import Blueprint, g
from flask import Blueprint, g, current_app
from flask_app.general.format_change import download_file
from flask_app.routes.判断是否是招标文件 import judge_zbfile_exec
from flask_app.routes.utils import validate_and_setup_logger, create_response_normal, log_error_unique_id
@ -26,7 +27,13 @@ def judge_zbfile() -> Any: #判断是否是招标文件
unique_id = g.unique_id
result = [None] # 用于存储结果的可变对象
done = threading.Event() # 标志判断是否完成
pool = current_app.process_pool # 使用全局的进程池
def judge_zbfile_exec_sub(file_path):
    """Run ``judge_zbfile_exec`` on ``file_path`` inside the shared process pool.

    ``pool.apply`` blocks until the worker finishes, so the worker's return
    value is handed straight back to the caller.
    """
    # Delegate the actual check to a pool worker instead of running it in-process.
    return pool.apply(judge_zbfile_exec, args=(file_path,))
def wrapper() -> None:
"""
包装整个 judge_zbfile 的函数逻辑
@ -38,7 +45,6 @@ def judge_zbfile() -> Any: #判断是否是招标文件
downloaded_filepath, file_type = download_file(file_url, downloaded_filename,True)
if not downloaded_filepath or file_type == 4:
logger.error("下载地址不存在或不支持的文件类型!")
log_error_unique_id(unique_id, 4)
result[0] = JudgeResult.ERROR
return
@ -46,7 +52,7 @@ def judge_zbfile() -> Any: #判断是否是招标文件
logger.info(f"Local file path: {downloaded_filepath}")
# 调用实际的判断函数
judge_result = judge_zbfile_exec(downloaded_filepath)
judge_result = judge_zbfile_exec_sub(downloaded_filepath)
judge = JudgeResult.YES if judge_result else JudgeResult.NO
end_time = time.time()

View File

@ -44,6 +44,8 @@ def judge_zbfile_exec(file_path):
except Exception:
print("DOCX 文件读取失败,可能为乱码文件")
return False # 解析失败直接返回 False
pre_endtime=time.time()
print(f"judge_zbfile_exec预处理耗时{pre_endtime - start_time:.2f}")
# 使用大模型进行判断
user_query = """该文件是否属于招标文件?如果是的话,请返回'',如果不是的话,返回''。请不要返回其他解释或内容。
以下是常见的招标文件类型

View File

@ -1,8 +1,24 @@
#flask_app/run_serve.py
import threading
import time
import requests
from waitress import serve
from flask_app.start_up import create_app
def warmup_request(startup_delay=5, timeout=120):
    """Send a single warm-up request to the local ``/judge_zbfile`` endpoint.

    The function is best-effort: any failure is printed and swallowed so that
    a failed warm-up never affects the caller.

    Args:
        startup_delay: Seconds to wait before sending the request, giving the
            server time to start listening.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout the call could block this thread indefinitely
            if the server accepts the connection but never answers.
    """
    # Wait for the server to finish starting up before hitting it.
    time.sleep(startup_delay)
    try:
        url = "http://127.0.0.1:5000/judge_zbfile"
        # The file URL must be a permanent address so the warm-up keeps working.
        payload = {"file_url": "https://intellbid-open.oss-cn-wuhan-lr.aliyuncs.com/test/094%E5%AE%9A%E7%A8%BF-%E6%B9%96%E5%8C%97%E5%B7%A5%E4%B8%9A%E5%A4%A7%E5%AD%A6%E8%BD%BB%E6%AD%A6%E5%99%A8%E6%A8%A1%E6%8B%9F%E5%B0%84%E5%87%BB%E8%AE%BE%E5%A4%87%E9%87%87%E8%B4%AD%E9%A1%B9%E7%9B%AE%E6%8B%9B%E6%A0%87%E6%96%87%E4%BB%B6.pdf"}  # set file_url as appropriate
        headers = {"Content-Type": "application/json"}
        # Bound the request so a stalled server cannot hang the warm-up forever.
        response = requests.post(url, json=payload, headers=headers, timeout=timeout)
        print(f"Warm-up 请求发送成功,状态码:{response.status_code}")
    except Exception as e:
        # Deliberately broad: the warm-up must never raise into its caller.
        print(f"Warm-up 请求出错:{e}")
if __name__ == "__main__":
app = create_app()
serve(app, host='0.0.0.0', port=5000)
threading.Thread(target=warmup_request, daemon=True).start()
serve(app, host='0.0.0.0', port=5000)

View File

@ -1,6 +1,6 @@
# flask_app/start_up.py
import gc
import os
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Pool
from flask import Flask, g
from flask_app.ConnectionLimiter import ConnectionLimiter
@ -13,11 +13,10 @@ from flask_app.routes.test_zbparse import test_zbparse_bp
from flask_app.general.llm.清除file_id import delete_file_by_ids,read_file_ids
from flask_app.routes.judge_zbfile import judge_zbfile_bp
from flask_app.routes.test_preprocess import test_process_bp
def create_app():
# 创建全局日志记录器
app = Flask(__name__)
app.process_pool = Pool(processes=10, maxtasksperchild=3)
app.global_logger = create_logger_main('model_log') # 全局日志记录器
# 注册蓝图
app.register_blueprint(get_deviation_bp)