2.19 判断是否为招标文件:创建进程池+热启动,解决内存泄漏问题的同时提升速度

This commit is contained in:
zy123 2025-02-19 11:37:22 +08:00
parent 6a2b1c2ffb
commit 4b378b27c2
5 changed files with 36 additions and 13 deletions

View File

@ -66,9 +66,9 @@ def create_logger(app, subfolder):
file_formatter = CSTFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') file_formatter = CSTFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(file_formatter) file_handler.setFormatter(file_formatter)
logger.addHandler(file_handler) logger.addHandler(file_handler)
# stream_handler = logging.StreamHandler() stream_handler = logging.StreamHandler()
# stream_handler.setFormatter(logging.Formatter('%(message)s')) stream_handler.setFormatter(logging.Formatter('%(message)s'))
# logger.addHandler(stream_handler) logger.addHandler(stream_handler)
logger.setLevel(logging.INFO) #Logger 只会处理大于或等于 INFO 级别的日志消息(例如 INFO、WARNING、ERROR、CRITICAL而 DEBUG 级别的消息会被忽略。 logger.setLevel(logging.INFO) #Logger 只会处理大于或等于 INFO 级别的日志消息(例如 INFO、WARNING、ERROR、CRITICAL而 DEBUG 级别的消息会被忽略。
logger.propagate = False logger.propagate = False
g.logger = logger g.logger = logger

View File

@ -1,9 +1,10 @@
import multiprocessing
import os import os
import threading import threading
import time import time
from enum import Enum from enum import Enum
from typing import Any from typing import Any
from flask import Blueprint, g from flask import Blueprint, g, current_app
from flask_app.general.format_change import download_file from flask_app.general.format_change import download_file
from flask_app.routes.判断是否是招标文件 import judge_zbfile_exec from flask_app.routes.判断是否是招标文件 import judge_zbfile_exec
from flask_app.routes.utils import validate_and_setup_logger, create_response_normal, log_error_unique_id from flask_app.routes.utils import validate_and_setup_logger, create_response_normal, log_error_unique_id
@ -26,7 +27,13 @@ def judge_zbfile() -> Any: #判断是否是招标文件
unique_id = g.unique_id unique_id = g.unique_id
result = [None] # 用于存储结果的可变对象 result = [None] # 用于存储结果的可变对象
done = threading.Event() # 标志判断是否完成 done = threading.Event() # 标志判断是否完成
pool = current_app.process_pool # 使用全局的进程池
def judge_zbfile_exec_sub(file_path):
    """Run ``judge_zbfile_exec`` on ``file_path`` through the process pool.

    ``pool`` is the object taken from ``current_app.process_pool`` in the
    enclosing function. Whatever ``pool.apply`` returns is handed straight
    back to the caller.
    """
    # Dispatch the real judging function to the pool with the file path as
    # its only positional argument.
    return pool.apply(judge_zbfile_exec, args=(file_path,))
def wrapper() -> None: def wrapper() -> None:
""" """
包装整个 judge_zbfile 的函数逻辑 包装整个 judge_zbfile 的函数逻辑
@ -38,7 +45,6 @@ def judge_zbfile() -> Any: #判断是否是招标文件
downloaded_filepath, file_type = download_file(file_url, downloaded_filename,True) downloaded_filepath, file_type = download_file(file_url, downloaded_filename,True)
if not downloaded_filepath or file_type == 4: if not downloaded_filepath or file_type == 4:
logger.error("下载地址不存在或不支持的文件类型!")
log_error_unique_id(unique_id, 4) log_error_unique_id(unique_id, 4)
result[0] = JudgeResult.ERROR result[0] = JudgeResult.ERROR
return return
@ -46,7 +52,7 @@ def judge_zbfile() -> Any: #判断是否是招标文件
logger.info(f"Local file path: {downloaded_filepath}") logger.info(f"Local file path: {downloaded_filepath}")
# 调用实际的判断函数 # 调用实际的判断函数
judge_result = judge_zbfile_exec(downloaded_filepath) judge_result = judge_zbfile_exec_sub(downloaded_filepath)
judge = JudgeResult.YES if judge_result else JudgeResult.NO judge = JudgeResult.YES if judge_result else JudgeResult.NO
end_time = time.time() end_time = time.time()

View File

@ -44,6 +44,8 @@ def judge_zbfile_exec(file_path):
except Exception: except Exception:
print("DOCX 文件读取失败,可能为乱码文件") print("DOCX 文件读取失败,可能为乱码文件")
return False # 解析失败直接返回 False return False # 解析失败直接返回 False
pre_endtime=time.time()
print(f"judge_zbfile_exec预处理耗时{pre_endtime - start_time:.2f}")
# 使用大模型进行判断 # 使用大模型进行判断
user_query = """该文件是否属于招标文件?如果是的话,请返回'',如果不是的话,返回''。请不要返回其他解释或内容。 user_query = """该文件是否属于招标文件?如果是的话,请返回'',如果不是的话,返回''。请不要返回其他解释或内容。
以下是常见的招标文件类型 以下是常见的招标文件类型

View File

@ -1,8 +1,24 @@
#flask_app/run_serve.py #flask_app/run_serve.py
import threading
import time
import requests
from waitress import serve from waitress import serve
from flask_app.start_up import create_app from flask_app.start_up import create_app
def warmup_request():
    """Send one warm-up POST to the local ``/judge_zbfile`` endpoint.

    Intended to run in a background thread next to the server: it sleeps
    briefly so the server has time to start listening, then posts a sample
    file URL. Any failure is printed and swallowed, because the warm-up is
    best-effort and must never take the server down.
    """
    # Give the server time to finish starting before the request is sent.
    time.sleep(5)
    try:
        url = "http://127.0.0.1:5000/judge_zbfile"
        # The file URL must be a permanent address so the warm-up request
        # keeps working across restarts.
        payload = {"file_url": "https://intellbid-open.oss-cn-wuhan-lr.aliyuncs.com/test/094%E5%AE%9A%E7%A8%BF-%E6%B9%96%E5%8C%97%E5%B7%A5%E4%B8%9A%E5%A4%A7%E5%AD%A6%E8%BD%BB%E6%AD%A6%E5%99%A8%E6%A8%A1%E6%8B%9F%E5%B0%84%E5%87%BB%E8%AE%BE%E5%A4%87%E9%87%87%E8%B4%AD%E9%A1%B9%E7%9B%AE%E6%8B%9B%E6%A0%87%E6%96%87%E4%BB%B6.pdf"}
        headers = {"Content-Type": "application/json"}
        # requests applies no timeout by default, so a stalled server or
        # download would block this thread forever. Bound it explicitly:
        # 5 s to connect, 120 s to read the response.
        # NOTE(review): 120 s is a guess at the slowest legitimate judge
        # call - tune it against the route's real latency.
        response = requests.post(url, json=payload, headers=headers, timeout=(5, 120))
        print(f"Warm-up 请求发送成功,状态码:{response.status_code}")
    except Exception as e:
        # Best-effort boundary: report the problem and let the thread end.
        print(f"Warm-up 请求出错:{e}")
if __name__ == "__main__": if __name__ == "__main__":
app = create_app() app = create_app()
threading.Thread(target=warmup_request, daemon=True).start()
serve(app, host='0.0.0.0', port=5000) serve(app, host='0.0.0.0', port=5000)

View File

@ -1,6 +1,6 @@
# flask_app/start_up.py # flask_app/start_up.py
import gc from concurrent.futures import ProcessPoolExecutor
import os from multiprocessing import Pool
from flask import Flask, g from flask import Flask, g
from flask_app.ConnectionLimiter import ConnectionLimiter from flask_app.ConnectionLimiter import ConnectionLimiter
@ -13,11 +13,10 @@ from flask_app.routes.test_zbparse import test_zbparse_bp
from flask_app.general.llm.清除file_id import delete_file_by_ids,read_file_ids from flask_app.general.llm.清除file_id import delete_file_by_ids,read_file_ids
from flask_app.routes.judge_zbfile import judge_zbfile_bp from flask_app.routes.judge_zbfile import judge_zbfile_bp
from flask_app.routes.test_preprocess import test_process_bp from flask_app.routes.test_preprocess import test_process_bp
def create_app(): def create_app():
# 创建全局日志记录器 # 创建全局日志记录器
app = Flask(__name__) app = Flask(__name__)
app.process_pool = Pool(processes=10, maxtasksperchild=3)
app.global_logger = create_logger_main('model_log') # 全局日志记录器 app.global_logger = create_logger_main('model_log') # 全局日志记录器
# 注册蓝图 # 注册蓝图
app.register_blueprint(get_deviation_bp) app.register_blueprint(get_deviation_bp)