11.25 超时设置、连接设置

This commit is contained in:
zy123 2024-11-25 14:38:58 +08:00
parent 557e278b2e
commit 0fc892f819
8 changed files with 111 additions and 51 deletions

View File

@ -1,21 +1,70 @@
# flask_app/ConnectionLimiter.py
import threading import threading
import time
from functools import wraps from functools import wraps
from flask import current_app, jsonify, stream_with_context, Response
class ExecutionTimeoutMonitor:
    """Watchdog that gives a request's semaphore permit back after a deadline.

    The permit is released exactly once per request: either by the watchdog
    thread when ``timeout`` seconds elapse, or by ``finish()`` when the request
    ends first. Whichever happens second is a no-op, so the semaphore count
    can never grow past its configured maximum.

    The timeout is cooperative: it only sets ``is_timeout``. A streaming
    generator notices the flag the next time it is about to yield; code that
    is currently running is not interrupted.

    Args:
        timeout: Seconds to wait before the permit is forcibly released.
        semaphore: The semaphore whose permit the caller has already acquired.
        logger: Optional logger used by the watchdog thread. When omitted it
            is resolved from ``current_app`` at construction time.
    """

    def __init__(self, timeout, semaphore, logger=None):
        self.timeout = timeout
        self.semaphore = semaphore
        self.is_timeout = False
        if logger is None:
            # Resolve the logger now, while the constructing thread still has
            # an application context. The watchdog thread has none, so
            # touching ``current_app`` from inside it would raise.
            try:
                logger = current_app.logger
            except RuntimeError:
                import logging
                logger = logging.getLogger(__name__)
        self._logger = logger
        self._lock = threading.Lock()      # guards _released / is_timeout
        self._released = False             # True once the permit was given back
        self._finished = threading.Event() # set by finish() to wake the watchdog
        self.thread = threading.Thread(target=self._monitor)
        self.thread.daemon = True

    def _monitor(self):
        """Wait for the deadline, then mark the request timed out and release."""
        # Event.wait returns True when finish() fired first: nothing to do.
        if self._finished.wait(self.timeout):
            return
        with self._lock:
            if self._released:
                return
            self._released = True
            self.is_timeout = True
        self.semaphore.release()  # release the permit after the timeout
        self._logger.error(f"Request execution exceeded {self.timeout} seconds and was terminated.")

    def start(self):
        """Start the watchdog thread."""
        self.thread.start()

    def finish(self):
        """Signal that the request ended and release the permit if still held.

        Safe to call any number of times and from any thread.

        Returns:
            True if this call released the permit, False if it had already
            been released (by the timeout or by an earlier call).
        """
        self._finished.set()
        with self._lock:
            if self._released:
                return False
            self._released = True
        self.semaphore.release()
        return True


def require_connection_limit(timeout=900):
    """Decorator factory: cap concurrent requests and bound their run time.

    The wrapped view blocks until a permit from
    ``current_app.connection_limiter.semaphore`` is available. If the view
    returns an iterator (e.g. a generator), it is streamed as
    ``text/event-stream`` and the permit is held until the stream ends, the
    response is closed, or ``timeout`` seconds pass. Any other return value
    (such as an error response tuple) is passed through unchanged and the
    permit is released immediately.

    Args:
        timeout: Maximum number of seconds a request may hold its permit.

    Returns:
        A decorator for Flask view functions.
    """
    def decorator(f):
        @wraps(f)
        def wrapped(*args, **kwargs):
            from collections.abc import Iterator

            limiter = getattr(current_app, 'connection_limiter', None)
            if limiter is None:
                current_app.logger.error("ConnectionLimiter 未初始化")
                return jsonify({'error': 'Server configuration error'}), 500

            # Blocking acquire: the request waits here until a permit is free.
            limiter.semaphore.acquire()
            monitor = ExecutionTimeoutMonitor(
                timeout, limiter.semaphore, logger=current_app.logger
            )
            try:
                monitor.start()
                result = f(*args, **kwargs)

                # Plain responses (error tuples, jsonify(...) results) must not
                # be iterated as if they were a stream.
                if not isinstance(result, Iterator):
                    monitor.finish()
                    return result

                @stream_with_context
                def generator_wrapper():
                    try:
                        for item in result:
                            if monitor.is_timeout:
                                # Deadline passed: stop producing output.
                                current_app.logger.error("Request exceeded execution time and was terminated.")
                                break
                            yield item
                    finally:
                        monitor.finish()

                response = Response(generator_wrapper(), mimetype='text/event-stream')
                # Covers a response that is closed before the stream is ever
                # started, in which case the generator's ``finally`` never runs.
                response.call_on_close(monitor.finish)
                return response
            except Exception as e:
                monitor.finish()  # idempotent: never releases twice
                current_app.logger.error(f"Exception in route: {e}")
                return jsonify({'error': 'Internal server error'}), 500
        return wrapped
    return decorator
class ConnectionLimiter:
    """Owns the semaphore that caps how many calls may run at the same time."""

    def __init__(self, max_connections=1):
        # One permit per allowed concurrent connection.
        self.semaphore = threading.Semaphore(max_connections)

    def limit_connections(self, f):
        """Decorator: run ``f`` while holding one permit of the semaphore.

        The call blocks until a permit is free; the permit is given back when
        ``f`` returns or raises.
        """
        @wraps(f)
        def wrapped(*args, **kwargs):
            with self.semaphore:
                return f(*args, **kwargs)
        return wrapped

View File

@ -517,7 +517,7 @@ def truncate_pdf_multiple(input_path, output_folder, unique_id="123"):
base_file_name) base_file_name)
if merged_result: if merged_result:
truncate_files.append(merged_result) truncate_files.append(merged_result)
logger.info(f"merged_baseinfo: 已生成合并文件: {merged_output_path}") # logger.info(f"merged_baseinfo: 已生成合并文件: {merged_output_path}")
else: else:
truncate_files.append("") # 如果 merged_result 未生成,添加空字符串 truncate_files.append("") # 如果 merged_result 未生成,添加空字符串
logger.warning("merged_baseinfo: 未生成合并文件,因为没有找到需要合并的 PDF 文件。") logger.warning("merged_baseinfo: 未生成合并文件,因为没有找到需要合并的 PDF 文件。")

View File

@ -1,7 +1,5 @@
# flask_app/routes/upload.py # flask_app/routes/upload.py
from functools import wraps from flask import Blueprint, request, jsonify, Response, stream_with_context, g
from flask import Blueprint, request, jsonify, Response, stream_with_context, g, current_app
import json import json
import os import os
import time import time
@ -13,41 +11,27 @@ from flask_app.general.post_processing import outer_post_processing
from flask_app.general.接口_技术偏离表 import get_tech_and_business_deviation from flask_app.general.接口_技术偏离表 import get_tech_and_business_deviation
from flask_app.routes.utils import generate_deviation_response, validate_request from flask_app.routes.utils import generate_deviation_response, validate_request
from flask_app.logger_setup import CSTFormatter from flask_app.logger_setup import CSTFormatter
from flask_app.ConnectionLimiter import require_connection_limit
upload_bp = Blueprint('upload', __name__) upload_bp = Blueprint('upload', __name__)
def require_connection_limit():
    """Decorator factory: make a route run under the app's connection limit.

    The wrapped view takes one permit from
    ``current_app.connection_limiter.semaphore`` (blocking until one is free)
    and gives it back as soon as the view function returns or raises.
    """
    def decorator(f):
        @wraps(f)
        def wrapped(*args, **kwargs):
            # Entering the with-block is a blocking acquire; leaving it releases.
            with current_app.connection_limiter.semaphore:
                return f(*args, **kwargs)
        return wrapped
    return decorator
@upload_bp.route('/upload', methods=['POST']) @upload_bp.route('/upload', methods=['POST'])
@require_connection_limit() @require_connection_limit(timeout=900)
def zbparse(): def zbparse():
logger = g.logger
try: try:
logger = g.logger logger.info("大解析开始!!!")
logger.info("zbparse start!!!")
received_data = request.get_json() received_data = request.get_json()
logger.info("Received JSON data: " + str(received_data)) logger.info("Received JSON data: " + str(received_data))
validation = validate_request() validation = validate_request()
if isinstance(validation, tuple) and len(validation) == 2 and isinstance(validation[0], str): if isinstance(validation, tuple) and len(validation) == 2 and isinstance(validation[0], str):
file_url, zb_type = validation file_url, zb_type = validation
else: else:
return validation # 错误响应 return validation # 错误响应
try: try:
logger.info("starting parsing url:" + file_url) logger.info("starting parsing url:" + file_url)
return Response(stream_with_context(process_and_stream(file_url, zb_type)), return process_and_stream(file_url, zb_type)
content_type='text/event-stream')
except Exception as e: except Exception as e:
logger.error('Exception occurred: ' + str(e)) logger.error('Exception occurred: ' + str(e))
return jsonify({'error': str(e)}), 500 return jsonify({'error': str(e)}), 500

View File

@ -1,5 +1,9 @@
import json import json
from flask import request,jsonify from functools import wraps
from flask import request, jsonify, current_app
def validate_request(): def validate_request():
""" """
验证请求中的JSON数据 验证请求中的JSON数据
@ -48,4 +52,31 @@ def generate_deviation_response(tech_deviation, tech_star_deviation, business_de
'filename': 'shangwu_star_deviation', 'filename': 'shangwu_star_deviation',
'data': json.dumps(business_star_deviation, ensure_ascii=False) 'data': json.dumps(business_star_deviation, ensure_ascii=False)
} }
return tech_deviation_response, tech_deviation_star_response, zigefuhe_deviation_response, shangwu_deviation_response, shangwu_star_deviation_response return tech_deviation_response, tech_deviation_star_response, zigefuhe_deviation_response, shangwu_deviation_response, shangwu_star_deviation_response
def require_connection_limit():
    """Decorator factory: run a generator-producing route under the connection limit.

    The wrapper itself is a plain function, so the configuration check runs
    when the view is called and its error response is actually returned to
    the caller. On success it returns a generator that, once iteration starts,
    takes one permit from ``current_app.connection_limiter.semaphore``
    (blocking until one is free), yields every item produced by ``f`` and
    gives the permit back when the stream ends, fails or is closed.

    Returns:
        A decorator. The decorated callable returns either a
        ``(response, 500)`` tuple when no limiter is configured, or a
        generator over the items of ``f(*args, **kwargs)``.
    """
    def decorator(f):
        @wraps(f)
        def wrapped(*args, **kwargs):
            limiter = getattr(current_app, 'connection_limiter', None)
            if limiter is None:
                current_app.logger.error("ConnectionLimiter 未初始化")
                return jsonify({'error': 'Server configuration error'}), 500

            semaphore = limiter.semaphore

            def guarded_stream():
                # A blocking acquire without a timeout always succeeds, so no
                # "server busy" branch is needed here.
                semaphore.acquire()
                try:
                    yield from f(*args, **kwargs)
                finally:
                    semaphore.release()

            return guarded_stream()
        return wrapped
    return decorator

View File

@ -13,7 +13,7 @@ from flask_app.routes.test_zbparse import test_zbparse_bp
class FlaskAppWithLimiter(Flask): class FlaskAppWithLimiter(Flask):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.connection_limiter = ConnectionLimiter(max_connections=1) self.connection_limiter = ConnectionLimiter(max_connections=10)
def create_app(): def create_app():
app = FlaskAppWithLimiter(__name__) app = FlaskAppWithLimiter(__name__)

View File

@ -13,7 +13,6 @@ def get_global_logger(unique_id):
return logging.getLogger() # 获取默认的日志器 return logging.getLogger() # 获取默认的日志器
logger = logging.getLogger(unique_id) logger = logging.getLogger(unique_id)
return logger return logger
logger = None
# fitz库版本 # fitz库版本
# def extract_common_header(pdf_path): # def extract_common_header(pdf_path):
@ -736,9 +735,7 @@ def process_input(input_path, output_folder, selection, output_suffix):
return [''] return ['']
def truncate_pdf_multiple(pdf_path, output_folder,unique_id="123"): def truncate_pdf_multiple(pdf_path, output_folder,logger):
global logger
logger = get_global_logger(unique_id)
base_file_name = os.path.splitext(os.path.basename(pdf_path))[0] base_file_name = os.path.splitext(os.path.basename(pdf_path))[0]
truncate_files = [] truncate_files = []
@ -769,11 +766,12 @@ def truncate_pdf_multiple(pdf_path, output_folder,unique_id="123"):
if merged_path: if merged_path:
# 合并成功,添加合并后的文件路径 # 合并成功,添加合并后的文件路径
truncate_files.append(merged_path) truncate_files.append(merged_path)
logger.info(f"已生成合并文件: {merged_output_path}") # logger.info(f"已生成合并文件: {merged_output_path}")
else: else:
# 合并失败,添加空字符串 # 合并失败,添加空字符串
truncate_files.append("") truncate_files.append("")
logger.warning(f"合并失败,没有生成合并文件 for {pdf_path}") logger.warning(f"合并失败,没有生成合并文件 for {pdf_path}")
logger.info("已截取文件路径"+str(truncate_files))
return truncate_files return truncate_files
#小解析,只需要前三章内容 #小解析,只需要前三章内容
@ -789,7 +787,6 @@ def truncate_pdf_specific_goods(pdf_path, output_folder, selections,unique_id="1
Returns: Returns:
list: 截取的文件路径列表包括合并后的文件路径如果有 list: 截取的文件路径列表包括合并后的文件路径如果有
""" """
global logger
logger = get_global_logger(unique_id) logger = get_global_logger(unique_id)
base_file_name = os.path.splitext(os.path.basename(pdf_path))[0] base_file_name = os.path.splitext(os.path.basename(pdf_path))[0]
truncate_files = [] truncate_files = []
@ -838,7 +835,7 @@ if __name__ == "__main__":
# input_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\a091d107-805d-4e28-b8b2-0c7327737238\\ztbfile.pdf" # input_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\a091d107-805d-4e28-b8b2-0c7327737238\\ztbfile.pdf"
# output_folder = "C:\\Users\\Administrator\\Desktop\\fsdownload\\a091d107-805d-4e28-b8b2-0c7327737238\\tmp" # output_folder = "C:\\Users\\Administrator\\Desktop\\fsdownload\\a091d107-805d-4e28-b8b2-0c7327737238\\tmp"
output_folder=r"C:\Users\Administrator\Desktop\new招标文件\output5" output_folder=r"C:\Users\Administrator\Desktop\new招标文件\output5"
# files = truncate_pdf_multiple(input_path, output_folder) files = truncate_pdf_multiple(input_path, output_folder)
# selections = [3,5] # selections = [3,5]
# files=truncate_pdf_specific_goods(input_path,output_folder,selections) # files=truncate_pdf_specific_goods(input_path,output_folder,selections)
# print(files) # print(files)

View File

@ -566,7 +566,7 @@ def test_all_files_in_folder(input_folder, output_folder):
if __name__ == "__main__": if __name__ == "__main__":
start_time=time.time() start_time=time.time()
# truncate_file="C:\\Users\\Administrator\\Desktop\\fsdownload\\469d2aee-9024-4993-896e-2ac7322d41b7\\ztbfile_procurement.docx" # truncate_file="C:\\Users\\Administrator\\Desktop\\fsdownload\\469d2aee-9024-4993-896e-2ac7322d41b7\\ztbfile_procurement.docx"
truncate_file=r"C:\Users\Administrator\Desktop\fsdownload\fa0d51a1-0d63-4c0d-9002-cf8ac3f2211a\ztbfile_procurement.pdf" truncate_file=r"D:\flask_project\flask_app\static\output\output1\a91d59a5-d04a-4588-98e4-ddc6e9caf999\ztbfile_procurement.pdf"
# invalid_path="D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile.pdf" # invalid_path="D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile.pdf"
# truncate_file="D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile_procurement.docx" # truncate_file="D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile_procurement.docx"
# output_folder="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\tmp" # output_folder="C:\\Users\\Administrator\\Desktop\\货物标\\output1\\tmp"

View File

@ -47,8 +47,7 @@ def preprocess_files(output_folder, file_path, file_type,logger):
# # 异步上传知识库 # # 异步上传知识库
# future_knowledge = executor.submit(addfileToKnowledge, docx_path, "招标解析" + unique_id) # future_knowledge = executor.submit(addfileToKnowledge, docx_path, "招标解析" + unique_id)
# 调用截取PDF多次 # 调用截取PDF多次
truncate_files = truncate_pdf_multiple(pdf_path, truncate_files = truncate_pdf_multiple(pdf_path, output_folder,logger) # index: 0->商务技术服务要求 1->评标办法 2->资格审查 3->投标人须知前附表 4->投标人须知正文
output_folder) # index: 0->商务技术服务要求 1->评标办法 2->资格审查 3->投标人须知前附表 4->投标人须知正文
# 处理各个部分 # 处理各个部分
invalid_path=pdf_path invalid_path=pdf_path