From 06a3c2bbaf7b8727f3bef32ad2cc83e3c9edf450 Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Fri, 10 Jan 2025 14:30:35 +0800 Subject: [PATCH] 1.10 --- flask_app/general/判断是否是招标文件.py | 38 -------- flask_app/routes/judge_zbfile.py | 119 ++++++++++++++++++++++-- flask_app/routes/little_zbparse.py | 39 ++------ flask_app/routes/utils.py | 24 ++++- flask_app/routes/偏离表main.py | 4 +- flask_app/routes/判断是否是招标文件.py | 54 +++++++++++ flask_app/routes/小解析main.py | 2 - flask_app/routes/工程标解析main.py | 2 - flask_app/routes/货物标解析main.py | 4 +- flask_app/start_up.py | 3 + 10 files changed, 199 insertions(+), 90 deletions(-) delete mode 100644 flask_app/general/判断是否是招标文件.py create mode 100644 flask_app/routes/判断是否是招标文件.py diff --git a/flask_app/general/判断是否是招标文件.py b/flask_app/general/判断是否是招标文件.py deleted file mode 100644 index 5184faf..0000000 --- a/flask_app/general/判断是否是招标文件.py +++ /dev/null @@ -1,38 +0,0 @@ -from PyPDF2 import PdfReader - -from flask_app.general.通义千问long import upload_file, qianwen_long - - -def judge_zbfile(file_path): - - try: - # 检查文件是否存在且是pdf格式(不区分大小写) - if file_path.lower().endswith(('.pdf', '.PDF')): - reader = PdfReader(file_path) - num_pages = len(reader.pages) - if num_pages <= 5: - return False - user_query = """该文件是否属于招标文件?如果是的话,请返回'是',如果不是的话,返回'否'。请不要返回其他解释或内容。 - 以下是常见的招标文件类型: - 公开招标文件、邀请招标文件、竞争性谈判文件、竞争性磋商文件、询价文件、问询文件、货物类招标文件、工程类招标文件、施工类招标文件、服务类招标文件、比选文件。 - 若有未涵盖的类型,但其内容明确表达了项目需求、采购或招标信息,且包含指导投标人参与的关键要素,则可视为招标文件。 - 请基于上述内容判断文件是否属于招标文件。 - """ - file_id = upload_file(file_path) - model_res = qianwen_long(file_id, user_query) - print(f"判断是否属于招标文件:{model_res}") - if '否' in model_res: - return False - return True - - except Exception as e: - print(f"处理PDF文件时出错: {e}") - return False - -if __name__ == '__main__': - pdf_path=r"C:\Users\Administrator\Desktop\测试信号测试信号.docx" - res=judge_zbfile(pdf_path) - if res: - print("yes") - else: - print("no") \ No newline at end of file diff --git a/flask_app/routes/judge_zbfile.py b/flask_app/routes/judge_zbfile.py index aa67822..6086445 100644 --- a/flask_app/routes/judge_zbfile.py +++ b/flask_app/routes/judge_zbfile.py @@ -1,10 +1,111 @@ -# from flask_app.ConnectionLimiter import require_connection_limit -# from flask import Blueprint -# -# from flask_app.routes.utils import validate_and_setup_logger -# -# judge_zbfile_bp = Blueprint('judge_zbfile', __name__) -# @judge_zbfile_bp.route('/judge_zbfile', methods=['POST']) -# @validate_and_setup_logger +import os +import threading +import time +from enum import Enum +from typing import Any +from flask import Blueprint, g +from flask_app.general.format_change import download_file +from flask_app.routes.判断是否是招标文件 import judge_zbfile_exec +from flask_app.routes.utils import validate_and_setup_logger, create_response_normal + +judge_zbfile_bp = Blueprint('judge_zbfile', __name__) +class JudgeResult(Enum): + ERROR = 1 + YES = 2 + NO = 3 +@judge_zbfile_bp.route('/judge_zbfile', methods=['POST']) +@validate_and_setup_logger # @require_connection_limit(timeout=30) -# def judge_zbfile(): \ No newline at end of file +def judge_zbfile() -> Any: + """ + 主函数,调用 wrapper 并设置整个接口的超时时时间。如果超时返回默认值。 + """ + logger = g.logger + file_url = g.file_url + output_folder = g.output_folder + + result = [None] # 用于存储结果的可变对象 + done = threading.Event() # 标志判断是否完成 + + def wrapper() -> None: + """ + 包装整个 judge_zbfile 的函数逻辑 + """ + try: + start_time = time.time() + downloaded_filename = os.path.join(output_folder, "ztbfile") + downloaded_filepath, file_type = download_file(file_url, downloaded_filename) + + if not downloaded_filepath or file_type == 4: + logger.error("下载地址不存在或不支持的文件类型!") + result[0] = JudgeResult.ERROR + return + + logger.info(f"Local file path: {downloaded_filepath}") + + # 调用实际的判断函数 + judge_result = judge_zbfile_exec(downloaded_filepath) + judge = JudgeResult.YES if judge_result else JudgeResult.NO + + end_time = time.time() + logger.info(f"接口实际耗时:{end_time - start_time:.2f} 秒") + result[0] = judge + + except Exception as e: + logger.error(f'Exception occurred: {e}') + result[0] = JudgeResult.ERROR + finally: + done.set() + + # 启动后台线程执行 wrapper + thread = threading.Thread(target=wrapper, daemon=True) + thread.start() + + # ****设置整个接口的超时时间,如果超时,会默认返回create_response_normal,但wrapper()仍继续执行! + timeout = 15 + finished_in_time = done.wait(timeout) + if not finished_in_time: + logger.warning("整个接口执行超时,返回默认值 'yes'") + # 如果超时,返回默认响应 + return create_response_normal( + message='判断是否为招标文件成功!', + status='success', + data='yes' # 默认返回值 + ) + else: + return build_response(result[0], logger) + +def build_response(judge_result: JudgeResult, logger) -> Any: + """ + 根据 judge_result 构建响应 + """ + if judge_result == JudgeResult.ERROR: + logger.error("下载地址不存在或不支持的文件类型!") + return create_response_normal( + message='下载地址不存在或不支持的文件类型!', + status='error', + data='' + ) + elif judge_result == JudgeResult.YES: + logger.error("判断是否为招标文件成功!YES") + return create_response_normal( + message='判断是否为招标文件成功!', + status='success', + data='yes' + ) + elif judge_result == JudgeResult.NO: + logger.error("判断是否为招标文件成功!NO") + return create_response_normal( + message='判断是否为招标文件成功!', + status='success', + data='no' + ) + else: + # 处理未知的结果 + logger.error("服务器遇到不知名错误!") + return create_response_normal( + message='服务器遇到不知名错误!', + status='error', + data='' + ) + diff --git a/flask_app/routes/little_zbparse.py b/flask_app/routes/little_zbparse.py index aedc837..90c59a6 100644 --- a/flask_app/routes/little_zbparse.py +++ b/flask_app/routes/little_zbparse.py @@ -2,35 +2,14 @@ import json import os -from flask import Blueprint, jsonify, g +from flask import Blueprint, g from flask_app.ConnectionLimiter import require_connection_limit from flask_app.general.format_change import download_file from flask_app.routes.小解析main import little_parse_main -from flask_app.routes.utils import validate_and_setup_logger +from flask_app.routes.utils import validate_and_setup_logger, create_response_normal little_zbparse_bp = Blueprint('little_zbparse', __name__) - -def create_response(message, status, data=''): - """ - 创建统一格式的 JSON 响应。 - - 参数: - message (str): 响应消息。 - status (str): 状态标记,'success' 或 'error'。 - data (str, optional): 响应数据。默认为空字符串。 - status_code (int, optional): HTTP 状态码。默认为 200。 - - 返回: - Response: Flask 响应对象。 - """ - response = jsonify({ - 'message': message, - 'status': status, - 'data': data - }) - return response - @little_zbparse_bp.route('/little_zbparse', methods=['POST']) @validate_and_setup_logger @require_connection_limit(timeout=300) @@ -39,17 +18,13 @@ def little_zbparse(): file_url = g.file_url zb_type = g.zb_type - # 检查是否为错误响应(假设装饰器返回的是一个响应元组) - if isinstance(file_url, tuple): - return file_url - try: logger.info(f"Starting parsing URL: {file_url}") final_json_path = download_and_process_file(file_url, zb_type) if not final_json_path: logger.info(f"上传的文件非招标文件或文件内容不完整!") - return create_response( + return create_response_normal( message='上传的文件非招标文件或文件内容不完整!', status='error', data='' @@ -60,7 +35,7 @@ def little_zbparse(): except Exception as e: logger.error(f'Exception occurred: {e}') - return create_response( + return create_response_normal( message='解析遇到不知名错误!', status='error', data='' @@ -106,7 +81,7 @@ def generate_response(final_json_path): if not os.path.exists(final_json_path): logger.error(f'final_json 未找到!: {final_json_path}') - return create_response( + return create_response_normal( message='final_json not found', status='error', data='' @@ -118,13 +93,13 @@ def generate_response(final_json_path): json_str = json.dumps(zbparse_data, ensure_ascii=False) except Exception as e: logger.error(f'Error reading or parsing final_json: {e}') - return create_response( + return create_response_normal( message='Error processing final_json.', status='error', data='' ) - return create_response( + return create_response_normal( message='Little Parse processed successfully', status='success', data=json_str diff --git a/flask_app/routes/utils.py b/flask_app/routes/utils.py index 3a5148f..e929994 100644 --- a/flask_app/routes/utils.py +++ b/flask_app/routes/utils.py @@ -15,7 +15,7 @@ def validate_request(): if not request.is_json: return jsonify({'error': 'Missing JSON in request'}), 400 file_url = request.json.get('file_url') - zb_type = request.json.get('zb_type', 1) + zb_type = request.json.get('zb_type', 2) #zb_type:默认按货物标解析 if not file_url: return jsonify({'error': 'No file URL provided'}), 400 try: @@ -110,12 +110,13 @@ def validate_and_setup_logger(f): # 根据蓝图确定子文件夹 blueprint = request.blueprint subfolder_map = { + 'judge_zbfile': 'output4', 'get_deviation': 'output3', 'little_zbparse': 'output2', 'upload': 'output1', 'test_zbparse': 'test_output' } - subfolder = subfolder_map.get(blueprint, 'output1') + subfolder = subfolder_map.get(blueprint, 'test_output') # 创建 logger 和 output_folder create_logger(current_app, subfolder) @@ -144,6 +145,25 @@ def perform_cleanup(output_folder, logger): except Exception as e: logger.error(f"清理过程中发生异常: {str(e)}") +def create_response_normal(message, status, data=''): + """ + 创建统一格式的 JSON 响应。 + + 参数: + message (str): 响应消息。 + status (str): 状态标记,'success' 或 'error'。 + data (str, optional): 响应数据。默认为空字符串。 + status_code (int, optional): HTTP 状态码。默认为 200。 + + 返回: + Response: Flask 响应对象。 + """ + response = jsonify({ + 'message': message, + 'status': status, + 'data': data + }) + return response def create_response(message, status, data): """ 创建一个统一格式的响应字典。 diff --git a/flask_app/routes/偏离表main.py b/flask_app/routes/偏离表main.py index 954f253..8ff2eea 100644 --- a/flask_app/routes/偏离表main.py +++ b/flask_app/routes/偏离表main.py @@ -3,11 +3,9 @@ import os import time from copy import deepcopy -from flask_app.general.doubao import doubao_model -from flask_app.general.format_change import pdf2docx, docx2pdf,doc2docx +from flask_app.general.format_change import docx2pdf,doc2docx from flask_app.general.json_utils import clean_json_string, rename_outer_key from flask_app.general.merge_pdfs import merge_pdfs -from flask_app.general.判断是否是招标文件 import judge_zbfile from flask_app.general.通义千问long import qianwen_plus from flask_app.general.通用功能函数 import get_global_logger from flask_app.general.截取pdf_main import truncate_pdf_multiple diff --git a/flask_app/routes/判断是否是招标文件.py b/flask_app/routes/判断是否是招标文件.py new file mode 100644 index 0000000..b8e3494 --- /dev/null +++ b/flask_app/routes/判断是否是招标文件.py @@ -0,0 +1,54 @@ +import time +import multiprocessing +from concurrent.futures import ThreadPoolExecutor, TimeoutError +from queue import Queue +from PyPDF2 import PdfReader # 确保已安装 PyPDF2: pip install PyPDF2 + +from flask_app.general.通义千问long import upload_file, qianwen_long + + +def judge_zbfile_exec(file_path): + """ + 判断文件是否属于招标文件,并返回结果。 + """ + try: + start_time = time.time() + # 检查文件是否为PDF格式 + if file_path.lower().endswith('.pdf'): + reader = PdfReader(file_path) + num_pages = len(reader.pages) + if num_pages <= 5: + return False + # 模拟使用大模型进行判断 + user_query = """该文件是否属于招标文件?如果是的话,请返回'是',如果不是的话,返回'否'。请不要返回其他解释或内容。 + 以下是常见的招标文件类型: + 公开招标文件、邀请招标文件、竞争性谈判文件、竞争性磋商文件、询价文件、问询文件、货物类招标文件、工程类招标文件、施工类招标文件、服务类招标文件、比选文件。 + 若有未涵盖的类型,但其内容明确表达了项目需求、采购或招标信息,且包含指导投标人参与的关键要素,则可视为招标文件。 + 请基于上述内容判断文件是否属于招标文件。 + """ + file_id = upload_file(file_path) + model_res = qianwen_long(file_id, user_query) + end_time = time.time() + print(f"judge_zbfile_exec实际耗时:{end_time - start_time:.2f} 秒") + print(f"判断是否属于招标文件:{model_res}") + + # 根据模型返回结果判断 + if '否' in model_res: + return False + else: + return True + + except Exception as e: + print(f"处理文件时出错: {e}") + return False + +if __name__ == '__main__': + start_time = time.time() + pdf_path = r"C:/Users/Administrator/Downloads/094定稿-湖北工业大学轻武器模拟射击设备采购项目招标文件 - 副本.pdf" + res = judge_zbfile_exec(pdf_path) + if res: + print("yes") + else: + print("no") + end_time = time.time() + print(f"整个程序实际耗时:{end_time - start_time:.2f} 秒") diff --git a/flask_app/routes/小解析main.py b/flask_app/routes/小解析main.py index 861b938..1e415cc 100644 --- a/flask_app/routes/小解析main.py +++ b/flask_app/routes/小解析main.py @@ -1,12 +1,10 @@ # -*- encoding:utf-8 -*- import json -import logging import os import time from flask_app.general.format_change import docx2pdf from flask_app.general.json_utils import clean_json_string -from flask_app.general.判断是否是招标文件 import judge_zbfile from flask_app.general.多线程提问 import read_questions_from_file, multi_threading from flask_app.general.通义千问long import upload_file from flask_app.general.通用功能函数 import get_global_logger,aggregate_basic_info diff --git a/flask_app/routes/工程标解析main.py b/flask_app/routes/工程标解析main.py index 0eb9af9..be2f8e1 100644 --- a/flask_app/routes/工程标解析main.py +++ b/flask_app/routes/工程标解析main.py @@ -1,6 +1,5 @@ # -*- encoding:utf-8 -*- import json -import logging import os import time from concurrent.futures import ThreadPoolExecutor @@ -8,7 +7,6 @@ from concurrent.futures import ThreadPoolExecutor from docx import Document from flask_app.general.insert_del_pagemark import insert_mark,delete_mark -from flask_app.general.判断是否是招标文件 import judge_zbfile from flask_app.general.截取pdf_main import truncate_pdf_multiple from flask_app.general.merge_pdfs import merge_pdfs from flask_app.general.通用功能函数 import get_global_logger diff --git a/flask_app/routes/货物标解析main.py b/flask_app/routes/货物标解析main.py index 1fa45fc..d059110 100644 --- a/flask_app/routes/货物标解析main.py +++ b/flask_app/routes/货物标解析main.py @@ -4,7 +4,7 @@ import time from docx import Document -from flask_app.general.format_change import docx2pdf, pdf2docx,doc2docx +from flask_app.general.format_change import docx2pdf, pdf2docx from flask_app.general.insert_del_pagemark import insert_mark, delete_mark from flask_app.general.json_utils import transform_json_values from flask_app.general.通用功能函数 import get_global_logger @@ -17,7 +17,7 @@ from flask_app.货物标.提取json货物标版 import convert_clause_to_json from flask_app.general.无效标和废标公共代码 import combine_find_invalid from flask_app.货物标.资格审查main import combine_qualification_review from flask_app.general.商务技术评分提取 import combine_evaluation_standards -from flask_app.general.判断是否是招标文件 import judge_zbfile + # 创建全局线程池 executor = ThreadPoolExecutor() diff --git a/flask_app/start_up.py b/flask_app/start_up.py index 0c8ea6f..5fad420 100644 --- a/flask_app/start_up.py +++ b/flask_app/start_up.py @@ -8,6 +8,7 @@ from flask_app.routes.little_zbparse import little_zbparse_bp from flask_app.routes.upload import upload_bp from flask_app.routes.test_zbparse import test_zbparse_bp from flask_app.general.清除file_id import delete_file_by_ids,read_file_ids +from flask_app.routes.judge_zbfile import judge_zbfile_bp class FlaskAppWithLimiter(Flask): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -22,9 +23,11 @@ def create_app(): app.register_blueprint(little_zbparse_bp) app.register_blueprint(upload_bp) app.register_blueprint(test_zbparse_bp) + app.register_blueprint(judge_zbfile_bp) app.connection_limiters['upload'] = ConnectionLimiter(max_connections=10) app.connection_limiters['get_deviation'] = ConnectionLimiter(max_connections=10) app.connection_limiters['default'] = ConnectionLimiter(max_connections=10) + app.connection_limiters['judge_zbfile']=ConnectionLimiter(max_connections=30) @app.teardown_request def teardown_request(exception):