This commit is contained in:
zy123 2025-01-10 14:30:35 +08:00
parent 516b23bd9e
commit 06a3c2bbaf
10 changed files with 199 additions and 90 deletions

View File

@ -1,38 +0,0 @@
from PyPDF2 import PdfReader
from flask_app.general.通义千问long import upload_file, qianwen_long
def judge_zbfile(file_path):
    """
    Decide whether the file at *file_path* is a tender (bidding) document.

    A PDF with five pages or fewer is rejected without consulting the model.
    Any other file is uploaded and classified by the long-context model, which
    is asked to answer with a single explicit token.

    Args:
        file_path: Path of the local file to classify.

    Returns:
        bool: False when the PDF has at most five pages, when the model's
        answer contains the negative token, or when any step raises;
        True otherwise.
    """
    try:
        # Lower-casing first already makes the suffix test case-insensitive,
        # so a single '.pdf' suffix is enough.
        if file_path.lower().endswith('.pdf'):
            reader = PdfReader(file_path)
            num_pages = len(reader.pages)
            if num_pages <= 5:
                return False
        # The prompt must name concrete answer tokens; with empty tokens the
        # answer cannot be interpreted afterwards.
        user_query = """该文件是否属于招标文件?如果是的话,请返回'是',如果不是的话,返回'否'。请不要返回其他解释或内容。
以下是常见的招标文件类型
公开招标文件邀请招标文件竞争性谈判文件竞争性磋商文件询价文件问询文件货物类招标文件工程类招标文件施工类招标文件服务类招标文件比选文件
若有未涵盖的类型但其内容明确表达了项目需求采购或招标信息且包含指导投标人参与的关键要素则可视为招标文件
请基于上述内容判断文件是否属于招标文件
"""
        file_id = upload_file(file_path)
        model_res = qianwen_long(file_id, user_query)
        print(f"判断是否属于招标文件:{model_res}")
        # `'' in model_res` is true for every string, which made this function
        # return False unconditionally; test for the negative token instead.
        if '否' in model_res:
            return False
        return True
    except Exception as e:
        # Best-effort classification: any failure is reported and treated as
        # "not a tender document".
        print(f"处理PDF文件时出错: {e}")
        return False
if __name__ == '__main__':
    # Manual smoke test: classify one local file and print the verdict.
    pdf_path=r"C:\Users\Administrator\Desktop\测试信号测试信号.docx"
    print("yes" if judge_zbfile(pdf_path) else "no")

View File

@ -1,10 +1,111 @@
# from flask_app.ConnectionLimiter import require_connection_limit
# from flask import Blueprint
#
# from flask_app.routes.utils import validate_and_setup_logger
#
# judge_zbfile_bp = Blueprint('judge_zbfile', __name__)
# @judge_zbfile_bp.route('/judge_zbfile', methods=['POST'])
# @validate_and_setup_logger
import os
import threading
import time
from enum import Enum
from typing import Any
from flask import Blueprint, g
from flask_app.general.format_change import download_file
from flask_app.routes.判断是否是招标文件 import judge_zbfile_exec
from flask_app.routes.utils import validate_and_setup_logger, create_response_normal
judge_zbfile_bp = Blueprint('judge_zbfile', __name__)
class JudgeResult(Enum):
    """Outcome of one tender-document judgement request."""

    # Download failed, the file type is unsupported, or the worker raised.
    ERROR = 1
    # The judgement function returned a truthy value.
    YES = 2
    # The judgement function returned a falsy value.
    NO = 3
@judge_zbfile_bp.route('/judge_zbfile', methods=['POST'])
@validate_and_setup_logger
# NOTE: the connection limiter decorator (require_connection_limit, timeout=30)
# is currently disabled for this route.
def judge_zbfile() -> Any:
    """
    Handle POST /judge_zbfile with an overall time limit.

    The download and judgement run in a background daemon thread. If that
    thread has not finished within the timeout, a default 'yes' response is
    returned while the thread keeps running.

    Returns:
        The response built by ``create_response_normal`` (on timeout) or by
        ``build_response`` (when the worker finished in time).
    """
    # Read the request-scoped values up front; the worker thread below only
    # touches these plain locals.
    logger = g.logger
    file_url = g.file_url
    output_folder = g.output_folder

    outcome = [None]  # single-slot mutable holder written by the worker
    finished = threading.Event()  # set once the worker is done, success or not

    def wrapper() -> None:
        """Download the file, judge it, and store a JudgeResult in the holder."""
        try:
            start_time = time.time()
            target_path = os.path.join(output_folder, "ztbfile")
            downloaded_filepath, file_type = download_file(file_url, target_path)
            download_ok = bool(downloaded_filepath) and file_type != 4
            if not download_ok:
                logger.error("下载地址不存在或不支持的文件类型!")
                outcome[0] = JudgeResult.ERROR
                return
            logger.info(f"Local file path: {downloaded_filepath}")
            # Run the actual judgement and map its truthiness onto the enum.
            if judge_zbfile_exec(downloaded_filepath):
                judge = JudgeResult.YES
            else:
                judge = JudgeResult.NO
            end_time = time.time()
            logger.info(f"接口实际耗时:{end_time - start_time:.2f}")
            outcome[0] = judge
        except Exception as e:
            logger.error(f'Exception occurred: {e}')
            outcome[0] = JudgeResult.ERROR
        finally:
            # Always release the waiting request handler.
            finished.set()

    # Run the worker in the background so the request can time out on its own.
    threading.Thread(target=wrapper, daemon=True).start()

    # Overall time limit for the endpoint, in seconds.
    timeout = 15
    if finished.wait(timeout):
        return build_response(outcome[0], logger)

    # Timed out: answer with the default value; the worker is not cancelled.
    logger.warning("整个接口执行超时,返回默认值 'yes'")
    return create_response_normal(
        message='判断是否为招标文件成功!',
        status='success',
        data='yes'
    )
def build_response(judge_result: JudgeResult, logger) -> Any:
    """
    Build the HTTP response that corresponds to a judgement outcome.

    Args:
        judge_result: Outcome produced by the judgement worker.
        logger: Logger used to record the outcome.

    Returns:
        The response created by ``create_response_normal``: status 'success'
        with data 'yes' or 'no' for YES/NO, status 'error' with empty data
        for ERROR or for any unrecognised value.
    """
    if judge_result == JudgeResult.ERROR:
        logger.error("下载地址不存在或不支持的文件类型!")
        return create_response_normal(
            message='下载地址不存在或不支持的文件类型!',
            status='error',
            data=''
        )
    elif judge_result == JudgeResult.YES:
        # Successful outcome: log at info level, not error.
        logger.info("判断是否为招标文件成功YES")
        return create_response_normal(
            message='判断是否为招标文件成功!',
            status='success',
            data='yes'
        )
    elif judge_result == JudgeResult.NO:
        # Successful outcome: log at info level, not error.
        logger.info("判断是否为招标文件成功NO")
        return create_response_normal(
            message='判断是否为招标文件成功!',
            status='success',
            data='no'
        )
    else:
        # Unknown result (for example None): report a server-side error.
        logger.error("服务器遇到不知名错误!")
        return create_response_normal(
            message='服务器遇到不知名错误!',
            status='error',
            data=''
        )

View File

@ -2,35 +2,14 @@
import json
import os
from flask import Blueprint, jsonify, g
from flask import Blueprint, g
from flask_app.ConnectionLimiter import require_connection_limit
from flask_app.general.format_change import download_file
from flask_app.routes.小解析main import little_parse_main
from flask_app.routes.utils import validate_and_setup_logger
from flask_app.routes.utils import validate_and_setup_logger, create_response_normal
little_zbparse_bp = Blueprint('little_zbparse', __name__)
def create_response(message, status, data=''):
    """
    Build a JSON response in the unified format.

    Args:
        message (str): Response message.
        status (str): Status marker, 'success' or 'error'.
        data (str, optional): Response payload; defaults to an empty string.

    Returns:
        Response: Flask response object produced by ``jsonify``.
    """
    payload = {
        'message': message,
        'status': status,
        'data': data
    }
    return jsonify(payload)
@little_zbparse_bp.route('/little_zbparse', methods=['POST'])
@validate_and_setup_logger
@require_connection_limit(timeout=300)
@ -39,17 +18,13 @@ def little_zbparse():
file_url = g.file_url
zb_type = g.zb_type
# 检查是否为错误响应(假设装饰器返回的是一个响应元组)
if isinstance(file_url, tuple):
return file_url
try:
logger.info(f"Starting parsing URL: {file_url}")
final_json_path = download_and_process_file(file_url, zb_type)
if not final_json_path:
logger.info(f"上传的文件非招标文件或文件内容不完整!")
return create_response(
return create_response_normal(
message='上传的文件非招标文件或文件内容不完整!',
status='error',
data=''
@ -60,7 +35,7 @@ def little_zbparse():
except Exception as e:
logger.error(f'Exception occurred: {e}')
return create_response(
return create_response_normal(
message='解析遇到不知名错误!',
status='error',
data=''
@ -106,7 +81,7 @@ def generate_response(final_json_path):
if not os.path.exists(final_json_path):
logger.error(f'final_json 未找到!: {final_json_path}')
return create_response(
return create_response_normal(
message='final_json not found',
status='error',
data=''
@ -118,13 +93,13 @@ def generate_response(final_json_path):
json_str = json.dumps(zbparse_data, ensure_ascii=False)
except Exception as e:
logger.error(f'Error reading or parsing final_json: {e}')
return create_response(
return create_response_normal(
message='Error processing final_json.',
status='error',
data=''
)
return create_response(
return create_response_normal(
message='Little Parse processed successfully',
status='success',
data=json_str

View File

@ -15,7 +15,7 @@ def validate_request():
if not request.is_json:
return jsonify({'error': 'Missing JSON in request'}), 400
file_url = request.json.get('file_url')
zb_type = request.json.get('zb_type', 1)
zb_type = request.json.get('zb_type', 2) #zb_type:默认按货物标解析
if not file_url:
return jsonify({'error': 'No file URL provided'}), 400
try:
@ -110,12 +110,13 @@ def validate_and_setup_logger(f):
# 根据蓝图确定子文件夹
blueprint = request.blueprint
subfolder_map = {
'judge_zbfile': 'output4',
'get_deviation': 'output3',
'little_zbparse': 'output2',
'upload': 'output1',
'test_zbparse': 'test_output'
}
subfolder = subfolder_map.get(blueprint, 'output1')
subfolder = subfolder_map.get(blueprint, 'test_output')
# 创建 logger 和 output_folder
create_logger(current_app, subfolder)
@ -144,6 +145,25 @@ def perform_cleanup(output_folder, logger):
except Exception as e:
logger.error(f"清理过程中发生异常: {str(e)}")
def create_response_normal(message, status, data=''):
    """
    Build a JSON response in the unified format.

    Args:
        message (str): Response message.
        status (str): Status marker, 'success' or 'error'.
        data (str, optional): Response payload; defaults to an empty string.

    Returns:
        Response: Flask response object produced by ``jsonify``.
    """
    payload = {
        'message': message,
        'status': status,
        'data': data
    }
    return jsonify(payload)
def create_response(message, status, data):
"""
创建一个统一格式的响应字典

View File

@ -3,11 +3,9 @@ import os
import time
from copy import deepcopy
from flask_app.general.doubao import doubao_model
from flask_app.general.format_change import pdf2docx, docx2pdf,doc2docx
from flask_app.general.format_change import docx2pdf,doc2docx
from flask_app.general.json_utils import clean_json_string, rename_outer_key
from flask_app.general.merge_pdfs import merge_pdfs
from flask_app.general.判断是否是招标文件 import judge_zbfile
from flask_app.general.通义千问long import qianwen_plus
from flask_app.general.通用功能函数 import get_global_logger
from flask_app.general.截取pdf_main import truncate_pdf_multiple

View File

@ -0,0 +1,54 @@
import time
import multiprocessing
from concurrent.futures import ThreadPoolExecutor, TimeoutError
from queue import Queue
from PyPDF2 import PdfReader # 确保已安装 PyPDF2: pip install PyPDF2
from flask_app.general.通义千问long import upload_file, qianwen_long
def judge_zbfile_exec(file_path):
    """
    Decide whether the file at *file_path* is a tender (bidding) document.

    A PDF with five pages or fewer is rejected without consulting the model.
    Any other file is uploaded and classified by the long-context model, which
    is asked to answer with a single explicit token. The elapsed time and the
    raw model answer are printed.

    Args:
        file_path: Path of the local file to classify.

    Returns:
        bool: False when the PDF has at most five pages, when the model's
        answer contains the negative token, or when any step raises;
        True otherwise.
    """
    try:
        start_time = time.time()
        # Only PDFs get the cheap page-count pre-check.
        if file_path.lower().endswith('.pdf'):
            reader = PdfReader(file_path)
            num_pages = len(reader.pages)
            if num_pages <= 5:
                return False
        # The prompt must name concrete answer tokens; with empty tokens the
        # answer cannot be interpreted afterwards.
        user_query = """该文件是否属于招标文件?如果是的话,请返回'是',如果不是的话,返回'否'。请不要返回其他解释或内容。
以下是常见的招标文件类型
公开招标文件邀请招标文件竞争性谈判文件竞争性磋商文件询价文件问询文件货物类招标文件工程类招标文件施工类招标文件服务类招标文件比选文件
若有未涵盖的类型但其内容明确表达了项目需求采购或招标信息且包含指导投标人参与的关键要素则可视为招标文件
请基于上述内容判断文件是否属于招标文件
"""
        file_id = upload_file(file_path)
        model_res = qianwen_long(file_id, user_query)
        end_time = time.time()
        print(f"judge_zbfile_exec实际耗时:{end_time - start_time:.2f}")
        print(f"判断是否属于招标文件:{model_res}")
        # `'' in model_res` is true for every string, which made this function
        # return False unconditionally; test for the negative token instead.
        if '否' in model_res:
            return False
        return True
    except Exception as e:
        # Best-effort classification: any failure is reported and treated as
        # "not a tender document".
        print(f"处理文件时出错: {e}")
        return False
if __name__ == '__main__':
    # Manual smoke test: classify one local file and report the total time.
    start_time = time.time()
    pdf_path = r"C:/Users/Administrator/Downloads/094定稿-湖北工业大学轻武器模拟射击设备采购项目招标文件 - 副本.pdf"
    print("yes" if judge_zbfile_exec(pdf_path) else "no")
    end_time = time.time()
    print(f"整个程序实际耗时:{end_time - start_time:.2f}")

View File

@ -1,12 +1,10 @@
# -*- encoding:utf-8 -*-
import json
import logging
import os
import time
from flask_app.general.format_change import docx2pdf
from flask_app.general.json_utils import clean_json_string
from flask_app.general.判断是否是招标文件 import judge_zbfile
from flask_app.general.多线程提问 import read_questions_from_file, multi_threading
from flask_app.general.通义千问long import upload_file
from flask_app.general.通用功能函数 import get_global_logger,aggregate_basic_info

View File

@ -1,6 +1,5 @@
# -*- encoding:utf-8 -*-
import json
import logging
import os
import time
from concurrent.futures import ThreadPoolExecutor
@ -8,7 +7,6 @@ from concurrent.futures import ThreadPoolExecutor
from docx import Document
from flask_app.general.insert_del_pagemark import insert_mark,delete_mark
from flask_app.general.判断是否是招标文件 import judge_zbfile
from flask_app.general.截取pdf_main import truncate_pdf_multiple
from flask_app.general.merge_pdfs import merge_pdfs
from flask_app.general.通用功能函数 import get_global_logger

View File

@ -4,7 +4,7 @@ import time
from docx import Document
from flask_app.general.format_change import docx2pdf, pdf2docx,doc2docx
from flask_app.general.format_change import docx2pdf, pdf2docx
from flask_app.general.insert_del_pagemark import insert_mark, delete_mark
from flask_app.general.json_utils import transform_json_values
from flask_app.general.通用功能函数 import get_global_logger
@ -17,7 +17,7 @@ from flask_app.货物标.提取json货物标版 import convert_clause_to_json
from flask_app.general.无效标和废标公共代码 import combine_find_invalid
from flask_app.货物标.资格审查main import combine_qualification_review
from flask_app.general.商务技术评分提取 import combine_evaluation_standards
from flask_app.general.判断是否是招标文件 import judge_zbfile
# 创建全局线程池
executor = ThreadPoolExecutor()

View File

@ -8,6 +8,7 @@ from flask_app.routes.little_zbparse import little_zbparse_bp
from flask_app.routes.upload import upload_bp
from flask_app.routes.test_zbparse import test_zbparse_bp
from flask_app.general.清除file_id import delete_file_by_ids,read_file_ids
from flask_app.routes.judge_zbfile import judge_zbfile_bp
class FlaskAppWithLimiter(Flask):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
@ -22,9 +23,11 @@ def create_app():
app.register_blueprint(little_zbparse_bp)
app.register_blueprint(upload_bp)
app.register_blueprint(test_zbparse_bp)
app.register_blueprint(judge_zbfile_bp)
app.connection_limiters['upload'] = ConnectionLimiter(max_connections=10)
app.connection_limiters['get_deviation'] = ConnectionLimiter(max_connections=10)
app.connection_limiters['default'] = ConnectionLimiter(max_connections=10)
app.connection_limiters['judge_zbfile']=ConnectionLimiter(max_connections=30)
@app.teardown_request
def teardown_request(exception):