1.10
This commit is contained in:
parent
516b23bd9e
commit
06a3c2bbaf
@ -1,38 +0,0 @@
|
||||
from PyPDF2 import PdfReader
|
||||
|
||||
from flask_app.general.通义千问long import upload_file, qianwen_long
|
||||
|
||||
|
||||
def judge_zbfile(file_path):
    """
    Decide whether the file at ``file_path`` is a tender (bidding) document.

    A PDF with five pages or fewer is rejected without consulting the model.
    Otherwise the file is uploaded with ``upload_file`` and ``qianwen_long``
    is asked to answer '是' (yes) or '否' (no).

    Args:
        file_path (str): Local path of the file to classify.

    Returns:
        bool: False when the PDF has at most five pages, when the model's
        answer contains '否', or when any exception is raised along the way;
        True otherwise.
    """
    try:
        # The path is lower-cased first, so a single '.pdf' suffix covers
        # every capitalisation. Note that file existence is NOT checked here;
        # a missing file surfaces as an exception and yields False below.
        if file_path.lower().endswith('.pdf'):
            reader = PdfReader(file_path)
            num_pages = len(reader.pages)
            if num_pages <= 5:
                return False

        # NOTE(review): the source view lost its indentation; the model query
        # is assumed to run for non-PDF files as well -- confirm.
        user_query = (
            "该文件是否属于招标文件?如果是的话,请返回'是',如果不是的话,返回'否'。请不要返回其他解释或内容。\n"
            "以下是常见的招标文件类型:\n"
            "公开招标文件、邀请招标文件、竞争性谈判文件、竞争性磋商文件、询价文件、问询文件、货物类招标文件、工程类招标文件、施工类招标文件、服务类招标文件、比选文件。\n"
            "若有未涵盖的类型,但其内容明确表达了项目需求、采购或招标信息,且包含指导投标人参与的关键要素,则可视为招标文件。\n"
            "请基于上述内容判断文件是否属于招标文件。\n"
        )
        file_id = upload_file(file_path)
        model_res = qianwen_long(file_id, user_query)
        print(f"判断是否属于招标文件:{model_res}")

        # Any occurrence of '否' in the answer is treated as a negative verdict.
        return '否' not in model_res

    except Exception as e:
        # Best-effort classifier: every failure is reported and mapped to False.
        print(f"处理PDF文件时出错: {e}")
        return False
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: classify one local file and print the verdict.
    pdf_path = r"C:\Users\Administrator\Desktop\测试信号测试信号.docx"
    print("yes" if judge_zbfile(pdf_path) else "no")
|
@ -1,10 +1,111 @@
|
||||
# from flask_app.ConnectionLimiter import require_connection_limit
|
||||
# from flask import Blueprint
|
||||
#
|
||||
# from flask_app.routes.utils import validate_and_setup_logger
|
||||
#
|
||||
# judge_zbfile_bp = Blueprint('judge_zbfile', __name__)
|
||||
# @judge_zbfile_bp.route('/judge_zbfile', methods=['POST'])
|
||||
# @validate_and_setup_logger
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
from flask import Blueprint, g
|
||||
from flask_app.general.format_change import download_file
|
||||
from flask_app.routes.判断是否是招标文件 import judge_zbfile_exec
|
||||
from flask_app.routes.utils import validate_and_setup_logger, create_response_normal
|
||||
|
||||
judge_zbfile_bp = Blueprint('judge_zbfile', __name__)
|
||||
class JudgeResult(Enum):
    """Outcome of one tender-document check performed by the endpoint."""

    # The download failed, the file type was rejected, or an exception occurred.
    ERROR = 1
    # The file was judged to be a tender document.
    YES = 2
    # The file was judged not to be a tender document.
    NO = 3
|
||||
@judge_zbfile_bp.route('/judge_zbfile', methods=['POST'])
@validate_and_setup_logger
def judge_zbfile() -> Any:
    """
    Handle POST /judge_zbfile with an overall time limit.

    The download and classification run in a daemon thread. The request
    waits up to 15 seconds for that thread; if it has not finished by then,
    the default answer 'yes' is returned while the thread keeps running in
    the background.

    Returns:
        The response produced by ``create_response_normal`` (on timeout) or
        by ``build_response`` (when the worker finished in time).
    """
    # Read everything needed from ``g`` up front so the worker thread only
    # touches plain local values.
    logger = g.logger
    file_url = g.file_url
    output_folder = g.output_folder

    outcome = [None]              # one-slot holder written by the worker
    finished = threading.Event()  # set once the worker is done, whatever the result

    def wrapper() -> None:
        """Download the file, classify it, and store a JudgeResult in ``outcome``."""
        try:
            began = time.time()
            target_name = os.path.join(output_folder, "ztbfile")
            downloaded_filepath, file_type = download_file(file_url, target_name)

            # file_type 4 is rejected here -- presumably the code for an
            # unsupported type; confirm against download_file.
            if not downloaded_filepath or file_type == 4:
                logger.error("下载地址不存在或不支持的文件类型!")
                outcome[0] = JudgeResult.ERROR
                return

            logger.info(f"Local file path: {downloaded_filepath}")

            # Run the actual classification.
            if judge_zbfile_exec(downloaded_filepath):
                verdict = JudgeResult.YES
            else:
                verdict = JudgeResult.NO

            elapsed = time.time() - began
            logger.info(f"接口实际耗时:{elapsed:.2f} 秒")
            outcome[0] = verdict

        except Exception as e:
            logger.error(f'Exception occurred: {e}')
            outcome[0] = JudgeResult.ERROR
        finally:
            finished.set()

    # Kick off the worker in the background.
    worker = threading.Thread(target=wrapper, daemon=True)
    worker.start()

    # Overall time limit for the endpoint, in seconds.
    timeout = 15
    if finished.wait(timeout):
        return build_response(outcome[0], logger)

    # Timed out: answer with the default value; the worker is not cancelled.
    logger.warning("整个接口执行超时,返回默认值 'yes'")
    return create_response_normal(
        message='判断是否为招标文件成功!',
        status='success',
        data='yes'
    )
|
||||
|
||||
def build_response(judge_result: JudgeResult, logger) -> Any:
    """
    Translate a ``JudgeResult`` into the endpoint's JSON response.

    Args:
        judge_result: Outcome of the check. Any value that is not a known
            ``JudgeResult`` member (for example ``None``) is reported as an
            unknown server error.
        logger: Logger used to record the outcome.

    Returns:
        The response object built by ``create_response_normal``.
    """
    if judge_result == JudgeResult.ERROR:
        logger.error("下载地址不存在或不支持的文件类型!")
        return create_response_normal(
            message='下载地址不存在或不支持的文件类型!',
            status='error',
            data=''
        )

    if judge_result == JudgeResult.YES:
        # A successful verdict is not an error, so it is logged at INFO level.
        logger.info("判断是否为招标文件成功!YES")
        return create_response_normal(
            message='判断是否为招标文件成功!',
            status='success',
            data='yes'
        )

    if judge_result == JudgeResult.NO:
        # Same as above: a negative verdict is still a successful check.
        logger.info("判断是否为招标文件成功!NO")
        return create_response_normal(
            message='判断是否为招标文件成功!',
            status='success',
            data='no'
        )

    # Fallback for an unrecognised result value.
    logger.error("服务器遇到不知名错误!")
    return create_response_normal(
        message='服务器遇到不知名错误!',
        status='error',
        data=''
    )
|
||||
|
||||
|
@ -2,35 +2,14 @@
|
||||
|
||||
import json
|
||||
import os
|
||||
from flask import Blueprint, jsonify, g
|
||||
from flask import Blueprint, g
|
||||
|
||||
from flask_app.ConnectionLimiter import require_connection_limit
|
||||
from flask_app.general.format_change import download_file
|
||||
from flask_app.routes.小解析main import little_parse_main
|
||||
from flask_app.routes.utils import validate_and_setup_logger
|
||||
from flask_app.routes.utils import validate_and_setup_logger, create_response_normal
|
||||
|
||||
little_zbparse_bp = Blueprint('little_zbparse', __name__)
|
||||
|
||||
def create_response(message, status, data=''):
    """
    Build the uniform JSON response used by this blueprint.

    Args:
        message (str): Human-readable response message.
        status (str): Status marker, 'success' or 'error'.
        data (str, optional): Response payload. Defaults to an empty string.

    Returns:
        Response: The Flask response object produced by ``jsonify``.
    """
    payload = {
        'message': message,
        'status': status,
        'data': data,
    }
    return jsonify(payload)
|
||||
|
||||
@little_zbparse_bp.route('/little_zbparse', methods=['POST'])
|
||||
@validate_and_setup_logger
|
||||
@require_connection_limit(timeout=300)
|
||||
@ -39,17 +18,13 @@ def little_zbparse():
|
||||
file_url = g.file_url
|
||||
zb_type = g.zb_type
|
||||
|
||||
# 检查是否为错误响应(假设装饰器返回的是一个响应元组)
|
||||
if isinstance(file_url, tuple):
|
||||
return file_url
|
||||
|
||||
try:
|
||||
logger.info(f"Starting parsing URL: {file_url}")
|
||||
final_json_path = download_and_process_file(file_url, zb_type)
|
||||
|
||||
if not final_json_path:
|
||||
logger.info(f"上传的文件非招标文件或文件内容不完整!")
|
||||
return create_response(
|
||||
return create_response_normal(
|
||||
message='上传的文件非招标文件或文件内容不完整!',
|
||||
status='error',
|
||||
data=''
|
||||
@ -60,7 +35,7 @@ def little_zbparse():
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f'Exception occurred: {e}')
|
||||
return create_response(
|
||||
return create_response_normal(
|
||||
message='解析遇到不知名错误!',
|
||||
status='error',
|
||||
data=''
|
||||
@ -106,7 +81,7 @@ def generate_response(final_json_path):
|
||||
|
||||
if not os.path.exists(final_json_path):
|
||||
logger.error(f'final_json 未找到!: {final_json_path}')
|
||||
return create_response(
|
||||
return create_response_normal(
|
||||
message='final_json not found',
|
||||
status='error',
|
||||
data=''
|
||||
@ -118,13 +93,13 @@ def generate_response(final_json_path):
|
||||
json_str = json.dumps(zbparse_data, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
logger.error(f'Error reading or parsing final_json: {e}')
|
||||
return create_response(
|
||||
return create_response_normal(
|
||||
message='Error processing final_json.',
|
||||
status='error',
|
||||
data=''
|
||||
)
|
||||
|
||||
return create_response(
|
||||
return create_response_normal(
|
||||
message='Little Parse processed successfully',
|
||||
status='success',
|
||||
data=json_str
|
||||
|
@ -15,7 +15,7 @@ def validate_request():
|
||||
if not request.is_json:
|
||||
return jsonify({'error': 'Missing JSON in request'}), 400
|
||||
file_url = request.json.get('file_url')
|
||||
zb_type = request.json.get('zb_type', 1)
|
||||
zb_type = request.json.get('zb_type', 2) #zb_type:默认按货物标解析
|
||||
if not file_url:
|
||||
return jsonify({'error': 'No file URL provided'}), 400
|
||||
try:
|
||||
@ -110,12 +110,13 @@ def validate_and_setup_logger(f):
|
||||
# 根据蓝图确定子文件夹
|
||||
blueprint = request.blueprint
|
||||
subfolder_map = {
|
||||
'judge_zbfile': 'output4',
|
||||
'get_deviation': 'output3',
|
||||
'little_zbparse': 'output2',
|
||||
'upload': 'output1',
|
||||
'test_zbparse': 'test_output'
|
||||
}
|
||||
subfolder = subfolder_map.get(blueprint, 'output1')
|
||||
subfolder = subfolder_map.get(blueprint, 'test_output')
|
||||
|
||||
# 创建 logger 和 output_folder
|
||||
create_logger(current_app, subfolder)
|
||||
@ -144,6 +145,25 @@ def perform_cleanup(output_folder, logger):
|
||||
except Exception as e:
|
||||
logger.error(f"清理过程中发生异常: {str(e)}")
|
||||
|
||||
def create_response_normal(message, status, data=''):
    """
    Build a JSON response in the project's uniform format.

    Args:
        message (str): Human-readable response message.
        status (str): Status marker, 'success' or 'error'.
        data (str, optional): Response payload. Defaults to an empty string.

    Returns:
        Response: The Flask response object produced by ``jsonify``.
    """
    body = {'message': message, 'status': status, 'data': data}
    return jsonify(body)
|
||||
def create_response(message, status, data):
|
||||
"""
|
||||
创建一个统一格式的响应字典。
|
||||
|
@ -3,11 +3,9 @@ import os
|
||||
import time
|
||||
from copy import deepcopy
|
||||
|
||||
from flask_app.general.doubao import doubao_model
|
||||
from flask_app.general.format_change import pdf2docx, docx2pdf,doc2docx
|
||||
from flask_app.general.format_change import docx2pdf,doc2docx
|
||||
from flask_app.general.json_utils import clean_json_string, rename_outer_key
|
||||
from flask_app.general.merge_pdfs import merge_pdfs
|
||||
from flask_app.general.判断是否是招标文件 import judge_zbfile
|
||||
from flask_app.general.通义千问long import qianwen_plus
|
||||
from flask_app.general.通用功能函数 import get_global_logger
|
||||
from flask_app.general.截取pdf_main import truncate_pdf_multiple
|
||||
|
54
flask_app/routes/判断是否是招标文件.py
Normal file
54
flask_app/routes/判断是否是招标文件.py
Normal file
@ -0,0 +1,54 @@
|
||||
import time
|
||||
import multiprocessing
|
||||
from concurrent.futures import ThreadPoolExecutor, TimeoutError
|
||||
from queue import Queue
|
||||
from PyPDF2 import PdfReader # 确保已安装 PyPDF2: pip install PyPDF2
|
||||
|
||||
from flask_app.general.通义千问long import upload_file, qianwen_long
|
||||
|
||||
|
||||
def judge_zbfile_exec(file_path):
    """
    Classify the file at ``file_path`` as a tender document or not.

    A PDF with at most five pages is rejected up front. Otherwise the file is
    uploaded with ``upload_file`` and ``qianwen_long`` is asked for a
    '是' / '否' verdict; the elapsed time and the raw answer are printed.

    Args:
        file_path (str): Local path of the file to classify.

    Returns:
        bool: True unless the PDF is too short, the model's answer contains
        '否', or any exception occurs (all of which give False).
    """
    try:
        began = time.time()

        # Short PDFs are rejected without calling the model.
        is_pdf = file_path.lower().endswith('.pdf')
        if is_pdf and len(PdfReader(file_path).pages) <= 5:
            return False

        # Ask the large model for the verdict.
        # NOTE(review): the source view lost its indentation; the query is
        # assumed to run for non-PDF files as well -- confirm.
        prompt = (
            "该文件是否属于招标文件?如果是的话,请返回'是',如果不是的话,返回'否'。请不要返回其他解释或内容。\n"
            "以下是常见的招标文件类型:\n"
            "公开招标文件、邀请招标文件、竞争性谈判文件、竞争性磋商文件、询价文件、问询文件、货物类招标文件、工程类招标文件、施工类招标文件、服务类招标文件、比选文件。\n"
            "若有未涵盖的类型,但其内容明确表达了项目需求、采购或招标信息,且包含指导投标人参与的关键要素,则可视为招标文件。\n"
            "请基于上述内容判断文件是否属于招标文件。\n"
        )
        uploaded_id = upload_file(file_path)
        model_res = qianwen_long(uploaded_id, prompt)

        elapsed = time.time() - began
        print(f"judge_zbfile_exec实际耗时:{elapsed:.2f} 秒")
        print(f"判断是否属于招标文件:{model_res}")

        # A '否' anywhere in the answer means "not a tender document".
        return '否' not in model_res

    except Exception as e:
        # Every failure is reported on stdout and treated as a negative verdict.
        print(f"处理文件时出错: {e}")
        return False
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: classify one local PDF and report the total run time.
    start_time = time.time()
    pdf_path = r"C:/Users/Administrator/Downloads/094定稿-湖北工业大学轻武器模拟射击设备采购项目招标文件 - 副本.pdf"
    res = judge_zbfile_exec(pdf_path)
    print("yes" if res else "no")
    end_time = time.time()
    print(f"整个程序实际耗时:{end_time - start_time:.2f} 秒")
|
@ -1,12 +1,10 @@
|
||||
# -*- encoding:utf-8 -*-
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
|
||||
from flask_app.general.format_change import docx2pdf
|
||||
from flask_app.general.json_utils import clean_json_string
|
||||
from flask_app.general.判断是否是招标文件 import judge_zbfile
|
||||
from flask_app.general.多线程提问 import read_questions_from_file, multi_threading
|
||||
from flask_app.general.通义千问long import upload_file
|
||||
from flask_app.general.通用功能函数 import get_global_logger,aggregate_basic_info
|
||||
|
@ -1,6 +1,5 @@
|
||||
# -*- encoding:utf-8 -*-
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
@ -8,7 +7,6 @@ from concurrent.futures import ThreadPoolExecutor
|
||||
from docx import Document
|
||||
|
||||
from flask_app.general.insert_del_pagemark import insert_mark,delete_mark
|
||||
from flask_app.general.判断是否是招标文件 import judge_zbfile
|
||||
from flask_app.general.截取pdf_main import truncate_pdf_multiple
|
||||
from flask_app.general.merge_pdfs import merge_pdfs
|
||||
from flask_app.general.通用功能函数 import get_global_logger
|
||||
|
@ -4,7 +4,7 @@ import time
|
||||
|
||||
from docx import Document
|
||||
|
||||
from flask_app.general.format_change import docx2pdf, pdf2docx,doc2docx
|
||||
from flask_app.general.format_change import docx2pdf, pdf2docx
|
||||
from flask_app.general.insert_del_pagemark import insert_mark, delete_mark
|
||||
from flask_app.general.json_utils import transform_json_values
|
||||
from flask_app.general.通用功能函数 import get_global_logger
|
||||
@ -17,7 +17,7 @@ from flask_app.货物标.提取json货物标版 import convert_clause_to_json
|
||||
from flask_app.general.无效标和废标公共代码 import combine_find_invalid
|
||||
from flask_app.货物标.资格审查main import combine_qualification_review
|
||||
from flask_app.general.商务技术评分提取 import combine_evaluation_standards
|
||||
from flask_app.general.判断是否是招标文件 import judge_zbfile
|
||||
|
||||
# 创建全局线程池
|
||||
executor = ThreadPoolExecutor()
|
||||
|
||||
|
@ -8,6 +8,7 @@ from flask_app.routes.little_zbparse import little_zbparse_bp
|
||||
from flask_app.routes.upload import upload_bp
|
||||
from flask_app.routes.test_zbparse import test_zbparse_bp
|
||||
from flask_app.general.清除file_id import delete_file_by_ids,read_file_ids
|
||||
from flask_app.routes.judge_zbfile import judge_zbfile_bp
|
||||
class FlaskAppWithLimiter(Flask):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
@ -22,9 +23,11 @@ def create_app():
|
||||
app.register_blueprint(little_zbparse_bp)
|
||||
app.register_blueprint(upload_bp)
|
||||
app.register_blueprint(test_zbparse_bp)
|
||||
app.register_blueprint(judge_zbfile_bp)
|
||||
app.connection_limiters['upload'] = ConnectionLimiter(max_connections=10)
|
||||
app.connection_limiters['get_deviation'] = ConnectionLimiter(max_connections=10)
|
||||
app.connection_limiters['default'] = ConnectionLimiter(max_connections=10)
|
||||
app.connection_limiters['judge_zbfile']=ConnectionLimiter(max_connections=30)
|
||||
|
||||
@app.teardown_request
|
||||
def teardown_request(exception):
|
||||
|
Loading…
x
Reference in New Issue
Block a user