# flask_app/routes/upload.py
import multiprocessing
from flask import Blueprint, request, jsonify, g
import urllib.parse
import json
import os
import time
from flask_app.general.format_change import download_file
from flask_app.routes.工程标解析main import engineering_bid_main
from flask_app.routes.货物标解析main import goods_bid_main
from flask_app.general.post_processing import outer_post_processing
from flask_app.routes.utils import generate_deviation_response, validate_and_setup_logger, create_response, sse_format, \
    log_error_unique_id
from flask_app.ConnectionLimiter import require_connection_limit, require_execution_timeout

upload_bp = Blueprint('upload', __name__)


def _child_target(main_func, queue, output_folder, file_path, file_type, unique_id):
    """
    Run `main_func` (a generator function) in the child process, putting each
    item it yields onto the queue, followed by a final None to mark the end
    of the stream.
    """
    try:
        for data in main_func(output_folder, file_path, file_type, unique_id):
            queue.put(data)
    except Exception as e:
        # Forward the exception to the parent process so it can react to it.
        queue.put(json.dumps({'error': str(e)}, ensure_ascii=False))
    finally:
        # Always send the sentinel, even on error, so the parent loop can exit.
        queue.put(None)


def run_in_subprocess(main_func, output_folder, file_path, file_type, unique_id):
    """
    Start a child process that runs `main_func(...)` and stream its output to
    the parent through a Queue. When the child exits, the OS reclaims its
    memory, while the parent keeps emitting output in real time.
    """
    queue = multiprocessing.Queue()
    p = multiprocessing.Process(
        target=_child_target,
        args=(main_func, queue, output_folder, file_path, file_type, unique_id)
    )
    p.start()

    while True:
        item = queue.get()  # Block until the child produces data.
        if item is None:
            break
        yield item

    p.join()
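
# Usage sketch (illustrative only; _demo_stream is hypothetical, not part of
# the pipeline): any generator function with the signature
# (output_folder, file_path, file_type, unique_id) can be streamed this way.
#
#   def _demo_stream(output_folder, file_path, file_type, unique_id):
#       yield json.dumps({'progress': 'halfway'}, ensure_ascii=False)
#       yield json.dumps({'progress': 'done'}, ensure_ascii=False)
#
#   for chunk in run_in_subprocess(_demo_stream, '/tmp/out', '/tmp/a.pdf', 1, 'uid-1'):
#       print(chunk)  # each item arrives as soon as the child yields it
#
# Note: if the child dies without putting the None sentinel (e.g. it is
# killed by the OS), queue.get() would block forever; the sentinel in
# _child_target's finally block covers normal exceptions but not hard kills.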


@upload_bp.route('/upload', methods=['POST'])
@validate_and_setup_logger
# @require_connection_limit(timeout=1800)
@require_execution_timeout(timeout=1800)
def zbparse():  # Full-document parse entry point.
    logger = g.logger
    file_name = None  # Defined up front so the outer except can reference it safely.
    try:
        logger.info("大解析开始!!!")
        received_data = request.get_json()
        logger.info("Received JSON data: " + str(received_data))
        file_url = g.file_url
        zb_type = g.zb_type
        file_name = urllib.parse.unquote(os.path.basename(file_url.split('?')[0]))
        logger.info(f"Starting parsing file: {file_name}")
        try:
            logger.info("starting parsing url: " + file_url)
            return process_and_stream(file_url, zb_type)  # The main worker.
        except Exception as e:
            logger.error('Exception occurred: ' + str(e))
            if hasattr(g, 'unique_id'):
                log_error_unique_id(g.unique_id, 1, file_name=file_name)
            error_response = create_response(
                message='处理文件时发生异常',
                status='error',
                data=str(e)
            )
            return jsonify(error_response)
    except Exception as e:
        logger.error('Unexpected exception: ' + str(e))
        if hasattr(g, 'unique_id'):
            log_error_unique_id(g.unique_id, 1, file_name=file_name)
        error_response = create_response(
            message='内部服务器错误',
            status='error',
            data='Internal server error'
        )
        return jsonify(error_response)
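
# Request contract (a sketch; the exact JSON field names are an assumption,
# based on validate_and_setup_logger populating g.file_url and g.zb_type):
#
#   POST /upload
#   Content-Type: application/json
#
#   {"file_url": "https://example.com/tender.pdf", "zb_type": 2}
#
# zb_type 1 selects engineering-bid parsing; anything else falls back to
# goods-bid parsing (see process_and_stream below). The response is a
# Server-Sent Events stream that ends with an event whose 'data' is 'END'.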


def process_and_stream(file_url, zb_type):
    """
    Download the file and process it; supports both engineering-bid and
    goods-bid parsing.
    """
    logger = g.logger
    unique_id = g.unique_id
    output_folder = g.output_folder
    filename = "ztbfile"
    downloaded_filename = os.path.join(output_folder, filename)
    start_time = time.time()
    file_name = urllib.parse.unquote(os.path.basename(file_url.split('?')[0]))
    try:
        downloaded_filepath, file_type = download_file(file_url, downloaded_filename, True)
        if not downloaded_filepath or file_type == 4:
            logger.error("下载文件失败或不支持的文件类型")
            log_error_unique_id(unique_id, 1, file_name=file_name)
            error_response = create_response(
                message='下载文件失败或不支持的文件类型',
                status='error',
                data=''
            )
            yield sse_format(error_response)
            return

        logger.info("本地文件路径: " + downloaded_filepath)

        combined_data = {}
        good_list = None

        processing_functions = {
            1: engineering_bid_main,  # Engineering-bid parsing.
            2: goods_bid_main  # Goods-bid / services-bid parsing.
        }
        processing_func = processing_functions.get(zb_type, goods_bid_main)

        # Receive the engineering-bid / goods-bid parsing output piece by
        # piece, to feed the frontend display.
        for data in run_in_subprocess(processing_func, output_folder, downloaded_filepath, file_type, unique_id):
            if not data.strip():
                logger.error("Received empty data, skipping JSON parsing.")
                continue

            try:
                parsed_data = json.loads(data)
            except json.JSONDecodeError as e:
                logger.error(f"Failed to decode JSON: {e}")
                logger.error(f"Data received: {data}")
                continue

            if 'error' in parsed_data:
                # The child process forwards failures as {'error': ...} (see
                # _child_target); report them in the agreed response format.
                error_message = parsed_data['error']
                logger.error(f"Processing terminated due to error: {error_message}")
                error_response = create_response(
                    message=error_message,
                    status='error',
                    data=''
                )
                logger.info("========== if 'error' in parsed_data ==========")
                logger.info(error_response)
                yield sse_format(error_response)
                return  # Stop any further processing.

            if 'good_list' in parsed_data:  # The goods list.
                good_list = parsed_data['good_list']
                logger.info("Collected good_list from the processing function: %s", good_list)
                continue

            for inner_dict in parsed_data.values():
                if isinstance(inner_dict, dict):
                    combined_data.update(inner_dict)

            response = create_response(
                message='Processing',
                status='success',
                data=data
            )
            logger.info("========== 返回给后端->前端展示 ==========")
            logger.info(response)
            yield sse_format(response)  # Forwarded to the backend -> frontend display.

        base_end_time = time.time()
        logger.info(f"分段解析完成,耗时:{base_end_time - start_time:.2f} 秒")

        # At this point the frontend has received all parsed content; nothing
        # below affects the frontend display. What follows is post-processing:
        # 1. extracted_result, key-information storage  2. the technical
        # deviation table  3. the business deviation table  4. the proof
        # materials bidders must submit (currently stored by the backend but
        # not yet shown by the frontend).

        # Post-processing starts here.
        output_json_path = os.path.join(output_folder, 'final_result.json')
        extracted_info_path = os.path.join(output_folder, 'extracted_result.json')
        includes = ["基础信息", "资格审查", "商务评分", "技术评分", "无效标与废标项", "投标文件要求", "开评定标流程"]
        # Post-processing builds extracted_info, the business and technical
        # deviation data, and the proof materials returned to the backend.
        final_result, extracted_info, tech_deviation, tech_star_deviation, business_deviation, business_star_deviation, zigefuhe_deviation, proof_materials = outer_post_processing(
            combined_data, includes, good_list)

        # Post-processing done. Everything below only builds and emits
        # responses; the data is not modified further.
        tech_deviation_response, tech_deviation_star_response, zigefuhe_deviation_response, shangwu_deviation_response, shangwu_star_deviation_response, proof_materials_response = generate_deviation_response(
            tech_deviation, tech_star_deviation, business_deviation, business_star_deviation, zigefuhe_deviation,
            proof_materials, logger)  # Build the normalized responses.

        # Emit each deviation response through the generic response helper.
        logger.info("========== 使用通用响应函数->tech_deviation_response ==========")
        logger.info(tech_deviation_response)
        yield sse_format(tech_deviation_response)
        logger.info("========== 使用通用响应函数->tech_deviation_star_response ==========")
        logger.info(tech_deviation_star_response)
        yield sse_format(tech_deviation_star_response)
        logger.info("========== 使用通用响应函数->zigefuhe_deviation_response ==========")
        logger.info(zigefuhe_deviation_response)
        yield sse_format(zigefuhe_deviation_response)
        logger.info("========== 使用通用响应函数->shangwu_deviation_response ==========")
        logger.info(shangwu_deviation_response)
        yield sse_format(shangwu_deviation_response)
        logger.info("========== 使用通用响应函数->shangwu_star_deviation_response ==========")
        logger.info(shangwu_star_deviation_response)
        yield sse_format(shangwu_star_deviation_response)
        logger.info("========== 使用通用响应函数->proof_materials_response ==========")
        logger.info(proof_materials_response)
        yield sse_format(proof_materials_response)

        try:
            with open(extracted_info_path, 'w', encoding='utf-8') as json_file:
                json.dump(extracted_info, json_file, ensure_ascii=False, indent=4)
            logger.info(f"摘取后的数据已保存到 '{extracted_info_path}'")
        except IOError as e:
            logger.error(f"保存JSON文件时出错: {e}")
            log_error_unique_id(unique_id, 1, file_name=file_name)  # Record the failed unique_id.

        try:
            with open(output_json_path, 'w', encoding='utf-8') as json_file:
                json.dump(final_result, json_file, ensure_ascii=False, indent=4)
            logger.info(f"合并后的数据已保存到 '{output_json_path}'")
        except IOError as e:
            logger.error(f"保存JSON文件时出错: {e}")
            log_error_unique_id(unique_id, 1, file_name=file_name)  # Record the failed unique_id.

        extracted_info_response = create_response(
            message='extracted_info',
            status='success',
            data=json.dumps(extracted_info, ensure_ascii=False)
        )
        logger.info("========== extracted_info_response ==========")
        logger.info(extracted_info_response)
        yield sse_format(extracted_info_response)

        complete_response = create_response(
            message='Combined_data',
            status='success',
            data=json.dumps(final_result, ensure_ascii=False)
        )
        logger.info("========== complete_response ==========")
        logger.info(complete_response)
        yield sse_format(complete_response)

        # The backend currently closes the connection once it reads 'END' in
        # the 'data' field.
        final_response = create_response(
            message='文件上传并处理成功',
            status='success',
            data='END'
        )
        logger.info("========== final_response ==========")
        logger.info(final_response)
        yield sse_format(final_response)

    except Exception as e:
        logger.error(f"Unexpected error in process_and_stream: {e}")
        log_error_unique_id(unique_id, 1, file_name=file_name)  # Record the failed unique_id.
        error_response = create_response(
            message='内部服务器错误',
            status='error',
            data=''
        )
        logger.info("========== error_response ==========")
        logger.info(error_response)
        yield sse_format(error_response)

    finally:
        end_time = time.time()
        duration = end_time - start_time
        logger.info(f"Total processing time: {duration:.2f} seconds")