2025-03-03 17:28:33 +08:00

235 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# flask_app/routes/upload.py
import multiprocessing
import urllib.parse
from flask import Blueprint, request, jsonify,g
import json
import os
import time
from flask_app.general.format_change import download_file
from flask_app.routes.工程标解析main import engineering_bid_main
from flask_app.routes.货物标解析main import goods_bid_main
from flask_app.general.post_processing import outer_post_processing
from flask_app.routes.utils import generate_deviation_response, validate_and_setup_logger, create_response, sse_format, \
log_error_unique_id
from flask_app.ConnectionLimiter import require_connection_limit, require_execution_timeout
upload_bp = Blueprint('upload', __name__)
def _child_target(main_func, queue, output_folder, file_path, file_type, unique_id):
"""
子进程中调用 `main_func`(它是一个生成器函数),
将其 yield 出的数据逐条放进队列,最后放一个 None 表示结束。
"""
try:
for data in main_func(output_folder, file_path, file_type, unique_id):
queue.put(data)
except Exception as e:
# 如果要把异常也传给父进程,以便父进程可感知
queue.put(json.dumps({'error': str(e)}, ensure_ascii=False))
finally:
queue.put(None)
def run_in_subprocess(main_func, output_folder, file_path, file_type, unique_id):
    """
    Spawn a child process that runs ``main_func(...)`` and stream its output
    back to the caller through a multiprocessing.Queue.

    When the child exits the OS reclaims its memory, while the parent keeps
    yielding items in real time as they arrive.
    """
    out_queue = multiprocessing.Queue()
    worker = multiprocessing.Process(
        target=_child_target,
        args=(main_func, out_queue, output_folder, file_path, file_type, unique_id)
    )
    worker.start()
    # Drain the queue until the None sentinel emitted by _child_target.
    while (item := out_queue.get()) is not None:
        yield item
    worker.join()
@upload_bp.route('/upload', methods=['POST'])
@validate_and_setup_logger
# @require_connection_limit(timeout=1800)
@require_execution_timeout(timeout=1800)
def zbparse():  # full parse entry point
    """
    Main parsing endpoint.

    Reads ``file_url`` / ``zb_type`` prepared on ``g`` by the validation
    decorator, then delegates to :func:`process_and_stream`, which streams
    SSE responses back to the caller.  Returns a JSON error response on
    failure.
    """
    logger = g.logger
    file_name = None  # known only after file_url is read; used by error logging below
    try:
        logger.info("大解析开始!!!")
        received_data = request.get_json()
        logger.info("Received JSON data: " + str(received_data))
        # BUG FIX: file_url must be read from g BEFORE deriving file_name.
        # The original referenced file_url one line too early, raising
        # NameError on every request.
        file_url = g.file_url
        zb_type = g.zb_type
        file_name = urllib.parse.unquote(file_url).split('/')[-1]
        logger.info(f"Starting parsing file: {file_name}")
        try:
            logger.info("starting parsing url:" + file_url)
            return process_and_stream(file_url, zb_type)  # main execution path
        except Exception as e:
            logger.error('Exception occurred: ' + str(e))
            if hasattr(g, 'unique_id'):
                log_error_unique_id(g.unique_id, 1, file_name=file_name)
            error_response = create_response(
                message='处理文件时发生异常',
                status='error',
                data=str(e)
            )
            return jsonify(error_response)
    except Exception as e:
        logger.error('Unexpected exception: ' + str(e))
        if hasattr(g, 'unique_id'):
            log_error_unique_id(g.unique_id, 1, file_name=file_name)
        error_response = create_response(
            message='内部服务器错误',
            status='error',
            data='Internal server error'
        )
        return jsonify(error_response)
def process_and_stream(file_url, zb_type):
    """
    Download the tender document at ``file_url`` and stream parsing results
    back to the caller as SSE-formatted responses.

    ``zb_type`` selects the pipeline: 1 = engineering bid, anything else
    falls back to the goods/service-bid parser.  The generator ends with a
    ``data='END'`` message, which the backend uses to close the connection.
    """
    logger = g.logger
    unique_id = g.unique_id
    output_folder = g.output_folder
    filename = "ztbfile"
    downloaded_filename = os.path.join(output_folder, filename)
    start_time = time.time()
    file_name = urllib.parse.unquote(file_url).split('/')[-1]
    try:
        downloaded_filepath, file_type = download_file(file_url, downloaded_filename, True)
        # file_type == 4 marks an unsupported format — presumably part of
        # download_file's contract; TODO confirm against its definition.
        if not downloaded_filepath or file_type == 4:
            logger.error("下载文件失败或不支持的文件类型")
            log_error_unique_id(unique_id, 1, file_name=file_name)
            error_response = create_response(
                message='下载文件失败或不支持的文件类型',
                status='error',
                data=''
            )
            yield sse_format(error_response)
            return
        logger.info("本地文件路径: " + downloaded_filepath)
        combined_data = {}
        good_list = None
        processing_functions = {
            1: engineering_bid_main,  # engineering-bid parsing
            2: goods_bid_main  # goods-bid / service-bid parsing
        }
        processing_func = processing_functions.get(zb_type, goods_bid_main)
        # Receive parsed chunks one by one from the subprocess, for live
        # display on the frontend.
        for data in run_in_subprocess(processing_func, output_folder, downloaded_filepath, file_type, unique_id):
            if not data.strip():
                logger.error("Received empty data, skipping JSON parsing.")
                continue
            try:
                parsed_data = json.loads(data)
            except json.JSONDecodeError as e:
                logger.error(f"Failed to decode JSON: {e}")
                logger.error(f"Data received: {data}")
                continue
            if 'error' in parsed_data:
                error_message = parsed_data['error']
                logger.error(f"Processing terminated due to error: {error_message}")
                # Return the error in the standard response format.
                error_response = create_response(
                    message=error_message,
                    status='error',
                    data=''
                )
                yield sse_format(error_response)
                return  # stop any further processing
            if 'good_list' in parsed_data:  # goods list
                good_list = parsed_data['good_list']
                logger.info("Collected good_list from the processing function: %s", good_list)
                continue
            # Flatten one level: merge each inner dict into combined_data
            # for the post-processing stage.
            for outer_key, inner_dict in parsed_data.items():
                if isinstance(inner_dict, dict):
                    combined_data.update(inner_dict)
            response = create_response(
                message='Processing',
                status='success',
                data=data
            )
            yield sse_format(response)  # streamed to the backend -> frontend display
        base_end_time = time.time()
        logger.info(f"分段解析完成,耗时:{base_end_time - start_time:.2f}")
        # The frontend has now received everything it displays.  What
        # follows is backend post-processing only:
        #   1. persist extracted_result key info
        #   2. technical deviation table
        #   3. business deviation table
        #   4. bidder proof materials (stored backend-side, not yet shown
        #      on the frontend)
        # Post-processing starts here!
        output_json_path = os.path.join(output_folder, 'final_result.json')
        extracted_info_path = os.path.join(output_folder, 'extracted_result.json')
        includes = ["基础信息", "资格审查", "商务评分", "技术评分", "无效标与废标项", "投标文件要求", "开评定标流程"]
        final_result, extracted_info, tech_deviation, tech_star_deviation, business_deviation, business_star_deviation, zigefuhe_deviation, proof_materials = outer_post_processing(
            combined_data, includes, good_list)  # build extracted_info, business/technical deviation data, and proof materials
        # Post-processing done!  Everything below only builds responses;
        # no further data mutation.
        tech_deviation_response, tech_deviation_star_response, zigefuhe_deviation_response, shangwu_deviation_response, shangwu_star_deviation_response, proof_materials_response = generate_deviation_response(
            tech_deviation, tech_star_deviation, business_deviation, business_star_deviation, zigefuhe_deviation,
            proof_materials, logger)  # normalize into standard responses
        # Emit each deviation table via the shared response helper.
        yield sse_format(tech_deviation_response)
        yield sse_format(tech_deviation_star_response)
        yield sse_format(zigefuhe_deviation_response)
        yield sse_format(shangwu_deviation_response)
        yield sse_format(shangwu_star_deviation_response)
        yield sse_format(proof_materials_response)
        try:
            with open(extracted_info_path, 'w', encoding='utf-8') as json_file:
                json.dump(extracted_info, json_file, ensure_ascii=False, indent=4)
            logger.info(f"摘取后的数据已保存到 '{extracted_info_path}'")
        except IOError as e:
            logger.error(f"保存JSON文件时出错: {e}")
            log_error_unique_id(unique_id,1,file_name=file_name)  # record the failed unique_id
        try:
            with open(output_json_path, 'w', encoding='utf-8') as json_file:
                json.dump(final_result, json_file, ensure_ascii=False, indent=4)
            logger.info(f"合并后的数据已保存到 '{output_json_path}'")
        except IOError as e:
            logger.error(f"保存JSON文件时出错: {e}")
            log_error_unique_id(unique_id,1,file_name=file_name)  # record the failed unique_id
        extracted_info_response = create_response(
            message='extracted_info',
            status='success',
            data=json.dumps(extracted_info, ensure_ascii=False)
        )
        yield sse_format(extracted_info_response)
        complete_response = create_response(
            message='Combined_data',
            status='success',
            data=json.dumps(final_result, ensure_ascii=False)
        )
        yield sse_format(complete_response)
        final_response = create_response(  # backend closes the connection once it sees 'END' in 'data'
            message='文件上传并处理成功',
            status='success',
            data='END'
        )
        yield sse_format(final_response)
    except Exception as e:
        logger.error(f"Unexpected error in process_and_stream: {e}")
        log_error_unique_id(unique_id,1,file_name=file_name)  # record the failed unique_id
        error_response = create_response(
            message='内部服务器错误',
            status='error',
            data=''
        )
        yield sse_format(error_response)
    finally:
        end_time = time.time()
        duration = end_time - start_time
        logger.info(f"Total processing time: {duration:.2f} seconds")