# flask_app/routes/upload.py
"""Upload route: download a tender document, parse it in a child process and
stream the parsing results back to the caller as Server-Sent Events."""
import multiprocessing
from queue import Empty
from flask import Blueprint, request, jsonify, g
import urllib.parse
import json
import os
import time
from flask_app.general.format_change import download_file
from flask_app.routes.工程标解析main import engineering_bid_main
from flask_app.routes.货物标解析main import goods_bid_main
from flask_app.general.post_processing import outer_post_processing
from flask_app.routes.utils import generate_deviation_response, validate_and_setup_logger, create_response, sse_format, \
    log_error_unique_id
from flask_app.ConnectionLimiter import require_connection_limit, require_execution_timeout

upload_bp = Blueprint('upload', __name__)


def _child_target(main_func, queue, output_folder, file_path, file_type, unique_id):
    """
    Body of the child process: drive the generator function `main_func` and
    forward every item it yields into `queue`.

    A caught exception is forwarded as a JSON object with an 'error' key so
    the parent process can surface it; a trailing ``None`` sentinel always
    marks end-of-stream (put in ``finally`` so the parent never hangs on a
    normal child exit).
    """
    try:
        for data in main_func(output_folder, file_path, file_type, unique_id):
            queue.put(data)
    except Exception as e:
        # Propagate the failure to the parent instead of dying silently.
        queue.put(json.dumps({'error': str(e)}, ensure_ascii=False))
    finally:
        queue.put(None)  # end-of-stream sentinel


def run_in_subprocess(main_func, output_folder, file_path, file_type, unique_id):
    """
    Start a child process running ``main_func(...)`` and yield its output in
    the parent as it arrives (via a multiprocessing.Queue).  The OS reclaims
    the child's memory when it exits, while the parent streams in real time.

    Robustness fixes over a plain blocking ``queue.get()``:
    - poll with a timeout and check ``is_alive()`` so a child that dies
      without putting the ``None`` sentinel (hard kill, OOM) cannot block
      the parent forever;
    - always join (and terminate if necessary) the child in ``finally`` so
      no zombie process is leaked when the consumer abandons this generator
      early (e.g. the HTTP client disconnects mid-stream).
    """
    queue = multiprocessing.Queue()
    p = multiprocessing.Process(
        target=_child_target,
        args=(main_func, queue, output_folder, file_path, file_type, unique_id)
    )
    p.start()
    try:
        while True:
            try:
                item = queue.get(timeout=5)  # wait for the child's next item
            except Empty:
                if not p.is_alive():
                    # Child exited without the sentinel: stop instead of hanging.
                    break
                continue
            if item is None:
                break
            yield item
    finally:
        # Always reap the child; kill it if it is still running.
        p.join(timeout=10)
        if p.is_alive():
            p.terminate()
            p.join()


@upload_bp.route('/upload', methods=['POST'])
@validate_and_setup_logger
# @require_connection_limit(timeout=1800)
@require_execution_timeout(timeout=1800)
def zbparse():  # full-document parsing entry point
    """
    POST /upload: request validation is done by the decorators; this streams
    the parsing result of the file at ``g.file_url`` back to the client.
    """
    logger = g.logger
    # Defined up-front: otherwise the outer `except` raises NameError when an
    # exception occurs before file_name is assigned (e.g. get_json() fails).
    file_name = ''
    try:
        logger.info("大解析开始!!!")
        received_data = request.get_json()
        logger.info("Received JSON data: " + str(received_data))
        file_url = g.file_url
        zb_type = g.zb_type
        file_name = urllib.parse.unquote(os.path.basename(file_url.split('?')[0]))
        logger.info(f"Starting parsing file: {file_name}")
        try:
            logger.info("starting parsing url:" + file_url)
            return process_and_stream(file_url, zb_type)  # main worker
        except Exception as e:
            logger.error('Exception occurred: ' + str(e))
            if hasattr(g, 'unique_id'):
                log_error_unique_id(g.unique_id, 1, file_name=file_name)
            error_response = create_response(
                message='处理文件时发生异常',
                status='error',
                data=str(e)
            )
            return jsonify(error_response)
    except Exception as e:
        logger.error('Unexpected exception: ' + str(e))
        if hasattr(g, 'unique_id'):
            log_error_unique_id(g.unique_id, 1, file_name=file_name)
        error_response = create_response(
            message='内部服务器错误',
            status='error',
            data='Internal server error'
        )
        return jsonify(error_response)


def process_and_stream(file_url, zb_type):
    """
    Download the file at `file_url` and parse it; supports engineering bids
    (zb_type == 1) and goods/service bids (any other value).

    Yields SSE-formatted responses: the live parsing chunks for the front
    end first, then the post-processed artifacts (deviation tables,
    extracted key info, proof materials) and finally an 'END' marker that
    tells the back end to close the connection.
    """
    logger = g.logger
    unique_id = g.unique_id
    output_folder = g.output_folder
    filename = "ztbfile"
    downloaded_filename = os.path.join(output_folder, filename)
    start_time = time.time()
    file_name = urllib.parse.unquote(os.path.basename(file_url.split('?')[0]))
    try:
        downloaded_filepath, file_type = download_file(file_url, downloaded_filename, True)
        # NOTE(review): file_type == 4 appears to mean "unsupported type"
        # per download_file's contract — confirm against its implementation.
        if not downloaded_filepath or file_type == 4:
            logger.error("下载文件失败或不支持的文件类型")
            log_error_unique_id(unique_id, 1, file_name=file_name)
            error_response = create_response(
                message='下载文件失败或不支持的文件类型',
                status='error',
                data=''
            )
            yield sse_format(error_response)
            return
        logger.info("本地文件路径: " + downloaded_filepath)
        combined_data = {}
        good_list = None
        processing_functions = {
            1: engineering_bid_main,  # engineering-bid parsing
            2: goods_bid_main  # goods-bid / service-bid parsing
        }
        processing_func = processing_functions.get(zb_type, goods_bid_main)
        # Receive parsed chunks one by one; each is forwarded to the front
        # end for live display.
        for data in run_in_subprocess(processing_func, output_folder, downloaded_filepath, file_type, unique_id):
            if not data.strip():
                logger.error("Received empty data, skipping JSON parsing.")
                continue
            try:
                parsed_data = json.loads(data)
            except json.JSONDecodeError as e:
                logger.error(f"Failed to decode JSON: {e}")
                logger.error(f"Data received: {data}")
                continue
            if 'error' in parsed_data:
                error_message = parsed_data['error']
                logger.error(f"Processing terminated due to error: {error_message}")
                # Return the error in the agreed response format.
                error_response = create_response(
                    message=error_message,
                    status='error',
                    data=''
                )
                logger.info("========== if 'error' in parsed_data ==========")
                logger.info(error_response)
                yield sse_format(error_response)
                return  # stop all further processing
            if 'good_list' in parsed_data:  # goods list
                good_list = parsed_data['good_list']
                logger.info("Collected good_list from the processing function: %s", good_list)
                continue
            # Flatten one nesting level into combined_data for post-processing.
            for outer_key, inner_dict in parsed_data.items():
                if isinstance(inner_dict, dict):
                    combined_data.update(inner_dict)
            response = create_response(
                message='Processing',
                status='success',
                data=data
            )
            logger.info("========== 返回给后端->前端展示 ==========")
            logger.info(response)
            yield sse_format(response)  # forwarded to the front end for display
        base_end_time = time.time()
        logger.info(f"分段解析完成,耗时:{base_end_time - start_time:.2f} 秒")
        # The front end now has the full parse.  Everything below is
        # post-processing only: 1. extracted_result (key-info storage)
        # 2. technical deviation table 3. business deviation table
        # 4. proof materials the bidder must submit (stored server-side,
        # not yet shown in the UI).
        output_json_path = os.path.join(output_folder, 'final_result.json')
        extracted_info_path = os.path.join(output_folder, 'extracted_result.json')
        includes = ["基础信息", "资格审查", "商务评分", "技术评分", "无效标与废标项", "投标文件要求", "开评定标流程"]
        # Builds extracted_info, the business/technical deviation data and
        # the proof materials returned to the back end.
        final_result, extracted_info, tech_deviation, tech_star_deviation, business_deviation, business_star_deviation, zigefuhe_deviation, proof_materials = outer_post_processing(
            combined_data, includes, good_list)
        # Post-processing done; everything below only builds responses and
        # does not modify the data further.
        tech_deviation_response, tech_deviation_star_response, zigefuhe_deviation_response, shangwu_deviation_response, shangwu_star_deviation_response, proof_materials_response = generate_deviation_response(
            tech_deviation, tech_star_deviation, business_deviation, business_star_deviation, zigefuhe_deviation,
            proof_materials, logger)
        logger.info("========== 使用通用响应函数->tech_deviation_response ==========")
        logger.info(tech_deviation_response)
        yield sse_format(tech_deviation_response)
        logger.info("========== 使用通用响应函数->tech_deviation_star_response ==========")
        logger.info(tech_deviation_star_response)
        yield sse_format(tech_deviation_star_response)
        logger.info("========== 使用通用响应函数->zigefuhe_deviation_response ==========")
        logger.info(zigefuhe_deviation_response)
        yield sse_format(zigefuhe_deviation_response)
        logger.info("========== 使用通用响应函数->shangwu_deviation_response ==========")
        logger.info(shangwu_deviation_response)
        yield sse_format(shangwu_deviation_response)
        logger.info("========== 使用通用响应函数->shangwu_star_deviation_response ==========")
        logger.info(shangwu_star_deviation_response)
        yield sse_format(shangwu_star_deviation_response)
        logger.info("========== 使用通用响应函数->proof_materials_response ==========")
        logger.info(proof_materials_response)
        yield sse_format(proof_materials_response)
        try:
            with open(extracted_info_path, 'w', encoding='utf-8') as json_file:
                json.dump(extracted_info, json_file, ensure_ascii=False, indent=4)
            logger.info(f"摘取后的数据已保存到 '{extracted_info_path}'")
        except IOError as e:
            logger.error(f"保存JSON文件时出错: {e}")
            log_error_unique_id(unique_id, 1, file_name=file_name)  # record the failed unique_id
        try:
            with open(output_json_path, 'w', encoding='utf-8') as json_file:
                json.dump(final_result, json_file, ensure_ascii=False, indent=4)
            logger.info(f"合并后的数据已保存到 '{output_json_path}'")
        except IOError as e:
            logger.error(f"保存JSON文件时出错: {e}")
            log_error_unique_id(unique_id, 1, file_name=file_name)  # record the failed unique_id
        extracted_info_response = create_response(
            message='extracted_info',
            status='success',
            data=json.dumps(extracted_info, ensure_ascii=False)
        )
        logger.info("========== extracted_info_response ==========")
        logger.info(extracted_info_response)
        yield sse_format(extracted_info_response)
        complete_response = create_response(
            message='Combined_data',
            status='success',
            data=json.dumps(final_result, ensure_ascii=False)
        )
        logger.info("========== complete_response ==========")
        logger.info(complete_response)
        yield sse_format(complete_response)
        # The back end terminates the connection when it reads 'END' in 'data'.
        final_response = create_response(
            message='文件上传并处理成功',
            status='success',
            data='END'
        )
        logger.info("========== final_response ==========")
        logger.info(final_response)
        yield sse_format(final_response)
    except Exception as e:
        logger.error(f"Unexpected error in process_and_stream: {e}")
        log_error_unique_id(unique_id, 1, file_name=file_name)  # record the failed unique_id
        error_response = create_response(
            message='内部服务器错误',
            status='error',
            data=''
        )
        logger.info("========== error_response ==========")
        logger.info(error_response)
        yield sse_format(error_response)
    finally:
        end_time = time.time()
        duration = end_time - start_time
        logger.info(f"Total processing time: {duration:.2f} seconds")