diff --git a/flask_app/general/file2markdown.py b/flask_app/general/file2markdown.py index dd95110..b655c04 100644 --- a/flask_app/general/file2markdown.py +++ b/flask_app/general/file2markdown.py @@ -102,6 +102,6 @@ def convert_file_to_markdown(file_path, file_name="extract1.txt"): if __name__ == "__main__": # file_path=r"C:\Users\Administrator\Desktop\fsdownload\e702f1e6-095d-443d-bb7d-ef2e42037cb1\ztbfile_procurement.pdf" - file_path=r"D:\flask_project\flask_app\static\output\output1\f91db70d-8d96-44a5-b840-27d2f1ecbe95\ztbfile\ztbfile_11-16.pdf" + file_path=r"C:\Users\Administrator\Desktop\招标文件\tmp_pdf_wo_pages_1.pdf" res=convert_file_to_markdown(file_path) print(res) \ No newline at end of file diff --git a/flask_app/general/model_continue_query.py b/flask_app/general/model_continue_query.py index b10ad47..769add4 100644 --- a/flask_app/general/model_continue_query.py +++ b/flask_app/general/model_continue_query.py @@ -61,13 +61,15 @@ def continue_answer(original_query, original_answer, model_type=1, file_id=None) model_res = qianwen_plus(continue_query) else: raise ValueError(f"无效的模型类型: {model_type}") - + print(original_answer) + print(model_res) # 拼接原始回答和模型的继续回答 # 删除 original_answer 的最右边的 `"` 和 `\n` clean_original = original_answer.rstrip('"\n') - # 删除 model_res 的最左边的 `"` 和 `\n` clean_model = model_res.lstrip('"\n') + if clean_model.startswith("```json"): + clean_model = clean_model[len("```json"):] # 拼接字符串 full_answer = clean_original + clean_model diff --git a/flask_app/general/判断是否是招标文件.py b/flask_app/general/判断是否是招标文件.py new file mode 100644 index 0000000..02762b0 --- /dev/null +++ b/flask_app/general/判断是否是招标文件.py @@ -0,0 +1,30 @@ +from PyPDF2 import PdfReader + +from flask_app.general.通义千问long import upload_file, qianwen_long + + +def judge_zbfile(pdf_path): + reader = PdfReader(pdf_path) + num_pages = len(reader.pages) + if num_pages <= 5: + return False + user_query="""该文件是否属于招标文件?如果是的话,请返回'是',如果不是的话,返回'否'。请不要返回其他解释或内容。 +以下是常见的招标文件类型: + 公开招标文件、邀请招标文件、竞争性谈判文件、竞争性磋商文件、询价文件、问询文件、货物类招标文件、工程类招标文件、施工类招标文件、服务类招标文件、比选文件。 +若有未涵盖的类型,但其内容明确表达了项目需求、采购或招标信息,且包含指导投标人参与的关键要素,则可视为招标文件。 +请基于上述内容判断文件是否属于招标文件。 + """ + file_id=upload_file(pdf_path) + model_res=qianwen_long(file_id,user_query) + print(f"判断是否属于招标文件:{model_res}") + if '否' in model_res: + return False + return True + +if __name__ == '__main__': + pdf_path=r"C:\Users\Administrator\Desktop\货物标\zbfiles\zbtest4_evaluation_method.pdf" + res=judge_zbfile(pdf_path) + if res: + print("yes") + else: + print("no") \ No newline at end of file diff --git a/flask_app/general/商务技术评分提取.py b/flask_app/general/商务技术评分提取.py index 425da66..ed47eaa 100644 --- a/flask_app/general/商务技术评分提取.py +++ b/flask_app/general/商务技术评分提取.py @@ -274,14 +274,32 @@ def combine_evaluation_standards(evaluation_method_path,invalid_path,zb_type): -注意:禁止通过归纳、推测或自行总结来生成子评分因素,尤其不可根据'评分标准'中的打分要求来反向总结'子评分因素'。 若评分因素(内容、项)不存在嵌套关系:键名直接为评分因素,无需附加括号表示总分。 -评分标准: - -评分因素的键值为评分标准,它是列表形式,列表中包含描述分值及评分要求的字典。 + -评分因素的键值为评分标准,它是列表形式,列表中包含描述分值及评分细则的字典。 -字典个数: - 默认为1个字典,若某评分因素包括多个评分标准(多个表格单元格),可以用多个并列字典表示。 + 默认为1个字典,若某评分因素包括多个评分细则(通过表格结构判断,一个单元格视为一个评分细则),字典个数等于评分细则数。 -字典结构如下: - 评分:该评分标准的分值即最高分(如'8分'),字符串类型;不能是一个范围数字(如0-8分);若为定性指标(如“合格制”),可标明相应的定性指标;无评分时可删去'评分'键值对。 - 要求:评分标准或细则,一个表格单元格内的所有内容。 + 评分:一个打分点的分值即最高分(如'8分'),字符串类型;不能是一个范围数字(如0-8分);若为定性指标(如“合格制”),可标明相应的定性指标;无评分时可删去'评分'键值对。 + 要求:一个打分点的评分细则,即一个表格单元格内的所有内容,禁止拆分。 + 例:"拥有一级证书得3分,拥有二级证书得1分,其他不得分。" + 示例输出:{ + "评分": "3分", + "要求": "拥有一级证书得3分,拥有二级证书得1分,其他不得分。" + } + 禁止拆分出三个字典: + { + "评分": "3分", + "要求": "拥有一级证书得3分" + }, + { + "评分": "1分", + "要求": "拥有二级证书得1分" + }, + { + "评分": "0分", + "要求": "其他不得分。" + } -禁止情况: - 禁止将同个单元格内的内容拆分至多个字典中;禁止遗漏单元格内任何信息,包括注的内容。 + 禁止将同个单元格内的内容拆分至多个字典中;禁止遗漏单元格内任何信息,包括注释的内容。 3.备注信息: -若评分部分包含附加信息(如大项评分的整体要求,未直接归属于具体评分项),需添加一个 备注 键,值为该附加信息。 @@ -463,7 +481,7 @@ def combine_evaluation_standards(evaluation_method_path,invalid_path,zb_type): max_tokens = 7900 if model_type == 4 else 5900 if not cleaned_evaluation_res and total_tokens > max_tokens: print(f"total_tokens: {total_tokens}") - questions_to_continue.append((user_query, evaluation_res)) + questions_to_continue.append((user_query, message)) else: temp_final.update(cleaned_evaluation_res) if questions_to_continue: diff --git a/flask_app/general/无效标和废标公共代码.py b/flask_app/general/无效标和废标公共代码.py index 5c1f774..fa2724b 100644 --- a/flask_app/general/无效标和废标公共代码.py +++ b/flask_app/general/无效标和废标公共代码.py @@ -529,7 +529,6 @@ def clean_dict_datas(extracted_contents, keywords, excludes): # 让正则表达 new_text_list = preprocess_text_list(text_list) # 用于处理结构化文本,清理掉不必要的序号,并将分割后的段落合并,最终形成更简洁和格式化的输出。 pattern = r'^\s*(?:[((]\d+[)))]|[A-Za-z]?\d+(?:\.\s*\d+)*[\s\.、.)\)]+|[一二三四五六七八九十]+、|[A-Z][))]\s+|[A-Z]\.\s*)' - data = re.sub(pattern, '', new_text_list[0]).strip() # 去除序号 # 将修改后的第一个元素和剩余的元素连接起来 new_text_list[0] = data # 更新列表中的第一个元素 diff --git a/flask_app/routes/get_deviation.py b/flask_app/routes/get_deviation.py index d64a303..9c2ff19 100644 --- a/flask_app/routes/get_deviation.py +++ b/flask_app/routes/get_deviation.py @@ -4,10 +4,12 @@ from flask import Blueprint, jsonify, Response, g import json import os from flask_app.general.format_change import download_file +from flask_app.routes.upload import create_response, sse_format from flask_app.routes.偏离表main import get_tech_and_business_deviation from flask_app.routes.utils import generate_deviation_response, validate_and_setup_logger from flask_app.ConnectionLimiter import require_connection_limit get_deviation_bp = Blueprint('get_deviation', __name__) + @get_deviation_bp.route('/get_deviation', methods=['POST']) @validate_and_setup_logger @require_connection_limit(timeout=720) @@ -16,56 +18,85 @@ def get_deviation(): unique_id = g.unique_id file_url = g.file_url zb_type = g.zb_type - - try: - logger.info("call /get_deviation: 开始解析 URL: " + file_url) - if zb_type not in [1, 2]: - logger.error(f"无效的 zb_type: {zb_type}. 期望 zb_type: 1 或 2") - return jsonify({ - 'error': 'Invalid zb_type', - 'message': '此端点仅支持 zb_type 1 或 2' - }), 400 - else: + output_folder = g.output_folder + def generate(): + try: + logger.info("call /get_deviation: 开始解析 URL: " + file_url) + if zb_type not in [1, 2]: + logger.error(f"无效的 zb_type: {zb_type}. 期望 zb_type: 1 或 2") + response = create_response( + message='此端点仅支持 zb_type 1 或 2', + status='error', + data='' + ) + yield sse_format(**response) + return # 终止生成器 # 直接下载并处理文件 - output_folder = g.output_folder filename = "ztbfile" downloaded_filename = os.path.join(output_folder, filename) - # 下载文件 downloaded_filepath, file_type = download_file(file_url, downloaded_filename) if downloaded_filepath is None or file_type == 4: - logger.error("Unsupported file type or failed to download file") - return jsonify({'error': 'Unsupported file type or failed to download file'}), 500 + logger.error("下载地址不存在或不支持的文件类型!") + response = create_response( + message='下载地址不存在或不支持的文件类型!', + status='error', + data='' + ) + yield sse_format(**response) + return # 终止生成器 logger.info("Local file path: " + downloaded_filepath) # 处理文件 - tech_deviation, tech_star_deviation, business_deviation, business_star_deviation, zigefuhe_deviation, proof_materials = get_tech_and_business_deviation( - downloaded_filepath, file_type, unique_id, output_folder,zb_type) + deviations = get_tech_and_business_deviation( + downloaded_filepath, file_type, unique_id, output_folder, zb_type + ) + + if deviations is None: + response = create_response( + message='上传的文件非招标文件或文件内容不完整!', + status='error', + data='' + ) + yield sse_format(**response) + return # 终止生成器 + + # 解包返回值 + (tech_deviation, tech_star_deviation, business_deviation, + business_star_deviation, zigefuhe_deviation, proof_materials) = deviations # 生成偏差响应 - tech_deviation_response, tech_deviation_star_response, zigefuhe_deviation_response, shangwu_deviation_response, shangwu_star_deviation_response, proof_materials_response = generate_deviation_response( - tech_deviation, tech_star_deviation, business_deviation, business_star_deviation, zigefuhe_deviation, proof_materials, logger) + tech_deviation_response, tech_deviation_star_response, zigefuhe_deviation_response, \ + shangwu_deviation_response, shangwu_star_deviation_response, proof_materials_response = generate_deviation_response( + tech_deviation, tech_star_deviation, business_deviation, + business_star_deviation, zigefuhe_deviation, proof_materials, logger + ) final_response = { 'message': 'processed successfully', - 'filename': 'END', + 'status': 'success', 'data': 'END' } # 流式返回数据 - def generate(): - yield f"data: {json.dumps(tech_deviation_response, ensure_ascii=False)}\n\n" - yield f"data: {json.dumps(tech_deviation_star_response, ensure_ascii=False)}\n\n" - yield f"data: {json.dumps(zigefuhe_deviation_response, ensure_ascii=False)}\n\n" - yield f"data: {json.dumps(shangwu_deviation_response, ensure_ascii=False)}\n\n" - yield f"data: {json.dumps(shangwu_star_deviation_response, ensure_ascii=False)}\n\n" - yield f"data: {json.dumps(proof_materials_response, ensure_ascii=False)}\n\n" - yield f"data: {json.dumps(final_response, ensure_ascii=False)}\n\n" + yield sse_format(**tech_deviation_response) + yield sse_format(**tech_deviation_star_response) + yield sse_format(**zigefuhe_deviation_response) + yield sse_format(**shangwu_deviation_response) + yield sse_format(**shangwu_star_deviation_response) + yield sse_format(**proof_materials_response) + yield sse_format(**final_response) - return Response(generate(), mimetype='text/event-stream') - except Exception as e: - logger.error('发生异常: ' + str(e)) - return jsonify({'error': str(e)}), 500 + except Exception as e: + logger.error('发生异常: ' + str(e)) + response = create_response( + message=str(e), + status='error', + data='' + ) + yield sse_format(**response) + + return Response(generate(), mimetype='text/event-stream') diff --git a/flask_app/routes/little_zbparse.py b/flask_app/routes/little_zbparse.py index f26e3fd..e2fed35 100644 --- a/flask_app/routes/little_zbparse.py +++ b/flask_app/routes/little_zbparse.py @@ -11,6 +11,27 @@ from flask_app.routes.utils import validate_and_setup_logger little_zbparse_bp = Blueprint('little_zbparse', __name__) +def create_response(message, status, data='', status_code=200): + """ + 创建统一格式的 JSON 响应。 + + 参数: + message (str): 响应消息。 + status (str): 状态标记,'success' 或 'error'。 + data (str, optional): 响应数据。默认为空字符串。 + status_code (int, optional): HTTP 状态码。默认为 200。 + + 返回: + Response: Flask 响应对象。 + """ + response = jsonify({ + 'message': message, + 'status': status, + 'data': data + }) + response.status_code = status_code + return response + @little_zbparse_bp.route('/little_zbparse', methods=['POST']) @validate_and_setup_logger @require_connection_limit(timeout=300) @@ -18,30 +39,45 @@ def little_zbparse(): logger = g.logger file_url = g.file_url zb_type = g.zb_type - if isinstance(file_url, tuple): # 检查是否为错误响应 + + # 检查是否为错误响应(假设装饰器返回的是一个响应元组) + if isinstance(file_url, tuple): return file_url try: - logger.info("starting parsing url:" + file_url) + logger.info(f"Starting parsing URL: {file_url}") final_json_path = download_and_process_file(file_url, zb_type) + if not final_json_path: - return jsonify({'error': 'File processing failed'}), 500 + return create_response( + message='上传的文件非招标文件或文件内容不完整!', + status='error', + data='', + status_code=400 # 400 Bad Request + ) + response = generate_response(final_json_path) return response + except Exception as e: - logger.error('Exception occurred: ' + str(e)) - return jsonify({'error': str(e)}), 500 + logger.error(f'Exception occurred: {e}') + return create_response( + message='解析遇到不知名错误!', + status='error', + data='', + status_code=500 # 500 Internal Server Error + ) def download_and_process_file(file_url, zb_type): """ - 下载并处理文件,根据zb_type选择处理函数。 + 下载并处理文件,根据 zb_type 选择处理函数。 参数: - file_url (str): 文件的URL地址。 - zb_type (int): 标的类型,1表示工程标,2表示货物标。 + file_url (str): 文件的 URL 地址。 + zb_type (int): 标的类型,1 表示工程标,2 表示货物标。 返回: - str: 处理后的文件路径。 + str or None: 处理后的文件路径,或在失败时返回 None。 """ logger = g.logger output_folder = g.output_folder @@ -54,23 +90,47 @@ def download_and_process_file(file_url, zb_type): logger.error("Unsupported file type or failed to download file") return None - logger.info("Local file path: " + downloaded_filepath) + logger.info(f"Local file path: {downloaded_filepath}") processed_file_path = little_parse_main(output_folder, downloaded_filepath, file_type, zb_type, g.unique_id) return processed_file_path def generate_response(final_json_path): + """ + 生成最终的成功响应或错误响应。 + + 参数: + final_json_path (str): 处理后的 JSON 文件路径。 + + 返回: + tuple: Flask 响应对象和状态码。 + """ logger = g.logger - if not final_json_path: - logger.error('Empty or None path provided for final_json.') - return jsonify({'error': 'No path provided for final_json.'}), 400 + if not os.path.exists(final_json_path): - logger.error('final_json not found at path: ' + final_json_path) - return jsonify({'error': 'final_json not found'}), 404 - with open(final_json_path, 'r', encoding='utf-8') as f: - zbparse_data = json.load(f) - json_str = json.dumps(zbparse_data, ensure_ascii=False) - return jsonify({ - 'message': 'Little Parse processed successfully', - 'filename': os.path.basename(final_json_path), - 'data': json_str - }) + logger.error(f'final_json 未找到!: {final_json_path}') + return create_response( + message='final_json not found', + status='error', + data='', + status_code=404 # 404 Not Found + ) + + try: + with open(final_json_path, 'r', encoding='utf-8') as f: + zbparse_data = json.load(f) + json_str = json.dumps(zbparse_data, ensure_ascii=False) + except Exception as e: + logger.error(f'Error reading or parsing final_json: {e}') + return create_response( + message='Error processing final_json.', + status='error', + data='', + status_code=500 # 500 Internal Server Error + ) + + return create_response( + message='Little Parse processed successfully', + status='success', + data=json_str, + status_code=200 # 200 OK + ) diff --git a/flask_app/routes/upload.py b/flask_app/routes/upload.py index 4083a00..f7cec40 100644 --- a/flask_app/routes/upload.py +++ b/flask_app/routes/upload.py @@ -12,8 +12,35 @@ from flask_app.routes.utils import generate_deviation_response, validate_and_set from flask_app.ConnectionLimiter import require_connection_limit -upload_bp = Blueprint('upload', __name__) +def create_response(message, status, data): + """ + 创建一个统一格式的响应字典。 + :param message: 响应消息 + :param status: 响应状态(如 'success', 'error', 'processing') + :param data: 具体的数据内容 + :return: 字典格式的响应 + """ + return { + 'message': message, + 'status': status, + 'data': data + } + + +def sse_format(message, status, data): + """ + 将响应格式化为 Server-Sent Events (SSE) 的格式。 + + :param message: 响应消息 + :param status: 响应状态(如 'success', 'error', 'processing') + :param data: 具体的数据内容 + :return: 格式化后的 SSE 字符串 + """ + response = create_response(message, status, data) + return f"data: {json.dumps(response, ensure_ascii=False)}\n\n" + +upload_bp = Blueprint('upload', __name__) @upload_bp.route('/upload', methods=['POST']) @validate_and_setup_logger @require_connection_limit(timeout=720) @@ -30,10 +57,20 @@ def zbparse(): return process_and_stream(file_url, zb_type) except Exception as e: logger.error('Exception occurred: ' + str(e)) - return jsonify({'error': str(e)}), 500 + error_response = create_response( + message='处理文件时发生异常', + status='error', + data=str(e) + ) + return jsonify(error_response) except Exception as e: logger.error('Unexpected exception: ' + str(e)) - return jsonify({'error': 'Internal server error'}), 500 + error_response = create_response( + message='内部服务器错误', + status='error', + data='Internal server error' + ) + return jsonify(error_response) def process_and_stream(file_url, zb_type): """ 下载文件并进行处理,支持工程标和货物标的处理。 @@ -49,24 +86,24 @@ def process_and_stream(file_url, zb_type): downloaded = download_file(file_url, downloaded_filename) if not downloaded: logger.error("下载文件失败或不支持的文件类型") - error_response = { - 'message': 'File processing failed', - 'filename': '', - 'data': json.dumps({'error': 'File processing failed'}) - } - yield f"data: {json.dumps(error_response)}\n\n" + error_response = create_response( + message='文件处理失败', + status='error', + data='' + ) + yield sse_format(**error_response) return downloaded_filepath, file_type = downloaded if file_type == 4: logger.error("不支持的文件类型") - error_response = { - 'message': 'Unsupported file type', - 'filename': None, - 'data': json.dumps({'error': 'Unsupported file type'}) - } - yield f"data: {json.dumps(error_response)}\n\n" + error_response = create_response( + message='不支持的文件类型', + status='error', + data='' + ) + yield sse_format(**error_response) return logger.info("本地文件路径: " + downloaded_filepath) @@ -78,7 +115,7 @@ def process_and_stream(file_url, zb_type): 1: engineering_bid_main, 2: goods_bid_main } - processing_func = processing_functions.get(zb_type, engineering_bid_main) + processing_func = processing_functions.get(zb_type, goods_bid_main) for data in processing_func(output_folder, downloaded_filepath, file_type, unique_id): if not data.strip(): @@ -92,6 +129,17 @@ def process_and_stream(file_url, zb_type): logger.error(f"Data received: {data}") continue + if 'error' in parsed_data: + # 适当处理错误 + logger.error(f"处理错误: {parsed_data['error']}") + response = create_response( + message='上传的文件非招标文件或文件内容不完整!', + status='error', + data='' + ) + yield sse_format(**response) + return # 直接返回,终止生成器 + if 'good_list' in parsed_data: good_list = parsed_data['good_list'] logger.info("Collected good_list from the processing function: %s", good_list) @@ -101,12 +149,12 @@ def process_and_stream(file_url, zb_type): if isinstance(inner_dict, dict): combined_data.update(inner_dict) - response = { - 'message': 'Processing', - 'filename': os.path.basename(downloaded_filepath), - 'data': data - } - yield f"data: {json.dumps(response, ensure_ascii=False)}\n\n" + response = create_response( + message='Processing', + status='success', + data=data + ) + yield sse_format(**response) base_end_time = time.time() logger.info(f"分段解析完成,耗时:{base_end_time - start_time:.2f} 秒") @@ -114,16 +162,20 @@ def process_and_stream(file_url, zb_type): output_json_path = os.path.join(output_folder, 'final_result.json') extracted_info_path = os.path.join(output_folder, 'extracted_result.json') includes = ["基础信息", "资格审查", "商务评分", "技术评分", "无效标与废标项", "投标文件要求", "开评定标流程"] - final_result, extracted_info, tech_deviation, tech_star_deviation, business_deviation, business_star_deviation, zigefuhe_deviation, proof_materials = outer_post_processing(combined_data, includes, good_list) + final_result, extracted_info, tech_deviation, tech_star_deviation, business_deviation, business_star_deviation, zigefuhe_deviation, proof_materials = outer_post_processing( + combined_data, includes, good_list) - tech_deviation_response, tech_deviation_star_response, zigefuhe_deviation_response, shangwu_deviation_response, shangwu_star_deviation_response,proof_materials_response = generate_deviation_response( - tech_deviation, tech_star_deviation, business_deviation, business_star_deviation, zigefuhe_deviation,proof_materials, logger) - yield f"data: {json.dumps(tech_deviation_response, ensure_ascii=False)}\n\n" - yield f"data: {json.dumps(tech_deviation_star_response, ensure_ascii=False)}\n\n" - yield f"data: {json.dumps(zigefuhe_deviation_response, ensure_ascii=False)}\n\n" - yield f"data: {json.dumps(shangwu_deviation_response, ensure_ascii=False)}\n\n" - yield f"data: {json.dumps(shangwu_star_deviation_response, ensure_ascii=False)}\n\n" - yield f"data: {json.dumps(proof_materials_response, ensure_ascii=False)}\n\n" + tech_deviation_response, tech_deviation_star_response, zigefuhe_deviation_response, shangwu_deviation_response, shangwu_star_deviation_response, proof_materials_response = generate_deviation_response( + tech_deviation, tech_star_deviation, business_deviation, business_star_deviation, zigefuhe_deviation, + proof_materials, logger) + + # 使用通用响应函数 + yield sse_format(**tech_deviation_response) + yield sse_format(**tech_deviation_star_response) + yield sse_format(**zigefuhe_deviation_response) + yield sse_format(**shangwu_deviation_response) + yield sse_format(**shangwu_star_deviation_response) + yield sse_format(**proof_materials_response) try: with open(extracted_info_path, 'w', encoding='utf-8') as json_file: @@ -139,26 +191,26 @@ def process_and_stream(file_url, zb_type): except IOError as e: logger.error(f"保存JSON文件时出错: {e}") - extracted_info_response = { - 'message': 'extracted_info', - 'filename': os.path.basename(downloaded_filepath), - 'data': json.dumps(extracted_info, ensure_ascii=False) - } - yield f"data: {json.dumps(extracted_info_response, ensure_ascii=False)}\n\n" + extracted_info_response = create_response( + message='extracted_info', + status='success', + data=json.dumps(extracted_info, ensure_ascii=False) + ) + yield sse_format(**extracted_info_response) - complete_response = { - 'message': 'Combined_data', - 'filename': os.path.basename(downloaded_filepath), - 'data': json.dumps(final_result, ensure_ascii=False) - } - yield f"data: {json.dumps(complete_response, ensure_ascii=False)}\n\n" + complete_response = create_response( + message='Combined_data', + status='success', + data=json.dumps(final_result, ensure_ascii=False) + ) + yield sse_format(**complete_response) - final_response = { - 'message': 'File uploaded and processed successfully', - 'filename': os.path.basename(downloaded_filepath), - 'data': 'END' - } - yield f"data: {json.dumps(final_response)}\n\n" + final_response = create_response( + message='文件上传并处理成功', + status='success', + data='END' + ) + yield sse_format(**final_response) finally: end_time = time.time() diff --git a/flask_app/routes/utils.py b/flask_app/routes/utils.py index 73944e8..9f8a2b9 100644 --- a/flask_app/routes/utils.py +++ b/flask_app/routes/utils.py @@ -34,36 +34,43 @@ def generate_deviation_response(tech_deviation, tech_star_deviation, business_de tech_deviation_response = { 'message': 'procurement_reqs', - 'filename': 'procurement_reqs', + 'status': 'success', 'data': json.dumps(tech_deviation, ensure_ascii=False) } tech_deviation_star_response = { 'message': 'jishu_star_deviation', - 'filename': 'jishu_star_deviation', + 'status': 'success', 'data': json.dumps(tech_star_deviation, ensure_ascii=False) } zigefuhe_deviation_response = { 'message': 'zigefuhe_deviation', - 'filename': 'zigefuhe_deviation', + 'status': 'success', 'data': json.dumps(zigefuhe_deviation, ensure_ascii=False) } shangwu_deviation_response = { 'message': 'shangwu_deviation', - 'filename': 'shangwu_deviation', + 'status': 'success', 'data': json.dumps(business_deviation, ensure_ascii=False) } shangwu_star_deviation_response = { 'message': 'shangwu_star_deviation', - 'filename': 'shangwu_star_deviation', + 'status': 'success', 'data': json.dumps(business_star_deviation, ensure_ascii=False) } - proof_materials_response={ + proof_materials_response = { 'message': 'proof_materials', - 'filename': 'proof_materials', + 'status': 'success', 'data': json.dumps(proof_materials, ensure_ascii=False) } - return tech_deviation_response, tech_deviation_star_response, zigefuhe_deviation_response, shangwu_deviation_response, shangwu_star_deviation_response,proof_materials_response + return ( + tech_deviation_response, + tech_deviation_star_response, + zigefuhe_deviation_response, + shangwu_deviation_response, + shangwu_star_deviation_response, + proof_materials_response + ) def require_connection_limit(): """装饰器:确保路由使用连接限制,并正确处理生成器函数""" diff --git a/flask_app/routes/偏离表main.py b/flask_app/routes/偏离表main.py index 49de6a2..410a1ae 100644 --- a/flask_app/routes/偏离表main.py +++ b/flask_app/routes/偏离表main.py @@ -7,6 +7,7 @@ from flask_app.general.doubao import doubao_model from flask_app.general.format_change import pdf2docx, docx2pdf,doc2docx from flask_app.general.json_utils import clean_json_string, rename_outer_key from flask_app.general.merge_pdfs import merge_pdfs +from flask_app.general.判断是否是招标文件 import judge_zbfile from flask_app.general.通义千问long import qianwen_plus from flask_app.general.通用功能函数 import get_global_logger from flask_app.general.截取pdf_main import truncate_pdf_multiple @@ -562,7 +563,10 @@ def get_tech_and_business_deviation(file_path,file_type,unique_id,output_folder, docx_path=doc2docx(file_path) pdf_path = docx2pdf(file_path) else: - logger.error("Unsupported file type provided. Preprocessing halted.") + logger.error("不支持的文件类型!") + return None + judge_res = judge_zbfile(pdf_path) + if not judge_res: return None # 第二步:根据zb_type确定选择项和类别,并截取PDF if zb_type == 2: @@ -576,6 +580,7 @@ def get_tech_and_business_deviation(file_path,file_type,unique_id,output_folder, except Exception as e: logger.error(f"PDF截取过程中出错: {e}") return None + # 根据zb_type分配路径 notice_path = files[0] if len(files) > 0 else "" evaluation_path = files[1] if len(files) > 1 else "" diff --git a/flask_app/routes/小解析main.py b/flask_app/routes/小解析main.py index bb9967a..5e4c8be 100644 --- a/flask_app/routes/小解析main.py +++ b/flask_app/routes/小解析main.py @@ -6,6 +6,7 @@ import time from flask_app.general.format_change import docx2pdf from flask_app.general.json_utils import clean_json_string +from flask_app.general.判断是否是招标文件 import judge_zbfile from flask_app.general.多线程提问 import read_questions_from_file, multi_threading from flask_app.general.通义千问long import upload_file from flask_app.general.通用功能函数 import get_global_logger,aggregate_basic_info @@ -111,6 +112,9 @@ def little_parse_main(output_folder, file_path, file_type,zb_type,unique_id): else: logger.error("Unsupported file type provided. Preprocessing halted.") return None + judge_res = judge_zbfile(pdf_path) + if not judge_res: + return None # 根据招标类型调用相应的解析函数 if zb_type == 2: # 货物标 combined_data = little_parse_goods(output_folder, pdf_path,logger) diff --git a/flask_app/routes/工程标解析main.py b/flask_app/routes/工程标解析main.py index a946a27..a0b4552 100644 --- a/flask_app/routes/工程标解析main.py +++ b/flask_app/routes/工程标解析main.py @@ -8,6 +8,7 @@ from concurrent.futures import ThreadPoolExecutor from docx import Document from flask_app.general.insert_del_pagemark import insert_mark,delete_mark +from flask_app.general.判断是否是招标文件 import judge_zbfile from flask_app.general.截取pdf_main import truncate_pdf_multiple from flask_app.general.merge_pdfs import merge_pdfs from flask_app.general.通用功能函数 import get_global_logger @@ -40,6 +41,9 @@ def preprocess_files(output_folder, file_path, file_type,logger): else: logger.error("Unsupported file type provided. Preprocessing halted.") return None + judge_res = judge_zbfile(pdf_path) + if not judge_res: + return None # 调用截取PDF多次 truncate_files = truncate_pdf_multiple(pdf_path, output_folder,logger,'engineering') print("切割出的文件:"+str(truncate_files)) @@ -209,8 +213,11 @@ def engineering_bid_main(output_folder, file_path, file_type, unique_id): # 预处理文件,获取处理后的数据 processed_data = preprocess_files(output_folder, file_path, file_type,logger) if not processed_data: - yield json.dumps({}) # 如果处理数据失败,返回空的 JSON - + error_response = { + 'error': '文件预处理失败。请检查文件类型并重试。' + } + yield json.dumps(error_response, ensure_ascii=False) + return # 停止进一步处理 with concurrent.futures.ThreadPoolExecutor() as executor: # 立即启动不依赖 knowledge_name 和 index 的任务 futures = { diff --git a/flask_app/routes/货物标解析main.py b/flask_app/routes/货物标解析main.py index 05007be..606fdf9 100644 --- a/flask_app/routes/货物标解析main.py +++ b/flask_app/routes/货物标解析main.py @@ -17,7 +17,7 @@ from flask_app.货物标.提取json货物标版 import convert_clause_to_json from flask_app.general.无效标和废标公共代码 import combine_find_invalid from flask_app.货物标.资格审查main import combine_qualification_review from flask_app.general.商务技术评分提取 import combine_evaluation_standards - +from flask_app.general.判断是否是招标文件 import judge_zbfile # 创建全局线程池 executor = ThreadPoolExecutor() @@ -39,6 +39,9 @@ def preprocess_files(output_folder, file_path, file_type,logger): logger.error("Unsupported file type provided. Preprocessing halted.") return None + judge_res=judge_zbfile(pdf_path) + if not judge_res: + return None # 调用截取PDF多次 truncate_files = truncate_pdf_multiple(pdf_path, output_folder,logger,'goods') # index: 0->商务技术服务要求 1->评标办法 2->资格审查 3->投标人须知前附表 4->投标人须知正文 @@ -215,8 +218,11 @@ def goods_bid_main(output_folder, file_path, file_type, unique_id): # 预处理文件,获取处理后的数据 processed_data = preprocess_files(output_folder, file_path, file_type,logger) if not processed_data: - yield json.dumps({}) # 如果处理数据失败,返回空的 JSON - + error_response = { + 'error': '文件预处理失败。请检查文件类型并重试。' + } + yield json.dumps(error_response, ensure_ascii=False) + return # 停止进一步处理 with concurrent.futures.ThreadPoolExecutor() as executor: # 立即启动不依赖 knowledge_name 和 index 的任务 futures = { diff --git a/flask_app/static/提示词/是否相关问题.txt b/flask_app/static/提示词/是否相关问题.txt index d7f2f5b..18903c2 100644 --- a/flask_app/static/提示词/是否相关问题.txt +++ b/flask_app/static/提示词/是否相关问题.txt @@ -63,11 +63,9 @@ } } -9.该招标文件对响应文件(投标文件)偏离项的要求或内容是怎样的?请不要回答具体的技术参数,也不要回答具体的评分要求。请以json格式给我提供信息,外层键名为'偏离',若存在嵌套信息,嵌套内容键名为文件中对应字段或是你的总结,而嵌套键值必须与原文保持一致,若文中未涉及相关内容,在键值中填'未知'。 -注意:不使用任何预设的示例作为回答,示例仅作为格式参考。 +9.该招标文件对响应文件(投标文件)偏离项的要求或内容是怎样的?请不要回答具体的技术参数,也不要回答具体的评分要求。请以json格式给我提供信息,外层键名为'偏离',若存在嵌套信息,嵌套内容键名为文件中对应字段或是你的总结,而嵌套键值必须与原文保持一致。若文中没有关于偏离项的相关内容,在键值中填'未知'。 禁止内容: - 确保所有输出内容均基于提供的实际招标文件内容; - 不使用任何预设的示例作为回答。 + 确保键值内容均基于提供的实际招标文件内容,禁止使用任何预设的示例作为回答。 示例1,嵌套键值对情况: { "偏离":{