9.23测试分段

2024-09-23 11:48:21 +08:00 · 2024-09-23 11:48:21 +08:00 · 5fb525186f
commit 5fb525186f
parent 2ad1bff5f3
3 changed files with 212 additions and 114 deletions
--- a/flask_app/main/start_up.py
+++ b/flask_app/main/start_up.py
@@ -56,74 +56,76 @@ def create_logger():
    g.logger = logger


+# @app.route('/upload', methods=['POST'])
+# def zbparse():
+#     logger=g.logger
+#     file_url = validate_request()
+#     if isinstance(file_url, tuple):  # Check if the returned value is an error response
+#         return file_url
+#     try:
+#         logger.info("starting parsing url:" + file_url)
+#         final_json_path, output_folder= download_and_process_file(file_url)
+#         if not final_json_path:
+#             return jsonify({'error': 'File processing failed'}), 500
+#         response = generate_response(final_json_path)  # 先获取响应内容
+#         # remove_directory(output_folder)  # 然后删除文件夹
+#         return response  # 最后返回获取的响应
+#     except Exception as e:
+#         logger.error('Exception occurred: ' + str(e))  # 使用全局 logger 记录
+#         return jsonify({'error': str(e)}), 500
+
+
+# 流式
@app.route('/upload', methods=['POST'])
 def zbparse():
-    logger=g.logger
+    logger = g.logger
    file_url = validate_request()
    if isinstance(file_url, tuple):  # Check if the returned value is an error response
        return file_url
    try:
        logger.info("starting parsing url:" + file_url)
-        final_json_path, output_folder= download_and_process_file(file_url)
-        if not final_json_path:
-            return jsonify({'error': 'File processing failed'}), 500
-        response = generate_response(final_json_path)  # 先获取响应内容
-        # remove_directory(output_folder)  # 然后删除文件夹
-        return response  # 最后返回获取的响应
+        return Response(stream_with_context(process_and_stream(file_url)), content_type='text/event-stream')
    except Exception as e:
-        logger.error('Exception occurred: ' + str(e))  # 使用全局 logger 记录
+        logger.error('Exception occurred: ' + str(e))
        return jsonify({'error': str(e)}), 500


-# 流式
-# def zbparse():
-#     file_url = validate_request()
-#     if isinstance(file_url, tuple):  # Check if the returned value is an error response
-#         return file_url
-#     try:
-#         app.logger.info("starting parsing url:" + file_url)
-#         return Response(stream_with_context(process_and_stream(file_url)), content_type='text/event-stream')
-#     except Exception as e:
-#         app.logger.error('Exception occurred: ' + str(e))
-#         return jsonify({'error': str(e)}), 500
+#分段返回
+def process_and_stream(file_url):
+    logger = g.logger
+    unique_id = g.unique_id
+    output_folder = f"flask_app/static/output/{unique_id}"  # 直接使用全局 unique_id 构建路径
+    filename = "ztbfile"
+    downloaded_filename = os.path.join(output_folder, filename)

+    downloaded_filepath, file_type = download_file(file_url, downloaded_filename)

-# 分段返回
-# def process_and_stream(file_url):
-#     logger = g.logger
-#     unique_id = g.unique_id
-#     output_folder = f"flask_app/static/output/{unique_id}"  # 直接使用全局 unique_id 构建路径
-#     filename = "ztbfile"
-#     downloaded_filename = os.path.join(output_folder, filename)
-#
-#     downloaded_filepath, file_type = download_file(file_url, downloaded_filename)
-#
-#     if downloaded_filepath is None or file_type == 3:
-#         logger.error("Unsupported file type or failed to download file")
-#         error_response = {
-#             'message': 'File processing failed',
-#             'filename': None,
-#             'data': json.dumps({'error': 'File processing failed'})
-#         }
-#         yield f"data: {json.dumps(error_response)}\n\n"
-#         return
-#
-#     logger.info("Local file path: " + downloaded_filepath)
-#
-#     for data in main_processing(output_folder, downloaded_filepath, file_type, unique_id):
-#         response = {
-#             'message': 'Processing',
-#             'filename': os.path.basename(downloaded_filepath),
-#             'data': data
-#         }
-#         yield f"data: {json.dumps(response)}\n\n"
-#
-#     final_response = {
-#         'message': 'File uploaded and processed successfully',
-#         'filename': os.path.basename(downloaded_filepath),
-#         'data': 'END'
-#     }
-#     yield f"data: {json.dumps(final_response)}\n\n"
+    if downloaded_filepath is None or file_type == 3:
+        logger.error("Unsupported file type or failed to download file")
+        error_response = {
+            'message': 'File processing failed',
+            'filename': None,
+            'data': json.dumps({'error': 'File processing failed'})
+        }
+        yield f"data: {json.dumps(error_response)}\n\n"
+        return
+
+    logger.info("Local file path: " + downloaded_filepath)
+
+    for data in main_processing(output_folder, downloaded_filepath, file_type, unique_id):
+        response = {
+            'message': 'Processing',
+            'filename': os.path.basename(downloaded_filepath),
+            'data': data
+        }
+        yield f"data: {json.dumps(response)}\n\n"
+
+    final_response = {
+        'message': 'File uploaded and processed successfully',
+        'filename': os.path.basename(downloaded_filepath),
+        'data': 'END'
+    }
+    yield f"data: {json.dumps(final_response)}\n\n"


 def validate_request():
@@ -164,13 +166,90 @@ def test_zbparse():


 def test_process_and_stream():
-    # 模拟五段数据
+    # 模拟七段数据，每段包含指定的中文键名和更多详细数据
    data_segments = [
-        {"base_info": {"project_name": "测试项目1", "project_code": "TP001"}},
-        {"review_standards": ["标准1", "标准2", "标准3"]},
-        {"evaluation_standards": ["评估标准A", "评估标准B"]},
-        {"invalid_requirements": ["无效要求X", "无效要求Y"]},
-        {"bidding_documents_requirements": ["文件要求1", "文件要求2"]}
+        {
+            "base_info": {
+                "基础信息": {
+                    "project_name": "测试项目1",
+                    "project_code": "TP001",
+                    "project_manager": "张三",
+                    "start_date": "2024-01-10",
+                    "end_date": "2024-12-31"
+                }
+            }
+        },
+        {
+            "qualification_review": {
+                "资格审查": {
+                    "review_criteria": ["公司资质", "过往业绩", "财务报表"],
+                    "required_documents": ["营业执照", "资质证书", "近三年财务报告"],
+                    "minimum_requirements": {
+                        "company_age": "至少5年",
+                        "past_projects": "至少3个大型项目"
+                    }
+                }
+            }
+        },
+        {
+            "technical_standards": {
+                "技术标": {
+                    "technical_requirements": ["设备质量要求", "施工工艺", "安全标准"],
+                    "materials_list": ["钢筋", "水泥", "电缆"],
+                    "equipment_specs": {
+                        "excavator": "型号X123",
+                        "concrete_mixer": "型号Y456"
+                    }
+                }
+            }
+        },
+        {
+            "commercial_standards": {
+                "商务标": {
+                    "pricing_method": "固定总价",
+                    "payment_schedule": ["30%合同签订", "40%中期支付", "30%项目完成支付"],
+                    "contract_conditions": {
+                        "warranty_period": "2年",
+                        "penalty_clauses": "延期每周罚款5%"
+                    }
+                }
+            }
+        },
+        {
+            "invalid_requirements": {
+                "无效标与废标项": {
+                    "common_issues": ["未按要求提交保证金", "技术标不达标"],
+                    "invalidation_reasons": {
+                        "missing_documents": "缺少必要文件",
+                        "unqualified_technical_specs": "技术规格不合要求"
+                    }
+                }
+            }
+        },
+        {
+            "bidding_documents_requirements": {
+                "投标文件要求": {
+                    "file_format": "PDF",
+                    "submission_deadline": "2024-08-01 17:00",
+                    "submission_location": "北京市某某大厦5楼",
+                    "required_sections": ["公司简介", "技术方案", "商务报价"]
+                }
+            }
+        },
+        {
+            "opening_bid": {
+                "开评定标流程": {
+                    "bid_opening_time": "2024-09-01 10:00",
+                    "location": "会议室A",
+                    "evaluation_criteria": ["价格", "技术能力", "项目经验"],
+                    "evaluation_process": {
+                        "first_round": "资格审查",
+                        "second_round": "技术评分",
+                        "final_round": "商务报价评定"
+                    }
+                }
+            }
+        }
    ]

    filename = "test_file.pdf"
@@ -181,8 +260,8 @@ def test_process_and_stream():
            'filename': filename,
            'data': data
        }
-        yield f"data: {json.dumps(response)}\n\n"
-        time.sleep(5)  # 每隔2秒发送一段数据
+        yield f"data: {json.dumps(response, ensure_ascii=False)}\n\n"
+        time.sleep(5)  # 每隔5秒发送一段数据

    # 发送结束信号
    final_response = {
@@ -190,7 +269,7 @@ def test_process_and_stream():
        'filename': filename,
        'data': 'END'
    }
-    yield f"data: {json.dumps(final_response)}\n\n"
+    yield f"data: {json.dumps(final_response, ensure_ascii=False)}\n\n"


 def generate_response(final_json_path):
--- a/flask_app/main/招标文件解析.py
+++ b/flask_app/main/招标文件解析.py
--- a/flask_app/货物标/test.py
+++ b/flask_app/货物标/test.py
@@ -11,10 +11,19 @@ def is_numeric_key(key):
 #TODO:如果键值中存在数字就不行
 #zbtest20也有问题
 def contains_number_or_index(key, value):
-    return (isinstance(value, (int, float)) or
-            (isinstance(value, str) and value.isdigit()) or
-            '序号' in key or
-            (isinstance(value, str) and re.search(r'\d+', value)))
+    # 判断值是否是数字或数字字符串
+    is_number = isinstance(value, (int, float)) or (isinstance(value, str) and value.isdigit())
+    # 判断键是否包含 "序号"
+    contains_index = '序号' in key
+    # 判断值中是否包含数字
+    contains_digit = isinstance(value, str) and re.search(r'\d+', value)
+    # 判断值中是否包含中文字符
+    contains_chinese = isinstance(value, str) and re.search(r'[\u4e00-\u9fff]', value)
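+    # A value that mixes digits with Chinese characters is kept (return False); this check runs before the "序号" key test, so it wins even when the key contains "序号"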
+    if contains_digit and contains_chinese:
+        return False
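+    # Otherwise the result is truthy when the value is a number or digit string, the key contains "序号", or the value contains a digit (may be a match object rather than a bool)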
+    return is_number or contains_index or contains_digit

 def preprocess_dict(data):
    if isinstance(data, dict):
@ -77,15 +86,13 @@ input_data = {
    "符合性审查": {
        "说明": "评标委员会应当对符合资格的投标人的投标文件进行符合性审查，以确定其是否满足招标文件的实质性要求。",
        "审查标准": [
-
-
            {
                "序号": 9,
                "内容": "未按要求提供加盖公章及签字（签章）的；"
            },
            {
                "序号": 1,
-                "内容": "符合招标文件第二章“投标人须知”中 39"
+                "内容": "a39"
            },
            {
                "序号": 2,