1.10

2025-01-10 10:04:30 +08:00 · 2025-01-10 10:04:30 +08:00 · 516b23bd9e
commit 516b23bd9e
parent 51a8796353
6 changed files with 21 additions and 22 deletions
--- a/flask_app/general/投标人须知正文提取指定内容.py
+++ b/flask_app/general/投标人须知正文提取指定内容.py
@ -6,7 +6,7 @@ from flask_app.general.model_continue_query import process_continue_answers
 from flask_app.general.通义千问long import upload_file, qianwen_long_stream

 #提取两个大标题之间的内容
-def extract_between_sections(data, target_values):
+def extract_between_sections(data, target_values,flag=False):
    target_found = False
    extracted_data = {}
    current_section_title = ""
@ -27,9 +27,10 @@ def extract_between_sections(data, target_values):
                target_found = False

            # 检查当前标题是否包含 target_values 中的任意关键词
-            if any(tv in value for tv in target_values) and not file_pattern.search(value):
-                target_found = True  # 找到了目标章节，开始捕获后续内容
-                current_section_title = value  # 保存章节标题内容
+            if any(tv in value for tv in target_values):
+                if (flag and not file_pattern.search(value)) or not flag:
+                    target_found = True  # 找到了目标章节，开始捕获后续内容
+                    current_section_title = value  # 保存章节标题内容

        elif target_found:  # 匹配到普通序号...
            current_block[key] = value
@ -365,8 +366,8 @@ def extract_from_notice(merged_baseinfo_path, clause_path, type):
        3: ["重新招标、不再招标和终止招标", "重新招标", "重新采购", "不再招标", "不再采购", "终止招标", "终止采购"],
        4: ["评标"]  # 测试
    }
-
    # 获取对应 type 的 target_values
+    flag = (type == 2)
    target_values = type_target_map.get(type)
    if not target_values:
        print(f"Error: Invalid type specified: {type}. Use 1, 2, 3, or 4.")
@ -378,7 +379,7 @@ def extract_from_notice(merged_baseinfo_path, clause_path, type):
                data = json.load(file)
            if len(data) >= 60:
                # 尝试使用大章节筛选
-                extracted_data = extract_between_sections(data, target_values)
+                extracted_data = extract_between_sections(data, target_values,flag)
                if extracted_data:
                    # 后处理并返回结果
                    extracted_data_concatenated = {
@ -394,9 +395,9 @@ def extract_from_notice(merged_baseinfo_path, clause_path, type):
                    final_result = postprocess_formatted2(extracted_data, target_values)
                    return final_result

-        # # 如果 clause_path 为空，或者所有筛选方法均失败，调用回退函数
-        # final_result = get_requirements_with_gpt(merged_baseinfo_path, type)
-        # return final_result
+        # 如果 clause_path 为空，或者所有筛选方法均失败，调用回退函数
+        final_result = get_requirements_with_gpt(merged_baseinfo_path, type)
+        return final_result

    except Exception as e:
        print(f"Error occurred while processing clause_path '{clause_path}': {e}")
@ -407,7 +408,7 @@ if __name__ == "__main__":
    merged_baseinfo_path=r"C:\Users\Administrator\Desktop\fsdownload\b29de31a-297e-42cf-b9ba-6859e530a472\ztbfile_merged_baseinfo.pdf"
    clause_path=r"C:\Users\Administrator\Desktop\fsdownload\b29de31a-297e-42cf-b9ba-6859e530a472\clause1.json"
    try:
-        res = extract_from_notice(merged_baseinfo_path,clause_path, 1)  # 可以改变此处的 type 参数测试不同的场景
+        res = extract_from_notice(merged_baseinfo_path,clause_path, 2)  # 可以改变此处的 type 参数测试不同的场景
        res2 = json.dumps(res, ensure_ascii=False, indent=4)
        print(res2)
    except ValueError as e:
--- a/flask_app/routes/judge_zbfile.py
+++ b/flask_app/routes/judge_zbfile.py
@ -0,0 +1,10 @@
+# from flask_app.ConnectionLimiter import require_connection_limit
+# from flask import Blueprint
+#
+# from flask_app.routes.utils import validate_and_setup_logger
+#
+# judge_zbfile_bp = Blueprint('judge_zbfile', __name__)
+# @judge_zbfile_bp.route('/judge_zbfile', methods=['POST'])
+# @validate_and_setup_logger
+# @require_connection_limit(timeout=30)
+# def judge_zbfile():
--- a/flask_app/routes/偏离表main.py
+++ b/flask_app/routes/偏离表main.py
@ -551,9 +551,6 @@ def process_functions_in_parallel(tech_deviation_info, busi_requirements_dict, z
 def get_tech_and_business_deviation(file_path,file_type,unique_id,output_folder,zb_type=2):
    global logger
    logger = get_global_logger(unique_id)
-    judge_res = judge_zbfile(file_path)
-    if not judge_res:
-        return None
    # 第一步：根据文件类型进行转换
    if file_type == 1:  # docx
        docx_path=file_path
--- a/flask_app/routes/小解析main.py
+++ b/flask_app/routes/小解析main.py
@ -99,9 +99,6 @@ def little_parse_main(output_folder, file_path, file_type,zb_type,unique_id):
        """
    logger = get_global_logger(unique_id)
    logger.info("zb_type:"+str(zb_type))
-    judge_res = judge_zbfile(file_path)
-    if not judge_res:
-        return None
    # 根据文件类型处理文件路径
    if file_type == 1:  # docx
        docx_path = file_path
--- a/flask_app/routes/工程标解析main.py
+++ b/flask_app/routes/工程标解析main.py
@ -28,9 +28,6 @@ def preprocess_files(output_folder, file_path, file_type,logger):
    logger.info("starting 文件预处理...")
    logger.info("output_folder..." + output_folder)
    start_time=time.time()
-    judge_res = judge_zbfile(file_path)
-    if not judge_res:
-        return None
    # 根据文件类型处理文件路径
    if file_type == 1:  # docx
        # docx_path = file_path
--- a/flask_app/routes/货物标解析main.py
+++ b/flask_app/routes/货物标解析main.py
@ -24,9 +24,6 @@ executor = ThreadPoolExecutor()
 def preprocess_files(output_folder, file_path, file_type,logger):
    logger.info("starting 文件预处理...")
    start_time = time.time()
-    judge_res = judge_zbfile(file_path)
-    if not judge_res:
-        return None
    logger.info("output_folder..." + output_folder)
    # 根据文件类型处理文件路径
    if file_type == 1:  # docx