12.9 修复解析bug

2024-12-10 09:02:39 +08:00 · 2024-12-10 09:02:39 +08:00 · 6c151fd569
commit 6c151fd569
parent c0c7871767
1 changed file with 6 additions and 3 deletions
--- a/flask_app/货物标/投标人须知正文提取指定内容货物标版.py
+++ b/flask_app/货物标/投标人须知正文提取指定内容货物标版.py
@@ -16,7 +16,7 @@ def extract_between_sections(data, target_values):
    # 遍历所有键值对
    for key, value in data.items():
        # 只匹配形如 "一": "竞争性磋商响应文件" 的章节标题
-        if section_pattern.match(key):
+        if section_pattern.match(key):           #匹配到大标题...
            if target_found:
                # 如果已经找到了符合的章节，并且遇到了另一个章节
                # 保存当前块并重置
@@ -30,7 +30,7 @@ def extract_between_sections(data, target_values):
                target_found = True  # 找到了目标章节，开始捕获后续内容
                current_section_title = value  # 保存章节标题内容
-        elif target_found:  # 只捕获目标值之后的内容
+        elif target_found:  # 匹配到普通序号...
            current_block[key] = value
    # 保存最后一个块（如果有的话）
@@ -111,6 +111,7 @@ def extract_from_notice(merged_baseinfo_path, clause_path, type):
                # transformed_data = process_with_outer_key(extracted_data)                       #取消注释这三行
                # final_result = process_nested_data(transformed_data)
                # return final_result
                print(json.dumps(extracted_data_concatenated,ensure_ascii=False,indent=4))
                return extracted_data_concatenated
        # 如果 clause_path 为空或提取数据失败，调用回退函数
@@ -121,8 +122,10 @@ def extract_from_notice(merged_baseinfo_path, clause_path, type):
        print(f"Error occurred: {e}")
        return DEFAULT_RESULT
 #TODO:可以通过判断格式来看是否需要调用GPT 1.1  2.1....
 if __name__ == "__main__":
-    clause_path = r'C:\Users\Administrator\Desktop\fsdownload\a110ed59-00e8-47ec-873a-bd4579a6e628\\clause1.json'
+    clause_path = r'C:\Users\Administrator\Desktop\招标文件\output4\tmp\clause1.json'
    merged_baseinfo_path=r"C:\Users\Administrator\Desktop\fsdownload\a110ed59-00e8-47ec-873a-bd4579a6e628\ztbfile_merged_baseinfo.pdf"
    # file_path = 'D:\\flask_project\\flask_app\\static\\output\\fee18877-0c60-4c28-911f-9a5f7d1325a7\\clause1.json'
    try: