12.9 修复解析bug

This commit is contained in:
zy123 2024-12-10 09:02:39 +08:00
parent c0c7871767
commit 6c151fd569

View File

@@ -16,7 +16,7 @@ def extract_between_sections(data, target_values):
# 遍历所有键值对
for key, value in data.items():
# 只匹配形如 "一": "竞争性磋商响应文件" 的章节标题
if section_pattern.match(key):
if section_pattern.match(key): #匹配到大标题...
if target_found:
# 如果已经找到了符合的章节,并且遇到了另一个章节
# 保存当前块并重置
@@ -30,7 +30,7 @@ def extract_between_sections(data, target_values):
target_found = True # 找到了目标章节,开始捕获后续内容
current_section_title = value # 保存章节标题内容
elif target_found: # 只捕获目标值之后的内容
elif target_found: # 匹配到普通序号...
current_block[key] = value
# 保存最后一个块(如果有的话)
@@ -111,6 +111,7 @@ def extract_from_notice(merged_baseinfo_path, clause_path, type):
# transformed_data = process_with_outer_key(extracted_data) #取消注释这三行
# final_result = process_nested_data(transformed_data)
# return final_result
print(json.dumps(extracted_data_concatenated,ensure_ascii=False,indent=4))
return extracted_data_concatenated
# 如果 clause_path 为空或提取数据失败,调用回退函数
@@ -121,8 +122,10 @@ def extract_from_notice(merged_baseinfo_path, clause_path, type):
print(f"Error occurred: {e}")
return DEFAULT_RESULT
#TODO:可以通过判断格式来看是否需要调用GPT 1.1 2.1....
if __name__ == "__main__":
clause_path = r'C:\Users\Administrator\Desktop\fsdownload\a110ed59-00e8-47ec-873a-bd4579a6e628\\clause1.json'
clause_path = r'C:\Users\Administrator\Desktop\招标文件\output4\tmp\clause1.json'
merged_baseinfo_path=r"C:\Users\Administrator\Desktop\fsdownload\a110ed59-00e8-47ec-873a-bd4579a6e628\ztbfile_merged_baseinfo.pdf"
# file_path = 'D:\\flask_project\\flask_app\\static\\output\\fee18877-0c60-4c28-911f-9a5f7d1325a7\\clause1.json'
try: