12.9 修复解析bug
This commit is contained in:
parent
c0c7871767
commit
6c151fd569
@ -16,7 +16,7 @@ def extract_between_sections(data, target_values):
|
|||||||
# 遍历所有键值对
|
# 遍历所有键值对
|
||||||
for key, value in data.items():
|
for key, value in data.items():
|
||||||
# 只匹配形如 "一": "竞争性磋商响应文件" 的章节标题
|
# 只匹配形如 "一": "竞争性磋商响应文件" 的章节标题
|
||||||
if section_pattern.match(key):
|
if section_pattern.match(key): #匹配到大标题...
|
||||||
if target_found:
|
if target_found:
|
||||||
# 如果已经找到了符合的章节,并且遇到了另一个章节
|
# 如果已经找到了符合的章节,并且遇到了另一个章节
|
||||||
# 保存当前块并重置
|
# 保存当前块并重置
|
||||||
@ -30,7 +30,7 @@ def extract_between_sections(data, target_values):
|
|||||||
target_found = True # 找到了目标章节,开始捕获后续内容
|
target_found = True # 找到了目标章节,开始捕获后续内容
|
||||||
current_section_title = value # 保存章节标题内容
|
current_section_title = value # 保存章节标题内容
|
||||||
|
|
||||||
elif target_found: # 只捕获目标值之后的内容
|
elif target_found: # 匹配到普通序号...
|
||||||
current_block[key] = value
|
current_block[key] = value
|
||||||
|
|
||||||
# 保存最后一个块(如果有的话)
|
# 保存最后一个块(如果有的话)
|
||||||
@ -111,6 +111,7 @@ def extract_from_notice(merged_baseinfo_path, clause_path, type):
|
|||||||
# transformed_data = process_with_outer_key(extracted_data) #取消注释这三行
|
# transformed_data = process_with_outer_key(extracted_data) #取消注释这三行
|
||||||
# final_result = process_nested_data(transformed_data)
|
# final_result = process_nested_data(transformed_data)
|
||||||
# return final_result
|
# return final_result
|
||||||
|
print(json.dumps(extracted_data_concatenated,ensure_ascii=False,indent=4))
|
||||||
return extracted_data_concatenated
|
return extracted_data_concatenated
|
||||||
|
|
||||||
# 如果 clause_path 为空或提取数据失败,调用回退函数
|
# 如果 clause_path 为空或提取数据失败,调用回退函数
|
||||||
@ -121,8 +122,10 @@ def extract_from_notice(merged_baseinfo_path, clause_path, type):
|
|||||||
print(f"Error occurred: {e}")
|
print(f"Error occurred: {e}")
|
||||||
return DEFAULT_RESULT
|
return DEFAULT_RESULT
|
||||||
|
|
||||||
|
# TODO: 可以通过判断序号格式(例如 1.1、2.1 ……)来决定是否需要调用 GPT
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
clause_path = r'C:\Users\Administrator\Desktop\fsdownload\a110ed59-00e8-47ec-873a-bd4579a6e628\\clause1.json'
|
clause_path = r'C:\Users\Administrator\Desktop\招标文件\output4\tmp\clause1.json'
|
||||||
merged_baseinfo_path=r"C:\Users\Administrator\Desktop\fsdownload\a110ed59-00e8-47ec-873a-bd4579a6e628\ztbfile_merged_baseinfo.pdf"
|
merged_baseinfo_path=r"C:\Users\Administrator\Desktop\fsdownload\a110ed59-00e8-47ec-873a-bd4579a6e628\ztbfile_merged_baseinfo.pdf"
|
||||||
# file_path = 'D:\\flask_project\\flask_app\\static\\output\\fee18877-0c60-4c28-911f-9a5f7d1325a7\\clause1.json'
|
# file_path = 'D:\\flask_project\\flask_app\\static\\output\\fee18877-0c60-4c28-911f-9a5f7d1325a7\\clause1.json'
|
||||||
try:
|
try:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user