diff --git a/flask_app/main/投标人须知正文提取指定内容.py b/flask_app/main/投标人须知正文提取指定内容.py index 749834b..a20858e 100644 --- a/flask_app/main/投标人须知正文提取指定内容.py +++ b/flask_app/main/投标人须知正文提取指定内容.py @@ -100,7 +100,7 @@ post_process 函数尝试将长字符串按特定模式分割成块,每块至 """ # 读取JSON数据,提取内容,转换结构,并打印结果 -def extract_from_notice(invalid_path,clause_path, type): +def extract_from_notice(merged_baseinfo_path,clause_path, type): if type == 1: target_values = ["投标","投标文件","响应文件"] elif type == 2: @@ -115,23 +115,25 @@ def extract_from_notice(invalid_path,clause_path, type): with open(clause_path, 'r', encoding='utf-8') as file: data = json.load(file) extracted_data = extract_json(data, target_values) # 读取json + if not extracted_data: + final_result = get_requirements_with_gpt(merged_baseinfo_path, type) # 万一没用正则匹配到,那就调用大模型 + return final_result # print(json.dumps(extracted_data,ensure_ascii=False,indent=4)) - res=extract_sections(extracted_data,target_values) - print(json.dumps(res, ensure_ascii=False, indent=4)) + final_result=extract_sections(extracted_data,target_values) + return final_result + # print(json.dumps(res, ensure_ascii=False, indent=4)) # sorted_data = sort_clean_data_keys(extracted_data) # 对输入的字典 data 的键进行预处理和排序 # transformed_data = transform_json(sorted_data) # print(json.dumps(transformed_data,ensure_ascii=False,indent=4)) # final_result = process_nested_data(transformed_data) - # if not final_result: - # final_result = get_requirements_with_gpt(invalid_path, type) # return final_result if __name__ == "__main__": # file_path = 'C:\\Users\\Administrator\\Desktop\\fsdownload\\3bffaa84-2434-4bd0-a8ee-5c234ccd7fa0\\clause1.json' - invalid_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\4e5bc6c2-c2b8-4c0b-8e57-81a498b982f6\\ztbfile_invalid.pdf" + merged_baseinfo_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\4e5bc6c2-c2b8-4c0b-8e57-81a498b982f6\\ztbfile_tobidders_notice.pdf" clause_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\4e5bc6c2-c2b8-4c0b-8e57-81a498b982f6\\clause1.json" try: - res = extract_from_notice(invalid_path,clause_path, 1) # 可以改变此处的 type 参数测试不同的场景 + res = extract_from_notice(merged_baseinfo_path,clause_path, 1) # 可以改变此处的 type 参数测试不同的场景 res2 = json.dumps(res, ensure_ascii=False, indent=4) print(res2) except ValueError as e: