diff --git a/flask_app/general/商务技术评分提取.py b/flask_app/general/商务技术评分提取.py index 8a27861..2e42a33 100644 --- a/flask_app/general/商务技术评分提取.py +++ b/flask_app/general/商务技术评分提取.py @@ -423,13 +423,13 @@ def combine_evaluation_standards(evaluation_method_path,invalid_path,zb_type): if __name__ == "__main__": start_time=time.time() # truncate_file=r"C:\Users\Administrator\Desktop\招标文件-采购类\tmp2\2024-新疆-塔城地区公安局食药环分局快检实验室项目_evaluation_method.pdf" - evaluation_method_path = r'C:\Users\Administrator\Desktop\fsdownload\91399aa4-1ee8-447d-a05b-03cd8d15ced5\ztbfile_evaluation_method.pdf' + evaluation_method_path = r'C:\Users\Administrator\Desktop\fsdownload\aba81749-5986-4492-8b4b-16db9c69a09d\ztbfile_evaluation_method.pdf' invalid_path=r'C:\Users\Administrator\Desktop\fsdownload\91399aa4-1ee8-447d-a05b-03cd8d15ced5\ztbfile_invalid.pdf' # truncate_file = "C:\\Users\\Administrator\\Desktop\\货物标\\output2\\2-招标文件(统计局智能终端二次招标)_evaluation_method.pdf" # truncate_file="C:\\Users\\Administrator\\Desktop\\货物标\\output2\\广水市妇幼招标文件最新(W改)_evaluation_method.pdf" # truncate_file = "C:\\Users\\Administrator\\Desktop\\fsdownload\\2d481945-1f82-45a5-8e56-7fafea4a7793\\ztbfile_evaluation_method.pdf" # truncate_file="C:\\Users\\Administrator\\Desktop\\fsdownload\\ztbfile_evaluation_method.pdf" - res = combine_evaluation_standards(evaluation_method_path,invalid_path,1) + res = combine_evaluation_standards(evaluation_method_path,invalid_path,2) print(json.dumps(res, ensure_ascii=False, indent=4)) end_time=time.time() print("elapsed time:"+str(end_time-start_time)) \ No newline at end of file diff --git a/flask_app/工程标/投标人须知正文提取指定内容工程标.py b/flask_app/工程标/投标人须知正文提取指定内容工程标.py index d1bda60..8d0264e 100644 --- a/flask_app/工程标/投标人须知正文提取指定内容工程标.py +++ b/flask_app/工程标/投标人须知正文提取指定内容工程标.py @@ -68,35 +68,75 @@ post_process 函数尝试将长字符串按特定模式分割成块,每块至 """ # 读取JSON数据,提取内容,转换结构,并打印结果 -def extract_from_notice(merged_baseinfo_path,clause_path, type): - if type == 1: - target_values = ["投标","投标文件","响应文件"] - elif type == 2: - # target_values = ["开标", "评标", "定标","磋商程序","中标"] - target_values=["开标", "评标", "定标","评审","成交","合同","磋商程序", "中标", "程序", "步骤"] - elif type == 3: - target_values = ["重新招标、不再招标和终止招标","重新招标","重新采购", "不再招标", "不再采购","终止招标","终止采购"] - elif type == 4: - target_values = ["评标"] # 测试 - else: - raise ValueError( - "Invalid type specified. Use 1 for '投标文件, 投标' or 2 for '开标, 评标, 定标'or 3 for '重新招标'") - with open(clause_path, 'r', encoding='utf-8') as file: - data = json.load(file) - extracted_data = extract_between_sections(data, target_values) #先使用大章节'二、投标文件'这种筛选 - if not extracted_data: - extracted_data = extract_json(data, target_values) # 若没有,再使用'3.投标文件' 筛选 +def extract_from_notice(merged_baseinfo_path, clause_path, type): + """ + 从公告中提取特定类型的内容。 + + Args: + merged_baseinfo_path (str): 合并后的基础信息路径。 + clause_path (str): 包含条款的JSON文件路径。 + type (int): 提取的类型。 + 1 - ["投标", "投标文件", "响应文件"] + 2 - ["开标", "评标", "定标", "评审", "成交", "合同", "磋商程序", "中标", "程序", "步骤"] + 3 - ["重新招标、不再招标和终止招标", "重新招标", "重新采购", "不再招标", "不再采购", "终止招标", "终止采购"] + 4 - ["评标"] # 测试 + + Returns: + dict 或 str: 提取并处理后的数据,或在 `clause_path` 为空或发生错误时返回空字符串 `""`。 + """ + # 定义默认的返回结果 + DEFAULT_RESULT = "" + + # 映射 type 到 target_values + type_target_map = { + 1: ["投标", "投标文件", "响应文件"], + 2: ["开标", "评标", "定标", "评审", "成交", "合同", "磋商程序", "中标", "程序", "步骤"], + 3: ["重新招标、不再招标和终止招标", "重新招标", "重新采购", "不再招标", "不再采购", "终止招标", "终止采购"], + 4: ["评标"] # 测试 + } + + # 获取对应 type 的 target_values + target_values = type_target_map.get(type) + if not target_values: + print(f"Error: Invalid type specified: {type}. Use 1, 2, 3, or 4.") + return DEFAULT_RESULT + + try: + # 检查 clause_path 是否为空或仅包含空白字符 + if clause_path and clause_path.strip(): + with open(clause_path, 'r', encoding='utf-8') as file: + data = json.load(file) + + # 先尝试使用大章节筛选 + extracted_data = extract_between_sections(data, target_values) + if not extracted_data: - final_result = get_requirements_with_gpt(merged_baseinfo_path, type) # 万一都没,那就调用大模型 - return final_result - final_result=extract_sections(extracted_data,target_values) #后处理,生成键名 - return final_result + # 如果大章节筛选失败,尝试使用另一种筛选方法 + extracted_data = extract_json(data, target_values) + + if not extracted_data: + # 如果所有筛选方法均失败,调用回退函数 + final_result = get_requirements_with_gpt(merged_baseinfo_path, type) + return final_result + else: + # 后处理,生成键名 + final_result = extract_sections(extracted_data, target_values) + return final_result + else: + # 合并键值对,启用结构化 + extracted_data_concatenated = { + section: concatenate_keys_values(content) + for section, content in extracted_data.items() + } + return extracted_data_concatenated else: - extracted_data_concatenated = { - section: concatenate_keys_values(content) - for section, content in extracted_data.items() - } - return extracted_data_concatenated + # 如果 clause_path 为空,直接调用回退函数 + final_result = get_requirements_with_gpt(merged_baseinfo_path, type) + return final_result + + except Exception as e: + print(f"Error occurred while processing clause_path '{clause_path}': {e}") + return DEFAULT_RESULT # print(json.dumps(res, ensure_ascii=False, indent=4)) # sorted_data = sort_clean_data_keys(extracted_data) # 对输入的字典 data 的键进行预处理和排序 # transformed_data = transform_json(sorted_data)