from flask_app.general.format_change import pdf2docx
from flask_app.general.通义千问long import upload_file
from flask_app.货物标.截取pdf货物标版 import truncate_pdf_main
from flask_app.货物标.技术参数要求提取 import get_technical_requirements


def extract_matching_keys(data_dict, good_list):
    """Collect every entry of a nested structure whose key is in ``good_list``.

    The structure is walked depth-first, in iteration order. Dictionaries
    and lists are descended into; any other value is a leaf. A matching
    key's value is still searched for further matches, so matches nested
    below another match also end up in the (flat) result.

    Args:
        data_dict (dict): The nested dictionary to walk.
        good_list (list): The keys to look for.

    Returns:
        dict: Matched keys mapped to their values. When the same key
        matches more than once, the value seen last in the walk wins
        while the key keeps the position of its first match.
    """

    def iter_matches(node):
        # Yield (key, value) pairs in the order a pre-order walk finds them.
        if isinstance(node, dict):
            for name, payload in node.items():
                if name in good_list:
                    yield name, payload
                # Keep descending even below a match.
                yield from iter_matches(payload)
        elif isinstance(node, list):
            for element in node:
                yield from iter_matches(element)
        # Anything that is neither dict nor list has nothing to descend into.

    return dict(iter_matches(data_dict))


def get_technical_requirements_main(file_path, output_folder):
    """Run the technical-requirements extraction pipeline for one PDF.

    Steps: ``truncate_pdf_main`` (called with selection ``5``; only its
    first result is used) -> ``pdf2docx`` -> ``upload_file`` ->
    ``get_technical_requirements``.
    NOTE(review): selection 5 presumably picks the technical-requirements
    section of the tender document -- confirm against truncate_pdf_main.

    Args:
        file_path: Path of the source PDF, passed to ``truncate_pdf_main``.
        output_folder: Output directory, passed to ``truncate_pdf_main``.

    Returns:
        When the extraction result is a dict whose '技术要求' entry is itself
        a dict: the entries of that inner dict (searched recursively) whose
        keys are listed under its '货物列表' entry. Otherwise the extraction
        result is returned unchanged.
    """
    truncated_pdf = truncate_pdf_main(file_path, output_folder, 5)[0]
    file_id = upload_file(pdf2docx(truncated_pdf))
    final_res = get_technical_requirements(file_id)

    # Anything that does not have the expected shape is handed back as-is.
    if not isinstance(final_res, dict):
        return final_res
    technical_requirements = final_res.get('技术要求')
    if not isinstance(technical_requirements, dict):
        return final_res

    # The goods list is removed from the section before searching it, so
    # it can never show up as a match itself; a missing list means no keys.
    good_list = technical_requirements.pop('货物列表', [])
    return extract_matching_keys(technical_requirements, good_list)


if __name__ == "__main__":
    # Ad-hoc manual run against a local sample document.
    file_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\45f650ce-e519-457b-9ad6-5840e2ede539\\ztbfile.pdf"
    output_folder = "C:\\Users\\Administrator\\Desktop\\fsdownload\\45f650ce-e519-457b-9ad6-5840e2ede539\\tmp"
    res = get_technical_requirements_main(file_path, output_folder)
    print(res)