zbparse/flask_app/general/纯技术参数要求提取.py
2024-10-27 12:08:54 +08:00

48 lines
2.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from flask_app.general.通义千问long import upload_file
from flask_app.货物标.截取pdf货物标版 import truncate_pdf_main
from flask_app.货物标.技术参数要求提取 import get_technical_requirements
def extract_matching_keys(data_dict, good_list):
    """
    Recursively collect every key of ``data_dict`` that appears in ``good_list``.

    The structure is walked depth-first: dicts are scanned key by key in
    insertion order, and lists are scanned element by element. Matching keys
    found at any depth are flattened into a single result dict. The walk
    continues *into* the value of a matching key, so nested matches are
    reported as well. When the same key matches more than once, the value
    seen last in traversal order wins.

    Parameters:
    - data_dict: the nested structure to walk (a dict, or a list of dicts /
      lists). Any other type yields an empty result.
    - good_list: container of keys to look for. ``None`` is treated as
      "nothing requested" and yields an empty result.

    Returns:
    - dict: every matching key mapped to its (last seen) value.
    """
    if good_list is None:
        # A missing list means no key can match; avoid `key in None` raising.
        wanted = ()
    elif isinstance(good_list, (list, tuple, set, frozenset)):
        try:
            # Build the lookup once so each membership test is O(1).
            wanted = frozenset(good_list)
        except TypeError:
            # Some element is unhashable; keep the original container so
            # membership still works via equality comparison.
            wanted = good_list
    else:
        # Any other container (e.g. a str) keeps its native `in` semantics.
        wanted = good_list

    result = {}

    def recurse(node):
        if isinstance(node, dict):
            for key, value in node.items():
                if key in wanted:
                    result[key] = value
                # Descend into the value whether or not the key matched.
                recurse(value)
        elif isinstance(node, list):
            for item in node:
                recurse(item)
        # Scalars and other types have nothing to descend into.

    recurse(data_dict)
    return result
def get_technical_requirements_main(file_path, output_folder):
    """
    Run the technical-requirements pipeline for one file and flatten the result.

    Steps, in order:
    1. ``truncate_pdf_main(file_path, output_folder, 1)`` is called and the
       first element of its return value is taken.
    2. That element is passed to ``upload_file``.
    3. The value returned by the upload is passed to
       ``get_technical_requirements``.

    If the response is a dict whose '技术要求' entry is itself a dict, the
    '货物列表' entry is popped from it (defaulting to an empty list) and the
    remaining content is filtered through ``extract_matching_keys``.
    Otherwise the response is returned unchanged.
    """
    truncated_outputs = truncate_pdf_main(file_path, output_folder, 1)
    uploaded_id = upload_file(truncated_outputs[0])
    response = get_technical_requirements(uploaded_id)

    # Anything that is not a dict is handed back to the caller untouched.
    if not isinstance(response, dict):
        return response

    # A missing or non-dict '技术要求' section also falls through unchanged.
    section = response.get('技术要求')
    if not isinstance(section, dict):
        return response

    # Popping removes the goods list from the section before it is searched.
    goods = section.pop('货物列表', [])
    return extract_matching_keys(section, goods)
if __name__ == "__main__":
    # Hard-coded Windows paths for a manual run of this module as a script.
    file_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\217754b7-3efd-41b2-806b-0b5b1bc98904\\ztbfile.pdf"
    output_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\output1\\tmp"
    # The sample call is disabled, so running the script only sets the two paths.
    # NOTE(review): the call passes `truncate_file`, which is not defined in this
    # block; `file_path` is presumably intended - confirm before enabling.
    # res=get_technical_requirements_main(truncate_file,output_folder)