# zbparse/flask_app/货物标/extract_procurement_requirements.py

import os
import sys
from 货物标截取pdf import truncate_pdf_main
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
# from ..main.format_change import docx2pdf
# from ..main.多线程提问 import multi_threading
from ..main.通义千问long import upload_file,qianwen_long
from ..main.json_utils import clean_json_string

def generate_key_paths(data, parent_key=''):
    """Flatten a nested dict into a list of dot-separated leaf key paths.

    Every value that is not a ``dict`` counts as a leaf; nested dicts are
    descended into and contribute only the paths of their own leaves (so an
    empty nested dict contributes nothing). Paths appear in the dict's
    iteration order, depth-first.

    Args:
        data: The (possibly nested) dict to walk.
        parent_key: Prefix placed before every generated path. When it is
            falsy, top-level keys are used as-is without a leading dot.

    Returns:
        A list with one entry per leaf, e.g. ``{'a': {'b': 1}}`` gives
        ``['a.b']``.
    """
    def _walk(node, prefix):
        for name, child in node.items():
            # Only join with a dot when there is a prefix to join onto.
            dotted = name if not prefix else f"{prefix}.{name}"
            if isinstance(child, dict):
                # Nested mapping: keep descending, carrying the path so far.
                yield from _walk(child, dotted)
            else:
                # Leaf value: the accumulated path is complete.
                yield dotted

    return list(_walk(data, parent_key))

# Fetch the purchasing list
def fetch_purchasing_list(file_path, output_folder="C:\\Users\\Administrator\\Desktop\\货物标\\output"):
    """Ask the LLM for the goods/systems to purchase and flatten the answer.

    The input file is first passed through ``truncate_pdf_main``; the
    resulting file is uploaded, queried with a fixed prompt that requests a
    JSON object whose outer key is "采购需求", and the nested keys of that
    object are flattened into dot-separated paths.

    Args:
        file_path: Path of the tender document to process.
        output_folder: Folder handed to ``truncate_pdf_main`` for its output.
            Defaults to the previously hard-coded location so existing
            callers behave the same.

    Returns:
        The list of dot-separated key paths found under "采购需求". The list
        is also printed, as before.

    Raises:
        KeyError: If the cleaned response has no "采购需求" key (assuming
            ``clean_json_string`` returns a dict — TODO confirm).
    """
    # NOTE: no docx -> pdf conversion happens here; the docx2pdf step is disabled.
    # The third argument (1) is passed through unchanged; presumably it selects
    # which section truncate_pdf_main extracts — verify against that helper.
    truncate_path = truncate_pdf_main(file_path, output_folder, 1)
    user_query = "这是一份货物标中采购要求部分的内容，你需要摘取出需要采购的系统（货物），一个大系统（大项）中可能包含多个小系统（小项），你需要保留这种层次关系，给出货物名称，请以json格式返回，外层键名为\"采购需求\"，嵌套键名为对应的系统名称或货物名称，无需给出采购数量和单位，如有未知内容，在对应键值处填\"未知\"。"
    file_id = upload_file(truncate_path)
    res = qianwen_long(file_id, user_query)
    cleaned_res = clean_json_string(res)
    keys_list = generate_key_paths(cleaned_res['采购需求'])
    print(keys_list)
    # Hand the result back so callers can use it instead of scraping stdout.
    return keys_list

if __name__ == "__main__":
    # Manual smoke run against a sample tender PDF on the developer machine.
    sample_pdf = "C:\\Users\\Administrator\\Desktop\\货物标\\output1\\招招招标文件（一中多媒体报告厅教学设备）_20240829101650_tobidders_notice_table.pdf"
    fetch_purchasing_list(file_path=sample_pdf)