import json
import logging
import re
import string

from flask_app.general.format_change import pdf2docx, docx2pdf
from flask_app.general.通义千问long import upload_file
from flask_app.货物标.截取pdf货物标版 import truncate_pdf_main
from flask_app.货物标.技术参数要求提取 import get_technical_requirements
from flask_app.货物标.技术参数要求提取后处理函数 import extract_matching_keys


def get_global_logger(unique_id):
    """Return the logger named ``unique_id``, or the root logger when it is None."""
    if unique_id is None:
        return logging.getLogger()  # fall back to the default (root) logger
    return logging.getLogger(unique_id)


# Module-level logger; rebound on every call to get_technical_requirements_main.
logger = None


def get_technical_requirements_main(file_path, file_type, unique_id, output_folder):
    """Extract the technical requirements from a tender document.

    The input is normalised to PDF, truncated via ``truncate_pdf_main``,
    converted to docx, uploaded, and handed to ``get_technical_requirements``.

    Args:
        file_path: Path of the input document.
        file_type: 1 for docx, 2 for pdf, 3 for doc.
        unique_id: Name of the logger to use; None selects the root logger.
        output_folder: Folder passed through to ``truncate_pdf_main``.

    Returns:
        None when ``file_type`` is unsupported. Otherwise, when the extraction
        result is a dict holding a dict under '技术要求', the result of
        ``extract_matching_keys`` on that inner dict (with its '货物列表' entry
        popped out and passed separately). In every other case the raw
        extraction result is returned unchanged.
    """
    global logger
    logger = get_global_logger(unique_id)

    if file_type in (1, 3):
        # docx (1) and doc (3) both go through the same converter so that the
        # rest of the pipeline only ever deals with a PDF.
        pdf_path = docx2pdf(file_path)
    elif file_type == 2:
        pdf_path = file_path
    else:
        logger.error("Unsupported file type provided. Preprocessing halted.")
        return None

    # NOTE(review): the selector 5 presumably picks the technical-requirements
    # section - confirm against truncate_pdf_main.
    truncated = truncate_pdf_main(pdf_path, output_folder, 5)
    # Guard the index: an empty result must reach the fallback below instead of
    # raising IndexError.
    truncate_file = truncated[0] if truncated else None
    if not truncate_file:
        truncate_file = pdf_path  # nothing was cut out: send the whole document

    # The docx rendition (not the PDF) is what gets uploaded.
    truncate_file_docx = pdf2docx(truncate_file)
    file_id = upload_file(truncate_file_docx)
    final_res = get_technical_requirements(file_id, pdf_path)

    # Only unwrap '技术要求' when the result really has the expected
    # dict-in-dict shape; anything else is handed back untouched.
    if not isinstance(final_res, dict):
        return final_res
    technical_requirements = final_res.get('技术要求')
    if not isinstance(technical_requirements, dict):
        return final_res

    # pop() removes the goods list from the requirements dict (in place);
    # a missing key yields an empty list.
    good_list = technical_requirements.pop('货物列表', [])
    logger.info("Collected good_list from the processing function: %s", good_list)
    return extract_matching_keys(technical_requirements, good_list)


if __name__ == "__main__":
    # Manual smoke run against a local sample file.
    file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\包头市公安支队机动车查验监管系统招标文201907.pdf"
    file_type = 2
    output_folder = "C:\\Users\\Administrator\\Desktop\\fsdownload\\45f650ce-e519-457b-9ad6-5840e2ede539\\tmp"
    res = get_technical_requirements_main(file_path, file_type, "123", output_folder)
    print(json.dumps(res, ensure_ascii=False, indent=4))