2024-09-09 17:42:39 +08:00
|
|
|
|
import json
|
|
|
|
|
|
|
|
|
|
from flask_app.货物标.货物标截取pdf import truncate_pdf_main
|
|
|
|
|
from flask_app.main.format_change import docx2pdf, pdf2docx
|
2024-08-29 17:30:49 +08:00
|
|
|
|
from flask_app.main.多线程提问 import multi_threading
|
|
|
|
|
from flask_app.main.通义千问long import upload_file,qianwen_long
|
2024-09-09 17:42:39 +08:00
|
|
|
|
from flask_app.main.json_utils import clean_json_string,combine_json_results
|
2024-08-29 16:37:09 +08:00
|
|
|
|
|
|
|
|
|
def generate_key_paths(data, parent_key=''):
    """Flatten a nested dict into a list of dotted key paths.

    Every leaf produces one entry. A leaf is any value that is not a dict,
    or a dict that is empty. Nested keys are joined with "." and prefixed
    with ``parent_key`` when it is truthy; with a falsy ``parent_key`` the
    key object itself is used as the path, unchanged.

    Args:
        data: Mapping to walk (must provide ``.items()``).
        parent_key: Path prefix accumulated by the recursive calls.

    Returns:
        List of key paths in the iteration order of ``data``.
    """
    paths = []
    for name, child in data.items():
        if parent_key:
            dotted = f"{parent_key}.{name}"
        else:
            dotted = name

        # Only a non-empty dict has children worth descending into.
        if isinstance(child, dict) and child:
            paths += generate_key_paths(child, dotted)
            continue

        # Scalars and empty dicts both terminate the path here.
        paths.append(dotted)
    return paths
|
|
|
|
|
|
|
|
|
|
# Fetch the purchasing list
|
2024-09-09 17:42:39 +08:00
|
|
|
|
def fetch_purchasing_list(file_path, output_folder, file_type):
    """Ask the LLM for the purchased goods and their technical requirements.

    Steps: make sure both a PDF and a Word version of the document exist,
    truncate the PDF with ``truncate_pdf_main``, upload the first truncated
    file, ask for the hierarchy of systems/goods under the "采购需求" key,
    then ask one follow-up question per flattened key path for its
    technical parameters. The merged answers are printed as JSON.

    Args:
        file_path: Path of the source document.
        output_folder: Folder passed to ``truncate_pdf_main``.
        file_type: ``1`` if ``file_path`` is a Word document (converted to
            PDF), ``2`` if it is already a PDF (converted to Word).

    Returns:
        Whatever ``combine_json_results`` builds from the collected
        responses; the same object that is printed.

    Raises:
        ValueError: If ``file_type`` is neither 1 nor 2.
    """
    # Kept as module-level names: code outside this function may read them.
    global pdf_path, docx_path

    # An unsupported file_type used to skip both branches, after which the
    # function failed with NameError or silently reused the pdf_path left
    # behind by a previous call. Fail loudly before doing any work instead.
    if file_type not in (1, 2):
        raise ValueError(
            f"unsupported file_type: {file_type!r} (expected 1 or 2)"
        )

    if file_type == 1:
        docx_path = file_path
        pdf_path = docx2pdf(file_path)
    else:
        pdf_path = file_path
        docx_path = pdf2docx(file_path)

    # NOTE(review): selection 1 is presumably the procurement-requirements
    # section (the prompts below describe the upload that way) - confirm.
    truncate_path = truncate_pdf_main(pdf_path, output_folder, 1)

    user_query1 = "这是一份货物标中采购要求部分的内容,你需要摘取出需要采购的系统(货物),一个大系统(大项)中可能包含多个小系统(小项),你需要保留这种层次关系,给出货物名称,请以json格式返回,外层键名为\"采购需求\",嵌套键名为对应的系统名称或货物名称,无需给出采购数量和单位,如有未知内容,在对应键值处填\"未知\"。"
    file_id = upload_file(truncate_path[0])
    res = qianwen_long(file_id, user_query1)
    cleaned_res = clean_json_string(res)

    # Flatten the goods hierarchy into dotted paths, one per item to query.
    keys_list = generate_key_paths(cleaned_res['采购需求'])

    user_query_template = "这是一份货物标中采购要求部分的内容,请你给出\"{}\"的具体型号参数要求,请以json格式返回结果,外层键名为\"{}\", 键值对中的键是你对该要求的总结,而值需要完全与原文保持一致,不可擅自总结删减。"
    queries = []
    for key in keys_list:
        # The item path fills both placeholders: the subject of the question
        # and the outer key the model is asked to use in its answer.
        new_query = user_query_template.format(key, key)
        print(new_query)
        queries.append(new_query)

    results = multi_threading(queries, "", file_id, 2)

    technical_requirements = []
    if not results:
        print("errror!")
    else:
        # Each result is a (question, response) pair; only responses are merged.
        technical_requirements = [response for _, response in results]

    technical_requirements_combined_res = combine_json_results(technical_requirements)
    json_string = json.dumps(
        technical_requirements_combined_res, ensure_ascii=False, indent=4
    )
    print(json_string)
    return technical_requirements_combined_res
|
2024-08-29 16:37:09 +08:00
|
|
|
|
if __name__ == "__main__":
    # Manual run against a sample tender document on the developer machine.
    file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\磋商文件.doc"
    output_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles"

    # file_type=1 takes the branch that treats file_path as the Word document
    # and converts it to PDF.
    fetch_purchasing_list(
        file_path=file_path,
        output_folder=output_folder,
        file_type=1,
    )
|
2024-08-30 10:17:57 +08:00
|
|
|
|
|