11.16
This commit is contained in:
parent
22a8b5676a
commit
ca9e995d2e
@ -46,6 +46,7 @@ def read_txt_to_string(file_path):
|
||||
except Exception as e:
|
||||
return f"错误:读取文件时发生错误。详细信息:{e}"
|
||||
def doubao_model(full_user_query):
|
||||
print("call doubao...")
|
||||
# 相关参数
|
||||
url = "https://ark.cn-beijing.volces.com/api/v3/chat/completions"
|
||||
api_key = "ad0c363f-1f23-4b13-aba3-698a4f8c3eb8"
|
||||
@ -64,7 +65,7 @@ def doubao_model(full_user_query):
|
||||
"content": full_user_query
|
||||
}
|
||||
],
|
||||
"temperature":0.5
|
||||
"temperature":0.3
|
||||
}
|
||||
response = requests.post(url, headers=headers, json=data)
|
||||
|
||||
|
@ -176,7 +176,7 @@ def get_base_info(merged_baseinfo_path,clause_path):
|
||||
# baseinfo_list.append(clean_json_string(response))
|
||||
return baseinfo_list
|
||||
|
||||
def combine_basic_info(merged_baseinfo_path, procurement_path,procurement_docpath,clause_path,invalid_path):
|
||||
def combine_basic_info(merged_baseinfo_path, procurement_path,clause_path,invalid_path):
|
||||
baseinfo_list = []
|
||||
temp_list = []
|
||||
procurement_reqs = {}
|
||||
@ -187,7 +187,7 @@ def combine_basic_info(merged_baseinfo_path, procurement_path,procurement_docpat
|
||||
# 定义一个线程函数来获取采购需求
|
||||
def fetch_procurement_reqs_thread():
|
||||
nonlocal procurement_reqs
|
||||
procurement_reqs = fetch_procurement_reqs(procurement_path,procurement_docpath,invalid_path)
|
||||
procurement_reqs = fetch_procurement_reqs(procurement_path,invalid_path)
|
||||
# 创建并启动获取基础信息的线程
|
||||
thread1 = threading.Thread(target=get_base_info_thread)
|
||||
thread1.start()
|
||||
|
@ -202,7 +202,8 @@ def get_technical_requirements(file_path,invalid_path):
|
||||
6.最后一级键内值留空或填'未知'(如数量较多或未知内容)。
|
||||
|
||||
特殊情况处理:
|
||||
同一层级下同名但采购要求不同的货物,以'货物名-编号'区分,编号从1递增。
|
||||
1.同一层级下同名但采购要求不同的货物,以'货物名-编号'区分,编号从1递增。
|
||||
2.对于工程施工、建设相关的采购需求,你无需提取,提取的范围仅限软件、硬件,。
|
||||
|
||||
示例输出结构:
|
||||
{{
|
||||
@ -276,73 +277,74 @@ def get_technical_requirements(file_path,invalid_path):
|
||||
file_id=upload_file(invalid_path)
|
||||
print("调用invalid_path")
|
||||
model_res=qianwen_long(file_id,prompt_template1)
|
||||
print(model_res)
|
||||
else:
|
||||
user_query=generate_full_user_query(file_path,prompt_template2)
|
||||
model_res=doubao_model(user_query)
|
||||
# model_res = qianwen_long(file_id,prompt_template1)
|
||||
print(model_res)
|
||||
# res = qianwen_long(file_id, user_query1)
|
||||
|
||||
cleaned_res = clean_json_string(model_res) #转字典
|
||||
keys_list,good_list,grouped_paths,no_keys_added= generate_key_paths(cleaned_res['采购需求']) # 提取需要采购的货物清单 key_list:交通监控视频子系统.高清视频抓拍像机 ...
|
||||
if no_keys_added:
|
||||
final_res = postprocess(cleaned_res)
|
||||
else:
|
||||
# user_query_template = "请你根据该货物标中采购要求部分的内容,请你给出\"{}\"的技术参数(或采购要求),请以json格式返回结果,外层键名为\"{}\", 键值对中的键是你对该要求的总结,而值需要完全与原文保持一致,不可擅自总结删减。"
|
||||
user_query_template = """
|
||||
请你根据该货物标中采购要求部分的内容,请你给出\"{}\"的技术参数(或采购要求),请以json格式返回结果,键名为\"{}\", 键值为一个列表,列表中包含若干描述\"{}\"的技术参数(或采购要求)的字符串,需与原文完全一致,即若技术参数前存在序号也要保留,但你不可擅自增添或删减。以下为需要考虑的特殊情况:如果该货物没有相关采购要求或技术参数要求,键值为空列表。示例输出格式如下:
|
||||
{{
|
||||
"摄像机控制键盘": [
|
||||
"1、支持串行 RS232/RS422 和 IP 混合控制,允许在一个控制器上使用 RS232/RS422/IP 控制单个系统中的摄像机;",
|
||||
"2、支持 2 组 RS422 串口 VISCA 协议菊花链控制 2x7 台摄像机。"
|
||||
]
|
||||
}}
|
||||
"""
|
||||
user_query_template_two="""
|
||||
请你根据该货物标中采购要求部分的内容,请你给出\"{}\"的技术参数(或采购要求),由于该货物存在多种不同的采购要求或技术参数,请你请逐一列出,请以json格式返回结果,请你以'货物名-编号'区分多种型号,编号为从 1 开始的自然数,依次递增,即第一个键名为\"{}-1\", 键值为一个列表,列表中包含若干描述\"{}\"的技术参数(或采购要求)的字符串,需与原文完全一致,即若技术参数前存在序号也要保留,但你不可擅自增添或删减。示例输出格式如下:
|
||||
{{
|
||||
"交换机-1": [
|
||||
"1、支持固化千兆电口≥8 个,固化千兆光口≥2 个,桌面型设备;",
|
||||
"2、支持静态链路聚合"
|
||||
]
|
||||
"交换机-2":[
|
||||
"1、交换容量≥52Gbps,包转发率≥38.69Mpps,",
|
||||
"2、提供国家强制性产品认证证书及测试报告(3C)"
|
||||
]
|
||||
}}
|
||||
"""
|
||||
queries = []
|
||||
for key in keys_list:
|
||||
# 将键中的 '.' 替换为 '下的'
|
||||
modified_key = key.replace('.', '下的')
|
||||
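# Fill the first and third placeholders with the modified key, and the second placeholder (the JSON key name) with the original key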
|
||||
new_query = user_query_template.format(modified_key, key, modified_key)
|
||||
queries.append(new_query)
|
||||
|
||||
# 处理 grouped_paths 中的项,应用 user_query_template_two
|
||||
for grouped_key in grouped_paths:
|
||||
# 将键中的 '.' 替换为 '下的'
|
||||
modified_grouped_key = grouped_key.replace('.', '下的')
|
||||
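# Fill the first and third placeholders with the modified grouped key, and the second placeholder (the "<key>-1" name prefix) with the original grouped key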
|
||||
new_query = user_query_template_two.format(modified_grouped_key, grouped_key, modified_grouped_key)
|
||||
queries.append(new_query)
|
||||
results = multi_threading(queries, "", file_id, 2)
|
||||
technical_requirements = []
|
||||
if not results:
|
||||
print("errror!未获得大模型的回答!")
|
||||
else:
|
||||
# 打印结果
|
||||
for question, response in results:
|
||||
technical_requirements.append(response)
|
||||
technical_requirements_combined_res = combine_json_results(technical_requirements)
|
||||
|
||||
"""根据所有键是否已添加处理技术要求"""
|
||||
# 更新原始采购需求字典
|
||||
final_res=combine_and_update_results(cleaned_res['采购需求'], technical_requirements_combined_res)
|
||||
# cleaned_res = clean_json_string(model_res) #转字典
|
||||
# keys_list,good_list,grouped_paths,no_keys_added= generate_key_paths(cleaned_res['采购需求']) # 提取需要采购的货物清单 key_list:交通监控视频子系统.高清视频抓拍像机 ...
|
||||
# if no_keys_added:
|
||||
# final_res = postprocess(cleaned_res)
|
||||
final_res["货物列表"] = good_list
|
||||
|
||||
# 输出最终的 JSON 字符串
|
||||
return {"采购需求": final_res}
|
||||
# else:
|
||||
# # user_query_template = "请你根据该货物标中采购要求部分的内容,请你给出\"{}\"的技术参数(或采购要求),请以json格式返回结果,外层键名为\"{}\", 键值对中的键是你对该要求的总结,而值需要完全与原文保持一致,不可擅自总结删减。"
|
||||
# user_query_template = """
|
||||
# 请你根据该货物标中采购要求部分的内容,请你给出\"{}\"的技术参数(或采购要求),请以json格式返回结果,键名为\"{}\", 键值为一个列表,列表中包含若干描述\"{}\"的技术参数(或采购要求)的字符串,需与原文完全一致,即若技术参数前存在序号也要保留,但你不可擅自增添或删减。以下为需要考虑的特殊情况:如果该货物没有相关采购要求或技术参数要求,键值为空列表。示例输出格式如下:
|
||||
# {{
|
||||
# "摄像机控制键盘": [
|
||||
# "1、支持串行 RS232/RS422 和 IP 混合控制,允许在一个控制器上使用 RS232/RS422/IP 控制单个系统中的摄像机;",
|
||||
# "2、支持 2 组 RS422 串口 VISCA 协议菊花链控制 2x7 台摄像机。"
|
||||
# ]
|
||||
# }}
|
||||
# """
|
||||
# user_query_template_two="""
|
||||
# 请你根据该货物标中采购要求部分的内容,请你给出\"{}\"的技术参数(或采购要求),由于该货物存在多种不同的采购要求或技术参数,请你请逐一列出,请以json格式返回结果,请你以'货物名-编号'区分多种型号,编号为从 1 开始的自然数,依次递增,即第一个键名为\"{}-1\", 键值为一个列表,列表中包含若干描述\"{}\"的技术参数(或采购要求)的字符串,需与原文完全一致,即若技术参数前存在序号也要保留,但你不可擅自增添或删减。示例输出格式如下:
|
||||
# {{
|
||||
# "交换机-1": [
|
||||
# "1、支持固化千兆电口≥8 个,固化千兆光口≥2 个,桌面型设备;",
|
||||
# "2、支持静态链路聚合"
|
||||
# ]
|
||||
# "交换机-2":[
|
||||
# "1、交换容量≥52Gbps,包转发率≥38.69Mpps,",
|
||||
# "2、提供国家强制性产品认证证书及测试报告(3C)"
|
||||
# ]
|
||||
# }}
|
||||
# """
|
||||
# queries = []
|
||||
# for key in keys_list:
|
||||
# # 将键中的 '.' 替换为 '下的'
|
||||
# modified_key = key.replace('.', '下的')
|
||||
# # 使用修改后的键填充第一个占位符,原始键填充第二个占位符
|
||||
# new_query = user_query_template.format(modified_key, key, modified_key)
|
||||
# queries.append(new_query)
|
||||
#
|
||||
# # 处理 grouped_paths 中的项,应用 user_query_template_two
|
||||
# for grouped_key in grouped_paths:
|
||||
# # 将键中的 '.' 替换为 '下的'
|
||||
# modified_grouped_key = grouped_key.replace('.', '下的')
|
||||
# # 使用修改后的键填充第一个占位符,原始键填充第二个占位符
|
||||
# new_query = user_query_template_two.format(modified_grouped_key, grouped_key, modified_grouped_key)
|
||||
# queries.append(new_query)
|
||||
# results = multi_threading(queries, "", file_id, 2)
|
||||
# technical_requirements = []
|
||||
# if not results:
|
||||
# print("errror!未获得大模型的回答!")
|
||||
# else:
|
||||
# # 打印结果
|
||||
# for question, response in results:
|
||||
# technical_requirements.append(response)
|
||||
# technical_requirements_combined_res = combine_json_results(technical_requirements)
|
||||
#
|
||||
# """根据所有键是否已添加处理技术要求"""
|
||||
# # 更新原始采购需求字典
|
||||
# final_res=combine_and_update_results(cleaned_res['采购需求'], technical_requirements_combined_res)
|
||||
# # final_res = postprocess(cleaned_res)
|
||||
# final_res["货物列表"] = good_list
|
||||
#
|
||||
# # 输出最终的 JSON 字符串
|
||||
# return {"采购需求": final_res}
|
||||
|
||||
def test_all_files_in_folder(input_folder, output_folder):
|
||||
# 确保输出文件夹存在
|
||||
|
@ -7,7 +7,8 @@ from flask_app.货物标.商务服务其他要求提取 import get_business_requ
|
||||
|
||||
|
||||
#获取采购清单
|
||||
def fetch_procurement_reqs(procurement_path, procurement_docpath, invalid_path):
|
||||
def fetch_procurement_reqs(procurement_path, invalid_path):
|
||||
# procurement_docpath = pdf2docx(procurement_path) # 采购需求docx
|
||||
# 定义默认的 procurement_reqs 字典
|
||||
DEFAULT_PROCUREMENT_REQS = {
|
||||
"采购需求": "",
|
||||
@ -22,8 +23,6 @@ def fetch_procurement_reqs(procurement_path, procurement_docpath, invalid_path):
|
||||
return DEFAULT_PROCUREMENT_REQS.copy()
|
||||
|
||||
try:
|
||||
# 上传文件并获取 file_id
|
||||
# file_id = upload_file(procurement_docpath)
|
||||
|
||||
# 使用 ThreadPoolExecutor 并行处理 get_technical_requirements 和 get_business_requirements
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
@ -63,7 +62,7 @@ if __name__ == "__main__":
|
||||
procurement_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\a110ed59-00e8-47ec-873a-bd4579a6e628\\ztbfile_procurement.pdf"
|
||||
procurement_docpath="C:\\Users\\Administrator\\Desktop\\fsdownload\\a110ed59-00e8-47ec-873a-bd4579a6e628\\ztbfile_procurement.docx"
|
||||
invalid_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\a110ed59-00e8-47ec-873a-bd4579a6e628\\ztbfile.pdf"
|
||||
res=fetch_procurement_reqs(procurement_path,procurement_docpath,invalid_path)
|
||||
res=fetch_procurement_reqs(procurement_path,invalid_path)
|
||||
print(json.dumps(res, ensure_ascii=False, indent=4))
|
||||
end_time=time.time()
|
||||
print("耗时:"+str(end_time-start_time))
|
||||
|
@ -54,7 +54,6 @@ def preprocess_files(output_folder, file_path, file_type,logger):
|
||||
invalid_path=pdf_path
|
||||
invalid_docpath = docx_path # docx截取无效标部分
|
||||
procurement_path = truncate_files[5] # 采购需求
|
||||
procurement_docpath=pdf2docx(procurement_path) # 采购需求docx
|
||||
evaluation_method_path = truncate_files[1] # 评标办法
|
||||
qualification_path = truncate_files[2] # 资格审查
|
||||
tobidders_notice_path = truncate_files[4] # 投标人须知正文
|
||||
@ -70,7 +69,6 @@ def preprocess_files(output_folder, file_path, file_type,logger):
|
||||
'invalid_path': invalid_path,
|
||||
'output_folder': output_folder,
|
||||
'procurement_path': procurement_path,
|
||||
'procurement_docpath':procurement_docpath,
|
||||
'evaluation_method_path': evaluation_method_path,
|
||||
'qualification_path': qualification_path,
|
||||
'notice_path': notice_path,
|
||||
@ -80,16 +78,14 @@ def preprocess_files(output_folder, file_path, file_type,logger):
|
||||
'merged_baseinfo_path': merged_baseinfo_path
|
||||
}
|
||||
|
||||
def fetch_project_basic_info(invalid_path,invalid_docpath, merged_baseinfo_path, procurement_path,procurement_docpath, clause_path,logger):
|
||||
def fetch_project_basic_info(invalid_path,invalid_docpath, merged_baseinfo_path, procurement_path, clause_path,logger):
|
||||
logger.info("starting 基础信息...")
|
||||
start_time = time.time()
|
||||
if not merged_baseinfo_path:
|
||||
merged_baseinfo_path = invalid_path
|
||||
if not procurement_path:
|
||||
procurement_path=invalid_path
|
||||
if not procurement_docpath:
|
||||
procurement_docpath=invalid_docpath
|
||||
basic_res = combine_basic_info(merged_baseinfo_path, procurement_path,procurement_docpath, clause_path,invalid_path)
|
||||
basic_res = combine_basic_info(merged_baseinfo_path, procurement_path,clause_path,invalid_path)
|
||||
base_info, good_list = post_process_baseinfo(basic_res,logger)
|
||||
end_time = time.time()
|
||||
logger.info(f"基础信息 done,耗时:{end_time - start_time:.2f} 秒")
|
||||
@ -207,7 +203,7 @@ def goods_bid_main(output_folder, file_path, file_type, unique_id):
|
||||
processed_data['clause_path'],logger),
|
||||
'opening_bid': executor.submit(fetch_bid_opening, processed_data['invalid_path'],processed_data['merged_baseinfo_path'],processed_data['clause_path'],logger),
|
||||
'base_info': executor.submit(fetch_project_basic_info, processed_data['invalid_path'],processed_data['invalid_docpath'],processed_data['merged_baseinfo_path'],
|
||||
processed_data['procurement_path'],processed_data['procurement_docpath'],processed_data['clause_path'],logger),
|
||||
processed_data['procurement_path'],processed_data['clause_path'],logger),
|
||||
'qualification_review': executor.submit(fetch_qualification_review, processed_data['invalid_path'],
|
||||
processed_data['qualification_path'],
|
||||
processed_data['notice_path'],logger),
|
||||
@ -242,8 +238,7 @@ def goods_bid_main(output_folder, file_path, file_type, unique_id):
|
||||
|
||||
|
||||
#TODO:把所有未知都删掉。
|
||||
#TODO:流式输出
|
||||
#TODO:医院优化提示词
|
||||
#TODO:考虑把解析失败的调用豆包,全文上传。
|
||||
#商务标这里改为列表最里层
|
||||
#good_list 金额 截取上下文
|
||||
if __name__ == "__main__":
|
||||
|
Loading…
x
Reference in New Issue
Block a user