12.12 豆包测试
This commit is contained in:
parent
e023ff70d6
commit
2b2627305a
@ -50,12 +50,12 @@ def convert_pdf_to_markdown(file_path):
|
|||||||
resp = textin.recognize_pdf2md(image, {
|
resp = textin.recognize_pdf2md(image, {
|
||||||
'page_start': 0,
|
'page_start': 0,
|
||||||
'page_count': 50, # 设置解析页数为50页
|
'page_count': 50, # 设置解析页数为50页
|
||||||
'table_flavor': 'md', # html 按html语法输出表格
|
'table_flavor': 'html', # html 按html语法输出表格
|
||||||
'parse_mode': 'scan', # 设置解析模式为scan模式
|
'parse_mode': 'auto', # 设置解析模式为auto模式
|
||||||
'page_details': 0, # 不包含页面细节
|
'page_details': 0, # 不包含页面细节
|
||||||
'markdown_details': 1,
|
'markdown_details': 1,
|
||||||
'apply_document_tree': 1,
|
'apply_document_tree': 1,
|
||||||
'dpi': 144 # 分辨率设置为144 dpi
|
'dpi': 216 # 分辨率设置为216 dpi
|
||||||
})
|
})
|
||||||
print("request time: ", resp.elapsed.total_seconds())
|
print("request time: ", resp.elapsed.total_seconds())
|
||||||
data = json.loads(resp.text)
|
data = json.loads(resp.text)
|
||||||
|
@ -135,7 +135,7 @@ def generate_queries(truncate_file, required_keys):
|
|||||||
return queries
|
return queries
|
||||||
|
|
||||||
|
|
||||||
def generate_template(required_keys, type=1):
|
def generate_template(required_keys,full_text, type=1):
|
||||||
# 定义每个键对应的示例内容
|
# 定义每个键对应的示例内容
|
||||||
example_content1 = {
|
example_content1 = {
|
||||||
"技术要求": ["相关技术要求1", "相关技术要求2"],
|
"技术要求": ["相关技术要求1", "相关技术要求2"],
|
||||||
@ -250,26 +250,27 @@ def generate_template(required_keys, type=1):
|
|||||||
示例 2,嵌套键值对形式:
|
示例 2,嵌套键值对形式:
|
||||||
{tech_json_example2_str}
|
{tech_json_example2_str}
|
||||||
"""
|
"""
|
||||||
|
if full_text:
|
||||||
|
user_query_template += f"\n\n文件内容:{full_text}"
|
||||||
return user_query_template
|
return user_query_template
|
||||||
|
|
||||||
def get_business_requirements(procurement_path,procurement_docpath):
|
def get_business_requirements(procurement_path,processed_filepath):
|
||||||
file_id = upload_file(procurement_docpath)
|
|
||||||
print(file_id)
|
|
||||||
required_keys = ["技\s*术\s*要\s*求", "商\s*务\s*要\s*求", "服\s*务\s*要\s*求", "其\s*他\s*要\s*求","总\s*体\s*要\s*求","建\s*设\s*要\s*求","进\s*度\s*要\s*求","工\s*期\s*要\s*求","质\s*保\s*要\s*求","培\s*训\s*要\s*求","售\s*后\s*要\s*求"]
|
required_keys = ["技\s*术\s*要\s*求", "商\s*务\s*要\s*求", "服\s*务\s*要\s*求", "其\s*他\s*要\s*求","总\s*体\s*要\s*求","建\s*设\s*要\s*求","进\s*度\s*要\s*求","工\s*期\s*要\s*求","质\s*保\s*要\s*求","培\s*训\s*要\s*求","售\s*后\s*要\s*求"]
|
||||||
contained_keys = find_exists(procurement_path, required_keys)
|
contained_keys = find_exists(procurement_path, required_keys)
|
||||||
print(contained_keys)
|
print(contained_keys)
|
||||||
if not contained_keys:
|
if not contained_keys:
|
||||||
return {}
|
return {}
|
||||||
# queries = generate_queries(truncate_file, contained_keys)
|
# queries = generate_queries(truncate_file, contained_keys)
|
||||||
busi_user_query = generate_template(contained_keys, 1)
|
full_text = read_txt_to_string(processed_filepath)
|
||||||
tech_user_query = generate_template(contained_keys, 2)
|
busi_user_query = generate_template(contained_keys, full_text, 1)
|
||||||
|
tech_user_query = generate_template(contained_keys, full_text, 2)
|
||||||
final_res={}
|
final_res={}
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
||||||
futures = []
|
futures = []
|
||||||
if busi_user_query:
|
if busi_user_query:
|
||||||
futures.append(executor.submit(qianwen_long_stream, file_id, busi_user_query, 2, 1))
|
futures.append(executor.submit(doubao_model, busi_user_query))
|
||||||
if tech_user_query:
|
if tech_user_query:
|
||||||
futures.append(executor.submit(qianwen_long_stream, file_id, tech_user_query, 2, 1))
|
futures.append(executor.submit(doubao_model, tech_user_query))
|
||||||
# 获取结果
|
# 获取结果
|
||||||
for future in concurrent.futures.as_completed(futures):
|
for future in concurrent.futures.as_completed(futures):
|
||||||
try:
|
try:
|
||||||
|
@ -411,6 +411,8 @@ def get_technical_requirements(invalid_path,processed_filepath):
|
|||||||
"协议:routes 接口开放:具备;▲支持标准 ONVIF 协议与第三方厂家设备进行互联;支持 GB/T28181;应提供 SDK"
|
"协议:routes 接口开放:具备;▲支持标准 ONVIF 协议与第三方厂家设备进行互联;支持 GB/T28181;应提供 SDK"
|
||||||
]
|
]
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
{}
|
||||||
"""
|
"""
|
||||||
user_query_template_two="""请根据货物标中采购要求部分的内容,告诉我\"{}\"的技术参数或采购要求是什么。由于该货物存在 {} 种不同的采购要求或技术参数,请逐一列出,并以 JSON 格式返回结果。请以'货物名-编号'区分多种型号,编号为从 1 开始的自然数,依次递增,即第一个键名为\"{}-1\";键值为一个列表,列表中包含若干描述\"{}\"的技术参数或采购要求或功能说明的字符串,请按原文内容回答,保留三角▲、五角★和序号(若有),不可擅自增删内容,尤其是不可擅自添加序号。
|
user_query_template_two="""请根据货物标中采购要求部分的内容,告诉我\"{}\"的技术参数或采购要求是什么。由于该货物存在 {} 种不同的采购要求或技术参数,请逐一列出,并以 JSON 格式返回结果。请以'货物名-编号'区分多种型号,编号为从 1 开始的自然数,依次递增,即第一个键名为\"{}-1\";键值为一个列表,列表中包含若干描述\"{}\"的技术参数或采购要求或功能说明的字符串,请按原文内容回答,保留三角▲、五角★和序号(若有),不可擅自增删内容,尤其是不可擅自添加序号。
|
||||||
|
|
||||||
@ -448,6 +450,8 @@ def get_technical_requirements(invalid_path,processed_filepath):
|
|||||||
"支持夜视", "支持云存储"
|
"支持夜视", "支持云存储"
|
||||||
]
|
]
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
{}
|
||||||
"""
|
"""
|
||||||
queries = []
|
queries = []
|
||||||
for key in key_paths:
|
for key in key_paths:
|
||||||
@ -456,9 +460,9 @@ def get_technical_requirements(invalid_path,processed_filepath):
|
|||||||
# 使用修改后的键填充第一个占位符,原始键填充第二个占位符
|
# 使用修改后的键填充第一个占位符,原始键填充第二个占位符
|
||||||
if model_type:
|
if model_type:
|
||||||
full_text = read_txt_to_string(processed_filepath)
|
full_text = read_txt_to_string(processed_filepath)
|
||||||
new_query = user_query_template.format(modified_key, key, modified_key,full_text) #转豆包后取消注释
|
new_query = user_query_template.format(modified_key, key, modified_key,f"文件内容:{full_text}") #转豆包后取消注释
|
||||||
else:
|
else:
|
||||||
new_query = user_query_template.format(modified_key, key, modified_key)
|
new_query = user_query_template.format(modified_key, key, modified_key,"")
|
||||||
queries.append(new_query)
|
queries.append(new_query)
|
||||||
|
|
||||||
# 处理 grouped_paths 中的项,应用 user_query_template_two
|
# 处理 grouped_paths 中的项,应用 user_query_template_two
|
||||||
@ -469,10 +473,10 @@ def get_technical_requirements(invalid_path,processed_filepath):
|
|||||||
if model_type:
|
if model_type:
|
||||||
full_text = read_txt_to_string(processed_filepath)
|
full_text = read_txt_to_string(processed_filepath)
|
||||||
new_query = user_query_template_two.format(modified_grouped_key, grouped_key_cnt, grouped_key,
|
new_query = user_query_template_two.format(modified_grouped_key, grouped_key_cnt, grouped_key,
|
||||||
modified_grouped_key, full_text)
|
modified_grouped_key, f"文件内容:{full_text}")
|
||||||
else:
|
else:
|
||||||
new_query = user_query_template_two.format(modified_grouped_key, grouped_key_cnt, grouped_key,
|
new_query = user_query_template_two.format(modified_grouped_key, grouped_key_cnt, grouped_key,
|
||||||
modified_grouped_key)
|
modified_grouped_key, "")
|
||||||
queries.append(new_query)
|
queries.append(new_query)
|
||||||
if model_type:
|
if model_type:
|
||||||
results = multi_threading(queries, "", "", 3) # 豆包
|
results = multi_threading(queries, "", "", 3) # 豆包
|
||||||
|
Loading…
x
Reference in New Issue
Block a user