12.12 豆包测试
This commit is contained in:
parent
e023ff70d6
commit
2b2627305a
@ -50,12 +50,12 @@ def convert_pdf_to_markdown(file_path):
|
||||
resp = textin.recognize_pdf2md(image, {
|
||||
'page_start': 0,
|
||||
'page_count': 50, # 设置解析页数为50页
|
||||
'table_flavor': 'md', # html 按html语法输出表格
|
||||
'parse_mode': 'scan', # 设置解析模式为scan模式
|
||||
'table_flavor': 'html', # html 按html语法输出表格
|
||||
'parse_mode': 'auto', # parse mode set to auto
|
||||
'page_details': 0, # 不包含页面细节
|
||||
'markdown_details': 1,
|
||||
'apply_document_tree': 1,
|
||||
'dpi': 144 # 分辨率设置为144 dpi
|
||||
'dpi': 216 # resolution set to 216 dpi
|
||||
})
|
||||
print("request time: ", resp.elapsed.total_seconds())
|
||||
data = json.loads(resp.text)
|
||||
|
@ -135,7 +135,7 @@ def generate_queries(truncate_file, required_keys):
|
||||
return queries
|
||||
|
||||
|
||||
def generate_template(required_keys, type=1):
|
||||
def generate_template(required_keys,full_text, type=1):
|
||||
# 定义每个键对应的示例内容
|
||||
example_content1 = {
|
||||
"技术要求": ["相关技术要求1", "相关技术要求2"],
|
||||
@ -250,26 +250,27 @@ def generate_template(required_keys, type=1):
|
||||
示例 2,嵌套键值对形式:
|
||||
{tech_json_example2_str}
|
||||
"""
|
||||
if full_text:
|
||||
user_query_template += f"\n\n文件内容:{full_text}"
|
||||
return user_query_template
|
||||
|
||||
def get_business_requirements(procurement_path,procurement_docpath):
|
||||
file_id = upload_file(procurement_docpath)
|
||||
print(file_id)
|
||||
def get_business_requirements(procurement_path,processed_filepath):
|
||||
required_keys = ["技\s*术\s*要\s*求", "商\s*务\s*要\s*求", "服\s*务\s*要\s*求", "其\s*他\s*要\s*求","总\s*体\s*要\s*求","建\s*设\s*要\s*求","进\s*度\s*要\s*求","工\s*期\s*要\s*求","质\s*保\s*要\s*求","培\s*训\s*要\s*求","售\s*后\s*要\s*求"]
|
||||
contained_keys = find_exists(procurement_path, required_keys)
|
||||
print(contained_keys)
|
||||
if not contained_keys:
|
||||
return {}
|
||||
# queries = generate_queries(truncate_file, contained_keys)
|
||||
busi_user_query = generate_template(contained_keys, 1)
|
||||
tech_user_query = generate_template(contained_keys, 2)
|
||||
full_text = read_txt_to_string(processed_filepath)
|
||||
busi_user_query = generate_template(contained_keys, full_text, 1)
|
||||
tech_user_query = generate_template(contained_keys, full_text, 2)
|
||||
final_res={}
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
||||
futures = []
|
||||
if busi_user_query:
|
||||
futures.append(executor.submit(qianwen_long_stream, file_id, busi_user_query, 2, 1))
|
||||
futures.append(executor.submit(doubao_model, busi_user_query))
|
||||
if tech_user_query:
|
||||
futures.append(executor.submit(qianwen_long_stream, file_id, tech_user_query, 2, 1))
|
||||
futures.append(executor.submit(doubao_model, tech_user_query))
|
||||
# 获取结果
|
||||
for future in concurrent.futures.as_completed(futures):
|
||||
try:
|
||||
|
@ -411,6 +411,8 @@ def get_technical_requirements(invalid_path,processed_filepath):
|
||||
"协议:routes 接口开放:具备;▲支持标准 ONVIF 协议与第三方厂家设备进行互联;支持 GB/T28181;应提供 SDK"
|
||||
]
|
||||
}}
|
||||
|
||||
{}
|
||||
"""
|
||||
user_query_template_two="""请根据货物标中采购要求部分的内容,告诉我\"{}\"的技术参数或采购要求是什么。由于该货物存在 {} 种不同的采购要求或技术参数,请逐一列出,并以 JSON 格式返回结果。请以'货物名-编号'区分多种型号,编号为从 1 开始的自然数,依次递增,即第一个键名为\"{}-1\";键值为一个列表,列表中包含若干描述\"{}\"的技术参数或采购要求或功能说明的字符串,请按原文内容回答,保留三角▲、五角★和序号(若有),不可擅自增删内容,尤其是不可擅自添加序号。
|
||||
|
||||
@ -448,6 +450,8 @@ def get_technical_requirements(invalid_path,processed_filepath):
|
||||
"支持夜视", "支持云存储"
|
||||
]
|
||||
}}
|
||||
|
||||
{}
|
||||
"""
|
||||
queries = []
|
||||
for key in key_paths:
|
||||
@ -456,9 +460,9 @@ def get_technical_requirements(invalid_path,processed_filepath):
|
||||
# 使用修改后的键填充第一个占位符,原始键填充第二个占位符
|
||||
if model_type:
|
||||
full_text = read_txt_to_string(processed_filepath)
|
||||
new_query = user_query_template.format(modified_key, key, modified_key,full_text) #转豆包后取消注释
|
||||
new_query = user_query_template.format(modified_key, key, modified_key,f"文件内容:{full_text}") # Doubao path: the file text is inlined into the prompt
|
||||
else:
|
||||
new_query = user_query_template.format(modified_key, key, modified_key)
|
||||
new_query = user_query_template.format(modified_key, key, modified_key,"")
|
||||
queries.append(new_query)
|
||||
|
||||
# 处理 grouped_paths 中的项,应用 user_query_template_two
|
||||
@ -469,10 +473,10 @@ def get_technical_requirements(invalid_path,processed_filepath):
|
||||
if model_type:
|
||||
full_text = read_txt_to_string(processed_filepath)
|
||||
new_query = user_query_template_two.format(modified_grouped_key, grouped_key_cnt, grouped_key,
|
||||
modified_grouped_key, full_text)
|
||||
modified_grouped_key, f"文件内容:{full_text}")
|
||||
else:
|
||||
new_query = user_query_template_two.format(modified_grouped_key, grouped_key_cnt, grouped_key,
|
||||
modified_grouped_key)
|
||||
modified_grouped_key, "")
|
||||
queries.append(new_query)
|
||||
if model_type:
|
||||
results = multi_threading(queries, "", "", 3) # 豆包
|
||||
|
Loading…
x
Reference in New Issue
Block a user