12.19 修复豆包模型使用的bug

This commit is contained in:
zy123 2024-12-19 14:21:33 +08:00
parent f5261087b4
commit 867fc976b3
3 changed files with 27 additions and 5 deletions

View File

@ -138,6 +138,15 @@ def pdf2docx(local_path_in):
def doc2docx(local_path_in):
if not local_path_in:
return ""
# 获取文件名和所在文件夹
filename, folder = get_filename_and_folder(local_path_in)
# 检查是否已经存在同名的 .docx 文件
docx_file_path = os.path.join(folder, f"{filename}.docx")
if os.path.exists(docx_file_path):
print(f"Skipping conversion, {docx_file_path} already exists.")
return docx_file_path
remote_url = 'http://47.98.59.178:8000/v3/3rd/files/transfer/d2d'
receive_download_url = upload_file(local_path_in, remote_url)
print(receive_download_url)
@ -149,6 +158,13 @@ def doc2docx(local_path_in):
def docx2pdf(local_path_in):
if not local_path_in:
return ""
# 获取文件名和所在文件夹
filename, folder = get_filename_and_folder(local_path_in)
# 检查是否已经存在同名的 .pdf 文件
pdf_file_path = os.path.join(folder, f"{filename}.pdf")
if os.path.exists(pdf_file_path):
print(f"Skipping conversion, {pdf_file_path} already exists.")
return pdf_file_path
remote_url = 'http://47.98.59.178:8000/v3/3rd/files/transfer/d2p'
receive_download_url = upload_file(local_path_in, remote_url)
filename, folder = get_filename_and_folder(local_path_in) # 输入输出在同一个文件夹

View File

@ -372,7 +372,7 @@ def get_technical_requirements(invalid_path,processed_filepath,model_type=1):
if '' in judge_res or model_type==0:
model_type = 0 # 使用qianwen-long+invalid_path
print("no!调用invalid_path")
if invalid_path.lower().endswith('.pdf'): #确保上传的是docx
if invalid_path.lower().endswith('.pdf'): # 确保上传的是docx：upload中一定是docx，但get_deviation中可能上传的是pdf
invalid_path = pdf2docx(invalid_path)
file_id = upload_file(invalid_path)
user_query = generate_prompt(judge_res)

View File

@ -28,12 +28,18 @@ def fetch_procurement_reqs(procurement_path, invalid_path):
try:
if procurement_path == invalid_path:
# 读取 PDF 页码数
page_count = get_pdf_page_count(procurement_path)
if page_count > 80: # 如果页码数大于 80
model_type = 0
processed_filepath = ""
else:
model_type = 1
processed_filepath = convert_file_to_markdown(procurement_path) # 转markdown格式
else:
model_type = 1
processed_filepath = convert_file_to_markdown(procurement_path) # 转markdown格式
# processed_filepath = pdf2txt(procurement_path) # 纯文本提取
# 使用 ThreadPoolExecutor 并行处理 get_technical_requirements 和 get_business_requirements
with concurrent.futures.ThreadPoolExecutor() as executor: