12.19 invalid_path转md格式前增加判断
This commit is contained in:
parent
27830d271c
commit
e19aaa04f6
@ -7,7 +7,7 @@ from flask_app.general.doubao import read_txt_to_string, pdf2txt
|
||||
from flask_app.general.json_utils import combine_json_results, clean_json_string
|
||||
from flask_app.general.通义千问long import upload_file, qianwen_long_stream
|
||||
from flask_app.货物标.截取pdf货物标版 import extract_common_header, clean_page_content
|
||||
from flask_app.general.format_change import docx2pdf
|
||||
from flask_app.general.format_change import docx2pdf, pdf2docx
|
||||
import concurrent.futures
|
||||
from flask_app.general.doubao import doubao_model
|
||||
|
||||
@ -255,26 +255,60 @@ def generate_template(required_keys,full_text, type=1):
|
||||
user_query_template += f"\n\n文件内容:{full_text}"
|
||||
return user_query_template
|
||||
|
||||
def get_business_requirements(procurement_path,processed_filepath):
|
||||
required_keys = ["技\s*术\s*要\s*求", "商\s*务\s*要\s*求", "服\s*务\s*要\s*求", "其\s*他\s*要\s*求","总\s*体\s*要\s*求","建\s*设\s*要\s*求","进\s*度\s*要\s*求","工\s*期\s*要\s*求","质\s*保\s*要\s*求","培\s*训\s*要\s*求","售\s*后\s*要\s*求"]
|
||||
procurement_pdf_path=procurement_path
|
||||
|
||||
def get_business_requirements(procurement_path, processed_filepath, model_type):
|
||||
required_keys = ["技\s*术\s*要\s*求", "商\s*务\s*要\s*求", "服\s*务\s*要\s*求", "其\s*他\s*要\s*求",
|
||||
"总\s*体\s*要\s*求", "建\s*设\s*要\s*求", "进\s*度\s*要\s*求", "工\s*期\s*要\s*求",
|
||||
"质\s*保\s*要\s*求", "培\s*训\s*要\s*求", "售\s*后\s*要\s*求"]
|
||||
|
||||
# 将 doc/docx 转换为 pdf
|
||||
procurement_pdf_path = procurement_path
|
||||
if procurement_path.lower().endswith(('.doc', '.docx')):
|
||||
procurement_pdf_path = docx2pdf(procurement_path)
|
||||
|
||||
# 查找包含的关键词
|
||||
contained_keys = find_exists(procurement_pdf_path, required_keys)
|
||||
print(contained_keys)
|
||||
if not contained_keys:
|
||||
return {}
|
||||
# queries = generate_queries(truncate_file, contained_keys)
|
||||
|
||||
# 读取文件全文
|
||||
full_text = read_txt_to_string(processed_filepath)
|
||||
|
||||
# 生成业务查询和技术查询
|
||||
busi_user_query = generate_template(contained_keys, full_text, 1)
|
||||
tech_user_query = generate_template(contained_keys, full_text, 2)
|
||||
final_res={}
|
||||
|
||||
# 初始化结果存储
|
||||
final_res = {}
|
||||
|
||||
# 如果是非模型调用,需要提前上传文件并获取 file_id
|
||||
file_id = None
|
||||
if not model_type:
|
||||
procurement_docx_path=procurement_path
|
||||
if procurement_path.lower().endswith('.pdf'):
|
||||
procurement_docx_path = pdf2docx(procurement_path)
|
||||
file_id = upload_file(procurement_docx_path) # 只上传一次文件,避免冗余调用
|
||||
|
||||
# 并行处理业务和技术查询
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
||||
futures = []
|
||||
if busi_user_query:
|
||||
if model_type:
|
||||
# 如果是模型调用,直接使用 doubao_model
|
||||
futures.append(executor.submit(doubao_model, busi_user_query))
|
||||
else:
|
||||
# 使用 qianwen_long_stream 并传入 file_id
|
||||
futures.append(executor.submit(qianwen_long_stream, file_id, busi_user_query, 2, 1))
|
||||
|
||||
if tech_user_query:
|
||||
if model_type:
|
||||
# 如果是模型调用,直接使用 doubao_model
|
||||
futures.append(executor.submit(doubao_model, tech_user_query))
|
||||
else:
|
||||
# 使用 qianwen_long_stream 并传入 file_id
|
||||
futures.append(executor.submit(qianwen_long_stream, file_id, tech_user_query, 2, 1))
|
||||
|
||||
# 获取结果
|
||||
for future in concurrent.futures.as_completed(futures):
|
||||
try:
|
||||
@ -283,6 +317,7 @@ def get_business_requirements(procurement_path,processed_filepath):
|
||||
final_res.update(clean_json_string(result))
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
|
||||
return final_res
|
||||
|
||||
|
||||
|
@ -47,7 +47,7 @@ def fetch_procurement_reqs(procurement_path, invalid_path):
|
||||
# 提交任务给线程池
|
||||
future_technical = executor.submit(get_technical_requirements, invalid_path, processed_filepath,model_type)
|
||||
time.sleep(0.5) # 保持原有的延时
|
||||
future_business = executor.submit(get_business_requirements, procurement_path, processed_filepath)
|
||||
future_business = executor.submit(get_business_requirements, procurement_path, processed_filepath,model_type)
|
||||
# 获取并行任务的结果
|
||||
technical_requirements = future_technical.result()
|
||||
business_requirements = future_business.result()
|
||||
|
Loading…
x
Reference in New Issue
Block a user