12.19 invalid_path转md格式前增加判断
This commit is contained in:
parent
27830d271c
commit
e19aaa04f6
@ -7,7 +7,7 @@ from flask_app.general.doubao import read_txt_to_string, pdf2txt
|
|||||||
from flask_app.general.json_utils import combine_json_results, clean_json_string
|
from flask_app.general.json_utils import combine_json_results, clean_json_string
|
||||||
from flask_app.general.通义千问long import upload_file, qianwen_long_stream
|
from flask_app.general.通义千问long import upload_file, qianwen_long_stream
|
||||||
from flask_app.货物标.截取pdf货物标版 import extract_common_header, clean_page_content
|
from flask_app.货物标.截取pdf货物标版 import extract_common_header, clean_page_content
|
||||||
from flask_app.general.format_change import docx2pdf
|
from flask_app.general.format_change import docx2pdf, pdf2docx
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
from flask_app.general.doubao import doubao_model
|
from flask_app.general.doubao import doubao_model
|
||||||
|
|
||||||
@ -255,26 +255,60 @@ def generate_template(required_keys,full_text, type=1):
|
|||||||
user_query_template += f"\n\n文件内容:{full_text}"
|
user_query_template += f"\n\n文件内容:{full_text}"
|
||||||
return user_query_template
|
return user_query_template
|
||||||
|
|
||||||
def get_business_requirements(procurement_path,processed_filepath):
|
|
||||||
required_keys = ["技\s*术\s*要\s*求", "商\s*务\s*要\s*求", "服\s*务\s*要\s*求", "其\s*他\s*要\s*求","总\s*体\s*要\s*求","建\s*设\s*要\s*求","进\s*度\s*要\s*求","工\s*期\s*要\s*求","质\s*保\s*要\s*求","培\s*训\s*要\s*求","售\s*后\s*要\s*求"]
|
def get_business_requirements(procurement_path, processed_filepath, model_type):
|
||||||
procurement_pdf_path=procurement_path
|
required_keys = ["技\s*术\s*要\s*求", "商\s*务\s*要\s*求", "服\s*务\s*要\s*求", "其\s*他\s*要\s*求",
|
||||||
|
"总\s*体\s*要\s*求", "建\s*设\s*要\s*求", "进\s*度\s*要\s*求", "工\s*期\s*要\s*求",
|
||||||
|
"质\s*保\s*要\s*求", "培\s*训\s*要\s*求", "售\s*后\s*要\s*求"]
|
||||||
|
|
||||||
|
# 将 doc/docx 转换为 pdf
|
||||||
|
procurement_pdf_path = procurement_path
|
||||||
if procurement_path.lower().endswith(('.doc', '.docx')):
|
if procurement_path.lower().endswith(('.doc', '.docx')):
|
||||||
procurement_pdf_path = docx2pdf(procurement_path)
|
procurement_pdf_path = docx2pdf(procurement_path)
|
||||||
|
|
||||||
|
# 查找包含的关键词
|
||||||
contained_keys = find_exists(procurement_pdf_path, required_keys)
|
contained_keys = find_exists(procurement_pdf_path, required_keys)
|
||||||
print(contained_keys)
|
print(contained_keys)
|
||||||
if not contained_keys:
|
if not contained_keys:
|
||||||
return {}
|
return {}
|
||||||
# queries = generate_queries(truncate_file, contained_keys)
|
|
||||||
|
# 读取文件全文
|
||||||
full_text = read_txt_to_string(processed_filepath)
|
full_text = read_txt_to_string(processed_filepath)
|
||||||
|
|
||||||
|
# 生成业务查询和技术查询
|
||||||
busi_user_query = generate_template(contained_keys, full_text, 1)
|
busi_user_query = generate_template(contained_keys, full_text, 1)
|
||||||
tech_user_query = generate_template(contained_keys, full_text, 2)
|
tech_user_query = generate_template(contained_keys, full_text, 2)
|
||||||
final_res={}
|
|
||||||
|
# 初始化结果存储
|
||||||
|
final_res = {}
|
||||||
|
|
||||||
|
# 如果是非模型调用,需要提前上传文件并获取 file_id
|
||||||
|
file_id = None
|
||||||
|
if not model_type:
|
||||||
|
procurement_docx_path=procurement_path
|
||||||
|
if procurement_path.lower().endswith('.pdf'):
|
||||||
|
procurement_docx_path = pdf2docx(procurement_path)
|
||||||
|
file_id = upload_file(procurement_docx_path) # 只上传一次文件,避免冗余调用
|
||||||
|
|
||||||
|
# 并行处理业务和技术查询
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
||||||
futures = []
|
futures = []
|
||||||
if busi_user_query:
|
if busi_user_query:
|
||||||
futures.append(executor.submit(doubao_model, busi_user_query))
|
if model_type:
|
||||||
|
# 如果是模型调用,直接使用 doubao_model
|
||||||
|
futures.append(executor.submit(doubao_model, busi_user_query))
|
||||||
|
else:
|
||||||
|
# 使用 qianwen_long_stream 并传入 file_id
|
||||||
|
futures.append(executor.submit(qianwen_long_stream, file_id, busi_user_query, 2, 1))
|
||||||
|
|
||||||
if tech_user_query:
|
if tech_user_query:
|
||||||
futures.append(executor.submit(doubao_model, tech_user_query))
|
if model_type:
|
||||||
|
# 如果是模型调用,直接使用 doubao_model
|
||||||
|
futures.append(executor.submit(doubao_model, tech_user_query))
|
||||||
|
else:
|
||||||
|
# 使用 qianwen_long_stream 并传入 file_id
|
||||||
|
futures.append(executor.submit(qianwen_long_stream, file_id, tech_user_query, 2, 1))
|
||||||
|
|
||||||
# 获取结果
|
# 获取结果
|
||||||
for future in concurrent.futures.as_completed(futures):
|
for future in concurrent.futures.as_completed(futures):
|
||||||
try:
|
try:
|
||||||
@ -283,6 +317,7 @@ def get_business_requirements(procurement_path,processed_filepath):
|
|||||||
final_res.update(clean_json_string(result))
|
final_res.update(clean_json_string(result))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"An error occurred: {e}")
|
print(f"An error occurred: {e}")
|
||||||
|
|
||||||
return final_res
|
return final_res
|
||||||
|
|
||||||
|
|
||||||
|
@ -47,7 +47,7 @@ def fetch_procurement_reqs(procurement_path, invalid_path):
|
|||||||
# 提交任务给线程池
|
# 提交任务给线程池
|
||||||
future_technical = executor.submit(get_technical_requirements, invalid_path, processed_filepath,model_type)
|
future_technical = executor.submit(get_technical_requirements, invalid_path, processed_filepath,model_type)
|
||||||
time.sleep(0.5) # 保持原有的延时
|
time.sleep(0.5) # 保持原有的延时
|
||||||
future_business = executor.submit(get_business_requirements, procurement_path, processed_filepath)
|
future_business = executor.submit(get_business_requirements, procurement_path, processed_filepath,model_type)
|
||||||
# 获取并行任务的结果
|
# 获取并行任务的结果
|
||||||
technical_requirements = future_technical.result()
|
technical_requirements = future_technical.result()
|
||||||
business_requirements = future_business.result()
|
business_requirements = future_business.result()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user