diff --git a/flask_app/货物标/技术参数要求提取.py b/flask_app/货物标/技术参数要求提取.py index 30abb03..ef0d0e0 100644 --- a/flask_app/货物标/技术参数要求提取.py +++ b/flask_app/货物标/技术参数要求提取.py @@ -362,29 +362,28 @@ def generate_prompt(judge_res, full_text=None): base_prompt += "\n注意事项:\n1.严格按照上述要求执行,确保输出准确性和规范性。\n" return base_prompt -def get_technical_requirements(invalid_path,processed_filepath): +def get_technical_requirements(invalid_path,processed_filepath,model_type=1): + judge_res = "" file_id = "" - model_type = 1 # 默认使用豆包 - first_query_template="""该文件是否说明了采购需求,即需要采购哪些内容(包括货物、设备、系统、功能模块等)?如果有,请回答'是',否则,回答'否' -文件内容: -{full_text} + full_text = read_txt_to_string(processed_filepath) + if model_type: + first_query_template = """该文件是否说明了采购需求,即需要采购哪些内容(包括货物、设备、系统、功能模块等)?如果有,请回答'是',否则,回答'否' + {} """ - judge_query = generate_full_user_query(processed_filepath, first_query_template) - # print(judge_query) - judge_res = doubao_model(judge_query) - if '否' in judge_res: + judge_query = first_query_template.format(f"文件内容:{full_text}") + judge_res = doubao_model(judge_query) + if '否' in judge_res or model_type == 0: model_type = 0 # 使用qianwen-long+invalid_path print("no!调用invalid_path") - if invalid_path.lower().endswith('.pdf'): #确保上传的是docx + if invalid_path.lower().endswith('.pdf'): # 确保上传的是docx upload中一定是docx,但是get_deviation中可能上传的是pdf invalid_path = pdf2docx(invalid_path) - file_id=upload_file(invalid_path) + file_id = upload_file(invalid_path) user_query = generate_prompt(judge_res) - model_res=qianwen_long(file_id,user_query) + model_res = qianwen_long(file_id, user_query) print(model_res) else: - full_text = read_txt_to_string(processed_filepath) - user_query=generate_prompt(judge_res,full_text) - model_res=doubao_model(user_query) + user_query = generate_prompt(judge_res, full_text) + model_res = doubao_model(user_query) print(model_res) cleaned_res = clean_json_string(model_res) #转字典 processed_data=truncate_system_keys(cleaned_res['采购需求']) @@ -467,7 +466,6 @@ def 
get_technical_requirements(invalid_path,processed_filepath): modified_key = key.replace('.', '下的') # 使用修改后的键填充第一个占位符,原始键填充第二个占位符 if model_type: - full_text = read_txt_to_string(processed_filepath) new_query = user_query_template.format(modified_key, key, modified_key,f"文件内容:{full_text}") #转豆包后取消注释 else: new_query = user_query_template.format(modified_key, key, modified_key,"") @@ -479,7 +477,6 @@ def get_technical_requirements(invalid_path,processed_filepath): # 将键中的 '.' 替换为 '下的' modified_grouped_key = grouped_key.replace('.', '下的') if model_type: - full_text = read_txt_to_string(processed_filepath) new_query = user_query_template_two.format(modified_grouped_key, grouped_key_cnt, grouped_key, modified_grouped_key, f"文件内容:{full_text}") else: diff --git a/flask_app/货物标/提取采购需求main.py b/flask_app/货物标/提取采购需求main.py index 3d60e50..3314fe8 100644 --- a/flask_app/货物标/提取采购需求main.py +++ b/flask_app/货物标/提取采购需求main.py @@ -32,7 +32,7 @@ def fetch_procurement_reqs(procurement_path, invalid_path): # 读取 PDF 页码数 page_count = get_pdf_page_count(procurement_path) - if page_count > 80: # 如果页码数大于 50 + if page_count > 80: # 如果页码数大于 80 model_type = 0 processed_filepath = "" else: