12.19 修复豆包模型使用的bug

2024-12-19 14:34:05 +08:00 · 2024-12-19 14:34:05 +08:00 · de8d8c0a2b
commit de8d8c0a2b
parent fd45d78c10
2 changed files with 15 additions and 18 deletions
--- a/flask_app/货物标/技术参数要求提取.py
+++ b/flask_app/货物标/技术参数要求提取.py
@@ -362,29 +362,28 @@ def generate_prompt(judge_res, full_text=None):
    base_prompt += "\n注意事项：\n1.严格按照上述要求执行，确保输出准确性和规范性。\n"
    return base_prompt

-def get_technical_requirements(invalid_path,processed_filepath):
+def get_technical_requirements(invalid_path,processed_filepath,model_type=1):
+    judge_res = ""
    file_id = ""
-    model_type = 1  # 默认使用豆包
-    first_query_template="""该文件是否说明了采购需求,即需要采购哪些内容（包括货物、设备、系统、功能模块等）?如果有,请回答'是',否则,回答'否'
-文件内容：
-{full_text}
+    full_text = read_txt_to_string(processed_filepath)
+    if model_type:
+        first_query_template = """该文件是否说明了采购需求,即需要采购哪些内容（包括货物、设备、系统、功能模块等）?如果有,请回答'是',否则,回答'否'
+    {}
    """
-    judge_query = generate_full_user_query(processed_filepath, first_query_template)
-    # print(judge_query)
+        judge_query = first_query_template.format(f"文件内容：{full_text}")
        judge_res = doubao_model(judge_query)
-    if '否' in judge_res:
+    if '否' in judge_res or model_type == 0:
        model_type = 0  # 使用qianwen-long+invalid_path
        print("no!调用invalid_path")
-        if invalid_path.lower().endswith('.pdf'):           #确保上传的是docx
+        if invalid_path.lower().endswith('.pdf'):  # 确保上传的是docx  upload中一定是docx，但是get_deviation中可能上传的是pdf
            invalid_path = pdf2docx(invalid_path)
-        file_id=upload_file(invalid_path)
+        file_id = upload_file(invalid_path)
        user_query = generate_prompt(judge_res)
-        model_res=qianwen_long(file_id,user_query)
+        model_res = qianwen_long(file_id, user_query)
        print(model_res)
    else:
-        full_text = read_txt_to_string(processed_filepath)
-        user_query=generate_prompt(judge_res,full_text)
-        model_res=doubao_model(user_query)
+        user_query = generate_prompt(judge_res, full_text)
+        model_res = doubao_model(user_query)
        print(model_res)
    cleaned_res = clean_json_string(model_res)     #转字典
    processed_data=truncate_system_keys(cleaned_res['采购需求'])
@@ -467,7 +466,6 @@ def get_technical_requirements(invalid_path,processed_filepath):
        modified_key = key.replace('.', '下的')
        # 使用修改后的键填充第一个占位符，原始键填充第二个占位符
        if model_type:
-            full_text = read_txt_to_string(processed_filepath)
            new_query = user_query_template.format(modified_key, key, modified_key,f"文件内容：{full_text}")   #转豆包后取消注释
        else:
            new_query = user_query_template.format(modified_key, key, modified_key,"")
@@ -479,7 +477,6 @@ def get_technical_requirements(invalid_path,processed_filepath):
            # 将键中的 '.' 替换为 '下的'
            modified_grouped_key = grouped_key.replace('.', '下的')
            if model_type:
-                full_text = read_txt_to_string(processed_filepath)
                new_query = user_query_template_two.format(modified_grouped_key, grouped_key_cnt, grouped_key,
                                                           modified_grouped_key, f"文件内容：{full_text}")
            else:
--- a/flask_app/货物标/提取采购需求main.py
+++ b/flask_app/货物标/提取采购需求main.py
@@ -32,7 +32,7 @@ def fetch_procurement_reqs(procurement_path, invalid_path):
            # 读取 PDF 页码数
            page_count = get_pdf_page_count(procurement_path)

-            if page_count > 80:  # 如果页码数大于 50
+            if page_count > 80:  # 如果页码数大于 80
                model_type = 0
                processed_filepath = ""
            else: