From 2b2627305a50885c1daed2c57e30af8ad6c129e6 Mon Sep 17 00:00:00 2001
From: zy123 <646228430@qq.com>
Date: Thu, 12 Dec 2024 10:56:46 +0800
Subject: [PATCH] =?UTF-8?q?12.12=20=E8=B1=86=E5=8C=85=E6=B5=8B=E8=AF=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 flask_app/general/file2markdown.py       |  6 +++---
 flask_app/货物标/商务服务其他要求提取.py | 17 +++++++++--------
 flask_app/货物标/技术参数要求提取.py     | 12 ++++++++----
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/flask_app/general/file2markdown.py b/flask_app/general/file2markdown.py
index 036f48e..e81eb73 100644
--- a/flask_app/general/file2markdown.py
+++ b/flask_app/general/file2markdown.py
@@ -50,12 +50,12 @@ def convert_pdf_to_markdown(file_path):
     resp = textin.recognize_pdf2md(image, {
         'page_start': 0,
         'page_count': 50,  # 设置解析页数为50页
-        'table_flavor': 'md',  # html 按html语法输出表格
-        'parse_mode': 'scan',  # 设置解析模式为scan模式
+        'table_flavor': 'html',  # html 按html语法输出表格
+        'parse_mode': 'auto',  # 设置解析模式为auto模式
         'page_details': 0,  # 不包含页面细节
         'markdown_details': 1,
         'apply_document_tree': 1,
-        'dpi': 144  # 分辨率设置为144 dpi
+        'dpi': 216  # 分辨率设置为216 dpi
     })
     print("request time: ", resp.elapsed.total_seconds())
     data = json.loads(resp.text)
diff --git a/flask_app/货物标/商务服务其他要求提取.py b/flask_app/货物标/商务服务其他要求提取.py
index ee979ac..0a86322 100644
--- a/flask_app/货物标/商务服务其他要求提取.py
+++ b/flask_app/货物标/商务服务其他要求提取.py
@@ -135,7 +135,7 @@ def generate_queries(truncate_file, required_keys):
     return queries
 
 
-def generate_template(required_keys, type=1):
+def generate_template(required_keys,full_text, type=1):
     # 定义每个键对应的示例内容
     example_content1 = {
         "技术要求": ["相关技术要求1", "相关技术要求2"],
@@ -250,26 +250,27 @@ def generate_template(required_keys, type=1):
     示例 2，嵌套键值对形式：
     {tech_json_example2_str}
     """
+    if full_text:
+        user_query_template += f"\n\n文件内容：{full_text}"
     return user_query_template
 
-def get_business_requirements(procurement_path,procurement_docpath):
-    file_id = upload_file(procurement_docpath)
-    print(file_id)
+def get_business_requirements(procurement_path,processed_filepath):
     required_keys = ["技\s*术\s*要\s*求", "商\s*务\s*要\s*求", "服\s*务\s*要\s*求", "其\s*他\s*要\s*求","总\s*体\s*要\s*求","建\s*设\s*要\s*求","进\s*度\s*要\s*求","工\s*期\s*要\s*求","质\s*保\s*要\s*求","培\s*训\s*要\s*求","售\s*后\s*要\s*求"]
     contained_keys = find_exists(procurement_path, required_keys)
     print(contained_keys)
     if not contained_keys:
         return {}
     # queries = generate_queries(truncate_file, contained_keys)
-    busi_user_query = generate_template(contained_keys, 1)
-    tech_user_query = generate_template(contained_keys, 2)
+    full_text = read_txt_to_string(processed_filepath)
+    busi_user_query = generate_template(contained_keys, full_text, 1)
+    tech_user_query = generate_template(contained_keys, full_text, 2)
     final_res={}
     with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
         futures = []
         if busi_user_query:
-            futures.append(executor.submit(qianwen_long_stream, file_id, busi_user_query, 2, 1))
+            futures.append(executor.submit(doubao_model, busi_user_query))
         if tech_user_query:
-            futures.append(executor.submit(qianwen_long_stream, file_id, tech_user_query, 2, 1))
+            futures.append(executor.submit(doubao_model, tech_user_query))
         # 获取结果
         for future in concurrent.futures.as_completed(futures):
             try:
diff --git a/flask_app/货物标/技术参数要求提取.py b/flask_app/货物标/技术参数要求提取.py
index a5eae39..113bb2c 100644
--- a/flask_app/货物标/技术参数要求提取.py
+++ b/flask_app/货物标/技术参数要求提取.py
@@ -411,6 +411,8 @@ def get_technical_requirements(invalid_path,processed_filepath):
         "协议：routes 接口开放：具备；▲支持标准 ONVIF 协议与第三方厂家设备进行互联；支持 GB/T28181；应提供 SDK"
     ]
 }}
+
+{}
 """
     user_query_template_two="""请根据货物标中采购要求部分的内容，告诉我\"{}\"的技术参数或采购要求是什么。由于该货物存在 {} 种不同的采购要求或技术参数，请逐一列出，并以 JSON 格式返回结果。请以'货物名-编号'区分多种型号，编号为从 1 开始的自然数，依次递增，即第一个键名为\"{}-1\"；键值为一个列表，列表中包含若干描述\"{}\"的技术参数或采购要求或功能说明的字符串，请按原文内容回答，保留三角▲、五角★和序号（若有），不可擅自增删内容，尤其是不可擅自添加序号。
 
@@ -448,6 +450,8 @@ def get_technical_requirements(invalid_path,processed_filepath):
         "支持夜视", "支持云存储"
     ]
 }}
+
+{}
         """
     queries = []
     for key in key_paths:
@@ -456,9 +460,9 @@ def get_technical_requirements(invalid_path,processed_filepath):
         # 使用修改后的键填充第一个占位符，原始键填充第二个占位符
         if model_type:
             full_text = read_txt_to_string(processed_filepath)
-            new_query = user_query_template.format(modified_key, key, modified_key,full_text)   #转豆包后取消注释
+            new_query = user_query_template.format(modified_key, key, modified_key,f"文件内容：{full_text}")   #转豆包后取消注释
         else:
-            new_query = user_query_template.format(modified_key, key, modified_key)
+            new_query = user_query_template.format(modified_key, key, modified_key,"")
         queries.append(new_query)
 
         # 处理 grouped_paths 中的项，应用 user_query_template_two
@@ -469,10 +473,10 @@ def get_technical_requirements(invalid_path,processed_filepath):
             if model_type:
                 full_text = read_txt_to_string(processed_filepath)
                 new_query = user_query_template_two.format(modified_grouped_key, grouped_key_cnt, grouped_key,
-                                                           modified_grouped_key, full_text)
+                                                           modified_grouped_key, f"文件内容：{full_text}")
             else:
                 new_query = user_query_template_two.format(modified_grouped_key, grouped_key_cnt, grouped_key,
-                                                           modified_grouped_key)
+                                                           modified_grouped_key, "")
             queries.append(new_query)
     if model_type:
         results = multi_threading(queries, "", "", 3)  # 豆包