From 8255070f60c8c055f0f6ae8d653426a336460a7b Mon Sep 17 00:00:00 2001
From: zy123 <646228430@qq.com>
Date: Thu, 17 Oct 2024 16:47:35 +0800
Subject: [PATCH] =?UTF-8?q?10.17=20=E5=B0=8F=E8=A7=A3=E6=9E=90=E8=B4=A7?=
 =?UTF-8?q?=E7=89=A9=E6=A0=87?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 flask_app/general/little_zbparse.py | 79 ++++++++++++++++++++++-------
 flask_app/main/start_up.py          |  6 +--
 2 files changed, 65 insertions(+), 20 deletions(-)

diff --git a/flask_app/general/little_zbparse.py b/flask_app/general/little_zbparse.py
index 86e11d3..665a993 100644
--- a/flask_app/general/little_zbparse.py
+++ b/flask_app/general/little_zbparse.py
@@ -22,34 +22,79 @@ def get_global_logger(unique_id):
 
 
 logger = None
-def get_base_info(baseinfo_file_path):
+
+def merge(merged):
+    """Decide whether the deposit question needs a follow-up query.
+
+    ``merged`` is the parsed answer to the deposit yes/no question, keyed by
+    '是否递交投标保证金' or '是否递交磋商保证金'.
+
+    Returns a ``(need_detail, payload)`` tuple:
+      * ``(1, "")`` when the answer is '是': the caller should ask for the
+        deposit details; the yes/no answer itself is dropped.
+      * ``(0, merged)`` otherwise. For '否' the yes/no key is replaced in
+        place by '<type>保证金' = '不提交' and '退还<type>保证金' = '/'.
+        For any other value (e.g. '未知') ``merged`` is returned untouched.
+    """
+    guarantee_key = '是否递交投标保证金' if '是否递交投标保证金' in merged else '是否递交磋商保证金'
+    answer = merged.get(guarantee_key)
+    if answer == '是':
+        return 1,""
+    if answer == '否':
+        guarantee_type = '投标' if '投标' in guarantee_key else '磋商'
+        merged[f'{guarantee_type}保证金'] = '不提交'
+        merged[f'退还{guarantee_type}保证金'] = '/'
+        merged.pop(guarantee_key, None)
+    # Always return a 2-tuple: the caller unpacks the result, so the implicit
+    # None of a fall-through on '未知' would raise TypeError.
+    return 0,merged
+
+def get_goods_baseinfo(baseinfo_file_path):
+    """Query the LLM for the basic information of a goods tender file.
+
+    Uploads the file at ``baseinfo_file_path``, asks the questions read from
+    the prompt file plus one deposit yes/no question, and, when a deposit is
+    required, asks one more question for the deposit details.
+
+    Returns the list of cleaned JSON answers (empty if the first round of
+    queries produced no results).
+    """
     file_id = upload_file(baseinfo_file_path)
     baseinfo_file_path='flask_app/static/提示词/基本信息货物标.txt'
     # baseinfo_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\基本信息货物标.txt'
     questions = read_questions_from_file(baseinfo_file_path)
-    more_query = "请你根据招标文件信息，回答以下问题：是否组织踏勘现场？是否召开投标预备会（或投标答疑会）？是否退还投标文件？是否允许分包? 是否需要递交投标保证金（或磋商保证金）？是否需要提交履约保证金（或履约担保）？是否有招标代理服务费（或中标、成交服务费）？请按json格式给我提供信息，键名分别为'是否组织踏勘现场','是否召开投标预备会'（或'是否召开投标答疑会'）,'是否退还投标文件',是否允许分包','是否递交投标保证金'（或'是否递交磋商保证金'）,'是否提交履约保证金','是否有招标代理服务费',键值仅限于'是','否','未知',若存在矛盾信息，请回答'未知'。"
+    more_query = "请你根据招标文件信息，回答以下问题：是否需要递交投标保证金（或磋商保证金）？请按json格式给我提供信息，键名分为'是否递交投标保证金'（或'是否递交磋商保证金'）,键值仅限于'是','否','未知',若存在矛盾信息，请回答'未知'。"
     questions.append(more_query)
     baseinfo_results = multi_threading(questions, "", file_id, 2)  # 1代表使用百炼rag 2代表使用qianwen-long
     baseinfo_list = [clean_json_string(res) for _, res in baseinfo_results] if baseinfo_results else []
-    chosen_numbers, merged = merge_json_to_list(baseinfo_list.pop())
-    baseinfo_list.append(merged)
-
-    judge_file_path = 'flask_app/static/提示词/是否相关问题货物标.txt'
-    # judge_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\是否相关问题货物标.txt'
-    judge_questions = read_questions_from_judge(judge_file_path, chosen_numbers)
-
-    res2 = multi_threading(judge_questions, "", file_id, 2)  # 调用千问-long
-    if not res2:
-        print("基础信息整合： multi_threading error!")
+    if not baseinfo_list:
+        # Nothing came back; pop() below would raise IndexError on [].
+        print("基础信息整合： multi_threading error!")
+        return baseinfo_list
+    need_detail,merged=merge(baseinfo_list.pop())
+    if need_detail:
+        # multi_threading takes a list of questions (see the first call), not a bare string.
+        judge_questions=["根据招标文件第二章投标人须知，该项目投标保证金（或磋商保证金）的内容或要求是什么？请按json格式给我提供信息，外层键名为'投标保证金'（或'磋商保证金'），若需要以嵌套键值对返回结果，那么嵌套键名为你对相应要求的总结，而对应键值需要完全与原文保持一致。"]
+        res2 = multi_threading(judge_questions, "", file_id, 2)  # query qianwen-long
+        if not res2:
+            print("基础信息整合： multi_threading error!")
+        else:
+            for question, response in res2:
+                baseinfo_list.append(clean_json_string(response))
     else:
-        for question, response in res2:
-            baseinfo_list.append(clean_json_string(response))
+        baseinfo_list.append(merged)
     return baseinfo_list
 
+
 #货物标
 def little_parse_goods(output_folder,file_path):
+    """Run the lightweight parse for a goods tender.
+
+    Splits the PDF with truncate_pdf_specific, feeds the last produced file to
+    get_goods_baseinfo and returns the aggregated result under "基础信息".
+    """
     files=truncate_pdf_specific(file_path,output_folder)
-    baseinfo_list=get_base_info(files[-1])
+    baseinfo_list=get_goods_baseinfo(files[-1])
     aggregated_baseinfo = aggregate_basic_info_goods(baseinfo_list)
     return {"基础信息": aggregated_baseinfo}
 def little_parse_engineering(output_folder,downloaded_filepath):
@@ -107,8 +152,8 @@ if __name__ == "__main__":
     start_time = time.time()
     file_type = 2  # 1:docx 2:pdf 3:其他
     zb_type=2   #1:工程标 2：货物标
-    input_file = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\6.2定版视频会议磋商文件.pdf"
-    res=little_parse_main(output_folder, input_file, file_type, zb_type)
+    input_file = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\ztbfile.pdf"
+    res=little_parse_main(output_folder, input_file, file_type, zb_type,"122334")
     print(json.dumps(res, ensure_ascii=False, indent=4))
     end_time = time.time()
     elapsed_time = end_time - start_time  # 计算耗时
diff --git a/flask_app/main/start_up.py b/flask_app/main/start_up.py
index 0c84643..487e81a 100644
--- a/flask_app/main/start_up.py
+++ b/flask_app/main/start_up.py
@@ -295,15 +295,15 @@ def process_and_stream(file_url, zb_type):
         final_result, extracted_info = outer_post_processing(combined_data, includes)
 
         try:
-            with open(output_json_path, 'w', encoding='utf-8') as json_file:
+            with open(extracted_info_path, 'w', encoding='utf-8') as json_file:
                 json.dump(extracted_info, json_file, ensure_ascii=False, indent=4)
-            logger.info(f"合并后的数据已保存到 '{output_json_path}'")
+            logger.info(f"摘取后的数据已保存到 '{extracted_info_path}'")
         except IOError as e:
             logger.error(f"保存JSON文件时出错: {e}")
 
         try:
             with open(output_json_path, 'w', encoding='utf-8') as json_file:
-                json.dump(extracted_info_path, json_file, ensure_ascii=False, indent=4)
+                json.dump(final_result, json_file, ensure_ascii=False, indent=4)
             logger.info(f"合并后的数据已保存到 '{output_json_path}'")
         except IOError as e:
             logger.error(f"保存JSON文件时出错: {e}")