From 8afb58e82b0308911e173cfea91b493947beeacc Mon Sep 17 00:00:00 2001
From: zy123 <646228430@qq.com>
Date: Thu, 26 Sep 2024 13:43:47 +0800
Subject: [PATCH] =?UTF-8?q?9.26=20=E5=88=86=E6=AE=B5=E8=A7=A3=E6=9E=90?=
 =?UTF-8?q?=E5=AE=8C=E5=85=A8=E7=89=88?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 flask_app/main/start_up.py                   | 77 ++++++++++++--------
 flask_app/main/商务标技术标整合.py           | 11 ++-
 flask_app/main/基础信息整合.py               |  4 +-
 flask_app/main/形式响应评审.py               |  2 +-
 flask_app/main/招标文件解析.py               | 31 ++++----
 flask_app/main/无效标和废标和禁止投标整合.py |  4 +-
 flask_app/main/资格审查模块.py               |  4 +-
 flask_app/货物标/资格审查main.py             | 28 ++++++-
 8 files changed, 105 insertions(+), 56 deletions(-)

diff --git a/flask_app/main/start_up.py b/flask_app/main/start_up.py
index 9b11239..6caa6f7 100644
--- a/flask_app/main/start_up.py
+++ b/flask_app/main/start_up.py
@@ -93,7 +93,24 @@ def zbparse():
         logger.error('Exception occurred: ' + str(e))
         return jsonify({'error': str(e)}), 500
 
+def post_processing(data,includes):
+    # Split `data` into top-level keys listed in `includes` plus an "其他" (other) bucket; the bucket starts empty.
+    result = {"其他": {}}
 
+    # Walk every key/value pair of the input dict.
+    for key, value in data.items():
+        if key in includes:
+            # Key is listed in `includes`: keep the pair at the top level unchanged.
+            result[key] = value
+        else:
+            # Key is not listed in `includes`: file the pair under the "其他" bucket.
+            result["其他"][key] = value
+
+    # Drop the "其他" bucket when nothing was filed under it.
+    if not result["其他"]:
+        del result["其他"]
+
+    return result
 # 分段返回
 def process_and_stream(file_url):
     logger = g.logger
@@ -120,50 +137,48 @@ def process_and_stream(file_url):
 
     # 从 main_processing 获取数据
     for data in main_processing(output_folder, downloaded_filepath, file_type, unique_id):
-        response = {
-            'message': 'Processing',
-            'filename': os.path.basename(downloaded_filepath),
-            'data': data
-        }
-        # 日志记录和流式响应
-        yield f"data: {json.dumps(response, ensure_ascii=False)}\n\n"
-
-        if not data:
-            logger.error(f"Empty data received: {data}")
-            continue
-
-            # 解析 data 作为 JSON 格式数据
         if not data.strip():
             logger.error("Received empty data, skipping JSON parsing.")
-        else:
-            try:
-                parsed_data = json.loads(data)
-            except json.JSONDecodeError as e:
-                logger.error(f"Failed to decode JSON: {e}")
-                logger.error(f"Data received: {data}")
-                continue  # 跳过该数据处理
+            continue  # Skip processing empty data
 
+        try:
+            parsed_data = json.loads(data)
+        except json.JSONDecodeError as e:
+            logger.error(f"Failed to decode JSON: {e}")
+            logger.error(f"Data received: {data}")
+            continue  # Skip data if JSON parsing fails
         # 遍历 parsed_data 只提取内层内容进行合并
         for outer_key, inner_dict in parsed_data.items():
             if isinstance(inner_dict, dict):
                 combined_data.update(inner_dict)
-    logger.info(json.dumps(combined_data, ensure_ascii=False,indent=4))
-    # 等待所有数据都处理完后，发送整合后的完整数据
+                # 日志记录已合并数据
+        # 每次数据更新后，流式返回当前进度
+        response = {
+            'message': 'Processing',
+            'filename': os.path.basename(downloaded_filepath),
+            'data': json.dumps(data, ensure_ascii=False)
+        }
+        yield f"data: {json.dumps(response, ensure_ascii=False)}\n\n"
+    # 日志记录已合并数据
+    logger.info(f"Updated combined data: {json.dumps(combined_data, ensure_ascii=False, indent=4)}")
+    # **保存 combined_data 到 output_folder 下的 'final_result.json'**
+    output_json_path = os.path.join(output_folder, 'final_result.json')
+    includes = ["基础信息", "资格审查", "商务标", "技术标", "无效标与废标项", "投标文件要求", "开评定标流程"]
+    result = post_processing(combined_data, includes)
+    try:
+        with open(output_json_path, 'w', encoding='utf-8') as json_file:
+            json.dump(result, json_file, ensure_ascii=False, indent=4)
+        logger.info(f"Combined data saved to '{output_json_path}'")
+    except IOError as e:
+        logger.error(f"Error saving JSON file: {e}")
+    # 最后发送合并后的完整数据
     complete_response = {
         'message': 'Combined data',
         'filename': os.path.basename(downloaded_filepath),
-        'data': json.dumps(combined_data, ensure_ascii=False)
+        'data': json.dumps(result, ensure_ascii=False)
     }
     yield f"data: {json.dumps(complete_response, ensure_ascii=False)}\n\n"
 
-    # 发送最终响应
-    final_response = {
-        'message': 'File uploaded and processed successfully',
-        'filename': os.path.basename(downloaded_filepath),
-        'data': 'END'
-    }
-    yield f"data: {json.dumps(final_response, ensure_ascii=False)}\n\n"
-
 
 def validate_request():
     if not request.is_json:
diff --git a/flask_app/main/商务标技术标整合.py b/flask_app/main/商务标技术标整合.py
index 3dd8439..36f1067 100644
--- a/flask_app/main/商务标技术标整合.py
+++ b/flask_app/main/商务标技术标整合.py
@@ -109,6 +109,11 @@ def combine_evaluation_standards(truncate2):
     # return evaluation_combined_res
     return update_json              #商务标技术标整合
 if __name__ == "__main__":
-    truncate2="D:\\flask_project\\flask_app\\static\\output\\c02a12c2-6f7b-49dc-b97f-c3d740c96c21\\ztbfile_evaluation_method.pdf"
-    res=combine_evaluation_standards(truncate2)
-    print(json.dumps(res,ensure_ascii=False,indent=4))
\ No newline at end of file
+    truncate2="C:\\Users\\Administrator\\Desktop\\fsdownload\\0883895c-e61f-4a99-9308-697fca1d4b77\\ztbfile_evaluation_method.pdf"
+    evaluation_standards_res=combine_evaluation_standards(truncate2)
+    # Pull the "商务标" and "技术标" entries out of the result, each wrapped under its own key (empty dict when absent).
+    technical_standards = {"技术标": evaluation_standards_res.get("技术标", {})}
+    commercial_standards = {"商务标": evaluation_standards_res.get("商务标", {})}
+    # Print both wrapped dicts as indented JSON; this manual-run script returns nothing.
+    print(json.dumps(technical_standards,ensure_ascii=False,indent=4))
+    print(json.dumps(commercial_standards, ensure_ascii=False, indent=4))
\ No newline at end of file
diff --git a/flask_app/main/基础信息整合.py b/flask_app/main/基础信息整合.py
index 98e0e2b..8c81349 100644
--- a/flask_app/main/基础信息整合.py
+++ b/flask_app/main/基础信息整合.py
@@ -124,8 +124,8 @@ def project_basic_info(knowledge_name,truncate0,output_folder,clause_path):    #
     update_baseinfo_list=combine_basic_info(baseinfo_list)    #整合基础信息核心代码
 
     baseinfo_combined_res = combine_json_results(update_baseinfo_list)  # 返回值是字典
-    return nest_json_under_key(baseinfo_combined_res, "基础信息")    #返回值是json字符串
-
+    # return nest_json_under_key(baseinfo_combined_res, "基础信息")    #返回值是json字符串
+    return {"基础信息":baseinfo_combined_res}
 
 if __name__ == "__main__":
     knowledge_name = "ztb"
diff --git a/flask_app/main/形式响应评审.py b/flask_app/main/形式响应评审.py
index 4c1cb2a..20230dd 100644
--- a/flask_app/main/形式响应评审.py
+++ b/flask_app/main/形式响应评审.py
@@ -171,7 +171,7 @@ def reformat_questions(match_keys, input_path, output_folder):
 
 def process_reviews(original_dict_data,knowledge_name, truncate0_jsonpath,clause_json_path,input_file,output_folder):
     combined_results2={}
-    matched_keys = extract_matching_keys(original_dict_data)  #[{'形式评审标准.投标文件签字盖章': '符合第一章“投标人须知”第 3.7.3（4）目 规定'}, {'形式评审标准.多标段投标': '符合第二章“投标人须知”第 10.1款规定'}] 提取值中包含"符合"的字典
+    matched_keys = extract_matching_keys(original_dict_data)  #output:[{'形式评审标准.投标文件签字盖章': '符合第一章“投标人须知”第 3.7.3（4）目 规定'}, {'形式评审标准.多标段投标': '符合第二章“投标人须知”第 10.1款规定'}] 提取值中包含"符合"的字典
     entries_with_numbers, formatted_questions1,entries_with_numbers2, clause2_path = reformat_questions(matched_keys,input_file,output_folder)
     combined_results = process_and_merge_entries(entries_with_numbers, truncate0_jsonpath,
                                                  clause_json_path)  # 调用根据条款号整合json.py
diff --git a/flask_app/main/招标文件解析.py b/flask_app/main/招标文件解析.py
index 6a04761..9252fd1 100644
--- a/flask_app/main/招标文件解析.py
+++ b/flask_app/main/招标文件解析.py
@@ -118,12 +118,17 @@ def fetch_evaluation_standards(truncate1):  # 评标办法前附表
     # 获取评标办法前附表的字典结果
     evaluation_standards_res = combine_evaluation_standards(truncate1)
 
-    # 从结果中提取"商务标"和"技术标"
-    technical_standards = evaluation_standards_res.get("技术标", {})
-    commercial_standards = evaluation_standards_res.get("商务标", {})
+    # 获取技术标和商务标
+    technical_standards = {"技术标": evaluation_standards_res.get("技术标", {})}
+    commercial_standards = {"商务标": evaluation_standards_res.get("商务标", {})}
+
     logger.info("商务标和技术标 done")
-    # 返回技术标和商务标
-    return {"technical_standards": technical_standards, "commercial_standards": commercial_standards}
+
+    # 返回将 "技术标" 和 "商务标" 包含在新的键中
+    return {
+        "technical_standards": technical_standards,
+        "commercial_standards": commercial_standards
+    }
 
 # def fetch_evaluation_standards(truncate1):  # 评标办法前附表
 #     logger.info("starting商务标技术标...")
@@ -147,8 +152,8 @@ def fetch_bidding_documents_requirements(clause_path):
     fetch_bidding_documents_requirements_json = extract_from_notice(clause_path, 1)
     qualify_nested_res = nest_json_under_key(fetch_bidding_documents_requirements_json, "投标文件要求")
     logger.info("投标文件要求done...")
-    return qualify_nested_res
-
+    # return qualify_nested_res
+    return {"投标文件要求":fetch_bidding_documents_requirements_json}
 
 # 开评定标流程
 def fetch_bid_opening(clause_path):
@@ -156,8 +161,8 @@ def fetch_bid_opening(clause_path):
     fetch_bid_opening_json = extract_from_notice(clause_path, 2)
     qualify_nested_res = nest_json_under_key(fetch_bid_opening_json, "开评定标流程")
     logger.info("开评定标流程done...")
-    return qualify_nested_res
-
+    # return qualify_nested_res
+    return {"开评定标流程":fetch_bid_opening_json}
 
 # def main_processing(output_folder, downloaded_file_path, file_type, unique_id):  # file_type=1->docx  file_type=2->pdf
 #     global logger
@@ -243,12 +248,12 @@ def main_processing(output_folder, downloaded_file_path, file_type, unique_id):
                     commercial_standards = result["commercial_standards"]
 
                     # 分别返回技术标和商务标
-                    yield json.dumps(transform_json_values({'technical_standards': technical_standards}), ensure_ascii=False)
-                    yield json.dumps(transform_json_values({'commercial_standards': commercial_standards}), ensure_ascii=False)
+                    yield json.dumps({'technical_standards': transform_json_values(technical_standards)}, ensure_ascii=False)
+                    yield json.dumps({'commercial_standards': transform_json_values(commercial_standards)}, ensure_ascii=False)
 
                 else:
                     # 处理其他任务的结果
-                    yield json.dumps(transform_json_values({key: result}), ensure_ascii=False)
+                    yield json.dumps({key: transform_json_values(result)}, ensure_ascii=False)
 
             except Exception as exc:
                 logger.error(f"Error processing {key}: {exc}")
@@ -275,7 +280,7 @@ def main_processing(output_folder, downloaded_file_path, file_type, unique_id):
                 key = next(k for k, v in future_dependencies.items() if v == future)
                 try:
                     result = future.result()
-                    yield json.dumps(transform_json_values({key: result}), ensure_ascii=False)
+                    yield json.dumps({key: transform_json_values(result)}, ensure_ascii=False)
                 except Exception as exc:
                     logger.error(f"Error processing {key}: {exc}")
                     yield json.dumps({'error': f'Error processing {key}: {str(exc)}'}, ensure_ascii=False)
diff --git a/flask_app/main/无效标和废标和禁止投标整合.py b/flask_app/main/无效标和废标和禁止投标整合.py
index 051c29e..87e539b 100644
--- a/flask_app/main/无效标和废标和禁止投标整合.py
+++ b/flask_app/main/无效标和废标和禁止投标整合.py
@@ -338,8 +338,8 @@ def combine_find_invalid(file_path, output_dir, truncate_json_path,clause_path,t
         combined_dict.update(d)
 
     print("无效标与废标done...")
-    return nest_json_under_key(combined_dict, "无效标与废标项")
-
+    # return nest_json_under_key(combined_dict, "无效标与废标项")
+    return {"无效标与废标项":combined_dict}
 
 if __name__ == '__main__':
     start_time = time.time()
diff --git a/flask_app/main/资格审查模块.py b/flask_app/main/资格审查模块.py
index 9eabdce..471bd3f 100644
--- a/flask_app/main/资格审查模块.py
+++ b/flask_app/main/资格审查模块.py
@@ -25,8 +25,8 @@ def combine_review_standards(truncate1,truncate3,knowledge_name,truncate0_jsonpa
         final_qualify_json = future_qualification.result()
         form_response_dict = future_form_response.result()
     form_response_dict.update(final_qualify_json)
-    return nest_json_under_key(form_response_dict,"资格审查")
-
+    # return nest_json_under_key(form_response_dict,"资格审查")
+    return {"资格审查":form_response_dict}
 
 if __name__ == "__main__":
     input_file="D:\\flask_project\\flask_app\\static\\output\\c02a12c2-6f7b-49dc-b97f-c3d740c96c21\\ztbfile.pdf"
diff --git a/flask_app/货物标/资格审查main.py b/flask_app/货物标/资格审查main.py
index cd8a7c5..80311a6 100644
--- a/flask_app/货物标/资格审查main.py
+++ b/flask_app/货物标/资格审查main.py
@@ -6,6 +6,7 @@ from flask_app.main.基础信息整合 import combine_basic_info
 from flask_app.main.通义千问long import qianwen_long,upload_file
 from flask_app.main.多线程提问 import multi_threading
 from flask_app.main.json_utils import combine_json_results,clean_json_string
+from flask_app.main.形式响应评审 import update_json_data,extract_matching_keys
 #这个字典可能有嵌套，你需要遍历里面的键名，对键名作判断，而不是键值，具体是这样的：如果处于同一层级的键的数量>1并且键名全由数字或点号组成。那么就将这些序号键名全部删除，重新组织成一个字典格式的数据，你可以考虑用字符串列表来保持部分平级的数据
 #对于同级的键，如果数量>1且键名都统一，那么将键名去掉，用列表保持它们的键值
 #对于同一个字典中，可能存在若干键值对，若它们的键值都是""或者"/" 你就将它们的键值删去，它们的键名用字符串列表保存
@@ -98,9 +99,30 @@ def process_dict(data):
         return result['items']
 
     return result
+
+
+def find_chapter_clause_references(data, parent_key=""):
+    result = []  # collects one {dotted_key_path: value} dict per string value that cites a chapter/clause
+    # Matches "第x章" / "第x款" (chapter / clause references), where x is Chinese or Arabic numerals.
+    chapter_clause_pattern = re.compile(r'第[一二三四五六七八九十\d]+[章款]')
+
+    # Walk every key/value pair of the dict.
+    for key, value in data.items():
+        # Build the dotted path to this key, prefixed with the parent path when there is one.
+        full_key = f"{parent_key}.{key}" if parent_key else key
+
+        if isinstance(value, dict):
+            # Nested dict: recurse and collect its matches under the extended path.
+            result.extend(find_chapter_clause_references(value, full_key))
+        elif isinstance(value, str):
+            # String value: record it when it contains a chapter/clause reference; other value types are ignored.
+            if chapter_clause_pattern.search(value):
+                result.append({full_key: value})
+
+    return result
 def qualification_review(truncate_file):
     file_id=upload_file(truncate_file)
-    user_query=["该招标文件中规定的资格性审查标准是怎样的？请以json格式给出，外层为'资格性审查'，你的回答要与原文一致，不可擅自总结删减，也不要回答资格性审查的内容。","该招标文件中规定的符合性审查标准是怎样的？请以json格式给出，外层为'符合性审查'，你的回答要与原文一致，不可擅自总结删减，也不要回答资格性审查的内容。"]
+    user_query=["该招标文件中规定的资格性审查标准是怎样的？请以json格式给出，外层为'资格性审查'，你的回答要与原文一致，不可擅自总结删减，也不要回答有关符合性性审查的内容。","该招标文件中规定的符合性审查标准是怎样的？请以json格式给出，外层为'符合性审查'，你的回答要与原文一致，不可擅自总结删减，也不要回答有关资格性审查的内容。"]
     results=multi_threading(user_query,"",file_id,2)
     combined_res = {}
     for question, response in results:
@@ -109,10 +131,12 @@ def qualification_review(truncate_file):
         processed1 = preprocess_dict(cleaned_data)
         processed2 = process_dict(processed1)
         combined_res.update(processed2)
+    result=find_chapter_clause_references(combined_res)
+    print(result)
  # 整合基础信息核心代码
     return combined_res
 
 if __name__ == "__main__":
-    truncate_file="C:\\Users\\Administrator\\Desktop\\货物标\\output3\\622二次视频会议磋商文件(1)_qualification2.pdf"
+    truncate_file="C:\\Users\\Administrator\\Desktop\\货物标\\output3\\6.2定版视频会议磋商文件_qualification2.pdf"
     res=qualification_review(truncate_file)
     print(json.dumps(res,ensure_ascii=False, indent=4))
\ No newline at end of file