9.5 资格审查部分返回更能对应原文

2024-09-05 18:00:40 +08:00 · 2024-09-05 18:00:40 +08:00 · cac8ea4136
commit cac8ea4136
parent f826a3f7a1
4 changed files with 129 additions and 115 deletions
--- a/flask_app/main/ttt.py
+++ b/flask_app/main/ttt.py
@ -1,18 +1,15 @@
 import re

-
-test_text = """
-xxxxxxxxxxxxxxxxx
-附录： 投标人资质条件、能力和信誉（资格审查标准）
-xxxxxx
-附表一：招标文件澄清申请函
-xxxxxxxxxxxxxxx
+content="递交投标文件截止之日后120日内"
+def extract_content_after_special_chars(content):
    """
-pattern = r'^(?:附录(?:一)?[：:]|附件(?:一)?[：:]|附表(?:一)?[：:]).*(?:资质|能力|信誉).*$'
-
-match = re.search(pattern, test_text, re.MULTILINE)
-
+    提取特定符号后的内容，直到遇到结束符号。
+    """
+    pattern = r'[\x01\x02☑√团]([^□]+)'
+    match = re.search(pattern, content)
    if match:
-    print("匹配到的行:", match.group())
-else:
-    print("没有匹配到符合条件的行")
+        return match.group(1).strip()  # 提取匹配的内容，并去除多余空格
+    return content  # 如果没有找到匹配，返回原内容
+
+res=extract_content_after_special_chars(content)
+print(res)
--- a/flask_app/main/形式响应评审.py
+++ b/flask_app/main/形式响应评审.py
@ -1,3 +1,4 @@
+# -*- encoding:utf-8 -*-
 import re
 import json
 import time
@ -27,6 +28,38 @@ prompt = """
 """


+def update_json_data(original_data, updates, second_response_list):
+    """
+    Overwrite values in original_data using the given updates; dot-separated keys address nested dicts.
+    Args:
+    - original_data: dict, the original JSON data (modified in place).
+    - updates: dict, key/value pairs to apply first.
+    - second_response_list: list of dicts whose key/value pairs are applied afterwards.
+    Returns:
+    - updated_data: dict, the same original_data object after all updates.
+    """
+    def recursive_update(data, key, value):
+        # Walk the dot-separated key path down to the parent dict, then set the leaf
+        keys = key.split('.')
+        for k in keys[:-1]:
+            data = data.setdefault(k, {})  # missing levels are created; NOTE(review): assumes existing intermediate values are dicts
+        if isinstance(value, dict) and isinstance(data.get(keys[-1], None), dict):
+            data[keys[-1]] = {**data.get(keys[-1], {}), **value}  # both dicts: one-level merge, new keys win
+        else:
+            data[keys[-1]] = value  # otherwise the old value is replaced outright
+
+    # Merge updates into original_data
+    for key, value in updates.items():
+        recursive_update(original_data, key, value)
+
+    # Then merge every dict of second_response_list; applied last, so these win on conflicting keys
+    for response_dict in second_response_list:
+        for key, value in response_dict.items():
+            recursive_update(original_data, key, value)
+
+    return original_data
+
+
 def extract_matching_keys(json_data):
    # 函数首先检查输入 json_data 是否为字符串类型。如果是，它会使用 json.loads() 将字符串解析为字典。
    if isinstance(json_data, str):
@ -74,8 +107,11 @@ def extract_matching_keys(json_data):

    return final_matching

-
+#TODO:如果要引用到招标公告中的内容，考虑提取  或者qianwen-long
 def reformat_questions(match_keys):
+    """
+    [{'形式评审标准.多标段投标': '符合第一章“招标公告”第 3.3款规定'}, {'形式评审标准.投标文件的签署': '符合第二章“投标人须知”第 3.6.3(5)目规定'}, {'形式评审标准.投标保证金': '符合第二章“投标人须知”第 3.4.1项规定'}, {'形式评审标准.工程分包（如有）': '符合第二章“投标人须知”第 1.11款规定'}, {'响应性评审标准.投标内容': '符合第二章“投标人须知”第 1.3款的规定'}, {'响应性评审标准.监理服务阶段': '符合第二章“投标人须知”第 1.3款的规定'}, {'响应性评审标准.监理工作范围': '符合第二章“投标人须知”第 1.3款的规定'}, {'响应性评审标准.监理服务期': '符合第二章“投标人须知”第 1.3款的规定'}, {'响应性评审标准.投标有效期': '符合第二章“投标人须知”第 3.3.1项的规定'}, {'响应性评审标准.投标保证金': '符合第二章“投标人须知”第 3.4.1项的规定'}, {'响应性评审标准.重大偏差': '见第二章“投标人须知”第 1.12款规定'}]
+    """
    """
    根据是否包含特定序号格式（如3.7.4或3.7.4(5)或3.7.4（5）），重新格式化匹配到的评审条目。
    若包含序号，则提取出来；若不包含，则生成格式化的问题字符串。
@ -84,10 +120,15 @@ def reformat_questions(match_keys):
    formatted_questions = []

    # 正则表达式，同时匹配全角和半角括号
-    pattern = re.compile(r'(\d+(?:\.\d+)+)(?:[\(\（](\d+)[\)\）])?')
+    pattern = re.compile(r'(\d+(?:\.\d+)+)(?:[\(\（](\d+)[\)\）])?')    #识别包含数字序列的特定格式 eg:3.7.4(5)  3.4.1

    for entry in match_keys:
        key, value = next(iter(entry.items()))
+        if '招标公告' in value or '第一章' in value:
+            formatted_entry = f"关于‘{key}’，{value.replace('符合', '')}的内容是怎样的？请按json格式给我提供信息，键名为'{key}',如果存在未知信息，请在对应键值处填'未知'。"
+            formatted_questions.append(formatted_entry)
+            continue  # 继续处理下一个条目
+
        match = pattern.search(value)
        if match:
            # 如果存在序号，保存序号与对应的键值对，包括括号内的数字（如果存在）
@ -98,46 +139,12 @@ def reformat_questions(match_keys):
            revised_standard = re.sub(r'符合', '', value)
            formatted_entry = f"关于‘{key}’，{revised_standard}的内容是怎样的？请按json格式给我提供信息，键名为'{key}',如果存在未知信息，请在对应键值处填'未知'。"
            formatted_questions.append(formatted_entry)
-
+    print(formatted_questions)
    return entries_with_numbers, formatted_questions


-
-def update_json_data(original_data, updates, second_response_list):
-    """
-    根据提供的更新字典覆盖原始JSON数据中对应的键值，支持点分隔的键来表示嵌套结构。
-    参数:
-    - original_data: dict, 原始的JSON数据。
-    - updates: dict, 包含需要更新的键值对。
-    - second_response_list: list, 包含多个字典，每个字典包含需要更新的键值对。
-    返回:
-    - updated_data: dict, 更新后的JSON数据。
-    """
-    def recursive_update(data, key, value):
-        # 处理点分隔的键，递归定位并更新嵌套字典
-        keys = key.split('.')
-        for k in keys[:-1]:
-            data = data.setdefault(k, {})
-        if isinstance(value, dict) and isinstance(data.get(keys[-1], None), dict):
-            data[keys[-1]] = {**data.get(keys[-1], {}), **value}
-        else:
-            data[keys[-1]] = value
-
-    # 合并 updates 到 original_data 中
-    for key, value in updates.items():
-        recursive_update(original_data, key, value)
-
-    # 遍历 second_response_list 中的每个字典，并合并到 original_data 中
-    for response_dict in second_response_list:
-        for key, value in response_dict.items():
-            recursive_update(original_data, key, value)
-
-    return original_data
-
-
-
 def process_reviews(original_dict_data,knowledge_name, truncate0_jsonpath,clause_json_path):
-    matched_keys = extract_matching_keys(original_dict_data)  #[{'形式评审标准.投标文件签字盖章': '符合第二章“投标人须知”第 3.7.3（4）目 规定'}, {'形式评审标准.多标段投标': '符合第二章“投标人须知”第 10.1款规定'}]
+    matched_keys = extract_matching_keys(original_dict_data)  #[{'形式评审标准.投标文件签字盖章': '符合第二章“投标人须知”第 3.7.3（4）目 规定'}, {'形式评审标准.多标段投标': '符合第二章“投标人须知”第 10.1款规定'}] 提取值中包含"符合"的字典
    entries_with_numbers, formatted_questions = reformat_questions(matched_keys)
    results_2 = multi_threading(formatted_questions, knowledge_name, True)    #无序号的直接问大模型
    second_response_list = []
@ -152,21 +159,23 @@ def process_reviews(original_dict_data,knowledge_name, truncate0_jsonpath,clause
            print(f"Error processing response for query index {_}: {e}")

    # Assume JSON file paths are defined or configured correctly
-    combined_results = process_and_merge_entries(entries_with_numbers, truncate0_jsonpath, clause_json_path)   #脚本提取的要求   [{'xxx': '3.7.4（5）'}]
+    # print(entries_with_numbers)      #[{'形式评审标准.多标段投标': '3.7.4（5）'}]
+    combined_results = process_and_merge_entries(entries_with_numbers, truncate0_jsonpath, clause_json_path)   #调用根据条款号整合json.py
    updated_json = update_json_data(original_dict_data, combined_results, second_response_list)
    return updated_json


 if __name__ == "__main__":
    start_time=time.time()
-    knowledge_name="zbfile"
-    truncate_tobidders_table_json_path="C:\\Users\\Administrator\\Desktop\\招标文件\\output\\truncate_output.json"
-    clause_path="C:\\Users\\Administrator\\Desktop\\招标文件\\output\\clause.json"
-    original_dict_data={'营业执照': '具备有效的营业执照', '资质条件': '符合第二章“投标人须知”第 1.4.1项规定', '财务状况': '符合第二章“投标人须知”第 1.4.1项规定', '类似业绩': '符合第二章“投标人须知”第 1.4.1项规定', '信誉': '符合第二章“投标人须知”第 1.4.1项规定', '项目经理资格': '符合第二章“投标人须知”第 1.4.1项规定', '设计负责人资格': '符合第二章“投标人须知”第 1.4.1项规定', '施工负责人资格': '符合第二章“投标人须知”第 1.4.1项规定', '施工机械设备': '符合第二章“投标人须知”第 1.4.1项规定', '项目管理机构及人员': '符合第二章“投标人须知”第 1.4.1项规定', '其他要求': '符合第二章“投标人须知”第 1.4.1项规定', '联合体投投人 （如有）': '符合第二章“投标人须知”第 1.4.2项规定', '不存在禁止投标的情形': '不存在第二章“投标人须知”第 1.4.3项规 定的任何一种情形'}
+    knowledge_name="zbtest20"
+    truncate_tobidders_table_json_path="C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\truncate_output.json"
+    clause_path="C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\clause.json"
+    original_dict_data={'形式评审标准': {'多标段投标': '符合第一章“招标公告”第 3.3款规定', '投标文件': '投标文件能正常打开', '投标人名称': '与营业执照、资质证书一致', '投标文件的签署': '符合第二章“投标人须知”第 3.6.3(5)目规定', '投标保证金': '符合第二章“投标人须知”规定', '投标文件的格式、内容': '符合第八章“投标文件格式”的格式规定、实质性内容齐全，关键字迹清晰可辨', '投标报价': '①一份投标文件应只有一个投标报价，未提交选择性报价； ②投标函中报价与监理服务费报价汇总表中的报价保持一致； ③投标人未提交调价函。', '联合体投标人（如有）': '提交联合体协议书，并明确联合体牵头人', '工程分包（如有）': '符合第二章“投标人须知”第 1.11款规定'}, '响应性评审标准': {'投标内容': '符合第二章“投标人须知”第 1.3款的规定', '监理服务阶段': '符合第二章“投标人须知”第 1.3款的规定', '监理工作范围': '符合第二章“投标人须知”第 1.3款的规定', '投标报价': '①投标报价不高于最高投标限价（如果有）； ②投标报价不低于成本价。', '监理服务期': '符合第二章“投标人须知”第 1.3款的规定', '投标有效期': '符合第二章“投标人须知”第 3.3.1项的规定', '投标保证金': '符合第二章“投标人须知”第 3.4.1项的规定', '权利义务': '符合或优于第四章“合同条款及格式”规定的权利义务', '技术标准': '符合第七章“技术标准”规定', '招标人不能接受的条件': '未附有招标人不能接受的条件', '重大偏差': '见第二章“投标人须知”第 1.12款规定'}}
    formal_json = process_reviews(original_dict_data,knowledge_name, truncate_tobidders_table_json_path, clause_path)
-    data = json.loads(formal_json)
+    data = json.dumps(formal_json, ensure_ascii=False, indent=4)
    end_time=time.time()
    elapsed_time = end_time - start_time
    print(f"Function execution took {elapsed_time} seconds.")
+    print(data)

 #关于'技术暗标'，第二章“投标人须知”规定的内容是怎样的？请按json格式给我提供信息，键名为'技术暗标',请你忠于原文，回答要求完整准确，不要擅自总结、删减，且不要回答诸如'见投标人须知前附表'或'见第x.x项规定'这类无实质性内容的回答。
--- a/flask_app/main/根据条款号整合json.py
+++ b/flask_app/main/根据条款号整合json.py
@ -38,45 +38,6 @@ def convert_dict_to_str(d):
        return "\n".join(f"{k}: {v}" for k, v in d.items())
    return str(d)

-
-def find_entries_in_jsons(entries, json_primary, json_secondary):
-    results = {}
-    for entry in entries:
-        key, value = next(iter(entry.items()))
-        combined_value = []
-        # 先尝试在json_primary中寻找，如果找不到再在json_secondary中查找
-        found_in_primary = process_json_with_subentries(json_primary, value, combined_value)
-        if not found_in_primary:
-            process_json_with_subentries(json_secondary, value, combined_value)
-        if combined_value:
-            results[key] = "\n".join(combined_value)
-    return results
-
-def process_json_with_subentries(json_data, value, combined_value):
-    """
-    处理JSON数据，寻找指定的条目，考虑全角和半角括号。
-    """
-    value = standardize_brackets(value)   #将1.11(1)->1.11（1）
-    if "（" in value and "）" in value:    #存在（）的情况
-        first_content=get_values_only(json_data.get(value))
-        if first_content:
-            combined_value.append(first_content)
-            return True
-        base_key, subentry_key = value.split("（")        #base_key:1.11    subentry_key:（1）
-        subentry_key = "（" + subentry_key
-        content = json_data.get(base_key.strip())
-        if content:
-            if isinstance(content, str):
-                extracted_content = extract_specific_subentry(content, subentry_key)
-                if extracted_content:
-                    combined_value.append(extracted_content)
-                return True
-            else:
-                return False
-    else:
-        return extarct_normal(json_data, value, combined_value)
-
-
 def extarct_normal(json_data, value, combined_value):
    found_subentries = check_and_collect_subentries(json_data, value, combined_value)
    if not found_subentries:     #若无子条目，直接查找
@ -116,15 +77,9 @@ def extract_specific_subentry(content, subentry_key):
        return ""
    return ""

-def get_values_only(content):
-    if isinstance(content, dict):
-        # 如果内容是字典，首先将字典的值转换为字符串
-        content = " / ".join(content.values())
-
-        # 检查并处理特殊字符
-    return extract_content_after_special_chars(content)
-
 def extract_content_after_special_chars(content):
+    if not content:
+        return content
    """
    提取特定符号后的内容，直到遇到结束符号或内容末尾。
    """
@ -136,12 +91,55 @@ def extract_content_after_special_chars(content):
        return match.group(1).strip()  # 去除多余空白字符
    # 如果没有找到特殊字符，返回原始内容
    return content
+
+def get_values_only(content):  # Flatten a dict to one string, then pass the result through extract_content_after_special_chars
+    if isinstance(content, dict):
+        # Dict input: join its values with " / " into a single string
+        content = " / ".join(content.values())  # NOTE(review): str.join raises TypeError if any value is not a str
+        # The call below runs for dict and non-dict input alike and handles the special marker characters
+    return extract_content_after_special_chars(content)
 def standardize_brackets(value):
    """
    将输入中的所有半角括号转换为全角括号。
    """
    return value.replace('(', '（').replace(')', '）')

+def process_json_with_subentries(json_data, value, combined_value):
+    """
+    Look up clause number `value` in json_data (half- or full-width brackets accepted) and append any text found to combined_value.
+    """
+    value = standardize_brackets(value)   # e.g. 1.11(1) -> 1.11（1）
+    if "（" in value and "）" in value:    # clause number carries a bracketed sub-item
+        first_content=get_values_only(json_data.get(value))  # first try the full key, brackets included
+        if first_content:
+            combined_value.append(first_content)
+            return True
+        base_key, subentry_key = value.split("（")        # "1.11（1）" -> base_key "1.11", subentry_key "1）"; NOTE(review): ValueError if value has more than one "（"
+        subentry_key = "（" + subentry_key  # restore the opening bracket removed by split
+        content = json_data.get(base_key.strip())
+        if content:  # NOTE(review): when the base clause is missing or empty, control falls off the end and None is returned
+            if isinstance(content, str):
+                extracted_content = extract_specific_subentry(content, subentry_key)
+                if extracted_content:
+                    combined_value.append(extracted_content)
+                return True  # a str base clause counts as found even when no sub-item text was extracted
+            else:
+                return False  # non-str base clause is not searched for the sub-item
+    else:
+        return extarct_normal(json_data, value, combined_value)  # no bracketed sub-item: delegate to the plain lookup
+
+def find_entries_in_jsons(entries, json_primary, json_secondary):  # Build {entry key: text found for its clause number}
+    results = {}
+    for entry in entries:
+        key, value = next(iter(entry.items()))  # only the first key/value pair of each entry dict is read
+        combined_value = []
+        # Search json_primary first; fall back to json_secondary only when the primary lookup returns a falsy result
+        found_in_primary = process_json_with_subentries(json_primary, value, combined_value)
+        if not found_in_primary:
+            process_json_with_subentries(json_secondary, value, combined_value)
+        if combined_value:  # entries for which nothing was collected are left out of results
+            results[key] = "\n".join(combined_value)
+    return results
 def process_and_merge_entries(entries_with_numbers, primary_json_path, secondary_json_path):
    primary_json_data = load_json(primary_json_path)
    secondary_json_data = load_json(secondary_json_path)
@ -151,9 +149,9 @@ def process_and_merge_entries(entries_with_numbers, primary_json_path, secondary
 if __name__ == "__main__":
    # Hypothetical entries and file paths for testing
    # entries_with_numbers = [{'形式评审标准.投标文件签字盖章': '3.7.3(3)'}, {'形式评审标准.多标段投标': '10.1'}, {'形式评审标准.“技术暗标”': '3.7.4(5)'}, {'响应性评审标准.投标内容': '1.3.1'}, {'响应性评审标准.工期': '1.3.2'}, {'响应性评审标准.工程质量': '1.3.3'}, {'响应性评审标准.投标有效期': '3.3.1'}, {'响应性评审标准.投标保证金': '3.4.1'}, {'响应性评审标准.分包计划': '1.11'}]
-    entries_with_numbers=[{'xxx': '3.4.1'}]
-    primary_json_path = 'C:\\Users\\Administrator\\Desktop\\fsdownload\\temp8\\3abb6e16-19db-42ad-9504-53bf1072dfe7\\truncate_output.json'
-    secondary_json_path = 'C:\\Users\\Administrator\\Desktop\\fsdownload\\temp8\\3abb6e16-19db-42ad-9504-53bf1072dfe7\\clause.json'
+    entries_with_numbers=[{'xxx': '3.6.3(5)'}]
+    primary_json_path = 'C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\truncate_output.json'
+    secondary_json_path = 'C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\clause.json'

    # Since this is just a test block, make sure these paths point to actual JSON files with the appropriate structure
    try:
--- a/flask_app/main/资格审查模块.py
+++ b/flask_app/main/资格审查模块.py
@ -5,6 +5,7 @@ from flask_app.main.json_utils import nest_json_under_key, extract_content_from_
 from flask_app.main.形式响应评审 import process_reviews
 from flask_app.main.资格评审 import process_qualification
 from flask_app.main.通义千问long import upload_file, qianwen_long
+from concurrent.futures import ThreadPoolExecutor


 def combine_review_standards(truncate1,truncate3,knowledge_name,truncate0_jsonpath,clause_path):   #评标办法前附表
@ -15,8 +16,16 @@ def combine_review_standards(truncate1,truncate3,knowledge_name,truncate0_jsonpa
    results = qianwen_long(file_id, user_query_1)
    original_dict_data = extract_content_from_json(results)
    qualification_review = original_dict_data.pop('资格评审标准', '默认值或None')    #qianwen-long有关资格评审的内容
-    final_qualify_json=process_qualification(qualification_review,truncate3,knowledge_name)
-    form_response_dict=process_reviews(original_dict_data, knowledge_name, truncate0_jsonpath, clause_path)
+    print(original_dict_data)
+    with ThreadPoolExecutor() as executor:
+        # 创建Future对象
+        future_qualification = executor.submit(process_qualification, qualification_review, truncate3, knowledge_name)
+        future_form_response = executor.submit(process_reviews, original_dict_data, knowledge_name, truncate0_jsonpath,
+                                               clause_path)
+
+        # 等待执行结果
+        final_qualify_json = future_qualification.result()
+        form_response_dict = future_form_response.result()
    print("形式响应评审done")
    form_response_dict.update(final_qualify_json)
    return nest_json_under_key(form_response_dict,"资格审查")
@ -24,11 +33,12 @@ def combine_review_standards(truncate1,truncate3,knowledge_name,truncate0_jsonpa

 if __name__ == "__main__":
    output_folder = "C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹"
-    truncate0 = os.path.join(output_folder, "zbtest20_tobidders_notice_table.pdf")
+    # truncate0 = os.path.join(output_folder, "zbtest20_tobidders_notice_table.pdf")
+    truncate2=os.path.join(output_folder,"zbtest20_tobidders_notice.pdf")
    knowledge_name="zbtest20"
    truncate1=os.path.join(output_folder,"zbtest20_evaluation_method.pdf")
    truncate3=os.path.join(output_folder,"zbtest20_qualification.pdf")
-    clause_path = convert_clause_to_json(truncate1, output_folder)
-    truncate1_jsonpath = os.path.join(output_folder, "truncate_output.json")
-    res=combine_review_standards(truncate1,truncate3, knowledge_name,truncate1_jsonpath,clause_path)
+    clause_path = convert_clause_to_json(truncate2, output_folder)
+    truncate0_jsonpath = os.path.join(output_folder, "truncate_output.json")
+    res=combine_review_standards(truncate1,truncate3, knowledge_name,truncate0_jsonpath,clause_path)
    print(res)