# zbparse/flask_app/main/根据条款号整合json.py

import json
import re

def load_json(file_path):
    """Read a UTF-8 JSON file and normalise the brackets in its content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON data after it has been passed through
        ``standardize_brackets_in_json``.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        parsed = json.loads(handle.read())
    return standardize_brackets_in_json(parsed)

def standardize_brackets_in_json(data):
    """Recursively convert half-width brackets to full-width in JSON data.

    Strings are passed through ``standardize_brackets``; lists and dict
    values are processed element by element. Dict keys are left untouched,
    and any other value (numbers, booleans, ``None``) is returned as is.

    Args:
        data: Decoded JSON data of any shape.

    Returns:
        A structure of the same shape as ``data``; lists and dicts are
        rebuilt rather than modified in place.
    """
    if isinstance(data, str):
        return standardize_brackets(data)
    if isinstance(data, list):
        return [standardize_brackets_in_json(item) for item in data]
    if isinstance(data, dict):
        return {
            key: standardize_brackets_in_json(val)
            for key, val in data.items()
        }
    return data

def convert_dict_to_str(d):
    """Render ``d`` as text.

    A dict becomes one ``key: value`` line per item, joined with newlines;
    anything else is converted with ``str``.
    """
    if not isinstance(d, dict):
        return str(d)
    lines = [f"{name}: {item}" for name, item in d.items()]
    return "\n".join(lines)

def extarct_normal(json_data, value, combined_value):
    """Collect the text for clause ``value`` when it has no bracketed sub-index.

    Sub-entries (keys starting with ``value + "."``) take precedence. Only
    when none exist is the entry stored under ``value`` itself looked up.

    Args:
        json_data: Mapping of clause numbers to their content.
        value: Clause number to look for.
        combined_value: List that receives the collected text (mutated).

    Returns:
        A truthy value when something was appended to ``combined_value``,
        otherwise the falsy result of the sub-entry search.
    """
    found = check_and_collect_subentries(json_data, value, combined_value)
    if found:
        return found
    # No sub-entries: fall back to a direct lookup, treating a missing key
    # the same as an empty entry.
    content = json_data.get(value, "")
    if not content:
        return found
    combined_value.append(get_values_only(content))
    return True

# Finds the sub-entries derived from a main entry. For value "1.1" it collects
# every key that starts with "1.1." ("1.1.1", "1.1.2", ...); the key "1.1"
# itself is not matched.
def check_and_collect_subentries(json_data, value, combined_value):
    """Append every sub-entry of ``value`` to ``combined_value``.

    Matching entries are visited in the iteration order of ``json_data`` and
    appended as ``"<n>. <text>"``, with ``n`` counting up from 1.

    Args:
        json_data: Mapping of clause numbers to their content.
        value: Parent clause number.
        combined_value: List that receives the numbered texts (mutated).

    Returns:
        True if at least one sub-entry was found, otherwise False.
    """
    children = (key for key in json_data if key.startswith(value + "."))
    count = 0
    for count, key in enumerate(children, start=1):
        combined_value.append(f"{count}. {get_values_only(json_data[key])}")
    return count > 0

# Handles clause numbers that carry a bracketed sub-index, e.g. "1.11（1）".
def extract_specific_subentry(content, subentry_key):
    """Return the text of one bracket-numbered sub-item inside ``content``.

    The sub-item starts right after the marker ``（n）`` and ends at the next
    marker ``（n+1）`` or, if there is none, at the end of that segment. Both
    full-width ``（ ）`` and half-width ``( )`` brackets are recognised, in
    the key as well as in the content; the returned text itself is not
    altered apart from stripping surrounding whitespace.

    Args:
        content: Text of the parent clause.
        subentry_key: Sub-index such as ``"（1）"`` or ``"(1)"``.

    Returns:
        The stripped text of the requested sub-item, or ``""`` when
        ``content`` is not a string, the key holds no integer, or the
        marker does not occur in ``content``.
    """
    if not isinstance(content, str):
        return ""

    digits = subentry_key
    for bracket in "（）()":
        digits = digits.replace(bracket, "")
    try:
        idx = int(digits)
    except ValueError:
        return ""

    # Character classes accept either bracket width around the number.
    parts = re.split(f"[（(]{idx}[）)]", content)
    if len(parts) < 2:
        return ""
    # parts[1] is the text between the first and second occurrence of the
    # marker; cut it at the following sub-item number if present.
    following = re.split(f"[（(]{idx + 1}[）)]", parts[1], maxsplit=1)
    return following[0].strip()

def extract_content_after_special_chars(content):
    """Return the text that follows the first selection mark in ``content``.

    A selection mark is one of the control characters U+0001 / U+0002 or
    one of ``☑``, ``√``, ``团``. The result runs from just after the mark up
    to the next ``□`` (or the end of the text) with surrounding whitespace
    removed.

    Args:
        content: Text to inspect. Empty values and non-string values are
            returned unchanged because there is nothing to search in them.

    Returns:
        The extracted text, or ``content`` itself when it is empty, not a
        string, or contains no selection mark.
    """
    if not content or not isinstance(content, str):
        return content
    # NOTE(review): '团' presumably stands in for a ticked box mis-read during
    # text extraction; it also occurs in ordinary words — confirm.
    pattern = r'[\x01\x02☑√团]([^□]*)'
    match = re.search(pattern, content)
    if match:
        return match.group(1).strip()
    return content

def get_values_only(content):
    """Flatten ``content`` to its values and keep only the selected text.

    A dict is reduced recursively to its values, joined with ``" / "``;
    the keys are dropped. The result (or the non-dict input) is then passed
    through ``extract_content_after_special_chars``.

    Args:
        content: A clause entry; typically a string or a (nested) dict.

    Returns:
        Whatever ``extract_content_after_special_chars`` returns for the
        flattened content.
    """
    if isinstance(content, dict):
        pieces = (get_values_only(item) for item in content.values())
        # str.join only accepts strings: render None as an empty piece and
        # coerce any other non-string leaf (numbers, lists, ...) explicitly.
        content = " / ".join(
            "" if piece is None else str(piece) for piece in pieces
        )
    return extract_content_after_special_chars(content)

def standardize_brackets(value):
    """Return ``value`` with every half-width bracket made full-width.

    ``(`` becomes ``（`` and ``)`` becomes ``）``; all other characters are
    left as they are.
    """
    full_width = str.maketrans({'(': '（', ')': '）'})
    return value.translate(full_width)

def process_json_with_subentries(json_data, value, combined_value):
    """Look up clause ``value`` in ``json_data`` and collect its text.

    Brackets in ``value`` are normalised to full-width first. A value
    without a bracketed sub-index is delegated to ``extarct_normal``. A
    value such as ``"1.11（1）"`` is resolved in two steps: first as a
    literal key, then by extracting sub-item ``（1）`` from the text stored
    under the base key ``"1.11"``.

    Args:
        json_data: Mapping of clause numbers to their content.
        value: Clause number, optionally followed by a bracketed sub-index.
        combined_value: List that receives the collected text (mutated).

    Returns:
        A truthy value when text was appended to ``combined_value`` and a
        falsy one otherwise, so the caller can fall back to another source.
    """
    value = standardize_brackets(value)   # "1.11(1)" -> "1.11（1）"
    if "（" not in value or "）" not in value:
        return extarct_normal(json_data, value, combined_value)

    # The bracketed number may exist as a key in its own right.
    direct = get_values_only(json_data.get(value))
    if direct:
        combined_value.append(direct)
        return True

    # Split at the first "（" only, and keep just the first bracketed group,
    # so extra brackets or trailing text cannot break the unpacking. Deeper
    # levels such as "1.1（1）（2）" are resolved to their first group.
    base_key, _, remainder = value.partition("（")
    subentry_key = "（" + remainder.partition("）")[0] + "）"

    content = json_data.get(base_key.strip())
    if not content or not isinstance(content, str):
        return False
    extracted = extract_specific_subentry(content, subentry_key)
    if not extracted:
        # Nothing was collected, so report "not found".
        return False
    combined_value.append(extracted)
    return True

def find_entries_in_jsons(entries, json_primary, json_secondary):
    """Resolve each entry's clause number against two JSON sources.

    Every item of ``entries`` is a dict whose first key/value pair is
    ``{name: clause_number}``. The clause is searched in ``json_primary``
    first and in ``json_secondary`` only if the primary search reports
    nothing found.

    Args:
        entries: Iterable of single-pair dicts.
        json_primary: Preferred mapping of clause numbers to content.
        json_secondary: Fallback mapping of clause numbers to content.

    Returns:
        Dict mapping each name to its collected texts joined with newlines;
        names for which nothing was collected are omitted.
    """
    results = {}
    for entry in entries:
        name, clause = next(iter(entry.items()))
        collected = []
        if not process_json_with_subentries(json_primary, clause, collected):
            process_json_with_subentries(json_secondary, clause, collected)
        if not collected:
            continue
        results[name] = "\n".join(collected)
    return results
def process_and_merge_entries(entries_with_numbers, primary_json_path, secondary_json_path):
    """Load two JSON files and resolve the entries against them.

    Args:
        entries_with_numbers: Entries to resolve, as accepted by
            ``find_entries_in_jsons``.
        primary_json_path: Path of the JSON file searched first.
        secondary_json_path: Path of the JSON file used as fallback.

    Returns:
        The result of ``find_entries_in_jsons`` for the two loaded files.
    """
    return find_entries_in_jsons(
        entries_with_numbers,
        load_json(primary_json_path),
        load_json(secondary_json_path),
    )

def process_and_merge2(entries_with_numbers,json_path):
    """Resolve the entries against a single JSON file.

    Same as the two-file variant, with an empty dict standing in for the
    fallback source.

    Args:
        entries_with_numbers: Entries to resolve, as accepted by
            ``find_entries_in_jsons``.
        json_path: Path of the JSON file to search.

    Returns:
        The result of ``find_entries_in_jsons`` for the loaded file.
    """
    return find_entries_in_jsons(entries_with_numbers, load_json(json_path), {})


# Known issue: for clause 3.4.1 (bid bond) the extraction stops at the wrong place.
if __name__ == "__main__":
    # Manual test run. The paths are placeholders and must point at real JSON
    # files with the expected structure.
    # Earlier sample inputs, kept for reference:
    # entries_with_numbers = [{'形式评审标准.投标文件签字盖章': '3.7.3(3)'}, {'形式评审标准.多标段投标': '10.1'}, {'形式评审标准.“技术暗标”': '3.7.4(5)'}, {'响应性评审标准.投标内容': '1.3.1'}, {'响应性评审标准.工期': '1.3.2'}, {'响应性评审标准.工程质量': '1.3.3'}, {'响应性评审标准.投标有效期': '3.3.1'}, {'响应性评审标准.投标保证金': '3.4.1'}, {'响应性评审标准.分包计划': '1.11'}]
    # entries_with_numbers=[{'xxx': '3.7.3（3）'},{'xxssx': '10.1'},{'xsssxx': '3.7.4（5）'},{'a': '1.3.1'},{'b': '1.3.2'},{'c': '1.3.3'},{'d': '3.3.1'}]
    entries_with_numbers = [{'xxssx': '1.3.1'}]
    primary_json_path = 'D:\\flask_project\\flask_app\\static\\output\\c02a12c2-6f7b-49dc-b97f-c3d740c96c21\\truncate_output.json'
    secondary_json_path = 'D:\\flask_project\\flask_app\\static\\output\\c02a12c2-6f7b-49dc-b97f-c3d740c96c21\\clause1.json'

    try:
        combined_results = process_and_merge_entries(
            entries_with_numbers, primary_json_path, secondary_json_path
        )
        rendered = json.dumps(combined_results, indent=4, ensure_ascii=False)
        print("Combined Results:", rendered)
    except json.JSONDecodeError:
        print("One or more files could not be decoded. Please check the file content.")
    except FileNotFoundError:
        print("One or more JSON files were not found. Please check the file paths.")