import json
import re

# Matches up to three dot-separated numeric levels at the start of a clause
# key, e.g. "3", "3.", "3.1" or "3.1.2". Compiled once instead of per key.
_CLAUSE_KEY_RE = re.compile(r'(\d+)(?:\.(\d+))?(?:\.(\d+))?')


def find_keys_by_value(target_value, json_data: dict) -> list:
    """Return the keys of *json_data* whose value matches *target_value*.

    An exact equality match is tried first. Only when nothing matches
    exactly does the search fall back to string values that merely start
    with *target_value*.

    Args:
        target_value: Heading text to look for.
        json_data: Flat mapping of clause keys to values.

    Returns:
        Matching keys in the mapping's iteration order (possibly empty).
    """
    matched_keys = [k for k, v in json_data.items() if v == target_value]
    if not matched_keys:
        matched_keys = [
            k for k, v in json_data.items()
            if isinstance(v, str) and v.startswith(target_value)
        ]
    return matched_keys


def find_keys_with_prefix(key_prefix: str, json_data: dict) -> list:
    """Return every key of *json_data* that starts with *key_prefix*.

    This is a plain textual prefix test: the prefix "3.1" also matches
    "3.10". Use it only when that is acceptable.
    """
    return [k for k in json_data if k.startswith(key_prefix)]


def _find_key_and_subheadings(key: str, json_data: dict) -> list:
    """Return *key* itself plus the keys nested below it in the numbering.

    Unlike a raw prefix test this respects segment boundaries, so "3.1"
    selects "3.1" and "3.1.2" but not the sibling sections "3.10"/"3.11".
    """
    child_prefix = key if key.endswith('.') else key + '.'
    return [k for k in json_data if k == key or k.startswith(child_prefix)]


def extract_json(data: dict, target_values) -> dict:
    """Collect the clauses whose heading matches any of *target_values*.

    For every target value the matching keys are located with
    ``find_keys_by_value``; each matched key is copied into the result
    together with all keys nested beneath it. When the direct parent of a
    copied key exists in *data* and has not been copied yet, it is copied
    as well so the hierarchy stays connected.

    Args:
        data: Flat mapping of dotted clause keys to values.
        target_values: Iterable of heading texts to extract.

    Returns:
        A new flat dict holding the selected entries, in insertion order.
    """
    results = {}
    for target_value in target_values:
        for key in find_keys_by_value(target_value, data):
            for subkey in _find_key_and_subheadings(key, data):
                if "." in subkey:
                    parent_key = subkey.rsplit('.', 1)[0]
                    top_level_key = parent_key.split('.')[0] + '.'
                    # Special case for "定标": make sure a top-level entry
                    # exists for it, without overwriting one already present.
                    if target_value == "定标" and top_level_key not in results:
                        results[top_level_key] = "定标"
                    # Pull in the direct parent once, if the data has it.
                    if parent_key not in results and parent_key in data:
                        results[parent_key] = data[parent_key]
                # Always record the current key itself.
                results[subkey] = data[subkey]
    return results


def _remove_single_item_lists(node):
    """Simplify the tree built by ``transform_json``.

    Dict values are processed recursively; an empty list stored in a dict
    becomes "". A list with exactly one element is replaced by that
    (recursively simplified) element. Lists with two or more elements are
    returned untouched, i.e. their items are not visited.
    """
    if isinstance(node, dict):
        for key in list(node.keys()):
            node[key] = _remove_single_item_lists(node[key])
            if isinstance(node[key], list) and not node[key]:
                node[key] = ""  # empty list -> empty string
    elif isinstance(node, list) and len(node) == 1:
        return _remove_single_item_lists(node[0])
    return node


def transform_json(data: dict):
    """Turn a flat, dotted-key clause mapping into a nested structure.

    Keys are parsed with a regex accepting one to three numeric levels
    ("3", "3.1", "3.1.2"); keys that do not start with a number are
    skipped. Entries are processed in iteration order, and each entry is
    attached to the container most recently registered for the level
    above it.

    * Third-level entries are leaves: appended to a parent list, or stored
      in a parent dict under the first whitespace-separated word of the
      value.
    * Second-level entries containing a newline are split once on it into
      a ``title -> body`` pair.
    * Every other entry opens a new list under the first word of its value
      and becomes the container for the next deeper level.

    Finally single-element lists are unwrapped and empty lists stored in
    dicts are replaced by "".

    Args:
        data: Flat mapping of clause keys to string values.

    Returns:
        The nested structure (normally a dict).

    Raises:
        KeyError: If an entry appears before any heading of the level
            above it has been registered.
        IndexError: If a value whose first word is needed is empty or
            whitespace only.
    """
    result = {}
    # containers[d] is the container that entries at depth d + 1 attach to.
    containers = {0: result}
    for key, value in data.items():
        match = _CLAUSE_KEY_RE.match(key)
        if not match:
            continue
        groups = match.groups()
        levels = [int(group) for group in groups if group is not None]
        depth = len(levels)
        parent = containers[depth - 1]

        if depth == len(groups):
            # Deepest supported level: always a leaf.
            if isinstance(parent, list):
                parent.append(value)
            else:
                # No list to append to: key the leaf by its first word.
                parent[value.split()[0]] = value
            continue

        new_key = value.split()[0]
        if '\n' in value and depth == 2:
            # "title\nbody" at the second level becomes a key/value pair.
            new_key, new_value = value.split('\n', 1)
            new_key = new_key.strip()
            new_value = new_value.strip()
            if isinstance(parent, list):
                # Make sure the list ends with a dict to write into.
                if len(parent) == 0 or not isinstance(parent[-1], dict):
                    parent.append({})
                parent[-1][new_key] = new_value
            else:
                parent[new_key] = new_value
        else:
            if isinstance(parent, list):
                if len(parent) == 0 or not isinstance(parent[-1], dict):
                    parent.append({})
                parent = parent[-1]
            if new_key not in parent:
                parent[new_key] = []
            # Deeper entries that follow are attached to this heading.
            containers[depth] = parent[new_key]

    return _remove_single_item_lists(result)


def extract_from_notice(clause_path, type):
    """Load a clause JSON file, extract one topic group and nest it.

    Args:
        clause_path: Path of a UTF-8 encoded JSON file holding the flat
            clause mapping.
        type: Topic selector. 1 selects "投标文件"/"投标"; 2 selects
            "开标"/"评标"/"定标"; 3 selects the re-tendering and
            termination headings. (The name shadows the builtin but is
            kept so existing keyword callers continue to work.)

    Returns:
        The structure produced by ``transform_json`` for the extracted
        clauses.

    Raises:
        ValueError: If *type* is not 1, 2 or 3.
    """
    if type == 1:
        target_values = ["投标文件", "投标"]
    elif type == 2:
        target_values = ["开标", "评标", "定标"]
    elif type == 3:
        target_values = ["重新招标、不再招标和终止招标", "重新招标", "不再招标", "终止招标"]
    else:
        raise ValueError(
            "Invalid type specified. Use 1 for '投标文件, 投标', "
            "2 for '开标, 评标, 定标', or "
            "3 for '重新招标, 不再招标, 终止招标'."
        )
    with open(clause_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    extracted_data = extract_json(data, target_values)
    return transform_json(extracted_data)


if __name__ == "__main__":
    # Sample input file; adjust the path as needed.
    file_path = 'clause2.json'
    try:
        # Change the second argument to exercise the other topic groups.
        res = extract_from_notice(file_path, 3)
        print(res)
    except ValueError as e:
        print(e)