2024-11-18 14:25:03 +08:00
|
|
|
|
import json
|
2024-11-18 16:12:11 +08:00
|
|
|
|
import re
|
|
|
|
|
import string
|
|
|
|
|
from collections import OrderedDict
|
2024-11-21 16:22:22 +08:00
|
|
|
|
from collections import defaultdict
|
2024-11-18 16:12:11 +08:00
|
|
|
|
#传输技术参数需求的时候后处理
|
2024-11-21 16:22:22 +08:00
|
|
|
|
def get_suffix(n):
    """Return the lowercase letter suffix for the 1-based index ``n``.

    The numbering is bijective base-26:
    1 -> 'a', 2 -> 'b', ..., 26 -> 'z', 27 -> 'aa', 28 -> 'ab', ...
    Any ``n`` that is not greater than zero yields an empty string.
    """
    letters = []
    remaining = n
    while remaining > 0:
        # Shift to 0-based before dividing so that 26 maps to 'z' rather
        # than rolling over into a second letter.
        remaining, offset = divmod(remaining - 1, 26)
        letters.append(chr(ord('a') + offset))
    # Letters were produced least-significant first.
    return ''.join(reversed(letters))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_matching_keys(data, good_list, special_keys=None):
    """Flatten a two-level dict, keeping only inner keys named in ``good_list``.

    An inner key is kept when it equals an entry of ``good_list``, optionally
    followed by ``-<digits>``. Kept entries are renamed as follows:

    * keys listed in ``special_keys`` become ``"<parent key>的<inner key>"``;
    * other keys that appear under more than one parent receive a letter
      suffix (``-a``, ``-b``, ...) in order of appearance;
    * every remaining key keeps its original name.

    Args:
        data (dict): Mapping of parent key -> dict of inner key -> value.
        good_list (list): Inner key names that should be kept.
        special_keys (list, optional): Inner keys that get the
            parent-qualified name. ``None`` means no special keys.

    Returns:
        dict: Flat mapping from the new key names to the original values.
        When two entries resolve to the same new name, the later one wins.
    """
    special = [] if special_keys is None else special_keys

    # One anchored pattern per wanted name, tolerating a trailing "-<digits>".
    matchers = [re.compile(r'^' + re.escape(name) + r'(?:-\d+)?$') for name in good_list]

    def is_wanted(candidate):
        return any(matcher.match(candidate) for matcher in matchers)

    # Pass 1: count how often each wanted, non-special key appears.
    occurrences = defaultdict(int)
    for children in data.values():
        for child_key in children.keys():
            if child_key not in special and is_wanted(child_key):
                occurrences[child_key] += 1

    # Running suffix index, tracked only for keys seen more than once.
    next_index = {name: 0 for name, seen in occurrences.items() if seen > 1}

    # Pass 2: build the flat result, renaming where required.
    result = {}
    for parent_key, children in data.items():
        for child_key, payload in children.items():
            if not is_wanted(child_key):
                continue
            if child_key in special:
                # Special keys are qualified with their parent key.
                renamed = f"{parent_key}的{child_key}"
            elif child_key in next_index:
                # Repeated keys get the next letter suffix.
                next_index[child_key] += 1
                renamed = f"{child_key}-{get_suffix(next_index[child_key])}"
            else:
                renamed = child_key
            result[renamed] = payload

    return result
|
2024-11-18 16:12:11 +08:00
|
|
|
|
|
2024-11-18 14:25:03 +08:00
|
|
|
|
def postprocess(data):
    """Collapse placeholder-only dicts under ``data`` into lists of their keys.

    Each dict-valued entry of ``data`` is inspected recursively. A dict whose
    values are all one of '/', '未知' or an empty dict is replaced by the list
    of its keys (an empty dict therefore becomes an empty list). Any other
    dict is kept, with the same rule applied to its dict-valued entries.
    Non-dict values are returned unchanged, and ``data`` itself is never
    collapsed.
    """
    placeholders = ['/', '未知', {}]

    def collapse(node):
        # Only dicts are transformed; everything else passes through.
        if not isinstance(node, dict):
            return node

        only_placeholders = True
        for entry in node.values():
            if entry not in placeholders:
                only_placeholders = False
                break

        if only_placeholders:
            return list(node.keys())
        # Mixed content: keep the dict and descend into its children.
        return {name: collapse(child) for name, child in node.items()}

    return {name: collapse(child) for name, child in data.items()}
|
|
|
|
|
|
2024-11-20 15:44:05 +08:00
|
|
|
|
|
2024-11-18 14:25:03 +08:00
|
|
|
|
def all_postprocess(data):
    """Normalise the nesting of ``data`` and strip shared prefixes from its lists.

    The input is first passed through ``restructure_data``. Every list found
    in the resulting dict tree is then replaced by the output of
    ``remove_common_prefixes``; dicts are walked recursively and any other
    value is kept unchanged.
    """
    def walk(node):
        if isinstance(node, dict):
            return {name: walk(child) for name, child in node.items()}
        if isinstance(node, list):
            # Lists are terminal: they are handed over as a whole.
            return remove_common_prefixes(node)
        return node

    return walk(restructure_data(data))
|
|
|
|
|
def detect_depth(data):
    """Return the nesting depth of ``data``.

    A dict counts as one level plus the depth of its deepest value (an empty
    dict has depth 1). A list is treated as a terminal layer of depth 1,
    regardless of its contents. Anything else has depth 0.
    """
    if isinstance(data, dict):
        deepest_child = 0
        for child in data.values():
            deepest_child = max(deepest_child, detect_depth(child))
        return deepest_child + 1
    if isinstance(data, list):
        return 1
    return 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def restructure_data(data):
    """Normalise ``data`` so that its top-level values share one nesting shape.

    * If every top-level value is a list, ``data`` itself is returned.
    * Otherwise a new dict is built: dict values are kept as they are and
      each list value ``v`` under key ``k`` is wrapped as ``{k: v}``.

    Raises:
        ValueError: If a top-level value is neither a dict nor a list.
    """
    saw_list = False
    saw_dict = False

    # Validate every value and record which shapes are present.
    for name, payload in data.items():
        if isinstance(payload, dict):
            saw_dict = True
        elif isinstance(payload, list):
            saw_list = True
        else:
            raise ValueError(f"Unexpected data format for key '{name}': {type(payload)}")

    # Purely list-valued input needs no change.
    if saw_list and not saw_dict:
        return data

    # After validation every value is a dict or a list, so anything that is
    # not a dict is a list and gets wrapped under its own key.
    return {
        name: payload if isinstance(payload, dict) else {name: payload}
        for name, payload in data.items()
    }
|
|
|
|
|
|
|
|
|
|
# 定义获取所有以':'结尾的前缀的函数
|
|
|
|
|
def get_prefixes(s):
|
|
|
|
|
prefixes = []
|
|
|
|
|
for i in range(len(s)):
|
|
|
|
|
if s[i] == ':':
|
|
|
|
|
prefixes.append(s[:i+1])
|
|
|
|
|
return prefixes
|
|
|
|
|
|
|
|
|
|
# 定义删除公共前缀的函数
|
|
|
|
|
def remove_common_prefixes(string_list):
    """Strip the longest shared prefix from each string in ``string_list``.

    Candidate prefixes come from ``get_prefixes``. A prefix counts as shared
    when at least two distinct strings in the list have it. For each string,
    the longest shared prefix it starts with is removed; strings without a
    shared prefix are kept unchanged. The output preserves the input order
    and length.
    """
    # Map each candidate prefix to the distinct strings that contain it.
    owners = {}
    for text in string_list:
        for candidate in set(get_prefixes(text)):
            owners.setdefault(candidate, set()).add(text)

    # Keep only the prefixes carried by two or more distinct strings.
    shared = [candidate for candidate, members in owners.items() if len(members) >= 2]

    def strip_longest(text):
        hits = [candidate for candidate in shared if text.startswith(candidate)]
        if not hits:
            return text
        # All hits are prefixes of the same string, so the longest is unique.
        return text[len(max(hits, key=len)):]

    return [strip_longest(text) for text in string_list]
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo entry point: runs the post-processing pipeline on a small fixture
    # and prints the result as JSON.

    # Sample data: device name -> list of requirement strings. Several
    # strings share leading segments that end in a full-width colon.
    sample_data = {
        "交通信号机": [
            "★应采用区域控制信号机,并应与广水市交通信号控制系统兼容,信号机能接入已有系统平台,实现联网优化功能。",
            "1、控制功能:(1)区域协调控制:可对单个孤立交叉口、干道多个交叉口和关联性较强的交叉口群进行综合性地信号控制。",
            "1、控制功能:(2)线性协调控制:可对干道多个相邻交叉口进行协调控制。",
            "1、控制功能:(3)多时段控制:可根据交叉口的交通状况,将每天划分为多个不同的时段,每个时段配置不同的控制方案,能设置至少 10个时段、10种以上不同控制方案,能根据不同周日类型对方案进行调整。信号机能够根据内置时钟选择各个时段的控制方案,实现交叉口的合理控制。",
            "2、采集功能:(1)信号机支持接入线圈、地磁、视频、微波、超声波检测器、RFID等多种检测方式。",
            "2、采集功能:(2)信号机支持交通信息采集与统计,并支持交通流量共享。",
            "3、运维功能:(1)信号机能够自动检测地磁故障,若故障,能够自动上传故障信息至监控中心。"
        ],
        "高清视频抓拍像机": [
            "1:摄像机:有效像素:≥900W像素",
            "1:摄像机:最低照度:彩色≤0.001lx",
            "1:摄像机:传感器类型:≥1英寸全局曝光 COMS/GMOS/GS COMS",
            "1:摄像机:电子快门:至少满足 1/25s至 1/100,000s,可调",
            "2:视频图像:视频压缩标准:至少支持 H264、H265等",
            "2:视频图像:视频分辨率:≥4096×2160,向下可设置",
        ],
    }

    # Process the data.
    result = all_postprocess(sample_data)

    # Print the processed result; ensure_ascii=False keeps the Chinese text
    # readable instead of emitting \u escapes.
    print(json.dumps(result,ensure_ascii=False,indent=4))
|
|
|
|
|
|