import json
import re
from collections import defaultdict


# Post-processing applied when passing technical parameter requirements
# (2024-12-27 revision): when a key name is duplicated but its value is
# identical, no '-a'/'-b' suffix is appended.
def extract_matching_keys(data, good_list, special_keys=None, parent_key=''):
    def get_suffix(n):
        """
        Return the letter suffix corresponding to the number n.
        1 -> 'a', 2 -> 'b', ..., 26 -> 'z', 27 -> 'aa', 28 -> 'ab', ...
        """
        suffix = ''
        while n > 0:
            n, r = divmod(n - 1, 26)
            suffix = chr(97 + r) + suffix
        return suffix

    def count_matching_keys(data, patterns, special_keys, key_value_map=None):
        """
        Recursively count the matching keys and record their unique values.
        Only keys whose values are lists are counted; keys listed in
        special_keys are excluded.
        """
        if key_value_map is None:
            key_value_map = defaultdict(list)

        if isinstance(data, dict):
            for key, value in data.items():
                clean_key = key.replace(" ", "")  # strip spaces from the key
                if isinstance(value, list):
                    if clean_key not in special_keys and any(pattern.match(clean_key) for pattern in patterns):
                        value_tuple = tuple(value)
                        if value_tuple not in key_value_map[clean_key]:
                            key_value_map[clean_key].append(value_tuple)
                elif isinstance(value, dict):
                    count_matching_keys(value, patterns, special_keys, key_value_map)
        elif isinstance(data, list):
            for item in data:
                if isinstance(item, (dict, list)):
                    count_matching_keys(item, patterns, special_keys, key_value_map)

        return key_value_map

    def assign_suffixes(key_value_map):
        """
        Assign a suffix to every unique value of each key.
        Returns a dict keyed by the original key name; each entry maps a value
        tuple to its suffix (an empty string when no suffix is needed).
        """
        suffix_assignment = defaultdict(dict)
        for key, values in key_value_map.items():
            if len(values) == 1:
                suffix_assignment[key][values[0]] = ''  # a single unique value needs no suffix
            else:
                for idx, val in enumerate(values, start=1):
                    if idx == 1:
                        suffix = ''  # the first unique value gets no suffix
                    else:
                        suffix = '-' + get_suffix(idx - 1)  # later values start at '-a'
                    suffix_assignment[key][val] = suffix
        return suffix_assignment
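    # Illustrative mapping (hypothetical data, not from the original code): if
    # key_value_map is {"接口": [("RS485",), ("RS232",)]}, assign_suffixes
    # returns {"接口": {("RS485",): '', ("RS232",): '-a'}}, so only the second
    # distinct value picks up a suffix.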

    def process_data(data, patterns, special_keys, suffix_assignment, filtered_data, parent_key):
        """Recursively walk the data and build the result."""
        if isinstance(data, dict):
            for key, value in data.items():
                clean_key = key.replace(" ", "")  # strip spaces from the key
                current_parent_key = clean_key if parent_key == '' else f"{parent_key}的{clean_key}"

                if isinstance(value, list):
                    if clean_key in special_keys:
                        # Special keys: prefix with the parent key path
                        new_key = current_parent_key
                        filtered_data[new_key] = value
                    elif any(pattern.match(clean_key) for pattern in patterns):
                        # Ordinary matching keys
                        # Check whether the key starts with a marker symbol
                        if clean_key.startswith(('▲', '★', '●', '■', '◆', '☆', '△', '◇', '○', '□')):
                            symbol = clean_key[0]
                            stripped_key = clean_key[1:]
                            value_tuple = tuple(value)
                            suffix = suffix_assignment.get(stripped_key, {}).get(value_tuple, '')
                            if suffix:
                                new_key = f"{stripped_key}{suffix}"
                            else:
                                new_key = stripped_key
                            # Prepend the symbol to every string in the list
                            new_value = [symbol + item for item in value]
                            filtered_data[new_key] = new_value
                        else:
                            # Look up the suffix assigned to the current value
                            value_tuple = tuple(value)
                            suffix = suffix_assignment.get(clean_key, {}).get(value_tuple, '')
                            if suffix:
                                new_key = f"{clean_key}{suffix}"
                            else:
                                new_key = clean_key
                            filtered_data[new_key] = value
                elif isinstance(value, dict):
                    # Recurse into nested dicts
                    process_data(value, patterns, special_keys, suffix_assignment, filtered_data, current_parent_key)
        elif isinstance(data, list):
            for item in data:
                if isinstance(item, (dict, list)):
                    process_data(item, patterns, special_keys, suffix_assignment, filtered_data, parent_key)

    def generate_patterns(good_list):
        """Build the list of regular expressions used for key matching."""
        return [re.compile(r'^' + re.escape(g) + r'(?:-\d+)?$') for g in good_list]
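    # Illustrative behaviour (example entry, not from the original code): for a
    # good_list entry "接口" the compiled pattern is r'^接口(?:-\d+)?$', which
    # matches the key "接口" itself and numbered variants such as "接口-1" or
    # "接口-12", but not "接口A" or keys that merely contain "接口".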

    if special_keys is None:
        special_keys = ["系统功能"]  # default value

    # Strip spaces from the entries of good_list
    clean_good_list = [g.replace(" ", "") for g in good_list]

    # Build the matching regular expressions
    patterns = generate_patterns(clean_good_list)

    # First collect every matching key and its unique list values
    key_value_map = count_matching_keys(data, patterns, special_keys)

    # Assign a suffix to each unique value of every key
    suffix_assignment = assign_suffixes(key_value_map)

    # Holds the final result
    filtered_data = {}

    # Recursively process the data
    process_data(data, patterns, special_keys, suffix_assignment, filtered_data, parent_key)

    return filtered_data
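

# A minimal usage sketch (illustrative data, not from the original module):
#
#     data = {"产品A": {"接口": ["RS485"], "系统功能": ["远程控制"]},
#             "产品B": {"接口": ["RS232"]}}
#     extract_matching_keys(data, good_list=["接口"])
#
# would yield {"接口": ["RS485"], "产品A的系统功能": ["远程控制"], "接口-a": ["RS232"]}:
# the two distinct "接口" values receive distinct keys, while the special key
# "系统功能" is prefixed with its parent key path.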


def postprocess(data):
    """Recursively convert a dict value into a list of its keys when every
    value in that dict is '/', '未知', or an empty dict ({})."""
    def convert_dict(value):
        # If every value is '/', '未知', or an empty dict
        if all(v in ['/', '未知', {}] for v in value.values()):
            return list(value.keys())
        else:
            # Otherwise recurse into nested dicts
            return {k: convert_dict(v) if isinstance(v, dict) else v for k, v in value.items()}

    # Process the top-level data
    return {key: convert_dict(val) if isinstance(val, dict) else val for key, val in data.items()}
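

# For illustration (hypothetical input, not from the original module):
# postprocess({"系统功能": {"视频监控": "/", "违章抓拍": "未知"}}) would return
# {"系统功能": ["视频监控", "违章抓拍"]}, since every leaf value is a placeholder.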


def all_postprocess(data):
    def recursive_process(item):
        pattern = re.compile(r'(.+)-\d+$')

        if isinstance(item, dict):
            cleaned_dict = {
                key: recursive_process(value)
                for key, value in item.items()
                if not (pattern.match(key) and value == [])  # drop entries whose key matches '<name>-<number>' and whose value is []
            }
            return cleaned_dict
        elif isinstance(item, list):
            return remove_common_prefixes(item)
        else:
            return item

    temp = restructure_data(data)
    processed_data = recursive_process(temp)
    return processed_data
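

# Sketch of the pipeline (hypothetical data): restructure_data() first normalises
# the nesting, then recursive_process() drops suffixed-but-empty entries such as
# "交通信号机-1": [] and strips repeated prefixes from the remaining lists via
# remove_common_prefixes(); the __main__ block below is a runnable example.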


def restructure_data(data):
    """
    Restructure the data so that the nesting is normalised to three levels.
    - If every top-level value is a list (a two-level structure), return the data unchanged.
    - If two-level and three-level structures are mixed, or deeper levels exist,
      rebuild everything into a uniform three-level structure.
    """
    def get_max_depth(d, current_depth=1):
        """Compute the maximum nesting depth of a dict."""
        if not isinstance(d, dict) or not d:
            return current_depth
        return max(get_max_depth(v, current_depth + 1) for v in d.values())

    # Check whether every top-level value is a list (i.e. a two-level structure)
    all_two_layers = all(isinstance(value, list) for value in data.values())

    if all_two_layers:
        # Everything is already two levels deep; return as is
        return data

    # Otherwise the structure is mixed or deeper and must be rebuilt as three levels
    structured_data = {}
    for key, value in data.items():
        if isinstance(value, dict):
            # Check whether any sub-value is itself a dict (original depth >= 4)
            has_deeper = any(isinstance(sub_value, dict) for sub_value in value.values())
            if has_deeper:
                # Deeper levels exist: lift the sub-keys up to the third level
                for sub_key, sub_value in value.items():
                    if isinstance(sub_value, dict):
                        # Keep as three levels
                        structured_data[sub_key] = sub_value
                    elif isinstance(sub_value, list):
                        # Promote a two-level entry to three levels
                        structured_data[sub_key] = {sub_key: sub_value}
                    else:
                        raise ValueError(f"键'{sub_key}'的数据格式异常: {type(sub_value)}")
            else:
                # Already a three-level structure; keep unchanged
                structured_data[key] = value
        elif isinstance(value, list):
            # Promote a two-level entry to three levels
            structured_data[key] = {key: value}
        else:
            raise ValueError(f"键'{key}'的数据格式异常: {type(value)}")

    # Check the depth of the rebuilt data
    max_depth = get_max_depth(structured_data)
    if max_depth > 3:
        # Recurse to flatten further
        return restructure_data(structured_data)
    else:
        return structured_data
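

# Illustrative behaviour (hypothetical input):
# restructure_data({"A": ["x"], "B": {"C": ["y"]}}) mixes two-level and
# three-level entries, so it returns {"A": {"A": ["x"]}, "B": {"C": ["y"]}};
# every leaf list then sits exactly three levels deep.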


# Collect every prefix of s that ends with a colon (full-width or ASCII)
def get_prefixes(s):
    prefixes = []
    for i in range(len(s)):
        if s[i] in [':', ':']:
            prefixes.append(s[:i+1])
    return prefixes


# Remove the common prefixes shared across the list
def remove_common_prefixes(string_list, min_occurrence=3):
    """
    Remove every common prefix that occurs in at least min_occurrence strings.

    Args:
        string_list (list): List of strings.
        min_occurrence (int): Minimum number of strings a prefix must appear in.

    Returns:
        list: The strings with the common prefixes removed.
    """
    if not string_list:
        return string_list

    # Map each prefix to the set of strings that contain it
    prefix_to_strings = {}
    for s in string_list:
        prefixes = get_prefixes(s)
        unique_prefixes = set(prefixes)
        for prefix in unique_prefixes:
            if prefix not in prefix_to_strings:
                prefix_to_strings[prefix] = set()
            prefix_to_strings[prefix].add(s)

    # Collect every prefix that occurs in at least min_occurrence strings
    qualifying_prefixes = [prefix for prefix, strings in prefix_to_strings.items() if len(strings) >= min_occurrence]

    if not qualifying_prefixes:
        # No prefix qualifies; return the original list
        return string_list

    # Sort by length, longest first, so longer prefixes are matched before shorter ones
    qualifying_prefixes.sort(key=len, reverse=True)

    # Repeatedly strip matching prefixes from each string
    new_string_list = []
    for s in string_list:
        changed = True
        while changed:
            changed = False
            for prefix in qualifying_prefixes:
                if s.startswith(prefix):
                    s = s[len(prefix):]
                    changed = True
                    # After removing one prefix, restart the scan in case several prefixes stack
                    break
        new_string_list.append(s)

    return new_string_list
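

# For illustration (hypothetical input): with the default min_occurrence=3,
# remove_common_prefixes(["规格:电压:220V", "规格:电流:5A", "规格:功率:1kW"])
# strips the shared "规格:" prefix and returns ["电压:220V", "电流:5A", "功率:1kW"];
# prefixes appearing in fewer than three strings are left untouched.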


if __name__ == "__main__":
    # Sample data
    sample_data = {
        "★交通信号机": [
            "应采用区域控制信号机,并应与广水市交通信号控制系统兼容,信号机能接入已有系统平台,实现联网优化功能。",
            "1、控制功能:(1)区域协调控制:可对单个孤立交叉口、干道多个交叉口和关联性较强的交叉口群进行综合性地信号控制。",
            "1、控制功能:(2)线性协调控制:可对干道多个相邻交叉口进行协调控制。",
            "1、控制功能:(3)多时段控制:可根据交叉口的交通状况,将每天划分为多个不同的时段,每个时段配置不同的控制方案,能设置至少 10个时段、10种以上不同控制方案,能根据不同周日类型对方案进行调整。信号机能够根据内置时钟选择各个时段的控制方案,实现交叉口的合理控制。",
            "2、采集功能:(1)信号机支持接入线圈、地磁、视频、微波、超声波检测器、RFID等多种检测方式。",
            "2、采集功能:(2)信号机支持交通信息采集与统计,并支持交通流量共享。",
            "3、运维功能:(1)信号机能够自动检测地磁故障,若故障,能够自动上传故障信息至监控中心。"
        ],
        "▲高清视频抓拍像机": [
            "1:摄像机:有效像素:≥900W像素",
            "1:摄像机:最低照度:彩色≤0.001lx",
            "1:摄像机:传感器类型:≥1英寸全局曝光 COMS/GMOS/GS COMS",
            "1:摄像机:电子快门:至少满足 1/25s至 1/100,000s,可调",
            "2:视频图像:视频压缩标准:至少支持 H264、H265等",
            "2:视频图像:视频分辨率:≥4096×2160,向下可设置",
        ],
    }

    # Process the data
    result = all_postprocess(sample_data)

    # Print the processed result
    print(json.dumps(result, ensure_ascii=False, indent=4))