2024-09-20 18:01:48 +08:00

101 lines
4.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import re
#这个字典可能有嵌套,你需要遍历里面的键名,对键名作判断,而不是键值,具体是这样的:如果处于同一层级的键的数量>1并且键名全由数字或点号组成。那么就将这些序号键名全部删除重新组织成一个字典格式的数据你可以考虑用字符串列表来保持部分平级的数据
#对于同级的键,如果数量>1且键名都统一那么将键名去掉用列表保持它们的键值
#对于同一个字典中,可能存在若干键值对,若它们的键值都是""或者"/" 你就将它们的键值删去,它们的键名用字符串列表保存
def is_numeric_key(key):
# 这个正则表达式匹配由数字、点、括号中的数字或单个字母(小写或大写)组成的字符串,
# 字母后跟数字,或数字后跟字母,单个字母后跟点,但不能是字母-数字-字母的组合
pattern = r'^[\d.]+$|^\(\d+\)$|^\d+$|^[a-zA-Z]$|^[a-zA-Z]\d+$|^\d+[a-zA-Z]$|^[a-zA-Z]\.$'
return re.match(pattern, key) is not None
#TODO:如果键值中存在数字就不行
#zbtest20也有问题
def contains_number_or_index(key, value):
return (isinstance(value, (int, float)) or
(isinstance(value, str) and value.isdigit()) or
'序号' in key or
(isinstance(value, str) and re.search(r'\d+', value)))
def preprocess_dict(data):
if isinstance(data, dict):
if len(data) > 1:
# 检查是否所有值都是 "" 或 "/"
if all(v == "" or v == "/" for v in data.values()):
return list(data.keys())
else:
processed = {}
for k, v in data.items():
if not contains_number_or_index(k, v):
processed_v = preprocess_dict(v)
if processed_v != "": # 只添加非空值
processed[k] = processed_v
return processed
else:
return {k: preprocess_dict(v) for k, v in data.items()}
elif isinstance(data, list):
return [preprocess_dict(item) for item in data]
else:
return data
def process_dict(data):
if not isinstance(data, dict):
return data
result = {}
numeric_keys = []
non_numeric_keys = {}
for key, value in data.items():
if is_numeric_key(key):
numeric_keys.append((key, value))
else:
non_numeric_keys[key] = value
# 处理数字键,不再要求数量>1
if numeric_keys:
result['items'] = [process_dict(item[1]) for item in sorted(numeric_keys)]
# 处理非数字键
for key, value in non_numeric_keys.items():
if isinstance(value, list) and len(value) > 1 and all(isinstance(item, dict) and len(item) == 1 for item in value):
common_key = next(iter(value[0].keys()))
if all(common_key in item and len(item) == 1 for item in value):
result[key] = [process_dict(item[common_key]) for item in value]
else:
result[key] = [process_dict(item) for item in value]
else:
result[key] = process_dict(value)
# 如果结果只包含'items'键,直接返回其值
if len(result) == 1 and 'items' in result:
return result['items']
return result
# 测试代码
input_data = {
"符合性审查": {
"说明": "评标委员会应当对符合资格的投标人的投标文件进行符合性审查,以确定其是否满足招标文件的实质性要求。",
"审查标准": [
{
"序号": 9,
"内容": "未按要求提供加盖公章及签字(签章)的;"
},
{
"序号": 1,
"内容": "符合招标文件第二章“投标人须知”中 39"
},
{
"序号": 2,
"内容": "依据财库[2019]9号文的规定招标文件采购清单中为“节能产品”的货物未提供国家确定的认证机构出具的节能产品认证证书的"
},
]
}
}
pred=preprocess_dict(input_data)
print(json.dumps(pred, ensure_ascii=False, indent=4))
# processed_data = process_dict(pred)
# print(json.dumps(processed_data, ensure_ascii=False, indent=4))