121 lines
4.8 KiB
Python
121 lines
4.8 KiB
Python
|
import json
|
|||
|
import re
|
|||
|
|
|||
|
|
|||
|
# 定义查找与目标值匹配的键的函数
|
|||
|
def find_keys_by_value(target_value, json_data):
    """Return the keys of *json_data* whose value matches *target_value*.

    Exact matches win: when at least one value equals *target_value*, only
    those keys are returned. Otherwise the keys whose string value starts
    with *target_value* are returned.

    Args:
        target_value: Value to look for. The prefix fallback only applies to
            a non-empty string (or a tuple of prefixes, as accepted by
            ``str.startswith``).
        json_data: Mapping to search.

    Returns:
        The matching keys in the mapping's iteration order; an empty list
        when nothing matches.
    """
    exact_matches = [key for key, value in json_data.items() if value == target_value]
    if exact_matches:
        return exact_matches

    # An empty target is a prefix of every string, and a non-text target
    # makes str.startswith raise TypeError, so neither can drive the
    # prefix fallback.
    if not target_value or not isinstance(target_value, (str, tuple)):
        return []

    return [
        key
        for key, value in json_data.items()
        if isinstance(value, str) and value.startswith(target_value)
    ]
|
|||
|
|
|||
|
|
|||
|
# 定义查找以特定前缀开始的键的函数
|
|||
|
def find_keys_with_prefix(key_prefix, json_data):
    """Return every key of *json_data* that starts with *key_prefix*.

    Args:
        key_prefix: Prefix each returned key must start with.
        json_data: Mapping (or other iterable of string keys) to scan.

    Returns:
        The matching keys as a list, in iteration order.
    """
    return list(filter(lambda name: name.startswith(key_prefix), json_data))
|
|||
|
|
|||
|
|
|||
|
# Extract the entries for the requested sections from already-loaded JSON data
|
|||
|
def extract_json(data, target_values):
    """Collect the sections of *data* whose text matches any target value.

    For every target value the matching keys are looked up with
    ``find_keys_by_value``. Each matched key is copied into the result
    together with its subheadings, i.e. the keys that extend it with a
    further dotted component. While copying a dotted key, the entry of its
    parent (the key with the last dotted component removed) is copied too
    when it exists in *data*.

    Args:
        data: Flat mapping from dotted section keys to their text.
        target_values: Iterable of heading texts to search for, processed
            in order.

    Returns:
        A new dict with the selected entries in insertion order.
    """
    results = {}
    for target_value in target_values:
        for key in find_keys_by_value(target_value, data):
            # A bare prefix test would treat "3.10" as a subheading of
            # "3.1"; real descendants continue with a dot after the key.
            child_prefix = key if key.endswith('.') else key + '.'
            for subkey in find_keys_with_prefix(key, data):
                if subkey != key and not subkey.startswith(child_prefix):
                    continue

                if "." in subkey:
                    parent_key = subkey.rsplit('.', 1)[0]
                    top_level_key = parent_key.split('.')[0] + '.'

                    # Special case for "定标": label the enclosing chapter
                    # with that heading unless the chapter is already present.
                    if target_value == "定标" and top_level_key not in results:
                        results[top_level_key] = "定标"

                    # Bring the parent entry along, without overwriting one
                    # that was collected earlier.
                    if parent_key not in results and parent_key in data:
                        results[parent_key] = data[parent_key]

                # The matched entry itself is always copied.
                results[subkey] = data[subkey]
    return results
|
|||
|
|
|||
|
|
|||
|
# 转换结构化的JSON数据
|
|||
|
def transform_json(data):
    """Rebuild a flat, section-numbered mapping as a nested structure.

    Keys are read as dotted section numbers with up to three numeric levels
    (for example ``"3."``, ``"3.1"``, ``"3.1.1"``); keys that do not start
    with a number are skipped. Values are expected to be strings.

    * A three-level entry is a leaf: its text is appended to the list that
      is open above it (or stored under its first word when the container
      above it is a dict).
    * A two-level entry whose text contains a newline is stored inline as
      ``first line -> remaining text``.
    * Any other entry opens a list under the first whitespace-delimited
      word of its text; later, deeper entries are placed inside it.

    An entry whose heading text is blank is filed under its section key
    instead of raising ``IndexError``. An entry whose direct parent was
    never opened is attached to the nearest open ancestor (ultimately the
    root) instead of raising ``KeyError``.

    Finally, single-item lists are replaced by their item and empty lists
    stored in a dict are replaced by ``""``.

    Args:
        data: Flat mapping from section keys to text, in document order.

    Returns:
        The nested structure (a dict for any non-degenerate input).
    """
    result = {}
    # Container currently open at each depth; depth 0 is the root dict.
    open_levels = {0: result}

    for key, value in data.items():
        match = re.match(r'(\d+)(?:\.(\d+))?(?:\.(\d+))?', key)
        if not match:
            continue
        groups = match.groups()
        depth = sum(1 for part in groups if part is not None)

        # Whatever was open at this depth or deeper belongs to an earlier
        # sibling; forget it so later children cannot land in the wrong list.
        for stale_depth in [d for d in open_levels if d >= depth]:
            del open_levels[stale_depth]

        # Attach to the nearest open ancestor; the root is always open.
        parent_depth = depth - 1
        while parent_depth not in open_levels:
            parent_depth -= 1
        parent = open_levels[parent_depth]

        # Use the first word as the heading; fall back to the section key
        # when the text is blank.
        words = value.split()
        heading = words[0] if words else key

        if depth == len(groups):
            # Deepest level: a plain text item.
            if isinstance(parent, list):
                parent.append(value)
            else:
                parent[heading] = value
            continue

        if isinstance(parent, list):
            # Headings inside a list live in its trailing dict; start a new
            # dict when the list is empty or currently ends with text.
            if not parent or not isinstance(parent[-1], dict):
                parent.append({})
            parent = parent[-1]

        if '\n' in value and depth == 2:
            # "heading\nbody" at the second level is stored inline and does
            # not open a container for children.
            first_line, body = value.split('\n', 1)
            parent[first_line.strip() or key] = body.strip()
        else:
            if heading not in parent:
                parent[heading] = []
            open_levels[depth] = parent[heading]

    return _collapse_single_item_lists(result)


def _collapse_single_item_lists(node):
    """Replace single-item lists by their item and empty dict-held lists by ""."""
    if isinstance(node, dict):
        for name in list(node):
            node[name] = _collapse_single_item_lists(node[name])
            if isinstance(node[name], list) and not node[name]:
                node[name] = ""
        return node
    if isinstance(node, list):
        if len(node) == 1:
            return _collapse_single_item_lists(node[0])
        # Longer lists are kept, but dicts nested inside them still need
        # the same clean-up.
        node[:] = [_collapse_single_item_lists(item) for item in node]
    return node
|
|||
|
|
|||
|
|
|||
|
# Read the JSON file, extract the requested sections, and return them as a nested structure
|
|||
|
def extract_from_notice(file_path, type):
    """Load a clause JSON file and return the sections for one scenario.

    Args:
        file_path: Path of the UTF-8 encoded JSON file to read.
        type: Scenario selector. ``1`` selects "投标文件" / "投标", ``2``
            selects "开标" / "评标" / "定标", and ``3`` selects the
            re-tendering / termination headings. (The name shadows the
            builtin; it is kept so existing keyword callers keep working.)

    Returns:
        The nested structure produced by ``transform_json`` from the
        entries that ``extract_json`` selects.

    Raises:
        ValueError: If *type* is not 1, 2 or 3. Raised before the file is
            opened.
    """
    targets_by_type = {
        1: ["投标文件", "投标"],
        2: ["开标", "评标", "定标"],
        3: ["重新招标、不再招标和终止招标", "重新招标", "不再招标", "终止招标"],
    }
    try:
        target_values = targets_by_type[type]
    except (KeyError, TypeError):
        # TypeError covers unhashable selectors, which are just as invalid.
        raise ValueError(
            "Invalid type specified. Use 1 for '投标文件, 投标', "
            "2 for '开标, 评标, 定标', or 3 for '重新招标, 不再招标, 终止招标'."
        ) from None

    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    extracted_data = extract_json(data, target_values)
    return transform_json(extracted_data)
|
|||
|
|
|||
|
|
|||
|
# 假设原始数据文件路径
|
|||
|
if __name__ == "__main__":
    # Demo run against a sample clause file; change the second argument
    # to exercise a different scenario.
    try:
        print(extract_from_notice('clause2.json', 3))
    except ValueError as err:
        print(err)
|