zbparse/flask_app/main/根据条款号整合json.py
2024-08-29 16:37:09 +08:00

134 lines
5.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
def load_json(file_path):
    """Read a UTF-8 JSON file and return its content with brackets normalised.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value after it has been passed through
        ``standardize_brackets_in_json``.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return standardize_brackets_in_json(parsed)
def standardize_brackets_in_json(data):
    """Recursively normalise the brackets of every string value in *data*.

    Lists and dict values are walked recursively and each string found is
    passed through ``standardize_brackets``.  Dictionary keys are kept as they
    are, and any other kind of value is returned unchanged.

    Args:
        data: A decoded JSON value (dict, list, str or scalar).

    Returns:
        A new structure of the same shape with its string values normalised.
    """
    if isinstance(data, str):
        return standardize_brackets(data)
    if isinstance(data, list):
        return [standardize_brackets_in_json(item) for item in data]
    if isinstance(data, dict):
        normalised = {}
        for name, item in data.items():
            normalised[name] = standardize_brackets_in_json(item)
        return normalised
    return data
def convert_dict_to_str(d):
    """Render *d* as text.

    A dict becomes one ``key: value`` line per item, joined with newlines;
    anything else is converted with ``str``.

    Args:
        d: The value to render.

    Returns:
        The textual form of *d*.
    """
    if not isinstance(d, dict):
        return str(d)
    rendered = []
    for name, item in d.items():
        rendered.append(f"{name}: {item}")
    return "\n".join(rendered)
def find_entries_in_jsons(entries, json_primary, json_secondary):
    """Resolve each entry's clause reference to text from the two JSON sources.

    Only the first key/value pair of every entry is used: the key labels the
    result and the value is the clause reference to look up.  The primary
    source is consulted first; the secondary source is tried only when the
    primary lookup reports that nothing was found.

    Args:
        entries: Iterable of dicts, each holding a ``label: reference`` pair.
        json_primary: Mapping searched first.
        json_secondary: Mapping searched as a fallback.

    Returns:
        A dict mapping each label that produced text to the collected
        fragments joined with newlines.  Labels without a match are omitted.
    """
    merged = {}
    for entry in entries:
        label, clause_ref = next(iter(entry.items()))
        fragments = []
        # Fall back to the secondary source only when the primary one misses.
        if not process_json_with_subentries(json_primary, clause_ref, fragments):
            process_json_with_subentries(json_secondary, clause_ref, fragments)
        if fragments:
            merged[label] = "\n".join(fragments)
    return merged
def process_json_with_subentries(json_data, value, combined_value):
    """Look up a clause reference, honouring an optional bracketed sub-item.

    The reference is first normalised with ``standardize_brackets``.  When it
    contains both a full-width opening and closing bracket (for example a
    clause number followed by a bracketed item number), the part before the
    first opening bracket is used as the key into *json_data* and the
    bracketed part selects one numbered sub-item of that clause's text via
    ``extract_specific_subentry``.  References without brackets are delegated
    to ``process_json``.

    Args:
        json_data: Mapping of clause keys to clause content.
        value: Clause reference string; brackets may be half- or full-width.
        combined_value: List that receives the text that was found.

    Returns:
        True when something was appended to *combined_value*, otherwise False.
    """
    # The bracket characters are written as escapes so they cannot be lost
    # when the file is copied through tools that strip "ambiguous" Unicode.
    left, right = '\uff08', '\uff09'

    value = standardize_brackets(value)
    if left not in value or right not in value:
        return process_json(json_data, value, combined_value)

    # partition() instead of split()+unpack: a reference holding more than one
    # opening bracket must not raise "too many values to unpack".
    base_key, _, remainder = value.partition(left)
    subentry_key = left + remainder

    content = json_data.get(base_key.strip())
    # Only plain-text clauses can be cut into numbered sub-items.
    if not content or not isinstance(content, str):
        return False

    extracted_content = extract_specific_subentry(content, subentry_key)
    if not extracted_content:
        return False
    combined_value.append(extracted_content)
    return True
def process_json(json_data, value, combined_value):
    """Collect the text for a clause reference that has no bracketed sub-item.

    Sub-clauses of *value* are gathered first through
    ``check_and_collect_subentries``.  Only when none exist is the entry
    stored directly under *value* used, reduced with ``get_values_only``.

    Args:
        json_data: Mapping of clause keys to clause content.
        value: Clause key to look up.
        combined_value: List that receives the text that was found.

    Returns:
        A truthy value when something was appended, otherwise a falsy one.
    """
    has_children = check_and_collect_subentries(json_data, value, combined_value)
    if has_children:
        return has_children

    direct = json_data.get(value, "")
    if not direct:
        return has_children
    combined_value.append(get_values_only(direct))
    return True
def check_and_collect_subentries(json_data, value, combined_value):
    """Append every entry whose key starts with ``value + "."``.

    Matching entries are appended to *combined_value* in iteration order,
    each prefixed with a running number starting at 1 (``"1. ..."``,
    ``"2. ..."``) and reduced with ``get_values_only``.

    Args:
        json_data: Mapping of clause keys to clause content.
        value: Parent clause key.
        combined_value: List that receives the numbered fragments.

    Returns:
        True if at least one matching key was found, otherwise False.
    """
    # Lazy generator: each key is tested right before it is consumed, so the
    # filtering and the appending stay interleaved.
    children = (name for name in json_data if name.startswith(value + "."))
    last_number = 0
    for last_number, name in enumerate(children, start=1):
        combined_value.append(f"{last_number}. {get_values_only(json_data[name])}")
    return last_number > 0
def extract_specific_subentry(content, subentry_key):
    """Return the text of one bracket-numbered sub-item of *content*.

    Sub-items are introduced by a number in brackets.  The text returned is
    what follows the marker for the requested number, up to the marker for
    the next number (or up to the next repetition of the same marker),
    stripped of surrounding whitespace.

    Half-width and full-width brackets are treated alike: both arguments are
    normalised to full-width brackets (U+FF08 / U+FF09) before matching, so
    any half-width brackets inside the returned text come back full-width.

    Args:
        content: Clause text that contains the numbered sub-items.
        subentry_key: The wanted item number, with or without brackets.

    Returns:
        The sub-item text, or ``""`` when the key is not a number or its
        marker does not occur in *content*.
    """
    # Escapes keep the bracket characters from being lost when the file is
    # copied through tools that strip "ambiguous" Unicode.
    left, right = '\uff08', '\uff09'
    to_full_width = {ord('('): left, ord(')'): right}

    content = content.translate(to_full_width)
    index_text = (
        subentry_key.translate(to_full_width).replace(left, "").replace(right, "")
    )
    try:
        idx = int(index_text)
    except ValueError:
        return ""

    # The marker includes the brackets so a bare digit in the running text is
    # never mistaken for the start of a sub-item.
    parts = content.split(f"{left}{idx}{right}")
    if len(parts) < 2:
        return ""
    following = parts[1].split(f"{left}{idx + 1}{right}", 1)[0]
    return following.strip()
def get_values_only(content):
    """Reduce a dict to its values joined with ``" / "``.

    Non-dict content is returned unchanged.  For a dict, string values are
    used as they are, nested dicts are flattened the same way, and any other
    value (number, list, ``None`` ...) is converted with ``str`` so that the
    join cannot fail with ``TypeError`` on non-string values.

    Args:
        content: A clause entry; either a dict or an already usable value.

    Returns:
        The joined string for a dict, otherwise *content* itself.
    """
    if not isinstance(content, dict):
        return content
    return " / ".join(
        item if isinstance(item, str) else str(get_values_only(item))
        for item in content.values()
    )
def standardize_brackets(value):
    """Convert every half-width parenthesis in *value* to its full-width form.

    ``(`` (U+0028) becomes U+FF08 and ``)`` (U+0029) becomes U+FF09, so that
    clause references and clause text compare equal whichever bracket style
    the source document used.

    Args:
        value: The string to normalise.

    Returns:
        A new string of the same length with the brackets converted.
    """
    # The targets are written as escapes: a literal full-width bracket can be
    # dropped by tools that strip "ambiguous" Unicode, which would turn this
    # conversion into a deletion of the brackets.
    return value.replace('(', '\uff08').replace(')', '\uff09')
def process_and_merge_entries(entries_with_numbers, primary_json_path, secondary_json_path):
    """Load both JSON files and resolve the given entries against them.

    Args:
        entries_with_numbers: Iterable of dicts, each holding a
            ``label: clause reference`` pair.
        primary_json_path: Path of the JSON file searched first.
        secondary_json_path: Path of the JSON file used as a fallback.

    Returns:
        The dict produced by ``find_entries_in_jsons``.
    """
    primary = load_json(primary_json_path)
    secondary = load_json(secondary_json_path)
    return find_entries_in_jsons(entries_with_numbers, primary, secondary)
if __name__ == "__main__":
    # Ad-hoc manual check; both paths must point at real JSON files.
    # Each entry is a one-pair dict {label: clause reference}.  References
    # look like '10.1' or '1.3.1', or carry a sub-item such as '3.7.3(3)'.
    sample_entries = [{'xxx': '3.7.45'}]
    primary_path = 'C:\\Users\\Administrator\\Desktop\\招标文件\\output1\\truncate_output3.json'
    secondary_path = 'C:\\Users\\Administrator\\Desktop\\招标文件\\output1\\clause3.json'
    try:
        merged = process_and_merge_entries(sample_entries, primary_path, secondary_path)
    except FileNotFoundError:
        print("One or more JSON files were not found. Please check the file paths.")
    except json.JSONDecodeError:
        print("One or more files could not be decoded. Please check the file content.")
    else:
        print("Combined Results:", json.dumps(merged, indent=4, ensure_ascii=False))