zbparse/flask_app/main/根据条款号整合json.py

134 lines
5.5 KiB
Python
Raw Normal View History

2024-08-29 16:37:09 +08:00
import json
def load_json(file_path):
    """
    Read a UTF-8 encoded JSON file and return its bracket-normalized content.

    The parsed document is handed to ``standardize_brackets_in_json`` and the
    result of that call is returned.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return standardize_brackets_in_json(parsed)
def standardize_brackets_in_json(data):
    """
    Recursively apply ``standardize_brackets`` to every string value in ``data``.

    Dicts and lists are rebuilt with their values processed (dict keys are
    left as they are); strings are converted; anything else is returned
    unchanged.
    """
    if isinstance(data, str):
        return standardize_brackets(data)
    if isinstance(data, list):
        return [standardize_brackets_in_json(item) for item in data]
    if isinstance(data, dict):
        return {key: standardize_brackets_in_json(item) for key, item in data.items()}
    return data
def convert_dict_to_str(d):
    """
    Render a dict as one ``key: value`` line per item; other inputs go through ``str``.
    """
    if not isinstance(d, dict):
        return str(d)
    lines = []
    for name, item in d.items():
        lines.append(f"{name}: {item}")
    return "\n".join(lines)
def find_entries_in_jsons(entries, json_primary, json_secondary):
    """
    Resolve each entry's clause reference to text, preferring ``json_primary``.

    ``entries`` is an iterable of dicts; only the first key/value pair of each
    dict is used. The value is looked up in ``json_primary`` and, when that
    lookup reports nothing found, in ``json_secondary``. Entries for which no
    text was collected are omitted from the result.

    Returns a dict mapping each entry key to the collected text joined by
    newlines.
    """
    results = {}
    for entry in entries:
        key, value = next(iter(entry.items()))
        collected = []
        # The secondary document is consulted only as a fallback.
        if not process_json_with_subentries(json_primary, value, collected):
            process_json_with_subentries(json_secondary, value, collected)
        if not collected:
            continue
        results[key] = "\n".join(collected)
    return results
def process_json_with_subentries(json_data, value, combined_value):
    """
    Look up a clause reference in ``json_data`` and collect the matching text.

    A reference that carries a bracketed sub-item (for example ``3.7.3(3)``)
    is split at the opening bracket into the clause key and the sub-item
    marker; only that sub-item is extracted from the clause text via
    ``extract_specific_subentry``. A reference without a bracket pair is
    delegated to ``process_json``.

    Args:
        json_data: Mapping from clause key to clause content.
        value: Clause reference; passed through ``standardize_brackets`` first.
        combined_value: List that receives the extracted text (mutated in place).

    Returns:
        True when text was appended to ``combined_value``; False when the
        bracketed reference could not be resolved; otherwise the result of
        ``process_json``.
    """
    # Full-width brackets are written as escapes so the literals cannot be
    # silently dropped by tooling that strips non-ASCII punctuation (an empty
    # separator makes str.split raise ValueError).
    left, right = "\uff08", "\uff09"

    value = standardize_brackets(value)
    if left not in value or right not in value:
        return process_json(json_data, value, combined_value)

    # Split only at the first opening bracket so a reference with several
    # bracket groups does not break the two-name unpacking.
    base_key, subentry_key = value.split(left, 1)
    subentry_key = left + subentry_key

    content = json_data.get(base_key.strip())
    # Sub-item extraction is only defined for non-empty string content.
    if not content or not isinstance(content, str):
        return False

    extracted_content = extract_specific_subentry(content, subentry_key)
    if not extracted_content:
        return False
    combined_value.append(extracted_content)
    return True
def process_json(json_data, value, combined_value):
    """
    Collect text for an unbracketed clause reference.

    Sub-clauses (keys starting with ``value + "."``) are gathered first via
    ``check_and_collect_subentries``. Only when none exist is the entry stored
    under ``value`` itself appended, after passing through ``get_values_only``.

    Returns True when anything was appended to ``combined_value``, otherwise
    the (falsy) result of the sub-clause search.
    """
    has_children = check_and_collect_subentries(json_data, value, combined_value)
    if has_children:
        return has_children

    direct = json_data.get(value, "")
    if not direct:
        return has_children
    combined_value.append(get_values_only(direct))
    return True
def check_and_collect_subentries(json_data, value, combined_value):
    """
    Append every entry whose key starts with ``value + "."`` as a numbered line.

    Matches are taken in the iteration order of ``json_data`` and numbered from
    1; each line has the form ``"<n>. <text>"`` where the text comes from
    ``get_values_only``.

    Returns True if at least one matching key was found, else False.
    """
    prefix = value + "."
    # Lazy filter keeps the append-as-you-iterate behaviour of a plain loop.
    matching_keys = (name for name in json_data if name.startswith(prefix))
    last_number = 0
    for last_number, name in enumerate(matching_keys, start=1):
        entry_text = get_values_only(json_data[name])
        combined_value.append(f"{last_number}. {entry_text}")
    return last_number > 0
def extract_specific_subentry(content, subentry_key):
    """
    Return the text of one bracket-numbered sub-item inside a clause.

    The sub-item starts after its marker (the number wrapped in full-width
    brackets U+FF08 / U+FF09) and ends before the marker of the next number,
    or at the end of the segment when no such marker follows. Both
    ``subentry_key`` and ``content`` may use half-width or full-width
    brackets.

    Args:
        content: Clause text containing numbered sub-items.
        subentry_key: The sub-item marker, e.g. ``"(3)"``.

    Returns:
        The stripped sub-item text, or ``""`` when the key is not an integer
        or its marker does not occur in ``content``.
    """
    # Escapes keep the full-width brackets intact; matching on the bare number
    # would split the text at any occurrence of that digit.
    left, right = "\uff08", "\uff09"

    index_text = subentry_key
    for bracket in (left, right, "(", ")"):
        index_text = index_text.replace(bracket, "")
    try:
        idx = int(index_text)
    except ValueError:
        return ""

    # Normalize the clause text so half-width markers are found as well.
    normalized = content.replace("(", left).replace(")", right)
    parts = normalized.split(f"{left}{idx}{right}")
    if len(parts) < 2:
        return ""
    # Cut the segment at the next sub-item's marker, if there is one.
    return parts[1].split(f"{left}{idx + 1}{right}", 1)[0].strip()
def get_values_only(content):
    """
    Flatten a dict to its values joined by ``" / "``; return other input as is.

    Dict values are converted with ``str`` before joining, because parsed
    JSON objects may hold numbers, booleans, null or nested containers and
    ``str.join`` raises TypeError on non-string items.
    """
    if isinstance(content, dict):
        return " / ".join(str(item) for item in content.values())
    return content
def standardize_brackets(value):
    """
    Convert every half-width bracket in ``value`` to its full-width form.

    ``(`` becomes U+FF08 and ``)`` becomes U+FF09; all other characters are
    left untouched. The targets are written as escapes so the full-width
    characters cannot be lost to tooling that drops non-ASCII punctuation,
    which would turn this conversion into a deletion.
    """
    return value.replace("(", "\uff08").replace(")", "\uff09")
def process_and_merge_entries(entries_with_numbers, primary_json_path, secondary_json_path):
    """
    Load both JSON files and resolve ``entries_with_numbers`` against them.

    The primary file is loaded first, then the secondary one; the lookup
    itself is done by ``find_entries_in_jsons``, whose result is returned.
    """
    documents = [load_json(path) for path in (primary_json_path, secondary_json_path)]
    return find_entries_in_jsons(entries_with_numbers, *documents)
if __name__ == "__main__":
    # Manual smoke test with hypothetical entries; the two paths below must
    # point to existing JSON files with the expected clause structure.
    # A larger sample entry list, kept for reference:
    # entries_with_numbers = [{'形式评审标准.投标文件签字盖章': '3.7.3(3)'}, {'形式评审标准.多标段投标': '10.1'}, {'形式评审标准.“技术暗标”': '3.7.4(5)'}, {'响应性评审标准.投标内容': '1.3.1'}, {'响应性评审标准.工期': '1.3.2'}, {'响应性评审标准.工程质量': '1.3.3'}, {'响应性评审标准.投标有效期': '3.3.1'}, {'响应性评审标准.投标保证金': '3.4.1'}, {'响应性评审标准.分包计划': '1.11'}]
    entries_with_numbers = [{'xxx': '3.7.45'}]
    primary_json_path = 'C:\\Users\\Administrator\\Desktop\\招标文件\\output1\\truncate_output3.json'
    secondary_json_path = 'C:\\Users\\Administrator\\Desktop\\招标文件\\output1\\clause3.json'
    try:
        combined_results = process_and_merge_entries(
            entries_with_numbers, primary_json_path, secondary_json_path
        )
    except FileNotFoundError:
        print("One or more JSON files were not found. Please check the file paths.")
    except json.JSONDecodeError:
        print("One or more files could not be decoded. Please check the file content.")
    else:
        # Printed only when loading and merging succeeded.
        print("Combined Results:", json.dumps(combined_results, indent=4, ensure_ascii=False))