zbparse/flask_app/main/根据条款号整合json.py

162 lines
7.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import re
def load_json(file_path):
    """Read a UTF-8 encoded JSON file and return its bracket-standardized content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The parsed JSON data after it has been passed through
        ``standardize_brackets_in_json``.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return standardize_brackets_in_json(parsed)
def standardize_brackets_in_json(data):
    """Recursively apply ``standardize_brackets`` to every string in ``data``.

    Dict values and list elements are walked recursively. Dict keys are kept
    as they are, and values of any other type are returned unchanged.
    """
    if isinstance(data, str):
        return standardize_brackets(data)
    if isinstance(data, list):
        return [standardize_brackets_in_json(item) for item in data]
    if isinstance(data, dict):
        return {key: standardize_brackets_in_json(val) for key, val in data.items()}
    return data
def convert_dict_to_str(d):
    """Render a dict as newline-separated ``key: value`` lines.

    Anything that is not a dict is simply converted with ``str()``.
    """
    if not isinstance(d, dict):
        return str(d)
    lines = [f"{key}: {val}" for key, val in d.items()]
    return "\n".join(lines)
def extarct_normal(json_data, value, combined_value):
    """Collect the text for clause ``value``, preferring its sub-entries.

    Sub-entries (keys starting with ``value + "."``) are gathered first via
    ``check_and_collect_subentries``. Only when none exist is ``value`` looked
    up directly as a key of ``json_data``.

    Args:
        json_data: Mapping of clause numbers to their content.
        value: Clause number to look for.
        combined_value: List that receives the collected text (mutated in place).

    Returns:
        A truthy value when something was appended, otherwise ``False``.
    """
    has_children = check_and_collect_subentries(json_data, value, combined_value)
    if has_children:
        return has_children
    # No sub-entries: fall back to an exact key lookup ("" when the key is absent).
    direct = json_data.get(value, "")
    if not direct:
        return has_children
    combined_value.append(get_values_only(direct))
    return True
def check_and_collect_subentries(json_data, value, combined_value):
    """Collect every sub-entry derived from the main clause ``value``.

    A sub-entry is any key of ``json_data`` that starts with ``value + "."``.
    For ``value == "1.1"`` this matches "1.1.1", "1.1.2", ... but not the key
    "1.1" itself. Each hit is appended to ``combined_value`` as
    ``"<n>. <text>"``, numbered from 1 in iteration order.

    Returns:
        ``True`` when at least one sub-entry was found, otherwise ``False``.
    """
    prefix = value + "."
    matching_keys = [key for key in json_data if key.startswith(prefix)]
    for position, child_key in enumerate(matching_keys, start=1):
        combined_value.append(f"{position}. {get_values_only(json_data[child_key])}")
    return bool(matching_keys)
def extract_specific_subentry(content, subentry_key):
    """Return the text of one bracket-numbered sub-item inside ``content``.

    Used for references that carry a bracketed sub-number, e.g. "（3）".
    Both full-width ``（ ）`` and half-width ``( )`` brackets are accepted,
    in the key as well as in the content.

    Args:
        content: Clause text containing items such as "（1）... （2）...".
        subentry_key: The bracketed item number to extract, e.g. "（2）".

    Returns:
        The stripped text between the marker for the requested number and the
        marker for the following number (or the next occurrence of the same
        marker / the end of the text, whichever comes first). An empty string
        is returned when the key is not an integer or the marker is absent.
    """
    # Strip brackets of either width to get at the bare item number.
    index_text = re.sub(r"[（）()]", "", subentry_key)
    try:
        idx = int(index_text)
    except ValueError:
        return ""

    def marker(number):
        # Regex for `number` wrapped in full-width or half-width brackets.
        return rf"[（(]{re.escape(str(number))}[）)]"

    parts = re.split(marker(idx), content)
    if len(parts) < 2:
        return ""
    # parts[1] is the text right after the first marker; cut it where the
    # next numbered item begins.
    return re.split(marker(idx + 1), parts[1], maxsplit=1)[0].strip()
def extract_content_after_special_chars(content):
    """Return the text that follows the first selection marker in ``content``.

    A marker is one of the control characters 0x01 / 0x02 or one of
    ``☑``, ``√``, ``团``. The returned text runs from just after the marker up
    to the next ``□`` or the end of the content, with surrounding whitespace
    removed.
    NOTE(review): the markers presumably represent a ticked checkbox as it
    appears after text extraction, and ``□`` an unticked one - confirm.

    Args:
        content: Text to inspect. Falsy values are returned unchanged; other
            non-string values are converted with ``str()`` first.

    Returns:
        The extracted text when a marker is present, otherwise the (string
        form of the) original content.
    """
    if not content:
        return content
    if not isinstance(content, str):
        # re.search() only accepts text; numbers or lists coming from JSON
        # would otherwise raise TypeError.
        content = str(content)
    match = re.search(r'[\x01\x02☑√团]([^□]*)', content)
    if match:
        return match.group(1).strip()
    # No marker present: hand back the content as it is.
    return content
def get_values_only(content):
    """Flatten ``content`` to text and keep only the part after a selection marker.

    A dict is flattened by processing each of its values recursively and
    joining the results with " / ". The resulting text (or the non-dict
    content) is then passed to ``extract_content_after_special_chars``.

    Args:
        content: A string, a (possibly nested) dict, or any other JSON value.

    Returns:
        Whatever ``extract_content_after_special_chars`` returns for the
        flattened content; falsy non-dict input is passed through unchanged.
    """
    if isinstance(content, dict):
        pieces = []
        for nested in content.values():
            extracted = get_values_only(nested)
            # str.join() rejects non-strings, so coerce every piece; a missing
            # value contributes an empty piece instead of the text "None".
            pieces.append("" if extracted is None else str(extracted))
        content = " / ".join(pieces)
    elif content and not isinstance(content, str):
        # Numbers, lists, ... cannot be searched with a text pattern.
        content = str(content)
    return extract_content_after_special_chars(content)
def standardize_brackets(value):
    """Convert every half-width parenthesis in ``value`` to its full-width form.

    Args:
        value: Text that may contain ``(`` or ``)``.

    Returns:
        The text with ``(`` replaced by ``（`` and ``)`` replaced by ``）``.
    """
    return value.replace('(', '（').replace(')', '）')


def process_json_with_subentries(json_data, value, combined_value):
    """Find clause reference ``value`` in ``json_data`` and collect its text.

    Brackets in ``value`` are standardized to full-width first, so half-width
    and full-width references behave the same. A reference without a bracketed
    sub-number is delegated to ``extarct_normal``. A bracketed reference is
    resolved in two steps: the full reference is tried as a key, then the part
    before the bracket is looked up and the numbered item is cut out of its
    text with ``extract_specific_subentry``.

    Args:
        json_data: Mapping of clause numbers to their content.
        value: Clause reference such as "1.3.1" or "3.7.3(3)".
        combined_value: List that receives the collected text (mutated in place).

    Returns:
        A truthy value when text was appended to ``combined_value``, otherwise
        ``False``.
    """
    value = standardize_brackets(value)  # e.g. "1.11(1)" -> "1.11（1）"
    if "（" not in value or "）" not in value:
        return extarct_normal(json_data, value, combined_value)

    # Step 1: the bracketed reference may itself be a key.
    first_content = get_values_only(json_data.get(value))
    if first_content:
        combined_value.append(first_content)
        return True

    # Step 2: split "1.11（1）" into base key "1.11" and sub-entry key "（1）".
    # partition() splits on the first bracket only, so a reference with
    # several brackets cannot break the unpacking.
    base_key, _, subentry_tail = value.partition("（")
    subentry_key = "（" + subentry_tail
    content = json_data.get(base_key.strip())
    if content and isinstance(content, str):
        extracted_content = extract_specific_subentry(content, subentry_key)
        if extracted_content:
            combined_value.append(extracted_content)
            return True
    return False
def find_entries_in_jsons(entries, json_primary, json_secondary):
    """Resolve each entry's clause reference against two JSON sources.

    Every element of ``entries`` is a dict; only its first ``key: value`` pair
    is used, where the value is the clause reference to look up.

    Args:
        entries: Iterable of single-pair dicts mapping a label to a clause reference.
        json_primary: Source that is searched first.
        json_secondary: Source that is searched only when the primary has no hit.

    Returns:
        Dict mapping each label with a hit to the collected text pieces joined
        by newlines. Labels without any hit are omitted.
    """
    merged = {}
    for entry in entries:
        label, clause_ref = next(iter(entry.items()))
        collected = []
        # The secondary source is consulted only after a miss in the primary one.
        if not process_json_with_subentries(json_primary, clause_ref, collected):
            process_json_with_subentries(json_secondary, clause_ref, collected)
        if collected:
            merged[label] = "\n".join(collected)
    return merged
def process_and_merge_entries(entries_with_numbers, primary_json_path, secondary_json_path):
    """Load two JSON files and resolve the given entries against them.

    Args:
        entries_with_numbers: List of single-pair dicts (label -> clause reference).
        primary_json_path: Path of the JSON file searched first.
        secondary_json_path: Path of the JSON file used as fallback.

    Returns:
        The result dict produced by ``find_entries_in_jsons``.
    """
    # Arguments are evaluated left to right: primary file first, then secondary.
    return find_entries_in_jsons(
        entries_with_numbers,
        load_json(primary_json_path),
        load_json(secondary_json_path),
    )
def process_and_merge2(entries_with_numbers,json_path):
    """Resolve the given entries against a single JSON file.

    Works like ``process_and_merge_entries`` but with an empty dict in place
    of the secondary source.
    """
    return find_entries_in_jsons(entries_with_numbers, load_json(json_path), {})
# Known issue: for clause 3.4.1 (bid bond) the extraction cut-off is wrong.
if __name__ == "__main__":
    # Ad-hoc manual test with hypothetical entries and file paths.
    # Each entry maps a label to a clause reference; references may be plain
    # ("10.1", "1.11") or carry a bracketed sub-number ("3.7.3(3)").
    entries_with_numbers = [{'xxssx': '1.3.1'}]
    # These paths must point to existing JSON files with the expected structure.
    primary_json_path = 'D:\\flask_project\\flask_app\\static\\output\\c02a12c2-6f7b-49dc-b97f-c3d740c96c21\\truncate_output.json'
    secondary_json_path = 'D:\\flask_project\\flask_app\\static\\output\\c02a12c2-6f7b-49dc-b97f-c3d740c96c21\\clause1.json'
    try:
        combined_results = process_and_merge_entries(entries_with_numbers, primary_json_path, secondary_json_path)
    except FileNotFoundError:
        print("One or more JSON files were not found. Please check the file paths.")
    except json.JSONDecodeError:
        print("One or more files could not be decoded. Please check the file content.")
    else:
        print("Combined Results:", json.dumps(combined_results, indent=4, ensure_ascii=False))