2024-08-29 16:37:09 +08:00
|
|
|
|
import json
|
2024-09-03 18:04:05 +08:00
|
|
|
|
import re
|
|
|
|
|
|
2024-08-29 16:37:09 +08:00
|
|
|
|
def load_json(file_path):
    """
    Read a UTF-8 encoded JSON file and return its bracket-standardised content.

    Args:
        file_path: Path of the JSON file to open.

    Returns:
        The decoded JSON value after it has been passed through
        ``standardize_brackets_in_json``.

    Raises:
        FileNotFoundError: Propagated from ``open`` when the path is missing.
        json.JSONDecodeError: Propagated from ``json.load`` on invalid JSON.
    """
    with open(file_path, mode='r', encoding='utf-8') as source:
        parsed = json.load(source)  # expected to be a dict (original author's note)
    return standardize_brackets_in_json(parsed)
|
|
|
|
|
|
|
|
|
|
def standardize_brackets_in_json(data):
    """
    Recursively walk a decoded JSON value and standardise brackets in its text.

    Every string found as a dict value or list element (at any depth) is
    replaced by the result of ``standardize_brackets``. Dict keys are left
    untouched, and values of any other type are returned unchanged.

    Args:
        data: A decoded JSON value (dict, list, str or scalar).

    Returns:
        A new structure of the same shape with its strings standardised.
    """
    if isinstance(data, str):
        return standardize_brackets(data)
    if isinstance(data, list):
        return [standardize_brackets_in_json(item) for item in data]
    if isinstance(data, dict):
        return {key: standardize_brackets_in_json(item) for key, item in data.items()}
    # Numbers, booleans and None carry no brackets.
    return data
|
|
|
|
|
|
|
|
|
|
def convert_dict_to_str(d):
    """
    Render a value as text, one ``key: value`` line per item for dicts.

    Args:
        d: Any value. Dicts are rendered line by line in iteration order;
            everything else goes through ``str``.

    Returns:
        The textual representation described above.
    """
    if not isinstance(d, dict):
        return str(d)
    lines = [f"{key}: {val}" for key, val in d.items()]
    return "\n".join(lines)
|
|
|
|
|
|
2024-09-03 18:04:05 +08:00
|
|
|
|
def extarct_normal(json_data, value, combined_value):
    """
    Collect the text of clause ``value``, preferring its sub-clauses.

    Sub-entries (keys starting with ``value + "."``) are gathered first via
    ``check_and_collect_subentries``. Only when none exist is the key
    ``value`` itself looked up in ``json_data``.

    Args:
        json_data: Mapping of clause numbers to clause content.
        value: Clause number to look for.
        combined_value: List that receives the collected text (mutated).

    Returns:
        True when something was appended to ``combined_value``, else False.
    """
    if check_and_collect_subentries(json_data, value, combined_value):
        return True

    # No sub-entries: fall back to a direct lookup (missing key -> "").
    direct = json_data.get(value, "")
    if not direct:
        return False
    combined_value.append(get_values_only(direct))
    return True
|
|
|
|
|
|
2024-09-03 18:04:05 +08:00
|
|
|
|
def check_and_collect_subentries(json_data, value, combined_value):
    """
    Collect the sub-entries derived from a parent clause number.

    For example, when ``value`` is "1.1", every key starting with "1.1."
    ("1.1.1", "1.1.2", ...) is collected; the key "1.1" itself does not match.
    Each match is appended to ``combined_value`` as "<n>. <text>", where n
    counts the matches from 1 in iteration order and the text comes from
    ``get_values_only``.

    Args:
        json_data: Mapping of clause numbers to clause content.
        value: Parent clause number.
        combined_value: List that receives the numbered entries (mutated).

    Returns:
        True if at least one sub-entry was found, otherwise False.
    """
    collected = 0
    for key, content in json_data.items():
        if not key.startswith(value + "."):
            continue
        collected += 1
        combined_value.append(f"{collected}. {get_values_only(content)}")
    return collected > 0
|
|
|
|
|
|
2024-09-03 18:04:05 +08:00
|
|
|
|
def extract_specific_subentry(content, subentry_key):
    """
    Extract the text of one bracket-numbered sub-item, e.g. "(2)", from a clause.

    Used for clause references that carry a bracketed sub-number. Both ASCII
    parentheses and their full-width forms (U+FF08 / U+FF09) are accepted,
    in ``subentry_key`` as well as in ``content``.

    Args:
        content: Full text of the parent clause.
        subentry_key: Bracketed sub-number such as "(3)".

    Returns:
        The stripped text between the first marker for number n and the next
        marker for n or n + 1 (or the end of ``content``). Returns "" when
        the key does not contain an integer or the marker is absent.
    """
    number_text = subentry_key
    for bracket in "()\uff08\uff09":
        number_text = number_text.replace(bracket, "")
    try:
        idx = int(number_text)
    except ValueError:
        return ""

    def marker(number):
        # Matches "(n)" written with either bracket form.
        return "[(\uff08]" + re.escape(str(number)) + "[)\uff09]"

    start = re.search(marker(idx), content)
    if start is None:
        return ""

    tail = content[start.end():]
    # The item ends at the next sibling marker, or at a repeat of its own.
    stop = re.search(f"{marker(idx)}|{marker(idx + 1)}", tail)
    if stop is not None:
        tail = tail[:stop.start()]
    return tail.strip()
|
|
|
|
|
|
2024-09-03 18:04:05 +08:00
|
|
|
|
def extract_content_after_special_chars(content):
    """
    Return the text that follows the first selection marker in ``content``.

    A marker is one of the control characters 0x01 / 0x02 or one of the
    symbols "☑", "√", "团" (presumably OCR renderings of a ticked checkbox;
    confirm against the input data). The returned text runs from just after
    the marker up to the next "□" or the end of the string, with surrounding
    whitespace stripped.

    Args:
        content: Text to scan. Falsy values are returned unchanged.

    Returns:
        The text after the marker, or ``content`` itself when no marker is
        present.
    """
    if not content:
        return content

    hit = re.search(r'[\x01\x02☑√团]([^□]*)', content)
    if hit is None:
        # No marker at all: hand the original text back untouched.
        return content
    return hit.group(1).strip()
|
2024-09-05 18:00:40 +08:00
|
|
|
|
|
|
|
|
|
def get_values_only(content):
    """
    Reduce clause content to a single text and keep only its selected part.

    A dict is flattened by joining its values with " / ". Non-string values
    (numbers, null, nested objects) are converted with ``str`` first, because
    ``str.join`` raises TypeError on anything that is not a string. The
    resulting text, or ``content`` itself when it is not a dict, is then
    passed to ``extract_content_after_special_chars``.

    Args:
        content: Clause content, typically a string or a dict of strings.

    Returns:
        Whatever ``extract_content_after_special_chars`` returns for the
        flattened content.
    """
    if isinstance(content, dict):
        content = " / ".join(
            part if isinstance(part, str) else str(part)
            for part in content.values()
        )
    # Strip everything up to a selection marker, if there is one.
    return extract_content_after_special_chars(content)
|
2024-08-29 16:37:09 +08:00
|
|
|
|
def standardize_brackets(value):
    """
    Normalise full-width parentheses (U+FF08 / U+FF09) to ASCII "(" and ")".

    The clause lookups in this module test for, and split on, the ASCII
    parentheses, so text has to be brought to that single form before it is
    compared. The previous body replaced each ASCII parenthesis with itself
    and therefore never changed its input.

    Args:
        value: Text to normalise.

    Returns:
        ``value`` with every full-width parenthesis replaced by its ASCII
        counterpart; all other characters are unchanged.
    """
    # Escapes are used so the mapping survives any re-encoding of this file.
    return value.replace('\uff08', '(').replace('\uff09', ')')
|
|
|
|
|
|
2024-09-05 18:00:40 +08:00
|
|
|
|
def process_json_with_subentries(json_data, value, combined_value):
    """
    Look up one clause reference in ``json_data`` and collect its text.

    The reference is first passed through ``standardize_brackets``. A
    reference without a bracketed sub-number is delegated to
    ``extarct_normal``. For a reference such as "1.11(1)":

    1. the full reference is tried as a key;
    2. otherwise the part before the first "(" is used as the parent key
       and, if the parent content is a string, the sub-item is cut out of it
       with ``extract_specific_subentry``.

    Args:
        json_data: Mapping of clause numbers to clause content.
        value: Clause reference, optionally with a bracketed sub-number.
        combined_value: List that receives the collected text (mutated).

    Returns:
        True when text was appended to ``combined_value``, otherwise False.
    """
    value = standardize_brackets(value)
    if "(" not in value or ")" not in value:
        return extarct_normal(json_data, value, combined_value)

    # 1) The bracketed reference may exist verbatim as a key.
    exact = json_data.get(value)
    if exact:
        exact_text = get_values_only(exact)
        if exact_text:
            combined_value.append(exact_text)
            return True

    # 2) Split at the first "(" only. partition() cannot fail when the
    #    reference holds several brackets, unlike unpacking split("(").
    base_key, _, remainder = value.partition("(")
    subentry_key = "(" + remainder
    parent = json_data.get(base_key.strip())
    if isinstance(parent, str) and parent:
        extracted = extract_specific_subentry(parent, subentry_key)
        if extracted:
            combined_value.append(extracted)
            return True

    # Every not-found path reports False explicitly rather than None.
    return False
|
|
|
|
|
|
|
|
|
|
def find_entries_in_jsons(entries, json_primary, json_secondary):
    """
    Resolve a list of ``{label: clause_number}`` entries against two JSON maps.

    Only the first item of each entry dict is used. The primary map is
    searched first; the secondary map is consulted only when the primary
    lookup reports nothing found.

    Args:
        entries: Iterable of single-item dicts mapping a label to a clause
            reference.
        json_primary: Preferred mapping of clause numbers to content.
        json_secondary: Fallback mapping of clause numbers to content.

    Returns:
        Dict mapping each label that produced text to that text, with
        multiple pieces joined by newlines. Labels without a hit are omitted.
    """
    results = {}
    first_items = (next(iter(entry.items())) for entry in entries)
    for key, value in first_items:
        pieces = []
        if not process_json_with_subentries(json_primary, value, pieces):
            # Nothing in the primary source: try the fallback.
            process_json_with_subentries(json_secondary, value, pieces)
        if pieces:
            results[key] = "\n".join(pieces)
    return results
|
2024-08-29 16:37:09 +08:00
|
|
|
|
def process_and_merge_entries(entries_with_numbers, primary_json_path, secondary_json_path):
    """
    Load two JSON files and resolve the given entries against them.

    Args:
        entries_with_numbers: Iterable of ``{label: clause_number}`` dicts.
        primary_json_path: Path of the JSON file searched first.
        secondary_json_path: Path of the JSON file used as fallback.

    Returns:
        The dict produced by ``find_entries_in_jsons``.
    """
    # Tuple elements are evaluated left to right: primary is loaded first.
    primary, secondary = load_json(primary_json_path), load_json(secondary_json_path)
    return find_entries_in_jsons(entries_with_numbers, primary, secondary)
|
|
|
|
|
|
2024-09-06 15:50:52 +08:00
|
|
|
|
def process_and_merge2(entries_with_numbers, json_path):
    """
    Single-file variant of ``process_and_merge_entries``.

    Args:
        entries_with_numbers: Iterable of ``{label: clause_number}`` dicts.
        json_path: Path of the only JSON file to search.

    Returns:
        The dict produced by ``find_entries_in_jsons`` with an empty dict as
        the secondary source.
    """
    return find_entries_in_jsons(entries_with_numbers, load_json(json_path), {})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Known issue: for clause 3.4.1 (bid security) the extracted text is cut off
# at the wrong place.
if __name__ == "__main__":
    # Manual smoke test with hypothetical entries and machine-specific paths.
    # A larger sample input, kept for reference:
    # entries_with_numbers = [{'形式评审标准.投标文件签字盖章': '3.7.3(3)'}, {'形式评审标准.多标段投标': '10.1'}, {'形式评审标准.“技术暗标”': '3.7.4(5)'}, {'响应性评审标准.投标内容': '1.3.1'}, {'响应性评审标准.工期': '1.3.2'}, {'响应性评审标准.工程质量': '1.3.3'}, {'响应性评审标准.投标有效期': '3.3.1'}, {'响应性评审标准.投标保证金': '3.4.1'}, {'响应性评审标准.分包计划': '1.11'}]
    entries_with_numbers = [{'xxx': '4.2.3'}]
    primary_json_path = 'C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\truncate_output.json'
    secondary_json_path = 'C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\clause.json'

    # Both paths must point to existing JSON files with the expected structure.
    try:
        combined_results = process_and_merge_entries(
            entries_with_numbers,
            primary_json_path,
            secondary_json_path,
        )
        rendered = json.dumps(combined_results, indent=4, ensure_ascii=False)
        print("Combined Results:", rendered)
    except FileNotFoundError:
        print("One or more JSON files were not found. Please check the file paths.")
    except json.JSONDecodeError:
        print("One or more files could not be decoded. Please check the file content.")
|