import json
import re


def compare_headings(current: str, new: str) -> bool:
    """Return True when heading number ``new`` comes after ``current``.

    Both arguments are dotted heading numbers such as ``'7.2'`` or
    ``'7.2.1'``. Each is split on ``'.'``; components that are not plain
    decimal integers (empty strings from a trailing dot, letters, etc.) are
    ignored. The remaining integer sequences are compared component by
    component, and when one is a prefix of the other the longer one is
    considered later.

    >>> compare_headings('1.9', '1.10')
    True
    >>> compare_headings('1.1', '1.1.1')
    True
    >>> compare_headings('1.1', '1.1')
    False

    Args:
        current: The heading number seen so far.
        new: The candidate heading number.

    Returns:
        True if ``new`` is strictly later than ``current``, else False.
    """
    # str.isdecimal() instead of str.isdigit(): isdigit() also accepts
    # characters such as superscript digits (e.g. a footnote marker glued to
    # a heading number) that int() cannot parse, which would raise
    # ValueError.
    current_nums = [int(part) for part in current.split('.') if part.isdecimal()]
    new_nums = [int(part) for part in new.split('.') if part.isdecimal()]

    # List comparison is lexicographic: the first differing component
    # decides, and if one list is a prefix of the other the longer list is
    # the greater one. Equal lists compare as not greater.
    return new_nums > current_nums
def should_add_newline(content, keywords, max_length=20):
    """Decide whether a line break should follow the text gathered so far.

    Args:
        content: Iterable of string fragments; they are concatenated and
            stripped of surrounding whitespace before being inspected.
        keywords: Iterable of substrings to look for in the joined text.
        max_length: Joined text of at most this many characters also
            qualifies.

    Returns:
        True if the joined text contains any keyword or is no longer than
        ``max_length``; otherwise False.
    """
    joined = ''.join(content).strip()
    for keyword in keywords:
        if keyword in joined:
            return True
    return len(joined) <= max_length


def handle_content_append(current_content, line_content, append_newline, keywords):
    """Append ``line_content`` to ``current_content``, optionally after a newline.

    ``current_content`` is mutated in place. When ``append_newline`` is
    truthy, ``should_add_newline`` is consulted on the content gathered so
    far and a ``'\\n'`` entry is inserted before the new line if it agrees.

    Args:
        current_content: List of string fragments collected so far.
        line_content: The fragment to append.
        append_newline: Flag requesting the newline check for this call.
        keywords: Passed through to ``should_add_newline``.

    Returns:
        The updated ``append_newline`` flag: the value passed in when it was
        falsy, otherwise False.
    """
    if not append_newline:
        current_content.append(line_content)
        return append_newline

    if should_add_newline(current_content, keywords):
        current_content.append('\n')
    # NOTE(review): the flag is cleared after a single check whether or not
    # a newline was inserted. This nesting was reconstructed from a
    # whitespace-collapsed diff -- confirm against the original indentation.
    current_content.append(line_content)
    return False


# def parse_text_by_heading(text):
#     keywords = ['包含', '以下']
#     data = {}
#     current_key = None
#     current_content = []
#     append_newline = False
#     skip_subheadings = False
#
#     lines = text.split('\n')
#     for i, line in enumerate(lines):
#         line_stripped = line.strip().replace('.', '.')
#         # print(line_stripped)
#         # Match second-/third-level headings such as '1.1' or '2.2.3' and make sure no letters or brackets surround them
#         match = re.match(r'^(?