11.18 技术参数修改
This commit is contained in:
parent
d77dde3f6e
commit
a7abff0f8c
@ -7,6 +7,8 @@ from flask_app.general.format_change import pdf2docx, docx2pdf
|
||||
from flask_app.general.通义千问long import upload_file
|
||||
from flask_app.货物标.截取pdf货物标版 import truncate_pdf_main
|
||||
from flask_app.货物标.技术参数要求提取 import get_technical_requirements
|
||||
from flask_app.货物标.技术参数要求提取后处理函数 import extract_matching_keys
|
||||
|
||||
|
||||
def get_global_logger(unique_id):
|
||||
if unique_id is None:
|
||||
@ -16,80 +18,6 @@ def get_global_logger(unique_id):
|
||||
|
||||
|
||||
logger = None
|
||||
def extract_matching_keys(data_dict, good_list):
    """Collect entries of a nested structure whose key names a wanted item.

    ``data_dict`` is walked recursively through dicts and lists. A key
    matches when it equals an entry of ``good_list`` exactly, or is such an
    entry followed by ``-<digits>`` (several devices of the same kind inside
    one system, e.g. several switches). A matched value is stored as-is and
    is still descended into, so nested matches are collected as well.

    When the same matching key occurs more than once (the same device in
    different systems), every occurrence gets a letter suffix in traversal
    order: ``-a``, ``-b``, ..., ``-z``, ``-aa``, ...

    Args:
        data_dict (dict | list): Nested structure to search.
        good_list (list[str] | None): Key names to look for. ``None`` or an
            empty list yields an empty result.

    Returns:
        dict: Matched (possibly suffixed) keys mapped to their values, in
        traversal order.
    """
    wanted = list(good_list or ())
    # A single alternation replaces one compiled pattern per wanted name.
    # With no names there is nothing to match; an empty alternation would
    # wrongly accept "" and "-<digits>", hence the explicit None.
    pattern = None
    if wanted:
        alternatives = '|'.join(re.escape(name) for name in wanted)
        pattern = re.compile(r'^(?:' + alternatives + r')(?:-\d+)?$')

    def matches(key):
        # Non-string keys (ints, tuples, ...) can never name a wanted item;
        # passing them to the regex would raise TypeError.
        return (
            pattern is not None
            and isinstance(key, str)
            and pattern.match(key) is not None
        )

    def get_suffix(count):
        """Map 1 -> '-a', 2 -> '-b', ..., 26 -> '-z', 27 -> '-aa', ..."""
        letters = string.ascii_lowercase
        suffix = ''
        while count > 0:
            count, remainder = divmod(count - 1, 26)
            suffix = letters[remainder] + suffix
        return '-' + suffix

    # Single traversal: remember every match in encounter order.
    matched = []

    def collect(node):
        if isinstance(node, dict):
            for key, value in node.items():
                if matches(key):
                    matched.append((key, value))
                collect(value)
        elif isinstance(node, list):
            for item in node:
                collect(item)
        # Scalars need no further processing.

    collect(data_dict)

    # Count occurrences so that only genuinely repeated keys get a suffix.
    key_count = {}
    for key, _ in matched:
        key_count[key] = key_count.get(key, 0) + 1

    result = {}
    used = {}  # how many suffixes have been handed out per repeated key
    for key, value in matched:
        if key_count[key] > 1:
            used[key] = used.get(key, 0) + 1
            result[f"{key}{get_suffix(used[key])}"] = value
        else:
            result[key] = value
    return result
|
||||
|
||||
def get_technical_requirements_main(file_path,file_type,unique_id,output_folder):
|
||||
global logger
|
||||
logger = get_global_logger(unique_id)
|
||||
|
@ -1,4 +1,103 @@
|
||||
import json
|
||||
import re
|
||||
import string
|
||||
from collections import OrderedDict
|
||||
|
||||
#传输技术参数需求的时候后处理
|
||||
def extract_matching_keys(data_dict, good_list, special_keys=None):
    """Collect entries of a nested structure whose key names a wanted item.

    ``data_dict`` is walked recursively through dicts and lists. A key
    matches when it equals an entry of ``good_list`` exactly, or is such an
    entry followed by ``-<digits>``. A matched value is stored as-is and is
    still descended into, so nested matches are collected as well.

    Output names are built as follows:

    * A key listed in ``special_keys`` (e.g. "系统功能") is prefixed with the
      key of its nearest enclosing dict plus '的', e.g. "交通信号的系统功能".
      Without a (truthy) parent key it keeps its own name.
    * Any output name produced more than once gets a letter suffix in
      traversal order: ``-a``, ``-b``, ..., ``-z``, ``-aa``, ... This also
      covers special keys that resolve to the same prefixed name, so no
      occurrence is overwritten.

    Args:
        data_dict (dict | list): Nested structure to search.
        good_list (list[str] | None): Key names to look for. ``None`` or an
            empty list yields an empty result.
        special_keys (list[str] | None): Keys that receive the parent-name
            prefix. Defaults to no special keys.

    Returns:
        OrderedDict: Output names mapped to their values, in traversal order.
    """
    if special_keys is None:
        special_keys = []

    wanted = list(good_list or ())
    # A single alternation replaces one compiled pattern per wanted name.
    # With no names there is nothing to match; an empty alternation would
    # wrongly accept "" and "-<digits>", hence the explicit None.
    pattern = None
    if wanted:
        alternatives = '|'.join(re.escape(name) for name in wanted)
        pattern = re.compile(r'^(?:' + alternatives + r')(?:-\d+)?$')

    def matches(key):
        # Non-string keys (ints, tuples, ...) can never name a wanted item;
        # passing them to the regex would raise TypeError.
        return (
            pattern is not None
            and isinstance(key, str)
            and pattern.match(key) is not None
        )

    def get_suffix(count):
        """Map 1 -> '-a', 2 -> '-b', ..., 26 -> '-z', 27 -> '-aa', ..."""
        letters = string.ascii_lowercase
        suffix = ''
        while count > 0:
            count, remainder = divmod(count - 1, 26)
            suffix = letters[remainder] + suffix
        return '-' + suffix

    # Single traversal: remember every match, under its output base name,
    # in encounter order.
    matched = []

    def collect(node, parent_key=None):
        if isinstance(node, dict):
            for key, value in node.items():
                if matches(key):
                    if key in special_keys and parent_key:
                        base_name = f"{parent_key}的{key}"
                    else:
                        base_name = key
                    matched.append((base_name, value))
                # A dict value makes the current key the new parent; a list
                # is transparent and keeps the parent it was reached with.
                if isinstance(value, dict):
                    collect(value, parent_key=key)
                elif isinstance(value, list):
                    collect(value, parent_key=parent_key)
        elif isinstance(node, list):
            for item in node:
                collect(item, parent_key=parent_key)

    collect(data_dict)

    # Count by output name so that every repeated name, prefixed special
    # names included, is disambiguated rather than overwritten.
    name_count = {}
    for name, _ in matched:
        name_count[name] = name_count.get(name, 0) + 1

    result = OrderedDict()
    used = {}  # how many suffixes have been handed out per repeated name
    for name, value in matched:
        if name_count[name] > 1:
            used[name] = used.get(name, 0) + 1
            result[f"{name}{get_suffix(used[name])}"] = value
        else:
            result[name] = value
    return result
|
||||
|
||||
def postprocess(data):
|
||||
"""递归地转换字典中的值为列表,如果所有键对应的值都是'/', '{}' 或 '未知'"""
|
||||
def convert_dict(value):
|
||||
|
Loading…
x
Reference in New Issue
Block a user