9.23测试分段
This commit is contained in:
parent
d5b8a06322
commit
8789bb540a
@ -131,7 +131,7 @@ def llm_call(question, knowledge_name,file_id, result_queue, ans_index, llm_type
|
||||
|
||||
def multi_threading(queries, knowledge_name="", file_id="",llm_type=1):
|
||||
if not queries:
|
||||
return None
|
||||
return []
|
||||
print("多线程提问:starting multi_threading...")
|
||||
result_queue = queue.Queue()
|
||||
|
||||
|
@ -181,6 +181,7 @@ def process_reviews(original_dict_data,knowledge_name, truncate0_jsonpath,clause
|
||||
|
||||
results_2 = multi_threading(formatted_questions1+formatted_questions2, knowledge_name) #无序号的直接问大模型
|
||||
first_response_list = []
|
||||
|
||||
for _, response in results_2:
|
||||
try:
|
||||
if response and len(response) > 1: # 检查response存在且有至少两个元素
|
||||
|
File diff suppressed because one or more lines are too long
@ -1,108 +1,3 @@
|
||||
import json
|
||||
import re
|
||||
#这个字典可能有嵌套,你需要遍历里面的键名,对键名作判断,而不是键值,具体是这样的:如果处于同一层级的键的数量>1并且键名全由数字或点号组成。那么就将这些序号键名全部删除,重新组织成一个字典格式的数据,你可以考虑用字符串列表来保持部分平级的数据
|
||||
#对于同级的键,如果数量>1且键名都统一,那么将键名去掉,用列表保持它们的键值
|
||||
#对于同一个字典中,可能存在若干键值对,若它们的键值都是""或者"/" 你就将它们的键值删去,它们的键名用字符串列表保存
|
||||
# Shapes treated as ordinal labels: digits and dots ("1", "1.2"), digits in
# ASCII parentheses ("(3)"), one ASCII letter ("a"), a letter followed by
# digits ("A1"), digits followed by a letter ("1a"), or a letter followed by
# a dot ("a."). A letter-digit-letter mix such as "a1b" is not a label.
_NUMERIC_KEY_RE = re.compile(
    r'[\d.]+|\(\d+\)|[a-zA-Z]|[a-zA-Z]\d+|\d+[a-zA-Z]|[a-zA-Z]\.'
)


def is_numeric_key(key):
    """Return True when *key* is an ordinal/outline label rather than a name.

    Args:
        key: The dictionary key to classify (a string).

    Returns:
        bool: True if the whole key matches one of the ordinal shapes
        listed above, False otherwise.
    """
    # fullmatch (instead of match + "$") so a key with a trailing newline,
    # e.g. "1\n", is not mistaken for an ordinal.
    return _NUMERIC_KEY_RE.fullmatch(key) is not None
|
||||
#TODO:如果键值中存在数字就不行
|
||||
#zbtest20也有问题
|
||||
def contains_number_or_index(key, value):
    """Decide whether a key/value pair is a bare number or index entry.

    Args:
        key: Dictionary key (a string); checked for the substring "序号".
        value: The value stored under *key*; any type.

    Returns:
        bool: False when *value* is a string containing both a digit and a
        Chinese character (such text is kept). Otherwise True when *value*
        is an int/float or an all-digit string, when *key* contains "序号",
        or when *value* is a string containing a digit; False in all other
        cases.
    """
    is_text = isinstance(value, str)
    # Evaluated up front, as before, so the key is always inspected.
    contains_index = '序号' in key
    contains_digit = is_text and re.search(r'\d+', value) is not None
    contains_chinese = (
        is_text and re.search(r'[\u4e00-\u9fff]', value) is not None
    )

    # Digits embedded in Chinese text are content, not numbering: keep them.
    # This check intentionally comes before the "序号" key check.
    if contains_digit and contains_chinese:
        return False

    is_number = isinstance(value, (int, float)) or (is_text and value.isdigit())
    # Always hand back a real bool rather than a leaked Match object or None.
    return bool(is_number or contains_index or contains_digit)
|
||||
|
||||
def preprocess_dict(data):
    """Recursively strip number/index entries from nested dicts and lists.

    Behaviour by input type:
      * list: every element is processed recursively.
      * dict with at most one entry: the value is processed recursively and
        the entry is always kept.
      * dict with several entries whose values are all "" or "/": the list
        of its keys is returned instead of a dict.
      * any other dict: entries for which contains_number_or_index(key,
        value) is truthy are dropped; the remaining values are processed
        recursively and kept unless the result equals "".
      * anything else: returned unchanged.
    """
    if isinstance(data, list):
        return [preprocess_dict(element) for element in data]
    if not isinstance(data, dict):
        return data

    # Single-entry (or empty) dicts are never filtered, only recursed into.
    if len(data) <= 1:
        return {name: preprocess_dict(child) for name, child in data.items()}

    # Every value is a placeholder: only the key names carry information.
    if all(child == "" or child == "/" for child in data.values()):
        return list(data.keys())

    cleaned = {}
    for name, child in data.items():
        if contains_number_or_index(name, child):
            continue
        converted = preprocess_dict(child)
        if converted != "":  # keep non-empty results only
            cleaned[name] = converted
    return cleaned
|
||||
def _ordinal_sort_key(entry):
    """Natural-order sort key for a (label, value) pair.

    The label is split into digit and non-digit runs so that digit runs
    compare as integers ("2" before "10", "1.2" before "1.10").
    """
    chunks = re.split(r'(\d+)', entry[0])
    # With a capturing group, odd positions hold the digit runs.
    return [
        (0, int(chunk), "") if position % 2 else (1, 0, chunk)
        for position, chunk in enumerate(chunks)
        if chunk
    ]


def process_dict(data):
    """Collapse ordinal-labelled entries of a nested dict into lists.

    Args:
        data: Any value. Non-dict values are returned unchanged.

    Returns:
        For a dict: a new dict in which
          * the values of all keys accepted by is_numeric_key are processed
            recursively and gathered, in natural label order, into a list
            stored under 'items';
          * a value that is a list of two or more single-key dicts sharing
            the same key is replaced by the list of the processed inner
            values; if the single keys differ, each dict is processed as is;
          * every other value is processed recursively.
        If 'items' ends up as the only key, the list itself is returned.
    """
    if not isinstance(data, dict):
        return data

    result = {}
    numeric_keys = []
    non_numeric_keys = {}

    for key, value in data.items():
        if is_numeric_key(key):
            numeric_keys.append((key, value))
        else:
            non_numeric_keys[key] = value

    # Ordinal keys are collected even when there is only one of them.
    # Sort by natural order: plain string order would put "10" before "2".
    if numeric_keys:
        result['items'] = [
            process_dict(value)
            for _, value in sorted(numeric_keys, key=_ordinal_sort_key)
        ]

    # NOTE: a literal 'items' key in the input overwrites the list built above.
    for key, value in non_numeric_keys.items():
        is_single_key_dict_list = (
            isinstance(value, list)
            and len(value) > 1
            and all(isinstance(item, dict) and len(item) == 1 for item in value)
        )
        if not is_single_key_dict_list:
            result[key] = process_dict(value)
            continue

        common_key = next(iter(value[0].keys()))
        if all(common_key in item for item in value):
            # All wrappers share one key: drop it and keep the inner values.
            result[key] = [process_dict(item[common_key]) for item in value]
        else:
            result[key] = [process_dict(item) for item in value]

    # A result holding nothing but 'items' is unwrapped to the bare list.
    if len(result) == 1 and 'items' in result:
        return result['items']

    return result
|
||||
|
||||
# 测试代码
|
||||
|
||||
# Ad-hoc test run, executed at import time: build a sample review document,
# pass it through preprocess_dict and print the result as indented JSON.
_REVIEW_ENTRIES = (
    (9, "未按要求提供加盖公章及签字(签章)的;"),
    (1, "a39"),
    (2, "依据财库[2019]9号文的规定,招标文件采购清单中为“节能产品”的货物,未提供国家确定的认证机构出具的节能产品认证证书的;"),
)

input_data = {
    "符合性审查": {
        "说明": "评标委员会应当对符合资格的投标人的投标文件进行符合性审查,以确定其是否满足招标文件的实质性要求。",
        "审查标准": [
            {"序号": number, "内容": text} for number, text in _REVIEW_ENTRIES
        ],
    }
}

pred = preprocess_dict(input_data)
print(json.dumps(pred, indent=4, ensure_ascii=False))
|
||||
# processed_data = process_dict(pred)
|
||||
# print(json.dumps(processed_data, ensure_ascii=False, indent=4))
|
||||
from flask_app.main.json_utils import nest_json_under_key
|
||||
# Ad-hoc check executed at import time: call nest_json_under_key with an
# empty string and "资格", then print whatever it returns.
# NOTE(review): the helper's contract is not visible here — confirm that an
# empty first argument is a supported input.
res=nest_json_under_key("","资格")
print(res)
|
Loading…
x
Reference in New Issue
Block a user