59 lines
2.2 KiB
Python
Raw Normal View History

2024-10-12 18:01:59 +08:00
import re
2024-09-27 17:03:46 +08:00
2024-09-23 15:49:30 +08:00
2024-10-12 18:01:59 +08:00
def generate_questions(input_list):
    """Build one prompt string per key/value pair found in *input_list*.

    Args:
        input_list: Iterable of dicts. Each value is passed through
            ``preprocess_value`` before being substituted into the prompt.

    Returns:
        A list of prompt strings, ordered by dict and then by each dict's
        own item order.
    """
    prompt_pattern = (
        "关于'{key}',{value}的内容是怎样的请按json格式给我提供信息键名为'{key}',而键值需要完全与原文保持一致,不要擅自总结、删减,如果存在未知信息,请在对应键值处填'未知'"
    )

    prompts = []
    for entry in input_list:
        # Every pair of the dict contributes exactly one prompt.
        prompts.extend(
            prompt_pattern.format(key=field, value=preprocess_value(raw))
            for field, raw in entry.items()
        )
    return prompts
def preprocess_value(value):
    """Reduce *value* to the segment that cites a chapter or clause.

    If *value* contains a chapter reference (``第…章``) or a clause reference
    (``第…款``), it is split on sentence punctuation that lies outside any
    parentheses. The first segment containing ``章`` or ``款`` is returned,
    with a leading ``符合`` / ``应满足`` removed.

    Args:
        value: Text to inspect. Non-string values are returned unchanged.

    Returns:
        The trimmed segment, or *value* itself when there is no
        chapter/clause reference or no segment qualifies.
    """
    # Non-text values cannot be searched; pass them through untouched.
    if not isinstance(value, str):
        return value

    has_reference = (
        re.search(r'第(.+?)章', value) or re.search(r'第(.+?)款', value)
    )
    if not has_reference:
        return value

    # Full-width characters are written as escapes so that a lossy copy or
    # text normalisation cannot silently drop or alter them:
    # U+3002 ideographic full stop, U+FF0C/U+FF1F/U+FF01 full-width , ? !
    separators = frozenset(',?!\u3002\uff0c\uff1f\uff01')
    # ASCII and full-width (U+FF08 / U+FF09) parentheses are both tracked.
    bracket_counts = dict.fromkeys('()\uff08\uff09', 0)

    parts = []
    current = []
    for ch in value:
        if ch in bracket_counts:
            bracket_counts[ch] += 1
        # Punctuation only splits when every opened bracket has been closed.
        balanced = (
            bracket_counts['('] == bracket_counts[')']
            and bracket_counts['\uff08'] == bracket_counts['\uff09']
        )
        if balanced and ch in separators:
            parts.append(''.join(current).strip())
            current = []
        else:
            current.append(ch)
    if current:
        parts.append(''.join(current).strip())

    # Pick the segment that actually mentions a chapter or a clause.
    target_part = next(
        (part for part in parts if '章' in part or '款' in part), None
    )
    if target_part:
        # Drop a leading "符合" or "应满足" so only the reference remains.
        return re.sub(r'^(符合|应满足)\s*', '', target_part)

    # No usable segment: fall back to the original text.
    return value
# Sample run: builds the prompts for one qualification-review entry and
# prints them. These statements execute whenever the module is loaded.
input_list = [
    {
        '资格性审查标准.资格要求': '符合本采购文件第一章第二款要求,并提供合格有效的证明材料',
    },
]
res = generate_questions(input_list)
print(res)