59 lines
2.2 KiB
Python
59 lines
2.2 KiB
Python
import re
|
||
|
||
|
||
def generate_questions(input_list):
    """Build one extraction prompt per key/value pair found in *input_list*.

    Args:
        input_list: Iterable of dicts. Every ``(key, value)`` item of every
            dict yields exactly one question, in iteration order.

    Returns:
        A list of prompt strings. Each prompt names the key, embeds the value
        after it has been passed through ``preprocess_value``, and asks for a
        JSON answer keyed by that same key.
    """
    # The wording is sent verbatim to the downstream consumer; do not edit it.
    prompt = (
        "关于'{key}',{value}的内容是怎样的？请按json格式给我提供信息，键名为'{key}',而键值需要完全与原文保持一致，不要擅自总结、删减，如果存在未知信息，请在对应键值处填'未知'。"
    )

    return [
        prompt.format(key=field, value=preprocess_value(raw))
        for entry in input_list
        for field, raw in entry.items()
    ]
|
||
|
||
# Matches a chapter/clause reference such as "第一章" or "第二款".
_SECTION_RE = re.compile(r'第.+?[章款]')
# Leading verbs ("符合" / "应满足") that are dropped from the extracted clause.
_LEADING_VERB_RE = re.compile(r'^(符合|应满足)\s*')
# Full-width comma, ideographic full stop, full-width ? and !, plus ASCII , ? !
_SEPARATORS = frozenset('\uff0c\u3002\uff1f\uff01,?!')
# ASCII and full-width parentheses are both treated as nesting brackets.
_OPENERS = frozenset('(\uff08')
_CLOSERS = frozenset(')\uff09')


def preprocess_value(value):
    """Reduce *value* to the clause that mentions a chapter or clause number.

    If *value* contains a "第…章" or "第…款" reference, it is split on
    sentence punctuation (separators inside parentheses are ignored), and the
    first fragment containing "章" or "款" is returned with a leading
    "符合" / "应满足" removed. Otherwise *value* is returned unchanged.

    Args:
        value: The requirement text. Non-string values are returned as-is.

    Returns:
        The extracted clause, or the original *value* when no chapter/clause
        reference is present.
    """
    if not isinstance(value, str) or not _SECTION_RE.search(value):
        return value

    parts = []
    current = []
    depth = 0  # current parenthesis nesting level (ASCII and full-width)

    for ch in value:
        if ch in _OPENERS:
            depth += 1
        elif ch in _CLOSERS:
            # Clamp at zero so a stray closing bracket (e.g. the list marker
            # "1)") does not disable splitting for the rest of the string.
            depth = max(depth - 1, 0)

        if depth == 0 and ch in _SEPARATORS:
            parts.append(''.join(current).strip())
            current = []
        else:
            current.append(ch)

    if current:
        parts.append(''.join(current).strip())

    # First fragment that mentions a chapter or a clause.
    target = next((part for part in parts if '章' in part or '款' in part), None)
    if not target:
        # Nothing usable was isolated; fall back to the original text.
        return value

    return _LEADING_VERB_RE.sub('', target)
|
||
|
||
# Demo run: build the prompt for one sample qualification requirement and
# print the resulting list of questions.
input_list = [
    {'资格性审查标准.资格要求': '符合本采购文件第一章第二款要求，并提供合格有效的证明材料'},
]
res = generate_questions(input_list)
print(res)