"""Build JSON-extraction prompts from ``{key: requirement text}`` mappings."""

import re

# Prompt template: ``{key}`` is substituted twice, ``{value}`` once.
_QUESTION_TEMPLATE = (
    "关于'{key}',{value}的内容是怎样的?请按json格式给我提供信息,键名为'{key}',而键值需要完全与原文保持一致,不要擅自总结、删减,如果存在未知信息,请在对应键值处填'未知'。"
)

# A "第X章" (chapter) or "第X款" (clause) reference.
_CHAPTER_OR_CLAUSE = re.compile(r'第.+?[章款]')

# Leading "符合" / "应满足" filler that is dropped from the selected part.
_LEADING_FILLER = re.compile(r'^(符合|应满足)\s*')

# Sentence separators: comma, question mark and exclamation mark in both
# half-width and full-width form, plus the ideographic full stop (U+3002).
# Non-ASCII characters are written as escapes so the half-/full-width
# distinction cannot be lost when the file is re-encoded or normalised.
_SEPARATORS = frozenset(",?!\u3002\uff0c\uff1f\uff01")

# Half-width and full-width parentheses are treated as one bracket kind.
_OPENERS = frozenset("(\uff08")
_CLOSERS = frozenset(")\uff09")


def generate_questions(input_list):
    """Return one prompt string per key/value pair found in *input_list*.

    Args:
        input_list: iterable of dicts mapping a field name to its
            requirement text.

    Returns:
        A list of prompts, in iteration order, built by substituting the
        key and the result of ``preprocess_value(value)`` into the template.
    """
    return [
        _QUESTION_TEMPLATE.format(key=key, value=preprocess_value(value))
        for input_dict in input_list
        for key, value in input_dict.items()
    ]


def _split_outside_brackets(text):
    """Split *text* at separators that are not enclosed in parentheses."""
    parts = []
    current = []
    depth = 0
    for ch in text:
        if ch in _OPENERS:
            depth += 1
        elif ch in _CLOSERS:
            # Clamp at zero: an unmatched closer (list numbering such as
            # "1)") must not disable splitting for the rest of the text.
            depth = max(depth - 1, 0)
        if depth == 0 and ch in _SEPARATORS:
            parts.append("".join(current).strip())
            current = []
        else:
            current.append(ch)
    if current:
        parts.append("".join(current).strip())
    return parts


def preprocess_value(value):
    """Reduce *value* to the part that cites a chapter or clause.

    If *value* contains no "第X章" / "第X款" reference it is returned
    unchanged. Otherwise it is split at sentence punctuation outside
    parentheses, the first part holding such a reference is selected, and a
    leading "符合" or "应满足" is removed from it.

    Args:
        value: requirement text (str).

    Returns:
        The selected, trimmed part, or *value* itself when nothing qualifies.
    """
    if not _CHAPTER_OR_CLAUSE.search(value):
        return value

    parts = _split_outside_brackets(value)

    # Prefer a part containing a complete reference, so that an unrelated
    # word that merely contains one of the characters (e.g. "付款") in an
    # earlier part is not picked by mistake.
    target = next((p for p in parts if _CHAPTER_OR_CLAUSE.search(p)), None)
    if target is None:
        # The reference straddles a separator; fall back to the looser
        # single-character test.
        target = next((p for p in parts if '章' in p or '款' in p), None)
    if target is None:
        return value

    return _LEADING_FILLER.sub('', target)


# Demo invocation, kept at module level so running (or importing) the file
# behaves as before.
input_list = [{'资格性审查标准.资格要求': '符合本采购文件第一章第二款要求,并提供合格有效的证明材料'}]
res = generate_questions(input_list)
print(res)