"""Scratch check: trim a tender-document section at its last end marker.

The script searches a sample section for lines that begin a *following*
section (a new chapter/part heading, the evaluation pre-table, or an
appendix/attachment/annex label) and cuts the text off at the last such
line. The outcome is printed to stdout.
"""
import re

# Sample section text used to exercise the pattern. The literal "..." line
# stands in for elided clauses and is part of the sample on purpose.
cleaned_text = """第三章 评标办法 (综合评分法)
一、评标原则
1.评标将本着公平、公正、科学、择优的原则进行。
2.依法评标、严格保密。
...
8.投标文件 含有招标人不能接受的附加条件的;
"""

# Line-anchored markers that start the next section. Written as a raw
# string so that `\s` reaches the regex engine untouched (`\uXXXX` escapes
# are understood by `re` itself), and with the outer group properly closed.
# Both the ASCII and the full-width colon are accepted after the
# appendix-style labels.
end_pattern = (
    r'^(?:'
    r'第[一二三四五六七八九十百千]+(?:章|部分)\s*[\u4e00-\u9fff]+'
    r'|评标办法前附表'
    r'|附录(?:一)?[::]'
    r'|附件(?:一)?[::]'
    r'|附表(?:一)?[::]'
    r')'
)

# A hit at offset 0 is the section's own heading, not an end marker;
# truncating there would discard the whole text, so it is ignored.
matches = [
    m
    for m in re.finditer(end_pattern, cleaned_text, re.MULTILINE)
    if m.start() > 0
]

if matches:
    # Cut at the last marker so everything before it is kept.
    end_index = matches[-1].start()
    cleaned_text = cleaned_text[:end_index]
    print("匹配成功,截断后文本:")
    print(cleaned_text)
else:
    print("未匹配到内容。")