import regex
# Compiled pattern with three alternatives:
#   1. "第" + one or more Chinese numerals (一..十) + "章", optional
#      whitespace, then "合同"
#   2. a colon (ASCII ":" or full-width ":") immediately followed by
#      "清标报告"
#   3. "第二卷" at the start of any line (this is why MULTILINE is set)
# NOTE(review): the character class previously listed ":" twice, which is
# redundant; presumably the second entry was meant to be the full-width colon
# common in Chinese text. Confirm against real documents.
begin_pattern = regex.compile(
    r'第[一二三四五六七八九十]+章\s*合同|[::]清标报告|^第二卷',
    regex.MULTILINE,
)
# Test samples.
# NOTE(review): the expected outcomes noted per item do not agree with the
# begin_pattern defined in this file: none of these strings contains
# "第…章…合同", a colon followed by "清标报告", or a line starting with "第二卷",
# so every sample is reported as 不匹配. The samples look like they were written
# for a different ("…须知正文") pattern; confirm and refresh them.
test_strings = [
    '投标人须知正文',  # expected: match
    '”投标人须知正文',  # expected: no match
    '” 投标人须知正文',  # expected: no match
    '与 投标人须知正文',  # expected: no match
    '见 投标人须知正文',  # expected: no match
    '“ 投标人须知正文',  # expected: no match
    '供应商须知正文',  # expected: match
    '谈判供应商须知正文',  # expected: match
]
# Print, for every sample, whether begin_pattern is found anywhere in it
# (search, not match, so the hit may be at any position in the string).
for sample in test_strings:
    verdict = "匹配" if begin_pattern.search(sample) else "不匹配"
    print(f"{verdict}: {sample}")