1.6 insert_del_pagemark.py修改
This commit is contained in:
parent
04816cdedf
commit
691883cc99
@ -99,7 +99,7 @@ def get_patterns_for_procurement():
|
|||||||
r'第[一二三四五六七八九十1-9]+(?:章|部分)\s*(?!.*说明)' # 匹配“第X章”或“第X部分”
|
r'第[一二三四五六七八九十1-9]+(?:章|部分)\s*(?!.*说明)' # 匹配“第X章”或“第X部分”
|
||||||
r'[\u4e00-\u9fff、()()]*?' # 匹配允许的字符
|
r'[\u4e00-\u9fff、()()]*?' # 匹配允许的字符
|
||||||
r'(?:(?:服务|项目|商务|技术|供货)[\u4e00-\u9fff、()()]*?要求[\u4e00-\u9fff、()()]*?\s*$|' # 匹配“服务”、“项目”、“商务”或“技术”后跟“要求”
|
r'(?:(?:服务|项目|商务|技术|供货)[\u4e00-\u9fff、()()]*?要求[\u4e00-\u9fff、()()]*?\s*$|' # 匹配“服务”、“项目”、“商务”或“技术”后跟“要求”
|
||||||
r'(?:采购.*?(?:内容|要求|需求)?|招标(?:内容|要求|需求))[\u4e00-\u9fff、()()]*?|'
|
r'(?:采购.*?(?:内容|要求|需求)|招标(?:内容|要求|需求))[\u4e00-\u9fff、()()]*?|'
|
||||||
r'需求书[\u4e00-\u9fff、()()]*?)\s*$',
|
r'需求书[\u4e00-\u9fff、()()]*?)\s*$',
|
||||||
regex.MULTILINE
|
regex.MULTILINE
|
||||||
)
|
)
|
||||||
@ -525,7 +525,7 @@ def truncate_pdf_main_goods(input_path, output_folder, selection,logger, output_
|
|||||||
elif selection == 5:
|
elif selection == 5:
|
||||||
begin_pattern = regex.compile(
|
begin_pattern = regex.compile(
|
||||||
r'^第[一二三四五六七八九十百千]+(?:章|部分).*?(?:服务|项目|商务|技术|供货).*?要求|'
|
r'^第[一二三四五六七八九十百千]+(?:章|部分).*?(?:服务|项目|商务|技术|供货).*?要求|'
|
||||||
r'^第[一二三四五六七八九十百千]+(?:章|部分)(?!.*说明).*(?:采购.*?(?:内容|要求|需求)?|招标(?:内容|要求|需求)).*|'
|
r'^第[一二三四五六七八九十百千]+(?:章|部分)(?!.*说明).*(?:采购.*?(?:内容|要求|需求)|招标(?:内容|要求|需求)).*|'
|
||||||
r'^第[一二三四五六七八九十百千]+(?:章|部分).*?需求书'
|
r'^第[一二三四五六七八九十百千]+(?:章|部分).*?需求书'
|
||||||
)
|
)
|
||||||
end_pattern = regex.compile(
|
end_pattern = regex.compile(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user