2.15 应该是pypdf2库的问题
This commit is contained in:
parent
cef8ff5415
commit
515249d4f7
@ -23,7 +23,6 @@ def extract_pages(pdf_path, output_folder, begin_pattern, begin_page, end_patter
|
||||
except Exception as e:
|
||||
print(f"Error processing {pdf_path}: {e}")
|
||||
return ""
|
||||
r'^第[一二三四五六七八九十百千]+(?:章|部分)(?!.*说明).*(?:采购.*?(?:内容|要求|需求)|(招标|项目|货物)(?:内容|要求|需求)).*|'
|
||||
def get_patterns_for_procurement():
|
||||
begin_pattern = regex.compile(
|
||||
r'(?<!对应\s*)(?<!根据\s*)(?<!按照\s*)(?<!见\s*)(?<!与\s*)(?<!同\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)'
|
||||
|
Loading…
x
Reference in New Issue
Block a user