2.15 应该是 PyPDF2 库的问题

This commit is contained in:
zy123 2025-02-15 22:32:27 +08:00
parent cef8ff5415
commit 515249d4f7

View File

@@ -23,7 +23,6 @@ def extract_pages(pdf_path, output_folder, begin_pattern, begin_page, end_patter
except Exception as e:
print(f"Error processing {pdf_path}: {e}")
return ""
r'^第[一二三四五六七八九十百千]+(?:章|部分)(?!.*说明).*(?:采购.*?(?:内容|要求|需求)|(招标|项目|货物)(?:内容|要求|需求)).*|'
def get_patterns_for_procurement():
begin_pattern = regex.compile(
r'(?<!对应\s*)(?<!根据\s*)(?<!按照\s*)(?<!见\s*)(?<!与\s*)(?<!同\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)'