2.15 应该是pypdf2库的问题
This commit is contained in:
parent
cef8ff5415
commit
515249d4f7
@ -23,7 +23,6 @@ def extract_pages(pdf_path, output_folder, begin_pattern, begin_page, end_patter
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error processing {pdf_path}: {e}")
|
print(f"Error processing {pdf_path}: {e}")
|
||||||
return ""
|
return ""
|
||||||
r'^第[一二三四五六七八九十百千]+(?:章|部分)(?!.*说明).*(?:采购.*?(?:内容|要求|需求)|(招标|项目|货物)(?:内容|要求|需求)).*|'
|
|
||||||
def get_patterns_for_procurement():
|
def get_patterns_for_procurement():
|
||||||
begin_pattern = regex.compile(
|
begin_pattern = regex.compile(
|
||||||
r'(?<!对应\s*)(?<!根据\s*)(?<!按照\s*)(?<!见\s*)(?<!与\s*)(?<!同\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)'
|
r'(?<!对应\s*)(?<!根据\s*)(?<!按照\s*)(?<!见\s*)(?<!与\s*)(?<!同\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user