diff --git a/flask_app/general/截取pdf通用函数.py b/flask_app/general/截取pdf通用函数.py index 19876d3..30b2ef9 100644 --- a/flask_app/general/截取pdf通用函数.py +++ b/flask_app/general/截取pdf通用函数.py @@ -108,4 +108,22 @@ def convert_to_pdf(file_path): return docx2pdf(file_path) return file_path +def get_invalid_file(file_path,output_folder,common_header): + pdf_document = PdfReader(file_path) + total_pages = len(pdf_document.pages) + begin_pattern=[regex.compile( + r'^第[一二三四五六七八九十百千]+(?:章|部分).*?(?:公告|邀请书|邀请函|邀请).*|^第一卷|^投标邀请书|^投标邀请函|^投标邀请',regex.MULTILINE + ), + regex.compile( + r'.*(?