diff --git a/flask_app/general/读取文件/按页读取pdf.py b/flask_app/general/读取文件/按页读取pdf.py index 9e3fd63..3fd704b 100644 --- a/flask_app/general/读取文件/按页读取pdf.py +++ b/flask_app/general/读取文件/按页读取pdf.py @@ -78,6 +78,16 @@ def process_pages(get_text, common_header, total_pages): def read_pdf_main(pdf_path): # common_header=extract_common_header(pdf_path) common_header="" + try: + with fitz.open(pdf_path) as pdf_document: + total_pages = pdf_document.page_count + get_text = create_get_text_function('fitz', pdf_document) + start_page, end_page = process_pages(get_text, common_header,total_pages) + return start_page, end_page + except Exception as e_pypdf2: + print(f"extract_pages_generic: 使用 PyPDF2 读取 PDF 失败: {e_pypdf2}") + + # 如果 PyPDF2 失败,尝试使用 PyMuPDF try: with open(pdf_path, "rb") as f: pdf_document = PdfReader(f) @@ -85,16 +95,6 @@ def read_pdf_main(pdf_path): get_text = create_get_text_function('pypdf2', pdf_document) start_page, end_page = process_pages(get_text, common_header,total_pages) return start_page, end_page - except Exception as e_pypdf2: - print(f"extract_pages_generic: 使用 PyPDF2 读取 PDF 失败: {e_pypdf2}") - - # 如果 PyPDF2 失败,尝试使用 PyMuPDF - try: - with fitz.open(pdf_path) as pdf_document: - total_pages = pdf_document.page_count - get_text = create_get_text_function('fitz', pdf_document) - start_page, end_page = process_pages(get_text, common_header,total_pages) - return start_page, end_page except Exception as e_pypdf2: print(f"extract_pages_generic: 使用 fitz 读取 PDF 失败: {e_pypdf2}") diff --git a/flask_app/start_up.py b/flask_app/start_up.py index ffc5a22..4ecb897 100644 --- a/flask_app/start_up.py +++ b/flask_app/start_up.py @@ -45,7 +45,6 @@ def create_app(): for handler in logger.handlers[:]: handler.close() logger.removeHandler(handler) - gc.collect() return app