From 24542b47d21144e754e85f8a28b46ecd9b55f907 Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Mon, 17 Feb 2025 15:15:59 +0800 Subject: [PATCH] =?UTF-8?q?2.17=20=E5=A2=9E=E5=8A=A0=E8=AF=BB=E6=96=87?= =?UTF-8?q?=E4=BB=B6pdf=E6=8E=A5=E5=8F=A3=E6=B5=8B=E8=AF=951?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_app/general/读取文件/按页读取pdf.py | 20 ++++++++++---------- flask_app/start_up.py | 1 - 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/flask_app/general/读取文件/按页读取pdf.py b/flask_app/general/读取文件/按页读取pdf.py index 9e3fd63..3fd704b 100644 --- a/flask_app/general/读取文件/按页读取pdf.py +++ b/flask_app/general/读取文件/按页读取pdf.py @@ -78,6 +78,16 @@ def process_pages(get_text, common_header, total_pages): def read_pdf_main(pdf_path): # common_header=extract_common_header(pdf_path) common_header="" + try: + with fitz.open(pdf_path) as pdf_document: + total_pages = pdf_document.page_count + get_text = create_get_text_function('fitz', pdf_document) + start_page, end_page = process_pages(get_text, common_header,total_pages) + return start_page, end_page + except Exception as e_pypdf2: + print(f"extract_pages_generic: 使用 PyPDF2 读取 PDF 失败: {e_pypdf2}") + + # 如果 PyPDF2 失败,尝试使用 PyMuPDF try: with open(pdf_path, "rb") as f: pdf_document = PdfReader(f) @@ -85,16 +95,6 @@ def read_pdf_main(pdf_path): get_text = create_get_text_function('pypdf2', pdf_document) start_page, end_page = process_pages(get_text, common_header,total_pages) return start_page, end_page - except Exception as e_pypdf2: - print(f"extract_pages_generic: 使用 PyPDF2 读取 PDF 失败: {e_pypdf2}") - - # 如果 PyPDF2 失败,尝试使用 PyMuPDF - try: - with fitz.open(pdf_path) as pdf_document: - total_pages = pdf_document.page_count - get_text = create_get_text_function('fitz', pdf_document) - start_page, end_page = process_pages(get_text, common_header,total_pages) - return start_page, end_page except Exception as e_pypdf2: print(f"extract_pages_generic: 使用 fitz 读取 PDF 失败: {e_pypdf2}") diff --git a/flask_app/start_up.py b/flask_app/start_up.py index ffc5a22..4ecb897 100644 --- a/flask_app/start_up.py +++ b/flask_app/start_up.py @@ -45,7 +45,6 @@ def create_app(): for handler in logger.handlers[:]: handler.close() logger.removeHandler(handler) - gc.collect() return app