2.17 增加读文件pdf接口测试1

This commit is contained in:
zy123 2025-02-17 15:15:59 +08:00
parent 3d003a5631
commit 24542b47d2
2 changed files with 10 additions and 11 deletions

View File

@@ -79,10 +79,9 @@ def read_pdf_main(pdf_path):
# common_header=extract_common_header(pdf_path) # common_header=extract_common_header(pdf_path)
common_header="" common_header=""
try: try:
with open(pdf_path, "rb") as f: with fitz.open(pdf_path) as pdf_document:
pdf_document = PdfReader(f) total_pages = pdf_document.page_count
total_pages = len(pdf_document.pages) get_text = create_get_text_function('fitz', pdf_document)
get_text = create_get_text_function('pypdf2', pdf_document)
start_page, end_page = process_pages(get_text, common_header,total_pages) start_page, end_page = process_pages(get_text, common_header,total_pages)
return start_page, end_page return start_page, end_page
except Exception as e_pypdf2: except Exception as e_pypdf2:
@@ -90,9 +89,10 @@ def read_pdf_main(pdf_path):
# 如果 PyPDF2 失败,尝试使用 PyMuPDF # 如果 PyPDF2 失败,尝试使用 PyMuPDF
try: try:
with fitz.open(pdf_path) as pdf_document: with open(pdf_path, "rb") as f:
total_pages = pdf_document.page_count pdf_document = PdfReader(f)
get_text = create_get_text_function('fitz', pdf_document) total_pages = len(pdf_document.pages)
get_text = create_get_text_function('pypdf2', pdf_document)
start_page, end_page = process_pages(get_text, common_header,total_pages) start_page, end_page = process_pages(get_text, common_header,total_pages)
return start_page, end_page return start_page, end_page
except Exception as e_pypdf2: except Exception as e_pypdf2:

View File

@@ -45,7 +45,6 @@ def create_app():
for handler in logger.handlers[:]: for handler in logger.handlers[:]:
handler.close() handler.close()
logger.removeHandler(handler) logger.removeHandler(handler)
gc.collect()
return app return app