2.17 增加读文件pdf接口测试1
This commit is contained in:
parent
3d003a5631
commit
24542b47d2
@ -78,6 +78,16 @@ def process_pages(get_text, common_header, total_pages):
|
|||||||
def read_pdf_main(pdf_path):
|
def read_pdf_main(pdf_path):
|
||||||
# common_header=extract_common_header(pdf_path)
|
# common_header=extract_common_header(pdf_path)
|
||||||
common_header=""
|
common_header=""
|
||||||
|
try:
|
||||||
|
with fitz.open(pdf_path) as pdf_document:
|
||||||
|
total_pages = pdf_document.page_count
|
||||||
|
get_text = create_get_text_function('fitz', pdf_document)
|
||||||
|
start_page, end_page = process_pages(get_text, common_header,total_pages)
|
||||||
|
return start_page, end_page
|
||||||
|
except Exception as e_pypdf2:
|
||||||
|
print(f"extract_pages_generic: 使用 PyPDF2 读取 PDF 失败: {e_pypdf2}")
|
||||||
|
|
||||||
|
# 如果 PyPDF2 失败,尝试使用 PyMuPDF
|
||||||
try:
|
try:
|
||||||
with open(pdf_path, "rb") as f:
|
with open(pdf_path, "rb") as f:
|
||||||
pdf_document = PdfReader(f)
|
pdf_document = PdfReader(f)
|
||||||
@ -85,16 +95,6 @@ def read_pdf_main(pdf_path):
|
|||||||
get_text = create_get_text_function('pypdf2', pdf_document)
|
get_text = create_get_text_function('pypdf2', pdf_document)
|
||||||
start_page, end_page = process_pages(get_text, common_header,total_pages)
|
start_page, end_page = process_pages(get_text, common_header,total_pages)
|
||||||
return start_page, end_page
|
return start_page, end_page
|
||||||
except Exception as e_pypdf2:
|
|
||||||
print(f"extract_pages_generic: 使用 PyPDF2 读取 PDF 失败: {e_pypdf2}")
|
|
||||||
|
|
||||||
# 如果 PyPDF2 失败,尝试使用 PyMuPDF
|
|
||||||
try:
|
|
||||||
with fitz.open(pdf_path) as pdf_document:
|
|
||||||
total_pages = pdf_document.page_count
|
|
||||||
get_text = create_get_text_function('fitz', pdf_document)
|
|
||||||
start_page, end_page = process_pages(get_text, common_header,total_pages)
|
|
||||||
return start_page, end_page
|
|
||||||
except Exception as e_pypdf2:
|
except Exception as e_pypdf2:
|
||||||
print(f"extract_pages_generic: 使用 fitz 读取 PDF 失败: {e_pypdf2}")
|
print(f"extract_pages_generic: 使用 fitz 读取 PDF 失败: {e_pypdf2}")
|
||||||
|
|
||||||
|
@ -45,7 +45,6 @@ def create_app():
|
|||||||
for handler in logger.handlers[:]:
|
for handler in logger.handlers[:]:
|
||||||
handler.close()
|
handler.close()
|
||||||
logger.removeHandler(handler)
|
logger.removeHandler(handler)
|
||||||
gc.collect()
|
|
||||||
return app
|
return app
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user