From 08947c0258e985d7a47c1ba6215451851773ac03 Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Mon, 17 Feb 2025 16:10:38 +0800 Subject: [PATCH] =?UTF-8?q?2.17=20=E5=A2=9E=E5=8A=A0=E8=AF=BB=E6=96=87?= =?UTF-8?q?=E4=BB=B6pdf=E6=8E=A5=E5=8F=A3=E6=B5=8B=E8=AF=951?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_app/general/读取文件/按页读取pdf.py | 2 +- flask_app/routes/test_readpdf.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/flask_app/general/读取文件/按页读取pdf.py b/flask_app/general/读取文件/按页读取pdf.py index 3e29159..2f3f902 100644 --- a/flask_app/general/读取文件/按页读取pdf.py +++ b/flask_app/general/读取文件/按页读取pdf.py @@ -1,4 +1,4 @@ -from PyPDF2 import PdfReader +from pypdf import PdfReader from flask_app.general.读取文件.clean_pdf import extract_common_header, clean_page_content, create_get_text_function import fitz # PyMuPDF diff --git a/flask_app/routes/test_readpdf.py b/flask_app/routes/test_readpdf.py index 8bd491f..4110bd7 100644 --- a/flask_app/routes/test_readpdf.py +++ b/flask_app/routes/test_readpdf.py @@ -5,7 +5,7 @@ import uuid import time from flask_app.ConnectionLimiter import require_execution_timeout from flask_app.general.format_change import download_file -from flask_app.general.读取文件.按页读取pdf import read_pdf_main, extract_text_by_page +from flask_app.general.读取文件.按页读取pdf import read_pdf_main from flask_app.routes.utils import validate_and_setup_logger from flask_app.routes.货物标解析main import preprocess_files @@ -30,11 +30,11 @@ def process_file(): file_path,file_type=download_file(file_url, filename) # print(file_path) # 调用预处理函数 - # result = read_pdf_main(pdf_path=file_path) - extract_text_by_page(file_path) + result = read_pdf_main(pdf_path=file_path) + # 处理结果 - # if not result: - # return jsonify({'error': 'File processing failed'}) + if not result: + return jsonify({'error': 'File processing failed'}) response_data={ "处理结果":"yes" }