diff --git a/flask_app/general/读取文件/按页读取pdf.py b/flask_app/general/读取文件/按页读取pdf.py index 2f3f902..888d994 100644 --- a/flask_app/general/读取文件/按页读取pdf.py +++ b/flask_app/general/读取文件/按页读取pdf.py @@ -1,4 +1,4 @@ -from pypdf import PdfReader +from PyPDF2 import PdfReader from flask_app.general.读取文件.clean_pdf import extract_common_header, clean_page_content, create_get_text_function import fitz # PyMuPDF @@ -60,6 +60,7 @@ def process_pages(get_text, common_header, total_pages): try: text = get_text(page_num) cleaned_text = clean_page_content(text, common_header) + # print(cleaned_text) except Exception as e: print(f"读取第 {page_num} 页失败: {e}") continue diff --git a/flask_app/routes/test_readpdf.py b/flask_app/routes/test_readpdf.py index 4110bd7..ae3dce2 100644 --- a/flask_app/routes/test_readpdf.py +++ b/flask_app/routes/test_readpdf.py @@ -1,3 +1,4 @@ +import multiprocessing import os.path from flask import request, jsonify, Blueprint, g @@ -30,7 +31,9 @@ def process_file(): file_path,file_type=download_file(file_url, filename) # print(file_path) # 调用预处理函数 - result = read_pdf_main(pdf_path=file_path) + with multiprocessing.Pool(processes=1) as pool: + # 调用 apply 或 apply_async 执行子进程任务 + result = pool.apply(read_pdf_main, args=(file_path,)) # 处理结果 if not result: