2.17 增加读文件pdf接口测试1

This commit is contained in:
zy123 2025-02-17 16:24:43 +08:00
parent 08947c0258
commit 7cfb8c614f
2 changed files with 6 additions and 2 deletions

View File

@@ -1,4 +1,4 @@
from pypdf import PdfReader
from PyPDF2 import PdfReader
from flask_app.general.读取文件.clean_pdf import extract_common_header, clean_page_content, create_get_text_function
import fitz # PyMuPDF
@@ -60,6 +60,7 @@ def process_pages(get_text, common_header, total_pages):
try:
text = get_text(page_num)
cleaned_text = clean_page_content(text, common_header)
# print(cleaned_text)
except Exception as e:
print(f"读取第 {page_num} 页失败: {e}")
continue

View File

@@ -1,3 +1,4 @@
import multiprocessing
import os.path
from flask import request, jsonify, Blueprint, g
@@ -30,7 +31,9 @@ def process_file():
file_path,file_type=download_file(file_url, filename)
# print(file_path)
# 调用预处理函数
result = read_pdf_main(pdf_path=file_path)
with multiprocessing.Pool(processes=1) as pool:
# 调用 apply 或 apply_async 执行子进程任务
result = pool.apply(read_pdf_main, args=(file_path,))
# 处理结果
if not result: