2.17 增加读文件pdf接口测试1
This commit is contained in:
parent
08947c0258
commit
7cfb8c614f
@ -1,4 +1,4 @@
|
||||
from pypdf import PdfReader
|
||||
from PyPDF2 import PdfReader
|
||||
|
||||
from flask_app.general.读取文件.clean_pdf import extract_common_header, clean_page_content, create_get_text_function
|
||||
import fitz # PyMuPDF
|
||||
@ -60,6 +60,7 @@ def process_pages(get_text, common_header, total_pages):
|
||||
try:
|
||||
text = get_text(page_num)
|
||||
cleaned_text = clean_page_content(text, common_header)
|
||||
# print(cleaned_text)
|
||||
except Exception as e:
|
||||
print(f"读取第 {page_num} 页失败: {e}")
|
||||
continue
|
||||
|
@ -1,3 +1,4 @@
|
||||
import multiprocessing
|
||||
import os.path
|
||||
|
||||
from flask import request, jsonify, Blueprint, g
|
||||
@ -30,7 +31,9 @@ def process_file():
|
||||
file_path,file_type=download_file(file_url, filename)
|
||||
# print(file_path)
|
||||
# 调用预处理函数
|
||||
result = read_pdf_main(pdf_path=file_path)
|
||||
with multiprocessing.Pool(processes=1) as pool:
|
||||
# 调用 apply 或 apply_async 执行子进程任务
|
||||
result = pool.apply(read_pdf_main, args=(file_path,))
|
||||
|
||||
# 处理结果
|
||||
if not result:
|
||||
|
Loading…
x
Reference in New Issue
Block a user