31 lines
1013 B
Python
31 lines
1013 B
Python
import tracemalloc
|
|
from PyPDF2 import PdfReader
|
|
|
|
def extract_text_by_page(file_path):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file_path: Path of the PDF file to read; it is opened in binary mode.

    Returns:
        The text of all pages in page order, with consecutive pages
        separated by a newline. An empty string when the document has
        no pages.
    """
    with open(file_path, 'rb') as file:
        reader = PdfReader(file)
        # Extraction must happen while the file is still open, because the
        # reader was built on the open file handle.
        # The `or ""` is defensive: it keeps the join below safe should
        # extract_text() return None for a page.
        page_texts = [page.extract_text() or "" for page in reader.pages]

    # Join once at the end rather than growing a string inside the loop.
    return "\n".join(page_texts)
|
|
|
|
# PDF whose text extraction is being profiled.
file_path = r'C:\Users\Administrator\Desktop\fsdownload\00550cfc-fd33-469e-8272-9215291b175c\ztbfile.pdf'

# Start tracing memory allocations and record a baseline snapshot
# before the function under measurement runs.
tracemalloc.start()
snapshot_before = tracemalloc.take_snapshot()

# Call the function being measured.
result = extract_text_by_page(file_path)

# Take a second snapshot after the call and diff it against the baseline,
# grouping the allocation differences by source line.
snapshot_after = tracemalloc.take_snapshot()
stats = snapshot_after.compare_to(snapshot_before, 'lineno')

# Report the first ten entries of the comparison.
print("[ Top 10 内存变化 ]")
for stat in stats[:10]:
    print(stat)

# Stop tracing memory allocations.
tracemalloc.stop()
|