2.17 已解决内存泄漏问题
This commit is contained in:
parent
5f13665739
commit
53f1b0fc35
10
README.md
10
README.md
@ -55,9 +55,9 @@ requirements.txt一般无需变动,除非代码中使用了新的库,也要
|
||||
2. 配置好 .env 环境(一般无需再在电脑的系统环境变量中额外配置,但需要在 PyCharm 中安装插件,使项目能够把 .env 中的环境变量加载到系统环境变量中!)
|
||||
3. 点击下拉框,Edit configurations
|
||||
|
||||

|
||||

|
||||
|
||||
设置run_serve.py为启动脚本
|
||||
设置run_serve.py为启动脚本
|
||||
注意这里的working directory要设置到最外层文件夹,而不是flask_app!!!
|
||||
|
||||
4. 使用 Postman 发送 POST 请求进行测试:
|
||||
@ -85,6 +85,8 @@ bid-assistance/test 里面找个文件的url,推荐'094定稿-湖北工业大
|
||||
1. 编写shell文件,sudo vim clean_dir.sh
|
||||
命名为clean_dir.sh
|
||||
|
||||
清理/home/Z/zbparse_output_dev下的output1这些二级目录下的c8d2140d-9e9a-4a49-9a30-b53ba565db56这种uuid的三级目录(只保留最近7天)。
|
||||
|
||||
```
|
||||
#!/bin/bash
|
||||
|
||||
@ -120,7 +122,7 @@ sudo chmod +x ./clean_dir.sh
|
||||
sudo ./clean_dir.sh
|
||||
```
|
||||
|
||||
4. 每天零点清理
|
||||
4. 以 root 用户的身份编辑 crontab 文件,从而设置或修改系统定时任务(cron jobs)。每天零点10分清理
|
||||
|
||||
```
|
||||
sudo crontab -e
|
||||
@ -129,6 +131,8 @@ sudo crontab -e
|
||||
10 0 * * * /home/Z/clean_dir.sh
|
||||
```
|
||||
|
||||
**目前测试服务器和正式服务器都写上了!无需变动**
|
||||
|
||||
|
||||
|
||||
## flask_app结构介绍
|
||||
|
@ -26,8 +26,8 @@ def extract_text_by_page_fitz(file_path):
|
||||
|
||||
|
||||
def extract_text_by_page(file_path):
|
||||
common_header=""
|
||||
# common_header = extract_common_header(file_path)
|
||||
# common_header=""
|
||||
common_header = extract_common_header(file_path)
|
||||
# print(common_header)
|
||||
result = ""
|
||||
with open(file_path, 'rb') as file:
|
||||
|
@ -1,39 +0,0 @@
|
||||
import os
|
||||
import shutil
|
||||
|
||||
def find_and_copy_files(input_folder, output_folder):
    """Copy tender documents found under ``input_folder`` into ``output_folder``.

    Walks ``input_folder`` recursively and copies every file whose name

    * contains neither '响应' nor '投标',
    * contains at least one of '竞争性', '招标文件' or '磋商', and
    * ends with a supported extension (.pdf, .doc, .docx). The extension is
      matched case-insensitively so that names such as ``X.PDF`` are kept.

    All matches are written directly into ``output_folder``; the source
    sub-directory layout is not reproduced. When the target name is already
    taken, a counter is appended to the base name: ``name(1).ext``,
    ``name(2).ext``, and so on.

    Args:
        input_folder: Root directory to scan.
        output_folder: Destination directory; created if it does not exist.
    """
    # Make sure the destination exists before copying anything.
    os.makedirs(output_folder, exist_ok=True)

    supported_formats = ('.pdf', '.doc', '.docx')
    excluded_keywords = ('响应', '投标')
    required_keywords = ('竞争性', '招标文件', '磋商')

    for root, _dirs, files in os.walk(input_folder):
        for file in files:
            if any(keyword in file for keyword in excluded_keywords):
                continue
            if not any(keyword in file for keyword in required_keywords):
                continue
            # Lower-case only for the comparison; the copied file keeps its name.
            if not file.lower().endswith(supported_formats):
                continue

            file_path = os.path.join(root, file)
            output_path = os.path.join(output_folder, file)

            # Resolve name collisions by appending "(n)" to the base name.
            base, extension = os.path.splitext(output_path)
            unique_path = output_path
            count = 1
            while os.path.exists(unique_path):
                unique_path = f"{base}({count}){extension}"
                count += 1

            # copy2 also carries over file metadata such as timestamps.
            shutil.copy2(file_path, unique_path)
            print(f"Copied '{file}' to '{unique_path}'.")
|
||||
|
||||
# Example usage: runs as soon as this module is executed or imported.
input_folder = 'Z:\\货物类tb\\投标项目资料'  # directory tree to scan
output_folder = 'C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles'  # destination folder
find_and_copy_files(input_folder=input_folder, output_folder=output_folder)
|
60
flask_app/test_case/test_内存泄漏.py
Normal file
60
flask_app/test_case/test_内存泄漏.py
Normal file
@ -0,0 +1,60 @@
|
||||
# test_内存泄漏.py
|
||||
from memory_profiler import memory_usage
|
||||
import time
|
||||
from flask_app.general.读取文件.按页读取pdf import read_pdf_main
|
||||
from flask_app.general.通用功能函数 import get_global_logger
|
||||
from flask_app.routes.货物标解析main import preprocess_file_main
|
||||
|
||||
def process_data(size=1000000, delay=1):
    """Allocate a throw-away list and pause, to simulate memory-hungry work.

    Args:
        size: Number of integers placed in the list (default: one million).
        delay: Seconds to sleep while the list is still referenced
            (default: 1).

    Returns:
        The length of the allocated list.
    """
    data = list(range(size))  # the allocation whose footprint is being observed
    time.sleep(delay)  # keep the list alive for a moment, as real work would
    return len(data)
|
||||
|
||||
def call():
    """Run ``process_data`` once, discard its result, and return 1."""
    process_data()
    return 1
|
||||
|
||||
def process_pdf(pdf_path):
    """Pass ``pdf_path`` to ``read_pdf_main`` and return whatever it returns.

    NOTE(review): the original docstring describes this as the function that
    actually runs inside a child process; in the visible code it is called
    directly from ``main`` -- confirm the intended usage.
    """
    return read_pdf_main(pdf_path=pdf_path)
|
||||
def main():
    """Run preprocessing on three sample PDFs and print memory use per run.

    For each of three iterations this fetches a logger, samples memory with
    ``memory_usage()``, runs ``preprocess_file_main`` and ``process_pdf`` on
    the sample selected by ``i % 3``, prints the resulting path dictionary,
    samples memory again and prints both readings.
    """
    # (output_folder, pdf_path) pairs, selected by ``i % 3``.
    samples = (
        (
            r'C:\Users\Administrator\Desktop\fsdownload\4fb2f541-29c3-497f-9f0a-5216a10591a1\tmp',
            r'C:\Users\Administrator\Desktop\工程标\招标test文件夹\zbtest13.pdf',
        ),
        (
            r'C:\Users\Administrator\Desktop\fsdownload\4fb2f541-29c3-497f-9f0a-5216a10591a1\tmp',
            r'C:\Users\Administrator\Desktop\fsdownload\4fb2f541-29c3-497f-9f0a-5216a10591a1\tmp\ztbfile.pdf',
        ),
        (
            r'C:\Users\Administrator\Desktop\fsdownload\2c3e2291-6804-4ef0-b4a8-6f457edd5709\tmp',
            r'C:\Users\Administrator\Desktop\fsdownload\2c3e2291-6804-4ef0-b4a8-6f457edd5709\tmp\ztbfile.pdf',
        ),
    )

    # Response key -> key looked up in the preprocessing result.
    response_fields = (
        ('procurement_spec', 'procurement_path'),
        ('evaluation_method', 'evaluation_method_path'),
        ('qualification_docs', 'qualification_path'),
        ('notice_docs', 'notice_path'),
        ('clause_details', 'clause_path'),
        ('merged_baseinfo', 'merged_baseinfo_path'),
    )

    for i in range(3):
        logger = get_global_logger("123")
        mem_before = memory_usage()[0]

        output_folder, pdf_path = samples[i % 3]

        result = preprocess_file_main(output_folder, pdf_path, 2, logger)
        process_pdf(pdf_path)

        response_data = {
            'result': {
                response_key: result[result_key]
                for response_key, result_key in response_fields
            }
        }
        print(response_data)

        mem_after = memory_usage()[0]
        print(f"Memory before: {mem_before} MiB, Memory after: {mem_after} MiB")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
Loading…
x
Reference in New Issue
Block a user