zbparse/flask_app/main/start_up.py

232 lines
8.5 KiB
Python
Raw Normal View History

2024-08-29 16:37:09 +08:00
import logging
import shutil
import sys
import time
import uuid
from datetime import datetime, timedelta
2024-08-29 17:30:49 +08:00
from flask import Flask, request, jsonify, Response, stream_with_context
2024-08-29 16:37:09 +08:00
import json
import os
2024-08-29 17:30:49 +08:00
from flask_app.main.download import download_file
from flask_app.main.招标文件解析 import main_processing
2024-08-29 16:37:09 +08:00
app = Flask(__name__)
2024-08-30 11:56:11 +08:00
2024-08-29 16:37:09 +08:00
class CSTFormatter(logging.Formatter):
    """Formatter that renders log timestamps in China Standard Time (UTC+8).

    The conversion is computed from the record's epoch timestamp, so the
    output does not depend on the time zone configured on the host.
    """

    # Fixed offset applied to UTC to obtain UTC+8.
    _CST_OFFSET = timedelta(hours=8)
    # Naive UTC epoch; adding ``record.created`` seconds yields naive UTC time.
    _EPOCH = datetime(1970, 1, 1)

    def formatTime(self, record, datefmt=None):
        """Return the creation time of *record* as UTC+8 text.

        Args:
            record: The ``logging.LogRecord`` being formatted; its ``created``
                (epoch seconds) and ``msecs`` attributes are read.
            datefmt: Optional ``strftime`` pattern. When given it is used
                verbatim and no milliseconds are appended.

        Returns:
            ``YYYY-MM-DD HH:MM:SS,mmm`` when no *datefmt* is given and the
            format string uses ``asctime``; otherwise the value without the
            millisecond suffix, or the *datefmt* rendering.
        """
        ct = self._EPOCH + timedelta(seconds=record.created) + self._CST_OFFSET
        if datefmt:
            return ct.strftime(datefmt)
        s = ct.strftime("%Y-%m-%d %H:%M:%S")
        if self.usesTime():
            # record.msecs is a float; the "d" format code only accepts
            # integers, so convert explicitly before zero-padding.
            s = f"{s},{int(record.msecs):03d}"
        return s
2024-08-30 11:56:11 +08:00
2024-08-29 16:37:09 +08:00
def create_logger(unique_id):
    """Create (or fetch) the logger for one request and its output folder.

    The logger is named after *unique_id*. ``logging.getLogger`` returns the
    same object for the same name, so handlers are only attached the first
    time a given id is seen.

    Args:
        unique_id: Identifier used both as the logger name and as the name of
            the sub-folder under ``flask_app/static/output``.

    Returns:
        tuple: ``(logger, output_folder)`` where ``output_folder`` is the
        relative folder path that also holds ``log.txt``.
    """
    output_folder = f"flask_app/static/output/{unique_id}"
    # exist_ok=True already covers the "folder is there" case, so no
    # separate existence check is needed.
    os.makedirs(output_folder, exist_ok=True)
    log_path = os.path.join(output_folder, "log.txt")

    logger = logging.getLogger(unique_id)
    if not logger.handlers:  # avoid attaching duplicate handlers
        # File handler: full, timestamped lines. UTF-8 is set explicitly so
        # non-ASCII messages do not depend on the platform default encoding.
        file_handler = logging.FileHandler(log_path, encoding="utf-8")
        file_handler.setFormatter(
            CSTFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        )
        logger.addHandler(file_handler)

        # Console handler: message text only.
        stream_handler = logging.StreamHandler(sys.stdout)
        stream_handler.setFormatter(logging.Formatter('%(message)s'))
        logger.addHandler(stream_handler)
    logger.setLevel(logging.INFO)
    return logger, output_folder
2024-08-30 11:56:11 +08:00
2024-08-29 16:37:09 +08:00
@app.route('/upload', methods=['POST'])
def zbparse():
    """Handle ``POST /upload``: download the referenced file and return its parsed JSON.

    Returns the validation error tuple unchanged when the request is invalid,
    a 500 JSON error when processing yields no result path or raises, and
    otherwise whatever ``generate_response`` produces.
    """
    validated = validate_request()
    # validate_request signals failure by returning a (response, status) tuple.
    if isinstance(validated, tuple):
        return validated

    try:
        app.logger.info("starting parsing url:" + validated)
        result_path, _output_folder, request_logger = download_and_process_file(validated)
        if not result_path:
            return jsonify({'error': 'File processing failed'}), 500
        # The output folder is left on disk; no cleanup is performed here.
        return generate_response(result_path, request_logger)
    except Exception as exc:
        # Recorded on the application-wide logger rather than the per-request one.
        app.logger.error('Exception occurred: ' + str(exc))
        return jsonify({'error': str(exc)}), 500
2024-08-30 11:56:11 +08:00
2024-08-30 09:53:04 +08:00
# def zbparse():
# file_url = validate_request()
# if isinstance(file_url, tuple): # Check if the returned value is an error response
# return file_url
# try:
# app.logger.info("starting parsing url:" + file_url)
# return Response(stream_with_context(process_and_stream(file_url)), content_type='text/event-stream')
# except Exception as e:
# app.logger.error('Exception occurred: ' + str(e))
# return jsonify({'error': str(e)}), 500
2024-08-29 16:37:09 +08:00
def process_and_stream(file_url):
    """Download *file_url*, process it, and yield the results as SSE frames.

    Each yielded string has the form ``data: <json>\\n\\n`` where the JSON
    object carries ``message``, ``filename`` and ``data`` keys. A single
    failure frame is yielded if the download fails or the file type is 3
    (logged as unsupported); otherwise one frame per item produced by
    ``main_processing`` followed by a terminating frame whose ``data`` is
    ``'END'``.
    """

    def _frame(payload):
        # Serialize one payload in server-sent-event wire format.
        return f"data: {json.dumps(payload)}\n\n"

    request_id = str(uuid.uuid4())
    logger, output_folder = create_logger(request_id)
    target_path = os.path.join(output_folder, "ztbfile")
    local_path, file_type = download_file(file_url, target_path)

    if local_path is None or file_type == 3:
        logger.error("Unsupported file type or failed to download file")
        yield _frame({
            'message': 'File processing failed',
            'filename': None,
            'data': json.dumps({'error': 'File processing failed'}),
        })
        return

    logger.info("Local file path: " + local_path)
    base_name = os.path.basename(local_path)
    for piece in main_processing(output_folder, local_path, file_type, request_id):
        yield _frame({
            'message': 'Processing',
            'filename': base_name,
            'data': piece,
        })

    yield _frame({
        'message': 'File uploaded and processed successfully',
        'filename': base_name,
        'data': 'END',
    })
2024-08-30 11:56:11 +08:00
2024-08-29 16:37:09 +08:00
def validate_request():
    """Extract the file URL from the JSON body of the current request.

    The previous implementation indexed ``file_url`` with ``[0]``, so the
    field is presumably sent as a list of URLs; the first entry is used. A
    plain string value is accepted as the URL itself instead of being
    truncated to its first character.

    Returns:
        The URL string on success, otherwise a ``(response, 400)`` tuple
        describing the problem. Callers distinguish the two cases with
        ``isinstance(result, tuple)``.
    """
    if not request.is_json:
        return jsonify({'error': 'Missing JSON in request'}), 400

    payload = request.json
    # A JSON body that is not an object (e.g. a bare list) has no 'file_url'.
    candidate = payload.get('file_url') if isinstance(payload, dict) else None
    if isinstance(candidate, (list, tuple)):
        # Guard the empty list so it yields a 400 rather than an IndexError.
        candidate = candidate[0] if candidate else None

    # A missing key, empty value or non-string entry is a client error.
    if not candidate or not isinstance(candidate, str):
        return jsonify({'error': 'No file URL provided'}), 400
    return candidate
2024-08-30 11:56:11 +08:00
2024-08-29 16:37:09 +08:00
def download_and_process_file(file_url):
    """Download *file_url* into a fresh per-request folder and process it.

    Returns:
        tuple: ``(result, output_folder, logger)``. ``result`` is ``None``
        when the download fails or the file type is 3 (logged as
        unsupported); otherwise it is whatever ``main_processing`` returns,
        presumably the path of the final JSON file (the caller passes it to
        ``generate_response``).
    """
    # A new UUID keeps each request's folder and logger separate.
    request_id = str(uuid.uuid4())
    logger, output_folder = create_logger(request_id)
    target_path = os.path.join(output_folder, "ztbfile")

    # download_file is expected to report failure via a None path.
    local_path, file_type = download_file(file_url, target_path)
    download_failed = local_path is None or file_type == 3
    if download_failed:
        logger.error("Unsupported file type or failed to download file")
        return None, output_folder, logger

    logger.info("Local file path: " + local_path)
    result = main_processing(output_folder, local_path, file_type, request_id)
    return result, output_folder, logger
2024-08-29 16:37:09 +08:00
@app.route('/api/test_zbparse', methods=['POST'])
def test_zbparse():
    """Handle ``POST /api/test_zbparse`` by streaming mock data as server-sent events.

    Returns a ``text/event-stream`` response fed by
    ``test_process_and_stream``; if building the response raises, the error
    is logged on the application logger and returned as a 500 JSON body.
    """
    try:
        event_source = stream_with_context(test_process_and_stream())
        return Response(event_source, content_type='text/event-stream')
    except Exception as exc:
        app.logger.error('Exception occurred: ' + str(exc))
        return jsonify({'error': str(exc)}), 500
2024-08-30 11:56:11 +08:00
2024-08-29 16:37:09 +08:00
def test_process_and_stream():
    """Yield five mock result segments, then an end marker, as SSE frames.

    Every yielded string has the form ``data: <json>\\n\\n``. The generator
    sleeps for 5 seconds after each of the five segments, and the last frame
    carries ``'END'`` in its ``data`` field.
    """
    mock_filename = "test_file.pdf"
    # Five hard-coded segments of simulated parsing output.
    mock_segments = (
        {"base_info": {"project_name": "测试项目1", "project_code": "TP001"}},
        {"review_standards": ["标准1", "标准2", "标准3"]},
        {"evaluation_standards": ["评估标准A", "评估标准B"]},
        {"invalid_requirements": ["无效要求X", "无效要求Y"]},
        {"bidding_documents_requirements": ["文件要求1", "文件要求2"]},
    )

    position = 0
    for segment in mock_segments:
        position += 1
        frame = {
            'message': f'Processing segment {position}',
            'filename': mock_filename,
            'data': segment,
        }
        yield "data: " + json.dumps(frame) + "\n\n"
        time.sleep(5)  # pause 5 seconds after each segment

    # Terminating frame that signals the end of the stream.
    closing_frame = {
        'message': 'File processed successfully',
        'filename': mock_filename,
        'data': 'END',
    }
    yield "data: " + json.dumps(closing_frame) + "\n\n"
2024-08-30 11:56:11 +08:00
def generate_response(final_json_path, logger):
    """Build the HTTP response carrying the contents of the result JSON file.

    Args:
        final_json_path: Path of the JSON file to return.
        logger: Logger used to record the path or the missing-file error.

    Returns:
        A JSON response with ``message``, ``filename`` and ``data`` keys,
        where ``data`` is the file content re-serialized as a JSON string
        (non-ASCII characters kept as-is), or a ``(response, 404)`` tuple
        when the file does not exist.
    """
    if os.path.exists(final_json_path):
        with open(final_json_path, 'r', encoding='utf-8') as handle:
            logger.info('final_json_path:' + final_json_path)
            parsed = json.load(handle)
        # The payload is embedded as a string, not as a nested JSON object.
        serialized = json.dumps(parsed, ensure_ascii=False)
        body = {
            'message': 'File uploaded and processed successfully',
            'filename': os.path.basename(final_json_path),
            'data': serialized,
        }
        return jsonify(body)

    logger.error('JSON file not found at path: ' + final_json_path)
    return jsonify({'error': 'JSON file not found'}), 404
2024-08-29 16:37:09 +08:00
# @app.route('/get_json', methods=['POST'])
# def testjson():
# final_json_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\temp4\\fd55f067-2cf6-475c-b7ce-4498f6606bf6\\final_result.json"
# with open(final_json_path, 'r', encoding='utf-8') as f:
# print('final_json_path:'+final_json_path)
# zbparse_data = json.load(f)
# json_str = json.dumps(zbparse_data, ensure_ascii=False)
# print(json_str)
# return jsonify({
# 'message': 'File uploaded and processed successfully',
# 'filename': os.path.basename(final_json_path),
# 'data': json_str
# })
def remove_directory(path):
    """Recursively delete *path*, logging the outcome instead of raising.

    Both success and failure are recorded on the application-wide logger;
    any exception raised while deleting is caught and reported there.
    """
    try:
        shutil.rmtree(path)
        success_note = f"Successfully removed directory: {path}"
        app.logger.info(success_note)
    except Exception as exc:
        failure_note = f"Failed to remove directory {path}: {exc!s}"
        app.logger.error(failure_note)
2024-08-30 11:56:11 +08:00
2024-08-29 16:37:09 +08:00
if __name__ == "__main__":
    # Start the built-in Flask server when this file is run as a script,
    # listening on all interfaces at port 5000.
    # NOTE(review): debug=True together with host 0.0.0.0 exposes the
    # interactive debugger to the network; confirm this entry point is only
    # used for local development.
    app.run(host='0.0.0.0', port=5000, debug=True)