This commit is contained in:
zy123 2024-08-30 11:56:11 +08:00
parent 6079b41cc7
commit 1d0211ce72
3 changed files with 24 additions and 13 deletions

View File

@ -38,7 +38,7 @@ def download_file(url, local_filename):
if __name__ == '__main__':
# 测试下载的URL
test_url ="https://temp-pdf2docx.oss-cn-wuhan-lr.aliyuncs.com/docx/zbfile.docx?Expires=1724866978&OSSAccessKeyId=TMP.3KhJJmRnpG3r3FKwULgxRm7pfH2wHVDgwo7HotjD9j3w23omXG1mwrnBtP7n1G6j4HWW6CURq7JHqZ4kmC6RBMAZFcoDsw&Signature=LMczkwe6nVNbAHX4xvgCs8MtZ48%3D"
test_url ="https://temp-pdf2docx.oss-cn-wuhan-lr.aliyuncs.com/docx/zbfile.docx?Expires=1725019436&OSSAccessKeyId=TMP.3KjfvBwPjtUPCu4BTNdkuN6BEvSbm1ibnrnTQX4ZdpSjCLX99a2Pq9bV52aA8JysVrbCZwhyuVjeMdJgdgxkqgPhwQfQoV&Signature=kXhJZZouEb82jQlhCwCpbm5%2Furs%3D"
local_file_name = 'C:\\Users\\Administrator\\Desktop\\招标文件\\output\\downloaded_file'
file_path = download_file(test_url, local_file_name)
if file_path:

View File

@ -12,8 +12,11 @@ from flask_app.main.download import download_file
from flask_app.main.招标文件解析 import main_processing
app = Flask(__name__)
class CSTFormatter(logging.Formatter):
"""自定义的 Formatter将日志的时间戳调整为中国标准时间UTC+8"""
def formatTime(self, record, datefmt=None):
ct = datetime.fromtimestamp(record.created) + timedelta(hours=8)
if datefmt:
@ -27,9 +30,10 @@ class CSTFormatter(logging.Formatter):
s = ct.strftime("%Y-%m-%d %H:%M:%S")
return s
def create_logger(unique_id):
"""为每个请求创建一个新的日志器,日志器的日志文件存放在指定的输出文件夹中"""
output_folder = f"/ZbparseProjects/static/output/{unique_id}"
output_folder = f"flask_app/static/output/{unique_id}"
# output_folder =f"C:/Users/Administrator/Desktop/招标文件/test/{unique_id}"
if not os.path.exists(output_folder):
os.makedirs(output_folder, exist_ok=True)
@ -52,22 +56,25 @@ def create_logger(unique_id):
logger.setLevel(logging.INFO)
return logger, output_folder
@app.route('/upload', methods=['POST'])
def zbparse():
    """Handle POST /upload: download the referenced file, parse it, return the result.

    The file URL is obtained from validate_request(). The file is then
    downloaded and processed exactly once via download_and_process_file(),
    and the resulting JSON file is turned into the HTTP response by
    generate_response().

    Returns:
        The response produced by generate_response() on success; the
        (response, status) tuple from validate_request() when the request is
        invalid; or a JSON error body with status 500 when processing fails
        or an exception is raised.
    """
    file_url = validate_request()
    # On failure validate_request() returns a (response, status) tuple
    # instead of the URL string; pass that error straight back to the client.
    if isinstance(file_url, tuple):
        return file_url
    try:
        app.logger.info("starting parsing url:" + file_url)
        # Download and process only once per request.
        final_json_path, output_folder, logger = download_and_process_file(file_url)
        if not final_json_path:
            return jsonify({'error': 'File processing failed'}), 500
        # Build the response first, so the JSON is read before any cleanup of
        # the output folder would happen.
        response = generate_response(final_json_path, logger)
        # remove_directory(output_folder)  # cleanup step, currently disabled
        return response
    except Exception as e:
        # Top-level request boundary: record the traceback on the global app
        # logger and report the failure to the caller.
        app.logger.exception('Exception occurred: ' + str(e))
        return jsonify({'error': str(e)}), 500
# def zbparse():
# file_url = validate_request()
# if isinstance(file_url, tuple): # Check if the returned value is an error response
@ -115,6 +122,7 @@ def process_and_stream(file_url):
}
yield f"data: {json.dumps(final_response)}\n\n"
def validate_request():
if not request.is_json:
return jsonify({'error': 'Missing JSON in request'}), 400
@ -123,6 +131,7 @@ def validate_request():
return jsonify({'error': 'No file URL provided'}), 400
return file_url
def download_and_process_file(file_url):
unique_id = str(uuid.uuid4()) # 生成一个唯一的 UUID
logger, output_folder = create_logger(unique_id)
@ -130,15 +139,16 @@ def download_and_process_file(file_url):
downloaded_filename = os.path.join(output_folder, filename)
# 下载文件,假设 download_file 函数已正确处理异常并返回文件路径
downloaded_filepath,file_type = download_file(file_url, downloaded_filename)
downloaded_filepath, file_type = download_file(file_url, downloaded_filename)
if downloaded_filepath is None or file_type == 3:
logger.error("Unsupported file type or failed to download file")
return None, output_folder, logger
logger.info("Local file path: " + downloaded_filepath)
processed_file_path = main_processing(output_folder, downloaded_filepath,file_type, unique_id)
return processed_file_path, output_folder,logger
processed_file_path = main_processing(output_folder, downloaded_filepath, file_type, unique_id)
return processed_file_path, output_folder, logger
@app.route('/api/test_zbparse', methods=['POST'])
def test_zbparse():
@ -148,6 +158,7 @@ def test_zbparse():
app.logger.error('Exception occurred: ' + str(e))
return jsonify({'error': str(e)}), 500
def test_process_and_stream():
# 模拟五段数据
data_segments = [
@ -178,12 +189,12 @@ def test_process_and_stream():
yield f"data: {json.dumps(final_response)}\n\n"
def generate_response(final_json_path,logger):
def generate_response(final_json_path, logger):
if not os.path.exists(final_json_path):
logger.error('JSON file not found at path: ' + final_json_path)
return jsonify({'error': 'JSON file not found'}), 404
with open(final_json_path, 'r', encoding='utf-8') as f:
logger.info('final_json_path:'+final_json_path)
logger.info('final_json_path:' + final_json_path)
zbparse_data = json.load(f)
json_str = json.dumps(zbparse_data, ensure_ascii=False)
return jsonify({
@ -215,5 +226,6 @@ def remove_directory(path):
except Exception as e:
app.logger.error(f"Failed to remove directory {path}: {str(e)}") # 使用全局 logger 记录
if __name__ == '__main__':
    # The server listens on all interfaces (0.0.0.0). Flask's debug mode
    # enables the interactive debugger, which lets anyone who can reach the
    # port execute code, so it is no longer hard-coded on. Set FLASK_DEBUG=1
    # (or "true"/"yes") in the environment to turn it on for local work.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)

View File

@ -33,9 +33,8 @@ def download_file(url, local_filename):
if __name__ == '__main__':
# 测试下载的URL
test_url = "https://bid-assistance.oss-cn-wuhan-lr.aliyuncs.com/tender/28f7c0af7c7041bbbdf88ce6848e8a38.pdf?Expires=1722165340&OSSAccessKeyId=TMP.3KfNYFQchGtZWbjd2M1jR6y7PPqYTq1QLZ4pzbfEwkz3LwGLepVvr9371bndcRoMhHFhohaUJxrhiL63jKoAZk6VWQfwh4&Signature=RmktXAOwEbP1BBrkSfARfHtuXh8%3D"
# 基本的本地文件名,不包括扩展名
local_file_name = 'C:\\Users\\zhangsan\\Desktop\\temp\\downloaded_file'
test_url = "https://temp-pdf2docx.oss-cn-wuhan-lr.aliyuncs.com/docx/zbfile.docx?Expires=1725019436&OSSAccessKeyId=TMP.3KjfvBwPjtUPCu4BTNdkuN6BEvSbm1ibnrnTQX4ZdpSjCLX99a2Pq9bV52aA8JysVrbCZwhyuVjeMdJgdgxkqgPhwQfQoV&Signature=kXhJZZouEb82jQlhCwCpbm5%2Furs%3D" # 基本的本地文件名,不包括扩展名
local_file_name = 'C:\\Users\\Administrator\\Desktop\\tmp\\download.docx'
file_path = download_file(test_url, local_file_name)
if file_path:
print(f"Downloaded file path: {file_path}")