From 1d0211ce720648019e6b0aeee93e732736de1019 Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Fri, 30 Aug 2024 11:56:11 +0800 Subject: [PATCH] 8.30 --- flask_app/main/download.py | 2 +- flask_app/main/start_up.py | 30 ++++++++++++++++++++--------- flask_app/main/转化格式/download.py | 5 ++--- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/flask_app/main/download.py b/flask_app/main/download.py index 31695ea..5d5a22f 100644 --- a/flask_app/main/download.py +++ b/flask_app/main/download.py @@ -38,7 +38,7 @@ def download_file(url, local_filename): if __name__ == '__main__': # 测试下载的URL - test_url ="https://temp-pdf2docx.oss-cn-wuhan-lr.aliyuncs.com/docx/zbfile.docx?Expires=1724866978&OSSAccessKeyId=TMP.3KhJJmRnpG3r3FKwULgxRm7pfH2wHVDgwo7HotjD9j3w23omXG1mwrnBtP7n1G6j4HWW6CURq7JHqZ4kmC6RBMAZFcoDsw&Signature=LMczkwe6nVNbAHX4xvgCs8MtZ48%3D" + test_url ="https://temp-pdf2docx.oss-cn-wuhan-lr.aliyuncs.com/docx/zbfile.docx?Expires=1725019436&OSSAccessKeyId=TMP.3KjfvBwPjtUPCu4BTNdkuN6BEvSbm1ibnrnTQX4ZdpSjCLX99a2Pq9bV52aA8JysVrbCZwhyuVjeMdJgdgxkqgPhwQfQoV&Signature=kXhJZZouEb82jQlhCwCpbm5%2Furs%3D" local_file_name = 'C:\\Users\\Administrator\\Desktop\\招标文件\\output\\downloaded_file' file_path = download_file(test_url, local_file_name) if file_path: diff --git a/flask_app/main/start_up.py b/flask_app/main/start_up.py index a334dc9..0df0488 100644 --- a/flask_app/main/start_up.py +++ b/flask_app/main/start_up.py @@ -12,8 +12,11 @@ from flask_app.main.download import download_file from flask_app.main.招标文件解析 import main_processing app = Flask(__name__) + + class CSTFormatter(logging.Formatter): """自定义的 Formatter,将日志的时间戳调整为中国标准时间(UTC+8)""" + def formatTime(self, record, datefmt=None): ct = datetime.fromtimestamp(record.created) + timedelta(hours=8) if datefmt: @@ -27,9 +30,10 @@ class CSTFormatter(logging.Formatter): s = ct.strftime("%Y-%m-%d %H:%M:%S") return s + def create_logger(unique_id): """为每个请求创建一个新的日志器,日志器的日志文件存放在指定的输出文件夹中""" - output_folder = f"/ZbparseProjects/static/output/{unique_id}" + output_folder = f"flask_app/static/output/{unique_id}" # output_folder =f"C:/Users/Administrator/Desktop/招标文件/test/{unique_id}" if not os.path.exists(output_folder): os.makedirs(output_folder, exist_ok=True) @@ -52,22 +56,25 @@ def create_logger(unique_id): logger.setLevel(logging.INFO) return logger, output_folder + @app.route('/upload', methods=['POST']) def zbparse(): file_url = validate_request() if isinstance(file_url, tuple): # Check if the returned value is an error response return file_url try: - app.logger.info("starting parsing url:"+file_url) - final_json_path, output_folder,logger = download_and_process_file(file_url) + app.logger.info("starting parsing url:" + file_url) + final_json_path, output_folder, logger = download_and_process_file(file_url) if not final_json_path: return jsonify({'error': 'File processing failed'}), 500 - response = generate_response(final_json_path,logger) # 先获取响应内容 + response = generate_response(final_json_path, logger) # 先获取响应内容 # remove_directory(output_folder) # 然后删除文件夹 return response # 最后返回获取的响应 except Exception as e: app.logger.error('Exception occurred: ' + str(e)) # 使用全局 logger 记录 return jsonify({'error': str(e)}), 500 + + # def zbparse(): # file_url = validate_request() # if isinstance(file_url, tuple): # Check if the returned value is an error response @@ -115,6 +122,7 @@ def process_and_stream(file_url): } yield f"data: {json.dumps(final_response)}\n\n" + def validate_request(): if not request.is_json: return jsonify({'error': 'Missing JSON in request'}), 400 @@ -123,6 +131,7 @@ def validate_request(): return jsonify({'error': 'No file URL provided'}), 400 return file_url + def download_and_process_file(file_url): unique_id = str(uuid.uuid4()) # 生成一个唯一的 UUID logger, output_folder = create_logger(unique_id) @@ -130,15 +139,16 @@ def download_and_process_file(file_url): downloaded_filename = os.path.join(output_folder, filename) # 下载文件,假设 download_file 函数已正确处理异常并返回文件路径 - downloaded_filepath,file_type = download_file(file_url, downloaded_filename) + downloaded_filepath, file_type = download_file(file_url, downloaded_filename) if downloaded_filepath is None or file_type == 3: logger.error("Unsupported file type or failed to download file") return None, output_folder, logger logger.info("Local file path: " + downloaded_filepath) - processed_file_path = main_processing(output_folder, downloaded_filepath,file_type, unique_id) - return processed_file_path, output_folder,logger + processed_file_path = main_processing(output_folder, downloaded_filepath, file_type, unique_id) + return processed_file_path, output_folder, logger + @app.route('/api/test_zbparse', methods=['POST']) def test_zbparse(): @@ -148,6 +158,7 @@ def test_zbparse(): app.logger.error('Exception occurred: ' + str(e)) return jsonify({'error': str(e)}), 500 + def test_process_and_stream(): # 模拟五段数据 data_segments = [ @@ -178,12 +189,12 @@ def test_process_and_stream(): yield f"data: {json.dumps(final_response)}\n\n" -def generate_response(final_json_path,logger): +def generate_response(final_json_path, logger): if not os.path.exists(final_json_path): logger.error('JSON file not found at path: ' + final_json_path) return jsonify({'error': 'JSON file not found'}), 404 with open(final_json_path, 'r', encoding='utf-8') as f: - logger.info('final_json_path:'+final_json_path) + logger.info('final_json_path:' + final_json_path) zbparse_data = json.load(f) json_str = json.dumps(zbparse_data, ensure_ascii=False) return jsonify({ @@ -215,5 +226,6 @@ def remove_directory(path): except Exception as e: app.logger.error(f"Failed to remove directory {path}: {str(e)}") # 使用全局 logger 记录 + if __name__ == '__main__': app.run(debug=True, host='0.0.0.0', port=5000) diff --git a/flask_app/main/转化格式/download.py b/flask_app/main/转化格式/download.py index 9e5a789..57b693d 100644 --- a/flask_app/main/转化格式/download.py +++ b/flask_app/main/转化格式/download.py @@ -33,9 +33,8 @@ def download_file(url, local_filename): if __name__ == '__main__': # 测试下载的URL - test_url = "https://bid-assistance.oss-cn-wuhan-lr.aliyuncs.com/tender/28f7c0af7c7041bbbdf88ce6848e8a38.pdf?Expires=1722165340&OSSAccessKeyId=TMP.3KfNYFQchGtZWbjd2M1jR6y7PPqYTq1QLZ4pzbfEwkz3LwGLepVvr9371bndcRoMhHFhohaUJxrhiL63jKoAZk6VWQfwh4&Signature=RmktXAOwEbP1BBrkSfARfHtuXh8%3D" - # 基本的本地文件名,不包括扩展名 - local_file_name = 'C:\\Users\\zhangsan\\Desktop\\temp\\downloaded_file' + test_url = "https://temp-pdf2docx.oss-cn-wuhan-lr.aliyuncs.com/docx/zbfile.docx?Expires=1725019436&OSSAccessKeyId=TMP.3KjfvBwPjtUPCu4BTNdkuN6BEvSbm1ibnrnTQX4ZdpSjCLX99a2Pq9bV52aA8JysVrbCZwhyuVjeMdJgdgxkqgPhwQfQoV&Signature=kXhJZZouEb82jQlhCwCpbm5%2Furs%3D" # 基本的本地文件名,不包括扩展名 + local_file_name = 'C:\\Users\\Administrator\\Desktop\\tmp\\download.docx' file_path = download_file(test_url, local_file_name) if file_path: print(f"Downloaded file path: {file_path}")