From 6a2b1c2ffb6f29cea3647b34bb0b374cfc60c7cb Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Wed, 19 Feb 2025 09:41:38 +0800 Subject: [PATCH 01/10] =?UTF-8?q?2.19=20=E4=BD=BF=E7=94=A8=E8=84=9A?= =?UTF-8?q?=E6=9C=AC=E9=A2=9D=E5=A4=96=E5=88=A4=E6=96=ADdocx=E6=96=87?= =?UTF-8?q?=E6=A1=A3=E6=98=AF=E5=90=A6=E4=B8=BA=E4=B9=B1=E7=A0=81=E6=96=87?= =?UTF-8?q?=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_app/general/读取文件/按页读取pdf.py | 6 +-- flask_app/old_version/资格评审前判断_old.py | 4 +- flask_app/routes/判断是否是招标文件.py | 56 ++++++++++++--------- 3 files changed, 38 insertions(+), 28 deletions(-) diff --git a/flask_app/general/读取文件/按页读取pdf.py b/flask_app/general/读取文件/按页读取pdf.py index e44a222..c764a11 100644 --- a/flask_app/general/读取文件/按页读取pdf.py +++ b/flask_app/general/读取文件/按页读取pdf.py @@ -25,7 +25,7 @@ def extract_text_by_page_fitz(file_path): return result -def extract_text_by_page(file_path): +def extract_text_by_page_pypdf2(file_path): # common_header="" common_header = extract_common_header(file_path) # print(common_header) @@ -109,7 +109,7 @@ def save_extracted_text_to_txt(pdf_path, txt_path): """ try: # 提取文本内容 - extracted_text = extract_text_by_page(pdf_path) + extracted_text = extract_text_by_page_pypdf2(pdf_path) # 将提取的文本写入TXT文件 with open(txt_path, 'w', encoding='utf-8') as txt_file: @@ -203,7 +203,7 @@ if __name__ == '__main__': ress = extract_common_header(pdf_path) print(ress) print("-----------------") - extract_text_by_page(pdf_path) + extract_text_by_page_pypdf2(pdf_path) # res=extract_text_by_page_fitz(pdf_path) # print(res)磋商文件_tobidders_notice_part2.pdf # save_extracted_text_to_txt(file_path,"output.txt") diff --git a/flask_app/old_version/资格评审前判断_old.py b/flask_app/old_version/资格评审前判断_old.py index eb3dc87..384a767 100644 --- a/flask_app/old_version/资格评审前判断_old.py +++ b/flask_app/old_version/资格评审前判断_old.py @@ -1,9 +1,9 @@ -from flask_app.general.读取文件.按页读取pdf import extract_text_by_page +from flask_app.general.读取文件.按页读取pdf import extract_text_by_page_pypdf2 def check_strings_in_pdf(file_path): judge_list=['施工机械设备', '企业信息登记'] # Read text from PDF - text = extract_text_by_page(file_path) # Assuming this returns all text from the PDF + text = extract_text_by_page_pypdf2(file_path) # Assuming this returns all text from the PDF full_text = ''.join(text).replace('\n', '').replace(' ', '') # Clean up the text # Initialize the questions list diff --git a/flask_app/routes/判断是否是招标文件.py b/flask_app/routes/判断是否是招标文件.py index 94d7b83..2becb60 100644 --- a/flask_app/routes/判断是否是招标文件.py +++ b/flask_app/routes/判断是否是招标文件.py @@ -1,7 +1,7 @@ import time from PyPDF2 import PdfReader # 确保已安装 PyPDF2: pip install PyPDF2 from docx import Document - +import fitz from flask_app.general.llm.通义千问long import upload_file, qianwen_long def judge_zbfile_exec(file_path): @@ -12,28 +12,38 @@ def judge_zbfile_exec(file_path): start_time = time.time() # 检查文件是否为PDF格式 if file_path.lower().endswith('.pdf'): - # 使用 with 语句确保文件关闭 - with open(file_path, 'rb') as f: - reader = PdfReader(f) - num_pages = len(reader.pages) - if num_pages <= 5: - return False - elif file_path.lower().endswith('.docx'): - doc = Document(file_path) - accumulated_text = "" - chunk_size = 10 # 每次读取10个段落 - paragraphs = doc.paragraphs + try: + with open(file_path, 'rb') as f: + reader = PdfReader(f) + num_pages = len(reader.pages) + except Exception: + try: + doc = fitz.open(file_path) + num_pages = len(doc) + except Exception: + print("PDF 文件读取失败") + return False # 两种解析方式都失败,直接返回 False - for i in range(0, len(paragraphs), chunk_size): - chunk = paragraphs[i:i + chunk_size] - for para in chunk: - accumulated_text += para.text - # 判断累计字符数是否已经达到1000字, - if len(accumulated_text) >= 1000: - break - # 若累计内容不足1000字,则直接返回False - if len(accumulated_text) < 1000: - return False + if num_pages <= 5: + return False # 小于等于 5 页的 PDF 直接判定为非招标文件 + elif file_path.lower().endswith('.docx'): + try: + doc = Document(file_path) + accumulated_text = "" + chunk_size = 10 # 每次读取10个段落 + paragraphs = doc.paragraphs + + for i in range(0, len(paragraphs), chunk_size): + chunk = paragraphs[i:i + chunk_size] + for para in chunk: + accumulated_text += para.text + if len(accumulated_text) >= 1000: + break # 读取超过1000字后即可停止 + if len(accumulated_text) < 1000: + return False # 若累计内容不足1000字,则直接返回 False + except Exception: + print("DOCX 文件读取失败,可能为乱码文件") + return False # 解析失败直接返回 False # 使用大模型进行判断 user_query = """该文件是否属于招标文件?如果是的话,请返回'是',如果不是的话,返回'否'。请不要返回其他解释或内容。 以下是常见的招标文件类型: @@ -58,7 +68,7 @@ def judge_zbfile_exec(file_path): if __name__ == '__main__': start_time = time.time() - pdf_path = r"C:\Users\Administrator\Downloads\file1739842556194.docx" + pdf_path = r"C:\Users\Administrator\Desktop\fsdownload\19f53a17-ad4c-43b5-a7ed-981958ec3e0fs\ztbfile.docx" res = judge_zbfile_exec(pdf_path) if res: print("yes") From 4b378b27c203315ba8c05b06c69f6e7f1ed4ca2f Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Wed, 19 Feb 2025 11:37:22 +0800 Subject: [PATCH 02/10] =?UTF-8?q?2.19=20=E5=88=A4=E6=96=AD=E6=98=AF?= =?UTF-8?q?=E5=90=A6=E4=B8=BA=E6=8B=9B=E6=A0=87=E6=96=87=E4=BB=B6=EF=BC=9A?= =?UTF-8?q?=E5=88=9B=E5=BB=BA=E8=BF=9B=E7=A8=8B=E6=B1=A0+=E7=83=AD?= =?UTF-8?q?=E5=90=AF=E5=8A=A8=EF=BC=8C=E8=A7=A3=E5=86=B3=E5=86=85=E5=AD=98?= =?UTF-8?q?=E6=B3=84=E6=BC=8F=E9=97=AE=E9=A2=98=E7=9A=84=E5=90=8C=E6=97=B6?= =?UTF-8?q?=E6=8F=90=E5=8D=87=E9=80=9F=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_app/logger_setup.py | 6 +++--- flask_app/routes/judge_zbfile.py | 14 ++++++++++---- flask_app/routes/判断是否是招标文件.py | 2 ++ flask_app/run_serve.py | 20 ++++++++++++++++++-- flask_app/start_up.py | 7 +++---- 5 files changed, 36 insertions(+), 13 deletions(-) diff --git a/flask_app/logger_setup.py b/flask_app/logger_setup.py index 4d41753..ea725e1 100644 --- a/flask_app/logger_setup.py +++ b/flask_app/logger_setup.py @@ -66,9 +66,9 @@ def create_logger(app, subfolder): file_formatter = CSTFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') file_handler.setFormatter(file_formatter) logger.addHandler(file_handler) - # stream_handler = logging.StreamHandler() - # stream_handler.setFormatter(logging.Formatter('%(message)s')) - # logger.addHandler(stream_handler) + stream_handler = logging.StreamHandler() + stream_handler.setFormatter(logging.Formatter('%(message)s')) + logger.addHandler(stream_handler) logger.setLevel(logging.INFO) #Logger 只会处理大于或等于 INFO 级别的日志消息(例如 INFO、WARNING、ERROR、CRITICAL),而 DEBUG 级别的消息会被忽略。 logger.propagate = False g.logger = logger diff --git a/flask_app/routes/judge_zbfile.py b/flask_app/routes/judge_zbfile.py index 8e6884a..9f31205 100644 --- a/flask_app/routes/judge_zbfile.py +++ b/flask_app/routes/judge_zbfile.py @@ -1,9 +1,10 @@ +import multiprocessing import os import threading import time from enum import Enum from typing import Any -from flask import Blueprint, g +from flask import Blueprint, g, current_app from flask_app.general.format_change import download_file from flask_app.routes.判断是否是招标文件 import judge_zbfile_exec from flask_app.routes.utils import validate_and_setup_logger, create_response_normal, log_error_unique_id @@ -26,7 +27,13 @@ def judge_zbfile() -> Any: #判断是否是招标文件 unique_id = g.unique_id result = [None] # 用于存储结果的可变对象 done = threading.Event() # 标志判断是否完成 - + pool = current_app.process_pool # 使用全局的进程池 + def judge_zbfile_exec_sub(file_path): + result = pool.apply( + judge_zbfile_exec, # 你的实际执行函数 + args=(file_path,) + ) + return result def wrapper() -> None: """ 包装整个 judge_zbfile 的函数逻辑 @@ -38,7 +45,6 @@ def judge_zbfile() -> Any: #判断是否是招标文件 downloaded_filepath, file_type = download_file(file_url, downloaded_filename,True) if not downloaded_filepath or file_type == 4: - logger.error("下载地址不存在或不支持的文件类型!") log_error_unique_id(unique_id, 4) result[0] = JudgeResult.ERROR return @@ -46,7 +52,7 @@ def judge_zbfile() -> Any: #判断是否是招标文件 logger.info(f"Local file path: {downloaded_filepath}") # 调用实际的判断函数 - judge_result = judge_zbfile_exec(downloaded_filepath) + judge_result = judge_zbfile_exec_sub(downloaded_filepath) judge = JudgeResult.YES if judge_result else JudgeResult.NO end_time = time.time() diff --git a/flask_app/routes/判断是否是招标文件.py b/flask_app/routes/判断是否是招标文件.py index 2becb60..d608b40 100644 --- a/flask_app/routes/判断是否是招标文件.py +++ b/flask_app/routes/判断是否是招标文件.py @@ -44,6 +44,8 @@ def judge_zbfile_exec(file_path): except Exception: print("DOCX 文件读取失败,可能为乱码文件") return False # 解析失败直接返回 False + pre_endtime=time.time() + print(f"judge_zbfile_exec预处理耗时:{pre_endtime - start_time:.2f} 秒") # 使用大模型进行判断 user_query = """该文件是否属于招标文件?如果是的话,请返回'是',如果不是的话,返回'否'。请不要返回其他解释或内容。 以下是常见的招标文件类型: diff --git a/flask_app/run_serve.py b/flask_app/run_serve.py index 14a137a..233e441 100644 --- a/flask_app/run_serve.py +++ b/flask_app/run_serve.py @@ -1,8 +1,24 @@ #flask_app/run_serve.py +import threading +import time + +import requests from waitress import serve from flask_app.start_up import create_app - +def warmup_request(): + # 等待服务器完全启动,例如等待 1-2 秒 + time.sleep(5) + try: + url = "http://127.0.0.1:5000/judge_zbfile" + #url必须为永久地址,完成热启动,创建进程池 + payload = {"file_url": "https://intellbid-open.oss-cn-wuhan-lr.aliyuncs.com/test/094%E5%AE%9A%E7%A8%BF-%E6%B9%96%E5%8C%97%E5%B7%A5%E4%B8%9A%E5%A4%A7%E5%AD%A6%E8%BD%BB%E6%AD%A6%E5%99%A8%E6%A8%A1%E6%8B%9F%E5%B0%84%E5%87%BB%E8%AE%BE%E5%A4%87%E9%87%87%E8%B4%AD%E9%A1%B9%E7%9B%AE%E6%8B%9B%E6%A0%87%E6%96%87%E4%BB%B6.pdf"} # 根据实际情况设置 file_url + headers = {"Content-Type": "application/json"} + response = requests.post(url, json=payload, headers=headers) + print(f"Warm-up 请求发送成功,状态码:{response.status_code}") + except Exception as e: + print(f"Warm-up 请求出错:{e}") if __name__ == "__main__": app = create_app() - serve(app, host='0.0.0.0', port=5000) + threading.Thread(target=warmup_request, daemon=True).start() + serve(app, host='0.0.0.0', port=5000) \ No newline at end of file diff --git a/flask_app/start_up.py b/flask_app/start_up.py index 4ecb897..7eb140c 100644 --- a/flask_app/start_up.py +++ b/flask_app/start_up.py @@ -1,6 +1,6 @@ # flask_app/start_up.py -import gc -import os +from concurrent.futures import ProcessPoolExecutor +from multiprocessing import Pool from flask import Flask, g from flask_app.ConnectionLimiter import ConnectionLimiter @@ -13,11 +13,10 @@ from flask_app.routes.test_zbparse import test_zbparse_bp from flask_app.general.llm.清除file_id import delete_file_by_ids,read_file_ids from flask_app.routes.judge_zbfile import judge_zbfile_bp from flask_app.routes.test_preprocess import test_process_bp - - def create_app(): # 创建全局日志记录器 app = Flask(__name__) + app.process_pool = Pool(processes=10, maxtasksperchild=3) app.global_logger = create_logger_main('model_log') # 全局日志记录器 # 注册蓝图 app.register_blueprint(get_deviation_bp) From 07725ac868619bcd76c4dd27aa5d631ac0f72131 Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Wed, 19 Feb 2025 14:58:11 +0800 Subject: [PATCH 03/10] =?UTF-8?q?2.19=20=E5=88=A4=E6=96=AD=E6=98=AF?= =?UTF-8?q?=E5=90=A6=E6=8B=9B=E6=A0=87=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_app/start_up.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flask_app/start_up.py b/flask_app/start_up.py index 7eb140c..7de0925 100644 --- a/flask_app/start_up.py +++ b/flask_app/start_up.py @@ -16,7 +16,7 @@ from flask_app.routes.test_preprocess import test_process_bp def create_app(): # 创建全局日志记录器 app = Flask(__name__) - app.process_pool = Pool(processes=10, maxtasksperchild=3) + app.process_pool = Pool(processes=10, maxtasksperchild=1) app.global_logger = create_logger_main('model_log') # 全局日志记录器 # 注册蓝图 app.register_blueprint(get_deviation_bp) @@ -49,4 +49,4 @@ def create_app(): if __name__ == '__main__': app = create_app() - app.run(debug=True, host='0.0.0.0', port=5000) + app.run(debug=False, host='0.0.0.0', port=5000) From c1088699dede73768a97baf84c3a8b6924b1b9aa Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Wed, 19 Feb 2025 15:43:35 +0800 Subject: [PATCH 04/10] =?UTF-8?q?2.19=20=E5=88=A4=E6=96=AD=E6=98=AF?= =?UTF-8?q?=E5=90=A6=E6=8B=9B=E6=A0=87=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_app/start_up.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flask_app/start_up.py b/flask_app/start_up.py index 7de0925..819f0d3 100644 --- a/flask_app/start_up.py +++ b/flask_app/start_up.py @@ -16,7 +16,7 @@ from flask_app.routes.test_preprocess import test_process_bp def create_app(): # 创建全局日志记录器 app = Flask(__name__) - app.process_pool = Pool(processes=10, maxtasksperchild=1) + app.process_pool = Pool(processes=1, maxtasksperchild=1) app.global_logger = create_logger_main('model_log') # 全局日志记录器 # 注册蓝图 app.register_blueprint(get_deviation_bp) From f25cd9c75ec11026e7138593a98a27dace36d43b Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Wed, 19 Feb 2025 15:45:11 +0800 Subject: [PATCH 05/10] =?UTF-8?q?2.19=20=E5=88=A4=E6=96=AD=E6=98=AF?= =?UTF-8?q?=E5=90=A6=E6=8B=9B=E6=A0=87=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_app/start_up.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flask_app/start_up.py b/flask_app/start_up.py index 819f0d3..7de0925 100644 --- a/flask_app/start_up.py +++ b/flask_app/start_up.py @@ -16,7 +16,7 @@ from flask_app.routes.test_preprocess import test_process_bp def create_app(): # 创建全局日志记录器 app = Flask(__name__) - app.process_pool = Pool(processes=1, maxtasksperchild=1) + app.process_pool = Pool(processes=10, maxtasksperchild=1) app.global_logger = create_logger_main('model_log') # 全局日志记录器 # 注册蓝图 app.register_blueprint(get_deviation_bp) From 1795a4b2cf016a0e9739c9780436d839a875c0ce Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Wed, 19 Feb 2025 15:50:28 +0800 Subject: [PATCH 06/10] =?UTF-8?q?2.19=20=E5=88=A4=E6=96=AD=E6=98=AF?= =?UTF-8?q?=E5=90=A6=E6=8B=9B=E6=A0=87=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_app/routes/judge_zbfile.py | 10 +++++----- flask_app/run_serve.py | 2 +- flask_app/start_up.py | 1 - 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/flask_app/routes/judge_zbfile.py b/flask_app/routes/judge_zbfile.py index 9f31205..f1ed83e 100644 --- a/flask_app/routes/judge_zbfile.py +++ b/flask_app/routes/judge_zbfile.py @@ -27,12 +27,12 @@ def judge_zbfile() -> Any: #判断是否是招标文件 unique_id = g.unique_id result = [None] # 用于存储结果的可变对象 done = threading.Event() # 标志判断是否完成 - pool = current_app.process_pool # 使用全局的进程池 def judge_zbfile_exec_sub(file_path): - result = pool.apply( - judge_zbfile_exec, # 你的实际执行函数 - args=(file_path,) - ) + with multiprocessing.Pool(processes=1) as pool: + result = pool.apply( + judge_zbfile_exec, # 你的实际执行函数 + args=(file_path,) + ) return result def wrapper() -> None: """ diff --git a/flask_app/run_serve.py b/flask_app/run_serve.py index 233e441..f6d8d55 100644 --- a/flask_app/run_serve.py +++ b/flask_app/run_serve.py @@ -20,5 +20,5 @@ def warmup_request(): if __name__ == "__main__": app = create_app() - threading.Thread(target=warmup_request, daemon=True).start() + # threading.Thread(target=warmup_request, daemon=True).start() serve(app, host='0.0.0.0', port=5000) \ No newline at end of file diff --git a/flask_app/start_up.py b/flask_app/start_up.py index 7de0925..3106274 100644 --- a/flask_app/start_up.py +++ b/flask_app/start_up.py @@ -16,7 +16,6 @@ from flask_app.routes.test_preprocess import test_process_bp def create_app(): # 创建全局日志记录器 app = Flask(__name__) - app.process_pool = Pool(processes=10, maxtasksperchild=1) app.global_logger = create_logger_main('model_log') # 全局日志记录器 # 注册蓝图 app.register_blueprint(get_deviation_bp) From 5463a0b0d3e643109360456f422d82df725f4c08 Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Wed, 19 Feb 2025 16:09:50 +0800 Subject: [PATCH 07/10] =?UTF-8?q?2.19=20=E5=88=A4=E6=96=AD=E6=98=AF?= =?UTF-8?q?=E5=90=A6=E6=8B=9B=E6=A0=87=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_app/routes/judge_zbfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flask_app/routes/judge_zbfile.py b/flask_app/routes/judge_zbfile.py index f1ed83e..507c36e 100644 --- a/flask_app/routes/judge_zbfile.py +++ b/flask_app/routes/judge_zbfile.py @@ -52,7 +52,7 @@ def judge_zbfile() -> Any: #判断是否是招标文件 logger.info(f"Local file path: {downloaded_filepath}") # 调用实际的判断函数 - judge_result = judge_zbfile_exec_sub(downloaded_filepath) + judge_result = judge_zbfile_exec(downloaded_filepath) judge = JudgeResult.YES if judge_result else JudgeResult.NO end_time = time.time() From 0595a87efac0a609ad114727763ac8835c9fc883 Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Wed, 19 Feb 2025 16:17:33 +0800 Subject: [PATCH 08/10] =?UTF-8?q?2.19=20=E5=88=A4=E6=96=AD=E6=98=AF?= =?UTF-8?q?=E5=90=A6=E6=8B=9B=E6=A0=87=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_app/routes/judge_zbfile.py | 17 +++++++++++------ flask_app/start_up.py | 1 + 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/flask_app/routes/judge_zbfile.py b/flask_app/routes/judge_zbfile.py index 507c36e..ba43a31 100644 --- a/flask_app/routes/judge_zbfile.py +++ b/flask_app/routes/judge_zbfile.py @@ -27,12 +27,17 @@ def judge_zbfile() -> Any: #判断是否是招标文件 unique_id = g.unique_id result = [None] # 用于存储结果的可变对象 done = threading.Event() # 标志判断是否完成 + pool = current_app.process_pool # 使用全局的进程池 def judge_zbfile_exec_sub(file_path): - with multiprocessing.Pool(processes=1) as pool: - result = pool.apply( - judge_zbfile_exec, # 你的实际执行函数 - args=(file_path,) - ) + # with multiprocessing.Pool(processes=1) as pool: + # result = pool.apply( + # judge_zbfile_exec, # 你的实际执行函数 + # args=(file_path,) + # ) + result = pool.apply( + judge_zbfile_exec, # 你的实际执行函数 + args=(file_path,) + ) return result def wrapper() -> None: """ @@ -52,7 +57,7 @@ def judge_zbfile() -> Any: #判断是否是招标文件 logger.info(f"Local file path: {downloaded_filepath}") # 调用实际的判断函数 - judge_result = judge_zbfile_exec(downloaded_filepath) + judge_result = judge_zbfile_exec_sub(downloaded_filepath) judge = JudgeResult.YES if judge_result else JudgeResult.NO end_time = time.time() diff --git a/flask_app/start_up.py b/flask_app/start_up.py index 3106274..23b0fcf 100644 --- a/flask_app/start_up.py +++ b/flask_app/start_up.py @@ -16,6 +16,7 @@ from flask_app.routes.test_preprocess import test_process_bp def create_app(): # 创建全局日志记录器 app = Flask(__name__) + app.process_pool = Pool(processes=5, maxtasksperchild=1) app.global_logger = create_logger_main('model_log') # 全局日志记录器 # 注册蓝图 app.register_blueprint(get_deviation_bp) From 9435178b0e13f35776c06a2de2a682ac2ce0353b Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Wed, 19 Feb 2025 16:23:56 +0800 Subject: [PATCH 09/10] =?UTF-8?q?2.19=20=E5=88=A4=E6=96=AD=E6=98=AF?= =?UTF-8?q?=E5=90=A6=E6=8B=9B=E6=A0=87=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_app/routes/judge_zbfile.py | 2 +- flask_app/routes/判断是否是招标文件.py | 8 ++++---- flask_app/run_serve.py | 2 +- flask_app/start_up.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/flask_app/routes/judge_zbfile.py b/flask_app/routes/judge_zbfile.py index ba43a31..b34df9b 100644 --- a/flask_app/routes/judge_zbfile.py +++ b/flask_app/routes/judge_zbfile.py @@ -36,7 +36,7 @@ def judge_zbfile() -> Any: #判断是否是招标文件 # ) result = pool.apply( judge_zbfile_exec, # 你的实际执行函数 - args=(file_path,) + args=(file_path,logger,) ) return result def wrapper() -> None: diff --git a/flask_app/routes/判断是否是招标文件.py b/flask_app/routes/判断是否是招标文件.py index d608b40..8437fb7 100644 --- a/flask_app/routes/判断是否是招标文件.py +++ b/flask_app/routes/判断是否是招标文件.py @@ -4,7 +4,7 @@ from docx import Document import fitz from flask_app.general.llm.通义千问long import upload_file, qianwen_long -def judge_zbfile_exec(file_path): +def judge_zbfile_exec(file_path,logger): """ 判断文件是否属于招标文件,并返回结果。 """ @@ -45,7 +45,7 @@ def judge_zbfile_exec(file_path): print("DOCX 文件读取失败,可能为乱码文件") return False # 解析失败直接返回 False pre_endtime=time.time() - print(f"judge_zbfile_exec预处理耗时:{pre_endtime - start_time:.2f} 秒") + logger.info(f"judge_zbfile_exec预处理耗时:{pre_endtime - start_time:.2f} 秒") # 使用大模型进行判断 user_query = """该文件是否属于招标文件?如果是的话,请返回'是',如果不是的话,返回'否'。请不要返回其他解释或内容。 以下是常见的招标文件类型: @@ -59,8 +59,8 @@ def judge_zbfile_exec(file_path): file_id = upload_file(file_path) model_res = qianwen_long(file_id, user_query) end_time = time.time() - print(f"judge_zbfile_exec实际耗时:{end_time - start_time:.2f} 秒") - print(f"判断是否属于招标文件:{model_res}") + logger.info(f"judge_zbfile_exec实际耗时:{end_time - start_time:.2f} 秒") + print(f"判断是否属于招标文件:{model_res} 实际耗时:{end_time - start_time:.2f} 秒") return '否' not in model_res diff --git a/flask_app/run_serve.py b/flask_app/run_serve.py index f6d8d55..233e441 100644 --- a/flask_app/run_serve.py +++ b/flask_app/run_serve.py @@ -20,5 +20,5 @@ def warmup_request(): if __name__ == "__main__": app = create_app() - # threading.Thread(target=warmup_request, daemon=True).start() + threading.Thread(target=warmup_request, daemon=True).start() serve(app, host='0.0.0.0', port=5000) \ No newline at end of file diff --git a/flask_app/start_up.py b/flask_app/start_up.py index 23b0fcf..75ae428 100644 --- a/flask_app/start_up.py +++ b/flask_app/start_up.py @@ -16,7 +16,7 @@ from flask_app.routes.test_preprocess import test_process_bp def create_app(): # 创建全局日志记录器 app = Flask(__name__) - app.process_pool = Pool(processes=5, maxtasksperchild=1) + app.process_pool = Pool(processes=3, maxtasksperchild=1) app.global_logger = create_logger_main('model_log') # 全局日志记录器 # 注册蓝图 app.register_blueprint(get_deviation_bp) From 527788d5186927e75ee86384c177a12b8407b4c7 Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Wed, 19 Feb 2025 16:35:51 +0800 Subject: [PATCH 10/10] =?UTF-8?q?2.19=20=E5=88=A4=E6=96=AD=E6=98=AF?= =?UTF-8?q?=E5=90=A6=E6=8B=9B=E6=A0=87=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_app/routes/judge_zbfile.py | 2 +- flask_app/routes/判断是否是招标文件.py | 6 +++--- flask_app/start_up.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/flask_app/routes/judge_zbfile.py b/flask_app/routes/judge_zbfile.py index b34df9b..ba43a31 100644 --- a/flask_app/routes/judge_zbfile.py +++ b/flask_app/routes/judge_zbfile.py @@ -36,7 +36,7 @@ def judge_zbfile() -> Any: #判断是否是招标文件 # ) result = pool.apply( judge_zbfile_exec, # 你的实际执行函数 - args=(file_path,logger,) + args=(file_path,) ) return result def wrapper() -> None: diff --git a/flask_app/routes/判断是否是招标文件.py b/flask_app/routes/判断是否是招标文件.py index 8437fb7..ef1aec6 100644 --- a/flask_app/routes/判断是否是招标文件.py +++ b/flask_app/routes/判断是否是招标文件.py @@ -4,7 +4,7 @@ from docx import Document import fitz from flask_app.general.llm.通义千问long import upload_file, qianwen_long -def judge_zbfile_exec(file_path,logger): +def judge_zbfile_exec(file_path): """ 判断文件是否属于招标文件,并返回结果。 """ @@ -45,7 +45,7 @@ def judge_zbfile_exec(file_path,logger): print("DOCX 文件读取失败,可能为乱码文件") return False # 解析失败直接返回 False pre_endtime=time.time() - logger.info(f"judge_zbfile_exec预处理耗时:{pre_endtime - start_time:.2f} 秒") + print(f"judge_zbfile_exec预处理耗时:{pre_endtime - start_time:.2f} 秒") # 使用大模型进行判断 user_query = """该文件是否属于招标文件?如果是的话,请返回'是',如果不是的话,返回'否'。请不要返回其他解释或内容。 以下是常见的招标文件类型: @@ -59,7 +59,7 @@ def judge_zbfile_exec(file_path,logger): file_id = upload_file(file_path) model_res = qianwen_long(file_id, user_query) end_time = time.time() - logger.info(f"judge_zbfile_exec实际耗时:{end_time - start_time:.2f} 秒") + print(f"judge_zbfile_exec实际耗时:{end_time - start_time:.2f} 秒") print(f"判断是否属于招标文件:{model_res} 实际耗时:{end_time - start_time:.2f} 秒") return '否' not in model_res diff --git a/flask_app/start_up.py b/flask_app/start_up.py index 75ae428..b00343d 100644 --- a/flask_app/start_up.py +++ b/flask_app/start_up.py @@ -16,7 +16,7 @@ from flask_app.routes.test_preprocess import test_process_bp def create_app(): # 创建全局日志记录器 app = Flask(__name__) - app.process_pool = Pool(processes=3, maxtasksperchild=1) + app.process_pool = Pool(processes=10, maxtasksperchild=3) app.global_logger = create_logger_main('model_log') # 全局日志记录器 # 注册蓝图 app.register_blueprint(get_deviation_bp)