From 5efdfb20074d1ebdf1775f2ea18830fec033b6c2 Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Thu, 9 Jan 2025 15:57:35 +0800 Subject: [PATCH] =?UTF-8?q?=E5=95=86=E5=8A=A1=E6=8A=80=E6=9C=AF=E8=AF=84?= =?UTF-8?q?=E5=88=86=E8=83=BD=E5=A4=84=E7=90=86=E5=A4=9A=E8=AF=84=E5=88=86?= =?UTF-8?q?=E8=A1=A8=E6=83=85=E5=86=B5=EF=BC=8C=E8=A7=84=E8=8C=83=E5=93=8D?= =?UTF-8?q?=E5=BA=94=E8=BF=94=E5=9B=9E=E6=8E=A5=E5=8F=A3=EF=BC=8C=E5=88=A4?= =?UTF-8?q?=E6=96=AD=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E6=98=AF=E5=90=A6?= =?UTF-8?q?=E4=B8=BA=E6=8B=9B=E6=A0=87=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- flask_app/general/判断是否是招标文件.py | 8 ++++---- flask_app/routes/get_deviation.py | 2 +- flask_app/routes/偏离表main.py | 6 +++--- flask_app/routes/小解析main.py | 6 +++--- flask_app/routes/工程标解析main.py | 6 +++--- flask_app/routes/货物标解析main.py | 6 +++--- flask_app/工程标/截取pdf工程标版.py | 10 +++++----- flask_app/货物标/截取pdf货物标版.py | 12 ++++++------ 8 files changed, 28 insertions(+), 28 deletions(-) diff --git a/flask_app/general/判断是否是招标文件.py b/flask_app/general/判断是否是招标文件.py index 02762b0..1b66766 100644 --- a/flask_app/general/判断是否是招标文件.py +++ b/flask_app/general/判断是否是招标文件.py @@ -4,10 +4,10 @@ from flask_app.general.通义千问long import upload_file, qianwen_long def judge_zbfile(pdf_path): - reader = PdfReader(pdf_path) - num_pages = len(reader.pages) - if num_pages <= 5: - return False + # reader = PdfReader(pdf_path) + # num_pages = len(reader.pages) + # if num_pages <= 5: + # return False user_query="""该文件是否属于招标文件?如果是的话,请返回'是',如果不是的话,返回'否'。请不要返回其他解释或内容。 以下是常见的招标文件类型: 公开招标文件、邀请招标文件、竞争性谈判文件、竞争性磋商文件、询价文件、问询文件、货物类招标文件、工程类招标文件、施工类招标文件、服务类招标文件、比选文件。 diff --git a/flask_app/routes/get_deviation.py b/flask_app/routes/get_deviation.py index e84a5ec..f0794d3 100644 --- a/flask_app/routes/get_deviation.py +++ b/flask_app/routes/get_deviation.py @@ -1,6 +1,6 @@ # flask_app/routes/get_deviation.py -from flask import Blueprint, jsonify, Response, g +from flask import Blueprint, Response, g import os from flask_app.general.format_change import download_file from flask_app.routes.偏离表main import get_tech_and_business_deviation diff --git a/flask_app/routes/偏离表main.py b/flask_app/routes/偏离表main.py index 410a1ae..5ae13a4 100644 --- a/flask_app/routes/偏离表main.py +++ b/flask_app/routes/偏离表main.py @@ -551,6 +551,9 @@ def process_functions_in_parallel(tech_deviation_info, busi_requirements_dict, z def get_tech_and_business_deviation(file_path,file_type,unique_id,output_folder,zb_type=2): global logger logger = get_global_logger(unique_id) + judge_res = judge_zbfile(file_path) + if not judge_res: + return None # 第一步:根据文件类型进行转换 if file_type == 1: # docx docx_path=file_path @@ -565,9 +568,6 @@ def get_tech_and_business_deviation(file_path,file_type,unique_id,output_folder, else: logger.error("不支持的文件类型!") return None - judge_res = judge_zbfile(pdf_path) - if not judge_res: - return None # 第二步:根据zb_type确定选择项和类别,并截取PDF if zb_type == 2: selections = [1, 2, 3, 5] diff --git a/flask_app/routes/小解析main.py b/flask_app/routes/小解析main.py index 5e4c8be..2fa915e 100644 --- a/flask_app/routes/小解析main.py +++ b/flask_app/routes/小解析main.py @@ -99,6 +99,9 @@ def little_parse_main(output_folder, file_path, file_type,zb_type,unique_id): """ logger = get_global_logger(unique_id) logger.info("zb_type:"+str(zb_type)) + judge_res = judge_zbfile(file_path) + if not judge_res: + return None # 根据文件类型处理文件路径 if file_type == 1: # docx docx_path = file_path @@ -112,9 +115,6 @@ def little_parse_main(output_folder, file_path, file_type,zb_type,unique_id): else: logger.error("Unsupported file type provided. Preprocessing halted.") return None - judge_res = judge_zbfile(pdf_path) - if not judge_res: - return None # 根据招标类型调用相应的解析函数 if zb_type == 2: # 货物标 combined_data = little_parse_goods(output_folder, pdf_path,logger) diff --git a/flask_app/routes/工程标解析main.py b/flask_app/routes/工程标解析main.py index a0b4552..1047b20 100644 --- a/flask_app/routes/工程标解析main.py +++ b/flask_app/routes/工程标解析main.py @@ -28,6 +28,9 @@ def preprocess_files(output_folder, file_path, file_type,logger): logger.info("starting 文件预处理...") logger.info("output_folder..." + output_folder) start_time=time.time() + judge_res = judge_zbfile(file_path) + if not judge_res: + return None # 根据文件类型处理文件路径 if file_type == 1: # docx # docx_path = file_path @@ -41,9 +44,6 @@ def preprocess_files(output_folder, file_path, file_type,logger): else: logger.error("Unsupported file type provided. Preprocessing halted.") return None - judge_res = judge_zbfile(pdf_path) - if not judge_res: - return None # 调用截取PDF多次 truncate_files = truncate_pdf_multiple(pdf_path, output_folder,logger,'engineering') print("切割出的文件:"+str(truncate_files)) diff --git a/flask_app/routes/货物标解析main.py b/flask_app/routes/货物标解析main.py index 606fdf9..445bd7e 100644 --- a/flask_app/routes/货物标解析main.py +++ b/flask_app/routes/货物标解析main.py @@ -24,6 +24,9 @@ executor = ThreadPoolExecutor() def preprocess_files(output_folder, file_path, file_type,logger): logger.info("starting 文件预处理...") start_time = time.time() + judge_res = judge_zbfile(file_path) + if not judge_res: + return None logger.info("output_folder..." + output_folder) # 根据文件类型处理文件路径 if file_type == 1: # docx @@ -39,9 +42,6 @@ def preprocess_files(output_folder, file_path, file_type,logger): logger.error("Unsupported file type provided. Preprocessing halted.") return None - judge_res=judge_zbfile(pdf_path) - if not judge_res: - return None # 调用截取PDF多次 truncate_files = truncate_pdf_multiple(pdf_path, output_folder,logger,'goods') # index: 0->商务技术服务要求 1->评标办法 2->资格审查 3->投标人须知前附表 4->投标人须知正文 diff --git a/flask_app/工程标/截取pdf工程标版.py b/flask_app/工程标/截取pdf工程标版.py index 0eab7e0..f9eeede 100644 --- a/flask_app/工程标/截取pdf工程标版.py +++ b/flask_app/工程标/截取pdf工程标版.py @@ -68,7 +68,7 @@ def extract_pages_tobidders_notice(pdf_path, output_folder, begin_pattern, begin # 定义基础的 mid_pattern base_mid_pattern = r'^\s*(?:[((]\s*[一二12]?\s*[))]\s*[、..]*|' \ r'[一二12][、..]+|[、..]+)\s*(说\s*明|总\s*则|名\s*词\s*解\s*释)' \ - r'|(?