商务技术评分能处理多评分表情况,规范响应返回接口,判断上传文件是否为招标文件

This commit is contained in:
zy123 2025-01-09 15:57:35 +08:00
parent fc67ef8e52
commit 5efdfb2007
8 changed files with 28 additions and 28 deletions

View File

@ -4,10 +4,10 @@ from flask_app.general.通义千问long import upload_file, qianwen_long
def judge_zbfile(pdf_path): def judge_zbfile(pdf_path):
reader = PdfReader(pdf_path) # reader = PdfReader(pdf_path)
num_pages = len(reader.pages) # num_pages = len(reader.pages)
if num_pages <= 5: # if num_pages <= 5:
return False # return False
user_query="""该文件是否属于招标文件?如果是的话,请返回'',如果不是的话,返回''。请不要返回其他解释或内容。 user_query="""该文件是否属于招标文件?如果是的话,请返回'',如果不是的话,返回''。请不要返回其他解释或内容。
以下是常见的招标文件类型 以下是常见的招标文件类型
公开招标文件邀请招标文件竞争性谈判文件竞争性磋商文件询价文件问询文件货物类招标文件工程类招标文件施工类招标文件服务类招标文件比选文件 公开招标文件邀请招标文件竞争性谈判文件竞争性磋商文件询价文件问询文件货物类招标文件工程类招标文件施工类招标文件服务类招标文件比选文件

View File

@ -1,6 +1,6 @@
# flask_app/routes/get_deviation.py # flask_app/routes/get_deviation.py
from flask import Blueprint, jsonify, Response, g from flask import Blueprint, Response, g
import os import os
from flask_app.general.format_change import download_file from flask_app.general.format_change import download_file
from flask_app.routes.偏离表main import get_tech_and_business_deviation from flask_app.routes.偏离表main import get_tech_and_business_deviation

View File

@ -551,6 +551,9 @@ def process_functions_in_parallel(tech_deviation_info, busi_requirements_dict, z
def get_tech_and_business_deviation(file_path,file_type,unique_id,output_folder,zb_type=2): def get_tech_and_business_deviation(file_path,file_type,unique_id,output_folder,zb_type=2):
global logger global logger
logger = get_global_logger(unique_id) logger = get_global_logger(unique_id)
judge_res = judge_zbfile(file_path)
if not judge_res:
return None
# 第一步:根据文件类型进行转换 # 第一步:根据文件类型进行转换
if file_type == 1: # docx if file_type == 1: # docx
docx_path=file_path docx_path=file_path
@ -565,9 +568,6 @@ def get_tech_and_business_deviation(file_path,file_type,unique_id,output_folder,
else: else:
logger.error("不支持的文件类型!") logger.error("不支持的文件类型!")
return None return None
judge_res = judge_zbfile(pdf_path)
if not judge_res:
return None
# 第二步根据zb_type确定选择项和类别并截取PDF # 第二步根据zb_type确定选择项和类别并截取PDF
if zb_type == 2: if zb_type == 2:
selections = [1, 2, 3, 5] selections = [1, 2, 3, 5]

View File

@ -99,6 +99,9 @@ def little_parse_main(output_folder, file_path, file_type,zb_type,unique_id):
""" """
logger = get_global_logger(unique_id) logger = get_global_logger(unique_id)
logger.info("zb_type:"+str(zb_type)) logger.info("zb_type:"+str(zb_type))
judge_res = judge_zbfile(file_path)
if not judge_res:
return None
# 根据文件类型处理文件路径 # 根据文件类型处理文件路径
if file_type == 1: # docx if file_type == 1: # docx
docx_path = file_path docx_path = file_path
@ -112,9 +115,6 @@ def little_parse_main(output_folder, file_path, file_type,zb_type,unique_id):
else: else:
logger.error("Unsupported file type provided. Preprocessing halted.") logger.error("Unsupported file type provided. Preprocessing halted.")
return None return None
judge_res = judge_zbfile(pdf_path)
if not judge_res:
return None
# 根据招标类型调用相应的解析函数 # 根据招标类型调用相应的解析函数
if zb_type == 2: # 货物标 if zb_type == 2: # 货物标
combined_data = little_parse_goods(output_folder, pdf_path,logger) combined_data = little_parse_goods(output_folder, pdf_path,logger)

View File

@ -28,6 +28,9 @@ def preprocess_files(output_folder, file_path, file_type,logger):
logger.info("starting 文件预处理...") logger.info("starting 文件预处理...")
logger.info("output_folder..." + output_folder) logger.info("output_folder..." + output_folder)
start_time=time.time() start_time=time.time()
judge_res = judge_zbfile(file_path)
if not judge_res:
return None
# 根据文件类型处理文件路径 # 根据文件类型处理文件路径
if file_type == 1: # docx if file_type == 1: # docx
# docx_path = file_path # docx_path = file_path
@ -41,9 +44,6 @@ def preprocess_files(output_folder, file_path, file_type,logger):
else: else:
logger.error("Unsupported file type provided. Preprocessing halted.") logger.error("Unsupported file type provided. Preprocessing halted.")
return None return None
judge_res = judge_zbfile(pdf_path)
if not judge_res:
return None
# 调用截取PDF多次 # 调用截取PDF多次
truncate_files = truncate_pdf_multiple(pdf_path, output_folder,logger,'engineering') truncate_files = truncate_pdf_multiple(pdf_path, output_folder,logger,'engineering')
print("切割出的文件:"+str(truncate_files)) print("切割出的文件:"+str(truncate_files))

View File

@ -24,6 +24,9 @@ executor = ThreadPoolExecutor()
def preprocess_files(output_folder, file_path, file_type,logger): def preprocess_files(output_folder, file_path, file_type,logger):
logger.info("starting 文件预处理...") logger.info("starting 文件预处理...")
start_time = time.time() start_time = time.time()
judge_res = judge_zbfile(file_path)
if not judge_res:
return None
logger.info("output_folder..." + output_folder) logger.info("output_folder..." + output_folder)
# 根据文件类型处理文件路径 # 根据文件类型处理文件路径
if file_type == 1: # docx if file_type == 1: # docx
@ -39,9 +42,6 @@ def preprocess_files(output_folder, file_path, file_type,logger):
logger.error("Unsupported file type provided. Preprocessing halted.") logger.error("Unsupported file type provided. Preprocessing halted.")
return None return None
judge_res=judge_zbfile(pdf_path)
if not judge_res:
return None
# 调用截取PDF多次 # 调用截取PDF多次
truncate_files = truncate_pdf_multiple(pdf_path, output_folder,logger,'goods') # index: 0->商务技术服务要求 1->评标办法 2->资格审查 3->投标人须知前附表 4->投标人须知正文 truncate_files = truncate_pdf_multiple(pdf_path, output_folder,logger,'goods') # index: 0->商务技术服务要求 1->评标办法 2->资格审查 3->投标人须知前附表 4->投标人须知正文

View File

@ -68,7 +68,7 @@ def extract_pages_tobidders_notice(pdf_path, output_folder, begin_pattern, begin
# 定义基础的 mid_pattern # 定义基础的 mid_pattern
base_mid_pattern = r'^\s*(?:[(]\s*[一二12]?\s*[)]\s*[、..]*|' \ base_mid_pattern = r'^\s*(?:[(]\s*[一二12]?\s*[)]\s*[、..]*|' \
r'[一二12][、..]+|[、..]+)\s*(说\s*明|总\s*则|名\s*词\s*解\s*释)' \ r'[一二12][、..]+|[、..]+)\s*(说\s*明|总\s*则|名\s*词\s*解\s*释)' \
r'|(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)\s*须知正文\s*$' r'|(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|应答人)\s*须知正文\s*$'
# 合并基础模式和额外模式 # 合并基础模式和额外模式
if additional_mid_pattern: if additional_mid_pattern:
combined_mid_pattern = regex.compile( combined_mid_pattern = regex.compile(
@ -323,8 +323,8 @@ def truncate_pdf_main_engineering(input_path, output_folder, selection, logger,
pattern_pairs = [ pattern_pairs = [
( (
regex.compile( regex.compile(
r'(?:第[一二三四五六七八九十百千]+(?:章|部分).*(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知|' r'^第[一二三四五六七八九十百千]+(?:章|部分).*(?:投标人?|磋商|谈判|供应商|应答人)须知+|'
r'(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知前附表)\s*$', r'(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!"\s*)(?<!"\s*)(?:投标人?|磋商|谈判|供应商|应答人)须知前附表\s*$',
regex.MULTILINE regex.MULTILINE
), ),
regex.compile( regex.compile(
@ -334,8 +334,8 @@ def truncate_pdf_main_engineering(input_path, output_folder, selection, logger,
), ),
( (
regex.compile( regex.compile(
r'.*(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知\s*$|' r'.*(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|应答人)须知\s*$|'
r'(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知前附表\s*$', r'(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|应答人)须知前附表\s*$',
regex.MULTILINE regex.MULTILINE
), ),
regex.compile( regex.compile(

View File

@ -215,7 +215,7 @@ def extract_pages_tobidders_notice(pdf_path, begin_pattern, begin_page, common_h
# 定义基础的 mid_pattern # 定义基础的 mid_pattern
base_mid_pattern = r'^\s*(?:[(]\s*[一二12]?\s*[)]\s*[、..]*|' \ base_mid_pattern = r'^\s*(?:[(]\s*[一二12]?\s*[)]\s*[、..]*|' \
r'[一二12][、..]+|[、..]+)\s*(说\s*明|总\s*则|名\s*词\s*解\s*释)' \ r'[一二12][、..]+|[、..]+)\s*(说\s*明|总\s*则|名\s*词\s*解\s*释)' \
r'|(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)\s*须知正文\s*$' r'|(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|应答人)\s*须知正文\s*$'
# 合并基础模式和额外模式 # 合并基础模式和额外模式
if additional_mid_pattern: if additional_mid_pattern:
@ -282,8 +282,8 @@ def extract_pages_tobidders_notice(pdf_path, begin_pattern, begin_page, common_h
else: else:
# 定义新的 begin_pattern 和 end_pattern # 定义新的 begin_pattern 和 end_pattern
new_begin_pattern = regex.compile( new_begin_pattern = regex.compile(
r'.*(?:投标人|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知\s*$|' r'.*(?:投标人|磋商|谈判|供应商|应答人)须知\s*$|'
r'(?:一\s*、\s*)?(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知前附表', r'(?:一\s*、\s*)?(?:投标人?|磋商|谈判|供应商|应答人)须知前附表',
regex.MULTILINE regex.MULTILINE
) )
new_end_pattern = regex.compile( new_end_pattern = regex.compile(
@ -300,7 +300,7 @@ def extract_pages_tobidders_notice(pdf_path, begin_pattern, begin_page, common_h
def extract_pages_twice_tobidders_notice(pdf_document, common_header, begin_page): def extract_pages_twice_tobidders_notice(pdf_document, common_header, begin_page):
output_suffix = "tobidders_notice" output_suffix = "tobidders_notice"
begin_pattern = regex.compile( begin_pattern = regex.compile(
r'^第[一二三四五六七八九十百千]+(?:章|部分).*(?:(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知)+', r'^第[一二三四五六七八九十百千]+(?:章|部分).*(?:(?:投标人?|磋商|谈判|供应商|应答人)须知)+',
regex.MULTILINE regex.MULTILINE
) )
end_pattern = regex.compile( end_pattern = regex.compile(
@ -523,8 +523,8 @@ def truncate_pdf_main_goods(input_path, output_folder, selection,logger, output_
local_output_suffix = "qualification1" local_output_suffix = "qualification1"
elif selection == 4: elif selection == 4:
begin_pattern = regex.compile( begin_pattern = regex.compile(
r'^第[一二三四五六七八九十百千]+(?:章|部分).*(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知+|' r'^第[一二三四五六七八九十百千]+(?:章|部分).*(?:投标人?|磋商|谈判|供应商|应答人)须知+|'
r'(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知前附表\s*$', r'(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!"\s*)(?<!"\s*)(?:投标人?|磋商|谈判|供应商|应答人)须知前附表\s*$',
regex.MULTILINE regex.MULTILINE
) )
end_pattern = None end_pattern = None