商务技术评分能处理多评分表情况,规范响应返回接口,判断上传文件是否为招标文件
This commit is contained in:
parent
fc67ef8e52
commit
5efdfb2007
@ -4,10 +4,10 @@ from flask_app.general.通义千问long import upload_file, qianwen_long
|
|||||||
|
|
||||||
|
|
||||||
def judge_zbfile(pdf_path):
|
def judge_zbfile(pdf_path):
|
||||||
reader = PdfReader(pdf_path)
|
# reader = PdfReader(pdf_path)
|
||||||
num_pages = len(reader.pages)
|
# num_pages = len(reader.pages)
|
||||||
if num_pages <= 5:
|
# if num_pages <= 5:
|
||||||
return False
|
# return False
|
||||||
user_query="""该文件是否属于招标文件?如果是的话,请返回'是',如果不是的话,返回'否'。请不要返回其他解释或内容。
|
user_query="""该文件是否属于招标文件?如果是的话,请返回'是',如果不是的话,返回'否'。请不要返回其他解释或内容。
|
||||||
以下是常见的招标文件类型:
|
以下是常见的招标文件类型:
|
||||||
公开招标文件、邀请招标文件、竞争性谈判文件、竞争性磋商文件、询价文件、问询文件、货物类招标文件、工程类招标文件、施工类招标文件、服务类招标文件、比选文件。
|
公开招标文件、邀请招标文件、竞争性谈判文件、竞争性磋商文件、询价文件、问询文件、货物类招标文件、工程类招标文件、施工类招标文件、服务类招标文件、比选文件。
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# flask_app/routes/get_deviation.py
|
# flask_app/routes/get_deviation.py
|
||||||
|
|
||||||
from flask import Blueprint, jsonify, Response, g
|
from flask import Blueprint, Response, g
|
||||||
import os
|
import os
|
||||||
from flask_app.general.format_change import download_file
|
from flask_app.general.format_change import download_file
|
||||||
from flask_app.routes.偏离表main import get_tech_and_business_deviation
|
from flask_app.routes.偏离表main import get_tech_and_business_deviation
|
||||||
|
@ -551,6 +551,9 @@ def process_functions_in_parallel(tech_deviation_info, busi_requirements_dict, z
|
|||||||
def get_tech_and_business_deviation(file_path,file_type,unique_id,output_folder,zb_type=2):
|
def get_tech_and_business_deviation(file_path,file_type,unique_id,output_folder,zb_type=2):
|
||||||
global logger
|
global logger
|
||||||
logger = get_global_logger(unique_id)
|
logger = get_global_logger(unique_id)
|
||||||
|
judge_res = judge_zbfile(file_path)
|
||||||
|
if not judge_res:
|
||||||
|
return None
|
||||||
# 第一步:根据文件类型进行转换
|
# 第一步:根据文件类型进行转换
|
||||||
if file_type == 1: # docx
|
if file_type == 1: # docx
|
||||||
docx_path=file_path
|
docx_path=file_path
|
||||||
@ -565,9 +568,6 @@ def get_tech_and_business_deviation(file_path,file_type,unique_id,output_folder,
|
|||||||
else:
|
else:
|
||||||
logger.error("不支持的文件类型!")
|
logger.error("不支持的文件类型!")
|
||||||
return None
|
return None
|
||||||
judge_res = judge_zbfile(pdf_path)
|
|
||||||
if not judge_res:
|
|
||||||
return None
|
|
||||||
# 第二步:根据zb_type确定选择项和类别,并截取PDF
|
# 第二步:根据zb_type确定选择项和类别,并截取PDF
|
||||||
if zb_type == 2:
|
if zb_type == 2:
|
||||||
selections = [1, 2, 3, 5]
|
selections = [1, 2, 3, 5]
|
||||||
|
@ -99,6 +99,9 @@ def little_parse_main(output_folder, file_path, file_type,zb_type,unique_id):
|
|||||||
"""
|
"""
|
||||||
logger = get_global_logger(unique_id)
|
logger = get_global_logger(unique_id)
|
||||||
logger.info("zb_type:"+str(zb_type))
|
logger.info("zb_type:"+str(zb_type))
|
||||||
|
judge_res = judge_zbfile(file_path)
|
||||||
|
if not judge_res:
|
||||||
|
return None
|
||||||
# 根据文件类型处理文件路径
|
# 根据文件类型处理文件路径
|
||||||
if file_type == 1: # docx
|
if file_type == 1: # docx
|
||||||
docx_path = file_path
|
docx_path = file_path
|
||||||
@ -112,9 +115,6 @@ def little_parse_main(output_folder, file_path, file_type,zb_type,unique_id):
|
|||||||
else:
|
else:
|
||||||
logger.error("Unsupported file type provided. Preprocessing halted.")
|
logger.error("Unsupported file type provided. Preprocessing halted.")
|
||||||
return None
|
return None
|
||||||
judge_res = judge_zbfile(pdf_path)
|
|
||||||
if not judge_res:
|
|
||||||
return None
|
|
||||||
# 根据招标类型调用相应的解析函数
|
# 根据招标类型调用相应的解析函数
|
||||||
if zb_type == 2: # 货物标
|
if zb_type == 2: # 货物标
|
||||||
combined_data = little_parse_goods(output_folder, pdf_path,logger)
|
combined_data = little_parse_goods(output_folder, pdf_path,logger)
|
||||||
|
@ -28,6 +28,9 @@ def preprocess_files(output_folder, file_path, file_type,logger):
|
|||||||
logger.info("starting 文件预处理...")
|
logger.info("starting 文件预处理...")
|
||||||
logger.info("output_folder..." + output_folder)
|
logger.info("output_folder..." + output_folder)
|
||||||
start_time=time.time()
|
start_time=time.time()
|
||||||
|
judge_res = judge_zbfile(file_path)
|
||||||
|
if not judge_res:
|
||||||
|
return None
|
||||||
# 根据文件类型处理文件路径
|
# 根据文件类型处理文件路径
|
||||||
if file_type == 1: # docx
|
if file_type == 1: # docx
|
||||||
# docx_path = file_path
|
# docx_path = file_path
|
||||||
@ -41,9 +44,6 @@ def preprocess_files(output_folder, file_path, file_type,logger):
|
|||||||
else:
|
else:
|
||||||
logger.error("Unsupported file type provided. Preprocessing halted.")
|
logger.error("Unsupported file type provided. Preprocessing halted.")
|
||||||
return None
|
return None
|
||||||
judge_res = judge_zbfile(pdf_path)
|
|
||||||
if not judge_res:
|
|
||||||
return None
|
|
||||||
# 调用截取PDF多次
|
# 调用截取PDF多次
|
||||||
truncate_files = truncate_pdf_multiple(pdf_path, output_folder,logger,'engineering')
|
truncate_files = truncate_pdf_multiple(pdf_path, output_folder,logger,'engineering')
|
||||||
print("切割出的文件:"+str(truncate_files))
|
print("切割出的文件:"+str(truncate_files))
|
||||||
|
@ -24,6 +24,9 @@ executor = ThreadPoolExecutor()
|
|||||||
def preprocess_files(output_folder, file_path, file_type,logger):
|
def preprocess_files(output_folder, file_path, file_type,logger):
|
||||||
logger.info("starting 文件预处理...")
|
logger.info("starting 文件预处理...")
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
judge_res = judge_zbfile(file_path)
|
||||||
|
if not judge_res:
|
||||||
|
return None
|
||||||
logger.info("output_folder..." + output_folder)
|
logger.info("output_folder..." + output_folder)
|
||||||
# 根据文件类型处理文件路径
|
# 根据文件类型处理文件路径
|
||||||
if file_type == 1: # docx
|
if file_type == 1: # docx
|
||||||
@ -39,9 +42,6 @@ def preprocess_files(output_folder, file_path, file_type,logger):
|
|||||||
logger.error("Unsupported file type provided. Preprocessing halted.")
|
logger.error("Unsupported file type provided. Preprocessing halted.")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
judge_res=judge_zbfile(pdf_path)
|
|
||||||
if not judge_res:
|
|
||||||
return None
|
|
||||||
# 调用截取PDF多次
|
# 调用截取PDF多次
|
||||||
truncate_files = truncate_pdf_multiple(pdf_path, output_folder,logger,'goods') # index: 0->商务技术服务要求 1->评标办法 2->资格审查 3->投标人须知前附表 4->投标人须知正文
|
truncate_files = truncate_pdf_multiple(pdf_path, output_folder,logger,'goods') # index: 0->商务技术服务要求 1->评标办法 2->资格审查 3->投标人须知前附表 4->投标人须知正文
|
||||||
|
|
||||||
|
@ -68,7 +68,7 @@ def extract_pages_tobidders_notice(pdf_path, output_folder, begin_pattern, begin
|
|||||||
# 定义基础的 mid_pattern
|
# 定义基础的 mid_pattern
|
||||||
base_mid_pattern = r'^\s*(?:[((]\s*[一二12]?\s*[))]\s*[、..]*|' \
|
base_mid_pattern = r'^\s*(?:[((]\s*[一二12]?\s*[))]\s*[、..]*|' \
|
||||||
r'[一二12][、..]+|[、..]+)\s*(说\s*明|总\s*则|名\s*词\s*解\s*释)' \
|
r'[一二12][、..]+|[、..]+)\s*(说\s*明|总\s*则|名\s*词\s*解\s*释)' \
|
||||||
r'|(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)\s*须知正文\s*$'
|
r'|(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|应答人)\s*须知正文\s*$'
|
||||||
# 合并基础模式和额外模式
|
# 合并基础模式和额外模式
|
||||||
if additional_mid_pattern:
|
if additional_mid_pattern:
|
||||||
combined_mid_pattern = regex.compile(
|
combined_mid_pattern = regex.compile(
|
||||||
@ -323,8 +323,8 @@ def truncate_pdf_main_engineering(input_path, output_folder, selection, logger,
|
|||||||
pattern_pairs = [
|
pattern_pairs = [
|
||||||
(
|
(
|
||||||
regex.compile(
|
regex.compile(
|
||||||
r'(?:第[一二三四五六七八九十百千]+(?:章|部分).*(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知|'
|
r'^第[一二三四五六七八九十百千]+(?:章|部分).*(?:投标人?|磋商|谈判|供应商|应答人)须知+|'
|
||||||
r'(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知前附表)\s*$',
|
r'(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!"\s*)(?<!"\s*)(?:投标人?|磋商|谈判|供应商|应答人)须知前附表\s*$',
|
||||||
regex.MULTILINE
|
regex.MULTILINE
|
||||||
),
|
),
|
||||||
regex.compile(
|
regex.compile(
|
||||||
@ -334,8 +334,8 @@ def truncate_pdf_main_engineering(input_path, output_folder, selection, logger,
|
|||||||
),
|
),
|
||||||
(
|
(
|
||||||
regex.compile(
|
regex.compile(
|
||||||
r'.*(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知\s*$|'
|
r'.*(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|应答人)须知\s*$|'
|
||||||
r'(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知前附表\s*$',
|
r'(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|应答人)须知前附表\s*$',
|
||||||
regex.MULTILINE
|
regex.MULTILINE
|
||||||
),
|
),
|
||||||
regex.compile(
|
regex.compile(
|
||||||
|
@ -215,7 +215,7 @@ def extract_pages_tobidders_notice(pdf_path, begin_pattern, begin_page, common_h
|
|||||||
# 定义基础的 mid_pattern
|
# 定义基础的 mid_pattern
|
||||||
base_mid_pattern = r'^\s*(?:[((]\s*[一二12]?\s*[))]\s*[、..]*|' \
|
base_mid_pattern = r'^\s*(?:[((]\s*[一二12]?\s*[))]\s*[、..]*|' \
|
||||||
r'[一二12][、..]+|[、..]+)\s*(说\s*明|总\s*则|名\s*词\s*解\s*释)' \
|
r'[一二12][、..]+|[、..]+)\s*(说\s*明|总\s*则|名\s*词\s*解\s*释)' \
|
||||||
r'|(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)\s*须知正文\s*$'
|
r'|(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|应答人)\s*须知正文\s*$'
|
||||||
|
|
||||||
# 合并基础模式和额外模式
|
# 合并基础模式和额外模式
|
||||||
if additional_mid_pattern:
|
if additional_mid_pattern:
|
||||||
@ -282,8 +282,8 @@ def extract_pages_tobidders_notice(pdf_path, begin_pattern, begin_page, common_h
|
|||||||
else:
|
else:
|
||||||
# 定义新的 begin_pattern 和 end_pattern
|
# 定义新的 begin_pattern 和 end_pattern
|
||||||
new_begin_pattern = regex.compile(
|
new_begin_pattern = regex.compile(
|
||||||
r'.*(?:投标人|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知\s*$|'
|
r'.*(?:投标人|磋商|谈判|供应商|应答人)须知\s*$|'
|
||||||
r'(?:一\s*、\s*)?(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知前附表',
|
r'(?:一\s*、\s*)?(?:投标人?|磋商|谈判|供应商|应答人)须知前附表',
|
||||||
regex.MULTILINE
|
regex.MULTILINE
|
||||||
)
|
)
|
||||||
new_end_pattern = regex.compile(
|
new_end_pattern = regex.compile(
|
||||||
@ -300,7 +300,7 @@ def extract_pages_tobidders_notice(pdf_path, begin_pattern, begin_page, common_h
|
|||||||
def extract_pages_twice_tobidders_notice(pdf_document, common_header, begin_page):
|
def extract_pages_twice_tobidders_notice(pdf_document, common_header, begin_page):
|
||||||
output_suffix = "tobidders_notice"
|
output_suffix = "tobidders_notice"
|
||||||
begin_pattern = regex.compile(
|
begin_pattern = regex.compile(
|
||||||
r'^第[一二三四五六七八九十百千]+(?:章|部分).*(?:(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知)+',
|
r'^第[一二三四五六七八九十百千]+(?:章|部分).*(?:(?:投标人?|磋商|谈判|供应商|应答人)须知)+',
|
||||||
regex.MULTILINE
|
regex.MULTILINE
|
||||||
)
|
)
|
||||||
end_pattern = regex.compile(
|
end_pattern = regex.compile(
|
||||||
@ -523,8 +523,8 @@ def truncate_pdf_main_goods(input_path, output_folder, selection,logger, output_
|
|||||||
local_output_suffix = "qualification1"
|
local_output_suffix = "qualification1"
|
||||||
elif selection == 4:
|
elif selection == 4:
|
||||||
begin_pattern = regex.compile(
|
begin_pattern = regex.compile(
|
||||||
r'^第[一二三四五六七八九十百千]+(?:章|部分).*(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知+|'
|
r'^第[一二三四五六七八九十百千]+(?:章|部分).*(?:投标人?|磋商|谈判|供应商|应答人)须知+|'
|
||||||
r'(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知前附表\s*$',
|
r'(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!"\s*)(?<!"\s*)(?:投标人?|磋商|谈判|供应商|应答人)须知前附表\s*$',
|
||||||
regex.MULTILINE
|
regex.MULTILINE
|
||||||
)
|
)
|
||||||
end_pattern = None
|
end_pattern = None
|
||||||
|
Loading…
x
Reference in New Issue
Block a user