商务技术评分能处理多评分表情况,规范响应返回接口,判断上传文件是否为招标文件
This commit is contained in:
parent
fc67ef8e52
commit
5efdfb2007
@ -4,10 +4,10 @@ from flask_app.general.通义千问long import upload_file, qianwen_long
|
||||
|
||||
|
||||
def judge_zbfile(pdf_path):
|
||||
reader = PdfReader(pdf_path)
|
||||
num_pages = len(reader.pages)
|
||||
if num_pages <= 5:
|
||||
return False
|
||||
# reader = PdfReader(pdf_path)
|
||||
# num_pages = len(reader.pages)
|
||||
# if num_pages <= 5:
|
||||
# return False
|
||||
user_query="""该文件是否属于招标文件?如果是的话,请返回'是',如果不是的话,返回'否'。请不要返回其他解释或内容。
|
||||
以下是常见的招标文件类型:
|
||||
公开招标文件、邀请招标文件、竞争性谈判文件、竞争性磋商文件、询价文件、问询文件、货物类招标文件、工程类招标文件、施工类招标文件、服务类招标文件、比选文件。
|
||||
|
@ -1,6 +1,6 @@
|
||||
# flask_app/routes/get_deviation.py
|
||||
|
||||
from flask import Blueprint, jsonify, Response, g
|
||||
from flask import Blueprint, Response, g
|
||||
import os
|
||||
from flask_app.general.format_change import download_file
|
||||
from flask_app.routes.偏离表main import get_tech_and_business_deviation
|
||||
|
@ -551,6 +551,9 @@ def process_functions_in_parallel(tech_deviation_info, busi_requirements_dict, z
|
||||
def get_tech_and_business_deviation(file_path,file_type,unique_id,output_folder,zb_type=2):
|
||||
global logger
|
||||
logger = get_global_logger(unique_id)
|
||||
judge_res = judge_zbfile(file_path)
|
||||
if not judge_res:
|
||||
return None
|
||||
# 第一步:根据文件类型进行转换
|
||||
if file_type == 1: # docx
|
||||
docx_path=file_path
|
||||
@ -565,9 +568,6 @@ def get_tech_and_business_deviation(file_path,file_type,unique_id,output_folder,
|
||||
else:
|
||||
logger.error("不支持的文件类型!")
|
||||
return None
|
||||
judge_res = judge_zbfile(pdf_path)
|
||||
if not judge_res:
|
||||
return None
|
||||
# 第二步:根据zb_type确定选择项和类别,并截取PDF
|
||||
if zb_type == 2:
|
||||
selections = [1, 2, 3, 5]
|
||||
|
@ -99,6 +99,9 @@ def little_parse_main(output_folder, file_path, file_type,zb_type,unique_id):
|
||||
"""
|
||||
logger = get_global_logger(unique_id)
|
||||
logger.info("zb_type:"+str(zb_type))
|
||||
judge_res = judge_zbfile(file_path)
|
||||
if not judge_res:
|
||||
return None
|
||||
# 根据文件类型处理文件路径
|
||||
if file_type == 1: # docx
|
||||
docx_path = file_path
|
||||
@ -112,9 +115,6 @@ def little_parse_main(output_folder, file_path, file_type,zb_type,unique_id):
|
||||
else:
|
||||
logger.error("Unsupported file type provided. Preprocessing halted.")
|
||||
return None
|
||||
judge_res = judge_zbfile(pdf_path)
|
||||
if not judge_res:
|
||||
return None
|
||||
# 根据招标类型调用相应的解析函数
|
||||
if zb_type == 2: # 货物标
|
||||
combined_data = little_parse_goods(output_folder, pdf_path,logger)
|
||||
|
@ -28,6 +28,9 @@ def preprocess_files(output_folder, file_path, file_type,logger):
|
||||
logger.info("starting 文件预处理...")
|
||||
logger.info("output_folder..." + output_folder)
|
||||
start_time=time.time()
|
||||
judge_res = judge_zbfile(file_path)
|
||||
if not judge_res:
|
||||
return None
|
||||
# 根据文件类型处理文件路径
|
||||
if file_type == 1: # docx
|
||||
# docx_path = file_path
|
||||
@ -41,9 +44,6 @@ def preprocess_files(output_folder, file_path, file_type,logger):
|
||||
else:
|
||||
logger.error("Unsupported file type provided. Preprocessing halted.")
|
||||
return None
|
||||
judge_res = judge_zbfile(pdf_path)
|
||||
if not judge_res:
|
||||
return None
|
||||
# 调用截取PDF多次
|
||||
truncate_files = truncate_pdf_multiple(pdf_path, output_folder,logger,'engineering')
|
||||
print("切割出的文件:"+str(truncate_files))
|
||||
|
@ -24,6 +24,9 @@ executor = ThreadPoolExecutor()
|
||||
def preprocess_files(output_folder, file_path, file_type,logger):
|
||||
logger.info("starting 文件预处理...")
|
||||
start_time = time.time()
|
||||
judge_res = judge_zbfile(file_path)
|
||||
if not judge_res:
|
||||
return None
|
||||
logger.info("output_folder..." + output_folder)
|
||||
# 根据文件类型处理文件路径
|
||||
if file_type == 1: # docx
|
||||
@ -39,9 +42,6 @@ def preprocess_files(output_folder, file_path, file_type,logger):
|
||||
logger.error("Unsupported file type provided. Preprocessing halted.")
|
||||
return None
|
||||
|
||||
judge_res=judge_zbfile(pdf_path)
|
||||
if not judge_res:
|
||||
return None
|
||||
# 调用截取PDF多次
|
||||
truncate_files = truncate_pdf_multiple(pdf_path, output_folder,logger,'goods') # index: 0->商务技术服务要求 1->评标办法 2->资格审查 3->投标人须知前附表 4->投标人须知正文
|
||||
|
||||
|
@ -68,7 +68,7 @@ def extract_pages_tobidders_notice(pdf_path, output_folder, begin_pattern, begin
|
||||
# 定义基础的 mid_pattern
|
||||
base_mid_pattern = r'^\s*(?:[((]\s*[一二12]?\s*[))]\s*[、..]*|' \
|
||||
r'[一二12][、..]+|[、..]+)\s*(说\s*明|总\s*则|名\s*词\s*解\s*释)' \
|
||||
r'|(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)\s*须知正文\s*$'
|
||||
r'|(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|应答人)\s*须知正文\s*$'
|
||||
# 合并基础模式和额外模式
|
||||
if additional_mid_pattern:
|
||||
combined_mid_pattern = regex.compile(
|
||||
@ -323,8 +323,8 @@ def truncate_pdf_main_engineering(input_path, output_folder, selection, logger,
|
||||
pattern_pairs = [
|
||||
(
|
||||
regex.compile(
|
||||
r'(?:第[一二三四五六七八九十百千]+(?:章|部分).*(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知|'
|
||||
r'(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知前附表)\s*$',
|
||||
r'^第[一二三四五六七八九十百千]+(?:章|部分).*(?:投标人?|磋商|谈判|供应商|应答人)须知+|'
|
||||
r'(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|应答人)须知前附表\s*$',
|
||||
regex.MULTILINE
|
||||
),
|
||||
regex.compile(
|
||||
@ -334,8 +334,8 @@ def truncate_pdf_main_engineering(input_path, output_folder, selection, logger,
|
||||
),
|
||||
(
|
||||
regex.compile(
|
||||
r'.*(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知\s*$|'
|
||||
r'(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知前附表\s*$',
|
||||
r'.*(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|应答人)须知\s*$|'
|
||||
r'(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|应答人)须知前附表\s*$',
|
||||
regex.MULTILINE
|
||||
),
|
||||
regex.compile(
|
||||
|
@ -215,7 +215,7 @@ def extract_pages_tobidders_notice(pdf_path, begin_pattern, begin_page, common_h
|
||||
# 定义基础的 mid_pattern
|
||||
base_mid_pattern = r'^\s*(?:[((]\s*[一二12]?\s*[))]\s*[、..]*|' \
|
||||
r'[一二12][、..]+|[、..]+)\s*(说\s*明|总\s*则|名\s*词\s*解\s*释)' \
|
||||
r'|(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)\s*须知正文\s*$'
|
||||
r'|(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|应答人)\s*须知正文\s*$'
|
||||
|
||||
# 合并基础模式和额外模式
|
||||
if additional_mid_pattern:
|
||||
@ -282,8 +282,8 @@ def extract_pages_tobidders_notice(pdf_path, begin_pattern, begin_page, common_h
|
||||
else:
|
||||
# 定义新的 begin_pattern 和 end_pattern
|
||||
new_begin_pattern = regex.compile(
|
||||
r'.*(?:投标人|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知\s*$|'
|
||||
r'(?:一\s*、\s*)?(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知前附表',
|
||||
r'.*(?:投标人|磋商|谈判|供应商|应答人)须知\s*$|'
|
||||
r'(?:一\s*、\s*)?(?:投标人?|磋商|谈判|供应商|应答人)须知前附表',
|
||||
regex.MULTILINE
|
||||
)
|
||||
new_end_pattern = regex.compile(
|
||||
@ -300,7 +300,7 @@ def extract_pages_tobidders_notice(pdf_path, begin_pattern, begin_page, common_h
|
||||
def extract_pages_twice_tobidders_notice(pdf_document, common_header, begin_page):
|
||||
output_suffix = "tobidders_notice"
|
||||
begin_pattern = regex.compile(
|
||||
r'^第[一二三四五六七八九十百千]+(?:章|部分).*(?:(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知)+',
|
||||
r'^第[一二三四五六七八九十百千]+(?:章|部分).*(?:(?:投标人?|磋商|谈判|供应商|应答人)须知)+',
|
||||
regex.MULTILINE
|
||||
)
|
||||
end_pattern = regex.compile(
|
||||
@ -523,8 +523,8 @@ def truncate_pdf_main_goods(input_path, output_folder, selection,logger, output_
|
||||
local_output_suffix = "qualification1"
|
||||
elif selection == 4:
|
||||
begin_pattern = regex.compile(
|
||||
r'^第[一二三四五六七八九十百千]+(?:章|部分).*(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知+|'
|
||||
r'(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|谈判供应商|磋商供应商|应答人)须知前附表\s*$',
|
||||
r'^第[一二三四五六七八九十百千]+(?:章|部分).*(?:投标人?|磋商|谈判|供应商|应答人)须知+|'
|
||||
r'(?<!见\s*)(?<!与\s*)(?<!"\s*)(?<!“\s*)(?<!”\s*)(?:投标人?|磋商|谈判|供应商|应答人)须知前附表\s*$',
|
||||
regex.MULTILINE
|
||||
)
|
||||
end_pattern = None
|
||||
|
Loading…
x
Reference in New Issue
Block a user