1.10
commit 516b23bd9e (parent 51a8796353)
@@ -6,7 +6,7 @@ from flask_app.general.model_continue_query import process_continue_answers
 from flask_app.general.通义千问long import upload_file, qianwen_long_stream

 # Extract the content between two major section headings
-def extract_between_sections(data, target_values):
+def extract_between_sections(data, target_values, flag=False):
     target_found = False
     extracted_data = {}
     current_section_title = ""
@@ -27,7 +27,8 @@ def extract_between_sections(data, target_values):
             target_found = False

         # Check whether the current heading contains any keyword from target_values
-        if any(tv in value for tv in target_values) and not file_pattern.search(value):
+        if any(tv in value for tv in target_values):
+            if (flag and not file_pattern.search(value)) or not flag:
                 target_found = True  # Target section found; start capturing the content that follows
                 current_section_title = value  # Save the section heading text

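Note on the new guard: `(flag and not file_pattern.search(value)) or not flag` reduces to `not (flag and file_pattern.search(value))`, so the file-name exclusion fires only when flag is True. With the default flag=False the exclusion is skipped entirely, meaning existing callers now keep headings the old code would have filtered out. A minimal sketch of the condition; the regex below is a stand-in, not the module's actual file_pattern:

import re

file_pattern = re.compile(r"文件")  # stand-in; the module defines its own file_pattern

def heading_matches(value, target_values, flag=False):
    # Keyword match first, then the file-pattern exclusion, applied only when flag is set
    if any(tv in value for tv in target_values):
        if (flag and not file_pattern.search(value)) or not flag:
            return True
    return False

assert heading_matches("第三章 评标办法", ["评标"])              # plain keyword hit
assert heading_matches("评标文件格式", ["评标"])                 # kept: flag is off
assert not heading_matches("评标文件格式", ["评标"], flag=True)  # dropped: looks like a file reference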
@@ -365,8 +366,8 @@ def extract_from_notice(merged_baseinfo_path, clause_path, type):
         3: ["重新招标、不再招标和终止招标", "重新招标", "重新采购", "不再招标", "不再采购", "终止招标", "终止采购"],
         4: ["评标"]  # for testing
     }

     # Get the target_values for this type
+    flag = (type == 2)
     target_values = type_target_map.get(type)
     if not target_values:
         print(f"Error: Invalid type specified: {type}. Use 1, 2, 3, or 4.")
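flag is derived before the lookup, so only type 2 activates the file-name filter inside extract_between_sections; types 1, 3, and 4 behave as before. A condensed sketch; the entries for types 1 and 2 are placeholders, since this hunk shows only 3 and 4:

type_target_map = {
    1: ["..."],                              # placeholder; real keywords not shown in this hunk
    2: ["..."],                              # placeholder; real keywords not shown in this hunk
    3: ["重新招标", "不再招标", "终止招标"],  # abridged from the hunk above
    4: ["评标"],
}
type = 2
flag = (type == 2)                         # file-pattern filtering only for type 2
target_values = type_target_map.get(type)  # None for an invalid type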
@@ -378,7 +379,7 @@ def extract_from_notice(merged_baseinfo_path, clause_path, type):
             data = json.load(file)
             if len(data) >= 60:
                 # Try filtering by major sections first
-                extracted_data = extract_between_sections(data, target_values)
+                extracted_data = extract_between_sections(data, target_values, flag)
                 if extracted_data:
                     # Post-process and return the result
                     extracted_data_concatenated = {
@@ -394,9 +395,9 @@ def extract_from_notice(merged_baseinfo_path, clause_path, type):
                     final_result = postprocess_formatted2(extracted_data, target_values)
                     return final_result

-        # # If clause_path is empty, or every filtering method failed, call the fallback function
-        # final_result = get_requirements_with_gpt(merged_baseinfo_path, type)
-        # return final_result
+        # If clause_path is empty, or every filtering method failed, call the fallback function
+        final_result = get_requirements_with_gpt(merged_baseinfo_path, type)
+        return final_result

     except Exception as e:
         print(f"Error occurred while processing clause_path '{clause_path}': {e}")
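Re-enabling these three lines restores the fallback path: if clause_path is empty, the clause JSON is too short, or section filtering returns nothing, the function now queries the LLM instead of returning None. A condensed sketch of the resulting control flow, assuming the structure visible in the hunks above; postprocess stands in for the inline post-processing:

import json

def extract_from_notice_flow(merged_baseinfo_path, clause_path, type):
    flag = (type == 2)
    target_values = type_target_map.get(type)
    try:
        if clause_path:
            with open(clause_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
            if len(data) >= 60:
                extracted = extract_between_sections(data, target_values, flag)
                if extracted:
                    return postprocess(extracted)  # stand-in for the inline post-processing
        # clause_path empty or every filtering method failed: fall back to the LLM
        return get_requirements_with_gpt(merged_baseinfo_path, type)
    except Exception as e:
        print(f"Error occurred while processing clause_path '{clause_path}': {e}")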
@@ -407,7 +408,7 @@ if __name__ == "__main__":
     merged_baseinfo_path = r"C:\Users\Administrator\Desktop\fsdownload\b29de31a-297e-42cf-b9ba-6859e530a472\ztbfile_merged_baseinfo.pdf"
     clause_path = r"C:\Users\Administrator\Desktop\fsdownload\b29de31a-297e-42cf-b9ba-6859e530a472\clause1.json"
     try:
-        res = extract_from_notice(merged_baseinfo_path, clause_path, 1)  # Change the type argument here to test different scenarios
+        res = extract_from_notice(merged_baseinfo_path, clause_path, 2)  # Change the type argument here to test different scenarios
         res2 = json.dumps(res, ensure_ascii=False, indent=4)
         print(res2)
     except ValueError as e:
flask_app/routes/judge_zbfile.py  (new file, +10)
@@ -0,0 +1,10 @@
+# from flask_app.ConnectionLimiter import require_connection_limit
+# from flask import Blueprint
+#
+# from flask_app.routes.utils import validate_and_setup_logger
+#
+# judge_zbfile_bp = Blueprint('judge_zbfile', __name__)
+# @judge_zbfile_bp.route('/judge_zbfile', methods=['POST'])
+# @validate_and_setup_logger
+# @require_connection_limit(timeout=30)
+# def judge_zbfile():
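The new module lands fully commented out: scaffolding for a /judge_zbfile endpoint that is not registered anywhere yet. Uncommented and given a body, it would look roughly like the sketch below; the request shape and the run_judge_zbfile helper are assumptions, not part of the commit:

from flask import Blueprint, jsonify, request

from flask_app.ConnectionLimiter import require_connection_limit
from flask_app.routes.utils import validate_and_setup_logger

judge_zbfile_bp = Blueprint('judge_zbfile', __name__)

@judge_zbfile_bp.route('/judge_zbfile', methods=['POST'])
@validate_and_setup_logger
@require_connection_limit(timeout=30)
def judge_zbfile():
    # Assumed body: run the tender-file check and report the verdict
    file_path = request.json.get('file_path')                         # assumed request shape
    return jsonify({'is_zbfile': bool(run_judge_zbfile(file_path))})  # hypothetical helper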
@@ -551,9 +551,6 @@ def process_functions_in_parallel(tech_deviation_info, busi_requirements_dict, z
 def get_tech_and_business_deviation(file_path, file_type, unique_id, output_folder, zb_type=2):
     global logger
     logger = get_global_logger(unique_id)
-    judge_res = judge_zbfile(file_path)
-    if not judge_res:
-        return None
     # Step 1: convert the file according to its type
     if file_type == 1:  # docx
         docx_path = file_path
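This hunk and the three below remove the same inline guard from four entry points, presumably in favor of exposing the check through the new (still commented-out) /judge_zbfile route. The removed pattern, for reference:

judge_res = judge_zbfile(file_path)  # presumably checks the file is a tender (招标) document
if not judge_res:
    return None                      # early exit for files that fail the check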
@@ -99,9 +99,6 @@ def little_parse_main(output_folder, file_path, file_type,zb_type,unique_id):
     """
     logger = get_global_logger(unique_id)
     logger.info("zb_type:" + str(zb_type))
-    judge_res = judge_zbfile(file_path)
-    if not judge_res:
-        return None
     # Resolve the file path according to the file type
     if file_type == 1:  # docx
         docx_path = file_path
@@ -28,9 +28,6 @@ def preprocess_files(output_folder, file_path, file_type,logger):
     logger.info("starting 文件预处理...")
     logger.info("output_folder..." + output_folder)
     start_time = time.time()
-    judge_res = judge_zbfile(file_path)
-    if not judge_res:
-        return None
     # Resolve the file path according to the file type
     if file_type == 1:  # docx
         # docx_path = file_path
@@ -24,9 +24,6 @@ executor = ThreadPoolExecutor()
 def preprocess_files(output_folder, file_path, file_type, logger):
     logger.info("starting 文件预处理...")
     start_time = time.time()
-    judge_res = judge_zbfile(file_path)
-    if not judge_res:
-        return None
     logger.info("output_folder..." + output_folder)
     # Resolve the file path according to the file type
     if file_type == 1:  # docx