10.17 小解析货物标
This commit is contained in:
parent
12b1cdcfa3
commit
8255070f60
@ -22,34 +22,45 @@ def get_global_logger(unique_id):
|
|||||||
|
|
||||||
|
|
||||||
logger = None
|
logger = None
|
||||||
def get_base_info(baseinfo_file_path):
|
|
||||||
|
def merge(merged):
    """Decide whether the bid/negotiation deposit needs a follow-up query.

    ``merged`` is the parsed JSON answer to the "is a deposit required?"
    question. It is expected to contain either the key '是否递交投标保证金'
    or '是否递交磋商保证金' with a value of '是', '否' or '未知'.

    Returns a 2-tuple ``(flag, payload)``:

    * ``(0, merged)`` when the answer is an explicit '否'. ``merged`` is
      updated in place: the yes/no key is removed and the two deposit
      fields are filled with fixed "not required" placeholders.
    * ``(1, "")`` for every other answer ('是', '未知', a missing key or an
      unexpected value). The caller should then ask the detailed deposit
      question itself.

    The original implementation returned ``None`` for anything other than
    '是' / '否', which broke callers that unpack the result into two names.
    """
    guarantee_key = '是否递交投标保证金' if '是否递交投标保证金' in merged else '是否递交磋商保证金'

    if merged.get(guarantee_key) == '否':
        # Derive the deposit flavour (bid vs. negotiation) from the key name
        # so the generated field names match the wording of the document.
        guarantee_type = '投标' if '投标' in guarantee_key else '磋商'
        merged[f'{guarantee_type}保证金'] = '不提交'
        merged[f'退还{guarantee_type}保证金'] = '/'
        merged.pop(guarantee_key, None)
        return 0, merged

    # '是', '未知', or no usable answer: without an explicit "no" we cannot
    # claim the deposit is waived, so signal that a follow-up query is needed.
    return 1, ""
|
||||||
|
|
||||||
|
def get_goods_baseinfo(baseinfo_file_path):
|
||||||
file_id = upload_file(baseinfo_file_path)
|
file_id = upload_file(baseinfo_file_path)
|
||||||
baseinfo_file_path='flask_app/static/提示词/基本信息货物标.txt'
|
# baseinfo_file_path='flask_app/static/提示词/基本信息货物标.txt'
|
||||||
# baseinfo_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\基本信息货物标.txt'
|
baseinfo_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\基本信息货物标.txt'
|
||||||
questions = read_questions_from_file(baseinfo_file_path)
|
questions = read_questions_from_file(baseinfo_file_path)
|
||||||
more_query = "请你根据招标文件信息,回答以下问题:是否组织踏勘现场?是否召开投标预备会(或投标答疑会)?是否退还投标文件?是否允许分包? 是否需要递交投标保证金(或磋商保证金)?是否需要提交履约保证金(或履约担保)?是否有招标代理服务费(或中标、成交服务费)?请按json格式给我提供信息,键名分别为'是否组织踏勘现场','是否召开投标预备会'(或'是否召开投标答疑会'),'是否退还投标文件',是否允许分包','是否递交投标保证金'(或'是否递交磋商保证金'),'是否提交履约保证金','是否有招标代理服务费',键值仅限于'是','否','未知',若存在矛盾信息,请回答'未知'。"
|
more_query = "请你根据招标文件信息,回答以下问题:是否需要递交投标保证金(或磋商保证金)?请按json格式给我提供信息,键名分为'是否递交投标保证金'(或'是否递交磋商保证金'),键值仅限于'是','否','未知',若存在矛盾信息,请回答'未知'。"
|
||||||
questions.append(more_query)
|
questions.append(more_query)
|
||||||
baseinfo_results = multi_threading(questions, "", file_id, 2) # 1代表使用百炼rag 2代表使用qianwen-long
|
baseinfo_results = multi_threading(questions, "", file_id, 2) # 1代表使用百炼rag 2代表使用qianwen-long
|
||||||
baseinfo_list = [clean_json_string(res) for _, res in baseinfo_results] if baseinfo_results else []
|
baseinfo_list = [clean_json_string(res) for _, res in baseinfo_results] if baseinfo_results else []
|
||||||
chosen_numbers, merged = merge_json_to_list(baseinfo_list.pop())
|
type,merged=merge(baseinfo_list.pop())
|
||||||
baseinfo_list.append(merged)
|
if type:
|
||||||
|
judge_questions="根据招标文件第二章投标人须知,该项目投标保证金(或磋商保证金)的内容或要求是什么?请按json格式给我提供信息,外层键名为'投标保证金'(或'磋商保证金'),若需要以嵌套键值对返回结果,那么嵌套键名为你对相应要求的总结,而对应键值需要完全与原文保持一致。"
|
||||||
judge_file_path = 'flask_app/static/提示词/是否相关问题货物标.txt'
|
res2 = multi_threading(judge_questions, "", file_id, 2) # 调用千问-long
|
||||||
# judge_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\是否相关问题货物标.txt'
|
if not res2:
|
||||||
judge_questions = read_questions_from_judge(judge_file_path, chosen_numbers)
|
print("基础信息整合: multi_threading error!")
|
||||||
|
else:
|
||||||
res2 = multi_threading(judge_questions, "", file_id, 2) # 调用千问-long
|
for question, response in res2:
|
||||||
if not res2:
|
baseinfo_list.append(clean_json_string(response))
|
||||||
print("基础信息整合: multi_threading error!")
|
|
||||||
else:
|
else:
|
||||||
for question, response in res2:
|
baseinfo_list.append(merged)
|
||||||
baseinfo_list.append(clean_json_string(response))
|
|
||||||
return baseinfo_list
|
return baseinfo_list
|
||||||
|
|
||||||
|
|
||||||
#货物标
|
#货物标
|
||||||
def little_parse_goods(output_folder,file_path):
|
def little_parse_goods(output_folder,file_path):
|
||||||
files=truncate_pdf_specific(file_path,output_folder)
|
files=truncate_pdf_specific(file_path,output_folder)
|
||||||
baseinfo_list=get_base_info(files[-1])
|
baseinfo_list=get_goods_baseinfo(files[-1])
|
||||||
aggregated_baseinfo = aggregate_basic_info_goods(baseinfo_list)
|
aggregated_baseinfo = aggregate_basic_info_goods(baseinfo_list)
|
||||||
return {"基础信息": aggregated_baseinfo}
|
return {"基础信息": aggregated_baseinfo}
|
||||||
def little_parse_engineering(output_folder,downloaded_filepath):
|
def little_parse_engineering(output_folder,downloaded_filepath):
|
||||||
@ -107,8 +118,8 @@ if __name__ == "__main__":
|
|||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
file_type = 2 # 1:docx 2:pdf 3:其他
|
file_type = 2 # 1:docx 2:pdf 3:其他
|
||||||
zb_type=2 #1:工程标 2:货物标
|
zb_type=2 #1:工程标 2:货物标
|
||||||
input_file = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\6.2定版视频会议磋商文件.pdf"
|
input_file = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\ztbfile.pdf"
|
||||||
res=little_parse_main(output_folder, input_file, file_type, zb_type)
|
res=little_parse_main(output_folder, input_file, file_type, zb_type,"122334")
|
||||||
print(json.dumps(res, ensure_ascii=False, indent=4))
|
print(json.dumps(res, ensure_ascii=False, indent=4))
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
elapsed_time = end_time - start_time # 计算耗时
|
elapsed_time = end_time - start_time # 计算耗时
|
||||||
|
@ -295,15 +295,15 @@ def process_and_stream(file_url, zb_type):
|
|||||||
final_result, extracted_info = outer_post_processing(combined_data, includes)
|
final_result, extracted_info = outer_post_processing(combined_data, includes)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(output_json_path, 'w', encoding='utf-8') as json_file:
|
with open(extracted_info_path, 'w', encoding='utf-8') as json_file:
|
||||||
json.dump(extracted_info, json_file, ensure_ascii=False, indent=4)
|
json.dump(extracted_info, json_file, ensure_ascii=False, indent=4)
|
||||||
logger.info(f"合并后的数据已保存到 '{output_json_path}'")
|
logger.info(f"摘取后的数据已保存到 '{extracted_info_path}'")
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
logger.error(f"保存JSON文件时出错: {e}")
|
logger.error(f"保存JSON文件时出错: {e}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(output_json_path, 'w', encoding='utf-8') as json_file:
|
with open(output_json_path, 'w', encoding='utf-8') as json_file:
|
||||||
json.dump(extracted_info_path, json_file, ensure_ascii=False, indent=4)
|
json.dump(final_result, json_file, ensure_ascii=False, indent=4)
|
||||||
logger.info(f"合并后的数据已保存到 '{output_json_path}'")
|
logger.info(f"合并后的数据已保存到 '{output_json_path}'")
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
logger.error(f"保存JSON文件时出错: {e}")
|
logger.error(f"保存JSON文件时出错: {e}")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user