11.12 工程标适配

This commit is contained in:
zy123 2024-11-12 14:44:57 +08:00
parent 906476ad2c
commit 134ef6c6cb
5 changed files with 76 additions and 36 deletions

View File

@@ -43,6 +43,7 @@ def judge_consortium_bidding(baseinfo_list):
baseinfo_list[:] = updated_list baseinfo_list[:] = updated_list
return accept_bidding return accept_bidding
#字符串列表转为普通列表从qianwen回答中提取
def process_string_list(string_list): def process_string_list(string_list):
# 使用正则表达式匹配方括号内的内容 # 使用正则表达式匹配方括号内的内容
match = re.search(r'\[(.*?)\]', string_list) match = re.search(r'\[(.*?)\]', string_list)

View File

@@ -2,7 +2,7 @@
import json import json
import os.path import os.path
import re import re
from flask_app.general.json_utils import extract_content_from_json # 可以选择性地导入特定的函数 from flask_app.general.json_utils import extract_content_from_json, clean_json_string # 可以选择性地导入特定的函数
from flask_app.main.提取打勾符号 import read_pdf_and_judge_main from flask_app.main.提取打勾符号 import read_pdf_and_judge_main
from flask_app.general.多线程提问 import multi_threading from flask_app.general.多线程提问 import multi_threading
from flask_app.general.通义千问long import qianwen_long,upload_file from flask_app.general.通义千问long import qianwen_long,upload_file
@@ -47,10 +47,26 @@ def construct_judge_questions(json_data):
return questions return questions
def merge_json_to_list(merged): def merge_json_to_list(merged,tobidders_notice=""):
# print(json.dumps(merged,ensure_ascii=False,indent=4))
"""Merge updates into the original data by modifying specific keys based on their value ('是' or '否'), and create a list based on these values.""" """Merge updates into the original data by modifying specific keys based on their value ('是' or '否'), and create a list based on these values."""
chosen_numbers = [] chosen_numbers = []
# 定义键名映射
key_mapping = {
'是否允许分包': '分包',
'是否递交投标保证金': '投标保证金',
'是否递交磋商保证金': '磋商保证金',
'是否提交履约保证金': '履约保证金',
'是否有招标代理服务费': '招标代理服务费',
'是否组织踏勘现场': '踏勘现场',
'是否召开投标预备会': '投标预备会',
'是否召开投标答疑会': '投标答疑会',
'是否允许偏离': '偏离',
'是否退还投标文件':'退还投标文件'
}
# 处理是否允许分包 # 处理是否允许分包
if merged.get('是否允许分包') == '': if merged.get('是否允许分包') == '':
chosen_numbers.append(1) chosen_numbers.append(1)
@@ -63,13 +79,15 @@ def merge_json_to_list(merged):
guarantee_processed = False guarantee_processed = False
for guarantee_key in ['是否递交投标保证金', '是否递交磋商保证金']: for guarantee_key in ['是否递交投标保证金', '是否递交磋商保证金']:
if guarantee_key in merged and not guarantee_processed: if guarantee_key in merged and not guarantee_processed:
guarantee_type = '投标' if '投标' in guarantee_key else '磋商'
if merged[guarantee_key] == '': if merged[guarantee_key] == '':
chosen_numbers.extend([2, 3]) chosen_numbers.extend([2, 3])
elif merged[guarantee_key] == '': elif merged[guarantee_key] == '':
guarantee_type = '投标' if '投标' in guarantee_key else '磋商'
merged[f'{guarantee_type}保证金'] = '不提交' merged[f'{guarantee_type}保证金'] = '不提交'
merged[f'退还{guarantee_type}保证金'] = '/' merged[f'退还{guarantee_type}保证金'] = '/'
merged.pop(guarantee_key, None) else:
# 当既不是 '是' 也不是 '否' 时执行
merged[f'退还{guarantee_type}保证金'] = '未知'
guarantee_processed = True guarantee_processed = True
elif guarantee_key in merged and guarantee_processed: elif guarantee_key in merged and guarantee_processed:
merged.pop(guarantee_key, None) merged.pop(guarantee_key, None)
@@ -119,10 +137,37 @@ def merge_json_to_list(merged):
merged['偏离'] = '不允许' merged['偏离'] = '不允许'
merged.pop('是否允许偏离', None) merged.pop('是否允许偏离', None)
#11.12这里有问题正文部分的信息重要性不高而且这里的key不好设置先注释了
# # 初始化 questions_list
# questions_list = []
# if tobidders_notice:
# file_id = upload_file(tobidders_notice) # 假设 upload_file 函数已定义
# question_template = (
# f"根据该招标文件中的信息,{{key}}的内容是怎样的请按json格式给我提供信息"
# f"键名是'{{key}}',若存在嵌套信息,嵌套内容键名以文件中对应字段命名(或是你对相应要求的总结),"
# f"而对应键值需要与原文保持一致。注意:默认情况用普通键值对返回结果即可,键名为{{key}};若原文中未提及{{key}}相关内容,在键值中填'未知'。"
# )
# # 遍历 merged查找值为 '未知' 的键
# keys_to_remove=[]
# for original_key, value in merged.items():
# if value == '未知':
# keys_to_remove.append(original_key)
# # 获取映射后的键名
# mapped_key = key_mapping.get(original_key, original_key)
# # 生成问题
# question = question_template.format(key=mapped_key)
# questions_list.append(question)
# for k in keys_to_remove:
# del merged[k]
# # 假设 questions_list 需要传递给 multi_threading 函数
# baseinfo_results = multi_threading(questions_list, "", file_id, 2) # 假设 multi_threading 函数已定义
# baseinfo_list = [clean_json_string(res) for _, res in baseinfo_results] if baseinfo_results else []
# # 更新 merged 中的 '未知' 值
# for info in baseinfo_list:
# merged.update(info)
return chosen_numbers, merged return chosen_numbers, merged
def read_questions_from_judge(file_path, indices): def read_questions_from_judge(file_path, indices):
questions = [] questions = []

View File

@@ -32,7 +32,7 @@ def aggregate_basic_info_engineering(baseinfo_list):
"投标有效期", "投标有效期",
"评标结果公示媒介" "评标结果公示媒介"
], ],
"保证金相关": ["质量保证金", "退还投标保证金"], "保证金相关": ["退还投标保证金","质量保证金"],
"其他信息": [ "其他信息": [
"重新招标、不再招标和终止招标", "重新招标、不再招标和终止招标",
"投标费用承担", "投标费用承担",
@@ -81,7 +81,7 @@ def dynamic_key_handling(key_groups, detected_keys):
# 处理“保证金相关”组,插到"质量保证金"前 # 处理“保证金相关”组,插到"质量保证金"前
if "保证金" in key: if "保证金" in key:
group = key_groups["保证金相关"] group = key_groups["保证金相关"]
insert_before = "质量保证金" insert_before = "退还投标保证金"
if insert_before in group: if insert_before in group:
index = group.index(insert_before) index = group.index(insert_before)
if key not in group: # 避免重复插入 if key not in group: # 避免重复插入
@@ -122,7 +122,7 @@ def update_baseinfo_lists(baseinfo_list1, baseinfo_list2):
return updated_list return updated_list
#先不带投标人须知正文,如果是未知,再直接问正文,
def process_baseinfo_list(baseinfo_list, tobidders_notice): def process_baseinfo_list(baseinfo_list, tobidders_notice):
questions_list = [] questions_list = []
for item in baseinfo_list: for item in baseinfo_list:
@@ -130,11 +130,10 @@ def process_baseinfo_list(baseinfo_list, tobidders_notice):
for key, value in item.items(): for key, value in item.items():
if value == "未知" or (isinstance(value, dict) and all(v == "未知" for v in value.values())): if value == "未知" or (isinstance(value, dict) and all(v == "未知" for v in value.values())):
question = ( question = (
f"根据该招标文件中的信息,{key}的内容是怎样的?" f"根据该招标文件中的信息,{key}的内容是怎样的请按json格式给我提供信息键名是'{key}',若存在嵌套信息,嵌套内容键名以文件中对应字段命名(或是你对相应要求的总结),而对应键值需要与原文保持一致。注意:默认情况用普通键值对返回结果即可,键名为{key};若原文中未提及'{key}'相关内容,在键值中填'未知'"
f"请按json格式给我提供信息键名是'{key}'"
f"若存在未知信息,在对应的键值中填'未知'"
) )
questions_list.append(question) questions_list.append(question)
if questions_list: if questions_list:
file_id = upload_file(tobidders_notice) file_id = upload_file(tobidders_notice)
baseinfo_results = multi_threading(questions_list, "", file_id, 2) baseinfo_results = multi_threading(questions_list, "", file_id, 2)
@@ -142,8 +141,7 @@ def process_baseinfo_list(baseinfo_list, tobidders_notice):
else: else:
return [] return []
def combine_basic_info(merged_baseinfo_path,merged_baseinfo_path_more, tobidders_notice, clause_path):
def combine_basic_info(merged_baseinfo_path,merged_baseinfo_path_more, tobidders_notice_table, tobidders_notice, clause_path):
""" """
综合和处理基础信息生成最终的基础信息字典 综合和处理基础信息生成最终的基础信息字典
@@ -156,23 +154,24 @@ def combine_basic_info(merged_baseinfo_path,merged_baseinfo_path_more, tobidders
返回 返回
- dict: 综合后的基础信息 - dict: 综合后的基础信息
""" """
baseinfo_prompt_file_path=r'D:\flask_project\flask_app\static\提示词\基本信息工程标qianwen-long.txt'
baseinfo_prompt_file_path = 'flask_app/static/提示词/基本信息工程标qianwen-long.txt' # baseinfo_prompt_file_path = 'flask_app/static/提示词/基本信息工程标qianwen-long.txt'
file_id1 = upload_file(merged_baseinfo_path) file_id1 = upload_file(merged_baseinfo_path)
questions = read_questions_from_file(baseinfo_prompt_file_path) questions = read_questions_from_file(baseinfo_prompt_file_path)
more_query = "请你根据招标文件信息,回答以下问题:是否组织踏勘现场?是否召开投标预备会?是否允许偏离?是否退还投标文件?是否允许分包? 是否需要递交投标保证金是否需要提交履约保证金履约担保是否有招标代理服务费请按json格式给我提供信息键名分别为'是否组织踏勘现场','是否召开投标预备会','是否允许偏离','是否退还投标文件',是否允许分包','是否递交投标保证金','是否提交履约保证金','是否有招标代理服务费',键值仅限于'','','未知',若存在矛盾信息,请回答'未知'" more_query = "请你根据招标文件信息,回答以下问题:是否组织踏勘现场?是否召开投标预备会?是否允许偏离?是否退还投标文件?是否允许分包? 是否需要递交投标保证金是否需要提交履约保证金履约担保是否有招标代理服务费请按json格式给我提供信息键名分别为'是否组织踏勘现场','是否召开投标预备会','是否允许偏离','是否退还投标文件',是否允许分包','是否递交投标保证金','是否提交履约保证金','是否有招标代理服务费',键值仅限于'','','未知',若存在矛盾信息,请回答'未知'"
questions.append(more_query) questions.append(more_query)
baseinfo_results = multi_threading(questions, "", file_id1, 2) baseinfo_results = multi_threading(questions, "", file_id1, 2)
baseinfo_list1 = [clean_json_string(res) for _, res in baseinfo_results] if baseinfo_results else [] baseinfo_list1 = [clean_json_string(res) for _, res in baseinfo_results] if baseinfo_results else []
chosen_numbers, merged = merge_json_to_list(baseinfo_list1.pop()) chosen_numbers, merged = merge_json_to_list(baseinfo_list1.pop(),tobidders_notice)
baseinfo_list1_copy = copy.deepcopy(baseinfo_list1) baseinfo_list1_copy = copy.deepcopy(baseinfo_list1)
baseinfo_list1.append(merged) baseinfo_list1.append(merged)
judge_file_path = 'flask_app/static/提示词/是否相关问题qianwen-long.txt' judge_file_path=r'D:\flask_project\flask_app\static\提示词\是否相关问题qianwen-long.txt'
# judge_file_path = 'flask_app/static/提示词/是否相关问题qianwen-long.txt'
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor: with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
# 提交两个任务 # 提交两个任务
future1 = executor.submit(process_judge_questions, judge_file_path, chosen_numbers, tobidders_notice_table, baseinfo_list1) future1 = executor.submit(process_judge_questions, judge_file_path, chosen_numbers, merged_baseinfo_path, baseinfo_list1)
future2 = executor.submit(process_baseinfo_list, baseinfo_list1_copy, tobidders_notice) future2 = executor.submit(process_baseinfo_list, baseinfo_list1_copy, tobidders_notice) #只问tobidders_notice
future3 = executor.submit(extract_from_notice, merged_baseinfo_path_more, clause_path, 3) # 新增的多线程任务 future3 = executor.submit(extract_from_notice, merged_baseinfo_path_more, clause_path, 3) # 新增的多线程任务
# 等待两个任务完成并获取结果 # 等待两个任务完成并获取结果
@@ -188,16 +187,15 @@ def combine_basic_info(merged_baseinfo_path,merged_baseinfo_path_more, tobidders
return {"基础信息": aggregated_baseinfo} return {"基础信息": aggregated_baseinfo}
# TODO:先不带投标人须知正文,如果是未知,再直接问正文,
if __name__ == "__main__": if __name__ == "__main__":
start_time = time.time() start_time = time.time()
merged_baseinfo_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\special_output\\zbtest2_merged_baseinfo.pdf" merged_baseinfo_path = r"D:\flask_project\flask_app\static\output\output1\6f2010ee-d7cd-4787-a26a-2db8233d179a\ztbfile_merged_baseinfo.pdf"
more=r"D:\flask_project\flask_app\static\output\output1\6f2010ee-d7cd-4787-a26a-2db8233d179a\merged_baseinfo_path_more.pdf"
# output_folder="C:\\Users\\Administrator\\Desktop\\招标文件\\special_output" # output_folder="C:\\Users\\Administrator\\Desktop\\招标文件\\special_output"
tobidders_notice_table = "C:\\Users\\Administrator\\Desktop\\招标文件\\special_output\\zbtest2_tobidders_notice_table.pdf" tobidders_notice_table = r"D:\flask_project\flask_app\static\output\output1\6f2010ee-d7cd-4787-a26a-2db8233d179a\ztbfile_tobidders_notice_table.pdf"
tobidders_notice = "C:\\Users\\Administrator\\Desktop\\招标文件\\special_output\\zbtest2_tobidders_notice.pdf" tobidders_notice = r"D:\flask_project\flask_app\static\output\output1\6f2010ee-d7cd-4787-a26a-2db8233d179a\ztbfile_tobidders_notice.pdf"
clause_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\special_output\\clause1.json" clause_path = r"D:\flask_project\flask_app\static\output\output1\6f2010ee-d7cd-4787-a26a-2db8233d179a\clause1.json"
res = combine_basic_info(merged_baseinfo_path, tobidders_notice_table, tobidders_notice, clause_path) res = combine_basic_info(merged_baseinfo_path,more,tobidders_notice, clause_path)
print(json.dumps(res, ensure_ascii=False, indent=4)) print(json.dumps(res, ensure_ascii=False, indent=4))
end_time = time.time() end_time = time.time()
print("elapsed_time:" + str(end_time - start_time)) print("elapsed_time:" + str(end_time - start_time))

View File

@@ -92,18 +92,16 @@ def preprocess_files(output_folder, downloaded_file_path, file_type,unique_id):
} }
# 基本信息 # 基本信息
def fetch_project_basic_info(invalid_path, merged_baseinfo_path, merged_baseinfo_path_more,tobidders_notice_table, tobidders_notice, clause_path): def fetch_project_basic_info(invalid_path, merged_baseinfo_path, merged_baseinfo_path_more,tobidders_notice, clause_path):
logger.info("starting 基础信息...") logger.info("starting 基础信息...")
start_time = time.time() start_time = time.time()
if not merged_baseinfo_path: if not merged_baseinfo_path:
merged_baseinfo_path = invalid_path merged_baseinfo_path = invalid_path
if not merged_baseinfo_path_more: if not merged_baseinfo_path_more:
merged_baseinfo_path_more=invalid_path merged_baseinfo_path_more=invalid_path
if not tobidders_notice_table:
tobidders_notice_table = invalid_path
if not tobidders_notice: if not tobidders_notice:
tobidders_notice = invalid_path tobidders_notice = invalid_path
basic_res = combine_basic_info(merged_baseinfo_path,merged_baseinfo_path_more, tobidders_notice_table, tobidders_notice, clause_path) basic_res = combine_basic_info(merged_baseinfo_path,merged_baseinfo_path_more,tobidders_notice, clause_path)
end_time = time.time() end_time = time.time()
logger.info(f"基础信息 done耗时{end_time - start_time:.2f}") logger.info(f"基础信息 done耗时{end_time - start_time:.2f}")
return basic_res return basic_res
@@ -188,7 +186,6 @@ def engineering_bid_main(output_folder, downloaded_file_path, file_type, unique_
# 立即启动不依赖 knowledge_name 和 index 的任务 # 立即启动不依赖 knowledge_name 和 index 的任务
futures = { futures = {
'base_info': executor.submit(fetch_project_basic_info,processed_data['invalid_path'] ,processed_data['merged_baseinfo_path'],processed_data['merged_baseinfo_path_more'], 'base_info': executor.submit(fetch_project_basic_info,processed_data['invalid_path'] ,processed_data['merged_baseinfo_path'],processed_data['merged_baseinfo_path_more'],
processed_data['tobidders_notice_table'],
processed_data['tobidders_notice'], processed_data['clause_path']), processed_data['tobidders_notice'], processed_data['clause_path']),
'qualification_review': executor.submit(fetch_qualification_review, processed_data['evaluation_method'], 'qualification_review': executor.submit(fetch_qualification_review, processed_data['evaluation_method'],
processed_data['qualification'], output_folder, processed_data['qualification'], output_folder,
@@ -227,6 +224,7 @@ def engineering_bid_main(output_folder, downloaded_file_path, file_type, unique_
yield json.dumps({'error': f'Error processing {key}: {str(exc)}'}, ensure_ascii=False) yield json.dumps({'error': f'Error processing {key}: {str(exc)}'}, ensure_ascii=False)
#TODO:废标项,针对新文件作优化,统一成货物标的处理逻辑 #TODO:废标项,针对新文件作优化,统一成货物标的处理逻辑
#TODO:基本信息,判断是否这里,打勾逻辑取消了。
if __name__ == "__main__": if __name__ == "__main__":
start_time = time.time() start_time = time.time()
output_folder = "C:\\Users\\Administrator\\Desktop\\招标文件\\new_test1" output_folder = "C:\\Users\\Administrator\\Desktop\\招标文件\\new_test1"

View File

@@ -2,7 +2,7 @@
2.该招标文件的项目概况或工程概况招标范围是请按json格式给我提供信息键名分别为'项目概况','招标范围',若存在嵌套信息,嵌套内容键名以文件中对应字段命名,而嵌套键值必须与原文保持一致,若存在未知信息,在对应的键值中填'未知'。 2.该招标文件的项目概况或工程概况招标范围是请按json格式给我提供信息键名分别为'项目概况','招标范围',若存在嵌套信息,嵌套内容键名以文件中对应字段命名,而嵌套键值必须与原文保持一致,若存在未知信息,在对应的键值中填'未知'。
3.该招标文件的招标控制价或投标限价或预算金额或合同估算价但非监理费用请按json格式给我提供信息键名为'招标控制价',若存在未知信息,在对应的键值中填'未知'。 3.该招标文件的招标控制价(或投标限价或预算金额或合同估算价,但非监理费用)是?该项目是否接受联合体投标?请按json格式给我提供信息键名分别为'招标控制价'和'是否接受联合体投标',其中'是否接受联合体投标'的键值仅限于'是'、'否'、'未知',若存在未知信息,在对应的键值中填'未知'。
4.投标文件递交截止日期是递交方式是请按json格式给我提供信息键名分别是'投标文件递交截止日期','投标文件递交方式',若存在未知信息,在对应的键值中填'未知'。 4.投标文件递交截止日期是递交方式是请按json格式给我提供信息键名分别是'投标文件递交截止日期','投标文件递交方式',若存在未知信息,在对应的键值中填'未知'。
@@ -20,6 +20,4 @@
11.该文档要求扣留的质量保证金百分比是多少请以json格式给我提供信息键名为'质量保证金',如果没有则以'未知'填充。 11.该文档要求扣留的质量保证金百分比是多少请以json格式给我提供信息键名为'质量保证金',如果没有则以'未知'填充。
12.该项目是否接受联合体投标请按json格式给我提供信息键名为'是否接受联合体投标''是否接受联合体投标'的键值仅限于'是'、'否'、'未知'。 12.该项目的开标时间或开启时间和开标地点是请按json格式给我提供信息键名为'开标时间'和'开标地点',对于"开标时间",若文中没有明确时间,将其键值设为文中相关表述,若存在未知信息,在对应的键值中填'未知'。
13.该项目的开标时间或开启时间和开标地点是请按json格式给我提供信息键名为'开标时间'和'开标地点',对于"开标时间",若文中没有明确时间,将其键值设为文中相关表述,若存在未知信息,在对应的键值中填'未知'。