This commit is contained in:
zy123 2024-09-27 15:31:28 +08:00
parent d8247c95f4
commit 0f9f489f2f
5 changed files with 20 additions and 12 deletions

View File

@@ -163,7 +163,7 @@ def process_and_stream(file_url):
logger.info(f"Updated combined data: {json.dumps(combined_data, ensure_ascii=False, indent=4)}")
# **保存 combined_data 到 output_folder 下的 'final_result.json'**
output_json_path = os.path.join(output_folder, 'final_result.json')
includes = ["基础信息", "资格审查", "商务", "技术标", "无效标与废标项", "投标文件要求", "开评定标流程"]
includes = ["基础信息", "资格审查", "商务评分", "技术评分", "无效标与废标项", "投标文件要求", "开评定标流程"]
result = post_processing(combined_data, includes)
try:
with open(output_json_path, 'w', encoding='utf-8') as json_file:

View File

@@ -77,9 +77,11 @@ def judge_consortium_bidding(baseinfo_list):
def project_basic_info(knowledge_name,truncate0,output_folder,clause_path): #投标人须知前附表
# 调用大模型回答项目基础信息
baseinfo_list = []
baseinfo_file_path='flask_app/static/提示词/前两章提问总结.txt'
# baseinfo_file_path='flask_app/static/提示词/前两章提问总结.txt'
baseinfo_file_path='D:\\flask_project\\flask_app\\static\\提示词\\前两章提问总结.txt'
questions = read_questions_from_file(baseinfo_file_path)
res1 = multi_threading(questions, knowledge_name)
for _, response in res1: # _占位代表ques;response[0]也是ques;response[1]是ans
try:
if response and len(response) > 1: # 检查response存在且有至少两个元素
@@ -91,7 +93,9 @@ def project_basic_info(knowledge_name,truncate0,output_folder,clause_path): #
# 判断是否分包、是否需要递交投标保证金等
chosen_numbers, merged = judge_whether_main(truncate0,output_folder)
baseinfo_list.append(merged)
judge_file_path ='flask_app/static/提示词/是否相关问题.txt'
# judge_file_path ='flask_app/static/提示词/是否相关问题.txt'
judge_file_path='D:\\flask_project\\flask_app\\static\\提示词\\是否相关问题.txt'
judge_questions = read_questions_from_judge(judge_file_path, chosen_numbers)
judge_consortium = judge_consortium_bidding(baseinfo_list) #通过招标公告判断是否接受联合体投标
@@ -129,11 +133,11 @@ def project_basic_info(knowledge_name,truncate0,output_folder,clause_path): #
if __name__ == "__main__":
knowledge_name = "ztb"
output_folder="C:\\Users\\Administrator\\Desktop\\fsdownload\\temp8\\3abb6e16-19db-42ad-9504-53bf1072dfe7"
truncate0="C:\\Users\\Administrator\\Desktop\\fsdownload\\temp8\\3abb6e16-19db-42ad-9504-53bf1072dfe7\\ztbfile_tobidders_notice_table.pdf"
clause_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\temp8\\3abb6e16-19db-42ad-9504-53bf1072dfe7\\clause.json"
# Hard-coded sample inputs for the manual __main__ run.
# Every backslash is doubled: the previous literals contained "\A" and "\D",
# which are not valid escape sequences and only yielded the intended path
# because Python currently keeps unknown escapes verbatim (with a warning).
output_folder = "C:\\Users\\Administrator\\Desktop\\fsdownload\\3424b7cb-1f85-44b4-a432-44539b870405"
# Both input files live directly inside output_folder, so derive them from it
# instead of repeating the full prefix.
truncate0 = output_folder + "\\ztbfile_tobidders_notice_table.pdf"
clause_path = output_folder + "\\clause1.json"
res=project_basic_info(knowledge_name,truncate0,output_folder,clause_path)
print(res)
print(json.dumps(res,ensure_ascii=False,indent=4))

View File

@@ -132,6 +132,7 @@ def llm_call(question, knowledge_name,file_id, result_queue, ans_index, llm_type
def multi_threading(queries, knowledge_name="", file_id="", llm_type=1):
if not queries:
return []
print("多线程提问starting multi_threading...")
result_queue = queue.Queue()
@@ -147,17 +148,20 @@ def multi_threading(queries, knowledge_name="", file_id="",llm_type=1):
# 收集每个线程的结果
for future in concurrent.futures.as_completed(future_to_query):
index = future_to_query[future]
# 由于 llm_call 函数本身会处理结果,这里只需要确保任务执行完成
try:
future.result() # 可以用来捕获异常或确认任务完成
future.result() # 捕获异常或确认任务完成
except Exception as exc:
print(f"Query {index} generated an exception: {exc}")
# 确保在异常情况下也向 result_queue 添加占位符
result_queue.put((index, None))
# 从队列中获取所有结果并按索引排序
results = [None] * len(queries)
while not result_queue.empty():
index, result = result_queue.get()
results[index] = result
# 返回一个保证是列表的结构
return results
if __name__ == "__main__":

View File

@@ -72,6 +72,6 @@ def extract_text_by_page(file_path):
print(f"Page {page_num + 1} is empty or text could not be extracted.")
return result
if __name__ == '__main__':
file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\中国电信股份有限公司随州分公司广水市雪亮工程信息化项目-招标文件(定稿).pdf"
file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\招标文件正文(1)(1).pdf"
res=extract_text_by_page(file_path)
# print(res)

View File

@@ -367,7 +367,7 @@ def truncate_pdf_multiple(input_path, output_folder):
# TODO:交通智能系统和招标(1)(1)文件有问题
if __name__ == "__main__":
input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\广水农商行门禁控制主机及基础验证设备采购项目——磋商文件(定稿)(三次).pdf"
input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\磋商文件(1).pdf"
output_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\output4"
# truncate_pdf_multiple(input_path,output_folder)
selection = 4 # 例如1 - 商务技术服务要求, 2 - 评标办法, 3 - 资格审查后缀有qualification1和qualification2 4.投标人须知前附表