From 0f9f489f2f63e2bb4ea98d223602e7b926638f61 Mon Sep 17 00:00:00 2001 From: zy123 <646228430@qq.com> Date: Fri, 27 Sep 2024 15:31:28 +0800 Subject: [PATCH] 9.27 --- flask_app/main/start_up.py | 2 +- flask_app/main/基础信息整合.py | 16 ++++++++++------ flask_app/main/多线程提问.py | 10 +++++++--- flask_app/main/读取文件/按页读取pdf.py | 2 +- flask_app/货物标/货物标截取pdf.py | 2 +- 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/flask_app/main/start_up.py b/flask_app/main/start_up.py index 40aa715..685ea7c 100644 --- a/flask_app/main/start_up.py +++ b/flask_app/main/start_up.py @@ -163,7 +163,7 @@ def process_and_stream(file_url): logger.info(f"Updated combined data: {json.dumps(combined_data, ensure_ascii=False, indent=4)}") # **保存 combined_data 到 output_folder 下的 'final_result.json'** output_json_path = os.path.join(output_folder, 'final_result.json') - includes = ["基础信息", "资格审查", "商务标", "技术标", "无效标与废标项", "投标文件要求", "开评定标流程"] + includes = ["基础信息", "资格审查", "商务评分", "技术评分", "无效标与废标项", "投标文件要求", "开评定标流程"] result = post_processing(combined_data, includes) try: with open(output_json_path, 'w', encoding='utf-8') as json_file: diff --git a/flask_app/main/基础信息整合.py b/flask_app/main/基础信息整合.py index 8c81349..2d49396 100644 --- a/flask_app/main/基础信息整合.py +++ b/flask_app/main/基础信息整合.py @@ -77,9 +77,11 @@ def judge_consortium_bidding(baseinfo_list): def project_basic_info(knowledge_name,truncate0,output_folder,clause_path): #投标人须知前附表 # 调用大模型回答项目基础信息 baseinfo_list = [] - baseinfo_file_path='flask_app/static/提示词/前两章提问总结.txt' + # baseinfo_file_path='flask_app/static/提示词/前两章提问总结.txt' + baseinfo_file_path='D:\\flask_project\\flask_app\\static\\提示词\\前两章提问总结.txt' questions = read_questions_from_file(baseinfo_file_path) res1 = multi_threading(questions, knowledge_name) + for _, response in res1: # _占位,代表ques;response[0]也是ques;response[1]是ans try: if response and len(response) > 1: # 检查response存在且有至少两个元素 @@ -91,7 +93,9 @@ def project_basic_info(knowledge_name,truncate0,output_folder,clause_path): # # 判断是否分包、是否需要递交投标保证金等 chosen_numbers, merged = judge_whether_main(truncate0,output_folder) baseinfo_list.append(merged) - judge_file_path ='flask_app/static/提示词/是否相关问题.txt' + # judge_file_path ='flask_app/static/提示词/是否相关问题.txt' + + judge_file_path='D:\\flask_project\\flask_app\\static\\提示词\\是否相关问题.txt' judge_questions = read_questions_from_judge(judge_file_path, chosen_numbers) judge_consortium = judge_consortium_bidding(baseinfo_list) #通过招标公告判断是否接受联合体投标 @@ -129,11 +133,11 @@ def project_basic_info(knowledge_name,truncate0,output_folder,clause_path): # if __name__ == "__main__": knowledge_name = "ztb" - output_folder="C:\\Users\\Administrator\\Desktop\\fsdownload\\temp8\\3abb6e16-19db-42ad-9504-53bf1072dfe7" - truncate0="C:\\Users\\Administrator\\Desktop\\fsdownload\\temp8\\3abb6e16-19db-42ad-9504-53bf1072dfe7\\ztbfile_tobidders_notice_table.pdf" - clause_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\temp8\\3abb6e16-19db-42ad-9504-53bf1072dfe7\\clause.json" + output_folder="C:\\Users\Administrator\Desktop\\fsdownload\\3424b7cb-1f85-44b4-a432-44539b870405" + truncate0="C:\\Users\Administrator\Desktop\\fsdownload\\3424b7cb-1f85-44b4-a432-44539b870405\\ztbfile_tobidders_notice_table.pdf" + clause_path="C:\\Users\Administrator\Desktop\\fsdownload\\3424b7cb-1f85-44b4-a432-44539b870405\\clause1.json" res=project_basic_info(knowledge_name,truncate0,output_folder,clause_path) - print(res) + print(json.dumps(res,ensure_ascii=False,indent=4)) diff --git a/flask_app/main/多线程提问.py b/flask_app/main/多线程提问.py index 76d7dd4..059e7b6 100644 --- a/flask_app/main/多线程提问.py +++ b/flask_app/main/多线程提问.py @@ -129,9 +129,10 @@ def llm_call(question, knowledge_name,file_id, result_queue, ans_index, llm_type ans = send_message(assistant, message=question) result_queue.put((ans_index, (question, ans))) # 在队列中添加索引 (question, ans) -def multi_threading(queries, knowledge_name="", file_id="",llm_type=1): +def multi_threading(queries, knowledge_name="", file_id="", llm_type=1): if not queries: return [] + print("多线程提问:starting multi_threading...") result_queue = queue.Queue() @@ -147,17 +148,20 @@ def multi_threading(queries, knowledge_name="", file_id="",llm_type=1): # 收集每个线程的结果 for future in concurrent.futures.as_completed(future_to_query): index = future_to_query[future] - # 由于 llm_call 函数本身会处理结果,这里只需要确保任务执行完成 try: - future.result() # 可以用来捕获异常或确认任务完成 + future.result() # 捕获异常或确认任务完成 except Exception as exc: print(f"Query {index} generated an exception: {exc}") + # 确保在异常情况下也向 result_queue 添加占位符 + result_queue.put((index, None)) # 从队列中获取所有结果并按索引排序 results = [None] * len(queries) while not result_queue.empty(): index, result = result_queue.get() results[index] = result + + # 返回一个保证是列表的结构 return results if __name__ == "__main__": diff --git a/flask_app/main/读取文件/按页读取pdf.py b/flask_app/main/读取文件/按页读取pdf.py index f33310d..af40e4b 100644 --- a/flask_app/main/读取文件/按页读取pdf.py +++ b/flask_app/main/读取文件/按页读取pdf.py @@ -72,6 +72,6 @@ def extract_text_by_page(file_path): print(f"Page {page_num + 1} is empty or text could not be extracted.") return result if __name__ == '__main__': - file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\中国电信股份有限公司随州分公司广水市雪亮工程信息化项目-招标文件(定稿).pdf" + file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\招标文件正文(1)(1).pdf" res=extract_text_by_page(file_path) # print(res) \ No newline at end of file diff --git a/flask_app/货物标/货物标截取pdf.py b/flask_app/货物标/货物标截取pdf.py index 324c586..030787f 100644 --- a/flask_app/货物标/货物标截取pdf.py +++ b/flask_app/货物标/货物标截取pdf.py @@ -367,7 +367,7 @@ def truncate_pdf_multiple(input_path, output_folder): # TODO:交通智能系统和招标(1)(1)文件有问题 if __name__ == "__main__": - input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\广水农商行门禁控制主机及基础验证设备采购项目——磋商文件(定稿)(三次).pdf" + input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\磋商文件(1).pdf" output_folder = "C:\\Users\\Administrator\\Desktop\\货物标\\output4" # truncate_pdf_multiple(input_path,output_folder) selection = 4 # 例如:1 - 商务技术服务要求, 2 - 评标办法, 3 - 资格审查后缀有qualification1和qualification2 4.投标人须知前附表