# 基于知识库提问的通用模板, # assistant_id import json import os import re import queue import concurrent.futures import time import requests from dashscope import Assistants, Messages, Runs, Threads from llama_index.indices.managed.dashscope import DashScopeCloudRetriever from flask_app.main.通义千问long import qianwen_long, upload_file prompt = """ # 角色 你是一个文档处理专家,专门负责理解和操作基于特定内容的文档任务,这包括解析、总结、搜索或生成与给定文档相关的各类信息。 ## 技能 ### 技能 1:文档解析与摘要 - 深入理解并分析${documents}的内容,提取关键信息。 - 根据需求生成简洁明了的摘要,保持原文核心意义不变。 ### 技能 2:信息检索与关联 - 在${documents}中高效检索特定信息或关键词。 - 能够识别并链接到文档内部或外部的相关内容,增强信息的连贯性和深度。 ## 限制 - 所有操作均需基于${documents}的内容,不可超出此范围创造信息。 - 在处理敏感或机密信息时,需遵守严格的隐私和安全规定。 - 确保所有生成或改编的内容逻辑连贯,无误导性信息。 请注意,上述技能执行时将直接利用并参考${documents}的具体内容,以确保所有产出紧密相关且高质量。 """ prom = '请记住以下材料,他们对回答问题有帮助,请你简洁准确地给出回答,不要给出无关内容。${documents}' def read_questions_from_file(file_path): questions = [] current_question = "" current_number = 0 with open(file_path, 'r', encoding='utf-8') as file: for line in file: line = line.strip() if not line: # 跳过空行 continue # 检查是否是新的问题编号 match = re.match(r'^(\d+)\.', line) if match: # 如果有之前的问题,保存它 if current_question: questions.append(current_question.strip()) # 开始新的问题 current_number = int(match.group(1)) current_question = line.split('.', 1)[1].strip() + "\n" else: # 继续添加到当前问题 current_question += line + "\n" # 添加最后一个问题 if current_question: questions.append(current_question.strip()) return questions #正文和文档名之间的内容 def send_message(assistant, message='百炼是什么?'): ans = [] print(f"Query: {message}") # create thread. thread = Threads.create() print(thread) # create a message. message = Messages.create(thread.id, content=message) # create run run = Runs.create(thread.id, assistant_id=assistant.id) # print(run) # wait for run completed or requires_action run_status = Runs.wait(run.id, thread_id=thread.id) # print(run_status) # get the thread messages. msgs = Messages.list(thread.id) for message in msgs['data'][::-1]: ans.append(message['content'][0]['text']['value']) return ans def rag_assistant(knowledge_name): retriever = DashScopeCloudRetriever(knowledge_name) pipeline_id = str(retriever.pipeline_id) assistant = Assistants.create( model='qwen-max', name='smart helper', description='智能助手,支持知识库查询和插件调用。', temperature='0.3', instructions="请记住以下材料,他们对回答问题有帮助,请你简洁准确地给出回答,不要给出无关内容。${documents}", tools=[ { "type": "code_interpreter" }, { "type": "rag", "prompt_ra": { "pipeline_id": pipeline_id, "parameters": { "type": "object", "properties": { "query_word": { "type": "str", "value": "${documents}" } } } } }] ) return assistant #TODO:http格式,有bug还没修改 def create_assistant(knowledge_name): """ Create an assistant using DashScope API via HTTP request based on the provided knowledge name. Parameters: knowledge_name (str): The name of the knowledge base to associate with the assistant. Returns: dict: Response from the API containing assistant details. Raises: ValueError: If the DASHSCOPE_API_KEY environment variable is not set. Exception: If any error occurs during the HTTP request. """ # Step 1: Initialize the Retriever and get the Pipeline ID try: retriever = DashScopeCloudRetriever(knowledge_name) pipeline_id = str(retriever.pipeline_id) except Exception as e: print(f"Error retrieving pipeline ID for knowledge '{knowledge_name}': {e}") return None # Step 2: Fetch the API Key from Environment Variables api_key = os.getenv("DASHSCOPE_API_KEY") if not api_key: raise ValueError("DASHSCOPE_API_KEY environment variable is not set.") # Step 3: Define the API Endpoint and Headers url = 'https://dashscope.aliyuncs.com/api/v1/assistants' headers = { "Content-Type": "application/json", "Authorization": f"Bearer {api_key}" } # Step 4: Construct the Instructions instructions = ( "请记住以下材料,他们对回答问题有帮助,请你简洁准确地给出回答,不要给出无关内容。${documents}" ) # Step 5: Define the Tools tools = [ { "type": "code_interpreter" }, { "type": "rag", "prompt_ra": { "pipeline_id": pipeline_id, "parameters": { "type": "object", "properties": { "query_word": { "type": "str", "value": "${documents}" } } } } } ] # Step 6: Construct the Payload payload = { "model": "qwen-max", "name": "智能小助手", # "Smart Helper" in Chinese "description": "智能助手,支持知识库查询和插件调用。", "temperature": 0.3, "instructions": instructions, "tools": tools, "file_ids": [], # Add file IDs if necessary "metadata": {} # Add metadata if necessary } # Optional: If you have specific file_ids or metadata, you can modify the payload accordingly # For example: # payload["file_ids"] = ["file_id_1", "file_id_2"] # payload["metadata"] = {"key1": "value1", "key2": "value2"} # Step 7: Make the HTTP POST Request try: response = requests.post(url, headers=headers, data=json.dumps(payload)) response.raise_for_status() # Raises HTTPError for bad responses (4xx or 5xx) assistant = response.json() print("Assistant created successfully:") print(json.dumps(assistant, indent=4, ensure_ascii=False)) return assistant except requests.exceptions.HTTPError as http_err: print(f"HTTP error occurred: {http_err} - Response: {response.text}") except Exception as err: print(f"An error occurred: {err}") def pure_assistant(): assistant = Assistants.create( model='qwen-max', name='smart helper', description='智能助手,能基于用户的要求精准简洁地回答用户的提问', instructions='智能助手,能基于用户的要求精准简洁地回答用户的提问', tools=[ { "type": "code_interpreter" }, ] ) return assistant def llm_call(question, knowledge_name,file_id, result_queue, ans_index, llm_type): if llm_type==1: print(f"rag_assistant! question:{question}") assistant = rag_assistant(knowledge_name) # assistant=create_assistant(knowledge_name) elif llm_type==2: print(f"qianwen_long! question:{question}") qianwen_res = qianwen_long(file_id,question) result_queue.put((ans_index,(question,qianwen_res))) return else : assistant = pure_assistant() ans = send_message(assistant, message=question) result_queue.put((ans_index, (question, ans))) # 在队列中添加索引 (question, ans) def multi_threading(queries, knowledge_name="", file_id="", llm_type=1): if not queries: return [] print("多线程提问:starting multi_threading...") result_queue = queue.Queue() # 使用 ThreadPoolExecutor 管理线程 with concurrent.futures.ThreadPoolExecutor(max_workers=15) as executor: # 逐个提交任务,每提交一个任务后休眠1秒 future_to_query = {} for index, query in enumerate(queries): future = executor.submit(llm_call, query, knowledge_name, file_id, result_queue, index, llm_type) future_to_query[future] = index time.sleep(1) # 每提交一个任务后等待1秒 # 收集每个线程的结果 for future in concurrent.futures.as_completed(future_to_query): index = future_to_query[future] try: future.result() # 捕获异常或确认任务完成 except Exception as exc: print(f"Query {index} generated an exception: {exc}") # 确保在异常情况下也向 result_queue 添加占位符 result_queue.put((index, None)) # 从队列中获取所有结果并按索引排序 results = [None] * len(queries) while not result_queue.empty(): index, result = result_queue.get() results[index] = result # 检查是否所有结果都是 None if all(result is None for result in results): return [] # 过滤掉None值 results = [r for r in results if r is not None] # 返回一个保证是列表的结构 return results if __name__ == "__main__": start_time=time.time() # # 读取问题列表 baseinfo_file_path = 'D:\\flask_project\\flask_app\\static\\提示词\\前两章提问总结.txt' questions =read_questions_from_file(baseinfo_file_path) knowledge_name = "招标解析5word" llm_type=1 results = multi_threading(questions, knowledge_name) end_time = time.time() if not results: print("errror!") else: print("elapsed time:"+str(end_time-start_time)) # 打印结果 for question, response in results: print(f"Response: {response}") # file_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\6.2定版视频会议磋商文件(1)\\6.2定版视频会议磋商文件_1-21.pdf" # file_id = upload_file(file_path) # questions=["该招标文件的项目名称是?项目编号(或招标编号)是?采购人(或招标人)是?采购代理机构(或招标代理机构)是?请按json格式给我提供信息,键名分别是'项目名称','项目编号','采购人','采购代理机构',若存在未知信息,在对应的键值中填'未知'。","该招标文件的项目概况是?项目基本情况是?请按json格式给我提供信息,键名分别为'项目概况','项目基本情况',若存在嵌套信息,嵌套内容键名以文件中对应字段命名,而嵌套键值必须与原文保持一致,若存在未知信息,在对应的键值中填'未知'。"] # results=multi_threading(questions,"",file_id,2) #1代表使用百炼rag 2代表使用qianwen-long # if not results: # print("errror!") # else: # # 打印结果 # for question, response in results: # print(f"Question: {question}") # print(f"Response: {response}") # ques=["关于'资格要求',本采购文件第一章第二款要求的内容是怎样的?请按json格式给我提供信息,键名为'资格要求',而键值需要完全与原文保持一致,不要擅自总结、删减,如果存在未知信息,请在对应键值处填'未知'。"] # # ques=["该招标文件的工程名称(项目名称)是?招标编号是?招标人是?招标代理机构是?请按json格式给我提供信息,键名分别是'工程名称','招标编号','招标人','招标代理机构',若存在未知信息,在对应的键值中填'未知'。","该招标文件的工程概况(或项目概况)是?招标范围是?请按json格式给我提供信息,键名分别为'工程概况','招标范围',若存在嵌套信息,嵌套内容键名以文件中对应字段命名,若存在未知信息,在对应的键值中填'未知'。"] # results = multi_threading(ques, "6.2视频会议docx") # if not results: # print("errror!") # else: # # 打印结果 # for question, response in results: # print(f"Question: {question}") # print(f"Response: {response}")