zbparse/flask_app/main/知识库操作.py
2024-08-29 17:30:49 +08:00

58 lines
2.0 KiB
Python

import os
import uuid
from llama_index.readers.dashscope.base import DashScopeParse
from llama_index.readers.dashscope.utils import ResultType
from llama_index.indices.managed.dashscope import DashScopeCloudIndex
from flask_app.main.删除知识库 import delete_index, create_client
def addfileToKnowledge(filepath, knowledge_name):
    """Parse a file with DashScope and build a cloud index from the result.

    Args:
        filepath: Path of the file handed to ``DashScopeParse.load_data``.
        knowledge_name: Name passed to ``DashScopeCloudIndex.from_documents``
            for the index that is built.

    Returns:
        The index object returned by ``DashScopeCloudIndex.from_documents``.
    """
    reader = DashScopeParse(result_type=ResultType.DASHSCOPE_DOCMIND)
    parsed_docs = reader.load_data(file_path=filepath)
    # NOTE: an earlier variant (left commented out by the author) opened an
    # index by name, inserted the documents into it, and returned both the
    # index and the documents.
    return DashScopeCloudIndex.from_documents(
        parsed_docs,
        knowledge_name,
        verbose=True,
    )
def deleteKnowledge(index):
    """Delete the knowledge base that ``index`` points at.

    The pipeline id is read from the index's retriever and the workspace id
    from the ``DASHSCOPE_WORKSPACE_ID`` environment variable; both are handed
    to ``delete_index`` together with a client from ``create_client``.

    Args:
        index: Object exposing ``as_retriever()`` whose result carries a
            ``pipeline_id`` attribute.
    """
    pipeline_id = str(index.as_retriever().pipeline_id)
    # ``os.environ.get`` yields None when the variable is not set; the value
    # is forwarded unchanged.
    workspace = os.environ.get('DASHSCOPE_WORKSPACE_ID')
    delete_index(create_client(), workspace, pipeline_id)
def deleteFileFromKnowledge(index, documents):
    """Delete the given documents from a knowledge-base index.

    Each document's ``id_`` attribute is collected and the resulting list is
    passed to ``index.delete_ref_doc`` in a single call. The success message
    is printed only after that call has returned, so it is never shown when
    nothing was deleted or when the deletion raised.

    Args:
        index: Object exposing ``delete_ref_doc(ids)``.
        documents: List of document objects. Anything that is not a non-empty
            list is ignored, and entries without a truthy ``id_`` attribute
            are skipped.

    Returns:
        None.
    """
    # Nothing to do unless we were given a non-empty list of documents.
    if not isinstance(documents, list) or not documents:
        return

    # getattr with a default tolerates objects that lack an ``id_`` attribute.
    candidate_ids = (getattr(document, 'id_', None) for document in documents)
    file_ids = [file_id for file_id in candidate_ids if file_id]

    # Avoid issuing a delete request with an empty ID list.
    if not file_ids:
        return

    index.delete_ref_doc(file_ids)
    print("deleted successfully")
# Example usage
if __name__ == "__main__":
    filepath = "C:\\Users\\Administrator\\Desktop\\招标文件\\招标01.pdf"
    # A random UUID suffix gives every run a distinct knowledge-base name.
    unique_id = str(uuid.uuid4())
    knowledge_name = "招标解析" + unique_id

    # To upload the file into a fresh knowledge base instead:
    # index = addfileToKnowledge(filepath, knowledge_name)
    index = DashScopeCloudIndex("招标解析e8cc45f4-cd41-47cf-a5e6-2b7885debfff")

    # To delete individual files from the knowledge base:
    # deleteFileFromKnowledge(index, document)

    # Delete the knowledge base opened above.
    deleteKnowledge(index)