# SexBot / index_retrieve_test.py
# Pew404's picture
# Upload folder using huggingface_hub
# 13fbd2e verified
from llama_index.llms.ollama import Ollama
from langchain_community.embeddings.ollama import OllamaEmbeddings
from llama_index.core import Settings
import time
# Build the Ollama-backed LLM and embedding clients (both use the "pornchat"
# model) and assign them to llama_index's `Settings` object.
embed_model = OllamaEmbeddings(model="pornchat")
llm = Ollama(
    model="pornchat",
    request_timeout=120,
)
Settings.embed_model = embed_model
Settings.llm = llm
from llama_index.core import load_index_from_storage
from llama_index.core import StorageContext
# Rebuild the storage context from the persisted index directory, then load
# the index that the document-summary retrievers below operate on.
storage_context = StorageContext.from_defaults(
    persist_dir="/data1/home/purui/projects/chatbot/tests/index/sexual_knowdledge_index",
)
doc_summary_index = load_index_from_storage(storage_context)
# doc_summary
# doc_summary = doc_summary_index.get_document_summary("c9d242d5-9c2c-4c07-b549-ed438908663e")
# print(f"doc_summary of c9d242d5-9c2c-4c07-b549-ed438908663e:\n {doc_summary}")
from llama_index.core.indices.document_summary import (
DocumentSummaryIndexLLMRetriever,
)
# NOTE: despite its name, `basic_retriever` is a query engine (it is built
# with `as_query_engine`), not a retriever.
basic_retriever = doc_summary_index.as_query_engine(
    similarity_top_k=2,
)
# retriever = DocumentSummaryIndexLLMRetriever(
# doc_summary_index,
# # choice_select_prompt=None,
# choice_batch_size=5,
# choice_top_k=3,
# # format_node_batch_fn=None,
# # parse_choice_select_answer_fn=None,
# )
from llama_index.core.indices.document_summary import (
DocumentSummaryIndexEmbeddingRetriever,
)
# Embedding-based retriever over the document-summary index. No
# `similarity_top_k` is passed, so the library default applies.
retriever = DocumentSummaryIndexEmbeddingRetriever(doc_summary_index)
from llama_index.core import VectorStoreIndex
# Load the persisted vector index and take its default retriever.
_vector_storage_context = StorageContext.from_defaults(
    persist_dir="/data1/home/purui/projects/chatbot/tests/index/sexual_knowdledge_vectorindex",
)
vectorindex = load_index_from_storage(_vector_storage_context)
vector_retriever = vectorindex.as_retriever()

# Load the persisted summary index and build a retriever from it.
_summary_storage_context = StorageContext.from_defaults(
    persist_dir="/data1/home/purui/projects/chatbot/tests/index/sexual_knowdledge_summaryindex",
)
summary_index = load_index_from_storage(_summary_storage_context)
# NOTE(review): `response_mode` is normally a query-engine / response
# synthesizer option; confirm that `as_retriever` for this index type actually
# honors it (and `similarity_top_k`) rather than silently ignoring them.
summary_retriever = summary_index.as_retriever(
    response_mode="tree_summarize",
    similarity_top_k=2,
)
query = "oral sex"


def _timed_retrieve(target, text):
    """Run ``target.retrieve(text)`` and measure how long it takes.

    ``time.perf_counter`` is used instead of ``time.time`` because it is a
    monotonic, high-resolution clock meant for timing short intervals; the
    wall clock can jump if the system time is adjusted mid-measurement.

    Args:
        target: Any object exposing a ``retrieve(text)`` method.
        text: The query string handed to ``retrieve``.

    Returns:
        A ``(nodes, elapsed_seconds)`` tuple, where ``nodes`` is whatever
        ``retrieve`` returned and ``elapsed_seconds`` is a float.
    """
    started = time.perf_counter()
    nodes = target.retrieve(text)
    return nodes, time.perf_counter() - started


# documentsummaryindex retriever
retrieved_nodes, doc_sum_time_cost = _timed_retrieve(retriever, query)

# vectorindex retriever
vector_retrieved_nodes, vector_time_cost = _timed_retrieve(vector_retriever, query)

# summary index retriever
summary_retrieved_nodes, summary_time_cost = _timed_retrieve(summary_retriever, query)
def _format_result(label, nodes, elapsed):
    """Build the one-line report for a single retriever's result.

    Args:
        label: Prefix identifying the retriever in the output line.
        nodes: Sequence of retrieved nodes; may be empty.
        elapsed: Retrieval duration as measured by the caller.

    Returns:
        The report line. When ``nodes`` is empty, a placeholder stands in for
        the top node's text instead of ``nodes[0]`` raising ``IndexError``
        and aborting the remaining reports.
    """
    top_text = nodes[0].get_text() if nodes else "<no nodes retrieved>"
    return f"{label}: {len(nodes)} - Top_1_node: {top_text} - time cost: {elapsed}"


_SEPARATOR = "\n\n---------------------------------------"

# The "tree_retrieved_nodes" label is kept unchanged so the output format
# stays the same; it reports the document-summary retriever's results.
print(_format_result("tree_retrieved_nodes", retrieved_nodes, doc_sum_time_cost))
print(_SEPARATOR)
print(_format_result("vector_retrieved_nodes", vector_retrieved_nodes, vector_time_cost))
print(_SEPARATOR)
print(_format_result("summary_retrieved_nodes", summary_retrieved_nodes, summary_time_cost))