import torch
import streamlit as st

from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    ServiceContext,
    PromptTemplate,
)
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# load documents from the local data directory
documents = SimpleDirectoryReader("./data/").load_data()

# set up prompts - the <|SYSTEM|>/<|USER|>/<|ASSISTANT|> markers follow the StableLM chat format
system_prompt = """<|SYSTEM|># You are a Q&A assistant. Your goal is to provide mentorship to the user
about their life problems, using the context given from the Bhagavad Gita.
Answer questions as accurately as possible based on the instructions and context provided.
"""

# this wraps the default prompts that are internal to llama-index
query_wrapper_prompt = PromptTemplate("<|USER|>{query_str}<|ASSISTANT|>")

llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.7, "do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="mistralai/Mistral-7B-Instruct-v0.1",
    model_name="mistralai/Mistral-7B-Instruct-v0.1",
    device_map="auto",
    # stop token ids carried over from the StableLM example
    stopping_ids=[50278, 50279, 50277, 1, 0],
    tokenizer_kwargs={"max_length": 4096},
    # load the model in float16 to reduce GPU memory usage (requires CUDA)
    model_kwargs={"torch_dtype": torch.float16},
)

# embedding model used to vectorize the document chunks
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-mpnet-base-v2")

service_context = ServiceContext.from_defaults(
    chunk_size=1024,
    llm=llm,
    embed_model=embed_model,
)

# build the vector index and query engine
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()

# simple Streamlit front end
st.header('Ask your life question and find the answer from the "Bhagavad Gita"')
question = st.text_input("Ask your life question: ")

# only query once the user has actually typed something
if question:
    response = query_engine.query(question)
    st.write(str(response))
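Assuming the script above is saved as, say, `app.py` (a hypothetical filename) and the Bhagavad Gita text or PDF is placed under `./data/`, the app can be launched with `streamlit run app.py`. Note that Streamlit reruns the whole script on every interaction, so as written the 7B model is reloaded and the index rebuilt for each question; wrapping the setup in a function decorated with `st.cache_resource` is one way to avoid that.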