import torch
import streamlit as st

from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    ServiceContext,
    PromptTemplate,
)
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# load documents from the local data directory
documents = SimpleDirectoryReader("./data/").load_data()

# set up prompts - the <|SYSTEM|>/<|USER|>/<|ASSISTANT|> markers follow the StableLM chat format
system_prompt = """<|SYSTEM|># You are a Q&A assistant. Your goal is to provide mentorship to the user
about their life problems, using the context given from the Bhagavad Gita.
Answer questions as accurately as possible based on the instructions and context provided.
"""

# this wraps the default prompts that are internal to llama-index
query_wrapper_prompt = PromptTemplate("<|USER|>{query_str}<|ASSISTANT|>")

llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.7, "do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="mistralai/Mistral-7B-Instruct-v0.1",
    model_name="mistralai/Mistral-7B-Instruct-v0.1",
    device_map="auto",
    # stop token ids carried over from the StableLM example
    stopping_ids=[50278, 50279, 50277, 1, 0],
    tokenizer_kwargs={"max_length": 4096},
    # load the model in float16 to reduce GPU memory usage (requires CUDA)
    model_kwargs={"torch_dtype": torch.float16},
)

# embedding model used to vectorize the document chunks
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-mpnet-base-v2")

service_context = ServiceContext.from_defaults(
    chunk_size=1024,
    llm=llm,
    embed_model=embed_model,
)

# build the vector index and query engine
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine()

# simple Streamlit front end
st.header('Ask your life question and find the answer from the "Bhagavad Gita"')
question = st.text_input("Ask your life question: ")

# only query once the user has actually typed something
if question:
    response = query_engine.query(question)
    st.write(str(response))
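Assuming the script above is saved as, say, `app.py` (a hypothetical filename) and the Bhagavad Gita text or PDF is placed under `./data/`, the app can be launched with `streamlit run app.py`. Note that Streamlit reruns the whole script on every interaction, so as written the 7B model is reloaded and the index rebuilt for each question; wrapping the setup in a function decorated with `st.cache_resource` is one way to avoid that.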