Spaces:

AI-RESEARCHER-2024
/

CHAINLIT-RAG

Runtime error

App Files Files Community

AI-RESEARCHER-2024 committed on Oct 30, 2024

Commit

fa23d20

verified ·

1 Parent(s): 45331d7

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -17

app.py CHANGED Viewed

@@ -1,38 +1,74 @@
 import os
 import chainlit as cl
-from langchain_community.llms import Ollama
 from langchain.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
 from langchain_community.vectorstores import Chroma
-from langchain_community.embeddings import HuggingFaceEmbeddings
-embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 # Load the existing Chroma vector store
-persist_directory = 'mydb'
 vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
-# Initialize Ollama LLM
-llm = Ollama(
-    model="llama3.2",  # You can change this to any model you have pulled in Ollama
-    temperature=0
 )
 # Create the RAG prompt template
-template = """Answer the question based only on the following context:
 {context}
 Question: {question}
-Answer the question in a clear and concise way. If you cannot find the answer in the context, just say "I don't have enough information to answer this question."
-Make sure to:
-1. Only use information from the provided context
-2. Be concise and direct
-3. If you're unsure, acknowledge it
-"""
 prompt = ChatPromptTemplate.from_template(template)

 import os
+from typing import Any, List, Mapping, Optional
 import chainlit as cl
+from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
 from langchain_community.vectorstores import Chroma
+from langchain.callbacks.manager import CallbackManagerForLLMRun
+from langchain.llms.base import LLM
+from llama_cpp import Llama
+class LlamaCppLLM(LLM):
+    """Custom LangChain wrapper for llama.cpp.
+    Adapts a ``llama_cpp.Llama`` instance to LangChain's ``LLM`` interface by
+    sending each prompt as a single user message to the chat-completion API
+    and returning the text of the first choice.
+    """
+    # The wrapped llama_cpp.Llama instance, declared as a field of the
+    # pydantic-based LLM base class.
+    model: Any
+    def __init__(self, model: Llama):
+        """Store the llama.cpp model that will serve completions.
+        Args:
+            model: An already-loaded ``llama_cpp.Llama`` instance.
+        """
+        # Declared fields must be handed to the base initializer: calling
+        # super().__init__() with no arguments and assigning afterwards fails
+        # validation when the field is required (no default).
+        super().__init__(model=model)
+    @property
+    def _llm_type(self) -> str:
+        """Return the identifier reported for this LLM implementation."""
+        return "llama.cpp"
+    def _call(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> str:
+        """Generate a completion for ``prompt``.
+        Args:
+            prompt: Fully rendered prompt text, sent as one user message.
+            stop: Optional stop sequences; forwarded to llama.cpp when given.
+            run_manager: Callback manager supplied by LangChain (unused here).
+            **kwargs: Extra options passed through to
+                ``create_chat_completion``.
+        Returns:
+            The message content of the first returned choice.
+        """
+        # Honor the caller's stop sequences instead of silently dropping them;
+        # leave llama.cpp's own default in place when none are requested.
+        if stop:
+            kwargs = {**kwargs, "stop": stop}
+        response = self.model.create_chat_completion(
+            messages=[{"role": "user", "content": prompt}],
+            **kwargs
+        )
+        return response["choices"][0]["message"]["content"]
+# Initialize the embedding model (runs on CPU, with normalized output embeddings)
+embeddings = HuggingFaceEmbeddings(
+    model_name="sentence-transformers/all-MiniLM-L6-v2",
+    model_kwargs={'device': 'cpu'},
+    encode_kwargs={'normalize_embeddings': True}
+)
 # Load the existing Chroma vector store
+persist_directory = os.path.join(os.path.dirname(__file__), 'mydb')  # 'mydb' is resolved relative to this file's directory
 vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
+# Initialize Llama model from the named GGUF file in the given repo
+llama_model = Llama.from_pretrained(
+    repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
+    filename="Meta-Llama-3.1-8B-Instruct-IQ2_M.gguf",
+    n_ctx=2048,  # Context window; NOTE(review): presumably the retrieved context plus the question must fit within it - confirm
+    n_threads=4,  # Number of CPU threads to use
+    n_gpu_layers=0  # Set to higher number if using GPU
 )
+# Create LangChain wrapper around the loaded llama.cpp model
+llm = LlamaCppLLM(model=llama_model)
 # Create the RAG prompt template
+template = """You are a helpful AI assistant. Using only the following context, answer the user's question.
+If you cannot find the answer in the context, say "I don't have enough information to answer this question."
+Context:
 {context}
 Question: {question}
+Answer: Let me help you with that."""
 prompt = ChatPromptTemplate.from_template(template)