AI-RESEARCHER-2024 committed
Commit b0071a4 (verified)
Parent(s): dc2f415

Update app.py

Files changed (1): app.py (+30, -18)
app.py CHANGED
@@ -1,5 +1,5 @@
 import os
-from typing import Any, List, Mapping, Optional
+from typing import Any, List, Mapping, Optional, Dict
 import chainlit as cl
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.prompts import ChatPromptTemplate
@@ -9,15 +9,25 @@ from langchain_community.vectorstores import Chroma
 from langchain.callbacks.manager import CallbackManagerForLLMRun
 from langchain.llms.base import LLM
 from llama_cpp import Llama
+from pydantic import Field, BaseModel
 
-class LlamaCppLLM(LLM):
+class LlamaCppLLM(LLM, BaseModel):
     """Custom LangChain wrapper for llama.cpp"""
 
-    model: Any
+    client: Any = Field(default=None, exclude=True)
+    model_path: str = Field(..., description="Path to the model file")
+    n_ctx: int = Field(default=2048, description="Context window size")
+    n_threads: int = Field(default=4, description="Number of CPU threads")
+    n_gpu_layers: int = Field(default=0, description="Number of GPU layers")
 
-    def __init__(self, model: Llama):
-        super().__init__()
-        self.model = model
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.client = Llama(
+            model_path=self.model_path,
+            n_ctx=self.n_ctx,
+            n_threads=self.n_threads,
+            n_gpu_layers=self.n_gpu_layers
+        )
 
     @property
     def _llm_type(self) -> str:
@@ -30,7 +40,10 @@ class LlamaCppLLM(LLM):
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> str:
-        response = self.model.create_chat_completion(
+        if not self.client:
+            raise RuntimeError("Model not initialized")
+
+        response = self.client.create_chat_completion(
             messages=[{"role": "user", "content": prompt}],
             **kwargs
         )
@@ -47,18 +60,15 @@ embeddings = HuggingFaceEmbeddings(
 persist_directory = os.path.join(os.path.dirname(__file__), 'mydb')
 vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
 
-# Initialize Llama model
-llama_model = Llama.from_pretrained(
-    repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
-    filename="Meta-Llama-3.1-8B-Instruct-IQ2_M.gguf",
-    n_ctx=2048,  # Context window
-    n_threads=4,  # Number of CPU threads to use
-    n_gpu_layers=0  # Set to higher number if using GPU
+# Initialize the LLM
+model_path = os.path.join(os.path.dirname(__file__), "models", "llama-model.gguf")
+llm = LlamaCppLLM(
+    model_path=model_path,
+    n_ctx=2048,
+    n_threads=4,
+    n_gpu_layers=0
 )
 
-# Create LangChain wrapper
-llm = LlamaCppLLM(model=llama_model)
-
 # Create the RAG prompt template
 template = """You are a helpful AI assistant. Using only the following context, answer the user's question.
 If you cannot find the answer in the context, say "I don't have enough information to answer this question."
@@ -118,7 +128,9 @@ async def main(message: cl.Message):
         await msg.update(elements=elements)
 
     except Exception as e:
-        await msg.update(content=f"An error occurred: {str(e)}")
+        import traceback
+        error_msg = f"An error occurred: {str(e)}\n{traceback.format_exc()}"
+        await msg.update(content=error_msg)
 
 if __name__ == "__main__":
     cl.run()
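
Note on the model file: the removed Llama.from_pretrained call fetched the GGUF straight from the Hub, while the new code expects a local file at models/llama-model.gguf. Neither a download step nor the models/ directory appears in this commit, so a staging step along these lines would be needed; the repo_id and filename are taken from the removed lines, and the local layout is an assumption:

import os
from huggingface_hub import hf_hub_download

# Hypothetical staging step (not part of this commit): download the
# quantized GGUF named in the old code into the models/ directory.
downloaded = hf_hub_download(
    repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
    filename="Meta-Llama-3.1-8B-Instruct-IQ2_M.gguf",
    local_dir="models",
)

# Rename to the path app.py now expects.
os.replace(downloaded, os.path.join("models", "llama-model.gguf"))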
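Note on the wrapper: in recent LangChain releases LLM already derives from pydantic's BaseModel, so the extra BaseModel base is likely redundant but harmless. A minimal smoke test for the rewritten class, assuming the model file is in place (the prompt strings are placeholders); llama-cpp-python's create_chat_completion returns an OpenAI-style dict, which is presumably what the code below the changed hunk unpacks:

# Hypothetical smoke test (not part of the commit).
llm = LlamaCppLLM(
    model_path="models/llama-model.gguf",
    n_ctx=2048,
    n_threads=4,
    n_gpu_layers=0,  # raise to offload layers onto a GPU
)

# LangChain's LLM base class exposes the Runnable interface.
print(llm.invoke("Say hello in one sentence."))

# The underlying llama.cpp client returns an OpenAI-style payload.
raw = llm.client.create_chat_completion(
    messages=[{"role": "user", "content": "Say hello."}]
)
print(raw["choices"][0]["message"]["content"])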
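Note on the chain: the chain construction sits outside the changed hunks, so the following is only a sketch of how the template, vectorstore, and llm defined above are typically wired together in LCEL style; the {context} and {question} placeholder names are assumptions about the untouched part of the template, and the actual chain in app.py may differ:

from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

prompt = ChatPromptTemplate.from_template(template)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# Retrieve documents, fill the prompt, run the local model, return a string.
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

answer = rag_chain.invoke("What does the indexed corpus say about X?")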