AI-RESEARCHER-2024 committed
Commit b0071a4 (verified)
Parent(s): dc2f415

Update app.py

Files changed (1): app.py (+30, -18)
app.py CHANGED
@@ -1,5 +1,5 @@
 import os
-from typing import Any, List, Mapping, Optional
+from typing import Any, List, Mapping, Optional, Dict
 import chainlit as cl
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.prompts import ChatPromptTemplate
@@ -9,15 +9,25 @@ from langchain_community.vectorstores import Chroma
 from langchain.callbacks.manager import CallbackManagerForLLMRun
 from langchain.llms.base import LLM
 from llama_cpp import Llama
+from pydantic import Field, BaseModel
 
-class LlamaCppLLM(LLM):
+class LlamaCppLLM(LLM, BaseModel):
     """Custom LangChain wrapper for llama.cpp"""
 
-    model: Any
+    client: Any = Field(default=None, exclude=True)
+    model_path: str = Field(..., description="Path to the model file")
+    n_ctx: int = Field(default=2048, description="Context window size")
+    n_threads: int = Field(default=4, description="Number of CPU threads")
+    n_gpu_layers: int = Field(default=0, description="Number of GPU layers")
 
-    def __init__(self, model: Llama):
-        super().__init__()
-        self.model = model
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.client = Llama(
+            model_path=self.model_path,
+            n_ctx=self.n_ctx,
+            n_threads=self.n_threads,
+            n_gpu_layers=self.n_gpu_layers
+        )
 
     @property
     def _llm_type(self) -> str:
@@ -30,7 +40,10 @@ class LlamaCppLLM(LLM):
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> str:
-        response = self.model.create_chat_completion(
+        if not self.client:
+            raise RuntimeError("Model not initialized")
+
+        response = self.client.create_chat_completion(
             messages=[{"role": "user", "content": prompt}],
             **kwargs
         )
@@ -47,18 +60,15 @@ embeddings = HuggingFaceEmbeddings(
 persist_directory = os.path.join(os.path.dirname(__file__), 'mydb')
 vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
 
-# Initialize Llama model
-llama_model = Llama.from_pretrained(
-    repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
-    filename="Meta-Llama-3.1-8B-Instruct-IQ2_M.gguf",
-    n_ctx=2048,  # Context window
-    n_threads=4,  # Number of CPU threads to use
-    n_gpu_layers=0  # Set to higher number if using GPU
+# Initialize the LLM
+model_path = os.path.join(os.path.dirname(__file__), "models", "llama-model.gguf")
+llm = LlamaCppLLM(
+    model_path=model_path,
+    n_ctx=2048,
+    n_threads=4,
+    n_gpu_layers=0
 )
 
-# Create LangChain wrapper
-llm = LlamaCppLLM(model=llama_model)
-
 # Create the RAG prompt template
 template = """You are a helpful AI assistant. Using only the following context, answer the user's question.
 If you cannot find the answer in the context, say "I don't have enough information to answer this question."
@@ -118,7 +128,9 @@ async def main(message: cl.Message):
         await msg.update(elements=elements)
 
     except Exception as e:
-        await msg.update(content=f"An error occurred: {str(e)}")
+        import traceback
+        error_msg = f"An error occurred: {str(e)}\n{traceback.format_exc()}"
+        await msg.update(content=error_msg)
 
 if __name__ == "__main__":
     cl.run()
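
Note on the model file: the removed Llama.from_pretrained call fetched the GGUF straight from the Hub, while the new code expects a local file at models/llama-model.gguf. Neither a download step nor the models/ directory appears in this commit, so a staging step along these lines would be needed; the repo_id and filename are taken from the removed lines, and the local layout is an assumption:

import os
from huggingface_hub import hf_hub_download

# Hypothetical staging step (not part of this commit): download the
# quantized GGUF named in the old code into the models/ directory.
downloaded = hf_hub_download(
    repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
    filename="Meta-Llama-3.1-8B-Instruct-IQ2_M.gguf",
    local_dir="models",
)

# Rename to the path app.py now expects.
os.replace(downloaded, os.path.join("models", "llama-model.gguf"))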
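Note on the wrapper: in recent LangChain releases LLM already derives from pydantic's BaseModel, so the extra BaseModel base is likely redundant but harmless. A minimal smoke test for the rewritten class, assuming the model file is in place (the prompt strings are placeholders); llama-cpp-python's create_chat_completion returns an OpenAI-style dict, which is presumably what the code below the changed hunk unpacks:

# Hypothetical smoke test (not part of the commit).
llm = LlamaCppLLM(
    model_path="models/llama-model.gguf",
    n_ctx=2048,
    n_threads=4,
    n_gpu_layers=0,  # raise to offload layers onto a GPU
)

# LangChain's LLM base class exposes the Runnable interface.
print(llm.invoke("Say hello in one sentence."))

# The underlying llama.cpp client returns an OpenAI-style payload.
raw = llm.client.create_chat_completion(
    messages=[{"role": "user", "content": "Say hello."}]
)
print(raw["choices"][0]["message"]["content"])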
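Note on the chain: the chain construction sits outside the changed hunks, so the following is only a sketch of how the template, vectorstore, and llm defined above are typically wired together in LCEL style; the {context} and {question} placeholder names are assumptions about the untouched part of the template, and the actual chain in app.py may differ:

from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

prompt = ChatPromptTemplate.from_template(template)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# Retrieve documents, fill the prompt, run the local model, return a string.
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

answer = rag_chain.invoke("What does the indexed corpus say about X?")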