AI-RESEARCHER-2024 committed on
Commit
fa23d20
·
verified ·
1 Parent(s): 45331d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -17
app.py CHANGED
@@ -1,38 +1,74 @@
1
  import os
 
2
  import chainlit as cl
3
- from langchain_community.llms import Ollama
4
  from langchain.prompts import ChatPromptTemplate
5
  from langchain_core.output_parsers import StrOutputParser
6
  from langchain_core.runnables import RunnablePassthrough
7
  from langchain_community.vectorstores import Chroma
8
- from langchain_community.embeddings import HuggingFaceEmbeddings
9
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
10
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  # Load the existing Chroma vector store
13
- persist_directory = 'mydb'
14
  vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
15
 
16
- # Initialize Ollama LLM
17
- llm = Ollama(
18
- model="llama3.2", # You can change this to any model you have pulled in Ollama
19
- temperature=0
 
 
 
20
  )
21
 
 
 
 
22
  # Create the RAG prompt template
23
- template = """Answer the question based only on the following context:
 
24
 
 
25
  {context}
26
 
27
  Question: {question}
28
 
29
- Answer the question in a clear and concise way. If you cannot find the answer in the context, just say "I don't have enough information to answer this question."
30
-
31
- Make sure to:
32
- 1. Only use information from the provided context
33
- 2. Be concise and direct
34
- 3. If you're unsure, acknowledge it
35
- """
36
 
37
  prompt = ChatPromptTemplate.from_template(template)
38
 
 
1
  import os
2
+ from typing import Any, List, Mapping, Optional
3
  import chainlit as cl
4
+ from langchain_community.embeddings import HuggingFaceEmbeddings
5
  from langchain.prompts import ChatPromptTemplate
6
  from langchain_core.output_parsers import StrOutputParser
7
  from langchain_core.runnables import RunnablePassthrough
8
  from langchain_community.vectorstores import Chroma
9
+ from langchain.callbacks.manager import CallbackManagerForLLMRun
10
+ from langchain.llms.base import LLM
11
+ from llama_cpp import Llama
12
+
13
class LlamaCppLLM(LLM):
    """Custom LangChain wrapper for llama.cpp.

    Adapts an already-loaded ``llama_cpp.Llama`` instance to the LangChain
    ``LLM`` interface. Each prompt is sent as a single ``user`` message
    through ``create_chat_completion`` and the text of the first choice is
    returned.
    """

    # The wrapped llama.cpp model instance, declared as a field of the LLM base.
    model: Any

    def __init__(self, model: Llama):
        """Store *model* as the backend used by ``_call``.

        The model is handed to the base initializer instead of being assigned
        after a bare ``super().__init__()``: when the base class treats
        ``model`` as a required field, the bare call fails validation before
        the assignment is ever reached.
        """
        super().__init__(model=model)

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses to label this LLM implementation."""
        return "llama.cpp"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Run one chat completion for *prompt* and return the reply text.

        Args:
            prompt: Fully rendered prompt, sent as the only user message.
            stop: Optional stop sequences; forwarded to llama.cpp unless the
                caller already supplied ``stop`` in ``kwargs``.
            run_manager: Accepted for interface compatibility; not used here.
            **kwargs: Extra arguments passed straight to
                ``create_chat_completion``.

        Returns:
            The ``content`` of the first choice's message.
        """
        # Previously `stop` was accepted but dropped, so stop sequences had
        # no effect. setdefault avoids a duplicate-keyword error if the
        # caller also put "stop" in kwargs.
        if stop:
            kwargs.setdefault("stop", stop)
        response = self.model.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            **kwargs,
        )
        return response["choices"][0]["message"]["content"]
38
+
39
# Embedding model used for the vector store (runs on CPU, normalized vectors)
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs=dict(device="cpu"),
    encode_kwargs=dict(normalize_embeddings=True),
)

# Open the existing Chroma store kept in the "mydb" folder beside this file
persist_directory = os.path.join(os.path.dirname(__file__), "mydb")
vectorstore = Chroma(
    embedding_function=embeddings,
    persist_directory=persist_directory,
)

# Load the Llama 3.1 8B Instruct GGUF model through llama.cpp
llama_model = Llama.from_pretrained(
    repo_id="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
    filename="Meta-Llama-3.1-8B-Instruct-IQ2_M.gguf",
    n_ctx=2048,  # context window
    n_threads=4,  # number of CPU threads to use
    n_gpu_layers=0,  # raise this when a GPU is available
)

# Expose the llama.cpp model through the LangChain LLM interface
llm = LlamaCppLLM(model=llama_model)
61
+
62
# RAG prompt: instructions, the retrieved context, the user's question,
# and a fixed answer lead-in. {context} and {question} are filled in later.
template = "\n".join(
    (
        "You are a helpful AI assistant. Using only the following context, answer the user's question.",
        'If you cannot find the answer in the context, say "I don\'t have enough information to answer this question."',
        "",
        "Context:",
        "{context}",
        "",
        "Question: {question}",
        "",
        "Answer: Let me help you with that.",
    )
)

prompt = ChatPromptTemplate.from_template(template)
74