devve1 committed on
Commit
2c5256c
1 Parent(s): c2e4689

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -12,7 +12,7 @@ from huggingface_hub import hf_hub_download
12
  from qdrant_client import QdrantClient, models
13
  from fastembed.sparse.splade_pp import supported_splade_models
14
  from fastembed import SparseTextEmbedding, SparseEmbedding
15
- from langchain_community.llms.llamacpp import LlamaCpp
16
  from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
17
  from fastembed_ext import FastEmbedEmbeddingsLc
18
  from langchain_community.document_loaders.wikipedia import WikipediaLoader
@@ -247,6 +247,14 @@ def load_models_and_components(show_spinner="Loading models..."):
247
  filename='Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q8_0.gguf'
248
  )
249
 
 
 
 
 
 
 
 
 
250
  llm = LlamaCpp(
251
  model_path=model_path,
252
  temperature=0.75,
 
12
  from qdrant_client import QdrantClient, models
13
  from fastembed.sparse.splade_pp import supported_splade_models
14
  from fastembed import SparseTextEmbedding, SparseEmbedding
15
+ from langchain_community.chat_models.ollama import ChatOllama
16
  from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
17
  from fastembed_ext import FastEmbedEmbeddingsLc
18
  from langchain_community.document_loaders.wikipedia import WikipediaLoader
 
247
  filename='Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q8_0.gguf'
248
  )
249
 
250
+ llm = ChatOllama(
251
+ model=model_name,
252
+ num_ctx=8192,
253
+ temperature=0,
254
+ num_gpu=0,
255
+ num_predict=3000
256
+ )
257
+
258
  llm = LlamaCpp(
259
  model_path=model_path,
260
  temperature=0.75,