Update app.py
Browse files
app.py
CHANGED
@@ -12,7 +12,7 @@ from huggingface_hub import hf_hub_download
|
|
12 |
from qdrant_client import QdrantClient, models
|
13 |
from fastembed.sparse.splade_pp import supported_splade_models
|
14 |
from fastembed import SparseTextEmbedding, SparseEmbedding
|
15 |
-
from langchain_community.
|
16 |
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
|
17 |
from fastembed_ext import FastEmbedEmbeddingsLc
|
18 |
from langchain_community.document_loaders.wikipedia import WikipediaLoader
|
@@ -247,6 +247,14 @@ def load_models_and_components(show_spinner="Loading models..."):
|
|
247 |
filename='Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q8_0.gguf'
|
248 |
)
|
249 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
llm = LlamaCpp(
|
251 |
model_path=model_path,
|
252 |
temperature=0.75,
|
|
|
12 |
from qdrant_client import QdrantClient, models
|
13 |
from fastembed.sparse.splade_pp import supported_splade_models
|
14 |
from fastembed import SparseTextEmbedding, SparseEmbedding
|
15 |
+
from langchain_community.chat_models.ollama import ChatOllama
|
16 |
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
|
17 |
from fastembed_ext import FastEmbedEmbeddingsLc
|
18 |
from langchain_community.document_loaders.wikipedia import WikipediaLoader
|
|
|
247 |
filename='Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q8_0.gguf'
|
248 |
)
|
249 |
|
250 |
+
llm = ChatOllama(
|
251 |
+
model=model_name,
|
252 |
+
num_ctx=8192,
|
253 |
+
temperature=0,
|
254 |
+
num_gpu=0,
|
255 |
+
num_predict=3000
|
256 |
+
)
|
257 |
+
|
258 |
llm = LlamaCpp(
|
259 |
model_path=model_path,
|
260 |
temperature=0.75,
|