Update app.py
Browse files
app.py
CHANGED
@@ -18,7 +18,8 @@ from typing import Iterable
|
|
18 |
from qdrant_client import AsyncQdrantClient, models
|
19 |
from fastembed.sparse.splade_pp import supported_splade_models
|
20 |
from fastembed import SparseTextEmbedding, SparseEmbedding
|
21 |
-
from langchain_community.
|
|
|
22 |
from fastembed_ext import FastEmbedEmbeddingsLc
|
23 |
from langchain_community.document_loaders.wikipedia import WikipediaLoader
|
24 |
from langchain_community.document_loaders.unstructured import UnstructuredFileLoader
|
@@ -252,12 +253,16 @@ async def main(query: str, chunks: list[str], llm, dense_model, sparse_model):
|
|
252 |
async def load_models_and_components():
|
253 |
model_name = 'adrienbrault/nous-hermes2theta-llama3-8b:q8_0'
|
254 |
|
255 |
-
llm =
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
|
|
|
|
|
|
|
|
261 |
)
|
262 |
|
263 |
provider = ['OpenVINOExecutionProvider']
|
|
|
18 |
from qdrant_client import AsyncQdrantClient, models
|
19 |
from fastembed.sparse.splade_pp import supported_splade_models
|
20 |
from fastembed import SparseTextEmbedding, SparseEmbedding
|
21 |
+
from langchain_community.llms.llamacpp import LlamaCpp
|
22 |
+
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
|
23 |
from fastembed_ext import FastEmbedEmbeddingsLc
|
24 |
from langchain_community.document_loaders.wikipedia import WikipediaLoader
|
25 |
from langchain_community.document_loaders.unstructured import UnstructuredFileLoader
|
|
|
253 |
async def load_models_and_components():
|
254 |
model_name = 'adrienbrault/nous-hermes2theta-llama3-8b:q8_0'
|
255 |
|
256 |
+
llm = LlamaCpp(
|
257 |
+
model_path=model_path,
|
258 |
+
temperature=0.75,
|
259 |
+
max_tokens=3000,
|
260 |
+
n_ctx=8192,
|
261 |
+
top_p=1,
|
262 |
+
callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
|
263 |
+
verbose=True,
|
264 |
+
n_gpu_layers=0,
|
265 |
+
n_batch=512
|
266 |
)
|
267 |
|
268 |
provider = ['OpenVINOExecutionProvider']
|