Hugging Face Spaces (running on T4) — commit: "Update app.py"
File changed: app.py
@@ -242,10 +242,7 @@ def main(query: str, chunks: list[str], llm, dense_model, sparse_model):
 
 @st.cache_resource
 def load_models_and_components(show_spinner="Loading models..."):
-    [removed line lost in extraction — presumably the `hf_hub_download(` call opener, given the arguments below]
-        repo_id='NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF',
-        filename='Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q8_0.gguf'
-    )
+    model_name = 'adrienbrault/nous-hermes2theta-llama3-8b:q8_0'
 
     llm = ChatOllama(
         model=model_name,
@@ -254,18 +251,6 @@ def load_models_and_components(show_spinner="Loading models..."):
         num_gpu=0,
         num_predict=3000
     )
-
-    llm = LlamaCpp(
-        model_path=model_path,
-        temperature=0.75,
-        max_tokens=3000,
-        n_ctx=8192,
-        top_p=1,
-        callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
-        verbose=True,
-        n_gpu_layers=0,
-        n_batch=512
-    )
 
     provider = ['CPUExecutionProvider']