devve1 committed on
Commit
c3e7614
1 Parent(s): aebf61d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -131,7 +131,8 @@ def load_models_and_documents():
131
  chat_format="llama-3",
132
  n_ctx=16384,
133
  n_gpu_layers=32,
134
- flash_attn=True
 
135
  )
136
 
137
  dense_model = OptimumEncoder(
 
131
  chat_format="llama-3",
132
  n_ctx=16384,
133
  n_gpu_layers=32,
134
+ flash_attn=True,
135
+ type_k='q8_0'
136
  )
137
 
138
  dense_model = OptimumEncoder(