devve1 committed on
Commit
0dc68fc
1 Parent(s): 41103cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -350,16 +350,17 @@ def load_models_and_documents():
350
  providers=['CPUExecutionProvider']
351
  )
352
 
353
- st.write('Downloading and Loading Mistral Nemo AI Model quantized with AWQ and using Outlines + vLLM Engine as backend...')
354
 
355
  llm = LLM(
356
- model='casperhansen/mistral-nemo-instruct-2407-awq',
 
357
  tensor_parallel_size=1,
358
  trust_remote_code=True,
359
  enforce_eager=True,
360
  quantization="awq",
361
- gpu_memory_utilization=0.9,
362
- max_model_len=11264,
363
  dtype=torch.float16,
364
  max_num_seqs=128
365
  )
 
350
  providers=['CPUExecutionProvider']
351
  )
352
 
353
+ st.write('Downloading and Loading Mistral v0.2 by AWS Prototyping quantized with AWQ and using Outlines + vLLM Engine as backend...')
354
 
355
  llm = LLM(
356
+ model='"aws-prototyping/MegaBeam-Mistral-7B-300k-AWQ"',
357
+ revision='MegaBeam-Mistral-7B-300k-AWQ-64g-4b-GEMM',
358
  tensor_parallel_size=1,
359
  trust_remote_code=True,
360
  enforce_eager=True,
361
  quantization="awq",
362
+ gpu_memory_utilization=0.7,
363
+ max_model_len=12288,
364
  dtype=torch.float16,
365
  max_num_seqs=128
366
  )