Spaces:

GameScribes
/

Multipurpose-AI-Agent-Development

Running on T4

devve1 committed 26 days ago

Commit

0dc68fc

•

1 Parent(s): 41103cd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -350,16 +350,17 @@ def load_models_and_documents():
             providers=['CPUExecutionProvider']
         )
-        st.write('Downloading and Loading Mistral Nemo AI Model quantized with AWQ and using Outlines + vLLM Engine as backend...')
         llm = LLM(
-            model='casperhansen/mistral-nemo-instruct-2407-awq',
             tensor_parallel_size=1,
             trust_remote_code=True,
             enforce_eager=True,
             quantization="awq",
-            gpu_memory_utilization=0.9,
-            max_model_len=11264,
             dtype=torch.float16,
             max_num_seqs=128
         )

             providers=['CPUExecutionProvider']
         )
+        st.write('Downloading and Loading Mistral v0.2 by AWS Prototyping quantized with AWQ and using Outlines + vLLM Engine as backend...')
         llm = LLM(
+            model='"aws-prototyping/MegaBeam-Mistral-7B-300k-AWQ"',
+            revision='MegaBeam-Mistral-7B-300k-AWQ-64g-4b-GEMM',
             tensor_parallel_size=1,
             trust_remote_code=True,
             enforce_eager=True,
             quantization="awq",
+            gpu_memory_utilization=0.7,
+            max_model_len=12288,
             dtype=torch.float16,
             max_num_seqs=128
         )