VanguardAI committed
Commit 12fad92 · verified · Parent: 04fbb52

Update app.py

Files changed (1): app.py (+13 -6)
app.py CHANGED

@@ -1,18 +1,17 @@
 import gradio as gr
-import spaces
 import torch
 import os
 import numpy as np
 from groq import Groq
-from transformers import AutoModel, AutoTokenizer
+from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
 from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler
 from parler_tts import ParlerTTSForConditionalGeneration
 import soundfile as sf
-from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.vectorstores import Chroma
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import Chroma
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.chains import RetrievalQA
-from langchain.llms import OpenAI
+from langchain.llms import OpenAI
 from PIL import Image
 from decord import VideoReader, cpu
 import requests
@@ -20,9 +19,17 @@ import requests
 client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 MODEL = 'llama3-groq-70b-8192-tool-use-preview'
 
+# Configure transformers to load the model with 4-bit quantization
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16
+)
+
 # Load models for text, speech, and image processing
 text_model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True,
-                                       attn_implementation='sdpa', torch_dtype=torch.bfloat16).eval().cuda()
+                                       quantization_config=bnb_config, device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True)
 
 tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1").to('cuda')
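
The substantive change: instead of loading MiniCPM-V-2_6 in full bfloat16 and moving it to the GPU with .eval().cuda(), the model is now loaded with 4-bit NF4 quantization through bitsandbytes, and device_map="auto" lets accelerate handle device placement (quantized models should not be moved manually with .cuda() or .to()). The commit also drops the import of spaces and updates the OpenAIEmbeddings import path from langchain.embeddings.openai to langchain.embeddings. As a minimal sketch of what the quantization buys, assuming a CUDA machine with bitsandbytes and accelerate installed (get_memory_footprint() is a standard transformers model method; the commit itself does not include this check):

import torch
from transformers import AutoModel, BitsAndBytesConfig

# Same 4-bit setup as the commit: NF4 weight format, nested (double)
# quantization, and bfloat16 as the compute dtype for de-quantized ops.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModel.from_pretrained(
    'openbmb/MiniCPM-V-2_6',
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map="auto",  # accelerate places layers; no manual .cuda() call
)

# 4-bit weights should come in at roughly a quarter of the bf16 footprint.
print(f"Model footprint: {model.get_memory_footprint() / 1e9:.1f} GB")

Note that .eval() is also gone from the new call; from_pretrained returns models in eval mode by default, so the old call was redundant there.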