VanguardAI committed on
Commit
c8af3a0
·
verified ·
1 Parent(s): 8deacc0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -7,11 +7,11 @@ from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
7
  from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler
8
  from parler_tts import ParlerTTSForConditionalGeneration
9
  import soundfile as sf
10
- from langchain.embeddings import OpenAIEmbeddings
11
- from langchain.vectorstores import Chroma
12
  from langchain.text_splitter import RecursiveCharacterTextSplitter
13
  from langchain.chains import RetrievalQA
14
- from langchain.llms import OpenAI
15
  from PIL import Image
16
  from decord import VideoReader, cpu
17
  import requests
@@ -19,7 +19,7 @@ import requests
19
  client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
20
  MODEL = 'llama3-groq-70b-8192-tool-use-preview'
21
 
22
- # Configure transformers to load the model with 4-bit quantization
23
  bnb_config = BitsAndBytesConfig(
24
  load_in_4bit=True,
25
  bnb_4bit_use_double_quant=True,
@@ -27,9 +27,9 @@ bnb_config = BitsAndBytesConfig(
27
  bnb_4bit_compute_dtype=torch.bfloat16
28
  )
29
 
30
- # Load models for text, speech, and image processing
31
  text_model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True,
32
- quantization_config=bnb_config, device_map="auto")
33
  tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True)
34
 
35
  tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1").to('cuda')
 
7
  from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler
8
  from parler_tts import ParlerTTSForConditionalGeneration
9
  import soundfile as sf
10
+ from langchain_community.embeddings import OpenAIEmbeddings
11
+ from langchain_community.vectorstores import Chroma
12
  from langchain.text_splitter import RecursiveCharacterTextSplitter
13
  from langchain.chains import RetrievalQA
14
+ from langchain_community.llms import OpenAI
15
  from PIL import Image
16
  from decord import VideoReader, cpu
17
  import requests
 
19
  client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
20
  MODEL = 'llama3-groq-70b-8192-tool-use-preview'
21
 
22
+ # Configure BitsAndBytes for 4-bit quantization
23
  bnb_config = BitsAndBytesConfig(
24
  load_in_4bit=True,
25
  bnb_4bit_use_double_quant=True,
 
27
  bnb_4bit_compute_dtype=torch.bfloat16
28
  )
29
 
30
+ # Load MiniCPM-V-2_6 with 4-bit quantization
31
  text_model = AutoModel.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True,
32
+ quantization_config=bnb_config, device_map="auto")
33
  tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-2_6', trust_remote_code=True)
34
 
35
  tts_model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-large-v1").to('cuda')