whispy committed on
Commit
9ba61bd
•
1 Parent(s): b301e2f

Upload 3 files

Browse files
Files changed (2) hide show
  1. app.py +3 -10
  2. requirements.txt +3 -3
app.py CHANGED
@@ -1,10 +1,10 @@
1
  import gradio as gr
2
  import torch
 
3
  from diffusers import DiffusionPipeline
4
  from transformers import (
5
  WhisperForConditionalGeneration,
6
  WhisperProcessor,
7
- pipeline,
8
  )
9
 
10
  import os
@@ -14,12 +14,10 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
14
  model = WhisperForConditionalGeneration.from_pretrained("whispy/whisper_italian").to(device)
15
  processor = WhisperProcessor.from_pretrained("whispy/whisper_italian")
16
 
17
- pipe = pipeline(model="whispy/whisper_italian")
18
-
19
  diffuser_pipeline = DiffusionPipeline.from_pretrained(
20
  "CompVis/stable-diffusion-v1-4",
21
  custom_pipeline="speech_to_image_diffusion",
22
- speech_model="whispy/whisper_italian",
23
  speech_processor=processor,
24
  use_auth_token=MY_SECRET_TOKEN,
25
  revision="fp16",
@@ -29,10 +27,6 @@ diffuser_pipeline = DiffusionPipeline.from_pretrained(
29
  diffuser_pipeline.enable_attention_slicing()
30
  diffuser_pipeline = diffuser_pipeline.to(device)
31
 
32
- def transcribe(audio):
33
- text = pipe(audio)["text"]
34
- return text
35
-
36
 
37
  #————————————————————————————————————————————
38
  # GRADIO SETUP
@@ -51,8 +45,7 @@ image_output = gr.Image()
51
 
52
  def speech_to_text(audio_sample):
53
 
54
- #process_audio = whisper.load_audio(audio_sample)
55
- process_audio = transcribe(audio_sample)
56
  output = diffuser_pipeline(process_audio)
57
 
58
  print(f"""
 
1
  import gradio as gr
2
  import torch
3
+ import whisper
4
  from diffusers import DiffusionPipeline
5
  from transformers import (
6
  WhisperForConditionalGeneration,
7
  WhisperProcessor,
 
8
  )
9
 
10
  import os
 
14
  model = WhisperForConditionalGeneration.from_pretrained("whispy/whisper_italian").to(device)
15
  processor = WhisperProcessor.from_pretrained("whispy/whisper_italian")
16
 
 
 
17
  diffuser_pipeline = DiffusionPipeline.from_pretrained(
18
  "CompVis/stable-diffusion-v1-4",
19
  custom_pipeline="speech_to_image_diffusion",
20
+ speech_model=model,
21
  speech_processor=processor,
22
  use_auth_token=MY_SECRET_TOKEN,
23
  revision="fp16",
 
27
  diffuser_pipeline.enable_attention_slicing()
28
  diffuser_pipeline = diffuser_pipeline.to(device)
29
 
 
 
 
 
30
 
31
  #————————————————————————————————————————————
32
  # GRADIO SETUP
 
45
 
46
  def speech_to_text(audio_sample):
47
 
48
+ process_audio = whisper.load_audio(audio_sample)
 
49
  output = diffuser_pipeline(process_audio)
50
 
51
  print(f"""
requirements.txt CHANGED
@@ -1,7 +1,7 @@
1
- transformers
2
- torch
3
  --extra-index-url https://download.pytorch.org/whl/cu113
4
  torch
5
  scipy
6
  ftfy
7
- diffusers
 
 
 
 
 
1
  --extra-index-url https://download.pytorch.org/whl/cu113
2
  torch
3
  scipy
4
  ftfy
5
+ git+https://github.com/huggingface/transformers
6
+ git+https://github.com/huggingface/diffusers
7
+ git+https://github.com/openai/whisper.git