flocolombari committed on
Commit
1fc91dd
·
1 Parent(s): b139e4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -11
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from transformers import pipeline, AutoProcessor, BarkModel
3
  import cv2
4
  from PIL import Image
5
  import io
@@ -11,8 +11,7 @@ import torch
11
  #Commit
12
  def video_to_descriptions(video, target_language="en"):
13
 
14
- processor = AutoProcessor.from_pretrained("suno/bark-small")
15
- model = BarkModel.from_pretrained("suno/bark-small")
16
 
17
  # Load the image-to-text and summarization pipelines
18
  ImgToText = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
@@ -20,6 +19,7 @@ def video_to_descriptions(video, target_language="en"):
20
 
21
  # Load the translation pipeline for the target language
22
  translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}")
 
23
  voice_preset = f"v2/{target_language}_speaker_1"
24
 
25
 
@@ -59,16 +59,10 @@ def video_to_descriptions(video, target_language="en"):
59
  translated_text = translator(summarized_description)[0]["translation_text"]
60
  print("TRANSLATION : " + translated_text)
61
 
62
- inputs = processor(translated_text, voice_preset=voice_preset)
63
- #audio_file = audio("translated_text", voice_preset=voice_preset)
64
-
65
-
66
- audio_array = model.generate(**inputs)
67
- audio_array = audio_array.squeeze()
68
- sample_rate = model.generation_config.sample_rate
69
 
70
  output_path = "./bark_out.wav"
71
- scipy.io.wavfile.write(output_path, rate=sample_rate, data=audio_array)
72
 
73
  return output_path
74
 
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
  import cv2
4
  from PIL import Image
5
  import io
 
11
  #Commit
12
  def video_to_descriptions(video, target_language="en"):
13
 
14
+
 
15
 
16
  # Load the image-to-text and summarization pipelines
17
  ImgToText = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
 
19
 
20
  # Load the translation pipeline for the target language
21
  translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}")
22
+ audio = pipeline("text-to-speech", model="suno/bark-small")
23
  voice_preset = f"v2/{target_language}_speaker_1"
24
 
25
 
 
59
  translated_text = translator(summarized_description)[0]["translation_text"]
60
  print("TRANSLATION : " + translated_text)
61
 
62
+ audio_file = audio(translated_text)
 
 
 
 
 
 
63
 
64
  output_path = "./bark_out.wav"
65
+ scipy.io.wavfile.write(output_path, rate=audio_file["audio"][0], data=audio_file["sampling_rate"])
66
 
67
  return output_path
68