flocolombari committed
Commit 6e90515
1 Parent(s): 6e075d9

Update app.py

Files changed (1)
  1. app.py +45 -51
app.py CHANGED
@@ -8,75 +8,69 @@ import os
 #Commit
 def video_to_descriptions(video, target_language="en"):
     # Load the image-to-text and summarization pipelines
-    ImgToText = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
-    Summarize = pipeline("summarization", model="tuner007/pegasus_summarizer")
+    #ImgToText = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
+    #Summarize = pipeline("summarization", model="tuner007/pegasus_summarizer")

     # Load the translation pipeline for the target language
-    translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}")
+    #translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}")
     audio = pipeline("text-to-speech", model="suno/bark")
-
-    # Open the video
-    cap = cv2.VideoCapture(video)
-    fps = int(cap.get(cv2.CAP_PROP_FPS))
-
-    descriptions = []
-    frame_count = 0
-
-    while True:
-        ret, frame = cap.read()
-        if not ret:
-            break
-
-        # Extract an image every 2 seconds
-        if frame_count % (fps * 2) == 0:
-            # Convert the image to RGB
-            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            # Convert the numpy array to a PIL image
-            pil_img = Image.fromarray(frame_rgb)
-            # Get the image description
-            outputs = ImgToText(pil_img)
-            description = outputs[0]['generated_text']
-            descriptions.append(description)
-            print(str(frame_count) + " : " + outputs[0]['generated_text'])
-
-        frame_count += 1
+    #
+    ## Open the video
+    #cap = cv2.VideoCapture(video)
+    #fps = int(cap.get(cv2.CAP_PROP_FPS))
+    #
+    #descriptions = []
+    #frame_count = 0
+    #
+    #while True:
+    #    ret, frame = cap.read()
+    #    if not ret:
+    #        break
+    #
+    #    # Extract an image every 2 seconds
+    #    if frame_count % (fps * 2) == 0:
+    #        # Convert the image to RGB
+    #        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+    #        # Convert the numpy array to a PIL image
+    #        pil_img = Image.fromarray(frame_rgb)
+    #        # Get the image description
+    #        outputs = ImgToText(pil_img)
+    #        description = outputs[0]['generated_text']
+    #        descriptions.append(description)
+    #        print(str(frame_count) + " : " + outputs[0]['generated_text'])
+    #
+    #    frame_count += 1

-    # Close the video reader
-    cap.release()
+    ## Close the video reader
+    #cap.release()

-    # Concatenate the descriptions
-    concatenated_description = " ".join(descriptions)
-    summarized_description = Summarize(concatenated_description, max_length=31)[0]["summary_text"]
-    print("SUMMARIZATION : " + summarized_description)
+    ## Concatenate the descriptions
+    #concatenated_description = " ".join(descriptions)
+    #summarized_description = Summarize(concatenated_description, max_length=31)[0]["summary_text"]
+    #print("SUMMARIZATION : " + summarized_description)

-    translated_text = translator(summarized_description)[0]["translation_text"]
-    print("TRANSLATION : " + translated_text)
+    #translated_text = translator(summarized_description)[0]["translation_text"]
+    #print("TRANSLATION : " + translated_text)

-    print(audio(translated_text))
+    print(audio("bonjour je m'appelle Florent et je fais un test"))

-    ##audio_file = audio(translated_text)[0]["audio"]
-    #print("AUDIO : " + audio_file)
+    audio_file = audio("bonjour je m'appelle Florent et je fais un test")
+    print(audio_file)

-    #return audio_file
-    return translated_text
+    return audio_file
+    #return translated_text

 # Create a dropdown menu with language options
 language_dropdown = gr.Dropdown(
     ["en", "fr", "de", "es"], label="Language", info="The Language of the output"
 )
-example_videos = [
-    "./meduses.mp4",
-    "./paysage.mp4",
-    # Add more example video file paths as needed
-]

 # Create a dropdown menu with example video options
-example_video_dropdown = gr.Dropdown(example_videos, label="Exemples de vidéos")
 iface = gr.Interface(
     fn=video_to_descriptions,
-    inputs=[example_video_dropdown, language_dropdown],
-    #outputs="audio",
-    outputs="text",
+    inputs=[gr.Video(label="Import a Video", info="The Video to be described"), language_dropdown],
+    outputs="audio",
+    #outputs="text",
     live=False
 )
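
Note on the new return value: as committed, video_to_descriptions returns the raw result of the bark pipeline to an outputs="audio" component. A transformers text-to-speech pipeline normally returns a dict of the form {"audio": <numpy array>, "sampling_rate": <int>}, while a Gradio "audio" output expects a file path or a (sample_rate, waveform) tuple, so a small adapter would likely be needed. A minimal sketch, assuming that usual return format (the helper name to_gradio_audio is illustrative, not part of this commit):

import numpy as np

def to_gradio_audio(tts_output):
    # transformers TTS pipelines return {"audio": ndarray, "sampling_rate": int};
    # Gradio's "audio" output accepts a (sample_rate, waveform) tuple.
    waveform = np.squeeze(tts_output["audio"])  # bark may emit a (1, n) array; flatten it
    return tts_output["sampling_rate"], waveform

# Usage with the pipeline created in video_to_descriptions:
#   audio_file = audio("bonjour je m'appelle Florent et je fais un test")
#   return to_gradio_audio(audio_file)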