flocolombari committed
Commit 0460dda · 1 Parent(s): 9a69098

Update app.py

Files changed (1)
  1. app.py +25 -14
app.py CHANGED
@@ -4,13 +4,19 @@ import cv2
 from PIL import Image
 import io
 
-def video_to_descriptions(video):
-    # Load the model via pipeline
+# Import the translation pipeline
+from transformers import pipeline as translation_pipeline
+
+def video_to_descriptions(video, target_language="en"):
+    # Load the image-to-text and summarization pipelines
     ImgToText = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
     Summarize = pipeline("summarization", model="tuner007/pegasus_summarizer")
 
-    # Open the video
-    cap = cv2.VideoCapture(video)  # Here we removed ".name"
+    # Load the translation pipeline for the target language
+    translator = translation_pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}")
+
+    # Open the video
+    cap = cv2.VideoCapture(video)
     fps = int(cap.get(cv2.CAP_PROP_FPS))
 
     descriptions = []
@@ -21,32 +27,37 @@ def video_to_descriptions(video):
         if not ret:
             break
 
-        # Extract an image every half-second
+        # Extract an image every 2 seconds
         if frame_count % (fps * 2) == 0:
             # Convert the image to RGB
             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
             # Convert the numpy array to a PIL image
             pil_img = Image.fromarray(frame_rgb)
             # Get the image description
             outputs = ImgToText(pil_img)
-            print(outputs[0])  # Print the output to see the available keys
-            description = outputs[0]['generated_text']  # This will raise an error until the correct key is used
+            description = outputs[0]['generated_text']
             descriptions.append(description)
 
         frame_count += 1
 
     # Close the video reader
     cap.release()
 
     # Concatenate the descriptions
     concatenated_descriptions = " ".join(descriptions)
     concatenated_descriptions = Summarize(concatenated_descriptions, max_length=int(len(concatenated_descriptions) / 3))
 
-    return concatenated_descriptions["summarized-text"]
+    # Translate the summarized text into the target language
+    translated_text = translator(concatenated_descriptions[0]["summary_text"])[0]["translation_text"]
+
+    return translated_text
+
+# Create a dropdown menu with language options
+language_dropdown = gr.Dropdown(["en", "fr", "de"], label="Langue cible")
 
 iface = gr.Interface(
     fn=video_to_descriptions,
-    inputs=gr.Video(type="file", label="Importez une vidéo"),
+    inputs=[gr.Video(type="file", label="Importez une vidéo"), language_dropdown],
     outputs="text",
     live=False
 )
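
Two notes on the updated code, neither part of the commit above.

First, max_length for the summarization pipeline is measured in tokens, while len(concatenated_descriptions) counts characters, so dividing by three is only a rough proxy. A minimal sketch of a token-based bound, using the tokenizer attribute that transformers pipelines expose (the helper name and the floor of 16 are assumptions for illustration):

    def summary_max_length(summarizer, text, ratio=3):
        # Bound the summary at a fraction of the input's token count rather
        # than its character count; summarizer.tokenizer is the pipeline's
        # underlying tokenizer.
        n_tokens = len(summarizer.tokenizer.encode(text))
        return max(16, n_tokens // ratio)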
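Second, all three pipelines are constructed inside video_to_descriptions, so every Gradio submission reloads the models. A sketch of the usual alternative, hoisting the fixed models to module scope and caching one translator per target language; the "en" guard and the helper names are assumptions, on the reasoning that captions and summaries are already English and Helsinki-NLP does not publish an English-to-English opus-mt-en-en checkpoint:

    from functools import lru_cache
    from transformers import pipeline

    # Loaded once at import time instead of on every request.
    ImgToText = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
    Summarize = pipeline("summarization", model="tuner007/pegasus_summarizer")

    @lru_cache(maxsize=None)
    def get_translator(target_language):
        # Created lazily on first use, then cached: one translation
        # pipeline per target language.
        return pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}")

    def translate_summary(summary, target_language):
        # Hypothetical helper: skip translation when the target is English,
        # since the summary is already English.
        if target_language == "en":
            return summary
        return get_translator(target_language)(summary)[0]["translation_text"]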