Spaces:

flocolombari
/

COLOMBARI_VIGNES-FERRINO_DERNIAUX_NIYONKURU

Runtime error

App Files Files Community

flocolombari committed on Sep 19, 2023

Commit

0460dda

1 Parent(s): 9a69098

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -14

app.py CHANGED Viewed

@@ -4,13 +4,19 @@ import cv2
 from PIL import Image
 import io
-def video_to_descriptions(video):
-    # Charger le modèle via pipeline
     ImgToText = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
     Summarize = pipeline("summarization", model="tuner007/pegasus_summarizer")
-    # Ouvrir la vidéo
-    cap = cv2.VideoCapture(video)  # Ici, nous avons retiré ".name"
     fps = int(cap.get(cv2.CAP_PROP_FPS))
     descriptions = []
@@ -21,32 +27,37 @@ def video_to_descriptions(video):
         if not ret:
             break
-        # Extraire une image toutes les demi-secondes
         if frame_count % (fps * 2) == 0:
-            # Convertir l'image en RGB
             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            # Convertir le tableau numpy en une image PIL
             pil_img = Image.fromarray(frame_rgb)
-            # Obtenir la description de l'image
             outputs = ImgToText(pil_img)
-            print(outputs[0])  # Imprimer la sortie pour voir les clés disponibles
-            description = outputs[0]['generated_text']  # Ceci provoquera une erreur tant que la clé correcte n'est pas utilisée
             descriptions.append(description)
         frame_count += 1
-    # Fermer le lecteur vidéo
     cap.release()
-    # Concaténer les descriptions
     concatenated_descriptions = " ".join(descriptions)
     concatenated_descriptions = Summarize(concatenated_descriptions, max_length=(len(concatenated_descriptions) / 3))
-    return concatenated_descriptions["summarized-text"]
 iface = gr.Interface(
     fn=video_to_descriptions,
-    inputs=gr.Video(type="file", label="Importez une vidéo"),
     outputs="text",
     live=False
 )

 from PIL import Image
 import io
+# Import the translation pipeline
+from transformers import pipeline as translation_pipeline
+def video_to_descriptions(video, target_language="en"):
+    # Load the image-to-text and summarization pipelines
     ImgToText = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
     Summarize = pipeline("summarization", model="tuner007/pegasus_summarizer")
+    # Load the translation pipeline for the target language
+    translator = translation_pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}")
+    # Open the video
+    cap = cv2.VideoCapture(video)
     fps = int(cap.get(cv2.CAP_PROP_FPS))
     descriptions = []
         if not ret:
             break
+        # Extract an image every 2 seconds
         if frame_count % (fps * 2) == 0:
+            # Convert the image to RGB
             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            # Convert the numpy array to a PIL image
             pil_img = Image.fromarray(frame_rgb)
+            # Get the image description
             outputs = ImgToText(pil_img)
+            description = outputs[0]['generated_text']
             descriptions.append(description)
         frame_count += 1
+    # Close the video reader
     cap.release()
+    # Concatenate the descriptions
     concatenated_descriptions = " ".join(descriptions)
     concatenated_descriptions = Summarize(concatenated_descriptions, max_length=(len(concatenated_descriptions) / 3))
+    # Translate the summarized text into the target language
+    translated_text = translator(concatenated_descriptions[0]["summarized-text"])[0]["translation_text"]
+    return translated_text
+# Create a dropdown menu with language options
+language_dropdown = gr.Interface.dropdown(["en", "fr", "de"], label="Langue cible")
 iface = gr.Interface(
     fn=video_to_descriptions,
+    inputs=[gr.Video(type="file", label="Importez une vidéo"), language_dropdown],
     outputs="text",
     live=False
 )