flocolombari committed
Commit 6e90515
1 Parent(s): 6e075d9

Update app.py

Files changed (1)
  1. app.py +45 -51
app.py CHANGED
@@ -8,75 +8,69 @@ import os
 #Commit
 def video_to_descriptions(video, target_language="en"):
     # Load the image-to-text and summarization pipelines
-    ImgToText = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
-    Summarize = pipeline("summarization", model="tuner007/pegasus_summarizer")
+    #ImgToText = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
+    #Summarize = pipeline("summarization", model="tuner007/pegasus_summarizer")

     # Load the translation pipeline for the target language
-    translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}")
+    #translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}")
     audio = pipeline("text-to-speech", model="suno/bark")
-
-    # Open the video
-    cap = cv2.VideoCapture(video)
-    fps = int(cap.get(cv2.CAP_PROP_FPS))
-
-    descriptions = []
-    frame_count = 0
-
-    while True:
-        ret, frame = cap.read()
-        if not ret:
-            break
-
-        # Extract an image every 2 seconds
-        if frame_count % (fps * 2) == 0:
-            # Convert the image to RGB
-            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            # Convert the numpy array to a PIL image
-            pil_img = Image.fromarray(frame_rgb)
-            # Get the image description
-            outputs = ImgToText(pil_img)
-            description = outputs[0]['generated_text']
-            descriptions.append(description)
-            print(str(frame_count) + " : " + outputs[0]['generated_text'])
-
-        frame_count += 1
+    #
+    ## Open the video
+    #cap = cv2.VideoCapture(video)
+    #fps = int(cap.get(cv2.CAP_PROP_FPS))
+    #
+    #descriptions = []
+    #frame_count = 0
+    #
+    #while True:
+    #    ret, frame = cap.read()
+    #    if not ret:
+    #        break
+    #
+    #    # Extract an image every 2 seconds
+    #    if frame_count % (fps * 2) == 0:
+    #        # Convert the image to RGB
+    #        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+    #        # Convert the numpy array to a PIL image
+    #        pil_img = Image.fromarray(frame_rgb)
+    #        # Get the image description
+    #        outputs = ImgToText(pil_img)
+    #        description = outputs[0]['generated_text']
+    #        descriptions.append(description)
+    #        print(str(frame_count) + " : " + outputs[0]['generated_text'])
+    #
+    #    frame_count += 1

-    # Close the video reader
-    cap.release()
+    ## Close the video reader
+    #cap.release()

-    # Concatenate the descriptions
-    concatenated_description = " ".join(descriptions)
-    summarized_description = Summarize(concatenated_description, max_length=31)[0]["summary_text"]
-    print("SUMMARIZATION : " + summarized_description)
+    ## Concatenate the descriptions
+    #concatenated_description = " ".join(descriptions)
+    #summarized_description = Summarize(concatenated_description, max_length=31)[0]["summary_text"]
+    #print("SUMMARIZATION : " + summarized_description)

-    translated_text = translator(summarized_description)[0]["translation_text"]
-    print("TRANSLATION : " + translated_text)
+    #translated_text = translator(summarized_description)[0]["translation_text"]
+    #print("TRANSLATION : " + translated_text)

-    print(audio(translated_text))
+    print(audio("bonjour je m'appelle Florent et je fais un test"))

-    ##audio_file = audio(translated_text)[0]["audio"]
-    #print("AUDIO : " + audio_file)
+    audio_file = audio("bonjour je m'appelle Florent et je fais un test")
+    print(audio_file)

-    #return audio_file
-    return translated_text
+    return audio_file
+    #return translated_text

 # Create a dropdown menu with language options
 language_dropdown = gr.Dropdown(
     ["en", "fr", "de", "es"], label="Language", info="The Language of the output"
 )
-example_videos = [
-    "./meduses.mp4",
-    "./paysage.mp4",
-    # Add more example video file paths as needed
-]

 # Create a dropdown menu with example video options
-example_video_dropdown = gr.Dropdown(example_videos, label="Exemples de vidéos")
 iface = gr.Interface(
     fn=video_to_descriptions,
-    inputs=[example_video_dropdown, language_dropdown],
-    #outputs="audio",
-    outputs="text",
+    inputs=[gr.Video(label="Import a Video", info="The Video to be described"), language_dropdown],
+    outputs="audio",
+    #outputs="text",
     live=False
 )
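
Note on the new return value: as committed, video_to_descriptions returns the raw result of the bark pipeline to an outputs="audio" component. A transformers text-to-speech pipeline normally returns a dict of the form {"audio": <numpy array>, "sampling_rate": <int>}, while a Gradio "audio" output expects a file path or a (sample_rate, waveform) tuple, so a small adapter would likely be needed. A minimal sketch, assuming that usual return format (the helper name to_gradio_audio is illustrative, not part of this commit):

import numpy as np

def to_gradio_audio(tts_output):
    # transformers TTS pipelines return {"audio": ndarray, "sampling_rate": int};
    # Gradio's "audio" output accepts a (sample_rate, waveform) tuple.
    waveform = np.squeeze(tts_output["audio"])  # bark may emit a (1, n) array; flatten it
    return tts_output["sampling_rate"], waveform

# Usage with the pipeline created in video_to_descriptions:
#   audio_file = audio("bonjour je m'appelle Florent et je fais un test")
#   return to_gradio_audio(audio_file)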