Spaces:

flocolombari
/

COLOMBARI_VIGNES-FERRINO_DERNIAUX_NIYONKURU

Runtime error

App Files Files Community

flocolombari committed on Sep 20, 2023

Commit

e95b86d

1 Parent(s): af073f3

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -43

app.py CHANGED Viewed

@@ -8,60 +8,56 @@ import scipy
 #Commit
 def video_to_descriptions(video, target_language="en"):
     """Synthesize a fixed test sentence with the text-to-speech model and save it as a WAV file.

     In this version the frame-captioning, summarization and translation
     stages are disabled, so ``video`` and ``target_language`` are accepted
     only to keep the signature stable and are not used.

     Args:
         video: Unused in this version.
         target_language: Unused in this version.

     Returns:
         The path of the WAV file that was written (``"./bark_out.wav"``).
     """
     audio = pipeline("text-to-speech", model="suno/bark-small")
     # Run the model exactly once: every call is a full inference pass, and
     # reusing the result guarantees the printed output is the one saved.
     audio_file = audio("bonjour je m'appelle Florent et je fais un test")
     print(audio_file)
     output_path = "./bark_out.wav"
     scipy.io.wavfile.write(output_path, rate=audio_file["sampling_rate"], data=audio_file["audio"][0].squeeze())
     return output_path
 # Create a dropdown menu with language options
 language_dropdown = gr.Dropdown(

 #Commit
 def video_to_descriptions(video, target_language="en"):
     """Caption a video, summarize the captions, translate the summary and speak it.

     One frame is captioned about every two seconds. The captions are joined
     and summarized, the summary is translated from English into
     ``target_language``, and the resulting text is synthesized to speech and
     written to a WAV file.

     Args:
         video: Video source handed directly to ``cv2.VideoCapture``
             (presumably a file path supplied by the Gradio interface;
             confirm against the interface definition).
         target_language: Language code used to select the
             ``Helsinki-NLP/opus-mt-en-<code>`` translation model. With
             ``"en"`` the translation step is skipped.

     Returns:
         The path of the WAV file that was written (``"./bark_out.wav"``).

     Raises:
         ValueError: If no frame could be read from ``video``, so there is
             nothing to describe.
     """
     # Load the image-to-text pipeline
     ImgToText = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
     # Open the video
     cap = cv2.VideoCapture(video)
     descriptions = []
     try:
         fps = int(cap.get(cv2.CAP_PROP_FPS))
         # Extract an image every 2 seconds. If the reported FPS truncates to
         # 0 the interval would be 0 and the modulo below would raise
         # ZeroDivisionError, so clamp it to at least one frame.
         frame_interval = max(1, fps * 2)
         frame_count = 0
         while True:
             ret, frame = cap.read()
             if not ret:
                 break
             if frame_count % frame_interval == 0:
                 # Convert the BGR frame to RGB, then to a PIL image
                 frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                 pil_img = Image.fromarray(frame_rgb)
                 # Get the image description
                 description = ImgToText(pil_img)[0]['generated_text']
                 descriptions.append(description)
                 print(f"{frame_count} : {description}")
             frame_count += 1
     finally:
         # Close the video reader even if captioning fails part-way through
         cap.release()
     if not descriptions:
         raise ValueError("No frames could be read from the video; nothing to describe.")
     # Concatenate the descriptions and summarize them
     Summarize = pipeline("summarization", model="tuner007/pegasus_summarizer")
     concatenated_description = " ".join(descriptions)
     summarized_description = Summarize(concatenated_description, max_length=31)[0]["summary_text"]
     print("SUMMARIZATION : " + summarized_description)
     if target_language == "en":
         # The translation models translate *from* English, so an English
         # target needs no translation model at all.
         translated_text = summarized_description
     else:
         # Load the translation pipeline for the target language
         translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}")
         translated_text = translator(summarized_description)[0]["translation_text"]
     print("TRANSLATION : " + translated_text)
     audio = pipeline("text-to-speech", model="suno/bark")
     # Speak the translated text itself (the variable, not a literal string
     # containing its name).
     audio_file = audio(translated_text)
     print(audio_file)
     output_path = "./bark_out.wav"
     scipy.io.wavfile.write(output_path, rate=audio_file["sampling_rate"], data=audio_file["audio"][0].squeeze())
     return output_path
 # Create a dropdown menu with language options
 language_dropdown = gr.Dropdown(