import gradio as gr
from transformers import pipeline
import cv2
from PIL import Image


def video_to_descriptions(video, target_language="en"):
    # Caption sampled frames, summarize the captions, translate the summary,
    # and synthesize speech for the translated text.
    img_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
    summarize = pipeline("summarization", model="tuner007/pegasus_summarizer")

    # "Helsinki-NLP/opus-mt-en-en" is not an available model, so only build a
    # translator when the target language is not already English.
    translator = None
    if target_language != "en":
        translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}")
    audio = pipeline("text-to-speech", model="suno/bark")

    cap = cv2.VideoCapture(video)
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    descriptions = []
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Caption one frame every two seconds of video.
        if frame_count % (fps * 2) == 0:
            # OpenCV decodes frames as BGR; BLIP expects an RGB PIL image.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pil_img = Image.fromarray(frame_rgb)

            outputs = img_to_text(pil_img)
            description = outputs[0]["generated_text"]
            descriptions.append(description)
            print(f"{frame_count} : {description}")

        frame_count += 1

    cap.release()

    concatenated_description = " ".join(descriptions)
    summarized_description = summarize(concatenated_description, max_length=31)[0]["summary_text"]
    print("SUMMARIZATION : " + summarized_description)

    translated_text = summarized_description
    if translator is not None:
        translated_text = translator(summarized_description)[0]["translation_text"]
    print("TRANSLATION : " + translated_text)

    # The synthesized speech is only printed; it is not surfaced in the interface output.
    print(audio(translated_text))

    return translated_text


language_dropdown = gr.Dropdown(
    ["en", "fr", "de", "es"], label="Language", info="The language of the output"
)

iface = gr.Interface(
    fn=video_to_descriptions,
    # `info` is not a documented gr.Video parameter, so only a label is set here.
    inputs=[gr.Video(label="Import a Video"), language_dropdown],
    outputs="text",
    live=False,
)

if __name__ == "__main__":
    iface.launch()