# video-search / app.py
import os
import subprocess
import tempfile

import clip  # openai-clip package: pip install git+https://github.com/openai/CLIP.git
import cv2
import face_recognition
import faiss
import numpy as np
import streamlit as st
import torch
import yt_dlp
from PIL import Image
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Helper functions
def get_video_id(url):
    # Works for standard watch URLs (https://www.youtube.com/watch?v=...);
    # youtu.be short links are not handled here.
    return url.split("v=")[1].split("&")[0]

def download_youtube_video(url, output_path):
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
        'outtmpl': os.path.join(output_path, '%(id)s.%(ext)s'),
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        filename = ydl.prepare_filename(info)
        return filename

def process_video(video_url, output_dir, video_id):
    # Placeholder for video processing logic.
    # This should include face detection, object detection, transcription, etc.
    # For now, we just download the video and sample frames.
    video_path = download_youtube_video(video_url, output_dir)

    # Extract frames (simplified version)
    video = cv2.VideoCapture(video_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    duration = frame_count / fps

    frames = []
    frame_times = []
    for i in range(0, frame_count, int(fps)):  # Extract one frame per second
        video.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = video.read()
        if ret:
            frames.append(frame)
            frame_times.append(i / fps)
    video.release()

    return {
        'video_path': video_path,
        'frames': frames,
        'frame_times': frame_times,
        'duration': duration,
        'fps': fps,
    }
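
# The following helper is a hypothetical sketch, not part of the original app:
# one plausible way to embed the sampled frames with CLIP and index them with
# FAISS, as the placeholder comments above and below suggest. It assumes the
# clip_model and preprocess objects returned by load_models() further down.
def build_frame_index(frames, clip_model, preprocess, device):
    embeddings = []
    for frame in frames:
        # OpenCV frames are BGR; CLIP's preprocess expects an RGB PIL image.
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        with torch.no_grad():
            emb = clip_model.encode_image(preprocess(image).unsqueeze(0).to(device))
        embeddings.append(emb.float().cpu().numpy())
    embeddings = np.concatenate(embeddings, axis=0)
    # L2-normalize so inner-product search is equivalent to cosine similarity.
    embeddings /= np.linalg.norm(embeddings, axis=1, keepdims=True)
    index = faiss.IndexFlatIP(embeddings.shape[1])
    index.add(embeddings)
    return index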

def search(query, index_path, metadata_path, model):
    # Placeholder for search functionality.
    # This should use FAISS for efficient similarity search.
    return [], []
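
# Also a hypothetical sketch: a CLIP text query against the index built by
# build_frame_index() above, returning (timestamp, score) pairs. This is one
# way the search() placeholder could be filled in, not the app's actual logic.
def search_frames(query, index, frame_times, clip_model, device, k=5):
    with torch.no_grad():
        text_emb = clip_model.encode_text(clip.tokenize([query]).to(device)).float()
    text_emb /= text_emb.norm(dim=-1, keepdim=True)
    scores, ids = index.search(text_emb.cpu().numpy(), k)
    return [(frame_times[i], float(s)) for i, s in zip(ids[0], scores[0])]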

# Load models
@st.cache_resource
def load_models():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Load CLIP through the openai-clip package; torch.hub does not expose
    # a 'clip_vit_b32' entry point for openai/CLIP.
    clip_model, preprocess = clip.load("ViT-B/32", device=device)
    text_model = SentenceTransformer("all-MiniLM-L6-v2").to(device)
    qa_model = pipeline(
        "question-answering",
        model="distilbert-base-cased-distilled-squad",
        device=0 if torch.cuda.is_available() else -1,
    )
    return clip_model, preprocess, text_model, qa_model

clip_model, preprocess, text_model, qa_model = load_models()

# Streamlit UI
st.title("Enhanced YouTube Video Analysis")

video_url = st.text_input("Enter YouTube Video URL")

if st.button("Analyze") and video_url:
    with st.spinner("Processing video..."):
        video_id = get_video_id(video_url)
        # Keep the results in session state so they survive the Streamlit
        # reruns triggered by the search/upload widgets below.
        st.session_state['results'] = process_video(video_url, "output_dir", video_id)

results = st.session_state.get('results')
if results:
    st.success("Video processed successfully!")

    # Text search and question answering
    st.subheader("Text Search and Q&A")
    query = st.text_input("Enter a search query or question")
    if query:
        # Placeholder for text search and QA
        st.write("Text search and QA functionality to be implemented")

    # Image upload and similarity search
    st.subheader("Image Search")
    uploaded_image = st.file_uploader("Upload an image to find similar frames", type=["jpg", "jpeg", "png"])
    if uploaded_image:
        # Placeholder for image search
        st.write("Image search functionality to be implemented")

    # Face upload and recognition
    st.subheader("Face Search")
    uploaded_face = st.file_uploader("Upload a face image to find appearances", type=["jpg", "jpeg", "png"])
    if uploaded_face:
        face_image = face_recognition.load_image_file(uploaded_face)
        encodings = face_recognition.face_encodings(face_image)
        if not encodings:
            st.error("No face was found in the uploaded image.")
            st.stop()
        face_encoding = encodings[0]

        face_appearances = []
        face_frames = []
        for i, frame in enumerate(results['frames']):
            # face_recognition expects RGB input; OpenCV frames are BGR.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            face_locations = face_recognition.face_locations(rgb_frame)
            face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)
            for encoding in face_encodings:
                if face_recognition.compare_faces([face_encoding], encoding)[0]:
                    face_appearances.append(results['frame_times'][i])
                    face_frames.append(frame)
                    break  # Record each frame at most once.
        st.write(f"Face appearances found at {len(face_appearances)} timestamps.")

        if face_frames:
            # Create a temporary directory to store frames
            with tempfile.TemporaryDirectory() as temp_dir:
                # Save frames as images
                for i, frame in enumerate(face_frames):
                    cv2.imwrite(os.path.join(temp_dir, f"frame_{i:04d}.jpg"), frame)

                # Use FFmpeg to create a video from the frames
                output_video = "face_appearances.mp4"
                ffmpeg_command = [
                    "ffmpeg",
                    "-y",  # Overwrite the output file if it already exists
                    "-framerate", str(results['fps']),
                    "-i", os.path.join(temp_dir, "frame_%04d.jpg"),
                    "-c:v", "libx264",
                    "-pix_fmt", "yuv420p",
                    output_video,
                ]
                subprocess.run(ffmpeg_command, check=True)

            # Display the generated video
            st.video(output_video)

            # Provide a download link for the video
            with open(output_video, "rb") as file:
                st.download_button(
                    label="Download Face Appearances Video",
                    data=file,
                    file_name="face_appearances.mp4",
                    mime="video/mp4",
                )
        else:
            st.write("No frames with the uploaded face were found in the video.")

    # Display original video
    st.subheader("Original Video")
    st.video(results['video_path'])
else:
    st.warning("Please enter a valid YouTube URL and click 'Analyze'")