# Spaces: Running
import streamlit as st | |
from phi.agent import Agent | |
from phi.model.google import Gemini | |
from phi.tools.duckduckgo import DuckDuckGo | |
from google.generativeai import upload_file, get_file | |
import google.generativeai as genai | |
import time | |
from pathlib import Path | |
import tempfile | |
from dotenv import load_dotenv | |
load_dotenv() | |
import os | |
# Look up the Gemini API key in the process environment and configure the
# google.generativeai SDK only when a non-empty key is present.
API_KEY = os.environ.get("GOOGLE_API_KEY")
if API_KEY:
    genai.configure(api_key=API_KEY)
# Page configuration: browser-tab title, tab icon and wide layout, followed
# by the on-page title and header.
st.set_page_config(
    page_title="Multimodal AI Agent - Video Summarizer",
    # U+1F4F9 VIDEO CAMERA; a real emoji is required for the tab icon.
    page_icon="📹",
    layout="wide",
)
st.title("Phidata Multimodal AI Summarizer Agent 📹")
st.header("Powered by Gemini 2.0 Flash Exp")
def initialize_agent():
    """Build the phidata agent used to answer questions about a video.

    Returns:
        Agent: an agent named "Video AI Summarizer" that uses a Gemini model,
        has the DuckDuckGo tool attached, and has markdown output enabled.
    """
    return Agent(
        name="Video AI Summarizer",
        # Model id for the "Gemini 2.0 Flash Exp" model named in the page
        # header.
        model=Gemini(id="gemini-2.0-flash-exp"),
        tools=[DuckDuckGo()],
        markdown=True,
    )
# Create the agent for this script run.
multimodal_agent = initialize_agent()

# Upload widget restricted to the supported video container formats.
video_file = st.file_uploader(
    label="Upload a video file",
    type=["mp4", "mov", "avi"],
    help="Upload a video for AI Analysis",
)
def _upload_video_for_analysis(video_bytes, suffix):
    """Upload video bytes to Gemini and wait until processing has finished.

    The bytes are staged in a temporary file because ``upload_file`` is given
    a filesystem path. The temporary file is removed as soon as the upload and
    polling are done, whether or not they succeeded.

    Args:
        video_bytes: Raw content of the uploaded video.
        suffix: Extension (including the leading dot) for the temporary file.

    Returns:
        The file object last returned by ``upload_file`` / ``get_file``.

    Raises:
        RuntimeError: If the file ends up in the ``FAILED`` state.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_video:
        temp_video.write(video_bytes)
        video_path = temp_video.name

    try:
        processed_video = upload_file(video_path)
        # Poll once per second until the file leaves the PROCESSING state.
        while processed_video.state.name == "PROCESSING":
            time.sleep(1)
            processed_video = get_file(processed_video.name)
    finally:
        # Clean temporary file
        Path(video_path).unlink(missing_ok=True)

    if processed_video.state.name == "FAILED":
        raise RuntimeError("video processing failed on the Gemini side")
    return processed_video


if video_file:
    # Preview directly from the uploaded in-memory buffer, so no temporary
    # file is created on reruns that do not start an analysis.
    st.video(video_file, format=video_file.type or "video/mp4", start_time=0)

    user_query = st.text_area(
        "What insights are you seeking from the video?",
        placeholder="Ask anything about the video content. The AI Agent will analyze and gather additional information",
        help="Provide specific question or insights you want from the video.",
    )

    if st.button("Analyze video", key="analyze_video_button"):
        # Treat a whitespace-only query the same as an empty one.
        if not user_query.strip():
            st.warning("Please enter a question or insight to analyze the video.")
        else:
            try:
                with st.spinner("Processing video and gathering insights..."):
                    # Upload and process video file, keeping the real
                    # extension of the upload for the staged copy.
                    suffix = Path(video_file.name).suffix or ".mp4"
                    processed_video = _upload_video_for_analysis(
                        video_file.getvalue(), suffix
                    )

                    # Prompt generation for analysis
                    analyses_prompt = f"""
Analyze the uploaded video for content and context.
Respond the following query using the video insights and supplimentary web research
{user_query}
Provide a detailed, user-friendly and actionable response.
"""

                    # AI agent processing. The keyword is ``videos`` (plural);
                    # see the phidata Agent.run reference.
                    response = multimodal_agent.run(
                        analyses_prompt, videos=[processed_video]
                    )

                # Display the result
                st.subheader("Analysis result")
                st.markdown(response.content)
            except Exception as error:
                # Top-level UI boundary: report any failure to the user
                # instead of crashing the script run.
                st.error(f"An error occured during analysis: {error}")
else:
    st.info("Upload a video file to begin analysis")
# Customize text area height by injecting a CSS rule into the page.
_TEXT_AREA_STYLE = """
<style>
.stTextArea textarea {
height: 100px;
}
</style>
"""
st.markdown(_TEXT_AREA_STYLE, unsafe_allow_html=True)