"""Generate social-media posts from videos with Pixeltable, OpenAI, and Gradio.

Pipeline: insert a video -> extract audio -> transcribe with Whisper ->
build a chat prompt -> generate a post with gpt-4o-mini -> serve via Gradio.
"""

import getpass
import os

import gradio as gr
import openai  # OpenAI SDK; NOTE: the name is intentionally rebound below.
import pixeltable as pxt
# This import shadows the SDK module above: all `openai.*` calls in this file
# are Pixeltable's OpenAI function wrappers, not the raw SDK.
from pixeltable.functions import openai
from pixeltable.functions.audio import get_metadata
from pixeltable.functions.video import extract_audio
from pixeltable.iterators import FrameIterator

# -- Store the OpenAI API key (prompt interactively if not already set) --
if 'OPENAI_API_KEY' not in os.environ:
    os.environ['OPENAI_API_KEY'] = getpass.getpass('Enter your OpenAI API key:')

# -- Create a table, a view, and computed columns --
# Start from a clean slate: drop any directory left over from a previous run.
pxt.drop_dir('directory', force=True)
pxt.create_dir('directory')

t = pxt.create_table(
    'directory.video_table',
    {
        "video": pxt.VideoType(nullable=True),
        "sm_type": pxt.StringType(nullable=True),
    },
)

# View that materializes one frame per second of each inserted video.
frames_view = pxt.create_view(
    "directory.frames",
    t,
    iterator=FrameIterator.create(video=t.video, fps=1),
)

# Computed columns: each transformation runs automatically on insert and the
# output is persisted alongside the source row.
t['audio'] = extract_audio(t.video, format='mp3')
t['metadata'] = get_metadata(t.audio)
t['transcription'] = openai.transcriptions(audio=t.audio, model='whisper-1')
t['transcription_text'] = t.transcription.text


# -- Custom UDF for generating social-media prompts --
@pxt.udf
def prompt(A: str, B: str) -> list[dict]:
    """Build an OpenAI chat-message list from a platform name and a transcript.

    Args:
        A: Target social-media platform (the table's ``sm_type`` value).
        B: Transcribed text of the uploaded video.

    Returns:
        A two-element list of chat messages (system + user) in the shape
        expected by the OpenAI chat-completions API.
    """
    system_msg = 'You are an expert in creating social media content and you generate effective post, based on user content. Respect the social media platform guidelines and constraints.'
    user_msg = f'A: "{A}" \n B: "{B}"'
    return [
        {'role': 'system', 'content': system_msg},
        {'role': 'user', 'content': user_msg},
    ]


# Apply the UDF: 'message' holds the ready-to-send chat messages per row.
t['message'] = prompt(t.sm_type, t.transcription_text)

# -- Generating responses with OpenAI's GPT model --
t['response'] = openai.chat_completions(
    messages=t.message, model='gpt-4o-mini-2024-07-18', max_tokens=500
)
# Extract just the generated text from the API response structure.
t['answer'] = t.response.choices[0].message.content

MAX_VIDEO_SIZE_MB = 35
CONCURRENCY_LIMIT = 1


def process_and_generate_post(video_file, social_media_type):
    """Insert a video, run the pipeline, and return the generated artifacts.

    Args:
        video_file: Path to the uploaded video file (falsy when missing).
        social_media_type: Target platform string passed to the prompt UDF.

    Returns:
        A 4-tuple ``(post_text, thumbnails, transcript_dataframe, audio)``.
        On failure the first element is an error message and the remaining
        three are ``None`` (the arity must stay constant for a Gradio
        callback with four outputs — the original returned only 2 values
        on error, which breaks the UI binding).
    """
    if not video_file:
        return "Please upload a video file.", None, None, None
    try:
        # Reject oversized uploads before doing any work.
        video_size = os.path.getsize(video_file) / (1024 * 1024)  # bytes -> MB
        if video_size > MAX_VIDEO_SIZE_MB:
            return (
                f"The video file is larger than {MAX_VIDEO_SIZE_MB} MB. "
                "Please upload a smaller file.",
                None,
                None,
                None,
            )

        # Insert triggers every computed column (audio, transcription, post).
        # Pixeltable also supports referencing external sources such as URLs.
        t.insert([{"video": video_file, "sm_type": social_media_type}])

        # Retrieve the most recent row's generated post and audio.
        social_media_post = t.select(t.answer).tail(1)['answer'][0]
        audio = t.select(t.audio).tail(1)['audio'][0]

        # Retrieve up to six of the latest extracted frames as thumbnails.
        thumbnails = frames_view.select(frames_view.frame).tail(6)['frame']

        # Full transcript table for display in the UI.
        df_output = t.select(t.transcription_text).collect().to_pandas()

        return social_media_post, thumbnails, df_output, audio
    except Exception as e:
        # Surface the failure in the UI; pad to the 4-output arity.
        return f"An error occurred: {str(e)}", None, None, None


def gradio_interface():
    """Build and return the Gradio Blocks UI (informational markdown only)."""
    with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
        gr.Markdown("""
Pixeltable is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data. Powered solely by Pixeltable - running OpenAI (gpt-4o-mini-2024-07-18).
""")
    return demo


# Launch the Gradio interface
if __name__ == "__main__":
    gradio_interface().launch(show_api=False)

""" Pixeltable simplifies complex video processing workflows and integrates AI capabilities to create a powerful tool for generating social media content from video inputs."""