PierreBrunelle's picture
Update app.py
a77c3ea verified
raw
history blame
7.35 kB
import pixeltable as pxt
import os
import openai
import gradio as gr
import getpass
from pixeltable.iterators import FrameIterator
from pixeltable.functions.video import extract_audio
from pixeltable.functions.audio import get_metadata
from pixeltable.functions import openai
"""## Store OpenAI API Key"""
# Prompt for the key only when the environment does not already provide one,
# then store it in this process's environment.
if os.environ.get('OPENAI_API_KEY') is None:
    os.environ['OPENAI_API_KEY'] = getpass.getpass('Enter your OpenAI API key:')
"""## Create a Table, a View, and Computed Columns"""
# Recreate the 'directory' namespace on every start. NOTE(review): force=True
# presumably discards whatever a previous run stored there — confirm this is
# the intended behavior for the deployment.
pxt.drop_dir('directory', force=True)
pxt.create_dir('directory')
# Base table: each inserted row carries a video and the target platform name
# ('sm_type'); both columns are declared nullable.
t = pxt.create_table(
    'directory.video_table', {
        "video": pxt.VideoType(nullable=True),
        "sm_type": pxt.StringType(nullable=True),
    }
)
# View over the base table driven by a frame iterator on the video column,
# configured with fps=1. Thumbnails are read from this view's 'frame' column.
frames_view = pxt.create_view(
    "directory.frames",
    t,
    iterator=FrameIterator.create(video=t.video, fps=1)
)
# Create computed columns to store transformations and persist outputs.
# Each column below is defined in terms of an earlier one, so the order of
# these statements matters.
# Audio track extracted from the video, requested in mp3 format.
t['audio'] = extract_audio(t.video, format='mp3')
# Metadata derived from the extracted audio.
t['metadata'] = get_metadata(t.audio)
# Transcription of the audio with the 'whisper-1' model. `openai` here is
# `pixeltable.functions.openai`: that import rebinds the name after the plain
# `import openai` earlier in the file.
t['transcription'] = openai.transcriptions(audio=t.audio, model='whisper-1')
# Just the 'text' field of the transcription result.
t['transcription_text'] = t.transcription.text
"""## Custom UDF for Generating Social Media Prompts"""
# Pixeltable UDF that assembles the chat messages sent to the language model.
@pxt.udf
def prompt(A: str, B: str) -> list[dict]:
    """Build the system and user chat messages for one row.

    Both arguments are interpolated verbatim into the user message under the
    labels "A" and "B". Where this file applies the UDF, ``A`` is the
    platform name column and ``B`` is the transcription text column.

    Returns a two-element list of ``{'role', 'content'}`` dicts: the system
    message first, then the user message.
    """
    instructions = 'You are an expert in creating social media content and you generate effective post, based on user content. Respect the social media platform guidelines and constraints.'
    request = f'A: "{A}" \n B: "{B}"'
    return [
        {'role': role, 'content': content}
        for role, content in (('system', instructions), ('user', request))
    ]
# Apply the UDF to create a new computed column holding the chat messages
# built from the platform name and the transcription text.
t['message'] = prompt(t.sm_type, t.transcription_text)
"""## Generating Responses with OpenAI's GPT Model"""
# Generate responses using OpenAI's chat completion API, with max_tokens=500.
# `openai` is `pixeltable.functions.openai` (see the imports at the top of
# the file).
t['response'] = openai.chat_completions(messages=t.message, model='gpt-4o-mini-2024-07-18', max_tokens=500)
# Extract the message content of the first choice in the response.
t['answer'] = t.response.choices[0].message.content
# Largest accepted upload, in megabytes. The processing handler rejects larger
# files before inserting them, and the upload widget shows this value in its
# label.
MAX_VIDEO_SIZE_MB = 35
# Presumably the number of generation requests allowed to run at once —
# confirm against the Gradio event/queue configuration.
CONCURRENCY_LIMIT = 1
def _error_outputs(message):
    """Return a 4-tuple carrying only ``message``, with the other outputs cleared."""
    return message, None, None, None


def process_and_generate_post(video_file, social_media_type):
    """Insert an uploaded video into the table and return the generated outputs.

    Args:
        video_file: Path of the uploaded video file. A falsy value (``None``
            or an empty string) means nothing was uploaded.
        social_media_type: Platform name stored in the row's ``sm_type``
            column.

    Returns:
        A 4-tuple ``(post_text, thumbnails, transcription_dataframe, audio)``.
        On any failure (no upload, file too large, or an exception while
        processing) the first element is a human-readable message and the
        other three are ``None``. Every path returns four values because the
        click handler that calls this function binds four output components;
        the previous two-value error returns did not match that count.
    """
    if not video_file:
        return _error_outputs("Please upload a video file.")

    try:
        # Check video file size (bytes converted to MB).
        video_size = os.path.getsize(video_file) / (1024 * 1024)
        if video_size > MAX_VIDEO_SIZE_MB:
            return _error_outputs(
                f"The video file is larger than {MAX_VIDEO_SIZE_MB} MB. Please upload a smaller file."
            )

        # Insert the video into the table. The computed columns defined on the
        # table supply the audio, transcription and answer read back below.
        t.insert([{
            "video": video_file,
            "sm_type": social_media_type
        }])

        # The reads below take the tail of the table/view.
        # NOTE(review): this assumes the tail corresponds to the row that was
        # just inserted — confirm if requests can ever run concurrently.
        # Retrieve the generated social media post.
        social_media_post = t.select(t.answer).tail(1)['answer'][0]

        # Retrieve the extracted audio.
        audio = t.select(t.audio).tail(1)['audio'][0]

        # Retrieve thumbnails: the last six rows of the frames view.
        thumbnails = frames_view.select(frames_view.frame).tail(6)['frame']

        # Transcription text of every row stored in the table, as a DataFrame.
        df_output = t.select(t.transcription_text).collect().to_pandas()

        return social_media_post, thumbnails, df_output, audio
    except Exception as e:
        # UI boundary: report the failure in the text output instead of
        # letting the exception propagate to the caller.
        return _error_outputs(f"An error occurred: {e}")
# Gradio Interface
import gradio as gr
def gradio_interface():
    """Build the Gradio Blocks UI for the video-to-social-media-post demo.

    The page has a header, two columns of feature bullet points, an input
    column (video upload, platform dropdown, button, examples), an output
    column (post text, thumbnail gallery, audio, transcription table) and a
    footer.

    Returns:
        The ``gr.Blocks`` instance. It is not launched here.
    """
    # NOTE(review): the row/column nesting below was reconstructed from the
    # order of the statements; confirm it matches the intended layout.
    with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
        # Page header. The embedded markup is kept flush-left so it is never
        # indented inside the Markdown source. The <img> tag is self-closing;
        # the former stray </img> closing tag was removed.
        gr.Markdown("""
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" width="20%" />
<h1>Video to Social Media Post Generator</h1>
"""
        )

        # Feature overview, split across two columns.
        with gr.Row():
            with gr.Column():
                gr.Markdown("""
<ul>
<li><strong>Video Data Management:</strong> Creating tables and views to store and organize video data.</li>
<li><strong>Automated Video Processing:</strong> Extracting frames and audio from videos.</li>
<li><strong>Data Transformation:</strong> Computing and storing metadata, transcriptions, and AI-generated content.</li>
</ul>
""")
            with gr.Column():
                gr.Markdown("""
<ul>
<li><strong>AI Integration:</strong> Utilizing OpenAI's GPT and Whisper models for transcription and content generation.</li>
<li><strong>Custom Functions:</strong> Defining user-defined functions (UDFs) for specialized tasks like prompt construction.</li>
<li><strong>Data Persistence:</strong> Storing transformed data and AI outputs for easy retrieval and analysis.</li>
</ul>
""")

        with gr.Row():
            # Inputs: video upload, platform choice, trigger button, examples.
            with gr.Column():
                video_input = gr.Video(
                    label=f"Upload Video File (max {MAX_VIDEO_SIZE_MB} MB):",
                    include_audio=True,
                    max_length=300,
                    height='400px',
                    autoplay=False
                )
                social_media_type = gr.Dropdown(
                    choices=["X (Twitter)", "Facebook", "LinkedIn", "Instagram"],
                    label="Select Social Media Platform:",
                    value="X (Twitter)",
                )
                generate_btn = gr.Button("Generate Post")
                gr.Examples(
                    examples=[["example1.mp4"], ["example2.mp4"], ["example3.mp4"]],
                    inputs=[video_input]
                )

            # Outputs filled in by the click handler.
            with gr.Column():
                output = gr.Textbox(label="Generated Social Media Post", show_copy_button=True)
                thumbnail = gr.Gallery(
                    label="Pick your favorite Post Thumbnail",
                    show_download_button=True,
                    show_fullscreen_button=True,
                    height='400px'
                )
                audio = gr.Audio(label="Extracted audio", show_download_button=True)
                df_output = gr.DataFrame(label="Transcription")

        # The handler must return one value per entry in `outputs`, in this
        # order. CONCURRENCY_LIMIT was previously defined but never applied;
        # passing it here caps how many runs of the handler execute at once.
        generate_btn.click(
            fn=process_and_generate_post,
            trigger_mode='once',
            show_progress='full',
            inputs=[video_input, social_media_type],
            outputs=[output, thumbnail, df_output, audio],
            concurrency_limit=CONCURRENCY_LIMIT,
        )

        # Footer. The unmatched </a> that followed the model name was removed.
        gr.HTML(
            """
<p>Pixeltable is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data. Powered solely by <a href="https://github.com/pixeltable/pixeltable" style="text-decoration: underline;" target="_blank">Pixeltable</a> - running OpenAI (gpt-4o-mini-2024-07-18).</p>
<p><a href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/pixeltable-basics.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Colab"></a></p>
"""
        )
    return demo
# Build and serve the UI only when this file is executed as a script.
if __name__ == "__main__":
    app = gradio_interface()
    app.launch(show_api=False)
"""
Pixeltable simplifies complex video processing workflows and integrates AI capabilities to create a powerful tool for generating social media content from video inputs."""