|
import pixeltable as pxt |
|
import os |
|
import openai |
|
import gradio as gr |
|
import getpass |
|
from pixeltable.iterators import FrameIterator |
|
from pixeltable.functions.video import extract_audio |
|
from pixeltable.functions.audio import get_metadata |
|
from pixeltable.functions import openai |
|
|
|
"""## Store OpenAI API Key""" |
|
|
|
# Ask for the OpenAI key interactively only when the environment does not
# already provide one; getpass keeps the typed key off the terminal echo.
if os.getenv('OPENAI_API_KEY') is None:
    os.environ['OPENAI_API_KEY'] = getpass.getpass('Enter your OpenAI API key:')
|
|
|
"""## Create a Table, a View, and Computed Columns""" |
|
|
|
# Recreate the 'directory' namespace on every start: drop it (force=True)
# and create it again before any table or view is defined inside it.
pxt.drop_dir('directory', force=True)
pxt.create_dir('directory')

# Base table: the uploaded video and the social media platform it targets.
video_schema = {
    "video": pxt.VideoType(nullable=True),
    "sm_type": pxt.StringType(nullable=True),
}
t = pxt.create_table('directory.video_table', video_schema)

# View over the base table driven by a FrameIterator on the video column
# (fps=1); its `frame` column is what the UI later shows as thumbnails.
frame_iterator = FrameIterator.create(video=t.video, fps=1)
frames_view = pxt.create_view("directory.frames", t, iterator=frame_iterator)

# Computed columns, each derived from the one before it:
# video -> mp3 audio -> audio metadata / Whisper transcription -> plain text.
t['audio'] = extract_audio(t.video, format='mp3')
t['metadata'] = get_metadata(t.audio)
t['transcription'] = openai.transcriptions(
    audio=t.audio,
    model='whisper-1',
)
t['transcription_text'] = t.transcription.text
|
|
|
"""## Custom UDF for Generating Social Media Prompts""" |
|
|
|
|
|
@pxt.udf
def prompt(A: str, B: str) -> list[dict]:
    """Build the chat messages sent to the chat-completions model.

    Args:
        A: First value embedded in the user message (the caller below
            passes the social media platform, ``t.sm_type``).
        B: Second value embedded in the user message (the caller below
            passes the transcription text).

    Returns:
        A two-element list of ``{'role', 'content'}`` dicts: a fixed system
        instruction followed by a user message containing ``A`` and ``B``.
    """
    system_turn = {
        'role': 'system',
        'content': (
            'You are an expert in creating social media content and you generate '
            'effective post, based on user content. Respect the social media '
            'platform guidelines and constraints.'
        ),
    }
    user_turn = {
        'role': 'user',
        'content': f'A: "{A}" \n B: "{B}"',
    }
    return [system_turn, user_turn]


# Computed column holding the message list for each row.
t['message'] = prompt(t.sm_type, t.transcription_text)
|
|
|
"""## Generating Responses with OpenAI's GPT Model""" |
|
|
|
|
|
# Computed column with the raw chat-completions response for each row's
# message list.
chat_response = openai.chat_completions(
    messages=t.message,
    model='gpt-4o-mini-2024-07-18',
    max_tokens=500,
)
t['response'] = chat_response

# Text of the first choice in the response: the generated post itself.
t['answer'] = t.response.choices[0].message.content

# Upload size cap in megabytes, enforced in process_and_generate_post.
MAX_VIDEO_SIZE_MB = 35

CONCURRENCY_LIMIT = 1
|
|
|
def process_and_generate_post(video_file, social_media_type):
    """Insert an uploaded video into the table and return the generated outputs.

    The Gradio click handler binds this function to four output components
    (post text, thumbnail gallery, transcription dataframe, audio), so every
    exit path returns exactly four values. On failure the first value carries
    a human-readable message and the remaining three are ``None``.

    Args:
        video_file: Path of the uploaded video file; falsy when nothing was
            uploaded.
        social_media_type: Target platform, stored in the ``sm_type`` column.

    Returns:
        A 4-tuple ``(post_or_message, thumbnails, transcription_df, audio)``.
    """

    def _failure(message):
        # Pad the message with None for the gallery, dataframe and audio
        # outputs so the return arity matches the success path.
        return message, None, None, None

    if not video_file:
        return _failure("Please upload a video file.")

    try:
        # File size in megabytes, checked before any processing starts.
        video_size = os.path.getsize(video_file) / (1024 * 1024)
        if video_size > MAX_VIDEO_SIZE_MB:
            return _failure(
                f"The video file is larger than {MAX_VIDEO_SIZE_MB} MB. Please upload a smaller file."
            )

        # Inserting the row is what populates the table's computed columns
        # that are read back below.
        t.insert([{
            "video": video_file,
            "sm_type": social_media_type,
        }])

        # Read the values for the most recently inserted row.
        social_media_post = t.select(t.answer).tail(1)['answer'][0]
        audio = t.select(t.audio).tail(1)['audio'][0]

        # Last six rows of the frame view, offered as candidate thumbnails.
        thumbnails = frames_view.select(frames_view.frame).tail(6)['frame']

        # Transcription text for every row currently in the table.
        df_output = t.select(t.transcription_text).collect().to_pandas()

        return social_media_post, thumbnails, df_output, audio

    except Exception as e:
        # UI boundary: report the problem in the text output rather than
        # letting the exception escape into the Gradio event loop.
        return _failure(f"An error occurred: {e}")
|
|
|
|
|
import gradio as gr |
|
|
|
def gradio_interface():
    """Build the Gradio Blocks UI for the video-to-social-media-post app.

    Layout, top to bottom: a header with logo and title, two columns of
    feature bullet points, an input column (video upload, platform dropdown,
    generate button, example videos) next to an output column (generated
    post, thumbnail gallery, extracted audio, transcription table), and a
    footer with project links.

    Returns:
        The constructed ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """
    with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
        # Header. <img> is a void element, so it takes no closing tag.
        gr.Markdown(
            """
            <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" width="20%" />
            <h1>Video to Social Media Post Generator</h1>
            """
        )

        # Feature overview, split across two columns.
        with gr.Row():
            with gr.Column():
                gr.Markdown(
                    """
                    <ul>
                    <li><strong>Video Data Management:</strong> Creating tables and views to store and organize video data.</li>
                    <li><strong>Automated Video Processing:</strong> Extracting frames and audio from videos.</li>
                    <li><strong>Data Transformation:</strong> Computing and storing metadata, transcriptions, and AI-generated content.</li>
                    </ul>
                    """
                )
            with gr.Column():
                gr.Markdown(
                    """
                    <ul>
                    <li><strong>AI Integration:</strong> Utilizing OpenAI's GPT and Whisper models for transcription and content generation.</li>
                    <li><strong>Custom Functions:</strong> Defining user-defined functions (UDFs) for specialized tasks like prompt construction.</li>
                    <li><strong>Data Persistence:</strong> Storing transformed data and AI outputs for easy retrieval and analysis.</li>
                    </ul>
                    """
                )

        with gr.Row():
            # Inputs.
            with gr.Column():
                video_input = gr.Video(
                    label=f"Upload Video File (max {MAX_VIDEO_SIZE_MB} MB):",
                    include_audio=True,
                    max_length=300,
                    height='400px',
                    autoplay=False,
                )
                social_media_type = gr.Dropdown(
                    choices=["X (Twitter)", "Facebook", "LinkedIn", "Instagram"],
                    label="Select Social Media Platform:",
                    value="X (Twitter)",
                )
                generate_btn = gr.Button("Generate Post")

                gr.Examples(
                    examples=[["example1.mp4"], ["example2.mp4"], ["example3.mp4"]],
                    inputs=[video_input],
                )

            # Outputs.
            with gr.Column():
                output = gr.Textbox(label="Generated Social Media Post", show_copy_button=True)
                thumbnail = gr.Gallery(
                    label="Pick your favorite Post Thumbnail",
                    show_download_button=True,
                    show_fullscreen_button=True,
                    height='400px',
                )
                audio = gr.Audio(label="Extracted audio", show_download_button=True)

        df_output = gr.DataFrame(label="Transcription")

        # The handler returns four values, one per component listed in
        # `outputs`, in this order.
        generate_btn.click(
            fn=process_and_generate_post,
            trigger_mode='once',
            show_progress='full',
            inputs=[video_input, social_media_type],
            outputs=[output, thumbnail, df_output, audio],
            # Cap simultaneous runs of the handler with the module constant.
            concurrency_limit=CONCURRENCY_LIMIT,
        )

        # Footer with project links.
        gr.HTML(
            """
            <p>Pixeltable is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data. Powered solely by <a href="https://github.com/pixeltable/pixeltable" style="text-decoration: underline;" target="_blank">Pixeltable</a> - running OpenAI (gpt-4o-mini-2024-07-18).</p>
            <p><a href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/pixeltable-basics.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Colab"></a></p>
            """
        )

    return demo
|
|
|
|
|
if __name__ == "__main__":
    # Build the UI first, then serve it with show_api=False.
    demo_app = gradio_interface()
    demo_app.launch(show_api=False)
|
|
|
""" |
|
Pixeltable simplifies complex video processing workflows and integrates AI capabilities to create a powerful tool for generating social media content from video inputs.""" |