Fix some edge cases (e.g., an existing transcription) and refactor some code
app.py CHANGED
@@ -6,13 +6,21 @@ from pixeltable.functions.video import extract_audio
 from pixeltable.functions import openai as pxop
 import openai
 
+# pixeltable setup
+db_directory = 'video_db'
+table_name = 'video_table'
+
+# constants
+
+MAX_VIDEO_SIZE_MB = 35
+GPT_MODEL = "gpt-4o-mini-2024-07-18"
+MAX_TOKENS = 500
+WHISPER_MODEL = 'whisper-1'
+
 # Set your OpenAI API key
 if 'OPENAI_API_KEY' not in os.environ:
     os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')
 
-# PixelTable setup
-db_directory = 'video_db'
-table_name = 'video_table'
 
 pxt.drop_dir('video_db', force=True)
 if table_name in pxt.list_tables():
@@ -42,12 +50,12 @@ else:
 # Function to generate social media post using OpenAI GPT-4 API
 def generate_social_media_post(transcript_text, social_media_type):
     response = openai.chat.completions.create(
-        model=
+        model= GPT_MODEL,
         messages=[
             {"role": "system", "content": f"You are an expert in creating social media content for {social_media_type}."},
             {"role": "user", "content": f"Generate an effective and casual social media post based on this video transcript below. Make it a viral and suitable post for {social_media_type}. Transcript:\n{transcript_text}."}
         ],
-        max_tokens=
+        max_tokens=MAX_TOKENS
     )
     return response.choices[0].message.content
 
@@ -58,34 +66,56 @@ def process_and_generate_post(video_file, social_media_type):
     try:
         video_size = os.path.getsize(video_file) / (1024 * 1024)  # Convert to MB
 
-        if video_size >
-            return "The video file is larger than
+        if video_size > MAX_VIDEO_SIZE_MB:
+            return f"The video file is larger than {MAX_VIDEO_SIZE_MB} MB. Please upload a smaller file."
 
         video_filename = os.path.basename(video_file)
-
+        tr_audio_gen_flag = True
         sm_gen_flag = True
+        print("##################\nthe video file and social media are..."+video_file+"....."+social_media_type)
+        video_df = t.where(t.video_filename == video_filename).tail(1)
+
+        if t.select().where(t.video_filename == video_filename).count() >=1:
+            #print('Video Exists')
+            tr_audio_gen_flag = False
 
         # Check if video and sm type exists
         video_type_df = t.where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).tail(1)
+
         if video_type_df:
-            print('Video & Type Exists')
+            #print('Video & Type Exists')
            sm_gen_flag = False
 
-
+        #print(video_df)
+
+        #print('both the cases....')
+
+        #print(video_df and not video_type_df)
+
+        #print(t.select().where(t.video_filename == video_filename).count() >=1 )
+
+        #print(t.select().where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).count() >=1 )
+
+        if (t.count() < 1) or not (t.select().where(t.video_filename == video_filename).count() >=1) or (video_df and not video_type_df) :
         # Insert video into PixelTable
         t.insert([{'video': video_file, 'video_filename': video_filename, 'sm_type': social_media_type, 'sm_post': ''}])
+
+        if tr_audio_gen_flag:
+            # Extract audio from video
+
+            if not t.get_column(name='audio'):
+                t['audio'] = extract_audio(t.video, format='mp3')
+            else:
+                t.audio = extract_audio(t.video, format='mp3')
 
-        # Extract audio from video
-        if not t.get_column(name='audio'):
-            t['audio'] = extract_audio(t.video, format='mp3')
-        else:
-            t.audio = extract_audio(t.video, format='mp3')
 
-
-
-
-
-
+        print("########### processing transcription #############")
+
+        # Transcribe audio using OpenAI Whisper API
+        if not t.get_column(name='transcription'):
+            t['transcription'] = pxop.transcriptions(t.audio, model= WHISPER_MODEL)
+        else:
+            t.transcription = pxop.transcriptions(t.audio, model= WHISPER_MODEL)
 
         #cur_video_df = t.where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).tail(1)[0]
 
@@ -100,10 +130,23 @@ def process_and_generate_post(video_file, social_media_type):
 
         #plain_text = cur_video_df['transcription']['text']
 
-        #
-
-
-
+        #print(t.show())
+        #print('status of social media type')
+        #print(t.select().where((t.video_filename == video_filename) & (t.sm_type == social_media_type)).count() >=1)
+        if t.select().where((t.video_filename == video_filename) & (t.sm_type == social_media_type) & (t.sm_post != '')).count() >=1:
+
+            print("retrieving existing social media post")
+            social_media_post = t.select(t.sm_post).where((t.sm_type ==social_media_type) & (t.video_filename == video_filename)).collect()['sm_post']
+            return(social_media_post)
+
+        else:
+
+            print("generating new social media post")
+            social_media_post = generate_social_media_post(plain_text, social_media_type)
+            if sm_gen_flag:
+                cur_video_df.update({'sm_post': social_media_post})
+
+            # print(t.show())
 
         return cur_video_df['sm_post']
 
@@ -122,7 +165,6 @@ def gradio_interface():
         """)
         gr.Markdown("""<center><font size=6>Data Ops powered by <a href="https://github.com/pixeltable/pixeltable">Pixeltable</a></center>""")
         gr.Markdown("""<center>Pixeltable is a Python library providing a declarative interface for multimodal data (text, images, audio, video). It features built-in versioning, lineage tracking, and incremental updates, enabling users to store, transform, index, and iterate on data for their ML workflows. Data transformations, model inference, and custom logic are embedded as computed columns.
-
         </center>""")
         video_input = gr.Video(label="Upload Video File (max 25 MB):")
         social_media_type = gr.Dropdown(choices=["X (Twitter)", "Facebook", "LinkedIn"], label="Select Social Media Platform:", value='X (Twitter)')
@@ -137,4 +179,4 @@ def gradio_interface():
     return demo
 
 
-gradio_interface().launch(show_api=False)
+gradio_interface().launch(show_api=False)
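
For orientation, the edge cases this commit handles reduce to a two-level cache: transcription is keyed on the video file alone, while post generation is keyed on the (video, platform) pair. Below is a minimal sketch of that pattern, with plain dicts and hypothetical transcribe/generate_post callables standing in for the app's single Pixeltable table; it is not code from the commit.

# Sketch of the commit's caching logic (assumption: dicts stand in for
# the Pixeltable table; transcribe/generate_post are hypothetical).
transcripts = {}   # video_filename -> transcript text
posts = {}         # (video_filename, sm_type) -> generated post

def get_post(video_filename, sm_type, transcribe, generate_post):
    # Level 1: reuse an existing transcription of this video
    # (the commit's tr_audio_gen_flag).
    if video_filename not in transcripts:
        transcripts[video_filename] = transcribe(video_filename)
    # Level 2: reuse an existing post for this (video, platform) pair
    # (the commit's sm_gen_flag); generate one only when it is missing.
    key = (video_filename, sm_type)
    if key not in posts:
        posts[key] = generate_post(transcripts[video_filename], sm_type)
    return posts[key]

In the app itself both levels live in one table, so the two flags are derived from where(...).count() queries rather than dict lookups.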