WildKratts committed on
Commit
594bda4
•
1 Parent(s): 03b9797

App Name Change

Browse files
Files changed (1) hide show
  1. app.py +161 -0
app.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ from openai import Audio
3
+ import os
4
+ import yt_dlp as youtube_dl
5
+ import guidance
6
+ from moviepy.editor import AudioFileClip
7
+ import streamlit as st
8
+ import os
9
+
10
# Directory that receives the per-chunk MP3 files written by split_audio_file.
output_dir = "./Chunks"
# Default output name; the __main__ block rebinds output_file from main().
output_file = "video_audio.mp3"
# Default chunk length in seconds (1800); also rebound from main() at start-up.
chunk_length = 120 * 15
# Module-level accumulator that transcribe_audio_dir appends summaries to.
transcripts = []
# NOTE(review): folder_path, audio_file_path and summary_llm are not referenced
# anywhere else in the visible part of this file - confirm before removing.
folder_path = "./Chunks"
audio_file_path = "./.DownloadedAudio"
summary_llm = guidance.llms.OpenAI('gpt-3.5-turbo-16k-0613', caching=False)

st.set_page_config(
    page_title='YouTube Video to Summary',
    page_icon='🐾',
    initial_sidebar_state="auto",
    layout="wide"
)

st.markdown(
    "<h1 style='text-align: center; color: white;'>YouTube Video to Summary</h1>",
    unsafe_allow_html=True,
)

st.sidebar.subheader("Enter Your API Key 🗝️")
open_api_key = st.sidebar.text_input(
    "Open API Key",
    value=st.session_state.get('open_api_key', ''),
    help="Get your API key from https://openai.com/",
    type='password'
)
# Remember the key in the session so the field stays pre-filled.
st.session_state['open_api_key'] = open_api_key
# Hand the key to the OpenAI client; without this the value typed into the
# sidebar is never used by the transcription and summarization calls.
if open_api_key:
    openai.api_key = open_api_key
37
+
38
+ #Function to download the audio from a youtube video
39
def download_audio(url, output_file):
    """Download the best available audio stream of *url* and convert it to MP3.

    The download is written under ``.DownloadedAudio/`` using *output_file* as
    the yt-dlp output template, then passed through the ``FFmpegExtractAudio``
    post-processor (MP3, quality setting ``192``).

    Args:
        url: Address of the video; handed to yt-dlp unchanged.
        output_file: File name (without directory) used for the download.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': '.DownloadedAudio/' + output_file,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
    }
    # The ffmpeg folder below only exists on the original Windows setup. Pin it
    # only when it is really there; otherwise leave the option unset so yt-dlp
    # falls back to its default ffmpeg lookup instead of a missing path.
    ffmpeg_dir = "C:/ffmpeg/bin"
    if os.path.isdir(ffmpeg_dir):
        ydl_opts['ffmpeg_location'] = ffmpeg_dir

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    print("Download Complete")
53
+
54
+
55
+
56
+ # Function to split the audio file into smaller chunks
57
def split_audio_file(audio_file_path, chunk_length, output_dir):
    """Cut an audio file into consecutive chunks and save each one as MP3.

    Chunks are named ``chunk_<start>-<end>.mp3`` (offsets in seconds) and are
    written into *output_dir*, which is created when missing.

    Args:
        audio_file_path: Path of the audio file to split.
        chunk_length: Length of each chunk in seconds; must be positive.
        output_dir: Directory that receives the chunk files.

    Returns:
        The list of chunk file paths, in chronological order.

    Raises:
        ValueError: If *chunk_length* is not greater than zero (the splitting
            loop would otherwise never advance).
    """
    if chunk_length <= 0:
        raise ValueError("chunk_length must be greater than 0")

    print("== Splitting audio...")
    os.makedirs(output_dir, exist_ok=True)

    audio = AudioFileClip(audio_file_path)
    chunks = []
    try:
        duration = audio.duration
        start_time = 0
        while start_time < duration:
            # The final chunk is clipped to the end of the recording.
            end_time = min(start_time + chunk_length, duration)
            chunk_file = os.path.join(output_dir, f"chunk_{start_time}-{end_time}.mp3")
            audio.subclip(start_time, end_time).write_audiofile(chunk_file)
            chunks.append(chunk_file)
            start_time += chunk_length
    finally:
        # Callers delete the source file right after splitting, so close the
        # clip explicitly instead of leaving it open until garbage collection.
        audio.close()

    return chunks
73
+
74
def transcribe_audio(audio_file_path):
    """Transcribe one audio file with the ``whisper-1`` model.

    The file is opened in binary mode and sent through
    ``openai.Audio.transcribe``; the resulting text is printed and returned.

    Args:
        audio_file_path: Path of the audio file to transcribe.

    Returns:
        The ``'text'`` field of the transcription response.
    """
    with open(audio_file_path, "rb") as audio_file:
        result = openai.Audio.transcribe("whisper-1", audio_file)
    text = result['text']
    print(text)
    return text
79
+
80
def _chunk_start_time(filename):
    """Return the start offset encoded in a ``chunk_<start>-<end>.mp3`` name.

    Names that do not follow that pattern sort last (infinity).
    """
    stem = filename[len("chunk_"):] if filename.startswith("chunk_") else filename
    try:
        return float(stem.split("-", 1)[0])
    except ValueError:
        return float("inf")


def transcribe_audio_dir(output_dir):
    """Transcribe and summarize every MP3 chunk in *output_dir*.

    Each chunk is transcribed, summarized, appended to the module-level
    ``transcripts`` list and then deleted. All collected summaries are finally
    written, one per line, to ``Transcripts/summary.txt`` under the current
    working directory.

    Args:
        output_dir: Directory containing the ``.mp3`` chunk files.

    Returns:
        The module-level ``transcripts`` list (summaries, not raw transcripts).
    """
    global transcripts

    # os.listdir gives no ordering guarantee; sort by the numeric start offset
    # so summaries follow the order of the recording (a plain string sort
    # would put chunk_1800 before chunk_900).
    chunk_names = sorted(
        (name for name in os.listdir(output_dir) if name.endswith(".mp3")),
        key=lambda name: (_chunk_start_time(name), name),
    )
    for filename in chunk_names:
        file_path = os.path.join(output_dir, filename)
        transcript = transcribe_audio(file_path)
        summary = generate_summary(transcript)
        transcripts.append(summary)
        os.remove(file_path)
    print("Chunk Transcription and Summarization Complete")
    print(transcripts)

    # Persist the summaries; create the target directory on first use.
    output_path = os.path.join(os.getcwd(), "Transcripts", "summary.txt")
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as file:
        file.writelines(summary + "\n" for summary in transcripts)

    return transcripts
100
+
101
def generate_summary(text):
    """Ask the chat model for a condensed summary of *text*.

    Sends a fixed system prompt plus a user message containing *text* to
    ``openai.ChatCompletion.create`` and returns the first choice's message
    content with surrounding whitespace stripped.

    Args:
        text: Transcript text to summarize.

    Returns:
        The summary string produced by the model.
    """
    system_message = {
        "role": "system",
        "content": "You are a world's best Wild Kratts episode summarizer. Condense the transcript text, capturing essential points and core plot points. Include relevant examples, omit excess details, and ensure the summary's length matches the original's complexity.",
    }
    user_message = {
        "role": "user",
        "content": f"Please summarize the following text:\n{text}\nSummary:",
    }
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-16k-0613",
        messages=[system_message, user_message],
        max_tokens=11000,
        stop=None,
        temperature=0.2,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content'].strip()
114
+
115
+ #download_audio(url, output_file)
116
+
117
+ import gradio as gr
118
+
119
+
120
def main():
    """Render the input widgets and return what the user selected.

    Returns:
        A tuple ``(url, output_file, chunk_length, audio_file)``. Exactly one
        of ``url`` / ``audio_file`` is set by the chosen source and the other
        is ``None``; ``output_file`` is always ``"default_output"``.
    """
    source = st.radio("Select audio source", ["YouTube Video", "Audio File"])
    if source == "YouTube Video":
        url = st.text_input(label="Video URL")
        audio_file = None
    else:  # Audio File
        audio_file = st.file_uploader("Upload audio file", type=["mp3", "wav"])
        url = None

    # A chunk length of 0 or less would keep the splitting loop from ever
    # advancing, so the widget enforces a lower bound of one second.
    chunk_length = st.number_input(
        label="Chunk Length (seconds)",
        min_value=1,
        value=900,
        step=1,
    )

    return url, "default_output", chunk_length, audio_file
132
+
133
def download_and_split_video(url, output_file, chunk_length, transcripts):
    """Download a video's audio, split it into chunks and summarize them.

    Args:
        url: Address of the video to download.
        output_file: Base name of the download; the audio is expected at
            ``.DownloadedAudio/<output_file>.mp3`` after download_audio runs.
        chunk_length: Chunk length in seconds passed to split_audio_file.
        transcripts: Unused; kept so existing callers keep working.

    Returns:
        Whatever transcribe_audio_dir returns for the module-level output_dir.
    """
    download_audio(url, output_file)
    audio_file_path = '.DownloadedAudio/' + f"{output_file}.mp3"
    try:
        split_audio_file(audio_file_path, chunk_length, output_dir)
    finally:
        # Remove the downloaded audio even when splitting fails, so a failed
        # run does not leave the file behind. The existence check keeps a
        # missing file from masking the original error.
        if os.path.exists(audio_file_path):
            os.remove(audio_file_path)
    return transcribe_audio_dir(output_dir)
139
+ #return transcribe_audio_dir(transcripts)
140
+
141
+
142
if __name__ == "__main__":
    # Draw the input widgets and collect the user's current selections.
    url, output_file, chunk_length, audio_file = main()

    if open_api_key:  # Only offer summarization once an API key was entered
        if st.button("Generate Summary"):
            if not url and not audio_file:
                # Nothing to process: tell the user instead of showing an
                # empty summary section.
                st.warning("Please provide a video URL or upload an audio file")
            else:
                with st.spinner("Summary Generating..."):
                    if url:  # YouTube URL provided
                        transcripts = download_and_split_video(url, output_file, chunk_length, transcripts)
                    else:  # Audio file uploaded
                        download_dir = '.DownloadedAudio/'
                        # The folder is not guaranteed to exist on a fresh
                        # checkout, so create it before saving the upload.
                        os.makedirs(download_dir, exist_ok=True)
                        audio_file_path = os.path.join(download_dir, output_file)
                        with open(audio_file_path, "wb") as f:
                            f.write(audio_file.getvalue())  # Save the upload to disk
                        try:
                            split_audio_file(audio_file_path, chunk_length, output_dir)
                            transcripts = transcribe_audio_dir(output_dir)
                        finally:
                            # Delete the saved upload whether or not processing succeeded.
                            os.remove(audio_file_path)
                st.subheader("Summary")
                for transcript in transcripts:  # One entry per summarized chunk
                    st.write(transcript)
    else:  # No API key entered yet
        st.warning("Please Enter OpenAI API Key")