Spaces:
Sleeping
Sleeping
WildKratts
committed on
Commit
•
c5a69a8
1
Parent(s):
45cecd4
Initial commit2
Browse files- Transcripts/summary.txt +1 -0
- YTSumv2.py +160 -0
- requirements.txt +6 -0
Transcripts/summary.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
The Kratt Brothers are zoologists who have dedicated their career to educating children about animals through successful TV shows. They have also founded the Kratt Brothers Creature Hero Foundation, which aims to establish wildlife refuges for endangered creatures. Dr. Jane Goodall is a global activist and founder of the Jane Goodall Institute and Roots & Shoots Youth Programme. She has spent six decades inspiring hope and taking action for the natural world.
|
YTSumv2.py
ADDED
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import openai
|
2 |
+
from openai import Audio
|
3 |
+
import os
|
4 |
+
import yt_dlp as youtube_dl
|
5 |
+
import guidance
|
6 |
+
from moviepy.editor import AudioFileClip
|
7 |
+
import streamlit as st
|
8 |
+
import os
|
9 |
+
|
10 |
+
# --- Module-level configuration -------------------------------------------
# NOTE(review): the directories below are absolute paths on one developer's
# Windows machine; they presumably do not exist on other hosts -- confirm
# before deploying elsewhere.

# Directory that receives the audio chunks and is later scanned for them.
output_dir = "C:/Users/Henry/Documents/GitHub/YTTranscriber/Chunks"

# Default file name for downloaded audio (the script entry point replaces
# this with the value returned by main()).
output_file = "video_audio.mp3"

# Default chunk duration in seconds (1800 s = 30 minutes).
chunk_length = 15 * 120

# Shared list of per-chunk summaries; transcribe_audio_dir() appends to it.
transcripts = []

# Same location as output_dir; not read by any function visible in this file.
folder_path = output_dir

# Overwritten by the script entry point before it is used.
audio_file_path = "C:/Users/Henry/Documents/GitHub//YTTranscriber/.DownloadedAudio"

# guidance LLM handle; not referenced by the functions visible in this file.
summary_llm = guidance.llms.OpenAI('gpt-3.5-turbo-16k-0613', caching=False)
|
17 |
+
|
18 |
+
# Page chrome: title, icon and a wide layout.
# NOTE(review): the page_icon and sidebar header glyphs look mis-encoded in
# this copy of the file -- confirm the intended emoji against the repository.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="auto",
    page_icon='๐พ',
    page_title='YouTube Video to Summary',
)

st.markdown(
    "<h1 style='text-align: center; color: white;'>YouTube Video to Summary</h1>",
    unsafe_allow_html=True,
)

# Sidebar: ask for the OpenAI API key, pre-filled from the session so the
# value survives Streamlit reruns.
st.sidebar.subheader("Enter Your API Key ๐๏ธ")
_remembered_key = st.session_state.get('open_api_key', '')
open_api_key = st.sidebar.text_input(
    "Open API Key",
    value=_remembered_key,
    help="Get your API key from https://openai.com/",
    type='password',
)
#os.environ["OPENAI_API_KEY"] = open_api_key
st.session_state['open_api_key'] = open_api_key
|
36 |
+
#load_dotenv(find_dotenv())
|
37 |
+
|
38 |
+
#Function to download the audio from a youtube video
|
39 |
+
def download_audio(url, output_file):
    """Download the audio track of a video and convert it to MP3.

    Args:
        url: Address of the video to fetch (passed straight to yt-dlp).
        output_file: File name used for the download inside the
            ``.DownloadedAudio/`` directory.

    The FFmpegExtractAudio post-processor re-encodes the download to MP3 at
    quality setting '192'.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': '.DownloadedAudio/' + output_file,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
    }

    # Only point yt-dlp at the bundled Windows ffmpeg when it is really there.
    # Passing a location that does not exist stops yt-dlp from using ffmpeg at
    # all; omitting the option lets it look ffmpeg up on its own.
    ffmpeg_dir = "C:/ffmpeg/bin"
    if os.path.isdir(ffmpeg_dir):
        ydl_opts['ffmpeg_location'] = ffmpeg_dir

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    print("Download Complete")
|
53 |
+
|
54 |
+
|
55 |
+
|
56 |
+
# Function to split the audio file into smaller chunks
|
57 |
+
def split_audio_file(audio_file_path, chunk_length, output_dir):
    """Cut an audio file into consecutive chunks and write each one as MP3.

    Args:
        audio_file_path: Path of the audio file to split.
        chunk_length: Length of each chunk in seconds; must be positive.
        output_dir: Directory that receives the chunk files (created when
            missing).

    Returns:
        List of written chunk paths, in playback order. Files are named
        ``chunk_<start>-<end>.mp3``; the last chunk may be shorter.

    Raises:
        ValueError: If ``chunk_length`` is not positive (the loop below would
            otherwise never advance past the start of the file).
    """
    if chunk_length <= 0:
        raise ValueError("chunk_length must be a positive number of seconds")

    print("== Splitting audio...")
    os.makedirs(output_dir, exist_ok=True)

    audio = AudioFileClip(audio_file_path)
    chunks = []
    try:
        duration = audio.duration
        start_time = 0
        while start_time < duration:
            # Clamp the final chunk to the end of the clip.
            end_time = min(start_time + chunk_length, duration)
            chunk = audio.subclip(start_time, end_time)
            chunk_file = os.path.join(output_dir, f"chunk_{start_time}-{end_time}.mp3")
            chunk.write_audiofile(chunk_file)
            chunks.append(chunk_file)
            start_time += chunk_length
    finally:
        # Release the reader that the clip keeps open on the source file, so
        # the caller can delete that file afterwards.
        audio.close()

    return chunks
|
73 |
+
|
74 |
+
def transcribe_audio(audio_file_path):
    """Transcribe one audio file with the "whisper-1" model.

    Args:
        audio_file_path: Path of the audio file to send.

    Returns:
        The ``'text'`` field of the transcription response. The text is also
        printed to stdout.
    """
    with open(audio_file_path, "rb") as audio_handle:
        result = openai.Audio.transcribe("whisper-1", audio_handle)

    text = result['text']
    print(text)
    return text
|
79 |
+
|
80 |
+
def _chunk_sort_key(filename):
    """Sort key that orders ``chunk_<start>-<end>.mp3`` names by start time."""
    start_text = filename.partition("_")[2].partition("-")[0]
    try:
        return (0, float(start_text), filename)
    except ValueError:
        # Not a chunk name: keep it, but after the real chunks, by name.
        return (1, 0.0, filename)


def transcribe_audio_dir(output_dir):
    """Transcribe and summarize every MP3 in a directory, then save the result.

    Each ``.mp3`` file in ``output_dir`` is transcribed, summarized and then
    deleted. The summaries are written, one per line, to
    ``Transcripts/summary.txt`` under the current working directory.

    Args:
        output_dir: Directory holding the audio chunks.

    Returns:
        List of summaries in chunk order. The module-level ``transcripts``
        name is rebound to this same list.
    """
    global transcripts
    # Start from a fresh list so summaries from an earlier call are not
    # written to the file (or returned) again.
    transcripts = []

    # os.listdir() gives no ordering guarantee, and a plain name sort would
    # put "chunk_10800" before "chunk_1800"; order by numeric start instead.
    mp3_names = sorted(
        (name for name in os.listdir(output_dir) if name.endswith(".mp3")),
        key=_chunk_sort_key,
    )

    for filename in mp3_names:
        file_path = os.path.join(output_dir, filename)
        transcript = transcribe_audio(file_path)
        summary = generate_summary(transcript)
        transcripts.append(summary)
        os.remove(file_path)

    print("Chunk Transcription and Summarization Complete")
    print(transcripts)

    # Write the collected summaries to a file, creating the folder if needed.
    transcripts_dir = os.path.join(os.getcwd(), "Transcripts")
    os.makedirs(transcripts_dir, exist_ok=True)
    output_path = os.path.join(transcripts_dir, "summary.txt")
    with open(output_path, "w", encoding="utf-8") as file:
        file.writelines(summary + "\n" for summary in transcripts)

    return transcripts
|
100 |
+
|
101 |
+
def generate_summary(text):
    """Summarize a transcript with the chat-completion API.

    Args:
        text: Transcript text to condense.

    Returns:
        The model's summary with surrounding whitespace stripped, or an empty
        string when ``text`` is empty or whitespace-only (no request is sent
        in that case).
    """
    # Nothing to summarize (e.g. a silent chunk): skip the API call rather
    # than asking the model to summarize an empty prompt.
    if not text or not text.strip():
        return ""

    # NOTE(review): the prompt tokens plus max_tokens presumably have to fit
    # in the model's context window, so very long transcripts may be rejected
    # with this setting -- confirm against the API limits.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-16k-0613",
        messages=[
            {"role": "system", "content": "You are a world bestsummarizer. Condense the transcript text, capturing essential points and core points. Include relevant examples, omit excess details, and ensure the summary's length matches the original's complexity."},
            {"role": "user", "content": f"Please summarize the following text:\n{text}\nSummary:"},
        ],
        max_tokens=11000,
        stop=None,
        temperature=0.2,
    )
    summary = response['choices'][0]['message']['content'].strip()
    return summary
|
114 |
+
|
115 |
+
#download_audio(url, output_file)
|
116 |
+
|
117 |
+
import gradio as gr
|
118 |
+
|
119 |
+
|
120 |
+
def main():
    """Render the input widgets and return what the user selected.

    Returns:
        Tuple ``(url, output_file, chunk_length, audio_file)``. ``url`` is
        set only for the "YouTube Video" source and ``audio_file`` only for
        the "Audio File" source; the other one is ``None``. ``output_file``
        is always ``"default_output"``.
    """
    url = None
    audio_file = None

    choice = st.radio("Select audio source", ["YouTube Video", "Audio File"])
    if choice == "YouTube Video":
        url = st.text_input(label="Video URL")
    else:
        # Any non-YouTube choice is the upload path.
        audio_file = st.file_uploader("Upload audio file", type=["mp3", "wav"])

    seconds_per_chunk = st.number_input(label="Chunk Length (seconds)", value=900, step=1)

    return url, "default_output", seconds_per_chunk, audio_file
|
132 |
+
|
133 |
+
def download_and_split_video(url, output_file, chunk_length, transcripts):
    """Download a video's audio, split it into chunks and summarize them.

    Args:
        url: Address of the video to download.
        output_file: Base file name (without extension) for the download.
        chunk_length: Chunk duration in seconds.
        transcripts: Accepted for call compatibility; not used here.

    Returns:
        Whatever ``transcribe_audio_dir`` returns for the chunk directory.
    """
    download_audio(url, output_file)
    audio_file_path = '.DownloadedAudio/' + f"{output_file}.mp3"
    try:
        split_audio_file(audio_file_path, chunk_length, output_dir)
    finally:
        # Remove the downloaded audio even when splitting fails, so a failed
        # run does not leave the file behind.
        if os.path.exists(audio_file_path):
            os.remove(audio_file_path)
    return transcribe_audio_dir(output_dir)
|
139 |
+
#return transcribe_audio_dir(transcripts)
|
140 |
+
|
141 |
+
|
142 |
+
if __name__ == "__main__":
    # Draw the input widgets and collect the user's choices.
    url, output_file, chunk_length, audio_file = main()

    if open_api_key:  # Only offer the action once an API key was entered
        if st.button("Generate Summary"):
            # Hand the key from the sidebar to the OpenAI client; without
            # this the key typed by the user is never used for the requests.
            openai.api_key = open_api_key

            with st.spinner("Summary Generating..."):
                if url:  # A YouTube URL was provided
                    transcripts = download_and_split_video(url, output_file, chunk_length, transcripts)
                elif audio_file:  # An audio file was uploaded
                    # The upload is staged on disk so it can be split.
                    os.makedirs('.DownloadedAudio', exist_ok=True)
                    audio_file_path = os.path.join('.DownloadedAudio/', output_file)
                    with open(audio_file_path, "wb") as f:
                        f.write(audio_file.getvalue())
                    try:
                        split_audio_file(audio_file_path, chunk_length, output_dir)
                        transcripts = transcribe_audio_dir(output_dir)
                    finally:
                        os.remove(audio_file_path)  # Always delete the staged upload
                else:  # Neither a URL nor a file: nothing to process
                    transcripts = []

            shown = [summary for summary in transcripts if summary]
            if shown:
                st.subheader("Summary")
                # Show every chunk's summary, not just the first one.
                st.write("\n\n".join(shown))
            elif url or audio_file:
                st.warning("No summary could be generated from the audio")
            else:
                st.warning("Please provide a video URL or upload an audio file")
    else:  # No API key yet
        st.warning("Please Enter OpenAI API Key")
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
openai
|
2 |
+
yt-dlp
|
3 |
+
moviepy
|
4 |
+
streamlit
|
5 |
+
gradio
|
6 |
+
guidance
|