yetessam committed on
Commit
b82c290
·
verified ·
1 Parent(s): 3cdc010

Adding youtube audio tool

Browse files
Files changed (1) hide show
  1. tools/transcribe_youtube_audio +29 -0
tools/transcribe_youtube_audio ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pytube import YouTube
2
+ import whisper
3
+ import io
4
+
5
+
6
def transcribe_youtube_audio(youtube_url: str) -> str:
    """Download the audio track of a YouTube video and transcribe it with Whisper.

    Args:
        youtube_url: URL of the YouTube video whose audio should be transcribed.

    Returns:
        The transcribed text on success. On any failure, a string of the form
        ``"An error occurred: <details>"`` is returned instead of raising.
    """
    # Imported locally so this function is self-contained with respect to the
    # standard-library helpers it needs.
    import os
    import tempfile

    audio_path = None
    try:
        if not isinstance(youtube_url, str) or not youtube_url.strip():
            raise ValueError("youtube_url must be a non-empty string")

        # Step 1: Locate an audio-only stream for the video.
        yt = YouTube(youtube_url)
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            # .first() yields None when the filter matches nothing.
            raise ValueError("no audio-only stream is available for this video")

        # Step 2: Write the audio to a temporary file. Whisper's transcribe()
        # takes a file path (or an array/tensor of samples), not a file-like
        # object, so an in-memory BytesIO buffer cannot be passed to it.
        # delete=False lets the file be reopened by path after it is closed.
        with tempfile.NamedTemporaryFile(delete=False) as audio_file:
            audio_path = audio_file.name
            audio_stream.stream_to_buffer(audio_file)

        # Step 3: Load the Whisper model.
        # Use "small", "medium", or "large" for better accuracy.
        model = whisper.load_model("base")

        # Step 4: Transcribe the audio from the temporary file.
        result = model.transcribe(audio_path)
        return result["text"]
    except Exception as e:
        # Errors are reported through the return value rather than raised.
        return f"An error occurred: {str(e)}"
    finally:
        # Always remove the temporary audio file, whether or not we succeeded.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask the result.
                pass
25
+
26
# Example usage. Guarded so that importing this module as a tool does not
# trigger a download and transcription of the placeholder URL.
if __name__ == "__main__":
    youtube_url = "https://www.youtube.com/watch?v=example"
    lyrics = transcribe_youtube_audio(youtube_url)
    print("Lyrics:", lyrics)