Spaces:
Running
Running
from typing import Any, Optional | |
from smolagents.tools import Tool | |
from pytube import youtube | |
import whisper | |
import io | |
class TranscribeYouTubeTool(Tool):
    """Agent tool that transcribes the audio track of a YouTube video.

    ``forward`` receives a YouTube URL, downloads the first audio-only stream
    into memory with ``pytube`` and transcribes it with a Whisper model.
    """

    name = "transcribe_youtube"
    description = "Returns a youtube transcript."
    inputs = {'query': {'type': 'string', 'description': 'A YouTube URL.'}}
    output_type = "string"

    def __init__(self, max_results=10, **kwargs):
        """Prepare the tool without touching the network.

        Args:
            max_results: Kept only so existing callers that pass it keep
                working; nothing in this tool reads it.
            **kwargs: Extra keyword arguments forwarded to ``pytube.YouTube``
                when a video is opened in ``forward``.

        Raises:
            ImportError: If ``pytube`` is not installed.
        """
        super().__init__()
        self.max_results = max_results
        try:
            from pytube import YouTube
        except ImportError as e:
            raise ImportError(
                "You must install package `pytube` to run this tool: for instance run `pip install pytube`."
            ) from e
        # The URL is only known when forward() is called, so the YouTube
        # object cannot be built here; keep the class and its options instead.
        self._youtube_cls = YouTube
        self._youtube_kwargs = dict(kwargs)
        self._model = None  # Whisper model, loaded lazily on first use.
        self.yt = None
        self.audio_buffer = None

    def forward(self, query: str) -> str:
        """Transcribe the video at the given URL.

        Args:
            query: A YouTube URL.

        Returns:
            The transcribed text, or a message starting with
            ``"An error occurred: "`` if any step failed.
        """
        # Tool boundary: failures are reported as text (as the helpers did
        # before) so the calling agent always receives a string.
        try:
            if not query or not query.strip():
                raise ValueError("A YouTube URL is required.")
            # The URL passed to forward() wins over any `url` given at init.
            self.yt = self._youtube_cls(**{**self._youtube_kwargs, "url": query.strip()})
            self.audio_buffer = self.get_audio()
            return self.get_text()
        except Exception as e:
            return f"An error occurred: {str(e)}"

    def get_audio(self):
        """Download the audio track of ``self.yt`` into memory.

        Returns:
            An ``io.BytesIO`` holding the audio, positioned at the start.

        Raises:
            RuntimeError: If no video has been opened yet, or the video
                exposes no audio-only stream.
        """
        if self.yt is None:
            raise RuntimeError("No video loaded; call forward() with a YouTube URL first.")
        audio_stream = self.yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            raise RuntimeError("No audio-only stream is available for this video.")
        # Use a BytesIO buffer to store the audio in memory
        audio_buffer = io.BytesIO()
        audio_stream.stream_to_buffer(audio_buffer)
        audio_buffer.seek(0)  # Reset buffer position to the beginning
        return audio_buffer

    def get_text(self):
        """Transcribe ``self.audio_buffer`` with Whisper.

        Returns:
            The ``"text"`` field of the Whisper transcription result.

        Raises:
            RuntimeError: If no audio has been downloaded yet.
        """
        import os
        import tempfile

        if self.audio_buffer is None:
            raise RuntimeError("No audio downloaded; call get_audio() first.")
        if self._model is None:
            # Use "small", "medium", or "large" for better accuracy
            self._model = whisper.load_model("base")
        # transcribe() takes a file path or an array rather than a file-like
        # object, so the in-memory audio is spilled to a temporary file.
        # NOTE(review): no suffix is set; the decoder is presumed to detect
        # the container from the content -- confirm for non-mp4 streams.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            tmp.write(self.audio_buffer.getvalue())
            tmp_path = tmp.name
        try:
            result = self._model.transcribe(tmp_path)
        finally:
            os.remove(tmp_path)
        return result["text"]
# Example usage:
# youtube_url = "https://www.youtube.com/watch?v=example"
# transcript = TranscribeYouTubeTool().forward(youtube_url)
# print("Transcript:", transcript)