ContentAgent

Running

App Files Files Community

ContentAgent / tools /transcribe_youtube

yetessam

New transcribe object

b321e9e verified 14 days ago

raw

history blame contribute delete

2.26 kB

	import io
	from typing import Any, Optional

	import whisper
	from pytube import YouTube
	from smolagents.tools import Tool


	class TranscribeYouTubeTool(Tool):
	    """Transcribe the audio track of a YouTube video with Whisper.

	    ``forward`` takes a YouTube URL, copies the video's audio-only stream
	    into memory through ``pytube`` and returns the text produced by a
	    Whisper model.
	    """

	    name = "transcribe_youtube"
	    description = "Returns a youtube transcript."
	    inputs = {'query': {'type': 'string', 'description': 'A YouTube URL.'}}
	    output_type = "string"

	    def __init__(self, max_results=10, *, model_name="base", **kwargs):
	        """Set up the tool without touching the network.

	        Args:
	            max_results: Not used by this tool; accepted only so callers
	                that already pass it keep working.
	            model_name: Whisper checkpoint name handed to
	                ``whisper.load_model`` (e.g. "small", "medium" or "large"
	                for better accuracy).
	            **kwargs: Extra keyword arguments forwarded to
	                ``pytube.YouTube``. If a ``url`` entry is present, that
	                video is selected immediately.

	        Raises:
	            ImportError: If ``pytube`` is not installed.
	        """
	        super().__init__()
	        self.max_results = max_results
	        self.model_name = model_name
	        try:
	            from pytube import YouTube
	        except ImportError as e:
	            raise ImportError(
	                "You must install package `pytube` to run this tool: for instance run `pip install pytube`."
	            ) from e

	        # The URL normally arrives later through forward(), so keep the
	        # class and its options around instead of instantiating it now.
	        self._youtube_cls = YouTube
	        preset_url = kwargs.pop("url", None)
	        self._youtube_kwargs = kwargs
	        self.yt = YouTube(preset_url, **kwargs) if preset_url else None
	        self.audio_buffer = None
	        # Loaded on first use and then reused across calls.
	        self._model = None

	    def forward(self, query: str) -> str:
	        """Return the transcript of the video at the given URL.

	        Args:
	            query: A YouTube URL.

	        Returns:
	            The transcribed text.

	        Raises:
	            ValueError: If ``query`` is empty or only whitespace.
	            RuntimeError: If the video exposes no audio-only stream.
	        """
	        if not query or not query.strip():
	            raise ValueError("A YouTube URL is required.")
	        self.yt = self._youtube_cls(query.strip(), **self._youtube_kwargs)
	        self.audio_buffer = self.get_audio()
	        return self.get_text()

	    def get_audio(self) -> io.BytesIO:
	        """Copy the selected video's first audio-only stream into memory.

	        Returns:
	            A ``BytesIO`` holding the audio, positioned at the start.

	        Raises:
	            RuntimeError: If no video has been selected yet, or the video
	                has no audio-only stream.
	        """
	        if self.yt is None:
	            raise RuntimeError("No video selected; call forward() with a YouTube URL first.")
	        audio_stream = self.yt.streams.filter(only_audio=True).first()
	        if audio_stream is None:
	            raise RuntimeError("No audio-only stream is available for this video.")

	        # Use a BytesIO buffer to store the audio in memory
	        audio_buffer = io.BytesIO()
	        audio_stream.stream_to_buffer(audio_buffer)
	        audio_buffer.seek(0)  # Reset buffer position to the beginning
	        return audio_buffer

	    def get_text(self) -> str:
	        """Transcribe the audio currently held in ``self.audio_buffer``.

	        Returns:
	            The text Whisper produced for the buffered audio.

	        Raises:
	            RuntimeError: If no audio has been downloaded yet.
	        """
	        import os
	        import tempfile

	        if self.audio_buffer is None:
	            raise RuntimeError("No audio downloaded; call forward() or get_audio() first.")
	        if self._model is None:
	            self._model = whisper.load_model(self.model_name)

	        # transcribe() is given a file path here rather than the in-memory
	        # buffer, so the audio is spilled to a temporary file first. The
	        # file is closed before use (delete=False) so it can be reopened by
	        # name, and is removed afterwards whatever happens.
	        tmp = tempfile.NamedTemporaryFile(delete=False)
	        try:
	            with tmp:
	                tmp.write(self.audio_buffer.getvalue())
	            result = self._model.transcribe(tmp.name)
	        finally:
	            os.remove(tmp.name)
	        return result["text"]


	# Example usage
	#tool = TranscribeYouTubeTool()
	#transcript = tool.forward("https://www.youtube.com/watch?v=example")
	#print("Transcript:", transcript)