tool-YoutubeTranscript-blog

Running

App Files Files Community

tool-YoutubeTranscript-blog / tool.py

VPCSinfo

[ADD] added fixes for tool model.

74d45f5 17 days ago

raw

history blame

3.81 kB

	from smolagents.tools import Tool
	from typing import Optional
	import os
	from transformers import pipeline
	import requests
	import io
	from PIL import Image
	from dotenv import load_dotenv

	# Load environment variables (presumably from a local .env file, per
	# python-dotenv's usual behavior) before any tool is instantiated, so that
	# os.getenv('HF_API_KEY') below can see the Hugging Face token.
	load_dotenv()

	class TranscriptSummarizer(Tool):
	    """Tool that summarizes a transcript and generates an illustration.

	    The summary is produced locally with a ``transformers`` summarization
	    pipeline (``google/pegasus-xsum``). An image prompt is derived from the
	    first words of the summary and sent to the Hugging Face Inference API;
	    the returned image is written to ``Image/image.jpg``.

	    The API token is read from the ``HF_API_KEY`` environment variable.
	    """

	    description = "Summarizes a transcript and generates blog content using the transformers library and Hugging Face API for image generation."
	    name = "transcript_summarizer"
	    inputs = {'transcript': {'type': 'string', 'description': 'The transcript to summarize.'}}
	    output_type = "string"

	    def __init__(self, *args, **kwargs):
	        """Build the summarization pipeline and the image-API settings.

	        Positional and keyword arguments are forwarded unchanged to the
	        ``Tool`` base class.
	        """
	        super().__init__(*args, **kwargs)
	        self.summarizer = pipeline("summarization", model="google/pegasus-xsum")
	        self.api_url = "https://api-inference.huggingface.co/models/ZB-Tech/Text-to-Image"
	        # Only send an Authorization header when a token is configured;
	        # otherwise the header would literally read "Bearer None".
	        api_key = os.getenv('HF_API_KEY')
	        self.headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}

	    @staticmethod
	    def _summary_lengths(length):
	        """Return ``(max_length, min_length)`` for a transcript of *length* characters."""
	        if length <= 1000:
	            return 300, 100
	        if length <= 3000:
	            return 750, 250
	        return 1500, 500

	    def query(self, payload):
	        """POST *payload* to the image-generation endpoint and return the raw bytes.

	        Args:
	            payload: JSON-serializable request body, e.g. ``{"inputs": prompt}``.

	        Returns:
	            The response body as ``bytes`` (expected to be encoded image data).

	        Raises:
	            requests.HTTPError: if the API answers with a 4xx/5xx status.
	            requests.Timeout: if the API does not answer in time.
	        """
	        response = requests.post(
	            self.api_url,
	            headers=self.headers,
	            json=payload,
	            timeout=120,  # generous: image generation can be slow, but never hang forever
	        )
	        # Fail here with a clear HTTP error instead of handing a JSON error
	        # body to the image decoder.
	        response.raise_for_status()
	        return response.content

	    def forward(self, transcript: str) -> str:
	        """Summarize *transcript* and save a generated illustration.

	        Args:
	            transcript: The text to summarize.

	        Returns:
	            The summary followed by ``"Image URL: <path>"`` pointing at the
	            saved image file, or a string starting with
	            ``"An unexpected error occurred: "`` if any step fails.
	        """
	        try:
	            max_length, min_length = self._summary_lengths(len(transcript))
	            result = self.summarizer(
	                transcript,
	                max_length=max_length,
	                min_length=min_length,
	                do_sample=False,
	                # Transcripts are routinely longer than the model's input
	                # window; truncate instead of failing inside the model.
	                truncation=True,
	            )
	            summary = result[0]['summary_text']

	            key_entities = summary.split()[:3]  # first 3 words stand in for key entities
	            image_prompt = f"Generate an image related to: {' '.join(key_entities)}, cartoon style"
	            image_bytes = self.query({"inputs": image_prompt})
	            image = Image.open(io.BytesIO(image_bytes))

	            image_folder = "Image"
	            os.makedirs(image_folder, exist_ok=True)
	            image_url = os.path.join(image_folder, "image.jpg")
	            # JPEG cannot store alpha/palette modes, so normalize to RGB first.
	            if image.mode != "RGB":
	                image = image.convert("RGB")
	            image.save(image_url)
	            return f"{summary}\n\nImage URL: {image_url}"  # a local file path, not a web URL
	        except Exception as e:
	            # Tool boundary: report the failure as text rather than raising.
	            return f"An unexpected error occurred: {str(e)}"

	class YouTubeTranscriptExtractor(Tool):
	    """Tool that fetches a YouTube video's captions as plain text.

	    English captions (code ``'en'``) are preferred; otherwise the first
	    available caption track is used. SRT sequence numbers and timestamp
	    lines are stripped from the result.
	    """

	    description = "Extracts the transcript from a YouTube video."
	    name = "youtube_transcript_extractor"
	    inputs = {'video_url': {'type': 'string', 'description': 'The URL of the YouTube video.'}}
	    output_type = "string"

	    def __init__(self, *args, **kwargs):
	        """Forward all arguments to ``Tool`` and mark the tool as not yet initialized."""
	        super().__init__(*args, **kwargs)
	        self.is_initialized = False

	    @staticmethod
	    def _strip_srt_markup(srt_text):
	        """Drop SRT index lines and ``-->`` timestamp lines, keeping every other line."""
	        return "".join(
	            line + "\n"
	            for line in srt_text.splitlines()
	            if not line.strip().isdigit() and "-->" not in line
	        )

	    def forward(self, video_url: str) -> str:
	        """Return the cleaned caption text for *video_url*.

	        Args:
	            video_url: The URL of the YouTube video.

	        Returns:
	            The caption text with one caption line per output line, or a
	            string starting with ``"An unexpected error occurred: "`` if the
	            video has no captions or any step fails.
	        """
	        try:
	            # Imported lazily so the module loads even without pytubefix.
	            from pytubefix import YouTube

	            captions = YouTube(video_url).captions
	            if 'en' in captions:
	                caption = captions['en']
	            else:
	                available = captions.all()
	                if not available:
	                    # Previously surfaced as an opaque "list index out of range".
	                    return "An unexpected error occurred: no captions are available for this video."
	                caption = available[0]

	            cleaned_transcript = self._strip_srt_markup(caption.generate_srt_captions())

	            print("transcript : ", cleaned_transcript)
	            return cleaned_transcript
	        except Exception as e:
	            # Tool boundary: report the failure as text rather than raising.
	            return f"An unexpected error occurred: {str(e)}"