import streamlit as st
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import nltk
from youtube_transcript_api import YouTubeTranscriptApi
import torch
from textblob import TextBlob
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Download NLTK data (tokenizer models, POS tagger, and stopword list)
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('stopwords')
# Load models and tokenizers
summary_model_name = 'utrobinmv/t5_summary_en_ru_zh_base_2048'
summary_model = T5ForConditionalGeneration.from_pretrained(summary_model_name)
summary_tokenizer = T5Tokenizer.from_pretrained(summary_model_name)
tag_tokenizer = AutoTokenizer.from_pretrained("fabiochiu/t5-base-tag-generation")
tag_model = AutoModelForSeq2SeqLM.from_pretrained("fabiochiu/t5-base-tag-generation")
captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
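# Note: Streamlit re-executes this script on every widget interaction, so the
# loads above are repeated on each rerun. A minimal sketch of the idiomatic
# fix, assuming Streamlit >= 1.18 (which provides st.cache_resource):
#
#     @st.cache_resource
#     def load_summary_model():
#         model = T5ForConditionalGeneration.from_pretrained(summary_model_name)
#         tokenizer = T5Tokenizer.from_pretrained(summary_model_name)
#         return model, tokenizer
#
#     summary_model, summary_tokenizer = load_summary_model()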
# Function to summarize text
def summarize_text(text, prefix):
    src_text = prefix + text
    inputs = summary_tokenizer(src_text, return_tensors="pt")
    generated_tokens = summary_model.generate(**inputs)
    result = summary_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    return result[0]
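# Example (per the model card, this checkpoint expects a task prefix such as
# 'summary: ', 'summary brief: ', or 'summary big: '):
#     summarize_text("Your long article text ...", "summary: ")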
# Function to generate tags
def generate_tags(text):
    with torch.no_grad():
        inputs = tag_tokenizer(text, max_length=256, truncation=True, return_tensors="pt")
        output = tag_model.generate(**inputs, num_beams=8, do_sample=True, min_length=10, max_length=64, num_return_sequences=1)
    decoded_output = tag_tokenizer.batch_decode(output, skip_special_tokens=True)[0]
    tags = list(set(decoded_output.strip().split(", ")))
    return tags
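# Example (hypothetical input): generate_tags("Python decorators explained ...")
# might return something like ['Python', 'Programming', 'Tutorial']. Note that
# the set() deduplication above makes the tag order non-deterministic.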
# Function to fetch a YouTube transcript
def fetch_transcript(url):
    # Take everything after 'watch?v=' and drop any extra query parameters
    # (e.g. '&t=30s'), which would otherwise corrupt the video ID.
    video_id = url.split('watch?v=')[-1].split('&')[0]
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    return ' '.join(entry['text'] for entry in transcript)
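# Note: YouTubeTranscriptApi.get_transcript is the pre-1.0 API of
# youtube-transcript-api; 1.x releases replaced it with an instance method,
# roughly YouTubeTranscriptApi().fetch(video_id). Errors now propagate to the
# caller instead of being returned as strings. Example (hypothetical URL):
#     fetch_transcript("https://www.youtube.com/watch?v=dQw4w9WgXcQ")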
# Functions to extract keywords and generate hashtags
def extract_keywords(content):
    text = content.lower()
    sentences = nltk.sent_tokenize(text)
    keywords = []
    for sentence in sentences:
        words = nltk.word_tokenize(sentence)
        tags = nltk.pos_tag(words)
        for word, tag in tags:
            # Keep nouns only: NN, NNS, NNP, NNPS
            if tag.startswith('NN'):
                keywords.append(word)
    return keywords
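# Example (hypothetical input; the input is lowercased, so keywords come back
# lowercase):
#     extract_keywords("The transformer model changed machine translation.")
#     -> something like ['transformer', 'model', 'machine', 'translation']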
def generate_hashtags(content, max_hashtags=10):
    keywords = extract_keywords(content)
    hashtags = []
    for keyword in keywords:
        hashtag = "#" + keyword
        # Skip overlong hashtags and duplicates (keywords can repeat)
        if len(hashtag) <= 20 and hashtag not in hashtags:
            hashtags.append(hashtag)
    return hashtags[:max_hashtags]
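# Example (hypothetical input):
#     generate_hashtags("AI tools for content creators and content teams")
#     -> something like ['#ai', '#tools', '#content', '#creators', '#teams']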
# Function to classify the tone (point of view) of a text
def extract_point_of_view(text):
    # Remove stopwords so sentiment is driven by content words
    stop_words = set(stopwords.words('english'))
    words = word_tokenize(str(text))
    filtered_words = [word for word in words if word.casefold() not in stop_words]
    text = ' '.join(filtered_words)
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity
    if polarity > 0.5:
        point_of_view = "Positive"
    elif polarity < -0.5:
        point_of_view = "Negative"
    else:
        point_of_view = "Neutral"
    return point_of_view
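# Note: TextBlob polarity ranges from -1.0 to 1.0, so the +/-0.5 cutoffs above
# are deliberately strict and most text will be labeled "Neutral". Example:
#     extract_point_of_view("I absolutely love this fantastic product!")
#     -> likely "Positive"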
# Streamlit app title
st.title("Multi-purpose AI App: WAVE_AI")

# Create tabs for the different functionalities
tab1, tab2, tab3, tab4, tab5 = st.tabs(["Text Summarization", "Text Tag Generation", "Image Captioning", "YouTube Transcript", "LinkedIn Post Analysis"])

# Text Summarization Tab
with tab1:
    st.header("Summary Title Maker")
    input_text = st.text_area("Enter the text to summarize:", height=300)
    if st.button("Generate the Title"):
        if input_text:
            title1 = summarize_text(input_text, 'summary: ')
            title2 = summarize_text(input_text, 'summary brief: ')
            st.write("### Title 1")
            st.write(title1)
            st.write("### Title 2")
            st.write(title2)
        else:
            st.warning("Please enter some text to summarize.")
# Text Tag Generation Tab
with tab2:
    st.header("Tag Generation from Text")
    text = st.text_area("Enter the text for tag extraction:", height=200)
    if st.button("Generate Tags"):
        if text:
            try:
                tags = generate_tags(text)
                st.write("**Generated Tags:**")
                st.write(tags)
            except Exception as e:
                st.error(f"An error occurred: {e}")
        else:
            st.warning("Please enter some text to generate tags.")
# Image Captioning Tab
with tab3:
    st.header("Image Captioning Extractor")
    image_url = st.text_input("Enter the URL of the image:")
    if st.button("Analyze Image"):
        if image_url:
            try:
                st.image(image_url, caption="Provided Image", use_column_width=True)
                caption = captioner(image_url)
                st.write("**Generated Caption:**")
                st.write(caption[0]['generated_text'])
            except Exception as e:
                st.error(f"An error occurred: {e}")
        else:
            st.warning("Please enter an image URL.")
# YouTube Transcript Tab
with tab4:
    st.header("YouTube Video Transcript Extractor")
    youtube_url = st.text_input("Enter YouTube URL:")
    if st.button("Get Transcript"):
        if youtube_url:
            try:
                transcript = fetch_transcript(youtube_url)
                st.success("Transcript successfully fetched!")
                st.text_area("Transcript", transcript, height=300)
            except Exception as e:
                st.error(f"An error occurred: {e}")
        else:
            st.warning("Please enter a URL.")
# LinkedIn Post Analysis Tab
with tab5:
    st.header("LinkedIn Post Analysis AI")
    text = st.text_area("Enter the LinkedIn Post:")
    if st.button("Analyze"):
        if text:
            # Generate tags
            tags = generate_tags(text)
            st.subheader("Most Relevant Keywords:")
            st.write(tags)
            # Generate summaries
            summary1 = summarize_text(text, 'summary: ')
            summary2 = summarize_text(text, 'summary brief: ')
            st.subheader("Summary Title 1:")
            st.write(summary1)
            st.subheader("Summary Title 2:")
            st.write(summary2)
            # Generate hashtags
            hashtags = generate_hashtags(text)
            st.subheader("Generated Hashtags for the Post:")
            st.write(hashtags)
            # Extract point of view
            point_of_view = extract_point_of_view(text)
            st.subheader("Tone of the Post:")
            st.write(point_of_view)
        else:
            st.warning("Please enter text to analyze.")