import streamlit as st
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import nltk
from youtube_transcript_api import YouTubeTranscriptApi
import torch
from textblob import TextBlob
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Download NLTK data
# (on NLTK >= 3.9 you may also need 'punkt_tab' and 'averaged_perceptron_tagger_eng')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('stopwords')

# Load models and tokenizers
summary_model_name = 'utrobinmv/t5_summary_en_ru_zh_base_2048'
summary_model = T5ForConditionalGeneration.from_pretrained(summary_model_name)
summary_tokenizer = T5Tokenizer.from_pretrained(summary_model_name)

tag_tokenizer = AutoTokenizer.from_pretrained("fabiochiu/t5-base-tag-generation")
tag_model = AutoModelForSeq2SeqLM.from_pretrained("fabiochiu/t5-base-tag-generation")

captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")

# Function to summarize text; the model is steered via a task prefix
# such as 'summary: ' or 'summary brief: '
def summarize_text(text, prefix):
    src_text = prefix + text
    inputs = summary_tokenizer(src_text, return_tensors="pt")
    generated_tokens = summary_model.generate(**inputs)
    result = summary_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    return result[0]

# Function to generate a deduplicated list of tags
def generate_tags(text):
    with torch.no_grad():
        inputs = tag_tokenizer(text, max_length=256, truncation=True, return_tensors="pt")
        output = tag_model.generate(**inputs, num_beams=8, do_sample=True,
                                    min_length=10, max_length=64, num_return_sequences=1)
        decoded_output = tag_tokenizer.batch_decode(output, skip_special_tokens=True)[0]
    tags = list(set(decoded_output.strip().split(", ")))
    return tags

# Function to fetch a YouTube transcript; raises on failure so the caller
# can distinguish errors from transcript text
def fetch_transcript(url):
    # Strip any trailing query parameters (e.g. '&t=42s') from the video ID
    video_id = url.split('watch?v=')[-1].split('&')[0]
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    return ' '.join(entry['text'] for entry in transcript)

# Function to extract noun keywords from the text
def extract_keywords(content):
    text = content.lower()
    sentences = nltk.sent_tokenize(text)
    keywords = []
    for sentence in sentences:
        words = nltk.word_tokenize(sentence)
        tags = nltk.pos_tag(words)
        for word, tag in tags:
            if tag.startswith('NN'):  # keep nouns only
                keywords.append(word)
    return keywords

# Function to turn extracted keywords into hashtags, capped at max_hashtags
def generate_hashtags(content, max_hashtags=10):
    keywords = extract_keywords(content)
    hashtags = []
    for keyword in keywords:
        hashtag = "#" + keyword
        if len(hashtag) <= 20:
            hashtags.append(hashtag)
    return hashtags[:max_hashtags]

# Function to classify the tone of the text from its sentiment polarity
def extract_point_of_view(text):
    stop_words = set(stopwords.words('english'))
    words = word_tokenize(str(text))
    filtered_words = [word for word in words if word.casefold() not in stop_words]
    text = ' '.join(filtered_words)
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity
    if polarity > 0.5:
        point_of_view = "Positive"
    elif polarity < -0.5:
        point_of_view = "Negative"
    else:
        point_of_view = "Neutral"
    return point_of_view

# Streamlit app title
st.title("Multi-purpose AI App: WAVE_AI")

# Create tabs for the different functionalities
tab1, tab2, tab3, tab4, tab5 = st.tabs([
    "Text Summarization",
    "Text Tag Generation",
    "Image Captioning",
    "YouTube Transcript",
    "LinkedIn Post Analysis",
])

# Text Summarization tab
with tab1:
    st.header("Summary Title Maker")
    input_text = st.text_area("Enter the text to summarize:", height=300)
    if st.button("Generate Titles"):
        if input_text:
            title1 = summarize_text(input_text, 'summary: ')
            title2 = summarize_text(input_text, 'summary brief: ')
            st.write("### Title 1")
            st.write(title1)
            st.write("### Title 2")
            st.write(title2)
        else:
            st.warning("Please enter some text to summarize.")

# Text Tag Generation tab
with tab2:
    st.header("Tag Generation from Text")
    text = st.text_area("Enter the text for tag extraction:", height=200)
    if st.button("Generate Tags"):
        if text:
            try:
                tags = generate_tags(text)
                st.write("**Generated Tags:**")
                st.write(tags)
            except Exception as e:
                st.error(f"An error occurred: {e}")
        else:
            st.warning("Please enter some text to generate tags.")

# Image Captioning tab
with tab3:
    st.header("Image Caption Extractor")
    image_url = st.text_input("Enter the URL of the image:")
    if st.button("Analyze Image"):
        if image_url:
            try:
                st.image(image_url, caption="Provided Image", use_column_width=True)
                caption = captioner(image_url)
                st.write("**Generated Caption:**")
                st.write(caption[0]['generated_text'])
            except Exception as e:
                st.error(f"An error occurred: {e}")
        else:
            st.warning("Please enter an image URL.")

# YouTube Transcript tab
with tab4:
    st.header("YouTube Video Transcript Extractor")
    youtube_url = st.text_input("Enter YouTube URL:")
    if st.button("Get Transcript"):
        if youtube_url:
            try:
                transcript = fetch_transcript(youtube_url)
                st.success("Transcript successfully fetched!")
                st.text_area("Transcript", transcript, height=300)
            except Exception as e:
                st.error(f"An error occurred: {e}")
        else:
            st.warning("Please enter a URL.")

# LinkedIn Post Analysis tab
with tab5:
    st.header("LinkedIn Post Analysis AI")
    text = st.text_area("Enter the LinkedIn post:")
    if st.button("Analyze"):
        if text:
            # Generate tags
            tags = generate_tags(text)
            st.subheader("The Most Tracked Keywords:")
            st.write(tags)

            # Generate summaries
            summary1 = summarize_text(text, 'summary: ')
            summary2 = summarize_text(text, 'summary brief: ')
            st.subheader("Summary Title 1:")
            st.write(summary1)
            st.subheader("Summary Title 2:")
            st.write(summary2)

            # Generate hashtags
            hashtags = generate_hashtags(text)
            st.subheader("Generated Hashtags for the Post:")
            st.write(hashtags)

            # Extract point of view / tone
            point_of_view = extract_point_of_view(text)
            st.subheader("Tone of the Post:")
            st.write(point_of_view)
        else:
            st.warning("Please enter text to analyze.")
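
# ---------------------------------------------------------------------------
# Usage notes (a minimal sketch; the filename "wave_ai_app.py" below is an
# assumption, not fixed anywhere in this script):
#
#   pip install streamlit transformers torch nltk textblob youtube-transcript-api sentencepiece
#   streamlit run wave_ai_app.py
#
# sentencepiece is listed because T5Tokenizer depends on it and transformers
# does not install it by default.
#
# Optional: Streamlit re-runs this whole script on every interaction, so the
# module-level model loads above are repeated on each rerun. On Streamlit
# >= 1.18 they can be wrapped in st.cache_resource to load only once, e.g.
# (a sketch of the pattern, not the app's current structure):
#
#   @st.cache_resource
#   def load_summary_model():
#       return T5ForConditionalGeneration.from_pretrained(summary_model_name)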