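# WAVE_AI: a multi-purpose Streamlit app bundling five tools in tabs:
# text summarization, tag generation, image captioning, YouTube transcript
# extraction, and LinkedIn post analysis.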
import streamlit as st
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import nltk
from youtube_transcript_api import YouTubeTranscriptApi
import torch
from textblob import TextBlob
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Download NLTK data
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('stopwords')
# Load models and tokenizers
summary_model_name = 'utrobinmv/t5_summary_en_ru_zh_base_2048'
summary_model = T5ForConditionalGeneration.from_pretrained(summary_model_name)
summary_tokenizer = T5Tokenizer.from_pretrained(summary_model_name)
tag_tokenizer = AutoTokenizer.from_pretrained("fabiochiu/t5-base-tag-generation")
tag_model = AutoModelForSeq2SeqLM.from_pretrained("fabiochiu/t5-base-tag-generation")
captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
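# Note: Streamlit re-runs this script on every interaction, so in a larger
# app these loads are often wrapped in a cached factory using Streamlit's
# st.cache_resource decorator. A minimal sketch (load_summarizer is a
# hypothetical helper, not part of this app):
#
#   @st.cache_resource
#   def load_summarizer():
#       model = T5ForConditionalGeneration.from_pretrained(summary_model_name)
#       tokenizer = T5Tokenizer.from_pretrained(summary_model_name)
#       return model, tokenizer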
# Summarize text with the multilingual T5 model; `prefix` selects the task
# ("summary: " for a regular summary, "summary brief: " for a shorter one).
def summarize_text(text, prefix):
    src_text = prefix + text
    inputs = summary_tokenizer(src_text, return_tensors="pt")
    generated_tokens = summary_model.generate(**inputs)
    result = summary_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    return result[0]
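# Example usage (a sketch; the exact wording depends on the model weights):
#   summarize_text("Streamlit turns Python scripts into shareable web apps.",
#                  "summary: ")
#   -> a short one-line summary string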
# Generate topic tags with the t5-base-tag-generation model; set() removes
# duplicates from the comma-separated output.
def generate_tags(text):
    with torch.no_grad():
        inputs = tag_tokenizer(text, max_length=256, truncation=True, return_tensors="pt")
        output = tag_model.generate(**inputs, num_beams=8, do_sample=True, min_length=10,
                                    max_length=64, num_return_sequences=1)
        decoded_output = tag_tokenizer.batch_decode(output, skip_special_tokens=True)[0]
    tags = list(set(decoded_output.strip().split(", ")))
    return tags
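# Example usage (a sketch; tags vary between runs because do_sample=True):
#   generate_tags("PyTorch is an open source machine learning framework.")
#   -> e.g. ["Machine Learning", "PyTorch", "Open Source"]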
# Fetch the transcript of a YouTube video given its watch URL.
def fetch_transcript(url):
    # Take the ID after "watch?v=" and drop any trailing query parameters.
    video_id = url.split('watch?v=')[-1].split('&')[0]
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        transcript_text = ' '.join([entry['text'] for entry in transcript])
        return transcript_text
    except Exception as e:
        # Prefix failures so callers can tell them apart from transcript text.
        return f"Error: {e}"
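# Example usage (VIDEO_ID is a placeholder, not a real video):
#   fetch_transcript("https://www.youtube.com/watch?v=VIDEO_ID&t=42s")
#   -> the concatenated transcript text, or an "Error: ..." string
# Note: short youtu.be links are not handled by the simple split above.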
# Extract noun keywords (POS tags starting with 'NN') from the text.
def extract_keywords(content):
    text = content.lower()
    sentences = nltk.sent_tokenize(text)
    keywords = []
    for sentence in sentences:
        words = nltk.word_tokenize(sentence)
        tags = nltk.pos_tag(words)
        for word, tag in tags:
            if tag.startswith('NN'):
                keywords.append(word)
    return keywords
# Turn the extracted keywords into short hashtags, capped at max_hashtags.
def generate_hashtags(content, max_hashtags=10):
    keywords = extract_keywords(content)
    hashtags = []
    for keyword in keywords:
        hashtag = "#" + keyword
        if len(hashtag) <= 20:
            hashtags.append(hashtag)
    return hashtags[:max_hashtags]
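# Example usage (a sketch; the POS tagger picks out the nouns):
#   generate_hashtags("Our new product launch doubled revenue this quarter.")
#   -> roughly ["#product", "#launch", "#revenue", "#quarter"]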
# Classify the overall tone of a text from TextBlob sentiment polarity.
def extract_point_of_view(text):
    stop_words = set(stopwords.words('english'))
    words = word_tokenize(str(text))
    filtered_words = [word for word in words if word.casefold() not in stop_words]
    text = ' '.join(filtered_words)
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity
    if polarity > 0.5:
        point_of_view = "Positive"
    elif polarity < -0.5:
        point_of_view = "Negative"
    else:
        point_of_view = "Neutral"
    return point_of_view
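# Example usage (polarity lies in [-1, 1]; the ±0.5 thresholds mean mildly
# positive or negative posts come back as "Neutral"):
#   extract_point_of_view("This was a great and truly inspiring event!")
#   -> likely "Positive"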
# Streamlit app title
st.title("Multi-purpose AI App: WAVE_AI")
# Create tabs for different functionalities
tab1, tab2, tab3, tab4, tab5 = st.tabs(["Text Summarization", "Text Tag Generation", "Image Captioning", "YouTube Transcript", "LinkedIn Post Analysis"])
# Text Summarization Tab
with tab1:
    st.header("Summary Title Maker")
    input_text = st.text_area("Enter the text to summarize:", height=300)
    if st.button("Generate the Title"):
        if input_text:
            title1 = summarize_text(input_text, 'summary: ')
            title2 = summarize_text(input_text, 'summary brief: ')
            st.write("### Title 1")
            st.write(title1)
            st.write("### Title 2")
            st.write(title2)
        else:
            st.warning("Please enter some text to summarize.")
# Text Tag Generation Tab
with tab2:
    st.header("Tag Generation from Text")
    text = st.text_area("Enter the text for tag extraction:", height=200)
    if st.button("Generate Tags"):
        if text:
            try:
                tags = generate_tags(text)
                st.write("**Generated Tags:**")
                st.write(tags)
            except Exception as e:
                st.error(f"An error occurred: {e}")
        else:
            st.warning("Please enter some text to generate tags.")
# Image Captioning Tab
with tab3:
    st.header("Image Caption Extractor")
    image_url = st.text_input("Enter the URL of the image:")
    if st.button("Analyze Image"):
        if image_url:
            try:
                st.image(image_url, caption="Provided Image", use_column_width=True)
                caption = captioner(image_url)
                st.write("**Generated Caption:**")
                st.write(caption[0]['generated_text'])
            except Exception as e:
                st.error(f"An error occurred: {e}")
        else:
            st.warning("Please enter an image URL.")
# YouTube Transcript Tab
with tab4:
    st.header("YouTube Video Transcript Extractor")
    youtube_url = st.text_input("Enter YouTube URL:")
    if st.button("Get Transcript"):
        if youtube_url:
            transcript = fetch_transcript(youtube_url)
            if not transcript.startswith("Error:"):
                st.success("Transcript successfully fetched!")
                st.text_area("Transcript", transcript, height=300)
            else:
                st.error(f"An error occurred: {transcript}")
        else:
            st.warning("Please enter a URL.")
# LinkedIn Post Analysis Tab
with tab5:
    st.header("LinkedIn Post Analysis AI")
    text = st.text_area("Enter the LinkedIn Post:")
    if st.button("Analyze"):
        if text:
            # Generate tags
            tags = generate_tags(text)
            st.subheader("Most Tracked Keywords:")
            st.write(tags)
            # Generate summaries
            summary1 = summarize_text(text, 'summary: ')
            summary2 = summarize_text(text, 'summary brief: ')
            st.subheader("Summary Title 1:")
            st.write(summary1)
            st.subheader("Summary Title 2:")
            st.write(summary2)
            # Generate hashtags
            hashtags = generate_hashtags(text)
            st.subheader("Generated Hashtags for the Post:")
            st.write(hashtags)
            # Extract point of view
            point_of_view = extract_point_of_view(text)
            st.subheader("Tone of the Post:")
            st.write(point_of_view)
        else:
            st.warning("Please enter text to analyze.")