wavesoumen committed
Commit 8e377cb
1 Parent(s): a80511b

Update app.py

Files changed (1)
  1. app.py +36 -58
app.py CHANGED
@@ -1,40 +1,17 @@
- import streamlit as st
- import requests
- from PIL import Image
- from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
  import nltk
  from youtube_transcript_api import YouTubeTranscriptApi

  # Download NLTK data
  nltk.download('punkt')

- # Initialize the image captioning processor and model
- caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
- caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

- # Initialize the tokenizer and model for tag generation
- tag_tokenizer = AutoTokenizer.from_pretrained("fabiochiu/t5-base-tag-generation")
- tag_model = AutoModelForSeq2SeqLM.from_pretrained("fabiochiu/t5-base-tag-generation")
-
- # Function to generate captions for an image
- def generate_caption(img_url, text="a photography of"):
-     try:
-         raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
-     except Exception as e:
-         st.error(f"Error loading image: {e}")
-         return None, None
-
-     # Conditional image captioning
-     inputs_conditional = caption_processor(raw_image, text, return_tensors="pt")
-     out_conditional = caption_model.generate(**inputs_conditional)
-     caption_conditional = caption_processor.decode(out_conditional[0], skip_special_tokens=True)
-
-     # Unconditional image captioning
-     inputs_unconditional = caption_processor(raw_image, return_tensors="pt")
-     out_unconditional = caption_model.generate(**inputs_unconditional)
-     caption_unconditional = caption_processor.decode(out_unconditional[0], skip_special_tokens=True)
-
-     return caption_conditional, caption_unconditional

  # Function to fetch YouTube transcript
  def fetch_transcript(url):
@@ -47,51 +24,30 @@ def fetch_transcript(url):
        return str(e)

  # Streamlit app title
- st.title("Multi-purpose Machine Learning App")

  # Create tabs for different functionalities
- tab1, tab2, tab3 = st.tabs(["Image Captioning", "Text Tag Generation", "YouTube Transcript"])

  # Image Captioning Tab
  with tab1:
-     st.header("Image Captioning")
-
-     # Input for image URL
-     img_url = st.text_input("Enter Image URL:")
-
-     # If an image URL is provided
-     if st.button("Generate Captions", key='caption_button'):
-         if img_url:
-             caption_conditional, caption_unconditional = generate_caption(img_url)
-             if caption_conditional and caption_unconditional:
-                 st.success("Captions successfully generated!")
-                 st.image(img_url, caption="Input Image", use_column_width=True)
-                 st.write("### Conditional Caption")
-                 st.write(caption_conditional)
-                 st.write("### Unconditional Caption")
-                 st.write(caption_unconditional)
-         else:
-             st.warning("Please enter an image URL.")
-
- # Text Tag Generation Tab
- with tab2:
      st.header("Text Tag Generation")

      # Text area for user input
      text = st.text_area("Enter the text for tag extraction:", height=200)

      # Button to generate tags
-     if st.button("Generate Tags", key='tag_button'):
          if text:
              try:
                  # Tokenize and encode the input text
-                 inputs = tag_tokenizer([text], max_length=512, truncation=True, return_tensors="pt")

                  # Generate tags
-                 output = tag_model.generate(**inputs, num_beams=8, do_sample=True, min_length=10, max_length=64)

                  # Decode the output
-                 decoded_output = tag_tokenizer.batch_decode(output, skip_special_tokens=True)[0]

                  # Extract unique tags
                  tags = list(set(decoded_output.strip().split(", ")))
@@ -104,6 +60,28 @@ with tab2:
              else:
                  st.warning("Please enter some text to generate tags.")

  # YouTube Transcript Tab
  with tab3:
      st.header("YouTube Video Transcript Extractor")
@@ -112,7 +90,7 @@ with tab3:
      youtube_url = st.text_input("Enter YouTube URL:")

      # Button to get transcript
-     if st.button("Get Transcript", key='transcript_button'):
          if youtube_url:
              transcript = fetch_transcript(youtube_url)
              if "error" not in transcript.lower():
 
+ import streamlit as st
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
  import nltk
  from youtube_transcript_api import YouTubeTranscriptApi

  # Download NLTK data
  nltk.download('punkt')

+ # Initialize the image captioning pipeline
+ captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")

+ # Load the tokenizer and model for tag generation
+ tokenizer = AutoTokenizer.from_pretrained("fabiochiu/t5-base-tag-generation")
+ model = AutoModelForSeq2SeqLM.from_pretrained("fabiochiu/t5-base-tag-generation")

  # Function to fetch YouTube transcript
  def fetch_transcript(url):

        return str(e)

  # Streamlit app title
+ st.title("Multi-purpose Machine Learning App: WAVE_AI")

  # Create tabs for different functionalities
+ tab1, tab2, tab3 = st.tabs(["Text Tag Generation", "Image Captioning", "YouTube Transcript"])

  # Text Tag Generation Tab
  with tab1:
      st.header("Text Tag Generation")

      # Text area for user input
      text = st.text_area("Enter the text for tag extraction:", height=200)

      # Button to generate tags
+     if st.button("Generate Tags"):
          if text:
              try:
                  # Tokenize and encode the input text
+                 inputs = tokenizer([text], max_length=512, truncation=True, return_tensors="pt")

                  # Generate tags
+                 output = model.generate(**inputs, num_beams=8, do_sample=True, min_length=10, max_length=64)

                  # Decode the output
+                 decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0]

                  # Extract unique tags
                  tags = list(set(decoded_output.strip().split(", ")))

          else:
              st.warning("Please enter some text to generate tags.")

+ # Image Captioning Tab
+ with tab2:
+     st.header("Image Captioning Extractor")
+
+     # Input for image URL
+     image_url = st.text_input("Enter the URL of the image:")
+
+     # If an image URL is provided
+     if image_url:
+         try:
+             # Display the image
+             st.image(image_url, caption="Provided Image", use_column_width=True)
+
+             # Generate the caption
+             caption = captioner(image_url)
+
+             # Display the caption
+             st.write("**Generated Caption:**")
+             st.write(caption[0]['generated_text'])
+         except Exception as e:
+             st.error(f"An error occurred: {e}")
+
  # YouTube Transcript Tab
  with tab3:
      st.header("YouTube Video Transcript Extractor")

      youtube_url = st.text_input("Enter YouTube URL:")

      # Button to get transcript
+     if st.button("Get Transcript"):
          if youtube_url:
              transcript = fetch_transcript(youtube_url)
              if "error" not in transcript.lower():