kusumakar committed
Commit d90da65 · 1 Parent(s): 867889f

Update app.py

Files changed (1): app.py (+7, -13)
app.py CHANGED

@@ -2,8 +2,7 @@ import torch
 import numpy as np
 from PIL import Image
 import streamlit as st
-from transformers import GPT2Tokenizer, GPT2LMHeadModel
-from transformers import AutoTokenizer, VisionEncoderDecoderModel, ViTFeatureExtractor
+from transformers import AutoTokenizer, VisionEncoderDecoderModel, ViTFeatureExtractor, GPT2Tokenizer, GPT2LMHeadModel
 
 # Load the Model, feature extractor and tokenizer
 model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
@@ -19,19 +18,19 @@ def generate_captions(image):
 
 # Load the pre-trained model and tokenizer
 model_name = "gpt2"
-tokenizer = GPT2Tokenizer.from_pretrained(model_name)
-model = GPT2LMHeadModel.from_pretrained(model_name)
+tokenizer_2 = GPT2Tokenizer.from_pretrained(model_name)
+model_2 = GPT2LMHeadModel.from_pretrained(model_name)
 
 # Define the Streamlit app
 def generate_paragraph(prompt):
     # Tokenize the prompt
-    input_ids = tokenizer.encode(prompt, return_tensors="pt")
+    input_ids = tokenizer_2.encode(prompt, return_tensors="pt")
 
     # Generate the paragraph
-    output = model.generate(input_ids, max_length=200, num_return_sequences=1, early_stopping=True)
+    output = model_2.generate(input_ids, max_length=200, num_return_sequences=1, early_stopping=True)
 
     # Decode the generated output into text
-    paragraph = tokenizer.decode(output[0], skip_special_tokens=True)
+    paragraph = tokenizer_2.decode(output[0], skip_special_tokens=True)
     return paragraph
 
 # Streamlit app
@@ -47,14 +46,9 @@ def main():
     if uploaded_file is not None:
         # load the image
         image = Image.open(uploaded_file).convert("RGB")
-        image = image.resize((224, 224))
-        image_array = np.array(image)
-        normalized_image = image_array / 255.0
-        reshaped_image = normalized_image.reshape((1, 224, 224, 3))
-        image_tensor = torch.from_numpy(normalized_image).permute(2, 0, 1).unsqueeze(0).float()
 
         # context as prompt
-        prompt = generate_captions(image_tensor)
+        prompt = generate_captions(image)
         st.write("The Context is:", prompt)
 
         # display the image
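In short, the commit does two things. First, reusing the names tokenizer and model for GPT-2 had been shadowing the captioning model and tokenizer loaded at the top of app.py, so generate_captions would likely have run against the GPT-2 weights; renaming them to tokenizer_2 and model_2 removes the collision. Second, the manual numpy preprocessing (resize to 224x224, divide by 255, HWC-to-CHW permute) duplicated work the captioning pipeline already performs, so main() can now pass the PIL image straight to generate_captions. Below is a minimal sketch of how generate_captions likely consumes the raw image; its body sits outside this diff, so the from_pretrained calls and generation parameters here are assumptions based on the usual VisionEncoderDecoderModel captioning pattern, not the actual app.py code.

from PIL import Image
from transformers import AutoTokenizer, VisionEncoderDecoderModel, ViTFeatureExtractor

# Captioning model, feature extractor and tokenizer, as loaded at the top of app.py
model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
feature_extractor = ViTFeatureExtractor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")

def generate_captions(image):
    # ViTFeatureExtractor resizes to the ViT input size (224x224),
    # rescales to [0, 1] and normalizes, so the caller can pass the
    # PIL image from Image.open directly, with no manual numpy steps.
    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
    output_ids = model.generate(pixel_values, max_length=50)  # max_length=50 is an assumed value
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

With preprocessing delegated to the feature extractor, the new prompt = generate_captions(image) line in main() is all the call site needs.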