import streamlit as st
import torch
from PIL import Image

from VT5 import VT5
from transformers import (
    AutoProcessor,
    CLIPVisionModelWithProjection,
    T5ForConditionalGeneration,
    T5Tokenizer,
)
# Load the CLIP vision encoder and its matching image processor.
clip = CLIPVisionModelWithProjection.from_pretrained("openai/clip-vit-base-patch32")
processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Preprocess a bundled sample image (not currently used by the UI below).
default_img = Image.open("gfg.png")
default_img = processor(images=default_img, return_tensors="pt").pixel_values

# Assemble the VT5 captioning model from FLAN-T5 and CLIP, then load the
# fine-tuned weights on CPU.
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
t5 = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")
vt5 = VT5(t5, tokenizer, clip)
vt5.load_state_dict(torch.load("weights.bin", map_location=torch.device("cpu")))
vt5.eval()  # disable dropout etc. for inference
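
# Note: Streamlit re-executes this script on every interaction, so the models
# above are reloaded on each rerun. Wrapping the loading in a function
# decorated with @st.cache_resource keeps one copy alive across reruns; a
# minimal sketch, using the same calls as above:
#
#   @st.cache_resource
#   def load_models():
#       ...build clip, processor, tokenizer, t5, vt5 as above...
#       return vt5, processor
#
#   vt5, processor = load_models()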
def generate_caption(image):
    # VT5.generate_caption comes from the local VT5 module and is assumed to
    # accept a PIL image directly.
    caption = vt5.generate_caption(image)
    return caption
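
# Note: captioning is pure inference, so the call can be wrapped in
# torch.inference_mode() to skip gradient tracking, assuming
# VT5.generate_caption runs an ordinary forward pass:
#
#   with torch.inference_mode():
#       caption = vt5.generate_caption(image)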
st.title("Image Captioning App")
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image.", clamp=True, use_column_width=True)
    st.write("")
    st.write("Generating caption...")
    caption = generate_caption(image)
    st.write("Caption: ", caption)