import os

import streamlit as st
from PIL import Image
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration

# PaliGemma is a gated model, so an HF access token is required.
# Read it from the environment instead of printing it to the console.
hf_token = os.getenv("hftoken")

processor = AutoProcessor.from_pretrained("google/paligemma-3b-pt-224", token=hf_token)
model = PaliGemmaForConditionalGeneration.from_pretrained("google/paligemma-3b-pt-224", token=hf_token)

st.title("Image segmentation and object analysis")

uploaded_file = st.file_uploader("Choose an image")
if uploaded_file is not None:
    image_data = uploaded_file.read()
    st.image(image_data)
    st.write("File uploaded")

    # read() left the buffer at EOF, so rewind before reopening it with PIL.
    # Convert to RGB so PNGs with an alpha channel can still be saved as JPEG.
    uploaded_file.seek(0)
    image = Image.open(uploaded_file).convert("RGB")

    # Save a local copy of the uploaded image.
    filepath = "./uploaded_image.jpg"
    image.save(filepath)
    st.success(f"Image saved successfully at {filepath}")

    # Note: the pretrained (pt) PaliGemma checkpoints respond best to short
    # task prefixes such as "caption en"; a free-form instruction may work
    # less reliably than with the fine-tuned (mix) checkpoints.
    prompt = "Describe the image content in detail."

    # Preprocess the image and prompt into model-ready tensors.
    inputs = processor(text=prompt, images=image, return_tensors="pt")

    # Generate a caption. generate() returns the prompt tokens followed by
    # the newly generated tokens, so slice off the prompt before decoding.
    input_len = inputs["input_ids"].shape[-1]
    generated_ids = model.generate(**inputs, max_new_tokens=100)
    generated_text = processor.decode(generated_ids[0][input_len:], skip_special_tokens=True)

    st.write(generated_text)
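
# A minimal way to run this app, assuming the file is saved as app.py
# (the filename is an assumption, not given in the source):
#
#   export hftoken=<your Hugging Face access token>
#   streamlit run app.py
#
# Streamlit serves the UI at http://localhost:8501 by default; the hftoken
# environment variable must be set in the shell before launching.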