import os

import streamlit as st
from PIL import Image
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration

# PaliGemma is a gated model, so an HF access token is required.
# Read it from the environment instead of printing it to the console.
hf_token = os.getenv("hftoken")

processor = AutoProcessor.from_pretrained("google/paligemma-3b-pt-224", token=hf_token)
model = PaliGemmaForConditionalGeneration.from_pretrained("google/paligemma-3b-pt-224", token=hf_token)

st.title("Image segmentation and object analysis")

uploaded_file = st.file_uploader("Choose an image")
if uploaded_file is not None:
    image_data = uploaded_file.read()
    st.image(image_data)
    st.write("File uploaded")

    # read() left the buffer at EOF, so rewind before reopening it with PIL.
    # Convert to RGB so PNGs with an alpha channel can still be saved as JPEG.
    uploaded_file.seek(0)
    image = Image.open(uploaded_file).convert("RGB")

    # Save a local copy of the uploaded image.
    filepath = "./uploaded_image.jpg"
    image.save(filepath)
    st.success(f"Image saved successfully at {filepath}")

    # Note: the pretrained (pt) PaliGemma checkpoints respond best to short
    # task prefixes such as "caption en"; a free-form instruction may work
    # less reliably than with the fine-tuned (mix) checkpoints.
    prompt = "Describe the image content in detail."

    # Preprocess the image and prompt into model-ready tensors.
    inputs = processor(text=prompt, images=image, return_tensors="pt")

    # Generate a caption. generate() returns the prompt tokens followed by
    # the newly generated tokens, so slice off the prompt before decoding.
    input_len = inputs["input_ids"].shape[-1]
    generated_ids = model.generate(**inputs, max_new_tokens=100)
    generated_text = processor.decode(generated_ids[0][input_len:], skip_special_tokens=True)

    st.write(generated_text)
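
# A minimal way to run this app, assuming the file is saved as app.py
# (the filename is an assumption, not given in the source):
#
#   export hftoken=<your Hugging Face access token>
#   streamlit run app.py
#
# Streamlit serves the UI at http://localhost:8501 by default; the hftoken
# environment variable must be set in the shell before launching.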