# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM
import streamlit as st
import torch

# Use the GPU when available; fall back to CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"


@st.cache_resource
def get_model():
    # Cache the tokenizer and model so they are loaded only once per session.
    tokenizer = AutoTokenizer.from_pretrained("onurnsfw/Gemma2-9b-classifier")
    model = AutoModelForCausalLM.from_pretrained("onurnsfw/Gemma2-9b-classifier")
    model.to(device)
    return tokenizer, model


tokenizer, model = get_model()

user_input = st.text_area("Enter Text to Analyze")
button = st.button("Analyze")

if user_input and button:
    # Alpaca-style prompt template: an instruction, an input, and an empty
    # response slot that the model fills in during generation.
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

    inputs = tokenizer(
        [
            alpaca_prompt.format(
                "Match the potential use case with the corresponding activity and emission values based on the provided context.",  # instruction
                user_input,  # input: pass the variable, not the literal string "{user_input}"
                "",  # response left empty for the model to generate
            )
        ],
        return_tensors="pt",
    ).to(device)

    outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
    st.write("Prediction: ", tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])