# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM
import streamlit as st
import torch

# Use the GPU when available; fall back to CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"


@st.cache_resource
def get_model():
    # Cache the tokenizer and model so they are loaded only once per session.
    tokenizer = AutoTokenizer.from_pretrained("onurnsfw/Gemma2-9b-classifier")
    model = AutoModelForCausalLM.from_pretrained("onurnsfw/Gemma2-9b-classifier")
    model.to(device)
    return tokenizer, model


tokenizer, model = get_model()

user_input = st.text_area("Enter Text to Analyze")
button = st.button("Analyze")

if user_input and button:
    # Alpaca-style prompt template: an instruction, an input, and an empty
    # response slot that the model fills in during generation.
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

    inputs = tokenizer(
        [
            alpaca_prompt.format(
                "Match the potential use case with the corresponding activity and emission values based on the provided context.",  # instruction
                user_input,  # input: pass the variable, not the literal string "{user_input}"
                "",  # response left empty for the model to generate
            )
        ],
        return_tensors="pt",
    ).to(device)

    outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
    st.write("Prediction: ", tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])