# pip install transformers streamlit torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import streamlit as st
checkpoint = "HuggingFaceTB/SmolLM-135M-Instruct"

device = "cpu"  # use "cuda" to run on a GPU, or "cpu" for CPU-only inference


@st.cache_resource
def _load_tokenizer_and_model(checkpoint_name, target_device):
    """Load the tokenizer and causal LM for *checkpoint_name* onto *target_device*.

    Wrapped in ``st.cache_resource`` so the objects are created once and
    reused, instead of being reloaded from the checkpoint each time this
    script is executed.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)
    # for multiple GPUs install accelerate and do
    # `AutoModelForCausalLM.from_pretrained(checkpoint_name, device_map="auto")`
    loaded_model = AutoModelForCausalLM.from_pretrained(checkpoint_name).to(target_device)
    return loaded_tokenizer, loaded_model


tokenizer, model = _load_tokenizer_and_model(checkpoint, device)

# Create the chat history list the first time through; later runs of the
# script find it already present in the session state and leave it alone.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

st.title("Dexy Chat Assistant")

# Input widgets: the name shown next to the user's messages, and the message
# text to send.
user_name = st.text_input(label="Your name please?: ", key="user_name")
user_input = st.text_input(label="Enter your message:", key="user_input")

# Handle a click on "Send": record the user's turn, generate a reply, and
# store only the newly generated text as the assistant's turn.
if st.button("Send"):
    if user_input:
        # Add user message to history
        st.session_state.messages.append({"role": "user", "content": user_input})

        # Render the whole conversation with the model's chat template.
        # add_generation_prompt=True asks the template to end the prompt with
        # the assistant header, so generation starts a fresh assistant turn.
        input_text = tokenizer.apply_chat_template(
            st.session_state.messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        # The templated text already contains the chat special tokens, so the
        # tokenizer must not add another set on top of them.
        encoded = tokenizer(input_text, return_tensors="pt", add_special_tokens=False)
        inputs = encoded.input_ids.to(device)
        attention_mask = encoded.attention_mask.to(device)
        outputs = model.generate(
            inputs,
            attention_mask=attention_mask,
            max_new_tokens=50,
            temperature=0.2,
            top_p=0.9,
            do_sample=True,
        )

        # The generated sequence starts with the prompt tokens. Keep only what
        # comes after them, and drop special tokens, so the stored assistant
        # message is just the reply and not a copy of the whole transcript
        # (which would otherwise be fed back into the next prompt).
        new_tokens = outputs[0][inputs.shape[-1]:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

        # Add assistant's response to history
        st.session_state.messages.append({"role": "assistant", "content": response})

# Display full chat history. This runs on every execution of the script, not
# only right after "Send", so the transcript stays visible when another widget
# (for example the name field) triggers a rerun.
speaker = user_name or "You"  # avoid rendering a bare ": message" when no name is given
for msg in st.session_state.messages:
    if msg["role"] == "user":
        st.write(f"{speaker}: {msg['content']}")
    else:
        st.write(f"Dexy: {msg['content']}")