# pip install transformers streamlit
"""Streamlit chat UI ("Dexy Chat Assistant") backed by SmolLM-135M-Instruct.

The script keeps the conversation in ``st.session_state.messages`` as a list
of ``{"role": ..., "content": ...}`` dicts, sends the whole history through
the tokenizer's chat template on every "Send" click, and renders the full
transcript below the inputs.
"""
from transformers import AutoModelForCausalLM, AutoTokenizer
import streamlit as st

checkpoint = "HuggingFaceTB/SmolLM-135M-Instruct"
device = "cpu"  # use "cuda" for GPU usage or "cpu" for CPU usage


@st.cache_resource
def _load_model(name, target_device):
    """Load the tokenizer and model for *name* and move the model to *target_device*.

    Streamlit re-executes this script on every widget interaction; caching the
    loaded objects avoids re-reading the checkpoint on each rerun.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    # for multiple GPUs install accelerate and do
    # `AutoModelForCausalLM.from_pretrained(name, device_map="auto")`
    loaded_model = AutoModelForCausalLM.from_pretrained(name).to(target_device)
    return loaded_tokenizer, loaded_model


tokenizer, model = _load_model(checkpoint, device)


def _generate_reply(messages):
    """Return only the assistant's newly generated text for *messages*.

    Args:
        messages: Chat history as a list of ``{"role", "content"}`` dicts.

    Returns:
        The decoded reply with special tokens removed and surrounding
        whitespace stripped.
    """
    # add_generation_prompt=True opens the assistant turn so the model answers
    # instead of continuing the user's message. Tokenizing here (rather than
    # templating to text and tokenizing again) avoids duplicating special tokens.
    encoded = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(device)
    outputs = model.generate(
        **encoded,
        max_new_tokens=50,
        temperature=0.2,
        top_p=0.9,
        do_sample=True,
    )
    # generate() returns prompt + continuation; keep only the continuation so
    # the stored history does not contain a copy of the whole conversation.
    prompt_length = encoded["input_ids"].shape[-1]
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


st.title("Dexy Chat Assistant")

# Initialize session state for chat history
if 'messages' not in st.session_state:
    st.session_state.messages = []

# Text input for user
user_name = st.text_input("Your name please?: ", key="user_name")
user_input = st.text_input("Enter your message:", key="user_input")

if st.button("Send") and user_input:
    # Add user message to history
    st.session_state.messages.append({"role": "user", "content": user_input})

    # Process with model and add the assistant's response to history
    response = _generate_reply(st.session_state.messages)
    st.session_state.messages.append({"role": "assistant", "content": response})

# Display full chat history
for msg in st.session_state.messages:
    if msg["role"] == "user":
        # Fall back to a generic label when no name has been entered.
        speaker = user_name or "You"
    else:
        speaker = "Dexy"
    st.write(f"{speaker}: {msg['content']}")