"""Streamlit front end for text generation with the Microsoft Phi 2 model.

The page shows a prompt box and a button; pressing the button samples a
continuation of the prompt from Phi 2 and renders it below.
"""

import streamlit as st
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "microsoft/phi-2"


@st.cache_resource
def load_phi2():
    """Load the Phi 2 tokenizer and model once and reuse them across reruns.

    Streamlit re-executes this script on every widget interaction, so loading
    at module top level would reload the weights on each button click. The
    ``st.cache_resource`` decorator keeps a single shared instance instead.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # NOTE(review): trust_remote_code=True allows Python code shipped in the
    # model repository to run locally -- confirm this is still required.
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
    )
    # The model is pinned to CPU in float32; adjust device_map / torch_dtype
    # here if the app should use an accelerator.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map="cpu",
        trust_remote_code=True,
        torch_dtype=torch.float32,
    )
    return tokenizer, model


tokenizer, model = load_phi2()

# Streamlit UI
st.title("Microsoft Phi 2 Streamlit App")

# User input prompt
prompt = st.text_area(
    "Enter your prompt:",
    """Write a short summary about how to create a healthy lifestyle.""",
)

# Generate output based on user input
if st.button("Generate Output"):
    if not prompt.strip():
        # A blank prompt gives the model nothing to continue; ask for input
        # rather than passing an empty sequence to generate().
        st.warning("Please enter a prompt before generating.")
    else:
        with torch.no_grad():
            token_ids = tokenizer.encode(
                prompt,
                add_special_tokens=False,
                return_tensors="pt",
            )
            output_ids = model.generate(
                token_ids.to(model.device),
                max_new_tokens=512,
                do_sample=True,
                temperature=0.3,
            )

        # generate() returns prompt + continuation; slice off the prompt so
        # only the newly generated tokens are shown, and drop special tokens
        # (e.g. the end-of-text marker) from the displayed text.
        new_token_ids = output_ids[0][token_ids.size(1):]
        output = tokenizer.decode(new_token_ids, skip_special_tokens=True)

        st.text("Generated Output:")
        st.write(output)