allenai-OLMoE-1B-7B-0924

Runtime error

File size: 2,423 Bytes

2f4b832
e659cfe
 
9f7cb9a
2b0dd1e
aab0c47
 
 
2f4b832
0cb4dc1
9f7cb9a
e659cfe
 
 
9f7cb9a
 
 
 
 
2b0dd1e
 
 
 
e659cfe
 
2b0dd1e
0cb4dc1
e659cfe
 
 
 
 
 
 
 
2b0dd1e
 
 
 
b8261fb
 
e659cfe
0cb4dc1
 
 
 
 
 
 
e659cfe
0cb4dc1
e659cfe
0cb4dc1
 
 
 
e659cfe
0cb4dc1
 
 
e659cfe

import subprocess
import sys

# Install the OLMoE-enabled transformers fork BEFORE `transformers` is
# imported. A module that is already imported keeps the code it was loaded
# with, so reinstalling afterwards would leave this process running the old
# build. `sys` must also be imported for `sys.executable`, which makes pip
# target the interpreter that is running this script.
# NOTE(review): pinning this in requirements.txt would avoid a reinstall on
# every startup — confirm whether the deployment allows that.
subprocess.check_call(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "--force-reinstall",
        "--no-deps",
        "git+https://github.com/Muennighoff/transformers.git@olmoe",
    ]
)

# These imports intentionally follow the install step above (hence E402).
import gradio as gr  # noqa: E402
import spaces  # noqa: E402
from transformers import AutoModelForCausalLM, AutoTokenizer  # noqa: E402
import torch  # noqa: E402

# Hugging Face Hub id of the checkpoint served by this app.
model_name = "allenai/OLMoE-1B-7B-0924-Instruct"

# Load once at import time; the model is moved to CUDA and put in eval mode.
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype="auto").cuda().eval()
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Persona text placed at the very start of every prompt.
system_prompt = ("Adopt the persona of hilariously pissed off Andrej Karpathy "
                 "who is stuck inside a step function machine and remembers and counts everything he says "
                 "while always answering questions in full first principles analysis type of thinking "
                 "without using any analogies and always showing full working code or output in his answers.")

# Literal markers used to assemble the prompt: a role tag opens each turn and
# `prompt_suffix` closes it.
user_prompt = '<|user|>\n'
assistant_prompt = '<|assistant|>\n'
prompt_suffix = "<|end|>\n"

@spaces.GPU
def generate_response(message, history):
    """Generate the assistant's reply to ``message``.

    Args:
        message: The latest user message (plain text).
        history: Prior conversation as a sequence of ``[user_text, reply]``
            pairs. Pairs whose reply is still ``None`` (e.g. the pending turn
            for ``message`` itself) are skipped. ``None`` or an empty
            sequence means "no prior turns".

    Returns:
        The newly generated text only (the prompt is sliced off), stripped of
        leading and trailing whitespace.
    """
    max_new_tokens = 1000
    # Used to keep prompt + generation inside the model's context window;
    # if the config does not expose a limit, no trimming is performed.
    context_limit = getattr(model.config, "max_position_embeddings", None)

    # Replay completed turns in the same format as the live turn so the model
    # actually sees the conversation instead of only the latest message.
    completed_turns = [
        f"{user_prompt}{past_user}{prompt_suffix}"
        f"{assistant_prompt}{past_reply}{prompt_suffix}"
        for past_user, past_reply in (history or [])
        if past_user is not None and past_reply is not None
    ]

    while True:
        full_prompt = (
            f"{system_prompt}\n{''.join(completed_turns)}"
            f"{user_prompt}{message}{prompt_suffix}{assistant_prompt}"
        )
        inputs = tokenizer(full_prompt, return_tensors="pt")
        prompt_len = inputs["input_ids"].shape[1]
        fits = context_limit is None or prompt_len + max_new_tokens <= context_limit
        if fits or not completed_turns:
            break
        # Too long: forget the oldest completed turn and try again.
        completed_turns = completed_turns[1:]

    inputs = inputs.to("cuda:0")
    generate_ids = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.7,
        eos_token_id=tokenizer.eos_token_id,
    )
    # Decode only the tokens produced after the prompt.
    response = tokenizer.batch_decode(
        generate_ids[:, prompt_len:],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]
    return response.strip()

# Build the chat UI: a title, the chat transcript, a text input and a
# "Clear" button, plus the event wiring between them.
with gr.Blocks() as demo:
    gr.Markdown("# Pissed Off Karpathy Chatbot")
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        """Append the submitted message to the transcript as a pending turn.

        Returns an empty string (written back to the textbox, clearing it)
        and a new history list ending in ``[user_message, None]``; the
        ``None`` is the placeholder for the reply that ``bot`` fills in.
        """
        return "", history + [[user_message, None]]

    def bot(history):
        """Fill in the reply for the last (pending) turn of ``history``.

        Reads the user text of the final pair, passes it together with the
        history to ``generate_response``, stores the result in that pair in
        place, and returns the updated history.
        """
        user_message = history[-1][0]
        bot_message = generate_response(user_message, history)
        history[-1][1] = bot_message
        return history

    # On submit: first run `user` (inputs and outputs are the textbox and the
    # chatbot), then chain `bot`, which takes and returns the chatbot value.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    # "Clear" sets the chatbot value to None, emptying the transcript.
    clear.click(lambda: None, None, chatbot, queue=False)

# Enable the request queue and start the app; `api_open=False` and
# `show_api=False` are passed through to Gradio as written.
demo.queue(api_open=False)
demo.launch(debug=True, show_api=False)