import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModel
# Load tokenizer and model
model_name = "codewithdark/latent-recurrent-depth-lm"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device).eval()  # eval() disables dropout for deterministic inference
# Define function for inference
def chat_with_model(input_text, num_iterations, max_tokens, temperature, top_k):
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)
    with torch.no_grad():
        generated_ids = model.generate(
            input_ids,
            max_length=max_tokens,  # total length cap, including the prompt
            num_iterations=num_iterations,  # assumed supported by the custom model; see safe_generate below
            temperature=temperature,
            top_k=top_k,
        )
    response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    # 'Ġ' is the GPT-2 BPE marker for a word-initial space; map any that leak
    # through decoding back to a space rather than deleting them outright
    response = response.replace('Ġ', ' ')
    return response
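
# Optional hardening (a sketch, not part of the original app): the remote
# model's generate() signature isn't documented, so this hypothetical helper
# drops any keyword argument that generate() would reject before calling it.
import inspect

def safe_generate(model, input_ids, **kwargs):
    params = inspect.signature(model.generate).parameters
    # If generate() itself accepts **kwargs, pass everything through unchanged
    if not any(p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()):
        kwargs = {k: v for k, v in kwargs.items() if k in params}
    return model.generate(input_ids, **kwargs)
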
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Chat with Latent Recurrent Depth LM")
    with gr.Row():
        text_input = gr.Textbox(label="Enter your message")
    with gr.Row():
        num_iterations = gr.Slider(1, 20, step=1, value=10, label="Number of Iterations")
        max_tokens = gr.Slider(10, 200, step=10, value=50, label="Max Tokens")
        temperature = gr.Slider(0.1, 1.0, step=0.1, value=0.5, label="Temperature")
        top_k = gr.Slider(10, 100, step=10, value=50, label="Top-K Sampling")
    submit_button = gr.Button("Generate Response")
    output_text = gr.Textbox(label="Model Response")
    submit_button.click(
        fn=chat_with_model,
        inputs=[text_input, num_iterations, max_tokens, temperature, top_k],
        outputs=output_text,
    )

# Launch Gradio app
if __name__ == "__main__":
    demo.launch()
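    # Standard Gradio options: calling demo.queue() before launch() serializes
    # requests under load, and launch(share=True) exposes a temporary public URL.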