# Mistral-Nemo-custom
#
# Space status: Sleeping
#
# File size: 1,513 bytes

import os
import time
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import gradio as gr
from threading import Thread

# Model ids offered by this app; the first entry is used as the default.
MODEL_LIST = ["mistralai/Mistral-Nemo-Instruct-2407"]
# Hugging Face access token taken from the environment; None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
# Model to query: MODEL_ID from the environment, falling back to the first
# MODEL_LIST entry when the variable is unset or empty so MODEL is never None.
MODEL = os.environ.get("MODEL_ID") or MODEL_LIST[0]
# filename: gradio_app.py

import gradio as gr
from huggingface_hub import InferenceClient

# Shared inference client for the whole app, bound to the configured model
# and authenticated with the token read from the environment.
client = InferenceClient(model=MODEL, token=HF_TOKEN)

def chat_with_model(system_prompt: str, user_message: str) -> str:
    """Send one system/user exchange to the model and return its full reply.

    The completion is requested in streaming mode and the streamed text
    fragments are concatenated into a single string.

    Args:
        system_prompt: Text sent as the ``system`` message.
        user_message: Text sent as the ``user`` message.

    Returns:
        The concatenated reply text; an empty string if no text was streamed.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ]

    fragments = []
    for chunk in client.chat_completion(
        messages=messages,
        max_tokens=500,
        stream=True,
    ):
        # Guard against chunks that carry no choices at all; indexing [0]
        # on an empty list would raise IndexError.
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        # delta.content is optional and may be None (e.g. on a closing
        # chunk); concatenating None to a str would raise TypeError.
        if text:
            fragments.append(text)

    return "".join(fragments)

# Build the UI components first (same creation order as they appear on the
# page), then wire them to chat_with_model through a gr.Interface.
_system_prompt_box = gr.Textbox(
    label="System Prompt",
    placeholder="Enter the system prompt here...",
)
_user_message_box = gr.Textbox(
    label="User Message",
    placeholder="Ask a question...",
)
_response_box = gr.Textbox(label="Response")

iface = gr.Interface(
    fn=chat_with_model,
    inputs=[_system_prompt_box, _user_message_box],
    outputs=_response_box,
    title="Mistral Chatbot",
    description="Chat with Mistral model using your own system prompts.",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch(share=False, show_api=True, show_error=True)