# filename: gradio_app.py

import os

import gradio as gr
from huggingface_hub import InferenceClient

# Model and access token are read from the environment.
MODEL_LIST = ["mistralai/Mistral-Nemo-Instruct-2407"]
HF_TOKEN = os.environ.get("HF_TOKEN", None)
# Fall back to the first listed model when MODEL_ID is not set.
MODEL = os.environ.get("MODEL_ID", MODEL_LIST[0])

# Initialize the InferenceClient for the hosted model
client = InferenceClient(
    MODEL,
    token=HF_TOKEN,
)

def chat_with_model(system_prompt, user_message):
    """Send a system prompt and user message to the model and return the full reply."""
    # Prepare messages for the chat completion
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ]

    # Stream the completion and accumulate the response text
    response = ""
    for message in client.chat_completion(
        messages=messages,
        max_tokens=500,
        stream=True,
    ):
        # Some chunks (e.g. the final one) may carry no content
        response += message.choices[0].delta.content or ""

    return response
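
# Example call (prompt strings are illustrative):
#   chat_with_model("You are a helpful assistant.", "Summarize Mistral NeMo in one sentence.")
#   -> returns the assistant's full reply as a single string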

# Create the Gradio interface
iface = gr.Interface(
    fn=chat_with_model,
    inputs=[
        gr.Textbox(label="System Prompt", placeholder="Enter the system prompt here..."),
        gr.Textbox(label="User Message", placeholder="Ask a question..."),
    ],
    outputs=gr.Textbox(label="Response"),
    title="Mistral Chatbot",
    description="Chat with Mistral model using your own system prompts."
)

# Launch the app
if __name__ == "__main__":
    iface.launch(show_api=True, share=False, show_error=True)
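
# To run locally (a sketch; the HF_TOKEN and MODEL_ID values below are
# illustrative and assume access to the model via the Hugging Face Inference API):
#
#   export HF_TOKEN=hf_xxxxxxxxxxxx
#   export MODEL_ID=mistralai/Mistral-Nemo-Instruct-2407
#   python gradio_app.py
#
# Gradio serves the interface at http://127.0.0.1:7860 by default.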