import gradio as gr
from ctransformers import AutoModelForCausalLM

# Define the model repository and file
MODEL_REPO = "TheBloke/OpenHermes-2-Mistral-7B-GGUF"
MODEL_FILE = "openhermes-2-mistral-7b.Q8_0.gguf"  # Q8_0 is near-lossless; a smaller quant (e.g. Q4_K_M) is faster on CPU

# Download and load the model
print(f"Downloading {MODEL_FILE} from {MODEL_REPO}...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    model_file=MODEL_FILE,
    model_type="mistral",
    # gpu_layers=50,  # Uncomment to offload layers to GPU (requires a CUDA-enabled ctransformers build)
    context_length=256  # Small context window trades prompt length for speed; raise (e.g. 2048) for longer inputs
)
print("Model loaded successfully.")

# Function to generate responses
def chat_with_model(prompt):
    # Cap output length and use mild sampling so replies stay responsive yet varied
    response = model(prompt, max_new_tokens=256, temperature=0.7)
    return response
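
# OpenHermes-2 was fine-tuned on the ChatML prompt format, so wrapping raw user
# text in ChatML tags typically yields better-behaved replies. A minimal sketch;
# the system message is an illustrative choice, not part of the original script:
def chat_with_chatml(user_message):
    prompt = (
        "<|im_start|>system\n"
        "You are a helpful assistant.<|im_end|>\n"
        f"<|im_start|>user\n{user_message}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
    # Stop once the model closes its turn
    return model(prompt, max_new_tokens=256, stop=["<|im_end|>"])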

# Gradio UI
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query..."),
    outputs="text",
    title="Mistral-7B Chatbot",
    description="CPU-friendly chatbot running the OpenHermes-2 Mistral-7B GGUF model via ctransformers.",
)
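
# For a multi-turn chat layout, recent Gradio releases offer gr.ChatInterface,
# which passes the message and chat history to the handler. A commented sketch
# (history is ignored here because the model call above is stateless):
# def respond(message, history):
#     return chat_with_model(message)
# iface = gr.ChatInterface(fn=respond, title="Mistral-7B Chatbot")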

# Run the Gradio app
if __name__ == "__main__":
    iface.launch(share=True)