import gradio as gr
from ctransformers import AutoModelForCausalLM
# Define the model repository and file
MODEL_REPO = "TheBloke/OpenHermes-2-Mistral-7B-GGUF"
MODEL_FILE = "openhermes-2-mistral-7b.Q8_0.gguf"  # Q8_0 is the highest-quality quant; a smaller one (e.g. Q4_K_M) is faster on CPU
# Download and load the model
print(f"Downloading {MODEL_FILE} from {MODEL_REPO}...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    model_file=MODEL_FILE,
    model_type="mistral",
    # gpu_layers=50,  # uncomment to offload layers to GPU (requires the ctransformers[cuda] build)
    context_length=256,  # reduced context length for faster responses
)
print("Model loaded successfully.")
# Function to generate responses
def chat_with_model(prompt):
    response = model(prompt)
    return response
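# A minimal sketch (not wired into the UI below): OpenHermes-2 is fine-tuned on
# the ChatML prompt format, so wrapping the raw text in that template and
# bounding generation usually yields cleaner answers. max_new_tokens,
# temperature, and stop are standard ctransformers generation kwargs; the
# system prompt wording here is an assumption, not part of the original app.
def chat_with_model_chatml(prompt):
    chatml_prompt = (
        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
        f"<|im_start|>user\n{prompt}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
    # Keep max_new_tokens small so prompt + completion fit in context_length=256
    return model(chatml_prompt, max_new_tokens=128, temperature=0.7, stop=["<|im_end|>"])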
# Gradio UI
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query..."),
    outputs="text",
    title="Mistral-7B Chatbot",
    description="Optimized chatbot using Mistral-7B GGUF with improved speed.",
)
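# A hedged variant (not used above): ctransformers can stream tokens with
# stream=True, and recent Gradio versions stream generator outputs into the
# textbox progressively (older releases need iface.queue() first). Passing
# fn=chat_with_model_streaming to gr.Interface would enable this.
def chat_with_model_streaming(prompt):
    text = ""
    for token in model(prompt, stream=True):  # yields generated tokens one at a time
        text += token
        yield text  # Gradio re-renders the output box with the partial answer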
# Run the Gradio app
if __name__ == "__main__":
    iface.launch(share=True)