# ProfCool / app.py
# Hugging Face Space by ntaexams — commit f9acf93 (verified), "Update app.py"
import os
import gradio as gr
from ctransformers import AutoModelForCausalLM
# Model weights: quantized GGUF build of OpenHermes-2 Mistral-7B, served from the Hub.
MODEL_REPO = "TheBloke/OpenHermes-2-Mistral-7B-GGUF"
MODEL_FILE = "openhermes-2-mistral-7b.Q8_0.gguf"  # Q8_0 quantization for better CPU performance

# Fetch the weights (cached by the Hub client after the first run) and load
# them for CPU inference via ctransformers.
print(f"Downloading {MODEL_FILE} from {MODEL_REPO}...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    model_file=MODEL_FILE,
    model_type="mistral",
    context_length=256,  # deliberately short context to keep CPU responses fast
)
print("Model loaded successfully.")
# Inference helper wired into the Gradio interface below.
def chat_with_model(prompt):
    """Return the model's text completion for *prompt*.

    Calling the ctransformers model object directly runs generation with
    the defaults it was loaded with (see `context_length` above).
    """
    return model(prompt)
# --- Gradio front-end ---
# A single free-text input mapped straight onto the inference helper.
query_box = gr.Textbox(lines=2, placeholder="Enter your query...")
iface = gr.Interface(
    fn=chat_with_model,
    inputs=query_box,
    outputs="text",
    title="Mistral-7B Chatbot",
    description="Optimized chatbot using Mistral-7B GGUF with improved speed.",
)
# Run the Gradio app
if __name__ == "__main__":
    # share=True asks Gradio for a temporary public *.gradio.live tunnel in
    # addition to the local server. NOTE(review): on Hugging Face Spaces the
    # share link is unnecessary (Spaces expose the app itself) — confirm this
    # flag is intended for the deployment target.
    iface.launch(share=True)