import gradio as gr
from ctransformers import AutoModelForCausalLM
# Define the model repository and file
MODEL_REPO = "TheBloke/OpenHermes-2-Mistral-7B-GGUF"
MODEL_FILE = "openhermes-2-mistral-7b.Q8_0.gguf"  # Q8_0 is the highest-quality quant; a smaller one (e.g. Q4_K_M) is faster on CPU
# Download and load the model
print(f"Downloading {MODEL_FILE} from {MODEL_REPO}...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    model_file=MODEL_FILE,
    model_type="mistral",
    # gpu_layers=50,  # uncomment to offload layers to GPU (requires the ctransformers[cuda] build)
    context_length=256,  # reduced context length for faster responses
)
print("Model loaded successfully.")
# Function to generate responses
def chat_with_model(prompt):
    response = model(prompt)
    return response
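# A minimal sketch (not wired into the UI below): OpenHermes-2 is fine-tuned on
# the ChatML prompt format, so wrapping the raw text in that template and
# bounding generation usually yields cleaner answers. max_new_tokens,
# temperature, and stop are standard ctransformers generation kwargs; the
# system prompt wording here is an assumption, not part of the original app.
def chat_with_model_chatml(prompt):
    chatml_prompt = (
        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
        f"<|im_start|>user\n{prompt}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
    # Keep max_new_tokens small so prompt + completion fit in context_length=256
    return model(chatml_prompt, max_new_tokens=128, temperature=0.7, stop=["<|im_end|>"])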
# Gradio UI
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query..."),
    outputs="text",
    title="Mistral-7B Chatbot",
    description="Optimized chatbot using Mistral-7B GGUF with improved speed.",
)
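# A hedged variant (not used above): ctransformers can stream tokens with
# stream=True, and recent Gradio versions stream generator outputs into the
# textbox progressively (older releases need iface.queue() first). Passing
# fn=chat_with_model_streaming to gr.Interface would enable this.
def chat_with_model_streaming(prompt):
    text = ""
    for token in model(prompt, stream=True):  # yields generated tokens one at a time
        text += token
        yield text  # Gradio re-renders the output box with the partial answer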
# Run the Gradio app
if __name__ == "__main__":
    iface.launch(share=True)