danishmuhammad committed
Commit b018467 · verified · 1 Parent(s): cb69ba3

Update app.py

Files changed (1)
1. app.py +2 -2
app.py CHANGED
@@ -18,7 +18,7 @@ fourbit_models = [
 ] # More models at https://huggingface.co/unsloth

 model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name = "danishmuhammad/ccat2025_llama_gguf",
+    model_name = "PrunaAI/danishmuhammad-ccat2025_llama_lora_16bit-bnb-8bit-smashed",
     max_seq_length = max_seq_length,
     dtype = dtype,
     load_in_4bit = load_in_4bit,
@@ -43,7 +43,7 @@ with gr.Blocks() as demo:
     def answers_chat(user_input,history):
         history = history or []
         formatted_input = alpaca_prompt.format(user_input, "")
-        inputs = tokenizer([formatted_input], return_tensors="pt").to("cuda")
+        inputs = tokenizer([formatted_input], return_tensors="pt").to(model.device)["input_ids"]

         # Generate response with adjusted parameters
         outputs = model.generate(
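For reference, a minimal sketch (not part of this commit) of how the new inputs tensor is typically consumed by the generate call that the hunk truncates: extracting ["input_ids"] yields a bare tensor that can be passed positionally, and .to(model.device) avoids hard-coding "cuda" for the swapped-in 8-bit smashed model. The parameter values below are illustrative assumptions, not taken from app.py, and rely only on the standard transformers generate/batch_decode API.

# Sketch only: assumed parameters, standard transformers API.
outputs = model.generate(
    inputs,                # input_ids tensor, already on model.device
    max_new_tokens = 256,  # illustrative value; the app's real settings are not shown here
    use_cache = True,
)
response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]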