danishmuhammad committed
Commit b018467 · verified · 1 Parent(s): cb69ba3

Update app.py

Files changed (1)
1. app.py +2 -2
app.py CHANGED
@@ -18,7 +18,7 @@ fourbit_models = [
 ] # More models at https://huggingface.co/unsloth

 model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name = "danishmuhammad/ccat2025_llama_gguf",
+    model_name = "PrunaAI/danishmuhammad-ccat2025_llama_lora_16bit-bnb-8bit-smashed",
     max_seq_length = max_seq_length,
     dtype = dtype,
     load_in_4bit = load_in_4bit,
@@ -43,7 +43,7 @@ with gr.Blocks() as demo:
     def answers_chat(user_input,history):
         history = history or []
         formatted_input = alpaca_prompt.format(user_input, "")
-        inputs = tokenizer([formatted_input], return_tensors="pt").to("cuda")
+        inputs = tokenizer([formatted_input], return_tensors="pt").to(model.device)["input_ids"]

         # Generate response with adjusted parameters
         outputs = model.generate(
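For reference, a minimal sketch (not part of this commit) of how the new inputs tensor is typically consumed by the generate call that the hunk truncates: extracting ["input_ids"] yields a bare tensor that can be passed positionally, and .to(model.device) avoids hard-coding "cuda" for the swapped-in 8-bit smashed model. The parameter values below are illustrative assumptions, not taken from app.py, and rely only on the standard transformers generate/batch_decode API.

# Sketch only: assumed parameters, standard transformers API.
outputs = model.generate(
    inputs,                # input_ids tensor, already on model.device
    max_new_tokens = 256,  # illustrative value; the app's real settings are not shown here
    use_cache = True,
)
response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]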