sasan committed on
Commit
a3debde
·
1 Parent(s): 491a509

Refactor model.py and requirements.txt for better code organization and remove flash-attn dependency

Browse files
Files changed (2) hide show
  1. kitt/core/model.py +0 -1
  2. requirements.txt +0 -1
kitt/core/model.py CHANGED
@@ -347,7 +347,6 @@ def run_inference_ollama(prompt):
347
 
348
  def load_gpu_model():
349
  import bitsandbytes
350
- import flash_attn
351
  from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM
352
 
353
  tokenizer = AutoTokenizer.from_pretrained(
 
347
 
348
  def load_gpu_model():
349
  import bitsandbytes
 
350
  from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM
351
 
352
  tokenizer = AutoTokenizer.from_pretrained(
requirements.txt CHANGED
@@ -6,7 +6,6 @@ wurlitzer
6
  accelerate
7
  bitsandbytes
8
  optimum
9
- flash-attn
10
  # auto-gptq
11
  gradio
12
  TTS
 
6
  accelerate
7
  bitsandbytes
8
  optimum
 
9
  # auto-gptq
10
  gradio
11
  TTS