Refactor model.py and requirements.txt for better code organization and remove flash-attn dependency
Browse files
- kitt/core/model.py +0 -1
- requirements.txt +0 -1
kitt/core/model.py
CHANGED
@@ -347,7 +347,6 @@ def run_inference_ollama(prompt):
|
|
347 |
|
348 |
def load_gpu_model():
|
349 |
import bitsandbytes
|
350 |
-
import flash_attn
|
351 |
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM
|
352 |
|
353 |
tokenizer = AutoTokenizer.from_pretrained(
|
|
|
347 |
|
348 |
def load_gpu_model():
|
349 |
import bitsandbytes
|
|
|
350 |
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM
|
351 |
|
352 |
tokenizer = AutoTokenizer.from_pretrained(
|
requirements.txt
CHANGED
@@ -6,7 +6,6 @@ wurlitzer
|
|
6 |
accelerate
|
7 |
bitsandbytes
|
8 |
optimum
|
9 |
-
flash-attn
|
10 |
# auto-gptq
|
11 |
gradio
|
12 |
TTS
|
|
|
6 |
accelerate
|
7 |
bitsandbytes
|
8 |
optimum
|
|
|
9 |
# auto-gptq
|
10 |
gradio
|
11 |
TTS
|