Remove bitsandbytes
- README.md +0 -3
- app.py +0 -1
- lib/loader.py +1 -15
- requirements.txt +0 -1
README.md
CHANGED
@@ -17,9 +17,6 @@ preload_from_hub:
   - >-
     01-ai/Yi-Coder-1.5B-Chat
     config.json,generation_config.json,model.safetensors,special_tokens_map.json,tokenizer.model,tokenizer_config.json
-  - >-
-    hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4
-    config.json,generation_config.json,model-00001-of-00002.safetensors,model-00002-of-00002.safetensors,model.safetensors.index.json,special_tokens_map.json,tokenizer.json,tokenizer_config.json
   - >-
     HuggingFaceTB/SmolLM2-135M-Instruct
     config.json,generation_config.json,merges.txt,model.safetensors,special_tokens_map.json,tokenizer.json,tokenizer_config.json,vocab.json
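`preload_from_hub` tells the Space to download the listed files into the Hub cache while the Space image is built, so dropping the NF4 entry also drops its two model shards from the build. A rough equivalent of what the two surviving entries fetch, sketched with `huggingface_hub` (this script is illustrative only, not part of the Space):

```python
from huggingface_hub import hf_hub_download

# Each preload_from_hub entry is a repo id followed by a comma-separated
# list of files; this mirrors the two entries left after the change.
PRELOAD = {
    "01-ai/Yi-Coder-1.5B-Chat":
        "config.json,generation_config.json,model.safetensors,"
        "special_tokens_map.json,tokenizer.model,tokenizer_config.json",
    "HuggingFaceTB/SmolLM2-135M-Instruct":
        "config.json,generation_config.json,merges.txt,model.safetensors,"
        "special_tokens_map.json,tokenizer.json,tokenizer_config.json,vocab.json",
}

for repo_id, files in PRELOAD.items():
    for filename in files.split(","):
        hf_hub_download(repo_id=repo_id, filename=filename)
```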
app.py
CHANGED
@@ -46,7 +46,6 @@ chat_interface = gr.ChatInterface(
         value="HuggingFaceTB/SmolLM2-135M-Instruct",
         choices=[
             "01-ai/Yi-Coder-1.5B-Chat",
-            "hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4",
             "HuggingFaceTB/SmolLM2-135M-Instruct",
             "HuggingFaceTB/SmolLM2-360M-Instruct",
             "HuggingFaceTB/SmolLM2-1.7B-Instruct",
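The edited lines belong to the `chat_interface = gr.ChatInterface(` call named in the hunk header. A hypothetical reconstruction of the surrounding widget, assuming the model picker is a `gr.Dropdown` passed in via `additional_inputs` (only the `value=` and `choices=` lines come from the diff; the callback and the rest of the structure here are assumptions):

```python
import gradio as gr

def generate(message, history, model):
    # Stub standing in for the Space's real generation callback.
    return f"(would generate a reply with {model})"

chat_interface = gr.ChatInterface(
    fn=generate,
    additional_inputs=[
        gr.Dropdown(
            label="Model",
            value="HuggingFaceTB/SmolLM2-135M-Instruct",
            choices=[
                "01-ai/Yi-Coder-1.5B-Chat",
                "HuggingFaceTB/SmolLM2-135M-Instruct",
                "HuggingFaceTB/SmolLM2-360M-Instruct",
                "HuggingFaceTB/SmolLM2-1.7B-Instruct",
            ],
        ),
    ],
)
```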
lib/loader.py
CHANGED
@@ -2,9 +2,6 @@ import os
 
 import torch
 from transformers import (
-    AutoConfig,
-    Gemma2ForCausalLM,
-    GemmaTokenizer,
     GlmForCausalLM,
     GPT2Tokenizer,
     LlamaForCausalLM,
@@ -37,7 +34,6 @@ class Loader:
         model_fns = {
             # Could have used auto-classes or a pipeline
             "01-ai/Yi-Coder-1.5B-Chat": LlamaForCausalLM.from_pretrained,
-            "hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4": LlamaForCausalLM.from_pretrained,
             "HuggingFaceTB/SmolLM2-135M-Instruct": LlamaForCausalLM.from_pretrained,
             "HuggingFaceTB/SmolLM2-360M-Instruct": LlamaForCausalLM.from_pretrained,
             "HuggingFaceTB/SmolLM2-1.7B-Instruct": LlamaForCausalLM.from_pretrained,
@@ -47,7 +43,6 @@ class Loader:
         }
         model_tokenizers = {
             "01-ai/Yi-Coder-1.5B-Chat": LlamaTokenizer,
-            "hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4": PreTrainedTokenizerFast,
             "HuggingFaceTB/SmolLM2-135M-Instruct": GPT2Tokenizer,
             "HuggingFaceTB/SmolLM2-360M-Instruct": GPT2Tokenizer,
             "HuggingFaceTB/SmolLM2-1.7B-Instruct": GPT2Tokenizer,
@@ -58,16 +53,7 @@ class Loader:
 
         llm_fn = model_fns[model]
         self.tokenizer = model_tokenizers[model].from_pretrained(model)
-
-        if model == "hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4":
-            # Remove unused settings
-            config = AutoConfig.from_pretrained(model)
-            for key in ["_load_in_4bit", "_load_in_8bit", "quant_method"]:
-                del config.quantization_config[key]
-            self.llm = llm_fn(model, config=config, **kwargs)
-        else:
-            self.llm = llm_fn(model, **kwargs)
-
+        self.llm = llm_fn(model, **kwargs)
         self.llm.eval()
         self.model = model
 
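With the BNB-NF4 special case gone, every model takes the same `from_pretrained` path, which is what lets `AutoConfig` and the `quantization_config` cleanup be deleted. A condensed, self-contained sketch of the resulting load logic (the `Loader` class is flattened into a function here, two models stand in for the full tables, and the `torch_dtype`/`device_map` kwargs are assumptions in place of the real `**kwargs`):

```python
import torch
from transformers import GPT2Tokenizer, LlamaForCausalLM, LlamaTokenizer

# Per-model tables, as in lib/loader.py (truncated to two entries).
MODEL_FNS = {
    "01-ai/Yi-Coder-1.5B-Chat": LlamaForCausalLM.from_pretrained,
    "HuggingFaceTB/SmolLM2-135M-Instruct": LlamaForCausalLM.from_pretrained,
}
MODEL_TOKENIZERS = {
    "01-ai/Yi-Coder-1.5B-Chat": LlamaTokenizer,
    "HuggingFaceTB/SmolLM2-135M-Instruct": GPT2Tokenizer,
}

def load(model: str):
    # One uniform path: no per-model quantization_config surgery anymore.
    tokenizer = MODEL_TOKENIZERS[model].from_pretrained(model)
    llm = MODEL_FNS[model](model, torch_dtype=torch.float16, device_map="auto")
    llm.eval()  # inference mode, matching the diff
    return tokenizer, llm
```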
requirements.txt
CHANGED
@@ -1,5 +1,4 @@
 accelerate
-bitsandbytes
 gradio==4.44.1
 hf-transfer
 numpy==1.26.4