Remove bitsandbytes
- README.md +0 -3
- app.py +0 -1
- lib/loader.py +1 -15
- requirements.txt +0 -1
README.md
CHANGED
@@ -17,9 +17,6 @@ preload_from_hub:
   - >-
     01-ai/Yi-Coder-1.5B-Chat
     config.json,generation_config.json,model.safetensors,special_tokens_map.json,tokenizer.model,tokenizer_config.json
-  - >-
-    hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4
-    config.json,generation_config.json,model-00001-of-00002.safetensors,model-00002-of-00002.safetensors,model.safetensors.index.json,special_tokens_map.json,tokenizer.json,tokenizer_config.json
   - >-
     HuggingFaceTB/SmolLM2-135M-Instruct
     config.json,generation_config.json,merges.txt,model.safetensors,special_tokens_map.json,tokenizer.json,tokenizer_config.json,vocab.json
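`preload_from_hub` tells the Space to download the listed files into the Hub cache while the Space image is built, so dropping the NF4 entry also drops its two model shards from the build. A rough equivalent of what the two surviving entries fetch, sketched with `huggingface_hub` (this script is illustrative only, not part of the Space):

```python
from huggingface_hub import hf_hub_download

# Each preload_from_hub entry is a repo id followed by a comma-separated
# list of files; this mirrors the two entries left after the change.
PRELOAD = {
    "01-ai/Yi-Coder-1.5B-Chat":
        "config.json,generation_config.json,model.safetensors,"
        "special_tokens_map.json,tokenizer.model,tokenizer_config.json",
    "HuggingFaceTB/SmolLM2-135M-Instruct":
        "config.json,generation_config.json,merges.txt,model.safetensors,"
        "special_tokens_map.json,tokenizer.json,tokenizer_config.json,vocab.json",
}

for repo_id, files in PRELOAD.items():
    for filename in files.split(","):
        hf_hub_download(repo_id=repo_id, filename=filename)
```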
app.py
CHANGED
@@ -46,7 +46,6 @@ chat_interface = gr.ChatInterface(
         value="HuggingFaceTB/SmolLM2-135M-Instruct",
         choices=[
             "01-ai/Yi-Coder-1.5B-Chat",
-            "hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4",
             "HuggingFaceTB/SmolLM2-135M-Instruct",
             "HuggingFaceTB/SmolLM2-360M-Instruct",
             "HuggingFaceTB/SmolLM2-1.7B-Instruct",
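The edited lines belong to the `chat_interface = gr.ChatInterface(` call named in the hunk header. A hypothetical reconstruction of the surrounding widget, assuming the model picker is a `gr.Dropdown` passed in via `additional_inputs` (only the `value=` and `choices=` lines come from the diff; the callback and the rest of the structure here are assumptions):

```python
import gradio as gr

def generate(message, history, model):
    # Stub standing in for the Space's real generation callback.
    return f"(would generate a reply with {model})"

chat_interface = gr.ChatInterface(
    fn=generate,
    additional_inputs=[
        gr.Dropdown(
            label="Model",
            value="HuggingFaceTB/SmolLM2-135M-Instruct",
            choices=[
                "01-ai/Yi-Coder-1.5B-Chat",
                "HuggingFaceTB/SmolLM2-135M-Instruct",
                "HuggingFaceTB/SmolLM2-360M-Instruct",
                "HuggingFaceTB/SmolLM2-1.7B-Instruct",
            ],
        ),
    ],
)
```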
lib/loader.py
CHANGED
@@ -2,9 +2,6 @@ import os
 
 import torch
 from transformers import (
-    AutoConfig,
-    Gemma2ForCausalLM,
-    GemmaTokenizer,
     GlmForCausalLM,
     GPT2Tokenizer,
     LlamaForCausalLM,
@@ -37,7 +34,6 @@ class Loader:
         model_fns = {
             # Could have used auto-classes or a pipeline
             "01-ai/Yi-Coder-1.5B-Chat": LlamaForCausalLM.from_pretrained,
-            "hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4": LlamaForCausalLM.from_pretrained,
             "HuggingFaceTB/SmolLM2-135M-Instruct": LlamaForCausalLM.from_pretrained,
             "HuggingFaceTB/SmolLM2-360M-Instruct": LlamaForCausalLM.from_pretrained,
             "HuggingFaceTB/SmolLM2-1.7B-Instruct": LlamaForCausalLM.from_pretrained,
@@ -47,7 +43,6 @@ class Loader:
         }
         model_tokenizers = {
             "01-ai/Yi-Coder-1.5B-Chat": LlamaTokenizer,
-            "hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4": PreTrainedTokenizerFast,
             "HuggingFaceTB/SmolLM2-135M-Instruct": GPT2Tokenizer,
             "HuggingFaceTB/SmolLM2-360M-Instruct": GPT2Tokenizer,
             "HuggingFaceTB/SmolLM2-1.7B-Instruct": GPT2Tokenizer,
@@ -58,16 +53,7 @@ class Loader:
 
         llm_fn = model_fns[model]
         self.tokenizer = model_tokenizers[model].from_pretrained(model)
-
-        if model == "hugging-quants/Meta-Llama-3.1-8B-Instruct-BNB-NF4":
-            # Remove unused settings
-            config = AutoConfig.from_pretrained(model)
-            for key in ["_load_in_4bit", "_load_in_8bit", "quant_method"]:
-                del config.quantization_config[key]
-            self.llm = llm_fn(model, config=config, **kwargs)
-        else:
-            self.llm = llm_fn(model, **kwargs)
-
+        self.llm = llm_fn(model, **kwargs)
         self.llm.eval()
         self.model = model
 
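With the BNB-NF4 special case gone, every model takes the same `from_pretrained` path, which is what lets `AutoConfig` and the `quantization_config` cleanup be deleted. A condensed, self-contained sketch of the resulting load logic (the `Loader` class is flattened into a function here, two models stand in for the full tables, and the `torch_dtype`/`device_map` kwargs are assumptions in place of the real `**kwargs`):

```python
import torch
from transformers import GPT2Tokenizer, LlamaForCausalLM, LlamaTokenizer

# Per-model tables, as in lib/loader.py (truncated to two entries).
MODEL_FNS = {
    "01-ai/Yi-Coder-1.5B-Chat": LlamaForCausalLM.from_pretrained,
    "HuggingFaceTB/SmolLM2-135M-Instruct": LlamaForCausalLM.from_pretrained,
}
MODEL_TOKENIZERS = {
    "01-ai/Yi-Coder-1.5B-Chat": LlamaTokenizer,
    "HuggingFaceTB/SmolLM2-135M-Instruct": GPT2Tokenizer,
}

def load(model: str):
    # One uniform path: no per-model quantization_config surgery anymore.
    tokenizer = MODEL_TOKENIZERS[model].from_pretrained(model)
    llm = MODEL_FNS[model](model, torch_dtype=torch.float16, device_map="auto")
    llm.eval()  # inference mode, matching the diff
    return tokenizer, llm
```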
requirements.txt
CHANGED
@@ -1,5 +1,4 @@
 accelerate
-bitsandbytes
 gradio==4.44.1
 hf-transfer
 numpy==1.26.4