Update README.md
Browse files
README.md
CHANGED
@@ -15,31 +15,12 @@ import torch
|
|
15 |
import transformers
|
16 |
from transformers import LlamaTokenizer, AutoModelForCausalLM
|
17 |
tokenizer = LlamaTokenizer.from_pretrained("Fan21/Llama-mt-lora")
|
18 |
-
|
19 |
-
|
20 |
-
device = "cuda"
|
21 |
-
else:
|
22 |
-
device = "cpu"
|
23 |
-
if device == "cuda":
|
24 |
-
model = LlamaForCausalLM.from_pretrained(
|
25 |
-
BASE_MODEL,
|
26 |
load_in_8bit=False,
|
27 |
torch_dtype=torch.float16,
|
28 |
device_map="auto",
|
29 |
)
|
30 |
-
|
31 |
-
elif device == "mps":
|
32 |
-
model = LlamaForCausalLM.from_pretrained(
|
33 |
-
BASE_MODEL,
|
34 |
-
device_map={"": device},
|
35 |
-
torch_dtype=torch.float16,
|
36 |
-
)
|
37 |
-
|
38 |
-
else:
|
39 |
-
model = LlamaForCausalLM.from_pretrained(
|
40 |
-
BASE_MODEL, device_map={"": device}, low_cpu_mem_usage=True
|
41 |
-
)
|
42 |
-
|
43 |
def generate_prompt(instruction, input=None):
|
44 |
if input:
|
45 |
return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
|
@@ -54,13 +35,6 @@ def generate_prompt(instruction, input=None):
|
|
54 |
{instruction}
|
55 |
### Response:"""
|
56 |
|
57 |
-
if device != "cpu":
|
58 |
-
model.half()
|
59 |
-
model.eval()
|
60 |
-
if torch.__version__ >= "2":
|
61 |
-
model = torch.compile(model)
|
62 |
-
|
63 |
-
|
64 |
def evaluate(
|
65 |
instruction,
|
66 |
input=None,
|
|
|
15 |
import transformers
|
16 |
from transformers import LlamaTokenizer, AutoModelForCausalLM
|
17 |
tokenizer = LlamaTokenizer.from_pretrained("Fan21/Llama-mt-lora")
|
18 |
+
model = AutoModelForCausalLM.from_pretrained(
|
19 |
+
"Fan21/Llama-mt-lora",
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
load_in_8bit=False,
|
21 |
torch_dtype=torch.float16,
|
22 |
device_map="auto",
|
23 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
def generate_prompt(instruction, input=None):
|
25 |
if input:
|
26 |
return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
|
|
|
35 |
{instruction}
|
36 |
### Response:"""
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
def evaluate(
|
39 |
instruction,
|
40 |
input=None,
|