Sandiago21 commited on
Commit
590318d
1 Parent(s): c2eb01d

Update README.md with an improved way to load and use the model

Browse files
Files changed (1) hide show
  1. README.md +33 -58
README.md CHANGED
@@ -96,75 +96,50 @@ def generate_prompt(instruction: str, input_ctxt: str = None) -> str:
96
  Use the code below to get started with the model.
97
 
98
  ```python
99
- from transformers import LlamaTokenizer, LlamaForCausalLM
100
- from peft import PeftModel
101
-
102
- MODEL_NAME = "decapoda-research/llama-13b-hf"
103
- tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME, add_eos_token=True)
104
- tokenizer.pad_token_id = 0
105
-
106
- model = LlamaForCausalLM.from_pretrained(MODEL_NAME, load_in_8bit=True, device_map="auto")
107
- model = PeftModel.from_pretrained(model, "Sandiago21/llama-13b-hf")
108
- ```
109
-
110
- ### Example of Usage
111
- ```python
112
- from transformers import GenerationConfig
113
-
114
- PROMPT = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nWhich is the capital city of Greece and with which countries does Greece border?\n\n### Input:\nQuestion answering\n\n### Response:\n"""
115
- DEVICE = "cuda"
116
-
117
- inputs = tokenizer(
118
- PROMPT,
119
- return_tensors="pt",
120
  )
121
-
122
- input_ids = inputs["input_ids"].to(DEVICE)
123
-
124
  generation_config = GenerationConfig(
125
- temperature=0.1,
126
- top_p=0.95,
127
- repetition_penalty=1.2,
 
 
128
  )
129
 
130
- print("Generating Response ... ")
131
- with torch.no_grad():
132
- generation_output = model.generate(
133
- input_ids=input_ids,
134
- generation_config=generation_config,
135
- return_dict_in_generate=True,
136
- output_scores=True,
137
- max_new_tokens=256,
138
- )
139
-
140
- for s in generation_output.sequences:
141
- print(tokenizer.decode(s))
142
  ```
143
 
144
- ### Example Output
145
  ```python
146
- Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
147
-
148
- ### Instruction:
149
- Which is the capital city of Greece and with which countries does Greece border?
150
-
151
- ### Input:
152
- Question answering
153
-
154
- ### Response:
155
 
156
-
157
- Generating...
158
- <unk> Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
159
 
160
- ### Instruction:
161
- Which is the capital city of Greece and with which countries does Greece border?
 
 
 
 
 
162
 
163
- ### Input:
164
- Question answering
165
 
166
- ### Response:
167
- <unk>capital city of Athens and it borders Albania to the northwest, North Macedonia and Bulgaria to the northeast, Turkey to the east, and Libya to the southeast across the Mediterranean Sea.
168
  ```
169
 
170
  ## Training Details
 
96
  Use the code below to get started with the model.
97
 
98
  ```python
99
+ import torch
100
+ from transformers import GenerationConfig, LlamaTokenizer, LlamaForCausalLM
101
+
102
+ tokenizer = LlamaTokenizer.from_pretrained("chainyo/alpaca-lora-7b")
103
+ model = LlamaForCausalLM.from_pretrained(
104
+ "chainyo/alpaca-lora-7b",
105
+ load_in_8bit=True,
106
+ torch_dtype=torch.float16,
107
+ device_map="auto",
 
 
 
 
 
 
 
 
 
 
 
 
108
  )
 
 
 
109
  generation_config = GenerationConfig(
110
+ temperature=0.2,
111
+ top_p=0.75,
112
+ top_k=40,
113
+ num_beams=4,
114
+ max_new_tokens=128,
115
  )
116
 
117
+ model.eval()
118
+ if torch.__version__ >= "2":
119
+ model = torch.compile(model)
 
 
 
 
 
 
 
 
 
120
  ```
121
 
122
+ ### Example of Usage
123
  ```python
124
+ instruction = "What is the capital city of Greece and with which countries does Greece border?"
125
+ input_ctxt = None # For some tasks, you can provide an input context to help the model generate a better response.
 
 
 
 
 
 
 
126
 
127
+ prompt = generate_prompt(instruction, input_ctxt)
128
+ input_ids = tokenizer(prompt, return_tensors="pt").input_ids
129
+ input_ids = input_ids.to(model.device)
130
 
131
+ with torch.no_grad():
132
+ outputs = model.generate(
133
+ input_ids=input_ids,
134
+ generation_config=generation_config,
135
+ return_dict_in_generate=True,
136
+ output_scores=True,
137
+ )
138
 
139
+ response = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
140
+ print(response)
141
 
142
+ >>> The capital city of Greece is Athens and it borders Albania, Macedonia, Bulgaria and Turkey.
 
143
  ```
144
 
145
  ## Training Details