from unsloth import FastLanguageModel
import torch

# Configuration for loading the base model
max_seq_length = 4096  # maximum context length used during fine-tuning
dtype = None           # None lets Unsloth auto-detect bfloat16 or float16
load_in_4bit = True    # load the base weights in 4-bit to reduce VRAM usage

# Load the 4-bit quantized TinyLlama chat model and its tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/tinyllama-chat-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Attach LoRA adapters to the attention and MLP projection layers
model = FastLanguageModel.get_peft_model(
    model,
    r=32,  # LoRA rank
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=32,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing=False,
    random_state=3407,
    use_rslora=False,   # standard LoRA scaling (not rank-stabilized)
    loftq_config=None,
)

import pandas as pd
from sklearn.model_selection import train_test_split
import datasets

# Load the Grammarly CoEdIT dataset and combine its train and validation splits
train = datasets.load_dataset("grammarly/coedit", split="train").to_pandas()
val = datasets.load_dataset("grammarly/coedit", split="validation").to_pandas()

data = pd.concat([train, val])
# "src" holds "instruction: input text"; split it into separate columns
data[['instruction', 'input']] = data['src'].str.split(': ', n=1, expand=True)
data = data.rename(columns={"tgt": "output"})
data = data.drop(columns=["_id", "src"])

# Stratify the split on the task column so every edit type appears in both sets
stratify_col = data['task']

train_df, test_df = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
    stratify=stratify_col,
)


def formatting_prompts_func(examples, tokenizer):
    """
    Formats the examples into the desired chat format for training.

    Args:
        examples: A dictionary of examples from the dataset.
        tokenizer: The tokenizer used for processing text.

    Returns:
        A dictionary containing the formatted text for each example.
    """
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        message = [
            {"role": "user", "content": instruction + ": " + input},
            {"role": "assistant", "content": output},
        ]
        text = tokenizer.apply_chat_template(
            message, tokenize=False, add_generation_prompt=False)
        texts.append(text)
    return {"text": texts}

train_ds = datasets.Dataset.from_pandas(train_df)
test_ds = datasets.Dataset.from_pandas(test_df)

# Apply the chat template to every example; batched=True processes examples in chunks
train_ds = train_ds.map(formatting_prompts_func, fn_kwargs={"tokenizer": tokenizer}, batched=True)
test_ds = test_ds.map(formatting_prompts_func, fn_kwargs={"tokenizer": tokenizer}, batched=True)

# Inspect one formatted training example
print(train_ds[0]['text'])
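# The printed example should look roughly like the TinyLlama chat template applied to a
# user/assistant pair (illustrative only; the exact markers come from the tokenizer), e.g.
#   <|user|>
#   Fix grammatical errors in this sentence: ...</s>
#   <|assistant|>
#   ...</s>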

from trl import SFTTrainer
from transformers import TrainingArguments

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_ds,
    eval_dataset=test_ds,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=10,
    packing=False,
    args=TrainingArguments(
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        gradient_accumulation_steps=4,  # effective train batch size = 8 * 4 = 32 per device
        warmup_steps=5,
        num_train_epochs=2,
        learning_rate=2e-4,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=1,
        save_steps=100,
        save_total_limit=4,
        evaluation_strategy="steps",
        eval_steps=100,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        load_best_model_at_end=True,
        save_strategy="steps",
    ),
)

# Record baseline GPU memory before training so training overhead can be reported later
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

trainer_stats = trainer.train()

# Report runtime and peak reserved GPU memory after training
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime'] / 60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

# Save the LoRA adapter weights and tokenizer locally
print("Saving model to local")
model.save_pretrained("coedit-tinyllama-chat-bnb-4bit")
tokenizer.save_pretrained("coedit-tinyllama-chat-bnb-4bit")
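
# Optional: a minimal inference sketch using the just-trained adapter. This is a sketch
# assuming the Unsloth API used above (FastLanguageModel.for_inference enables its faster
# generation path); the prompt below is a hypothetical example, not part of the original script.
FastLanguageModel.for_inference(model)
messages = [{"role": "user", "content":
             "Fix grammatical errors in this sentence: She go to school every days."}]
input_ids = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
generated = model.generate(input_ids=input_ids, max_new_tokens=64, use_cache=True)
# Decode only the newly generated tokens, skipping the prompt
print(tokenizer.decode(generated[0][input_ids.shape[1]:], skip_special_tokens=True))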