[
{
"challenge": "Integrate Early Stopping Callback to Prevent Over-training",
"solution": "early_stopping = EarlyStoppingCallback(\n early_stopping_patience=3,\n early_stopping_threshold=0.01\n)\ntrainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n config=SFTConfig(),\n callbacks=[early_stopping]\n)",
"placeholder": "# The current early stopping settings allow training to proceed too long. Adjust to stop training promptly upon divergence:\nearly_stopping = EarlyStoppingCallback(\n early_stopping_patience=10, # Patience value is too high\n early_stopping_threshold=0.1 # Threshold is too loose\n)\ntrainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n config=SFTConfig(),\n callbacks=[] # Add the early stopping callback here\n)",
"context": "Validation loss starts diverging around step 400 and increases by approximately 0.02 per step for 3 consecutive steps. The early stopping mechanism should be sensitive enough (patience between 2-4 steps and a threshold between 0.005-0.02) to halt training when overfitting begins.",
"assessment_criteria": [
"Ensure the early_stopping_patience is within 2 to 4 steps.",
"Verify that the early_stopping_threshold is between 0.005 and 0.02.",
"Confirm that EarlyStoppingCallback is added to the callbacks list.",
"Make sure EarlyStoppingCallback is correctly imported."
],
"image": "/Users/ben/code/code_assignment_app/images/2.png"
},
{
"challenge": "Set Up a Linear Learning Rate Scheduler Reflecting Gradual Loss Reduction",
"solution": "trainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n config=SFTConfig(\n learning_rate=3e-4,\n lr_scheduler_type='linear',\n num_train_epochs=3\n )\n)",
"placeholder": "# The current configuration uses an inappropriate scheduler and parameter values. Update to match a linear decay:\ntrainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n config=SFTConfig(\n learning_rate=1e-4, # Learning rate is too low\n lr_scheduler_type='cosine', # Incorrect scheduler type\n num_train_epochs=5 # Too many epochs\n )\n)",
"context": "The image shows a linear decrease in loss from 0.8 to 0.2 over approximately 3 epochs. The learning rate scheduler should follow a linear decay pattern, so parameters must be adjusted to reflect this behavior.",
"assessment_criteria": [
"Ensure lr_scheduler_type is explicitly set to 'linear'.",
"Verify that learning_rate is within the range of 2e-4 to 4e-4.",
"Confirm that num_train_epochs is set between 2 and 4 to match the convergence pattern."
],
"image": "/Users/ben/code/code_assignment_app/images/3.png"
},
{
"challenge": "Tune TRL Training Arguments for Stable Convergence",
"solution": "trainer = SFTTrainer(\n model=model,\n args=TrainingArguments(\n max_steps=2000,\n learning_rate=5e-5,\n gradient_accumulation_steps=4,\n logging_steps=10\n )\n)",
"placeholder": "trainer = SFTTrainer(\n model=model,\n args=TrainingArguments(\n max_steps=____, # Pick a value between 1000-3000 steps\n learning_rate=____, # Set a learning rate between 1e-5 and 1e-4\n gradient_accumulation_steps=____, # Choose between 1 and 8\n logging_steps=____ # Choose a value between 5 and 50\n )\n)",
"context": "The provided image suggests a smooth and stable convergence over about 2000 steps, with a final loss near 0.1 and logs generated roughly every 10 steps. The training arguments must mirror this stability and reporting frequency.",
"assessment_criteria": [
"Confirm that max_steps is set between 1800 and 2200.",
"Ensure learning_rate lies between 4e-5 and 6e-5.",
"Verify that gradient_accumulation_steps is within 2 to 6.",
"Check that logging_steps is between 8 and 12."
],
"image": "/Users/ben/code/code_assignment_app/images/4.png"
},
{
"challenge": "Optimize PEFT and Enable 4-bit Quantization for Memory-Efficient Training",
"solution": "peft_config = LoraConfig(r=16, lora_alpha=32)\nquant_config = BitsAndBytesConfig(load_in_4bit=True)\n\ntrainer = SFTTrainer(\n model=model,\n peft_config=peft_config,\n quantization_config=quant_config\n)",
"placeholder": "peft_config = LoraConfig(\n r=____, # Select a value between 4 and 32\n lora_alpha=____ # Set to 4 times the chosen r\n)\nquant_config = BitsAndBytesConfig(\n load_in_4bit=____ # Set to True or False\n)\n\ntrainer = SFTTrainer(\n model=model,\n peft_config=____,\n quantization_config=____\n)",
"context": "For a 7B parameter model running on 24GB GPU, efficient training is critical. Adjust the PEFT settings with a LoRA adapter—choose r within 8 and 24 and set lora_alpha to 4 times the chosen r—to ensure low memory usage and effective regularization. Additionally, enable 4-bit quantization to further reduce resource consumption.",
"assessment_criteria": [
"Verify that r is set between 8 and 24.",
"Confirm that lora_alpha is exactly 4 times the r value.",
"Ensure that 4-bit quantization (load_in_4bit) is enabled (set to True).",
"Check that both peft_config and quantization_config are properly passed to the trainer."
]
},
{
"challenge": "Format Multi-turn Chat Conversation for Llama 2 Inference",
"solution": "tokenizer.apply_chat_template(\n conversation=[\n {\"role\": \"user\", \"content\": \"Hello!\"},\n {\"role\": \"assistant\", \"content\": \"Hi there!\"},\n {\"role\": \"user\", \"content\": \"How are you?\"}\n ],\n tokenize=False,\n add_generation_prompt=True\n)",
"placeholder": "tokenizer.apply_chat_template(\n conversation=____, # Provide a list of message dictionaries with 'role' and 'content'\n tokenize=____, # Set to False to return a formatted string\n add_generation_prompt=____ # Set to True to include the generation prompt\n)",
"context": "For proper inference with Llama 2, the conversation must be formatted as a multi-turn dialogue with clearly defined roles. The tokenizer should output a concatenated string (not tokenized) while also including a generation prompt to initiate the response.",
"assessment_criteria": [
"Ensure the conversation is formatted as a list of dictionaries each containing 'role' and 'content'.",
"Check that tokenize is explicitly set to False.",
"Confirm that add_generation_prompt is set to True."
]
},
{
"challenge": "Set Up a LoRA Adapter Configuration for Efficient Model Fine-tuning",
"solution": "config = LoraConfig(\n r=8,\n lora_alpha=32,\n target_modules=[\"q_proj\", \"v_proj\"],\n lora_dropout=0.05,\n bias=\"none\"\n)",
"placeholder": "config = LoraConfig(\n r=____, # Choose rank within 4 to 16\n lora_alpha=____, # Should be 4 times the chosen rank\n target_modules=____, # Specify the attention modules (e.g., ['q_proj', 'v_proj'])\n lora_dropout=____, # Set dropout between 0.01 and 0.1\n bias=____ # Choose from 'none', 'all', or 'lora_only'\n)",
"context": "When fine-tuning a large (7B) model on limited GPU resources, a LoRA adapter helps reduce memory consumption and computational overhead.",
"assessment_criteria": [
"Confirm that r is within the range of 4 to 16.",
"Verify that lora_alpha is exactly 4 times the r value.",
        "Ensure that target_modules is set to an appropriate list.",
"Check that lora_dropout lies between 0.01 and 0.1."
]
},
{
"challenge": "Combine Multiple LoRA Adapters Using Weighted Sum",
"solution": "merged_model = merge_lora_weights(\n base_model=model,\n adapters=[\n (adapter1, 0.7),\n (adapter2, 0.3)\n ],\n merge_strategy=\"weighted_sum\"\n)",
"placeholder": "merged_model = merge_lora_weights(\n base_model=model,\n adapters=[\n (____, ____), # Add the first adapter and its weight\n (____, ____) # Add the second adapter and its weight\n ],\n merge_strategy=____ # Specify the merge strategy (e.g., 'weighted_sum')\n)",
"context": "For enhanced performance, you may need to merge different LoRA adapters (for example, one tuned for general instruction and another for task-specific nuances). The weighted sum should reflect the relative contribution of each adapter, with the weights summing to 1.0.",
"assessment_criteria": [
"Ensure that the weights for the adapters sum up to 1.0 (or very close, accounting for rounding).",
"Confirm that an appropriate merge_strategy (such as 'weighted_sum') is specified.",
"Verify that the adapters being merged have compatible architectures."
]
},
{
"challenge": "Load Base Causal LM and Integrate a Pre-trained LoRA Adapter for Inference",
"solution": "model = AutoModelForCausalLM.from_pretrained(\n \"base_model\",\n device_map=\"auto\"\n)\nmodel = PeftModel.from_pretrained(\n model,\n \"lora_adapter\",\n adapter_name=\"default\"\n).merge_and_unload()",
"placeholder": "model = AutoModelForCausalLM.from_pretrained(\n ____, # Specify the base model identifier\n device_map=____ # Configure device mapping, e.g., 'auto'\n)\nmodel = PeftModel.from_pretrained(\n ____, # Provide the loaded base model\n ____, # Provide the LoRA adapter path\n adapter_name=____ # Use the correct adapter name\n).____() # Call the method to merge and unload adapter weights (e.g., merge_and_unload)\n",
"context": "For inference, first load the base model with device mapping, then incorporate the LoRA adapter",
"assessment_criteria": [
"Verify correct base model loading with device mapping",
"Ensure correct adapter loading",
"Confirm proper merging for inference"
]
},
{
"challenge": "Configure SFTTrainer Learning Rate",
"solution": "trainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n config=SFTConfig(\n learning_rate=2e-5\n )\n)",
"placeholder": "trainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n config=SFTConfig(\n learning_rate=5e-5 # TODO: Lower this value to prevent overfitting (should be < 3e-5)\n )\n)",
"context": "The model is showing signs of overfitting with the current learning rate of 5e-5. A lower learning rate is needed for more stable training.",
"assessment_criteria": [
"Verify that learning_rate is below 3e-5"
]
},
{
"challenge": "Configure LoRA Adapter Rank",
"solution": "config = LoraConfig(\n r=16\n)",
"placeholder": "config = LoraConfig(\n r=4 # TODO: Increase rank for better adaptation (should be between 8-24)\n)",
"context": "The current LoRA rank is too low for effective model adaptation. A higher rank will improve model capacity while keeping memory usage reasonable.",
"assessment_criteria": [
"Verify that r is set between 8 and 24"
]
},
{
"challenge": "Configure SFTTrainer: Set max_steps for training duration",
"solution": "trainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n config=SFTConfig(\n max_steps=1000\n )\n)",
"placeholder": "trainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n config=SFTConfig(\n max_steps=____ # Choose between 800-1200 steps\n )\n)",
"context": "Based on the training curves, setting an appropriate number of steps is crucial to avoid overfitting while allowing sufficient training progress.",
"assessment_criteria": [
"Verify that max_steps is set between 800 and 1200 steps."
]
},
{
"challenge": "Refine SFTTrainer: Adjust learning_rate to prevent overfitting",
"solution": "trainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n config=SFTConfig(\n learning_rate=2e-5\n )\n)",
"placeholder": "sft_config = SFTConfig(\n max_steps=____, \n learning_rate=____, \n weight_decay=____, \n warmup_steps=____\n)",
"context": "A cautious learning rate is essential to prevent overly aggressive updates that can lead to overfitting.",
"assessment_criteria": [
"Verify that learning_rate is below 3e-5."
]
},
{
"challenge": "Refine SFTTrainer: Increase weight_decay for stronger regularization",
"solution": "trainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n config=SFTConfig(\n weight_decay=0.02\n )\n)",
"placeholder": "sft_config = SFTConfig(\n max_steps=____, \n learning_rate=____, \n weight_decay=____, \n warmup_steps=____\n)",
"context": "Increasing weight decay enhances regularization, which helps mitigate overfitting issues.",
"assessment_criteria": [
"Confirm that weight_decay is increased (greater than 0.01)."
]
},
{
"challenge": "Refine SFTTrainer: Set appropriate warmup_steps relative to max_steps",
"solution": "trainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n config=SFTConfig(\n warmup_steps=100\n )\n)",
"placeholder": "sft_config = SFTConfig(\n max_steps=____, \n learning_rate=____, \n weight_decay=____, \n warmup_steps=____\n)",
"context": "A sufficient warmup period helps the optimizer gradually adjust and avoids sudden gradient spikes.",
"assessment_criteria": [
"Check that warmup_steps is at least 10% of max_steps."
]
},
{
"challenge": "Integrate Early Stopping: Set Callback Patience Correctly",
"solution": "early_stopping = EarlyStoppingCallback(\n early_stopping_patience=3\n)",
"placeholder": "early_stopping = EarlyStoppingCallback(\n early_stopping_patience=____ # Choose between 2-4 steps\n)",
"context": "An appropriate patience value helps stop training promptly when validation loss begins to increase.",
"assessment_criteria": [
"Confirm that early_stopping_patience is set between 2 and 4 steps."
]
},
{
"challenge": "Integrate Early Stopping: Define Threshold for Early Stopping Trigger",
"solution": "early_stopping = EarlyStoppingCallback(\n early_stopping_threshold=0.01\n)",
"placeholder": "early_stopping = EarlyStoppingCallback(\n early_stopping_threshold=____\n)",
"context": "The threshold determines how sensitive the early stopping callback is when detecting divergence in validation loss.",
"assessment_criteria": [
"Verify that early_stopping_threshold is between 0.005 and 0.02."
]
},
{
"challenge": "Configure Linear LR Scheduler: Set Correct Learning Rate",
"solution": "trainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n config=SFTConfig(\n learning_rate=3e-4\n )\n)",
"placeholder": "sft_config = SFTConfig(\n max_steps=____, \n learning_rate=____, \n weight_decay=____, \n warmup_steps=____\n)",
"context": "A proper learning rate within the recommended range ensures a smooth linear decay as observed in training curves.",
"assessment_criteria": [
"Verify that learning_rate is within the range of 2e-4 to 4e-4."
],
"image": "/Users/ben/code/code_assignment_app/images/3.png"
},
{
"challenge": "Configure Linear LR Scheduler: Set Proper Scheduler Type",
"solution": "trainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n config=SFTConfig(\n lr_scheduler_type='linear'\n )\n)",
"placeholder": "sft_config = SFTConfig(\n max_steps=____, \n learning_rate=____, \n weight_decay=____, \n warmup_steps=____\n)",
"context": "Specifying a 'linear' scheduler type ensures that the learning rate decays uniformly.",
"assessment_criteria": [
"Ensure lr_scheduler_type is explicitly set to 'linear'."
],
"image": "/Users/ben/code/code_assignment_app/images/3.png"
},
{
"challenge": "Configure Linear LR Scheduler: Adjust Number of Training Epochs",
"solution": "trainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n config=SFTConfig(\n num_train_epochs=3\n )\n)",
"placeholder": "sft_config = SFTConfig(\n max_steps=____, \n learning_rate=____, \n weight_decay=____, \n warmup_steps=____\n)",
"context": "Setting the proper number of epochs helps the model converge in line with the observed linear loss reduction.",
"assessment_criteria": [
"Confirm that num_train_epochs is set between 2 and 4."
],
"image": "/Users/ben/code/code_assignment_app/images/3.png"
},
{
"challenge": "Set TRL Training Args: Choose appropriate max_steps",
"solution": "trainer = SFTTrainer(\n model=model,\n args=TrainingArguments(\n max_steps=2000\n )\n)",
"placeholder": "sft_config = SFTConfig(\n max_steps=____, \n learning_rate=____, \n weight_decay=____, \n warmup_steps=____\n)",
"context": "Choosing an optimal value for max_steps ensures the training process is neither too short nor unnecessarily long.",
"assessment_criteria": [
"Confirm that max_steps is set between 1800 and 2200."
],
"image": "/Users/ben/code/code_assignment_app/images/4.png"
},
{
"challenge": "Set TRL Training Args: Adjust learning_rate for stability",
"solution": "trainer = SFTTrainer(\n model=model,\n args=TrainingArguments(\n learning_rate=5e-5\n )\n)",
"placeholder": "sft_config = SFTConfig(\n max_steps=____, \n learning_rate=____, \n weight_decay=____, \n warmup_steps=____\n)",
"context": "A stable learning rate helps maintain smooth and consistent training progress.",
"assessment_criteria": [
"Ensure learning_rate lies between 4e-5 and 6e-5."
],
"image": "/Users/ben/code/code_assignment_app/images/4.png"
},
{
"challenge": "Set TRL Training Args: Optimize gradient_accumulation_steps",
"solution": "trainer = SFTTrainer(\n model=model,\n args=TrainingArguments(\n gradient_accumulation_steps=4\n )\n)",
"placeholder": "sft_config = SFTConfig(\n max_steps=____, \n learning_rate=____, \n weight_decay=____, \n warmup_steps=____\n)",
"context": "Optimizing gradient accumulation helps smooth updates and is key for training stability.",
"assessment_criteria": [
"Verify that gradient_accumulation_steps is within 2 to 6."
],
"image": "/Users/ben/code/code_assignment_app/images/4.png"
},
{
"challenge": "Set TRL Training Args: Define proper logging_steps frequency",
"solution": "trainer = SFTTrainer(\n model=model,\n args=SFTConfig(\n logging_steps=10\n )\n)",
"placeholder": "trainer = SFTTrainer(\n model=model,\n args=SFTConfig(\n logging_steps=____ # Choose between 8 and 12\n )\n)",
"context": "Logging at the correct frequency provides clear insights into training without excessive output.",
"assessment_criteria": [
"Check that logging_steps is between 8 and 12."
],
"image": "/Users/ben/code/code_assignment_app/images/4.png"
},
{
"challenge": "Optimize PEFT: Select appropriate LoRA rank (r)",
"solution": "peft_config = LoraConfig(\n r=16, \n lora_alpha=32, \n target_modules=[\"q_proj\", \"v_proj\"]\n)",
    "placeholder": "peft_config = LoraConfig(\n    r=____,  # Choose r value\n    lora_alpha=32, \n    target_modules=[\"q_proj\", \"v_proj\"]\n)",
"context": "The LoRA rank (r) directly affects model complexity and resource usage, so it should fall within an optimal range.",
"assessment_criteria": [
"Verify that r is set between 8 and 24."
]
},
{
"challenge": "Optimize PEFT: Choose correct lora_alpha based on r",
"solution": "peft_config = LoraConfig(\n r=16, \n lora_alpha=32, \n target_modules=[\"q_proj\", \"v_proj\"]\n)",
"placeholder": "peft_config = LoraConfig(\n r=____, # Choose r value\n lora_alpha=____, # Should be 4 times the chosen r (e.g., if r=8, then lora_alpha=32)\n target_modules=[\"q_proj\", \"v_proj\"]\n)",
"context": "Setting lora_alpha proportionally (4× the rank, r) ensures balanced adaptive scaling as recommended in TRL examples.",
"assessment_criteria": [
"Confirm that lora_alpha is exactly 4 times the r value."
]
},
{
"challenge": "Enable 4-bit Quantization for Efficient Training",
"solution": "quant_config = BitsAndBytesConfig(\n load_in_4bit=True\n)",
"placeholder": "quant_config = BitsAndBytesConfig(\n load_in_4bit=____\n)",
"context": "4-bit quantization significantly reduces memory requirements while maintaining acceptable performance.",
"assessment_criteria": [
"Ensure that load_in_4bit is set to True."
]
},
{
"challenge": "Format Chat Conversation: Provide proper conversation list",
"solution": "tokenizer.apply_chat_template(\n conversation=[\n {\"role\": \"user\", \"content\": \"Hello!\"},\n {\"role\": \"assistant\", \"content\": \"Hi there!\"}\n ]\n)",
"placeholder": "tokenizer.apply_chat_template(\n conversation=____ # Provide a list of dictionaries with 'role' and 'content'\n)",
"context": "A correctly formatted conversation list is essential to initiate multi-turn chat inference.",
"assessment_criteria": [
"Ensure the conversation is formatted as a list of dictionaries with 'role' and 'content'."
]
},
{
"challenge": "Format Chat Conversation: Set tokenize option appropriately",
"solution": "tokenizer.apply_chat_template(\n tokenize=False\n)",
"placeholder": "tokenizer.apply_chat_template(_____)",
"context": "Setting tokenize to False makes sure that the output remains a fully formatted string.",
"assessment_criteria": [
"Check that tokenize is explicitly set to False."
]
},
{
"challenge": "Format Chat Conversation: Enable Generation Prompt",
"solution": "tokenizer.apply_chat_template(\n add_generation_prompt=True\n)",
"placeholder": "tokenizer.apply_chat_template(_____)",
"context": "Enabling the generation prompt helps trigger the model's response generation effectively.",
"assessment_criteria": [
"Confirm that add_generation_prompt is set to True."
]
},
{
"challenge": "Configure LoRA Adapter: Set rank parameter for efficient adaptation",
"solution": "config = LoraConfig(\n r=8\n)",
"placeholder": "config = LoraConfig(\n r=____, # Choose r value\n lora_alpha=16,\n)",
"context": "Choosing a proper rank for the LoRA adapter is key for efficient fine-tuning with limited resources.",
"assessment_criteria": [
"Confirm that r is within the range of 4 to 16."
]
},
{
"challenge": "Configure LoRA Adapter: Set lora_alpha as 4 times r",
"solution": "config = LoraConfig(\n lora_alpha=32\n)",
"placeholder": "config = LoraConfig(\n lora_alpha=____, # Should be 4 times the chosen r\n r=4\n)",
"context": "Maintaining the ratio between lora_alpha and r is important for balanced adapter scaling.",
"assessment_criteria": [
"Verify that lora_alpha is exactly 4 times the r value."
]
},
{
"challenge": "Configure LoRA Adapter: Specify target attention modules",
"solution": "config = LoraConfig(\n target_modules=[\"q_proj\", \"v_proj\"]\n)",
"placeholder": "config = LoraConfig(\n target_modules=____, # Specify a list of attention modules\n r=4\n)",
"context": "Identifying and targeting the relevant attention modules helps focus the adapter's adjustments.",
"assessment_criteria": [
"Ensure that target_modules is set to an appropriate list (e.g., ['q_proj', 'v_proj'])."
]
},
{
"challenge": "Configure LoRA Adapter: Define dropout rate",
"solution": "config = LoraConfig(\n lora_dropout=0.05\n)",
"placeholder": "config = LoraConfig(\n lora_dropout=____, # Set value between 0.01 and 0.1\n r=4\n)",
"context": "An optimal dropout rate helps prevent overfitting during fine-tuning.",
"assessment_criteria": [
"Check that lora_dropout is between 0.01 and 0.1."
]
},
{
"challenge": "Combine LoRA Adapters: Verify adapter weight sum",
"solution": "merged_model = merge_lora_weights(\n base_model=model,\n adapters=[(adapter1, 0.7), (adapter2, 0.3)],\n merge_strategy=\"weighted_sum\"\n)",
"placeholder": "merged_model = merge_lora_weights(\n base_model=model,\n adapters=[(adapter1, 0.7), (adapter2, 0.3)],\n merge_strategy=____\n)",
"context": "For a balanced merge of multiple adapters, their weights must sum to 1.0 (or very close, accounting for rounding).",
"assessment_criteria": [
"Ensure that the weights for the adapters sum up to 1.0 (or very close, accounting for rounding)."
]
},
{
"challenge": "Combine LoRA Adapters: Specify a valid merge strategy",
"solution": "merged_model = merge_lora_weights(\n base_model=model,\n adapters=[(adapter1, 0.7), (adapter2, 0.3)],\n merge_strategy=\"weighted_sum\"\n)",
"placeholder": "merged_model = merge_lora_weights(\n base_model=model,\n adapters=[(adapter1, 0.7), (adapter2, 0.3)],\n merge_strategy=____\n)",
"context": "A valid merge strategy must be specified to correctly combine the contributions of each adapter.",
"assessment_criteria": [
"Confirm that an appropriate merge_strategy is specified (e.g., 'weighted_sum')."
]
},
{
"challenge": "Load Base Model: Provide correct model identifier and device mapping",
"solution": "model = AutoModelForCausalLM.from_pretrained(\n \"base_model\",\n device_map=\"auto\"\n)",
"placeholder": "model = AutoModelForCausalLM.from_pretrained(____)\npeft_model = PeftModel.from_pretrained(____, ____)\n# Merge weights\nmodel = peft_model.____",
"context": "The base model must be loaded correctly with its device mapping before integrating adapters.",
"assessment_criteria": [
"Verify that the base model is loaded correctly with the proper device mapping."
]
},
{
"challenge": "Load Pre-trained LoRA Adapter: Use correct adapter identifier",
    "solution": "model = PeftModel.from_pretrained(\n    model,\n    \"lora_adapter\",\n    adapter_name=\"default\"\n)\n# Merge LoRA weights into base model\nmodel = model.merge_and_unload()",
    "placeholder": "model = PeftModel.from_pretrained(model, ____)\n# Merge LoRA weights into base model\nmodel = model.merge_and_unload()",
    "context": "Ensure to provide the correct adapter identifier when loading the pre-trained LoRA adapter.",
"assessment_criteria": [
"Ensure that the correct adapter identifier is used to load the LoRA adapter."
]
},
{
"challenge": "Merge LoRA Adapter: Successfully merge and unload adapter weights",
"solution": "model = PeftModel.from_pretrained(\n model,\n \"lora_adapter\",\n adapter_name=\"default\"\n).merge_and_unload()",
"placeholder": "model = PeftModel.from_pretrained(____, ____)\n# Merge weights\nmodel = peft_model.____",
"context": "Merging and unloading the adapter weights prepares the model for efficient inference.",
"assessment_criteria": [
"Confirm that the adapter is merged with the base model and unloaded appropriately to optimize inference performance."
]
},
{
"challenge": "Merge a LoRA adapter into the base model for inference",
"solution": "model = AutoModelForCausalLM.from_pretrained(\"base_model\")\npeft_model = PeftModel.from_pretrained(model, \"lora_adapter\")\n# Merge LoRA weights into base model\nmodel = peft_model.merge_and_unload()",
"placeholder": "model = AutoModelForCausalLM.from_pretrained(____)\npeft_model = PeftModel.from_pretrained(____, ____)\n# Merge weights\nmodel = peft_model.____",
"context": "You need to merge a trained LoRA adapter back into the base model for efficient inference",
"assessment_criteria": [
"Is base model loaded correctly?",
"Is LoRA adapter loaded with PeftModel?",
"Is merge_and_unload() used to combine weights?"
]
},
{
"challenge": "Configure Training Duration for Fine-tuning",
"solution": "trainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n num_train_epochs=3,\n max_steps=None\n)",
"placeholder": "trainer = SFTTrainer(\n model=model,\n train_dataset=dataset,\n num_train_epochs=10, \n max_steps=None\n)",
"context": "The model is showing signs of overfitting after epoch 5. Configure the trainer to use fewer epochs (2-4) to prevent this.",
"assessment_criteria": [
"Is num_train_epochs set between 2 and 4?",
"Is max_steps left as None to use epoch-based training?"
]
}
]