{
  "architectures": [
    "LlamaForCausalLM"
  ],
  "model_type": "llama",
  "hidden_size": 4096,
  "num_attention_heads": 16,
  "num_hidden_layers": 12,
  "max_position_embeddings": 2048,
  "vocab_size": 50265,

  "lora_config": {
    "r": 16,
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    "lora_alpha": 16,
    "lora_dropout": 0,
    "bias": "none",
    "use_gradient_checkpointing": true,
    "use_rslora": false,
    "use_dora": false,
    "loftq_config": null
  },

  "training_dataset": {
    "name": "cyberblip/Travel_india",
    "split": "train",
    "input_field": "prompt"
  },

  "training_config": {
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "warmup_steps": 5,
"max_steps": 0, |
    "num_train_epochs": 1,
    "learning_rate": 2e-4,
    "fp16": false,
    "bf16": true,
    "logging_steps": 1,
    "optim": "adamw_8bit",
    "weight_decay": 0.01,
    "lr_scheduler_type": "linear",
    "seed": 42,
    "output_dir": "outputs"
  }
}
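
Taken together, the three blocks describe a base Llama architecture, a PEFT-style LoRA adapter, the fine-tuning dataset, and Hugging Face-style training arguments. Below is a minimal sketch, assuming the `peft`, `datasets`, and `transformers` libraries, of how the blocks might be wired together. The base checkpoint name and the `config.json` path are assumptions not present in the config, and the `use_gradient_checkpointing` and `loftq_config` keys (which mirror Unsloth's `get_peft_model` extras rather than vanilla `peft.LoraConfig`) are handled by hand.

```python
# Hedged sketch: wiring this config's blocks together with peft/transformers.
import json

import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

with open("config.json") as f:  # hypothetical path to this file
    cfg = json.load(f)

# Base model: in practice a pretrained Llama checkpoint whose architecture
# matches the top-level fields (hidden_size, heads, layers, vocab_size, ...).
base_model = "meta-llama/Llama-2-7b-hf"  # assumption, not in the config
model = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.pad_token = tokenizer.eos_token

# "lora_config" maps almost one-to-one onto peft.LoraConfig; the
# use_gradient_checkpointing flag is applied to the model directly.
lora = cfg["lora_config"]
model = get_peft_model(
    model,
    LoraConfig(
        r=lora["r"],
        lora_alpha=lora["lora_alpha"],
        lora_dropout=lora["lora_dropout"],
        bias=lora["bias"],
        target_modules=lora["target_modules"],
        use_rslora=lora["use_rslora"],
        use_dora=lora["use_dora"],
        task_type="CAUSAL_LM",
    ),
)
if lora["use_gradient_checkpointing"]:
    model.gradient_checkpointing_enable()

# "training_dataset": tokenize the configured text field for causal LM,
# truncating at max_position_embeddings (2048) from the base config.
ds = load_dataset(cfg["training_dataset"]["name"], split=cfg["training_dataset"]["split"])
field = cfg["training_dataset"]["input_field"]
ds = ds.map(
    lambda ex: tokenizer(ex[field], truncation=True, max_length=2048),
    remove_columns=ds.column_names,
)

# "training_config" keys match transformers.TrainingArguments by name;
# optim="adamw_8bit" requires bitsandbytes, bf16=true an Ampere-class GPU.
trainer = Trainer(
    model=model,
    args=TrainingArguments(**cfg["training_config"]),
    train_dataset=ds,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()
```

Since `loftq_config` is null here it is simply omitted; a non-null value would instead map to `peft`'s LoftQ initialization via `LoraConfig(init_lora_weights="loftq", loftq_config=...)`.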