{
  "architectures": [
    "LlamaForCausalLM"
  ],
  "model_type": "llama",
  "hidden_size": 4096,
  "num_attention_heads": 16,
  "num_hidden_layers": 12,
  "max_position_embeddings": 2048,
  "vocab_size": 50265,
  "lora_config": {
    "r": 16,
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    "lora_alpha": 16,
    "lora_dropout": 0,
    "bias": "none",
    "use_gradient_checkpointing": true,
    "use_rslora": false,
    "use_dora": false,
    "loftq_config": null
  },
  "training_dataset": {
    "name": "cyberblip/Travel_india",
    "split": "train",
    "input_field": "prompt"
  },
  "training_config": {
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "warmup_steps": 5,
    "max_steps": 0,
    "num_train_epochs": 1,
    "learning_rate": 2e-4,
    "fp16": false,
    "bf16": true,
    "logging_steps": 1,
    "optim": "adamw_8bit",
    "weight_decay": 0.01,
    "lr_scheduler_type": "linear",
    "seed": 42,
    "output_dir": "outputs"
  }
}
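
For reference, a minimal sketch of how the top-level architecture fields could be replayed with transformers. The base checkpoint is not named in this file, so every field the JSON omits (intermediate_size, num_key_value_heads, rope settings) is assumed here to keep the LlamaConfig default.

# Sketch only: rebuild the model skeleton from the architecture fields above.
# Unspecified fields fall back to LlamaConfig defaults (an assumption).
from transformers import LlamaConfig, LlamaForCausalLM

config = LlamaConfig(
    hidden_size=4096,
    num_attention_heads=16,
    num_hidden_layers=12,
    max_position_embeddings=2048,
    vocab_size=50265,
)
model = LlamaForCausalLM(config)  # randomly initialized; real weights come from the checkpoint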
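
The lora_config block lines up almost one to one with the adapter arguments of peft's LoraConfig; use_gradient_checkpointing is the odd one out (it is an Unsloth-style flag, mapped below to the model's own gradient_checkpointing_enable). A hedged sketch, assuming the peft library:

# Sketch: apply the lora_config block with peft. loftq_config is null in the
# JSON, so it is simply left at the LoraConfig default here.
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=16,
    lora_dropout=0.0,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    use_rslora=False,
    use_dora=False,
)
model.gradient_checkpointing_enable()  # "use_gradient_checkpointing": true
model.enable_input_require_grads()     # needed for checkpointing with frozen base weights
model = get_peft_model(model, lora_config)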
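
training_dataset and training_config read like the inputs to a TRL-style SFT run. A sketch under that assumption; keyword placement varies across trl versions, and depending on the version you may also need to pass the tokenizer explicitly:

# Sketch: replay training_dataset and training_config with datasets + trl.
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

dataset = load_dataset("cyberblip/Travel_india", split="train")

training_args = SFTConfig(
    output_dir="outputs",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    num_train_epochs=1,         # "max_steps": 0 leaves epoch-based training in effect
    learning_rate=2e-4,
    fp16=False,
    bf16=True,
    logging_steps=1,
    optim="adamw_8bit",         # requires bitsandbytes
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=42,
    dataset_text_field="prompt",  # "input_field" from training_dataset
)
trainer = SFTTrainer(model=model, train_dataset=dataset, args=training_args)
trainer.train()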