{
    "architectures": [
        "LlamaForCausalLM"
    ],
    "model_type": "llama",
    "hidden_size": 4096,
    "num_attention_heads": 16,
    "num_hidden_layers": 12,
    "max_position_embeddings": 2048,
    "vocab_size": 50265,

    "lora_config": {
        "r": 16,
        "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        "lora_alpha": 16,
        "lora_dropout": 0,
        "bias": "none",
        "use_gradient_checkpointing": true,
        "use_rslora": false,
        "use_dora": false,
        "loftq_config": null
    },

    "training_dataset": {
        "name": "cyberblip/Travel_india",
        "split": "train",
        "input_field": "prompt"
    },

    "training_config": {
        "per_device_train_batch_size": 2,
        "gradient_accumulation_steps": 4,
        "warmup_steps": 5,
        "max_steps": 0,
        "num_train_epochs": 1,
        "learning_rate": 2e-4,
        "fp16": false,
        "bf16": true,
        "logging_steps": 1,
        "optim": "adamw_8bit",
        "weight_decay": 0.01,
        "lr_scheduler_type": "linear",
        "seed": 42,
        "output_dir": "outputs"
    }
}
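
Note: the lora_config, training_dataset, and training_config blocks mirror the argument names of Unsloth's FastLanguageModel.get_peft_model, Hugging Face TrainingArguments, and TRL's SFTTrainer. The sketch below is one plausible way this file could drive such a fine-tune; it is an assumption, not code shipped with this config. The base checkpoint name ("BASE_MODEL") and the config path are placeholders, "use_dora" is left out because not every Unsloth release accepts it, and newer TRL versions expect dataset_text_field inside an SFTConfig rather than as a direct keyword.

# Minimal sketch, assuming an Unsloth + TRL environment.
# "BASE_MODEL" and "config.json" are placeholders, not defined by this file.
import json
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel

cfg = json.load(open("config.json"))
lora, data, train = cfg["lora_config"], cfg["training_dataset"], cfg["training_config"]

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="BASE_MODEL",                       # placeholder base checkpoint
    max_seq_length=cfg["max_position_embeddings"],
)

# Attach LoRA adapters to the attention and MLP projections listed in lora_config.
model = FastLanguageModel.get_peft_model(
    model,
    r=lora["r"],
    target_modules=lora["target_modules"],
    lora_alpha=lora["lora_alpha"],
    lora_dropout=lora["lora_dropout"],
    bias=lora["bias"],
    use_gradient_checkpointing=lora["use_gradient_checkpointing"],
    use_rslora=lora["use_rslora"],
    loftq_config=lora["loftq_config"],
)

dataset = load_dataset(data["name"], split=data["split"])

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field=data["input_field"],        # the "prompt" column holds the text
    max_seq_length=cfg["max_position_embeddings"],
    args=TrainingArguments(
        per_device_train_batch_size=train["per_device_train_batch_size"],
        gradient_accumulation_steps=train["gradient_accumulation_steps"],
        warmup_steps=train["warmup_steps"],
        num_train_epochs=train["num_train_epochs"],  # max_steps of 0 leaves the epoch count in charge
        learning_rate=train["learning_rate"],
        fp16=train["fp16"],
        bf16=train["bf16"],
        logging_steps=train["logging_steps"],
        optim=train["optim"],
        weight_decay=train["weight_decay"],
        lr_scheduler_type=train["lr_scheduler_type"],
        seed=train["seed"],
        output_dir=train["output_dir"],
    ),
)
trainer.train()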