File size: 2,387 Bytes
9c5668e
 
 
b933dc2
9c5668e
b933dc2
9c5668e
 
 
 
 
b933dc2
7b8ddf5
 
 
9c5668e
 
 
b933dc2
7b8ddf5
 
 
9c5668e
 
 
b933dc2
7b8ddf5
 
 
9c5668e
 
 
b933dc2
7b8ddf5
 
 
9c5668e
 
 
b933dc2
7b8ddf5
 
 
9c5668e
 
 
b933dc2
7b8ddf5
 
 
9c5668e
 
 
b933dc2
7b8ddf5
 
 
9c5668e
 
 
b933dc2
7b8ddf5
 
 
9c5668e
 
 
b933dc2
 
 
7b8ddf5
 
 
 
9c5668e
 
 
b933dc2
9c5668e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b933dc2
 
9c5668e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 2060,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.4854368932038835,
      "grad_norm": 1.1003823280334473,
      "learning_rate": 8.794946550048592e-05,
      "loss": 0.8551,
      "step": 250
    },
    {
      "epoch": 0.970873786407767,
      "grad_norm": 0.7180963158607483,
      "learning_rate": 7.580174927113704e-05,
      "loss": 0.4137,
      "step": 500
    },
    {
      "epoch": 1.4563106796116505,
      "grad_norm": 0.6523966789245605,
      "learning_rate": 6.365403304178815e-05,
      "loss": 0.3735,
      "step": 750
    },
    {
      "epoch": 1.941747572815534,
      "grad_norm": 0.6745087504386902,
      "learning_rate": 5.150631681243926e-05,
      "loss": 0.3583,
      "step": 1000
    },
    {
      "epoch": 2.4271844660194173,
      "grad_norm": 0.6706854104995728,
      "learning_rate": 3.9358600583090386e-05,
      "loss": 0.345,
      "step": 1250
    },
    {
      "epoch": 2.912621359223301,
      "grad_norm": 0.7672198414802551,
      "learning_rate": 2.72108843537415e-05,
      "loss": 0.333,
      "step": 1500
    },
    {
      "epoch": 3.3980582524271843,
      "grad_norm": 0.7942991256713867,
      "learning_rate": 1.5063168124392615e-05,
      "loss": 0.3142,
      "step": 1750
    },
    {
      "epoch": 3.883495145631068,
      "grad_norm": 0.7696407437324524,
      "learning_rate": 2.915451895043732e-06,
      "loss": 0.302,
      "step": 2000
    },
    {
      "epoch": 4.0,
      "step": 2060,
      "total_flos": 1.4445804612483994e+18,
      "train_loss": 0.40855656966422366,
      "train_runtime": 23649.5822,
      "train_samples_per_second": 22.298,
      "train_steps_per_second": 0.087
    }
  ],
  "logging_steps": 250,
  "max_steps": 2060,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.4445804612483994e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}