File size: 2,495 Bytes
262245a
 
 
4e9d2b3
262245a
4e9d2b3
262245a
 
 
 
7066f4b
1a653ea
4e9d2b3
 
1a653ea
 
 
b3e8e33
4e9d2b3
 
b3e8e33
 
 
 
4e9d2b3
 
b3e8e33
 
 
8efdb84
4e9d2b3
 
8efdb84
 
 
 
4e9d2b3
 
8efdb84
 
 
 
4e9d2b3
 
8efdb84
 
 
 
4e9d2b3
 
8efdb84
 
 
4e9d2b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262245a
 
 
4e9d2b3
 
262245a
4e9d2b3
262245a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 100.0,
  "eval_steps": 500,
  "global_step": 7900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 6.33,
      "learning_rate": 4.683544303797468e-05,
      "loss": 1.1738,
      "step": 500
    },
    {
      "epoch": 12.66,
      "learning_rate": 4.367088607594937e-05,
      "loss": 0.1263,
      "step": 1000
    },
    {
      "epoch": 18.99,
      "learning_rate": 4.050632911392405e-05,
      "loss": 0.0547,
      "step": 1500
    },
    {
      "epoch": 25.32,
      "learning_rate": 3.7341772151898736e-05,
      "loss": 0.0303,
      "step": 2000
    },
    {
      "epoch": 31.65,
      "learning_rate": 3.4177215189873416e-05,
      "loss": 0.0218,
      "step": 2500
    },
    {
      "epoch": 37.97,
      "learning_rate": 3.10126582278481e-05,
      "loss": 0.0211,
      "step": 3000
    },
    {
      "epoch": 44.3,
      "learning_rate": 2.7848101265822786e-05,
      "loss": 0.0274,
      "step": 3500
    },
    {
      "epoch": 50.63,
      "learning_rate": 2.468354430379747e-05,
      "loss": 0.0399,
      "step": 4000
    },
    {
      "epoch": 56.96,
      "learning_rate": 2.1518987341772153e-05,
      "loss": 0.0601,
      "step": 4500
    },
    {
      "epoch": 63.29,
      "learning_rate": 1.8354430379746836e-05,
      "loss": 0.1468,
      "step": 5000
    },
    {
      "epoch": 69.62,
      "learning_rate": 1.5189873417721521e-05,
      "loss": 0.403,
      "step": 5500
    },
    {
      "epoch": 75.95,
      "learning_rate": 1.2025316455696203e-05,
      "loss": 0.5652,
      "step": 6000
    },
    {
      "epoch": 82.28,
      "learning_rate": 8.860759493670886e-06,
      "loss": 0.6874,
      "step": 6500
    },
    {
      "epoch": 88.61,
      "learning_rate": 5.69620253164557e-06,
      "loss": 0.764,
      "step": 7000
    },
    {
      "epoch": 94.94,
      "learning_rate": 2.531645569620253e-06,
      "loss": 1.0024,
      "step": 7500
    },
    {
      "epoch": 100.0,
      "step": 7900,
      "total_flos": 4.8994912512e+16,
      "train_loss": 0.3752000265483615,
      "train_runtime": 1847.2329,
      "train_samples_per_second": 270.675,
      "train_steps_per_second": 4.277
    }
  ],
  "logging_steps": 500,
  "max_steps": 7900,
  "num_train_epochs": 100,
  "save_steps": 500,
  "total_flos": 4.8994912512e+16,
  "trial_name": null,
  "trial_params": null
}