File size: 2,496 Bytes
262245a
 
 
187dcb0
262245a
187dcb0
262245a
 
 
 
7066f4b
07fcd55
187dcb0
 
07fcd55
 
 
745c8c6
187dcb0
 
745c8c6
 
 
 
187dcb0
 
745c8c6
 
 
13dfecf
187dcb0
 
13dfecf
 
 
 
187dcb0
 
13dfecf
 
 
 
187dcb0
 
13dfecf
 
 
 
187dcb0
 
13dfecf
 
 
187dcb0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262245a
 
 
187dcb0
 
262245a
187dcb0
262245a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 100.0,
  "eval_steps": 500,
  "global_step": 7900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 6.33,
      "learning_rate": 4.683544303797468e-05,
      "loss": 0.8991,
      "step": 500
    },
    {
      "epoch": 12.66,
      "learning_rate": 4.367088607594937e-05,
      "loss": 0.1014,
      "step": 1000
    },
    {
      "epoch": 18.99,
      "learning_rate": 4.050632911392405e-05,
      "loss": 0.0476,
      "step": 1500
    },
    {
      "epoch": 25.32,
      "learning_rate": 3.7341772151898736e-05,
      "loss": 0.0275,
      "step": 2000
    },
    {
      "epoch": 31.65,
      "learning_rate": 3.4177215189873416e-05,
      "loss": 0.0243,
      "step": 2500
    },
    {
      "epoch": 37.97,
      "learning_rate": 3.10126582278481e-05,
      "loss": 0.0165,
      "step": 3000
    },
    {
      "epoch": 44.3,
      "learning_rate": 2.7848101265822786e-05,
      "loss": 0.0252,
      "step": 3500
    },
    {
      "epoch": 50.63,
      "learning_rate": 2.468354430379747e-05,
      "loss": 0.034,
      "step": 4000
    },
    {
      "epoch": 56.96,
      "learning_rate": 2.1518987341772153e-05,
      "loss": 0.0469,
      "step": 4500
    },
    {
      "epoch": 63.29,
      "learning_rate": 1.8354430379746836e-05,
      "loss": 0.091,
      "step": 5000
    },
    {
      "epoch": 69.62,
      "learning_rate": 1.5189873417721521e-05,
      "loss": 0.2735,
      "step": 5500
    },
    {
      "epoch": 75.95,
      "learning_rate": 1.2025316455696203e-05,
      "loss": 0.4183,
      "step": 6000
    },
    {
      "epoch": 82.28,
      "learning_rate": 8.860759493670886e-06,
      "loss": 0.5516,
      "step": 6500
    },
    {
      "epoch": 88.61,
      "learning_rate": 5.69620253164557e-06,
      "loss": 0.6946,
      "step": 7000
    },
    {
      "epoch": 94.94,
      "learning_rate": 2.531645569620253e-06,
      "loss": 1.0278,
      "step": 7500
    },
    {
      "epoch": 100.0,
      "step": 7900,
      "total_flos": 9.7986283776e+16,
      "train_loss": 0.33136190450644193,
      "train_runtime": 3703.1966,
      "train_samples_per_second": 135.018,
      "train_steps_per_second": 2.133
    }
  ],
  "logging_steps": 500,
  "max_steps": 7900,
  "num_train_epochs": 100,
  "save_steps": 500,
  "total_flos": 9.7986283776e+16,
  "trial_name": null,
  "trial_params": null
}