diff --git "a/run-2024-07-08T02:33:22+00:00.log" "b/run-2024-07-08T02:33:22+00:00.log" --- "a/run-2024-07-08T02:33:22+00:00.log" +++ "b/run-2024-07-08T02:33:22+00:00.log" @@ -1214,4 +1214,1169 @@ Non-default generation parameters: {'max_length': 200, 'early_stopping': True, ' 6%|▋ | 23380/371472 [1:53:09<29:15:42, 3.30it/s] 6%|▋ | 23381/371472 [1:53:09<32:41:49, 2.96it/s] 6%|▋ | 23382/371472 [1:53:10<30:34:24, 3.16it/s] 6%|▋ | 23383/371472 [1:53:10<31:09:47, 3.10it/s] 6%|▋ | 23384/371472 [1:53:10<30:38:05, 3.16it/s] 6%|▋ | 23385/371472 [1:53:11<29:02:15, 3.33it/s] 6%|▋ | 23386/371472 [1:53:11<27:54:40, 3.46it/s] 6%|▋ | 23387/371472 [1:53:11<28:09:12, 3.43it/s] 6%|▋ | 23388/371472 [1:53:11<27:53:41, 3.47it/s] 6%|▋ | 23389/371472 [1:53:12<29:16:41, 3.30it/s] 6%|▋ | 23390/371472 [1:53:12<28:15:19, 3.42it/s] 6%|▋ | 23391/371472 [1:53:12<27:01:57, 3.58it/s] 6%|▋ | 23392/371472 [1:53:13<27:00:23, 3.58it/s] 6%|▋ | 23393/371472 [1:53:13<27:02:57, 3.57it/s] 6%|▋ | 23394/371472 [1:53:13<28:07:39, 3.44it/s] 6%|▋ | 23395/371472 [1:53:14<27:53:53, 3.47it/s] 6%|▋ | 23396/371472 [1:53:14<28:05:22, 3.44it/s] 6%|▋ | 23397/371472 [1:53:14<27:19:21, 3.54it/s] 6%|▋ | 23398/371472 [1:53:14<27:08:47, 3.56it/s] 6%|▋ | 23399/371472 [1:53:15<26:24:13, 3.66it/s] 6%|▋ | 23400/371472 [1:53:15<26:29:18, 3.65it/s] {'loss': 4.7279, 'learning_rate': 9.437609084444828e-07, 'epoch': 1.01} 6%|▋ | 23400/371472 [1:53:15<26:29:18, 3.65it/s] 6%|▋ | 23401/371472 [1:53:15<27:17:58, 3.54it/s] 6%|▋ | 23402/371472 [1:53:15<27:31:37, 3.51it/s] 6%|▋ | 23403/371472 [1:53:16<27:20:08, 3.54it/s] 6%|▋ | 23404/371472 [1:53:16<27:49:40, 3.47it/s] 6%|▋ | 23405/371472 [1:53:16<28:18:07, 3.42it/s] 6%|▋ | 23406/371472 [1:53:17<27:16:46, 3.54it/s] 6%|▋ | 23407/371472 [1:53:17<26:25:04, 3.66it/s] 6%|▋ | 23408/371472 [1:53:17<27:54:43, 3.46it/s] 6%|▋ | 23409/371472 [1:53:17<27:59:09, 3.45it/s] 6%|▋ | 23410/371472 [1:53:18<28:28:27, 3.40it/s] 6%|▋ | 23411/371472 [1:53:18<28:42:40, 3.37it/s] 6%|▋ | 23412/371472 [1:53:18<27:54:18, 3.46it/s] 6%|▋ | 23413/371472 [1:53:19<27:06:40, 3.57it/s] 6%|▋ | 23414/371472 [1:53:19<27:30:07, 3.52it/s] 6%|▋ | 23415/371472 [1:53:19<28:10:37, 3.43it/s] 6%|▋ | 23416/371472 [1:53:19<26:55:47, 3.59it/s] 6%|▋ | 23417/371472 [1:53:20<27:01:57, 3.58it/s] 6%|▋ | 23418/371472 [1:53:20<29:13:42, 3.31it/s] 6%|▋ | 23419/371472 [1:53:20<30:27:03, 3.17it/s] 6%|▋ | 23420/371472 [1:53:21<29:19:02, 3.30it/s] {'loss': 4.4206, 'learning_rate': 9.437124264690038e-07, 'epoch': 1.01} 6%|▋ | 23420/371472 [1:53:21<29:19:02, 3.30it/s] 6%|▋ | 23421/371472 [1:53:21<30:00:23, 3.22it/s] 6%|▋ | 23422/371472 [1:53:21<30:05:17, 3.21it/s] 6%|▋ | 23423/371472 [1:53:22<29:46:30, 3.25it/s] 6%|▋ | 23424/371472 [1:53:22<29:36:49, 3.26it/s] 6%|▋ | 23425/371472 [1:53:22<29:32:13, 3.27it/s] 6%|▋ | 23426/371472 [1:53:23<28:19:29, 3.41it/s] 6%|▋ | 23427/371472 [1:53:23<28:00:07, 3.45it/s] 6%|▋ | 23428/371472 [1:53:23<28:01:42, 3.45it/s] 6%|▋ | 23429/371472 [1:53:23<28:22:30, 3.41it/s] 6%|▋ | 23430/371472 [1:53:24<27:35:17, 3.50it/s] 6%|▋ | 23431/371472 [1:53:24<27:12:37, 3.55it/s] 6%|▋ | 23432/371472 [1:53:24<27:25:57, 3.52it/s] 6%|▋ | 23433/371472 [1:53:25<27:36:37, 3.50it/s] 6%|▋ | 23434/371472 [1:53:25<26:39:18, 3.63it/s] 6%|▋ | 23435/371472 [1:53:25<26:31:42, 3.64it/s] 6%|▋ | 23436/371472 [1:53:25<28:16:11, 3.42it/s] 6%|▋ | 23437/371472 [1:53:26<27:08:44, 3.56it/s] 6%|▋ | 23438/371472 [1:53:26<27:14:18, 3.55it/s] 6%|▋ | 23439/371472 [1:53:26<26:49:21, 3.60it/s] 6%|▋ | 23440/371472 [1:53:26<26:16:50, 3.68it/s] {'loss': 4.5451, 'learning_rate': 9.436639444935248e-07, 'epoch': 1.01} - 6%|▋ | 23440/371472 [1:53:26<26:16:50, 3.68it/s] 6%|▋ | 23441/371472 [1:53:27<26:08:53, 3.70it/s] 6%|▋ | 23442/371472 [1:53:27<26:16:43, 3.68it/s] \ No newline at end of file + 6%|▋ | 23440/371472 [1:53:26<26:16:50, 3.68it/s] 6%|▋ | 23441/371472 [1:53:27<26:08:53, 3.70it/s] 6%|▋ | 23442/371472 [1:53:27<26:16:43, 3.68it/s] 6%|▋ | 23443/371472 [1:53:27<27:40:09, 3.49it/s] 6%|▋ | 23444/371472 [1:53:28<27:34:35, 3.51it/s] 6%|▋ | 23445/371472 [1:53:28<28:05:10, 3.44it/s] 6%|▋ | 23446/371472 [1:53:28<27:36:38, 3.50it/s] 6%|▋ | 23447/371472 [1:53:28<26:45:53, 3.61it/s] 6%|▋ | 23448/371472 [1:53:29<26:11:21, 3.69it/s] 6%|▋ | 23449/371472 [1:53:29<26:32:25, 3.64it/s] 6%|▋ | 23450/371472 [1:53:29<26:27:19, 3.65it/s] 6%|▋ | 23451/371472 [1:53:30<27:08:38, 3.56it/s] 6%|▋ | 23452/371472 [1:53:30<28:55:59, 3.34it/s] 6%|▋ | 23453/371472 [1:53:30<27:36:25, 3.50it/s] 6%|▋ | 23454/371472 [1:53:30<27:03:22, 3.57it/s] 6%|▋ | 23455/371472 [1:53:31<27:45:38, 3.48it/s] 6%|▋ | 23456/371472 [1:53:31<30:12:49, 3.20it/s] 6%|▋ | 23457/371472 [1:53:31<30:34:35, 3.16it/s] 6%|▋ | 23458/371472 [1:53:32<29:33:20, 3.27it/s] 6%|▋ | 23459/371472 [1:53:32<28:06:21, 3.44it/s] 6%|▋ | 23460/371472 [1:53:32<27:43:52, 3.49it/s] {'loss': 4.6053, 'learning_rate': 9.43615462518046e-07, 'epoch': 1.01} + 6%|▋ | 23460/371472 [1:53:32<27:43:52, 3.49it/s] 6%|▋ | 23461/371472 [1:53:32<27:01:09, 3.58it/s] 6%|▋ | 23462/371472 [1:53:33<27:06:12, 3.57it/s] 6%|▋ | 23463/371472 [1:53:33<26:32:26, 3.64it/s] 6%|▋ | 23464/371472 [1:53:33<26:44:35, 3.61it/s] 6%|▋ | 23465/371472 [1:53:34<26:26:19, 3.66it/s] 6%|▋ | 23466/371472 [1:53:34<27:43:52, 3.49it/s] 6%|▋ | 23467/371472 [1:53:34<27:05:20, 3.57it/s] 6%|▋ | 23468/371472 [1:53:34<27:11:49, 3.55it/s] 6%|▋ | 23469/371472 [1:53:35<26:59:59, 3.58it/s] 6%|▋ | 23470/371472 [1:53:35<27:26:04, 3.52it/s] 6%|▋ | 23471/371472 [1:53:35<27:37:44, 3.50it/s] 6%|▋ | 23472/371472 [1:53:36<27:12:57, 3.55it/s] 6%|▋ | 23473/371472 [1:53:36<26:44:46, 3.61it/s] 6%|▋ | 23474/371472 [1:53:36<26:42:49, 3.62it/s] 6%|▋ | 23475/371472 [1:53:36<28:53:37, 3.35it/s] 6%|▋ | 23476/371472 [1:53:37<28:29:41, 3.39it/s] 6%|▋ | 23477/371472 [1:53:37<27:25:54, 3.52it/s] 6%|▋ | 23478/371472 [1:53:37<27:51:43, 3.47it/s] 6%|▋ | 23479/371472 [1:53:38<27:54:29, 3.46it/s] 6%|▋ | 23480/371472 [1:53:38<27:37:33, 3.50it/s] {'loss': 4.6269, 'learning_rate': 9.435669805425671e-07, 'epoch': 1.01} + 6%|▋ | 23480/371472 [1:53:38<27:37:33, 3.50it/s] 6%|▋ | 23481/371472 [1:53:38<27:50:44, 3.47it/s] 6%|▋ | 23482/371472 [1:53:38<26:53:09, 3.60it/s] 6%|▋ | 23483/371472 [1:53:39<29:05:54, 3.32it/s] 6%|▋ | 23484/371472 [1:53:39<28:08:45, 3.43it/s] 6%|▋ | 23485/371472 [1:53:39<27:39:07, 3.50it/s] 6%|▋ | 23486/371472 [1:53:40<27:56:17, 3.46it/s] 6%|▋ | 23487/371472 [1:53:40<28:11:33, 3.43it/s] 6%|▋ | 23488/371472 [1:53:40<27:54:22, 3.46it/s] 6%|▋ | 23489/371472 [1:53:40<27:52:47, 3.47it/s] 6%|▋ | 23490/371472 [1:53:41<31:51:29, 3.03it/s] 6%|▋ | 23491/371472 [1:53:41<29:38:38, 3.26it/s] 6%|▋ | 23492/371472 [1:53:41<29:38:18, 3.26it/s] 6%|▋ | 23493/371472 [1:53:42<29:05:13, 3.32it/s] 6%|▋ | 23494/371472 [1:53:42<28:25:52, 3.40it/s] 6%|▋ | 23495/371472 [1:53:42<28:10:08, 3.43it/s] 6%|▋ | 23496/371472 [1:53:43<27:13:57, 3.55it/s] 6%|▋ | 23497/371472 [1:53:43<27:56:57, 3.46it/s] 6%|▋ | 23498/371472 [1:53:43<27:59:32, 3.45it/s] 6%|▋ | 23499/371472 [1:53:43<28:55:57, 3.34it/s] 6%|▋ | 23500/371472 [1:53:44<27:50:25, 3.47it/s] {'loss': 4.559, 'learning_rate': 9.435184985670883e-07, 'epoch': 1.01} + 6%|▋ | 23500/371472 [1:53:44<27:50:25, 3.47it/s] 6%|▋ | 23501/371472 [1:53:44<27:45:32, 3.48it/s] 6%|▋ | 23502/371472 [1:53:44<26:57:38, 3.59it/s] 6%|▋ | 23503/371472 [1:53:45<26:37:10, 3.63it/s] 6%|▋ | 23504/371472 [1:53:45<26:50:17, 3.60it/s] 6%|▋ | 23505/371472 [1:53:45<26:28:44, 3.65it/s] 6%|▋ | 23506/371472 [1:53:45<27:51:49, 3.47it/s] 6%|▋ | 23507/371472 [1:53:46<27:52:25, 3.47it/s] 6%|▋ | 23508/371472 [1:53:46<29:06:37, 3.32it/s] 6%|▋ | 23509/371472 [1:53:46<28:20:48, 3.41it/s] 6%|▋ | 23510/371472 [1:53:47<28:21:27, 3.41it/s] 6%|▋ | 23511/371472 [1:53:47<27:23:34, 3.53it/s] 6%|▋ | 23512/371472 [1:53:47<28:59:48, 3.33it/s] 6%|▋ | 23513/371472 [1:53:48<29:05:53, 3.32it/s] 6%|▋ | 23514/371472 [1:53:48<30:23:53, 3.18it/s] 6%|▋ | 23515/371472 [1:53:48<30:16:32, 3.19it/s] 6%|▋ | 23516/371472 [1:53:49<31:18:28, 3.09it/s] 6%|▋ | 23517/371472 [1:53:49<31:03:48, 3.11it/s] 6%|▋ | 23518/371472 [1:53:49<30:10:39, 3.20it/s] 6%|▋ | 23519/371472 [1:53:49<29:19:25, 3.30it/s] 6%|▋ | 23520/371472 [1:53:50<28:31:56, 3.39it/s] {'loss': 4.4435, 'learning_rate': 9.434700165916093e-07, 'epoch': 1.01} + 6%|▋ | 23520/371472 [1:53:50<28:31:56, 3.39it/s] 6%|▋ | 23521/371472 [1:53:50<27:44:34, 3.48it/s] 6%|▋ | 23522/371472 [1:53:50<28:11:40, 3.43it/s] 6%|▋ | 23523/371472 [1:53:51<29:13:34, 3.31it/s] 6%|▋ | 23524/371472 [1:53:51<30:02:46, 3.22it/s] 6%|▋ | 23525/371472 [1:53:51<31:32:45, 3.06it/s] 6%|▋ | 23526/371472 [1:53:52<30:40:56, 3.15it/s] 6%|▋ | 23527/371472 [1:53:52<29:37:47, 3.26it/s] 6%|▋ | 23528/371472 [1:53:52<29:26:49, 3.28it/s] 6%|▋ | 23529/371472 [1:53:53<32:14:12, 3.00it/s] 6%|▋ | 23530/371472 [1:53:53<33:27:35, 2.89it/s] 6%|▋ | 23531/371472 [1:53:53<32:47:27, 2.95it/s] 6%|▋ | 23532/371472 [1:53:54<30:51:47, 3.13it/s] 6%|▋ | 23533/371472 [1:53:54<30:32:20, 3.16it/s] 6%|▋ | 23534/371472 [1:53:54<29:22:17, 3.29it/s] 6%|▋ | 23535/371472 [1:53:54<28:50:28, 3.35it/s] 6%|▋ | 23536/371472 [1:53:55<30:04:28, 3.21it/s] 6%|▋ | 23537/371472 [1:53:55<29:07:28, 3.32it/s] 6%|▋ | 23538/371472 [1:53:55<28:33:55, 3.38it/s] 6%|▋ | 23539/371472 [1:53:56<28:00:06, 3.45it/s] 6%|▋ | 23540/371472 [1:53:56<29:30:30, 3.28it/s] {'loss': 4.6021, 'learning_rate': 9.434215346161305e-07, 'epoch': 1.01} + 6%|▋ | 23540/371472 [1:53:56<29:30:30, 3.28it/s] 6%|▋ | 23541/371472 [1:53:56<29:01:50, 3.33it/s] 6%|▋ | 23542/371472 [1:53:57<28:57:51, 3.34it/s] 6%|▋ | 23543/371472 [1:53:57<29:37:25, 3.26it/s] 6%|▋ | 23544/371472 [1:53:57<28:01:16, 3.45it/s] 6%|▋ | 23545/371472 [1:53:57<28:22:17, 3.41it/s] 6%|▋ | 23546/371472 [1:53:58<27:43:34, 3.49it/s] 6%|▋ | 23547/371472 [1:53:58<27:11:30, 3.55it/s] 6%|▋ | 23548/371472 [1:53:58<27:49:12, 3.47it/s] 6%|▋ | 23549/371472 [1:53:58<27:03:01, 3.57it/s] 6%|▋ | 23550/371472 [1:53:59<32:28:40, 2.98it/s] 6%|▋ | 23551/371472 [1:53:59<32:25:43, 2.98it/s] 6%|▋ | 23552/371472 [1:54:00<30:23:32, 3.18it/s] 6%|▋ | 23553/371472 [1:54:00<31:15:10, 3.09it/s] 6%|▋ | 23554/371472 [1:54:00<32:10:40, 3.00it/s] 6%|▋ | 23555/371472 [1:54:01<30:11:28, 3.20it/s] 6%|▋ | 23556/371472 [1:54:01<29:03:02, 3.33it/s] 6%|▋ | 23557/371472 [1:54:01<31:02:32, 3.11it/s] 6%|▋ | 23558/371472 [1:54:01<30:39:11, 3.15it/s] 6%|▋ | 23559/371472 [1:54:02<29:28:15, 3.28it/s] 6%|▋ | 23560/371472 [1:54:02<28:31:13, 3.39it/s] {'loss': 4.8653, 'learning_rate': 9.433730526406515e-07, 'epoch': 1.01} + 6%|▋ | 23560/371472 [1:54:02<28:31:13, 3.39it/s] 6%|▋ | 23561/371472 [1:54:02<29:50:22, 3.24it/s] 6%|▋ | 23562/371472 [1:54:03<29:17:04, 3.30it/s] 6%|▋ | 23563/371472 [1:54:03<29:21:48, 3.29it/s] 6%|▋ | 23564/371472 [1:54:03<29:17:31, 3.30it/s] 6%|▋ | 23565/371472 [1:54:04<28:50:27, 3.35it/s] 6%|▋ | 23566/371472 [1:54:04<28:27:53, 3.40it/s] 6%|▋ | 23567/371472 [1:54:04<28:48:24, 3.35it/s] 6%|▋ | 23568/371472 [1:54:04<28:06:27, 3.44it/s] 6%|▋ | 23569/371472 [1:54:05<27:43:57, 3.48it/s] 6%|▋ | 23570/371472 [1:54:05<28:51:16, 3.35it/s] 6%|▋ | 23571/371472 [1:54:05<30:33:46, 3.16it/s] 6%|▋ | 23572/371472 [1:54:06<29:21:33, 3.29it/s] 6%|▋ | 23573/371472 [1:54:06<28:25:14, 3.40it/s] 6%|▋ | 23574/371472 [1:54:06<27:38:56, 3.50it/s] 6%|▋ | 23575/371472 [1:54:07<32:18:36, 2.99it/s] 6%|▋ | 23576/371472 [1:54:07<29:57:31, 3.23it/s] 6%|▋ | 23577/371472 [1:54:07<29:21:59, 3.29it/s] 6%|▋ | 23578/371472 [1:54:07<27:53:26, 3.46it/s] 6%|▋ | 23579/371472 [1:54:08<27:39:50, 3.49it/s] 6%|▋ | 23580/371472 [1:54:08<27:35:59, 3.50it/s] {'loss': 4.6137, 'learning_rate': 9.433245706651727e-07, 'epoch': 1.02} + 6%|▋ | 23580/371472 [1:54:08<27:35:59, 3.50it/s] 6%|▋ | 23581/371472 [1:54:08<29:14:00, 3.31it/s] 6%|▋ | 23582/371472 [1:54:09<29:39:10, 3.26it/s] 6%|▋ | 23583/371472 [1:54:09<28:13:48, 3.42it/s] 6%|▋ | 23584/371472 [1:54:09<30:59:07, 3.12it/s] 6%|▋ | 23585/371472 [1:54:10<31:47:16, 3.04it/s] 6%|▋ | 23586/371472 [1:54:10<30:35:22, 3.16it/s] 6%|▋ | 23587/371472 [1:54:10<29:07:13, 3.32it/s] 6%|▋ | 23588/371472 [1:54:11<29:12:01, 3.31it/s] 6%|▋ | 23589/371472 [1:54:11<28:49:37, 3.35it/s] 6%|▋ | 23590/371472 [1:54:11<27:59:32, 3.45it/s] 6%|▋ | 23591/371472 [1:54:11<29:03:13, 3.33it/s] 6%|▋ | 23592/371472 [1:54:12<29:41:38, 3.25it/s] 6%|▋ | 23593/371472 [1:54:12<30:49:51, 3.13it/s] 6%|▋ | 23594/371472 [1:54:12<31:08:26, 3.10it/s] 6%|▋ | 23595/371472 [1:54:13<32:03:05, 3.01it/s] 6%|▋ | 23596/371472 [1:54:13<31:31:30, 3.07it/s] 6%|▋ | 23597/371472 [1:54:13<30:33:24, 3.16it/s] 6%|▋ | 23598/371472 [1:54:14<30:33:20, 3.16it/s] 6%|▋ | 23599/371472 [1:54:14<29:44:29, 3.25it/s] 6%|▋ | 23600/371472 [1:54:14<28:32:36, 3.39it/s] {'loss': 4.4127, 'learning_rate': 9.432760886896937e-07, 'epoch': 1.02} + 6%|▋ | 23600/371472 [1:54:14<28:32:36, 3.39it/s] 6%|▋ | 23601/371472 [1:54:14<27:43:45, 3.48it/s] 6%|▋ | 23602/371472 [1:54:15<27:46:44, 3.48it/s] 6%|▋ | 23603/371472 [1:54:15<27:36:37, 3.50it/s] 6%|▋ | 23604/371472 [1:54:15<26:55:01, 3.59it/s] 6%|▋ | 23605/371472 [1:54:16<28:20:57, 3.41it/s] 6%|▋ | 23606/371472 [1:54:16<28:43:49, 3.36it/s] 6%|▋ | 23607/371472 [1:54:16<27:52:39, 3.47it/s] 6%|▋ | 23608/371472 [1:54:17<28:43:24, 3.36it/s] 6%|▋ | 23609/371472 [1:54:17<29:02:39, 3.33it/s] 6%|▋ | 23610/371472 [1:54:17<29:33:23, 3.27it/s] 6%|▋ | 23611/371472 [1:54:17<29:25:44, 3.28it/s] 6%|▋ | 23612/371472 [1:54:18<28:41:20, 3.37it/s] 6%|▋ | 23613/371472 [1:54:18<28:36:00, 3.38it/s] 6%|▋ | 23614/371472 [1:54:18<29:02:01, 3.33it/s] 6%|▋ | 23615/371472 [1:54:19<28:55:40, 3.34it/s] 6%|▋ | 23616/371472 [1:54:19<28:25:22, 3.40it/s] 6%|▋ | 23617/371472 [1:54:19<27:26:56, 3.52it/s] 6%|▋ | 23618/371472 [1:54:19<27:49:21, 3.47it/s] 6%|▋ | 23619/371472 [1:54:20<28:48:30, 3.35it/s] 6%|▋ | 23620/371472 [1:54:20<28:51:01, 3.35it/s] {'loss': 4.43, 'learning_rate': 9.432276067142149e-07, 'epoch': 1.02} + 6%|▋ | 23620/371472 [1:54:20<28:51:01, 3.35it/s] 6%|▋ | 23621/371472 [1:54:20<27:58:29, 3.45it/s] 6%|▋ | 23622/371472 [1:54:21<27:25:18, 3.52it/s] 6%|▋ | 23623/371472 [1:54:21<26:58:47, 3.58it/s] 6%|▋ | 23624/371472 [1:54:21<26:39:18, 3.62it/s] 6%|▋ | 23625/371472 [1:54:21<26:24:21, 3.66it/s] 6%|▋ | 23626/371472 [1:54:22<28:12:27, 3.43it/s] 6%|▋ | 23627/371472 [1:54:22<27:45:53, 3.48it/s] 6%|▋ | 23628/371472 [1:54:22<28:33:09, 3.38it/s] 6%|▋ | 23629/371472 [1:54:23<28:51:12, 3.35it/s] 6%|▋ | 23630/371472 [1:54:23<29:25:00, 3.28it/s] 6%|▋ | 23631/371472 [1:54:23<29:33:57, 3.27it/s] 6%|▋ | 23632/371472 [1:54:24<28:23:06, 3.40it/s] 6%|▋ | 23633/371472 [1:54:24<28:52:22, 3.35it/s] 6%|▋ | 23634/371472 [1:54:24<28:43:37, 3.36it/s] 6%|▋ | 23635/371472 [1:54:24<27:40:57, 3.49it/s] 6%|▋ | 23636/371472 [1:54:25<27:45:05, 3.48it/s] 6%|▋ | 23637/371472 [1:54:25<28:11:01, 3.43it/s] 6%|▋ | 23638/371472 [1:54:25<28:49:51, 3.35it/s] 6%|▋ | 23639/371472 [1:54:26<30:38:49, 3.15it/s] 6%|▋ | 23640/371472 [1:54:26<34:06:46, 2.83it/s] {'loss': 4.5782, 'learning_rate': 9.43179124738736e-07, 'epoch': 1.02} + 6%|▋ | 23640/371472 [1:54:26<34:06:46, 2.83it/s] 6%|▋ | 23641/371472 [1:54:26<32:42:42, 2.95it/s] 6%|▋ | 23642/371472 [1:54:27<30:36:42, 3.16it/s] 6%|▋ | 23643/371472 [1:54:27<32:30:03, 2.97it/s] 6%|▋ | 23644/371472 [1:54:27<30:33:26, 3.16it/s] 6%|▋ | 23645/371472 [1:54:28<29:05:16, 3.32it/s] 6%|▋ | 23646/371472 [1:54:28<28:14:03, 3.42it/s] 6%|▋ | 23647/371472 [1:54:28<28:21:24, 3.41it/s] 6%|▋ | 23648/371472 [1:54:28<27:51:03, 3.47it/s] 6%|▋ | 23649/371472 [1:54:29<28:09:08, 3.43it/s] 6%|▋ | 23650/371472 [1:54:29<27:54:11, 3.46it/s] 6%|▋ | 23651/371472 [1:54:29<28:20:09, 3.41it/s] 6%|▋ | 23652/371472 [1:54:30<30:06:55, 3.21it/s] 6%|▋ | 23653/371472 [1:54:30<30:32:31, 3.16it/s] 6%|▋ | 23654/371472 [1:54:30<29:26:59, 3.28it/s] 6%|▋ | 23655/371472 [1:54:31<29:36:17, 3.26it/s] 6%|▋ | 23656/371472 [1:54:31<28:50:53, 3.35it/s] 6%|▋ | 23657/371472 [1:54:31<29:23:34, 3.29it/s] 6%|▋ | 23658/371472 [1:54:32<30:18:38, 3.19it/s] 6%|▋ | 23659/371472 [1:54:32<29:53:49, 3.23it/s] 6%|▋ | 23660/371472 [1:54:32<30:27:48, 3.17it/s] {'loss': 4.6049, 'learning_rate': 9.431306427632572e-07, 'epoch': 1.02} + 6%|▋ | 23660/371472 [1:54:32<30:27:48, 3.17it/s] 6%|▋ | 23661/371472 [1:54:33<30:48:02, 3.14it/s] 6%|▋ | 23662/371472 [1:54:33<29:12:55, 3.31it/s] 6%|▋ | 23663/371472 [1:54:33<28:19:09, 3.41it/s] 6%|▋ | 23664/371472 [1:54:33<27:33:07, 3.51it/s] 6%|▋ | 23665/371472 [1:54:34<28:37:56, 3.37it/s] 6%|▋ | 23666/371472 [1:54:34<28:08:49, 3.43it/s] 6%|▋ | 23667/371472 [1:54:34<28:06:49, 3.44it/s] 6%|▋ | 23668/371472 [1:54:34<27:26:55, 3.52it/s] 6%|▋ | 23669/371472 [1:54:35<26:52:52, 3.59it/s] 6%|▋ | 23670/371472 [1:54:35<29:43:14, 3.25it/s] 6%|▋ | 23671/371472 [1:54:35<28:09:37, 3.43it/s] 6%|▋ | 23672/371472 [1:54:36<28:18:56, 3.41it/s] 6%|▋ | 23673/371472 [1:54:36<27:56:37, 3.46it/s] 6%|▋ | 23674/371472 [1:54:36<28:00:15, 3.45it/s] 6%|▋ | 23675/371472 [1:54:37<27:14:18, 3.55it/s] 6%|▋ | 23676/371472 [1:54:37<27:01:31, 3.57it/s] 6%|▋ | 23677/371472 [1:54:37<27:18:24, 3.54it/s] 6%|▋ | 23678/371472 [1:54:37<27:05:08, 3.57it/s] 6%|▋ | 23679/371472 [1:54:38<26:16:37, 3.68it/s] 6%|▋ | 23680/371472 [1:54:38<27:09:00, 3.56it/s] {'loss': 4.6632, 'learning_rate': 9.430821607877781e-07, 'epoch': 1.02} + 6%|▋ | 23680/371472 [1:54:38<27:09:00, 3.56it/s] 6%|▋ | 23681/371472 [1:54:38<27:54:23, 3.46it/s] 6%|▋ | 23682/371472 [1:54:39<28:37:18, 3.38it/s] 6%|▋ | 23683/371472 [1:54:39<28:34:08, 3.38it/s] 6%|▋ | 23684/371472 [1:54:39<28:08:20, 3.43it/s] 6%|▋ | 23685/371472 [1:54:39<27:45:30, 3.48it/s] 6%|▋ | 23686/371472 [1:54:40<27:40:16, 3.49it/s] 6%|▋ | 23687/371472 [1:54:40<29:03:25, 3.32it/s] 6%|▋ | 23688/371472 [1:54:40<29:16:14, 3.30it/s] 6%|▋ | 23689/371472 [1:54:41<31:34:26, 3.06it/s] 6%|▋ | 23690/371472 [1:54:41<31:40:15, 3.05it/s] 6%|▋ | 23691/371472 [1:54:41<30:44:18, 3.14it/s] 6%|▋ | 23692/371472 [1:54:42<29:20:58, 3.29it/s] 6%|▋ | 23693/371472 [1:54:42<28:43:29, 3.36it/s] 6%|▋ | 23694/371472 [1:54:42<28:02:42, 3.44it/s] 6%|▋ | 23695/371472 [1:54:42<29:02:47, 3.33it/s] 6%|▋ | 23696/371472 [1:54:43<29:11:19, 3.31it/s] 6%|▋ | 23697/371472 [1:54:43<28:48:31, 3.35it/s] 6%|▋ | 23698/371472 [1:54:43<30:10:09, 3.20it/s] 6%|▋ | 23699/371472 [1:54:44<28:45:55, 3.36it/s] 6%|▋ | 23700/371472 [1:54:44<29:36:18, 3.26it/s] {'loss': 4.3574, 'learning_rate': 9.430336788122993e-07, 'epoch': 1.02} + 6%|▋ | 23700/371472 [1:54:44<29:36:18, 3.26it/s] 6%|▋ | 23701/371472 [1:54:44<29:45:38, 3.25it/s] 6%|▋ | 23702/371472 [1:54:45<28:23:16, 3.40it/s] 6%|▋ | 23703/371472 [1:54:45<28:45:32, 3.36it/s] 6%|▋ | 23704/371472 [1:54:45<27:31:57, 3.51it/s] 6%|▋ | 23705/371472 [1:54:45<27:50:35, 3.47it/s] 6%|▋ | 23706/371472 [1:54:46<29:02:08, 3.33it/s] 6%|▋ | 23707/371472 [1:54:46<29:50:13, 3.24it/s] 6%|▋ | 23708/371472 [1:54:46<29:44:43, 3.25it/s] 6%|▋ | 23709/371472 [1:54:47<30:47:55, 3.14it/s] 6%|▋ | 23710/371472 [1:54:47<29:52:55, 3.23it/s] 6%|▋ | 23711/371472 [1:54:47<30:48:34, 3.14it/s] 6%|▋ | 23712/371472 [1:54:48<29:45:37, 3.25it/s] 6%|▋ | 23713/371472 [1:54:48<29:31:26, 3.27it/s] 6%|▋ | 23714/371472 [1:54:48<29:29:39, 3.28it/s] 6%|▋ | 23715/371472 [1:54:49<28:19:27, 3.41it/s] 6%|▋ | 23716/371472 [1:54:49<27:51:57, 3.47it/s] 6%|▋ | 23717/371472 [1:54:49<28:19:50, 3.41it/s] 6%|▋ | 23718/371472 [1:54:49<28:31:14, 3.39it/s] 6%|▋ | 23719/371472 [1:54:50<28:31:12, 3.39it/s] 6%|▋ | 23720/371472 [1:54:50<29:23:49, 3.29it/s] {'loss': 4.7483, 'learning_rate': 9.429851968368204e-07, 'epoch': 1.02} + 6%|▋ | 23720/371472 [1:54:50<29:23:49, 3.29it/s] 6%|▋ | 23721/371472 [1:54:50<28:15:37, 3.42it/s] 6%|▋ | 23722/371472 [1:54:51<27:32:44, 3.51it/s] 6%|▋ | 23723/371472 [1:54:51<29:08:05, 3.32it/s] 6%|▋ | 23724/371472 [1:54:51<31:17:07, 3.09it/s] 6%|▋ | 23725/371472 [1:54:52<29:50:14, 3.24it/s] 6%|▋ | 23726/371472 [1:54:52<29:26:00, 3.28it/s] 6%|▋ | 23727/371472 [1:54:52<29:18:00, 3.30it/s] 6%|▋ | 23728/371472 [1:54:52<29:03:55, 3.32it/s] 6%|▋ | 23729/371472 [1:54:53<29:15:50, 3.30it/s] 6%|▋ | 23730/371472 [1:54:53<28:47:33, 3.35it/s] 6%|▋ | 23731/371472 [1:54:53<29:25:44, 3.28it/s] 6%|▋ | 23732/371472 [1:54:54<29:57:47, 3.22it/s] 6%|▋ | 23733/371472 [1:54:54<29:28:49, 3.28it/s] 6%|▋ | 23734/371472 [1:54:54<30:03:48, 3.21it/s] 6%|▋ | 23735/371472 [1:54:55<29:59:57, 3.22it/s] 6%|▋ | 23736/371472 [1:54:55<30:09:03, 3.20it/s] 6%|▋ | 23737/371472 [1:54:55<30:30:13, 3.17it/s] 6%|▋ | 23738/371472 [1:54:56<29:03:43, 3.32it/s] 6%|▋ | 23739/371472 [1:54:56<28:38:33, 3.37it/s] 6%|▋ | 23740/371472 [1:54:56<28:56:58, 3.34it/s] {'loss': 4.4544, 'learning_rate': 9.429367148613415e-07, 'epoch': 1.02} + 6%|▋ | 23740/371472 [1:54:56<28:56:58, 3.34it/s] 6%|▋ | 23741/371472 [1:54:56<27:55:22, 3.46it/s] 6%|▋ | 23742/371472 [1:54:57<27:21:40, 3.53it/s] 6%|▋ | 23743/371472 [1:54:57<28:16:51, 3.42it/s] 6%|▋ | 23744/371472 [1:54:57<27:59:38, 3.45it/s] 6%|▋ | 23745/371472 [1:54:58<27:32:31, 3.51it/s] 6%|▋ | 23746/371472 [1:54:58<29:54:30, 3.23it/s] 6%|▋ | 23747/371472 [1:54:58<28:34:07, 3.38it/s] 6%|▋ | 23748/371472 [1:54:58<27:40:07, 3.49it/s] 6%|▋ | 23749/371472 [1:54:59<27:14:43, 3.55it/s] 6%|▋ | 23750/371472 [1:54:59<27:31:22, 3.51it/s] 6%|▋ | 23751/371472 [1:54:59<27:04:42, 3.57it/s] 6%|▋ | 23752/371472 [1:55:00<26:56:56, 3.58it/s] 6%|▋ | 23753/371472 [1:55:00<27:19:44, 3.53it/s] 6%|▋ | 23754/371472 [1:55:00<26:57:14, 3.58it/s] 6%|▋ | 23755/371472 [1:55:00<27:12:52, 3.55it/s] 6%|▋ | 23756/371472 [1:55:01<28:50:18, 3.35it/s] 6%|▋ | 23757/371472 [1:55:01<29:48:56, 3.24it/s] 6%|▋ | 23758/371472 [1:55:01<29:31:23, 3.27it/s] 6%|▋ | 23759/371472 [1:55:02<28:49:04, 3.35it/s] 6%|▋ | 23760/371472 [1:55:02<27:38:57, 3.49it/s] {'loss': 4.652, 'learning_rate': 9.428882328858626e-07, 'epoch': 1.02} + 6%|▋ | 23760/371472 [1:55:02<27:38:57, 3.49it/s] 6%|▋ | 23761/371472 [1:55:02<27:43:25, 3.48it/s] 6%|▋ | 23762/371472 [1:55:02<28:13:03, 3.42it/s] 6%|▋ | 23763/371472 [1:55:03<28:21:56, 3.41it/s] 6%|▋ | 23764/371472 [1:55:03<28:50:35, 3.35it/s] 6%|▋ | 23765/371472 [1:55:03<28:45:01, 3.36it/s] 6%|▋ | 23766/371472 [1:55:04<28:33:00, 3.38it/s] 6%|▋ | 23767/371472 [1:55:04<27:49:44, 3.47it/s] 6%|▋ | 23768/371472 [1:55:04<29:07:36, 3.32it/s] 6%|▋ | 23769/371472 [1:55:05<29:55:35, 3.23it/s] 6%|▋ | 23770/371472 [1:55:05<29:38:28, 3.26it/s] 6%|▋ | 23771/371472 [1:55:05<32:12:10, 3.00it/s] 6%|▋ | 23772/371472 [1:55:06<30:20:25, 3.18it/s] 6%|▋ | 23773/371472 [1:55:06<29:52:41, 3.23it/s] 6%|▋ | 23774/371472 [1:55:06<29:02:57, 3.32it/s] 6%|▋ | 23775/371472 [1:55:06<28:27:28, 3.39it/s] 6%|▋ | 23776/371472 [1:55:07<30:05:46, 3.21it/s] 6%|▋ | 23777/371472 [1:55:07<29:00:43, 3.33it/s] 6%|▋ | 23778/371472 [1:55:07<28:41:53, 3.37it/s] 6%|▋ | 23779/371472 [1:55:08<28:38:32, 3.37it/s] 6%|▋ | 23780/371472 [1:55:08<30:29:55, 3.17it/s] {'loss': 4.7933, 'learning_rate': 9.428397509103838e-07, 'epoch': 1.02} + 6%|▋ | 23780/371472 [1:55:08<30:29:55, 3.17it/s] 6%|▋ | 23781/371472 [1:55:08<31:47:42, 3.04it/s] 6%|▋ | 23782/371472 [1:55:09<34:21:47, 2.81it/s] 6%|▋ | 23783/371472 [1:55:09<32:30:15, 2.97it/s] 6%|▋ | 23784/371472 [1:55:09<31:22:51, 3.08it/s] 6%|▋ | 23785/371472 [1:55:10<29:46:15, 3.24it/s] 6%|▋ | 23786/371472 [1:55:10<29:10:33, 3.31it/s] 6%|▋ | 23787/371472 [1:55:10<28:47:30, 3.35it/s] 6%|▋ | 23788/371472 [1:55:10<28:45:57, 3.36it/s] 6%|▋ | 23789/371472 [1:55:11<28:04:28, 3.44it/s] 6%|▋ | 23790/371472 [1:55:11<27:21:33, 3.53it/s] 6%|▋ | 23791/371472 [1:55:11<26:39:25, 3.62it/s] 6%|▋ | 23792/371472 [1:55:12<27:20:31, 3.53it/s] 6%|▋ | 23793/371472 [1:55:12<28:15:48, 3.42it/s] 6%|▋ | 23794/371472 [1:55:12<28:01:00, 3.45it/s] 6%|▋ | 23795/371472 [1:55:13<29:11:45, 3.31it/s] 6%|▋ | 23796/371472 [1:55:13<28:55:50, 3.34it/s] 6%|▋ | 23797/371472 [1:55:13<28:47:14, 3.35it/s] 6%|▋ | 23798/371472 [1:55:13<29:08:00, 3.31it/s] 6%|▋ | 23799/371472 [1:55:14<28:20:19, 3.41it/s] 6%|▋ | 23800/371472 [1:55:14<28:40:46, 3.37it/s] {'loss': 4.6546, 'learning_rate': 9.427912689349049e-07, 'epoch': 1.03} + 6%|▋ | 23800/371472 [1:55:14<28:40:46, 3.37it/s] 6%|▋ | 23801/371472 [1:55:14<28:18:01, 3.41it/s] 6%|▋ | 23802/371472 [1:55:15<27:33:28, 3.50it/s] 6%|▋ | 23803/371472 [1:55:15<28:03:20, 3.44it/s] 6%|▋ | 23804/371472 [1:55:15<27:29:21, 3.51it/s] 6%|▋ | 23805/371472 [1:55:15<29:25:06, 3.28it/s] 6%|▋ | 23806/371472 [1:55:16<30:07:27, 3.21it/s] 6%|▋ | 23807/371472 [1:55:16<30:10:35, 3.20it/s] 6%|▋ | 23808/371472 [1:55:16<30:53:07, 3.13it/s] 6%|▋ | 23809/371472 [1:55:17<30:37:46, 3.15it/s] 6%|▋ | 23810/371472 [1:55:17<30:09:01, 3.20it/s] 6%|▋ | 23811/371472 [1:55:17<29:32:24, 3.27it/s] 6%|▋ | 23812/371472 [1:55:18<29:59:17, 3.22it/s] 6%|▋ | 23813/371472 [1:55:18<29:00:47, 3.33it/s] 6%|▋ | 23814/371472 [1:55:18<28:17:07, 3.41it/s] 6%|▋ | 23815/371472 [1:55:19<27:45:49, 3.48it/s] 6%|▋ | 23816/371472 [1:55:19<29:51:27, 3.23it/s] 6%|▋ | 23817/371472 [1:55:19<30:20:17, 3.18it/s] 6%|▋ | 23818/371472 [1:55:20<32:46:03, 2.95it/s] 6%|▋ | 23819/371472 [1:55:20<31:52:33, 3.03it/s] 6%|▋ | 23820/371472 [1:55:20<29:56:13, 3.23it/s] {'loss': 4.5992, 'learning_rate': 9.427427869594258e-07, 'epoch': 1.03} + 6%|▋ | 23820/371472 [1:55:20<29:56:13, 3.23it/s] 6%|▋ | 23821/371472 [1:55:20<30:21:46, 3.18it/s] 6%|▋ | 23822/371472 [1:55:21<29:17:22, 3.30it/s] 6%|▋ | 23823/371472 [1:55:21<28:14:00, 3.42it/s] 6%|▋ | 23824/371472 [1:55:21<27:49:04, 3.47it/s] 6%|▋ | 23825/371472 [1:55:22<26:54:31, 3.59it/s] 6%|▋ | 23826/371472 [1:55:22<27:35:40, 3.50it/s] 6%|▋ | 23827/371472 [1:55:22<27:02:32, 3.57it/s] 6%|▋ | 23828/371472 [1:55:22<28:23:27, 3.40it/s] 6%|▋ | 23829/371472 [1:55:23<29:15:31, 3.30it/s] 6%|▋ | 23830/371472 [1:55:23<31:46:46, 3.04it/s] 6%|▋ | 23831/371472 [1:55:23<31:09:37, 3.10it/s] 6%|▋ | 23832/371472 [1:55:24<31:13:00, 3.09it/s] 6%|▋ | 23833/371472 [1:55:24<31:24:13, 3.07it/s] 6%|▋ | 23834/371472 [1:55:24<29:33:47, 3.27it/s] 6%|▋ | 23835/371472 [1:55:25<32:02:52, 3.01it/s] 6%|▋ | 23836/371472 [1:55:25<31:24:09, 3.08it/s] 6%|▋ | 23837/371472 [1:55:25<30:12:20, 3.20it/s] 6%|▋ | 23838/371472 [1:55:26<30:52:46, 3.13it/s] 6%|▋ | 23839/371472 [1:55:26<29:46:29, 3.24it/s] 6%|▋ | 23840/371472 [1:55:26<28:59:31, 3.33it/s] {'loss': 4.3489, 'learning_rate': 9.42694304983947e-07, 'epoch': 1.03} + 6%|▋ | 23840/371472 [1:55:26<28:59:31, 3.33it/s] 6%|▋ | 23841/371472 [1:55:27<28:13:40, 3.42it/s] 6%|▋ | 23842/371472 [1:55:27<28:40:11, 3.37it/s] 6%|▋ | 23843/371472 [1:55:27<28:00:03, 3.45it/s] 6%|▋ | 23844/371472 [1:55:27<28:30:49, 3.39it/s] 6%|▋ | 23845/371472 [1:55:28<29:38:45, 3.26it/s] 6%|▋ | 23846/371472 [1:55:28<28:09:26, 3.43it/s] 6%|▋ | 23847/371472 [1:55:28<27:39:45, 3.49it/s] 6%|▋ | 23848/371472 [1:55:29<27:40:46, 3.49it/s] 6%|▋ | 23849/371472 [1:55:29<27:42:19, 3.49it/s] 6%|▋ | 23850/371472 [1:55:29<26:48:29, 3.60it/s] 6%|▋ | 23851/371472 [1:55:29<26:48:33, 3.60it/s] 6%|▋ | 23852/371472 [1:55:30<27:41:02, 3.49it/s] 6%|▋ | 23853/371472 [1:55:30<28:57:43, 3.33it/s] 6%|▋ | 23854/371472 [1:55:30<28:07:04, 3.43it/s] 6%|▋ | 23855/371472 [1:55:31<27:04:42, 3.57it/s] 6%|▋ | 23856/371472 [1:55:31<26:40:36, 3.62it/s] 6%|▋ | 23857/371472 [1:55:31<26:00:06, 3.71it/s] 6%|▋ | 23858/371472 [1:55:31<26:21:10, 3.66it/s] 6%|▋ | 23859/371472 [1:55:32<25:35:27, 3.77it/s] 6%|▋ | 23860/371472 [1:55:32<26:23:00, 3.66it/s] {'loss': 4.5493, 'learning_rate': 9.426458230084681e-07, 'epoch': 1.03} + 6%|▋ | 23860/371472 [1:55:32<26:23:00, 3.66it/s] 6%|▋ | 23861/371472 [1:55:32<26:20:44, 3.67it/s] 6%|▋ | 23862/371472 [1:55:32<25:38:04, 3.77it/s] 6%|▋ | 23863/371472 [1:55:33<26:59:41, 3.58it/s] 6%|▋ | 23864/371472 [1:55:33<27:40:54, 3.49it/s] 6%|▋ | 23865/371472 [1:55:33<27:52:53, 3.46it/s] 6%|▋ | 23866/371472 [1:55:34<27:10:22, 3.55it/s] 6%|▋ | 23867/371472 [1:55:34<28:50:33, 3.35it/s] 6%|▋ | 23868/371472 [1:55:34<28:10:17, 3.43it/s] 6%|▋ | 23869/371472 [1:55:34<27:18:39, 3.54it/s] 6%|▋ | 23870/371472 [1:55:35<26:54:17, 3.59it/s] 6%|▋ | 23871/371472 [1:55:35<26:14:08, 3.68it/s] 6%|▋ | 23872/371472 [1:55:35<26:42:36, 3.61it/s] 6%|▋ | 23873/371472 [1:55:36<26:16:57, 3.67it/s] 6%|▋ | 23874/371472 [1:55:36<26:50:02, 3.60it/s] 6%|▋ | 23875/371472 [1:55:36<28:28:02, 3.39it/s] 6%|▋ | 23876/371472 [1:55:36<27:16:50, 3.54it/s] 6%|▋ | 23877/371472 [1:55:37<26:45:09, 3.61it/s] 6%|▋ | 23878/371472 [1:55:37<28:34:29, 3.38it/s] 6%|▋ | 23879/371472 [1:55:37<28:35:21, 3.38it/s] 6%|▋ | 23880/371472 [1:55:38<27:40:26, 3.49it/s] {'loss': 4.4427, 'learning_rate': 9.425973410329893e-07, 'epoch': 1.03} + 6%|▋ | 23880/371472 [1:55:38<27:40:26, 3.49it/s] 6%|▋ | 23881/371472 [1:55:38<27:09:15, 3.56it/s] 6%|▋ | 23882/371472 [1:55:38<27:31:34, 3.51it/s] 6%|▋ | 23883/371472 [1:55:38<27:59:45, 3.45it/s] 6%|▋ | 23884/371472 [1:55:39<27:23:45, 3.52it/s] 6%|▋ | 23885/371472 [1:55:39<26:34:36, 3.63it/s] 6%|▋ | 23886/371472 [1:55:39<26:55:35, 3.59it/s] 6%|▋ | 23887/371472 [1:55:40<26:08:34, 3.69it/s] 6%|▋ | 23888/371472 [1:55:40<26:21:04, 3.66it/s] 6%|▋ | 23889/371472 [1:55:40<26:25:53, 3.65it/s] 6%|▋ | 23890/371472 [1:55:40<26:35:23, 3.63it/s] 6%|▋ | 23891/371472 [1:55:41<27:32:15, 3.51it/s] 6%|▋ | 23892/371472 [1:55:41<27:04:46, 3.57it/s] 6%|▋ | 23893/371472 [1:55:41<26:26:08, 3.65it/s] 6%|▋ | 23894/371472 [1:55:41<26:38:39, 3.62it/s] 6%|▋ | 23895/371472 [1:55:42<26:04:38, 3.70it/s] 6%|▋ | 23896/371472 [1:55:42<26:23:48, 3.66it/s] 6%|▋ | 23897/371472 [1:55:42<26:12:57, 3.68it/s] 6%|▋ | 23898/371472 [1:55:43<28:10:01, 3.43it/s] 6%|▋ | 23899/371472 [1:55:43<30:43:00, 3.14it/s] 6%|▋ | 23900/371472 [1:55:43<29:55:41, 3.23it/s] {'loss': 4.4732, 'learning_rate': 9.425488590575104e-07, 'epoch': 1.03} + 6%|▋ | 23900/371472 [1:55:43<29:55:41, 3.23it/s] 6%|▋ | 23901/371472 [1:55:44<30:15:01, 3.19it/s] 6%|▋ | 23902/371472 [1:55:44<29:26:03, 3.28it/s] 6%|▋ | 23903/371472 [1:55:44<28:48:38, 3.35it/s] 6%|▋ | 23904/371472 [1:55:44<28:12:37, 3.42it/s] 6%|▋ | 23905/371472 [1:55:45<27:55:37, 3.46it/s] 6%|▋ | 23906/371472 [1:55:45<29:12:21, 3.31it/s] 6%|▋ | 23907/371472 [1:55:45<30:19:36, 3.18it/s] 6%|▋ | 23908/371472 [1:55:46<29:09:24, 3.31it/s] 6%|▋ | 23909/371472 [1:55:46<28:56:45, 3.34it/s] 6%|▋ | 23910/371472 [1:55:46<30:34:49, 3.16it/s] 6%|▋ | 23911/371472 [1:55:47<29:50:09, 3.24it/s] 6%|▋ | 23912/371472 [1:55:47<28:33:44, 3.38it/s] 6%|▋ | 23913/371472 [1:55:47<27:43:00, 3.48it/s] 6%|▋ | 23914/371472 [1:55:47<27:11:29, 3.55it/s] 6%|▋ | 23915/371472 [1:55:48<28:29:06, 3.39it/s] 6%|▋ | 23916/371472 [1:55:48<28:06:04, 3.44it/s] 6%|▋ | 23917/371472 [1:55:48<28:42:31, 3.36it/s] 6%|▋ | 23918/371472 [1:55:49<28:00:06, 3.45it/s] 6%|▋ | 23919/371472 [1:55:49<27:06:53, 3.56it/s] 6%|▋ | 23920/371472 [1:55:49<27:06:19, 3.56it/s] {'loss': 4.4412, 'learning_rate': 9.425003770820316e-07, 'epoch': 1.03} + 6%|▋ | 23920/371472 [1:55:49<27:06:19, 3.56it/s] 6%|▋ | 23921/371472 [1:55:49<28:19:46, 3.41it/s] 6%|▋ | 23922/371472 [1:55:50<29:21:47, 3.29it/s] 6%|▋ | 23923/371472 [1:55:50<27:40:06, 3.49it/s] 6%|▋ | 23924/371472 [1:55:50<27:40:23, 3.49it/s] 6%|▋ | 23925/371472 [1:55:51<27:50:59, 3.47it/s] 6%|▋ | 23926/371472 [1:55:51<27:08:12, 3.56it/s] 6%|▋ | 23927/371472 [1:55:51<27:45:50, 3.48it/s] 6%|▋ | 23928/371472 [1:55:51<27:00:29, 3.57it/s] 6%|▋ | 23929/371472 [1:55:52<26:51:49, 3.59it/s] 6%|▋ | 23930/371472 [1:55:52<26:16:59, 3.67it/s] 6%|▋ | 23931/371472 [1:55:52<28:26:21, 3.39it/s] 6%|▋ | 23932/371472 [1:55:53<30:57:08, 3.12it/s] 6%|▋ | 23933/371472 [1:55:53<29:24:09, 3.28it/s] 6%|▋ | 23934/371472 [1:55:53<29:58:08, 3.22it/s] 6%|▋ | 23935/371472 [1:55:54<27:59:30, 3.45it/s] 6%|▋ | 23936/371472 [1:55:54<27:25:57, 3.52it/s] 6%|▋ | 23937/371472 [1:55:54<27:34:40, 3.50it/s] 6%|▋ | 23938/371472 [1:55:54<27:05:07, 3.56it/s] 6%|▋ | 23939/371472 [1:55:55<26:39:24, 3.62it/s] 6%|▋ | 23940/371472 [1:55:55<27:42:52, 3.48it/s] {'loss': 4.3106, 'learning_rate': 9.424518951065525e-07, 'epoch': 1.03} + 6%|▋ | 23940/371472 [1:55:55<27:42:52, 3.48it/s] 6%|▋ | 23941/371472 [1:55:55<26:48:37, 3.60it/s] 6%|▋ | 23942/371472 [1:55:56<29:14:41, 3.30it/s] 6%|▋ | 23943/371472 [1:55:56<28:29:22, 3.39it/s] 6%|▋ | 23944/371472 [1:55:56<28:23:08, 3.40it/s] 6%|▋ | 23945/371472 [1:55:56<28:14:05, 3.42it/s] 6%|▋ | 23946/371472 [1:55:57<30:25:54, 3.17it/s] 6%|▋ | 23947/371472 [1:55:57<31:30:19, 3.06it/s] 6%|▋ | 23948/371472 [1:55:57<30:22:35, 3.18it/s] 6%|▋ | 23949/371472 [1:55:58<30:19:32, 3.18it/s] 6%|▋ | 23950/371472 [1:55:58<28:53:06, 3.34it/s] 6%|▋ | 23951/371472 [1:55:58<28:18:36, 3.41it/s] 6%|▋ | 23952/371472 [1:55:59<28:17:55, 3.41it/s] 6%|▋ | 23953/371472 [1:55:59<28:36:13, 3.37it/s] 6%|▋ | 23954/371472 [1:55:59<27:18:09, 3.54it/s] 6%|▋ | 23955/371472 [1:55:59<27:11:36, 3.55it/s] 6%|▋ | 23956/371472 [1:56:00<26:30:22, 3.64it/s] 6%|▋ | 23957/371472 [1:56:00<26:03:38, 3.70it/s] 6%|▋ | 23958/371472 [1:56:00<27:56:46, 3.45it/s] 6%|▋ | 23959/371472 [1:56:01<27:22:56, 3.53it/s] 6%|▋ | 23960/371472 [1:56:01<27:30:34, 3.51it/s] {'loss': 4.5784, 'learning_rate': 9.424034131310737e-07, 'epoch': 1.03} + 6%|▋ | 23960/371472 [1:56:01<27:30:34, 3.51it/s] 6%|▋ | 23961/371472 [1:56:01<27:11:08, 3.55it/s] 6%|▋ | 23962/371472 [1:56:01<27:24:49, 3.52it/s] 6%|▋ | 23963/371472 [1:56:02<26:51:31, 3.59it/s] 6%|▋ | 23964/371472 [1:56:02<27:53:25, 3.46it/s] 6%|▋ | 23965/371472 [1:56:02<28:31:45, 3.38it/s] 6%|▋ | 23966/371472 [1:56:03<27:38:32, 3.49it/s] 6%|▋ | 23967/371472 [1:56:03<28:44:44, 3.36it/s] 6%|▋ | 23968/371472 [1:56:03<28:15:21, 3.42it/s] 6%|▋ | 23969/371472 [1:56:04<29:36:35, 3.26it/s] 6%|▋ | 23970/371472 [1:56:04<29:16:17, 3.30it/s] 6%|▋ | 23971/371472 [1:56:04<29:09:24, 3.31it/s] 6%|▋ | 23972/371472 [1:56:04<27:52:15, 3.46it/s] 6%|▋ | 23973/371472 [1:56:05<27:33:11, 3.50it/s] 6%|▋ | 23974/371472 [1:56:05<28:03:24, 3.44it/s] 6%|▋ | 23975/371472 [1:56:05<27:06:14, 3.56it/s] 6%|▋ | 23976/371472 [1:56:06<27:24:04, 3.52it/s] 6%|▋ | 23977/371472 [1:56:06<26:58:07, 3.58it/s] 6%|▋ | 23978/371472 [1:56:06<28:03:19, 3.44it/s] 6%|▋ | 23979/371472 [1:56:06<27:36:02, 3.50it/s] 6%|▋ | 23980/371472 [1:56:07<27:32:25, 3.50it/s] {'loss': 4.5837, 'learning_rate': 9.423549311555947e-07, 'epoch': 1.03} + 6%|▋ | 23980/371472 [1:56:07<27:32:25, 3.50it/s] 6%|▋ | 23981/371472 [1:56:07<27:24:48, 3.52it/s] 6%|▋ | 23982/371472 [1:56:07<26:45:05, 3.61it/s] 6%|▋ | 23983/371472 [1:56:07<26:03:12, 3.70it/s] 6%|▋ | 23984/371472 [1:56:08<26:17:49, 3.67it/s] 6%|▋ | 23985/371472 [1:56:08<26:18:36, 3.67it/s] 6%|▋ | 23986/371472 [1:56:08<26:13:32, 3.68it/s] 6%|▋ | 23987/371472 [1:56:09<26:05:29, 3.70it/s] 6%|▋ | 23988/371472 [1:56:09<26:01:43, 3.71it/s] 6%|▋ | 23989/371472 [1:56:09<25:42:24, 3.75it/s] 6%|▋ | 23990/371472 [1:56:09<25:34:24, 3.77it/s] 6%|▋ | 23991/371472 [1:56:10<27:12:00, 3.55it/s] 6%|▋ | 23992/371472 [1:56:10<28:07:29, 3.43it/s] 6%|▋ | 23993/371472 [1:56:10<28:28:15, 3.39it/s] 6%|▋ | 23994/371472 [1:56:11<27:37:37, 3.49it/s] 6%|▋ | 23995/371472 [1:56:11<26:26:46, 3.65it/s] 6%|▋ | 23996/371472 [1:56:11<26:25:02, 3.65it/s] 6%|▋ | 23997/371472 [1:56:11<28:20:21, 3.41it/s] 6%|▋ | 23998/371472 [1:56:12<27:14:13, 3.54it/s] 6%|▋ | 23999/371472 [1:56:12<26:28:45, 3.65it/s] 6%|▋ | 24000/371472 [1:56:12<26:06:16, 3.70it/s] {'loss': 4.451, 'learning_rate': 9.423064491801159e-07, 'epoch': 1.03} + 6%|▋ | 24000/371472 [1:56:12<26:06:16, 3.70it/s] 6%|▋ | 24001/371472 [1:56:12<25:56:50, 3.72it/s] 6%|▋ | 24002/371472 [1:56:13<25:47:00, 3.74it/s] 6%|▋ | 24003/371472 [1:56:13<27:57:04, 3.45it/s] 6%|▋ | 24004/371472 [1:56:13<27:28:59, 3.51it/s] 6%|▋ | 24005/371472 [1:56:14<28:22:01, 3.40it/s] 6%|▋ | 24006/371472 [1:56:14<29:00:09, 3.33it/s] 6%|▋ | 24007/371472 [1:56:14<28:45:19, 3.36it/s] 6%|▋ | 24008/371472 [1:56:14<28:09:39, 3.43it/s] 6%|▋ | 24009/371472 [1:56:15<27:07:49, 3.56it/s] 6%|▋ | 24010/371472 [1:56:15<26:37:13, 3.63it/s] 6%|▋ | 24011/371472 [1:56:15<26:20:32, 3.66it/s] 6%|▋ | 24012/371472 [1:56:16<28:33:38, 3.38it/s] 6%|▋ | 24013/371472 [1:56:16<28:04:48, 3.44it/s] 6%|▋ | 24014/371472 [1:56:16<27:30:41, 3.51it/s] 6%|▋ | 24015/371472 [1:56:17<28:37:01, 3.37it/s] 6%|▋ | 24016/371472 [1:56:17<29:52:23, 3.23it/s] 6%|▋ | 24017/371472 [1:56:17<28:30:09, 3.39it/s] 6%|▋ | 24018/371472 [1:56:17<27:40:43, 3.49it/s] 6%|▋ | 24019/371472 [1:56:18<27:05:16, 3.56it/s] 6%|▋ | 24020/371472 [1:56:18<28:10:03, 3.43it/s] {'loss': 4.52, 'learning_rate': 9.42257967204637e-07, 'epoch': 1.03} + 6%|▋ | 24020/371472 [1:56:18<28:10:03, 3.43it/s] 6%|▋ | 24021/371472 [1:56:18<27:25:48, 3.52it/s] 6%|▋ | 24022/371472 [1:56:19<27:11:01, 3.55it/s] 6%|▋ | 24023/371472 [1:56:19<27:36:01, 3.50it/s] 6%|▋ | 24024/371472 [1:56:19<27:09:00, 3.55it/s] 6%|▋ | 24025/371472 [1:56:19<27:34:53, 3.50it/s] 6%|▋ | 24026/371472 [1:56:20<27:18:24, 3.53it/s] 6%|▋ | 24027/371472 [1:56:20<28:10:46, 3.42it/s] 6%|▋ | 24028/371472 [1:56:20<27:26:51, 3.52it/s] 6%|▋ | 24029/371472 [1:56:21<27:28:48, 3.51it/s] 6%|▋ | 24030/371472 [1:56:21<28:19:08, 3.41it/s] 6%|▋ | 24031/371472 [1:56:21<28:25:08, 3.40it/s] 6%|▋ | 24032/371472 [1:56:21<29:08:47, 3.31it/s] 6%|▋ | 24033/371472 [1:56:22<29:06:51, 3.31it/s] 6%|▋ | 24034/371472 [1:56:22<28:55:15, 3.34it/s] 6%|▋ | 24035/371472 [1:56:22<28:23:23, 3.40it/s] 6%|▋ | 24036/371472 [1:56:23<28:11:25, 3.42it/s] 6%|▋ | 24037/371472 [1:56:23<27:12:08, 3.55it/s] 6%|▋ | 24038/371472 [1:56:23<30:15:09, 3.19it/s] 6%|▋ | 24039/371472 [1:56:24<29:03:40, 3.32it/s] 6%|▋ | 24040/371472 [1:56:24<27:41:47, 3.48it/s] {'loss': 4.2861, 'learning_rate': 9.422094852291581e-07, 'epoch': 1.04} + 6%|▋ | 24040/371472 [1:56:24<27:41:47, 3.48it/s] 6%|▋ | 24041/371472 [1:56:24<28:15:55, 3.41it/s] 6%|▋ | 24042/371472 [1:56:24<27:06:13, 3.56it/s] 6%|▋ | 24043/371472 [1:56:25<29:12:52, 3.30it/s] 6%|▋ | 24044/371472 [1:56:25<30:14:26, 3.19it/s] 6%|▋ | 24045/371472 [1:56:25<29:41:11, 3.25it/s] 6%|▋ | 24046/371472 [1:56:26<28:23:04, 3.40it/s] 6%|▋ | 24047/371472 [1:56:26<28:51:29, 3.34it/s] 6%|▋ | 24048/371472 [1:56:26<29:12:16, 3.30it/s] 6%|▋ | 24049/371472 [1:56:26<27:46:38, 3.47it/s] 6%|▋ | 24050/371472 [1:56:27<31:15:03, 3.09it/s] 6%|▋ | 24051/371472 [1:56:27<31:05:50, 3.10it/s] 6%|▋ | 24052/371472 [1:56:27<29:21:20, 3.29it/s] 6%|▋ | 24053/371472 [1:56:28<30:31:38, 3.16it/s] 6%|▋ | 24054/371472 [1:56:28<29:35:28, 3.26it/s] 6%|▋ | 24055/371472 [1:56:28<30:16:49, 3.19it/s] 6%|▋ | 24056/371472 [1:56:29<30:20:51, 3.18it/s] 6%|▋ | 24057/371472 [1:56:29<29:35:53, 3.26it/s] 6%|▋ | 24058/371472 [1:56:29<28:56:35, 3.33it/s] 6%|▋ | 24059/371472 [1:56:30<28:44:48, 3.36it/s] 6%|▋ | 24060/371472 [1:56:30<28:20:17, 3.41it/s] {'loss': 4.5557, 'learning_rate': 9.421610032536791e-07, 'epoch': 1.04} + 6%|▋ | 24060/371472 [1:56:30<28:20:17, 3.41it/s] 6%|▋ | 24061/371472 [1:56:30<27:29:56, 3.51it/s] 6%|▋ | 24062/371472 [1:56:30<26:56:15, 3.58it/s] 6%|▋ | 24063/371472 [1:56:31<26:08:02, 3.69it/s] 6%|▋ | 24064/371472 [1:56:31<26:12:47, 3.68it/s] 6%|▋ | 24065/371472 [1:56:31<25:51:17, 3.73it/s] 6%|▋ | 24066/371472 [1:56:31<25:53:40, 3.73it/s] 6%|▋ | 24067/371472 [1:56:32<26:06:36, 3.70it/s] 6%|▋ | 24068/371472 [1:56:32<26:38:59, 3.62it/s] 6%|▋ | 24069/371472 [1:56:32<25:50:44, 3.73it/s] 6%|▋ | 24070/371472 [1:56:33<25:59:52, 3.71it/s] 6%|▋ | 24071/371472 [1:56:33<27:01:23, 3.57it/s] 6%|▋ | 24072/371472 [1:56:33<25:58:26, 3.72it/s] 6%|▋ | 24073/371472 [1:56:33<26:22:33, 3.66it/s] 6%|▋ | 24074/371472 [1:56:34<25:43:26, 3.75it/s] 6%|▋ | 24075/371472 [1:56:34<27:46:18, 3.47it/s] 6%|▋ | 24076/371472 [1:56:34<27:03:46, 3.57it/s] 6%|▋ | 24077/371472 [1:56:35<27:12:11, 3.55it/s] 6%|▋ | 24078/371472 [1:56:35<26:30:16, 3.64it/s] 6%|▋ | 24079/371472 [1:56:35<26:12:37, 3.68it/s] 6%|▋ | 24080/371472 [1:56:35<25:36:49, 3.77it/s] {'loss': 4.807, 'learning_rate': 9.421125212782003e-07, 'epoch': 1.04} + 6%|▋ | 24080/371472 [1:56:35<25:36:49, 3.77it/s] 6%|▋ | 24081/371472 [1:56:36<25:41:32, 3.76it/s] 6%|▋ | 24082/371472 [1:56:36<26:38:08, 3.62it/s] 6%|▋ | 24083/371472 [1:56:36<25:39:05, 3.76it/s] 6%|▋ | 24084/371472 [1:56:36<27:23:58, 3.52it/s] 6%|▋ | 24085/371472 [1:56:37<27:07:24, 3.56it/s] 6%|▋ | 24086/371472 [1:56:37<27:57:10, 3.45it/s] 6%|▋ | 24087/371472 [1:56:37<26:30:56, 3.64it/s] 6%|▋ | 24088/371472 [1:56:38<27:05:34, 3.56it/s] 6%|▋ | 24089/371472 [1:56:38<27:16:14, 3.54it/s] 6%|▋ | 24090/371472 [1:56:38<26:40:48, 3.62it/s] 6%|▋ | 24091/371472 [1:56:38<26:52:26, 3.59it/s] 6%|▋ | 24092/371472 [1:56:39<28:29:51, 3.39it/s] 6%|▋ | 24093/371472 [1:56:39<28:27:59, 3.39it/s] 6%|▋ | 24094/371472 [1:56:39<30:02:00, 3.21it/s] 6%|▋ | 24095/371472 [1:56:40<30:01:25, 3.21it/s] 6%|▋ | 24096/371472 [1:56:40<28:39:10, 3.37it/s] 6%|▋ | 24097/371472 [1:56:40<28:45:00, 3.36it/s] 6%|▋ | 24098/371472 [1:56:41<29:01:46, 3.32it/s] 6%|▋ | 24099/371472 [1:56:41<28:12:57, 3.42it/s] 6%|▋ | 24100/371472 [1:56:41<29:01:24, 3.32it/s] {'loss': 4.514, 'learning_rate': 9.420640393027214e-07, 'epoch': 1.04} + 6%|▋ | 24100/371472 [1:56:41<29:01:24, 3.32it/s] 6%|▋ | 24101/371472 [1:56:41<28:29:45, 3.39it/s] 6%|▋ | 24102/371472 [1:56:42<27:31:06, 3.51it/s] 6%|▋ | 24103/371472 [1:56:42<28:23:49, 3.40it/s] 6%|▋ | 24104/371472 [1:56:42<27:28:21, 3.51it/s] 6%|▋ | 24105/371472 [1:56:43<27:15:15, 3.54it/s] 6%|▋ | 24106/371472 [1:56:43<27:06:52, 3.56it/s] 6%|▋ | 24107/371472 [1:56:43<27:01:52, 3.57it/s] 6%|▋ | 24108/371472 [1:56:43<27:13:02, 3.55it/s] 6%|▋ | 24109/371472 [1:56:44<27:43:21, 3.48it/s] 6%|▋ | 24110/371472 [1:56:44<28:03:30, 3.44it/s] 6%|▋ | 24111/371472 [1:56:44<27:51:59, 3.46it/s] 6%|▋ | 24112/371472 [1:56:45<27:26:35, 3.52it/s] 6%|▋ | 24113/371472 [1:56:45<27:44:39, 3.48it/s] 6%|▋ | 24114/371472 [1:56:45<27:40:22, 3.49it/s] 6%|▋ | 24115/371472 [1:56:45<28:16:11, 3.41it/s] 6%|▋ | 24116/371472 [1:56:46<27:37:37, 3.49it/s] 6%|▋ | 24117/371472 [1:56:46<27:19:49, 3.53it/s] 6%|▋ | 24118/371472 [1:56:46<26:28:49, 3.64it/s] 6%|▋ | 24119/371472 [1:56:47<27:15:09, 3.54it/s] 6%|▋ | 24120/371472 [1:56:47<28:42:51, 3.36it/s] {'loss': 4.4858, 'learning_rate': 9.420155573272425e-07, 'epoch': 1.04} + 6%|▋ | 24120/371472 [1:56:47<28:42:51, 3.36it/s] 6%|▋ | 24121/371472 [1:56:47<27:48:43, 3.47it/s] 6%|▋ | 24122/371472 [1:56:47<29:51:30, 3.23it/s] 6%|▋ | 24123/371472 [1:56:48<28:34:51, 3.38it/s] 6%|▋ | 24124/371472 [1:56:48<29:03:45, 3.32it/s] 6%|▋ | 24125/371472 [1:56:48<27:47:30, 3.47it/s] 6%|▋ | 24126/371472 [1:56:49<29:29:44, 3.27it/s] 6%|▋ | 24127/371472 [1:56:49<29:45:23, 3.24it/s] 6%|▋ | 24128/371472 [1:56:49<29:25:19, 3.28it/s] 6%|▋ | 24129/371472 [1:56:50<27:59:27, 3.45it/s] 6%|▋ | 24130/371472 [1:56:50<27:33:35, 3.50it/s] 6%|▋ | 24131/371472 [1:56:50<28:20:24, 3.40it/s] 6%|▋ | 24132/371472 [1:56:50<28:14:37, 3.42it/s] 6%|▋ | 24133/371472 [1:56:51<27:03:44, 3.57it/s] 6%|▋ | 24134/371472 [1:56:51<28:51:45, 3.34it/s] 6%|▋ | 24135/371472 [1:56:51<29:44:58, 3.24it/s] 6%|▋ | 24136/371472 [1:56:52<30:23:45, 3.17it/s] 6%|▋ | 24137/371472 [1:56:52<30:18:08, 3.18it/s] 6%|▋ | 24138/371472 [1:56:52<29:13:53, 3.30it/s] 6%|▋ | 24139/371472 [1:56:53<29:55:54, 3.22it/s] 6%|▋ | 24140/371472 [1:56:53<28:54:08, 3.34it/s] {'loss': 4.2943, 'learning_rate': 9.419670753517636e-07, 'epoch': 1.04} + 6%|▋ | 24140/371472 [1:56:53<28:54:08, 3.34it/s] 6%|▋ | 24141/371472 [1:56:53<27:59:03, 3.45it/s] 6%|▋ | 24142/371472 [1:56:53<28:45:20, 3.36it/s] 6%|▋ | 24143/371472 [1:56:54<29:04:47, 3.32it/s] 6%|▋ | 24144/371472 [1:56:54<28:41:58, 3.36it/s] 6%|▋ | 24145/371472 [1:56:54<27:48:27, 3.47it/s] 7%|▋ | 24146/371472 [1:56:55<27:46:44, 3.47it/s] 7%|▋ | 24147/371472 [1:56:55<28:45:44, 3.35it/s] 7%|▋ | 24148/371472 [1:56:55<29:00:45, 3.33it/s] 7%|▋ | 24149/371472 [1:56:55<28:21:51, 3.40it/s] 7%|▋ | 24150/371472 [1:56:56<27:08:34, 3.55it/s] 7%|▋ | 24151/371472 [1:56:56<28:04:40, 3.44it/s] 7%|▋ | 24152/371472 [1:56:56<27:10:08, 3.55it/s] 7%|▋ | 24153/371472 [1:56:57<26:49:08, 3.60it/s] 7%|▋ | 24154/371472 [1:56:57<27:14:52, 3.54it/s] 7%|▋ | 24155/371472 [1:56:57<27:40:23, 3.49it/s] 7%|▋ | 24156/371472 [1:56:57<27:27:54, 3.51it/s] 7%|▋ | 24157/371472 [1:56:58<27:33:23, 3.50it/s] 7%|▋ | 24158/371472 [1:56:58<27:46:04, 3.47it/s] 7%|▋ | 24159/371472 [1:56:58<26:53:42, 3.59it/s] 7%|▋ | 24160/371472 [1:56:59<27:56:21, 3.45it/s] {'loss': 4.6211, 'learning_rate': 9.419185933762848e-07, 'epoch': 1.04} + 7%|▋ | 24160/371472 [1:56:59<27:56:21, 3.45it/s] 7%|▋ | 24161/371472 [1:56:59<27:01:57, 3.57it/s] 7%|▋ | 24162/371472 [1:56:59<26:59:15, 3.57it/s] 7%|▋ | 24163/371472 [1:56:59<27:38:46, 3.49it/s] 7%|▋ | 24164/371472 [1:57:00<27:18:27, 3.53it/s] 7%|▋ | 24165/371472 [1:57:00<27:04:22, 3.56it/s] 7%|▋ | 24166/371472 [1:57:00<27:10:33, 3.55it/s] 7%|▋ | 24167/371472 [1:57:01<26:54:16, 3.59it/s] 7%|▋ | 24168/371472 [1:57:01<27:36:32, 3.49it/s] 7%|▋ | 24169/371472 [1:57:01<27:17:56, 3.53it/s] 7%|▋ | 24170/371472 [1:57:01<26:37:30, 3.62it/s] 7%|▋ | 24171/371472 [1:57:02<26:14:51, 3.68it/s] 7%|▋ | 24172/371472 [1:57:02<27:36:59, 3.49it/s] 7%|▋ | 24173/371472 [1:57:02<26:56:21, 3.58it/s] 7%|▋ | 24174/371472 [1:57:02<26:17:49, 3.67it/s] 7%|▋ | 24175/371472 [1:57:03<26:46:16, 3.60it/s] 7%|▋ | 24176/371472 [1:57:03<28:12:31, 3.42it/s] 7%|▋ | 24177/371472 [1:57:03<27:40:16, 3.49it/s] 7%|▋ | 24178/371472 [1:57:04<27:37:24, 3.49it/s] 7%|▋ | 24179/371472 [1:57:04<27:04:47, 3.56it/s] 7%|▋ | 24180/371472 [1:57:04<32:10:33, 3.00it/s] {'loss': 4.7033, 'learning_rate': 9.418701114008059e-07, 'epoch': 1.04} + 7%|▋ | 24180/371472 [1:57:04<32:10:33, 3.00it/s] 7%|▋ | 24181/371472 [1:57:05<30:32:40, 3.16it/s] 7%|▋ | 24182/371472 [1:57:05<29:48:50, 3.24it/s] 7%|▋ | 24183/371472 [1:57:05<30:01:06, 3.21it/s] 7%|▋ | 24184/371472 [1:57:06<28:39:45, 3.37it/s] 7%|▋ | 24185/371472 [1:57:06<29:10:03, 3.31it/s] 7%|▋ | 24186/371472 [1:57:06<29:42:35, 3.25it/s] 7%|▋ | 24187/371472 [1:57:06<29:06:30, 3.31it/s] 7%|▋ | 24188/371472 [1:57:07<28:01:56, 3.44it/s] 7%|▋ | 24189/371472 [1:57:07<26:59:37, 3.57it/s] 7%|▋ | 24190/371472 [1:57:07<26:04:28, 3.70it/s] 7%|▋ | 24191/371472 [1:57:08<26:32:37, 3.63it/s] 7%|▋ | 24192/371472 [1:57:08<26:17:44, 3.67it/s] 7%|▋ | 24193/371472 [1:57:08<25:49:40, 3.73it/s] 7%|▋ | 24194/371472 [1:57:08<25:37:07, 3.77it/s] 7%|▋ | 24195/371472 [1:57:09<25:53:17, 3.73it/s] 7%|▋ | 24196/371472 [1:57:09<26:39:26, 3.62it/s] 7%|▋ | 24197/371472 [1:57:09<28:47:34, 3.35it/s] 7%|▋ | 24198/371472 [1:57:10<29:01:43, 3.32it/s] 7%|▋ | 24199/371472 [1:57:10<29:15:03, 3.30it/s] 7%|▋ | 24200/371472 [1:57:10<28:04:18, 3.44it/s] {'loss': 4.5263, 'learning_rate': 9.418216294253269e-07, 'epoch': 1.04} + 7%|▋ | 24200/371472 [1:57:10<28:04:18, 3.44it/s] 7%|▋ | 24201/371472 [1:57:10<27:39:14, 3.49it/s] 7%|▋ | 24202/371472 [1:57:11<27:53:25, 3.46it/s] 7%|▋ | 24203/371472 [1:57:11<26:52:12, 3.59it/s] 7%|▋ | 24204/371472 [1:57:11<27:06:02, 3.56it/s] 7%|▋ | 24205/371472 [1:57:11<26:36:39, 3.62it/s] 7%|▋ | 24206/371472 [1:57:12<27:21:49, 3.53it/s] 7%|▋ | 24207/371472 [1:57:12<27:37:06, 3.49it/s] 7%|▋ | 24208/371472 [1:57:12<27:35:41, 3.50it/s] 7%|▋ | 24209/371472 [1:57:13<26:58:41, 3.58it/s] 7%|▋ | 24210/371472 [1:57:13<26:11:19, 3.68it/s] 7%|▋ | 24211/371472 [1:57:13<26:43:59, 3.61it/s] 7%|▋ | 24212/371472 [1:57:13<28:21:50, 3.40it/s] 7%|▋ | 24213/371472 [1:57:14<27:25:05, 3.52it/s] 7%|▋ | 24214/371472 [1:57:14<26:42:40, 3.61it/s] 7%|▋ | 24215/371472 [1:57:14<28:06:04, 3.43it/s] 7%|▋ | 24216/371472 [1:57:15<26:59:16, 3.57it/s] 7%|▋ | 24217/371472 [1:57:15<28:07:44, 3.43it/s] 7%|▋ | 24218/371472 [1:57:15<27:04:00, 3.56it/s] 7%|▋ | 24219/371472 [1:57:15<27:25:19, 3.52it/s] 7%|▋ | 24220/371472 [1:57:16<27:02:04, 3.57it/s] {'loss': 4.4634, 'learning_rate': 9.41773147449848e-07, 'epoch': 1.04} + 7%|▋ | 24220/371472 [1:57:16<27:02:04, 3.57it/s] 7%|▋ | 24221/371472 [1:57:16<27:37:04, 3.49it/s] 7%|▋ | 24222/371472 [1:57:16<27:54:10, 3.46it/s] 7%|▋ | 24223/371472 [1:57:17<27:13:07, 3.54it/s] 7%|▋ | 24224/371472 [1:57:17<26:39:14, 3.62it/s] 7%|▋ | 24225/371472 [1:57:17<30:01:57, 3.21it/s] 7%|▋ | 24226/371472 [1:57:18<29:48:57, 3.24it/s] 7%|▋ | 24227/371472 [1:57:18<28:36:16, 3.37it/s] 7%|▋ | 24228/371472 [1:57:18<28:17:11, 3.41it/s] 7%|▋ | 24229/371472 [1:57:18<27:34:35, 3.50it/s] 7%|▋ | 24230/371472 [1:57:19<28:18:07, 3.41it/s] 7%|▋ | 24231/371472 [1:57:19<28:40:23, 3.36it/s] 7%|▋ | 24232/371472 [1:57:19<28:33:59, 3.38it/s] 7%|▋ | 24233/371472 [1:57:20<27:30:03, 3.51it/s] 7%|▋ | 24234/371472 [1:57:20<30:16:21, 3.19it/s] 7%|▋ | 24235/371472 [1:57:20<28:50:23, 3.34it/s] 7%|▋ | 24236/371472 [1:57:20<28:59:37, 3.33it/s] 7%|▋ | 24237/371472 [1:57:21<29:07:52, 3.31it/s] 7%|▋ | 24238/371472 [1:57:21<28:25:22, 3.39it/s] 7%|▋ | 24239/371472 [1:57:21<27:30:09, 3.51it/s] 7%|▋ | 24240/371472 [1:57:22<26:43:13, 3.61it/s] {'loss': 4.6139, 'learning_rate': 9.417246654743692e-07, 'epoch': 1.04} + 7%|▋ | 24240/371472 [1:57:22<26:43:13, 3.61it/s] 7%|▋ | 24241/371472 [1:57:22<27:00:36, 3.57it/s] 7%|▋ | 24242/371472 [1:57:22<27:20:39, 3.53it/s] 7%|▋ | 24243/371472 [1:57:22<28:03:23, 3.44it/s] 7%|▋ | 24244/371472 [1:57:23<28:05:41, 3.43it/s] 7%|▋ | 24245/371472 [1:57:23<34:28:13, 2.80it/s] 7%|▋ | 24246/371472 [1:57:24<33:17:35, 2.90it/s] 7%|▋ | 24247/371472 [1:57:24<31:42:43, 3.04it/s] 7%|▋ | 24248/371472 [1:57:24<29:25:11, 3.28it/s] 7%|▋ | 24249/371472 [1:57:24<27:50:00, 3.47it/s] 7%|▋ | 24250/371472 [1:57:25<28:27:26, 3.39it/s] 7%|▋ | 24251/371472 [1:57:25<30:26:02, 3.17it/s] 7%|▋ | 24252/371472 [1:57:25<29:31:15, 3.27it/s] 7%|▋ | 24253/371472 [1:57:26<28:02:01, 3.44it/s] 7%|▋ | 24254/371472 [1:57:26<27:05:10, 3.56it/s] 7%|▋ | 24255/371472 [1:57:26<27:19:04, 3.53it/s] 7%|▋ | 24256/371472 [1:57:26<26:41:47, 3.61it/s] 7%|▋ | 24257/371472 [1:57:27<26:56:26, 3.58it/s] 7%|▋ | 24258/371472 [1:57:27<26:30:41, 3.64it/s] 7%|▋ | 24259/371472 [1:57:27<27:27:44, 3.51it/s] 7%|▋ | 24260/371472 [1:57:28<26:57:53, 3.58it/s] {'loss': 4.2911, 'learning_rate': 9.416761834988903e-07, 'epoch': 1.04} + 7%|▋ | 24260/371472 [1:57:28<26:57:53, 3.58it/s] 7%|▋ | 24261/371472 [1:57:28<26:23:28, 3.65it/s] 7%|▋ | 24262/371472 [1:57:28<26:39:54, 3.62it/s] 7%|▋ | 24263/371472 [1:57:28<27:04:40, 3.56it/s] 7%|▋ | 24264/371472 [1:57:29<26:33:06, 3.63it/s] 7%|▋ | 24265/371472 [1:57:29<26:40:15, 3.62it/s] 7%|▋ | 24266/371472 [1:57:29<29:35:48, 3.26it/s] 7%|▋ | 24267/371472 [1:57:30<29:25:23, 3.28it/s] 7%|▋ | 24268/371472 [1:57:30<30:17:18, 3.18it/s] 7%|▋ | 24269/371472 [1:57:30<31:19:51, 3.08it/s] 7%|▋ | 24270/371472 [1:57:31<30:29:08, 3.16it/s] 7%|▋ | 24271/371472 [1:57:31<30:43:10, 3.14it/s] 7%|▋ | 24272/371472 [1:57:31<30:40:14, 3.14it/s] 7%|▋ | 24273/371472 [1:57:31<29:30:32, 3.27it/s] 7%|▋ | 24274/371472 [1:57:32<28:49:35, 3.35it/s] 7%|▋ | 24275/371472 [1:57:32<29:33:50, 3.26it/s] 7%|▋ | 24276/371472 [1:57:32<28:35:45, 3.37it/s] 7%|▋ | 24277/371472 [1:57:33<28:41:53, 3.36it/s] 7%|▋ | 24278/371472 [1:57:33<27:46:47, 3.47it/s] 7%|▋ | 24279/371472 [1:57:33<28:44:06, 3.36it/s] 7%|▋ | 24280/371472 [1:57:34<27:55:56, 3.45it/s] {'loss': 4.5205, 'learning_rate': 9.416277015234114e-07, 'epoch': 1.05} + 7%|▋ | 24280/371472 [1:57:34<27:55:56, 3.45it/s] 7%|▋ | 24281/371472 [1:57:34<29:06:22, 3.31it/s] 7%|▋ | 24282/371472 [1:57:34<35:32:55, 2.71it/s] 7%|▋ | 24283/371472 [1:57:35<32:05:08, 3.01it/s] 7%|▋ | 24284/371472 [1:57:35<32:54:46, 2.93it/s] 7%|▋ | 24285/371472 [1:57:35<32:16:17, 2.99it/s] 7%|▋ | 24286/371472 [1:57:36<32:29:16, 2.97it/s] 7%|▋ | 24287/371472 [1:57:36<30:37:27, 3.15it/s] 7%|▋ | 24288/371472 [1:57:36<30:21:06, 3.18it/s] 7%|▋ | 24289/371472 [1:57:37<29:11:58, 3.30it/s] 7%|▋ | 24290/371472 [1:57:37<27:59:42, 3.44it/s] 7%|▋ | 24291/371472 [1:57:37<29:11:17, 3.30it/s] 7%|▋ | 24292/371472 [1:57:37<27:47:59, 3.47it/s] 7%|▋ | 24293/371472 [1:57:38<27:24:10, 3.52it/s] 7%|▋ | 24294/371472 [1:57:38<28:26:21, 3.39it/s] 7%|▋ | 24295/371472 [1:57:38<27:55:17, 3.45it/s] 7%|▋ | 24296/371472 [1:57:38<26:48:04, 3.60it/s] 7%|▋ | 24297/371472 [1:57:39<25:48:02, 3.74it/s] 7%|▋ | 24298/371472 [1:57:39<25:49:03, 3.74it/s] 7%|▋ | 24299/371472 [1:57:39<25:31:09, 3.78it/s] 7%|▋ | 24300/371472 [1:57:40<26:37:26, 3.62it/s] {'loss': 4.5224, 'learning_rate': 9.415792195479325e-07, 'epoch': 1.05} + 7%|▋ | 24300/371472 [1:57:40<26:37:26, 3.62it/s] 7%|▋ | 24301/371472 [1:57:40<26:48:22, 3.60it/s] 7%|▋ | 24302/371472 [1:57:40<27:09:54, 3.55it/s] 7%|▋ | 24303/371472 [1:57:40<27:40:18, 3.49it/s] 7%|▋ | 24304/371472 [1:57:41<28:34:07, 3.38it/s] 7%|▋ | 24305/371472 [1:57:41<27:53:26, 3.46it/s] 7%|▋ | 24306/371472 [1:57:41<27:30:14, 3.51it/s] 7%|▋ | 24307/371472 [1:57:42<28:04:30, 3.43it/s] 7%|▋ | 24308/371472 [1:57:42<30:01:19, 3.21it/s] 7%|▋ | 24309/371472 [1:57:42<29:30:46, 3.27it/s] 7%|▋ | 24310/371472 [1:57:43<29:17:49, 3.29it/s] 7%|▋ | 24311/371472 [1:57:43<28:04:40, 3.43it/s] 7%|▋ | 24312/371472 [1:57:43<28:22:09, 3.40it/s] 7%|▋ | 24313/371472 [1:57:43<29:10:07, 3.31it/s] 7%|▋ | 24314/371472 [1:57:44<29:03:30, 3.32it/s] 7%|▋ | 24315/371472 [1:57:44<27:55:12, 3.45it/s] 7%|▋ | 24316/371472 [1:57:44<26:59:33, 3.57it/s] 7%|▋ | 24317/371472 [1:57:45<27:03:12, 3.56it/s] 7%|▋ | 24318/371472 [1:57:45<27:23:41, 3.52it/s] 7%|▋ | 24319/371472 [1:57:45<26:49:11, 3.60it/s] 7%|▋ | 24320/371472 [1:57:46<30:44:12, 3.14it/s] {'loss': 4.4512, 'learning_rate': 9.415307375724536e-07, 'epoch': 1.05} + 7%|▋ | 24320/371472 [1:57:46<30:44:12, 3.14it/s] 7%|▋ | 24321/371472 [1:57:46<29:35:51, 3.26it/s] 7%|▋ | 24322/371472 [1:57:46<29:09:21, 3.31it/s] 7%|▋ | 24323/371472 [1:57:46<28:11:53, 3.42it/s] 7%|▋ | 24324/371472 [1:57:47<27:30:08, 3.51it/s] 7%|▋ | 24325/371472 [1:57:47<28:05:21, 3.43it/s] 7%|▋ | 24326/371472 [1:57:47<27:22:19, 3.52it/s] 7%|▋ | 24327/371472 [1:57:47<27:02:32, 3.57it/s] 7%|▋ | 24328/371472 [1:57:48<26:45:26, 3.60it/s] 7%|▋ | 24329/371472 [1:57:48<26:44:21, 3.61it/s] 7%|▋ | 24330/371472 [1:57:48<25:58:17, 3.71it/s] 7%|▋ | 24331/371472 [1:57:49<26:42:35, 3.61it/s] 7%|▋ | 24332/371472 [1:57:49<26:13:15, 3.68it/s] 7%|▋ | 24333/371472 [1:57:49<27:34:42, 3.50it/s] 7%|▋ | 24334/371472 [1:57:49<26:34:37, 3.63it/s] 7%|▋ | 24335/371472 [1:57:50<25:40:57, 3.75it/s] 7%|▋ | 24336/371472 [1:57:50<26:31:29, 3.64it/s] 7%|▋ | 24337/371472 [1:57:50<27:48:51, 3.47it/s] 7%|▋ | 24338/371472 [1:57:51<27:51:50, 3.46it/s] 7%|▋ | 24339/371472 [1:57:51<27:24:25, 3.52it/s] 7%|▋ | 24340/371472 [1:57:51<26:50:51, 3.59it/s] {'loss': 4.7147, 'learning_rate': 9.414822555969746e-07, 'epoch': 1.05} + 7%|▋ | 24340/371472 [1:57:51<26:50:51, 3.59it/s] 7%|▋ | 24341/371472 [1:57:51<26:31:15, 3.64it/s] 7%|▋ | 24342/371472 [1:57:52<26:37:11, 3.62it/s] 7%|▋ | 24343/371472 [1:57:52<28:05:38, 3.43it/s] 7%|▋ | 24344/371472 [1:57:52<27:14:10, 3.54it/s] 7%|▋ | 24345/371472 [1:57:52<26:55:14, 3.58it/s] 7%|▋ | 24346/371472 [1:57:53<26:35:11, 3.63it/s] 7%|▋ | 24347/371472 [1:57:53<28:24:23, 3.39it/s] 7%|▋ | 24348/371472 [1:57:53<28:37:59, 3.37it/s] 7%|▋ | 24349/371472 [1:57:54<28:36:38, 3.37it/s] 7%|▋ | 24350/371472 [1:57:54<27:43:50, 3.48it/s] 7%|▋ | 24351/371472 [1:57:54<27:47:34, 3.47it/s] 7%|▋ | 24352/371472 [1:57:55<29:34:34, 3.26it/s] 7%|▋ | 24353/371472 [1:57:55<31:32:02, 3.06it/s] 7%|▋ | 24354/371472 [1:57:55<30:23:41, 3.17it/s] 7%|▋ | 24355/371472 [1:57:56<28:54:45, 3.33it/s] 7%|▋ | 24356/371472 [1:57:56<28:54:52, 3.33it/s] 7%|▋ | 24357/371472 [1:57:56<29:47:33, 3.24it/s] 7%|▋ | 24358/371472 [1:57:56<28:55:06, 3.33it/s] 7%|▋ | 24359/371472 [1:57:57<29:26:15, 3.28it/s] 7%|▋ | 24360/371472 [1:57:57<30:05:20, 3.20it/s] {'loss': 4.7131, 'learning_rate': 9.414337736214958e-07, 'epoch': 1.05} + 7%|▋ | 24360/371472 [1:57:57<30:05:20, 3.20it/s] 7%|▋ | 24361/371472 [1:57:57<31:04:27, 3.10it/s] 7%|▋ | 24362/371472 [1:57:58<29:57:11, 3.22it/s] 7%|▋ | 24363/371472 [1:57:58<30:13:57, 3.19it/s] 7%|▋ | 24364/371472 [1:57:58<29:03:35, 3.32it/s] 7%|▋ | 24365/371472 [1:57:59<27:49:55, 3.46it/s] 7%|▋ | 24366/371472 [1:57:59<27:46:07, 3.47it/s] 7%|▋ | 24367/371472 [1:57:59<28:18:06, 3.41it/s] 7%|▋ | 24368/371472 [1:57:59<26:59:14, 3.57it/s] 7%|▋ | 24369/371472 [1:58:00<27:22:25, 3.52it/s] 7%|▋ | 24370/371472 [1:58:00<27:05:16, 3.56it/s] 7%|▋ | 24371/371472 [1:58:00<28:35:00, 3.37it/s] 7%|▋ | 24372/371472 [1:58:01<27:22:03, 3.52it/s] 7%|▋ | 24373/371472 [1:58:01<26:48:02, 3.60it/s] 7%|▋ | 24374/371472 [1:58:01<27:43:36, 3.48it/s] 7%|▋ | 24375/371472 [1:58:01<27:39:13, 3.49it/s] 7%|▋ | 24376/371472 [1:58:02<27:13:20, 3.54it/s] 7%|▋ | 24377/371472 [1:58:02<27:05:15, 3.56it/s] 7%|▋ | 24378/371472 [1:58:02<27:26:07, 3.51it/s] 7%|▋ | 24379/371472 [1:58:03<27:26:28, 3.51it/s] 7%|▋ | 24380/371472 [1:58:03<27:04:38, 3.56it/s] {'loss': 4.4743, 'learning_rate': 9.413852916460169e-07, 'epoch': 1.05} + 7%|▋ | 24380/371472 [1:58:03<27:04:38, 3.56it/s] 7%|▋ | 24381/371472 [1:58:03<32:06:40, 3.00it/s] 7%|▋ | 24382/371472 [1:58:04<30:07:33, 3.20it/s] 7%|▋ | 24383/371472 [1:58:04<29:36:35, 3.26it/s] 7%|▋ | 24384/371472 [1:58:04<28:46:29, 3.35it/s] 7%|▋ | 24385/371472 [1:58:04<27:59:05, 3.45it/s] 7%|▋ | 24386/371472 [1:58:05<26:57:56, 3.58it/s] 7%|▋ | 24387/371472 [1:58:05<26:31:06, 3.64it/s] 7%|▋ | 24388/371472 [1:58:05<27:25:05, 3.52it/s] 7%|▋ | 24389/371472 [1:58:05<27:27:40, 3.51it/s] 7%|▋ | 24390/371472 [1:58:06<27:27:41, 3.51it/s] 7%|▋ | 24391/371472 [1:58:06<27:04:45, 3.56it/s] 7%|▋ | 24392/371472 [1:58:06<27:47:47, 3.47it/s] 7%|▋ | 24393/371472 [1:58:07<27:30:31, 3.50it/s] 7%|▋ | 24394/371472 [1:58:07<26:50:24, 3.59it/s] 7%|▋ | 24395/371472 [1:58:07<27:15:10, 3.54it/s] 7%|▋ | 24396/371472 [1:58:07<26:21:44, 3.66it/s] 7%|▋ | 24397/371472 [1:58:08<25:28:50, 3.78it/s] 7%|▋ | 24398/371472 [1:58:08<25:32:19, 3.78it/s] 7%|▋ | 24399/371472 [1:58:08<26:52:45, 3.59it/s] 7%|▋ | 24400/371472 [1:58:09<26:25:40, 3.65it/s] {'loss': 4.5406, 'learning_rate': 9.41336809670538e-07, 'epoch': 1.05} + 7%|▋ | 24400/371472 [1:58:09<26:25:40, 3.65it/s] 7%|▋ | 24401/371472 [1:58:09<26:43:28, 3.61it/s] 7%|▋ | 24402/371472 [1:58:09<26:13:06, 3.68it/s] 7%|▋ | 24403/371472 [1:58:09<26:56:08, 3.58it/s] 7%|▋ | 24404/371472 [1:58:10<26:40:06, 3.62it/s] 7%|▋ | 24405/371472 [1:58:10<27:45:30, 3.47it/s] 7%|▋ | 24406/371472 [1:58:10<30:00:09, 3.21it/s] 7%|▋ | 24407/371472 [1:58:11<29:20:14, 3.29it/s] 7%|▋ | 24408/371472 [1:58:11<30:33:57, 3.15it/s] 7%|▋ | 24409/371472 [1:58:11<29:28:24, 3.27it/s] 7%|▋ | 24410/371472 [1:58:11<28:32:14, 3.38it/s] 7%|▋ | 24411/371472 [1:58:12<27:56:51, 3.45it/s] 7%|▋ | 24412/371472 [1:58:12<28:56:41, 3.33it/s] 7%|▋ | 24413/371472 [1:58:12<30:36:32, 3.15it/s] 7%|▋ | 24414/371472 [1:58:13<29:14:54, 3.30it/s] 7%|▋ | 24415/371472 [1:58:13<28:35:29, 3.37it/s] 7%|▋ | 24416/371472 [1:58:13<28:16:48, 3.41it/s] 7%|▋ | 24417/371472 [1:58:14<27:33:46, 3.50it/s] 7%|▋ | 24418/371472 [1:58:14<26:55:49, 3.58it/s] 7%|▋ | 24419/371472 [1:58:14<25:50:07, 3.73it/s] 7%|▋ | 24420/371472 [1:58:14<27:04:17, 3.56it/s] {'loss': 4.5443, 'learning_rate': 9.412883276950591e-07, 'epoch': 1.05} + 7%|▋ | 24420/371472 [1:58:14<27:04:17, 3.56it/s] 7%|▋ | 24421/371472 [1:58:15<27:44:33, 3.47it/s] 7%|▋ | 24422/371472 [1:58:15<28:08:01, 3.43it/s] 7%|▋ | 24423/371472 [1:58:15<27:22:48, 3.52it/s] 7%|▋ | 24424/371472 [1:58:16<27:59:46, 3.44it/s] 7%|▋ | 24425/371472 [1:58:16<26:46:11, 3.60it/s] 7%|▋ | 24426/371472 [1:58:16<30:16:07, 3.18it/s] 7%|▋ | 24427/371472 [1:58:16<28:31:46, 3.38it/s] 7%|▋ | 24428/371472 [1:58:17<28:15:56, 3.41it/s] 7%|▋ | 24429/371472 [1:58:17<28:16:45, 3.41it/s] 7%|▋ | 24430/371472 [1:58:17<28:45:24, 3.35it/s] 7%|▋ | 24431/371472 [1:58:18<27:47:09, 3.47it/s] 7%|▋ | 24432/371472 [1:58:18<28:29:04, 3.38it/s] 7%|▋ | 24433/371472 [1:58:18<29:40:27, 3.25it/s] 7%|▋ | 24434/371472 [1:58:19<29:33:11, 3.26it/s] 7%|▋ | 24435/371472 [1:58:19<28:21:49, 3.40it/s] 7%|▋ | 24436/371472 [1:58:19<26:48:38, 3.60it/s] 7%|▋ | 24437/371472 [1:58:20<33:27:36, 2.88it/s] 7%|▋ | 24438/371472 [1:58:20<31:16:52, 3.08it/s] 7%|▋ | 24439/371472 [1:58:20<30:09:50, 3.20it/s] 7%|▋ | 24440/371472 [1:58:20<29:44:41, 3.24it/s] {'loss': 4.6274, 'learning_rate': 9.412398457195802e-07, 'epoch': 1.05} + 7%|▋ | 24440/371472 [1:58:20<29:44:41, 3.24it/s] 7%|▋ | 24441/371472 [1:58:21<29:42:44, 3.24it/s] 7%|▋ | 24442/371472 [1:58:21<30:02:45, 3.21it/s] 7%|▋ | 24443/371472 [1:58:21<29:48:29, 3.23it/s] 7%|▋ | 24444/371472 [1:58:22<27:55:35, 3.45it/s] 7%|▋ | 24445/371472 [1:58:22<27:16:26, 3.53it/s] 7%|▋ | 24446/371472 [1:58:22<26:45:56, 3.60it/s] 7%|▋ | 24447/371472 [1:58:22<26:44:09, 3.61it/s] 7%|▋ | 24448/371472 [1:58:23<26:01:51, 3.70it/s] 7%|▋ | 24449/371472 [1:58:23<25:52:35, 3.73it/s] 7%|▋ | 24450/371472 [1:58:23<26:51:38, 3.59it/s] 7%|▋ | 24451/371472 [1:58:23<26:23:29, 3.65it/s] 7%|▋ | 24452/371472 [1:58:24<25:43:42, 3.75it/s] 7%|▋ | 24453/371472 [1:58:24<25:11:39, 3.83it/s] 7%|▋ | 24454/371472 [1:58:24<25:15:14, 3.82it/s] 7%|▋ | 24455/371472 [1:58:25<27:58:23, 3.45it/s] 7%|▋ | 24456/371472 [1:58:25<26:50:54, 3.59it/s] 7%|▋ | 24457/371472 [1:58:25<28:02:28, 3.44it/s] 7%|▋ | 24458/371472 [1:58:25<27:17:17, 3.53it/s] 7%|▋ | 24459/371472 [1:58:26<26:49:43, 3.59it/s] 7%|▋ | 24460/371472 [1:58:26<26:00:37, 3.71it/s] {'loss': 4.6099, 'learning_rate': 9.411913637441013e-07, 'epoch': 1.05} + 7%|▋ | 24460/371472 [1:58:26<26:00:37, 3.71it/s] 7%|▋ | 24461/371472 [1:58:26<25:37:32, 3.76it/s] 7%|▋ | 24462/371472 [1:58:26<25:12:54, 3.82it/s] 7%|▋ | 24463/371472 [1:58:27<26:13:28, 3.68it/s] 7%|▋ | 24464/371472 [1:58:27<28:17:14, 3.41it/s] 7%|▋ | 24465/371472 [1:58:27<28:13:04, 3.42it/s] 7%|▋ | 24466/371472 [1:58:28<27:14:46, 3.54it/s] 7%|▋ | 24467/371472 [1:58:28<26:58:58, 3.57it/s] 7%|▋ | 24468/371472 [1:58:28<26:21:01, 3.66it/s] 7%|▋ | 24469/371472 [1:58:28<26:24:22, 3.65it/s] 7%|▋ | 24470/371472 [1:58:29<28:15:59, 3.41it/s] 7%|▋ | 24471/371472 [1:58:29<28:59:52, 3.32it/s] 7%|▋ | 24472/371472 [1:58:29<31:13:15, 3.09it/s] 7%|▋ | 24473/371472 [1:58:30<29:46:20, 3.24it/s] 7%|▋ | 24474/371472 [1:58:30<29:21:28, 3.28it/s] 7%|▋ | 24475/371472 [1:58:30<28:16:39, 3.41it/s] 7%|▋ | 24476/371472 [1:58:31<28:17:40, 3.41it/s] 7%|▋ | 24477/371472 [1:58:31<27:52:42, 3.46it/s] 7%|▋ | 24478/371472 [1:58:31<29:31:01, 3.27it/s] 7%|▋ | 24479/371472 [1:58:32<28:34:24, 3.37it/s] 7%|▋ | 24480/371472 [1:58:32<27:35:11, 3.49it/s] {'loss': 4.5776, 'learning_rate': 9.411428817686224e-07, 'epoch': 1.05} + 7%|▋ | 24480/371472 [1:58:32<27:35:11, 3.49it/s] 7%|▋ | 24481/371472 [1:58:32<27:20:28, 3.53it/s] 7%|▋ | 24482/371472 [1:58:32<26:32:20, 3.63it/s] 7%|▋ | 24483/371472 [1:58:33<27:03:11, 3.56it/s] 7%|▋ | 24484/371472 [1:58:33<27:08:42, 3.55it/s] 7%|▋ | 24485/371472 [1:58:33<28:16:12, 3.41it/s] 7%|▋ | 24486/371472 [1:58:33<27:34:55, 3.49it/s] 7%|▋ | 24487/371472 [1:58:34<27:41:37, 3.48it/s] 7%|▋ | 24488/371472 [1:58:34<26:41:18, 3.61it/s] 7%|▋ | 24489/371472 [1:58:34<26:39:36, 3.62it/s] 7%|▋ | 24490/371472 [1:58:35<27:39:05, 3.49it/s] 7%|▋ | 24491/371472 [1:58:35<27:27:56, 3.51it/s] 7%|▋ | 24492/371472 [1:58:35<28:27:17, 3.39it/s] 7%|▋ | 24493/371472 [1:58:36<28:28:36, 3.38it/s] 7%|▋ | 24494/371472 [1:58:36<27:23:46, 3.52it/s] 7%|▋ | 24495/371472 [1:58:36<27:10:20, 3.55it/s] 7%|▋ | 24496/371472 [1:58:36<26:26:25, 3.65it/s] 7%|▋ | 24497/371472 [1:58:37<27:11:01, 3.55it/s] 7%|▋ | 24498/371472 [1:58:37<26:21:52, 3.66it/s] 7%|▋ | 24499/371472 [1:58:37<27:18:04, 3.53it/s] 7%|▋ | 24500/371472 [1:58:37<27:10:09, 3.55it/s] {'loss': 4.6551, 'learning_rate': 9.410943997931435e-07, 'epoch': 1.06} + 7%|▋ | 24500/371472 [1:58:37<27:10:09, 3.55it/s] 7%|▋ | 24501/371472 [1:58:38<27:28:15, 3.51it/s] 7%|▋ | 24502/371472 [1:58:38<27:28:27, 3.51it/s] 7%|▋ | 24503/371472 [1:58:38<27:36:54, 3.49it/s] 7%|▋ | 24504/371472 [1:58:39<27:04:07, 3.56it/s] 7%|▋ | 24505/371472 [1:58:39<26:43:19, 3.61it/s] 7%|▋ | 24506/371472 [1:58:39<25:57:23, 3.71it/s] 7%|▋ | 24507/371472 [1:58:39<27:54:37, 3.45it/s] 7%|▋ | 24508/371472 [1:58:40<26:45:08, 3.60it/s] 7%|▋ | 24509/371472 [1:58:40<26:41:49, 3.61it/s] 7%|▋ | 24510/371472 [1:58:40<26:35:43, 3.62it/s] 7%|▋ | 24511/371472 [1:58:41<27:20:26, 3.53it/s] 7%|▋ | 24512/371472 [1:58:41<28:44:08, 3.35it/s] 7%|▋ | 24513/371472 [1:58:41<28:04:34, 3.43it/s] 7%|▋ | 24514/371472 [1:58:42<30:57:55, 3.11it/s] 7%|▋ | 24515/371472 [1:58:42<29:21:29, 3.28it/s] 7%|▋ | 24516/371472 [1:58:42<30:07:10, 3.20it/s] 7%|▋ | 24517/371472 [1:58:42<28:47:29, 3.35it/s] 7%|▋ | 24518/371472 [1:58:43<27:19:31, 3.53it/s] 7%|▋ | 24519/371472 [1:58:43<28:19:21, 3.40it/s] 7%|▋ | 24520/371472 [1:58:43<29:27:18, 3.27it/s] {'loss': 4.713, 'learning_rate': 9.410459178176646e-07, 'epoch': 1.06} + 7%|▋ | 24520/371472 [1:58:43<29:27:18, 3.27it/s] 7%|▋ | 24521/371472 [1:58:44<28:31:53, 3.38it/s] 7%|▋ | 24522/371472 [1:58:44<27:18:33, 3.53it/s] 7%|▋ | 24523/371472 [1:58:44<26:21:04, 3.66it/s] 7%|▋ | 24524/371472 [1:58:44<25:38:02, 3.76it/s] 7%|▋ | 24525/371472 [1:58:45<26:35:56, 3.62it/s] 7%|▋ | 24526/371472 [1:58:45<27:38:50, 3.49it/s] 7%|▋ | 24527/371472 [1:58:45<27:13:07, 3.54it/s] 7%|▋ | 24528/371472 [1:58:45<27:14:52, 3.54it/s] 7%|▋ | 24529/371472 [1:58:46<28:16:51, 3.41it/s] 7%|▋ | 24530/371472 [1:58:46<28:28:36, 3.38it/s] 7%|▋ | 24531/371472 [1:58:46<28:17:13, 3.41it/s] 7%|▋ | 24532/371472 [1:58:47<28:25:05, 3.39it/s] 7%|▋ | 24533/371472 [1:58:47<27:34:49, 3.49it/s] 7%|▋ | 24534/371472 [1:58:47<27:16:52, 3.53it/s] 7%|▋ | 24535/371472 [1:58:48<28:58:15, 3.33it/s] 7%|▋ | 24536/371472 [1:58:48<27:44:24, 3.47it/s] 7%|▋ | 24537/371472 [1:58:48<27:09:39, 3.55it/s] 7%|▋ | 24538/371472 [1:58:48<26:59:28, 3.57it/s] 7%|▋ | 24539/371472 [1:58:49<28:53:42, 3.34it/s] 7%|▋ | 24540/371472 [1:58:49<29:25:27, 3.28it/s] {'loss': 4.6046, 'learning_rate': 9.409974358421858e-07, 'epoch': 1.06} + 7%|▋ | 24540/371472 [1:58:49<29:25:27, 3.28it/s] 7%|▋ | 24541/371472 [1:58:49<32:44:20, 2.94it/s] 7%|▋ | 24542/371472 [1:58:50<30:15:40, 3.18it/s] 7%|▋ | 24543/371472 [1:58:50<28:34:21, 3.37it/s] 7%|▋ | 24544/371472 [1:58:50<28:46:24, 3.35it/s] 7%|▋ | 24545/371472 [1:58:51<28:03:17, 3.43it/s] 7%|▋ | 24546/371472 [1:58:51<27:15:18, 3.54it/s] 7%|▋ | 24547/371472 [1:58:51<26:31:46, 3.63it/s] 7%|▋ | 24548/371472 [1:58:51<27:28:52, 3.51it/s] 7%|▋ | 24549/371472 [1:58:52<27:33:43, 3.50it/s] 7%|▋ | 24550/371472 [1:58:52<28:08:24, 3.42it/s] 7%|▋ | 24551/371472 [1:58:52<28:12:09, 3.42it/s] 7%|▋ | 24552/371472 [1:58:53<28:58:50, 3.33it/s] 7%|▋ | 24553/371472 [1:58:53<31:06:42, 3.10it/s] 7%|▋ | 24554/371472 [1:58:53<30:29:12, 3.16it/s] 7%|▋ | 24555/371472 [1:58:54<29:31:48, 3.26it/s] 7%|▋ | 24556/371472 [1:58:54<28:58:22, 3.33it/s] 7%|▋ | 24557/371472 [1:58:54<28:06:00, 3.43it/s] 7%|▋ | 24558/371472 [1:58:54<27:05:42, 3.56it/s] 7%|▋ | 24559/371472 [1:58:55<29:50:02, 3.23it/s] 7%|▋ | 24560/371472 [1:58:55<28:37:44, 3.37it/s] {'loss': 4.3717, 'learning_rate': 9.409489538667069e-07, 'epoch': 1.06} + 7%|▋ | 24560/371472 [1:58:55<28:37:44, 3.37it/s] 7%|▋ | 24561/371472 [1:58:55<29:25:35, 3.27it/s] 7%|▋ | 24562/371472 [1:58:56<27:53:24, 3.46it/s] 7%|▋ | 24563/371472 [1:58:56<28:54:49, 3.33it/s] 7%|▋ | 24564/371472 [1:58:56<28:18:29, 3.40it/s] 7%|▋ | 24565/371472 [1:58:57<30:37:48, 3.15it/s] 7%|▋ | 24566/371472 [1:58:57<29:23:56, 3.28it/s] 7%|▋ | 24567/371472 [1:58:57<28:08:32, 3.42it/s] 7%|▋ | 24568/371472 [1:58:57<28:21:37, 3.40it/s] 7%|▋ | 24569/371472 [1:58:58<27:58:35, 3.44it/s] 7%|▋ | 24570/371472 [1:58:58<27:31:18, 3.50it/s] 7%|▋ | 24571/371472 [1:58:58<27:46:54, 3.47it/s] 7%|▋ | 24572/371472 [1:58:59<28:24:23, 3.39it/s] 7%|▋ | 24573/371472 [1:58:59<28:23:30, 3.39it/s] 7%|▋ | 24574/371472 [1:58:59<27:18:34, 3.53it/s] 7%|▋ | 24575/371472 [1:58:59<26:30:58, 3.63it/s] 7%|▋ | 24576/371472 [1:59:00<26:36:23, 3.62it/s] 7%|▋ | 24577/371472 [1:59:00<27:11:01, 3.54it/s] 7%|▋ | 24578/371472 [1:59:00<26:27:01, 3.64it/s] 7%|▋ | 24579/371472 [1:59:00<26:09:56, 3.68it/s] 7%|▋ | 24580/371472 [1:59:01<25:58:12, 3.71it/s] {'loss': 4.7505, 'learning_rate': 9.409004718912279e-07, 'epoch': 1.06} + 7%|▋ | 24580/371472 [1:59:01<25:58:12, 3.71it/s] 7%|▋ | 24581/371472 [1:59:01<27:27:57, 3.51it/s] 7%|▋ | 24582/371472 [1:59:01<27:02:28, 3.56it/s] 7%|▋ | 24583/371472 [1:59:02<26:40:18, 3.61it/s] 7%|▋ | 24584/371472 [1:59:02<26:58:05, 3.57it/s] 7%|▋ | 24585/371472 [1:59:02<26:49:12, 3.59it/s] 7%|▋ | 24586/371472 [1:59:03<29:35:14, 3.26it/s] 7%|▋ | 24587/371472 [1:59:03<28:04:08, 3.43it/s] 7%|▋ | 24588/371472 [1:59:03<28:03:07, 3.43it/s] 7%|▋ | 24589/371472 [1:59:03<28:17:03, 3.41it/s] 7%|▋ | 24590/371472 [1:59:04<28:14:36, 3.41it/s] 7%|▋ | 24591/371472 [1:59:04<27:57:40, 3.45it/s] 7%|▋ | 24592/371472 [1:59:04<29:18:02, 3.29it/s] 7%|▋ | 24593/371472 [1:59:05<29:06:35, 3.31it/s] 7%|▋ | 24594/371472 [1:59:05<29:05:00, 3.31it/s] 7%|▋ | 24595/371472 [1:59:05<28:46:53, 3.35it/s] 7%|▋ | 24596/371472 [1:59:05<27:55:51, 3.45it/s] 7%|▋ | 24597/371472 [1:59:06<27:41:00, 3.48it/s] 7%|▋ | 24598/371472 [1:59:06<28:35:38, 3.37it/s] 7%|▋ | 24599/371472 [1:59:06<28:11:18, 3.42it/s] 7%|▋ | 24600/371472 [1:59:07<30:12:15, 3.19it/s] {'loss': 4.4285, 'learning_rate': 9.40851989915749e-07, 'epoch': 1.06} + 7%|▋ | 24600/371472 [1:59:07<30:12:15, 3.19it/s] 7%|▋ | 24601/371472 [1:59:07<30:38:35, 3.14it/s] 7%|▋ | 24602/371472 [1:59:07<30:20:32, 3.18it/s] 7%|▋ | 24603/371472 [1:59:08<29:11:46, 3.30it/s] 7%|▋ | 24604/371472 [1:59:08<28:04:18, 3.43it/s] 7%|▋ | 24605/371472 [1:59:08<27:27:01, 3.51it/s] 7%|▋ | 24606/371472 [1:59:08<28:57:17, 3.33it/s] 7%|▋ | 24607/371472 [1:59:09<27:56:30, 3.45it/s] 7%|▋ | 24608/371472 [1:59:09<28:11:09, 3.42it/s] 7%|▋ | 24609/371472 [1:59:09<28:03:17, 3.43it/s] 7%|▋ | 24610/371472 [1:59:10<27:09:03, 3.55it/s] 7%|▋ | 24611/371472 [1:59:10<27:23:31, 3.52it/s] 7%|▋ | 24612/371472 [1:59:10<29:15:29, 3.29it/s] 7%|▋ | 24613/371472 [1:59:10<28:11:39, 3.42it/s] 7%|▋ | 24614/371472 [1:59:11<29:14:20, 3.30it/s] 7%|▋ | 24615/371472 [1:59:11<28:34:10, 3.37it/s] 7%|▋ | 24616/371472 [1:59:11<27:24:44, 3.51it/s] 7%|▋ | 24617/371472 [1:59:12<27:14:59, 3.54it/s] 7%|▋ | 24618/371472 [1:59:12<28:54:26, 3.33it/s] 7%|▋ | 24619/371472 [1:59:12<28:37:53, 3.37it/s] 7%|▋ | 24620/371472 [1:59:13<28:30:35, 3.38it/s] {'loss': 4.3466, 'learning_rate': 9.408035079402702e-07, 'epoch': 1.06} + 7%|▋ | 24620/371472 [1:59:13<28:30:35, 3.38it/s] 7%|▋ | 24621/371472 [1:59:13<28:08:01, 3.42it/s] 7%|▋ | 24622/371472 [1:59:13<27:15:48, 3.53it/s] 7%|▋ | 24623/371472 [1:59:13<27:32:09, 3.50it/s] 7%|▋ | 24624/371472 [1:59:14<26:51:17, 3.59it/s] 7%|▋ | 24625/371472 [1:59:14<27:00:17, 3.57it/s] 7%|▋ | 24626/371472 [1:59:14<26:33:20, 3.63it/s] 7%|▋ | 24627/371472 [1:59:15<27:29:42, 3.50it/s] 7%|▋ | 24628/371472 [1:59:15<28:16:27, 3.41it/s] 7%|▋ | 24629/371472 [1:59:15<26:56:01, 3.58it/s] 7%|▋ | 24630/371472 [1:59:15<29:12:13, 3.30it/s] 7%|▋ | 24631/371472 [1:59:16<27:50:57, 3.46it/s] 7%|▋ | 24632/371472 [1:59:16<27:21:24, 3.52it/s] 7%|▋ | 24633/371472 [1:59:16<26:47:24, 3.60it/s] 7%|▋ | 24634/371472 [1:59:17<27:01:16, 3.57it/s] 7%|▋ | 24635/371472 [1:59:17<26:25:54, 3.64it/s] 7%|▋ | 24636/371472 [1:59:17<26:51:25, 3.59it/s] 7%|▋ | 24637/371472 [1:59:17<28:09:12, 3.42it/s] 7%|▋ | 24638/371472 [1:59:18<27:25:27, 3.51it/s] 7%|▋ | 24639/371472 [1:59:18<27:08:33, 3.55it/s] 7%|▋ | 24640/371472 [1:59:18<26:43:55, 3.60it/s] {'loss': 4.7403, 'learning_rate': 9.407550259647912e-07, 'epoch': 1.06} + 7%|▋ | 24640/371472 [1:59:18<26:43:55, 3.60it/s] 7%|▋ | 24641/371472 [1:59:18<25:56:22, 3.71it/s] 7%|▋ | 24642/371472 [1:59:19<25:41:02, 3.75it/s] 7%|▋ | 24643/371472 [1:59:19<25:26:52, 3.79it/s] 7%|▋ | 24644/371472 [1:59:19<26:10:15, 3.68it/s] 7%|▋ | 24645/371472 [1:59:20<27:00:55, 3.57it/s] 7%|▋ | 24646/371472 [1:59:20<30:14:58, 3.18it/s] 7%|▋ | 24647/371472 [1:59:20<28:58:20, 3.33it/s] 7%|▋ | 24648/371472 [1:59:20<27:50:11, 3.46it/s] 7%|▋ | 24649/371472 [1:59:21<28:15:39, 3.41it/s] 7%|▋ | 24650/371472 [1:59:21<27:16:42, 3.53it/s] 7%|▋ | 24651/371472 [1:59:21<26:51:11, 3.59it/s] 7%|▋ | 24652/371472 [1:59:22<26:26:51, 3.64it/s] 7%|▋ | 24653/371472 [1:59:22<26:06:59, 3.69it/s] 7%|▋ | 24654/371472 [1:59:22<28:07:59, 3.42it/s] 7%|▋ | 24655/371472 [1:59:23<29:22:49, 3.28it/s] 7%|▋ | 24656/371472 [1:59:23<28:00:03, 3.44it/s] 7%|▋ | 24657/371472 [1:59:23<27:39:42, 3.48it/s] 7%|▋ | 24658/371472 [1:59:23<28:38:04, 3.36it/s] 7%|▋ | 24659/371472 [1:59:24<29:08:10, 3.31it/s] 7%|▋ | 24660/371472 [1:59:24<28:50:06, 3.34it/s] {'loss': 4.5777, 'learning_rate': 9.407065439893124e-07, 'epoch': 1.06} + 7%|▋ | 24660/371472 [1:59:24<28:50:06, 3.34it/s] 7%|▋ | 24661/371472 [1:59:24<29:04:42, 3.31it/s] 7%|▋ | 24662/371472 [1:59:25<29:29:52, 3.27it/s] 7%|▋ | 24663/371472 [1:59:25<28:33:12, 3.37it/s] 7%|▋ | 24664/371472 [1:59:25<28:32:39, 3.37it/s] 7%|▋ | 24665/371472 [1:59:25<28:26:12, 3.39it/s] 7%|▋ | 24666/371472 [1:59:26<28:27:19, 3.39it/s] 7%|▋ | 24667/371472 [1:59:26<28:22:27, 3.40it/s] 7%|▋ | 24668/371472 [1:59:26<27:43:54, 3.47it/s] 7%|▋ | 24669/371472 [1:59:27<28:35:55, 3.37it/s] 7%|▋ | 24670/371472 [1:59:27<28:27:05, 3.39it/s] 7%|▋ | 24671/371472 [1:59:27<29:44:17, 3.24it/s] 7%|▋ | 24672/371472 [1:59:28<29:06:17, 3.31it/s] 7%|▋ | 24673/371472 [1:59:28<27:52:30, 3.46it/s] 7%|▋ | 24674/371472 [1:59:28<26:56:29, 3.58it/s] 7%|▋ | 24675/371472 [1:59:28<26:26:24, 3.64it/s] 7%|▋ | 24676/371472 [1:59:29<25:47:58, 3.73it/s] 7%|▋ | 24677/371472 [1:59:29<25:41:50, 3.75it/s] 7%|▋ | 24678/371472 [1:59:29<25:30:55, 3.78it/s] 7%|▋ | 24679/371472 [1:59:29<26:11:29, 3.68it/s] 7%|▋ | 24680/371472 [1:59:30<26:01:13, 3.70it/s] {'loss': 4.4154, 'learning_rate': 9.406580620138335e-07, 'epoch': 1.06} + 7%|▋ | 24680/371472 [1:59:30<26:01:13, 3.70it/s] 7%|▋ | 24681/371472 [1:59:30<28:09:40, 3.42it/s] 7%|▋ | 24682/371472 [1:59:30<27:22:20, 3.52it/s] 7%|▋ | 24683/371472 [1:59:31<27:05:25, 3.56it/s] 7%|▋ | 24684/371472 [1:59:31<28:09:17, 3.42it/s] 7%|▋ | 24685/371472 [1:59:31<28:48:49, 3.34it/s] 7%|▋ | 24686/371472 [1:59:31<28:06:49, 3.43it/s] 7%|▋ | 24687/371472 [1:59:32<28:24:55, 3.39it/s] 7%|▋ | 24688/371472 [1:59:32<27:36:08, 3.49it/s] 7%|▋ | 24689/371472 [1:59:32<27:04:50, 3.56it/s] 7%|▋ | 24690/371472 [1:59:33<27:24:42, 3.51it/s] 7%|▋ | 24691/371472 [1:59:33<27:05:11, 3.56it/s] 7%|▋ | 24692/371472 [1:59:33<26:24:59, 3.65it/s] 7%|▋ | 24693/371472 [1:59:33<27:19:37, 3.52it/s] 7%|▋ | 24694/371472 [1:59:34<26:49:13, 3.59it/s] 7%|▋ | 24695/371472 [1:59:34<26:56:11, 3.58it/s] 7%|▋ | 24696/371472 [1:59:34<26:39:37, 3.61it/s] 7%|▋ | 24697/371472 [1:59:35<28:02:57, 3.43it/s] 7%|▋ | 24698/371472 [1:59:35<27:58:13, 3.44it/s] 7%|▋ | 24699/371472 [1:59:35<28:29:14, 3.38it/s] 7%|▋ | 24700/371472 [1:59:35<27:56:44, 3.45it/s] {'loss': 4.6982, 'learning_rate': 9.406095800383546e-07, 'epoch': 1.06} + 7%|▋ | 24700/371472 [1:59:35<27:56:44, 3.45it/s] 7%|▋ | 24701/371472 [1:59:36<27:39:18, 3.48it/s] 7%|▋ | 24702/371472 [1:59:36<27:47:09, 3.47it/s] 7%|▋ | 24703/371472 [1:59:36<31:00:46, 3.11it/s] 7%|▋ | 24704/371472 [1:59:37<31:33:58, 3.05it/s] 7%|▋ | 24705/371472 [1:59:37<33:20:27, 2.89it/s] 7%|▋ | 24706/371472 [1:59:37<31:22:54, 3.07it/s] 7%|▋ | 24707/371472 [1:59:38<30:36:11, 3.15it/s] 7%|▋ | 24708/371472 [1:59:38<29:30:37, 3.26it/s] 7%|▋ | 24709/371472 [1:59:38<28:09:08, 3.42it/s] 7%|▋ | 24710/371472 [1:59:39<27:55:45, 3.45it/s] 7%|▋ | 24711/371472 [1:59:39<27:20:40, 3.52it/s] 7%|▋ | 24712/371472 [1:59:39<27:27:17, 3.51it/s] 7%|▋ | 24713/371472 [1:59:39<27:20:01, 3.52it/s] 7%|▋ | 24714/371472 [1:59:40<27:38:52, 3.48it/s] 7%|▋ | 24715/371472 [1:59:40<27:33:14, 3.50it/s] 7%|▋ | 24716/371472 [1:59:40<27:02:04, 3.56it/s] 7%|▋ | 24717/371472 [1:59:40<26:42:53, 3.61it/s] 7%|▋ | 24718/371472 [1:59:41<26:12:33, 3.68it/s] 7%|▋ | 24719/371472 [1:59:41<26:27:00, 3.64it/s] 7%|▋ | 24720/371472 [1:59:41<26:18:01, 3.66it/s] {'loss': 4.3179, 'learning_rate': 9.405610980628756e-07, 'epoch': 1.06} + 7%|▋ | 24720/371472 [1:59:41<26:18:01, 3.66it/s] 7%|▋ | 24721/371472 [1:59:42<27:34:27, 3.49it/s] 7%|▋ | 24722/371472 [1:59:42<27:13:58, 3.54it/s] 7%|▋ | 24723/371472 [1:59:42<28:18:03, 3.40it/s] 7%|▋ | 24724/371472 [1:59:43<29:52:55, 3.22it/s] 7%|▋ | 24725/371472 [1:59:43<29:23:14, 3.28it/s] 7%|▋ | 24726/371472 [1:59:43<29:07:05, 3.31it/s] 7%|▋ | 24727/371472 [1:59:43<28:40:20, 3.36it/s] 7%|▋ | 24728/371472 [1:59:44<28:43:40, 3.35it/s] 7%|▋ | 24729/371472 [1:59:44<27:18:18, 3.53it/s] 7%|▋ | 24730/371472 [1:59:44<28:47:16, 3.35it/s] 7%|▋ | 24731/371472 [1:59:45<29:20:35, 3.28it/s] 7%|▋ | 24732/371472 [1:59:45<30:28:07, 3.16it/s] 7%|▋ | 24733/371472 [1:59:45<29:00:00, 3.32it/s] 7%|▋ | 24734/371472 [1:59:46<28:55:48, 3.33it/s] 7%|▋ | 24735/371472 [1:59:46<30:09:47, 3.19it/s] 7%|▋ | 24736/371472 [1:59:46<28:56:00, 3.33it/s] 7%|▋ | 24737/371472 [1:59:46<27:48:08, 3.46it/s] 7%|▋ | 24738/371472 [1:59:47<27:44:26, 3.47it/s] 7%|▋ | 24739/371472 [1:59:47<28:11:33, 3.42it/s] 7%|▋ | 24740/371472 [1:59:47<27:11:38, 3.54it/s] {'loss': 4.1368, 'learning_rate': 9.405126160873968e-07, 'epoch': 1.07} + 7%|▋ | 24740/371472 [1:59:47<27:11:38, 3.54it/s] 7%|▋ | 24741/371472 [1:59:48<26:30:38, 3.63it/s] 7%|▋ | 24742/371472 [1:59:48<28:26:15, 3.39it/s] 7%|▋ | 24743/371472 [1:59:48<31:35:02, 3.05it/s] 7%|▋ | 24744/371472 [1:59:49<29:45:45, 3.24it/s] 7%|▋ | 24745/371472 [1:59:49<28:33:28, 3.37it/s] 7%|▋ | 24746/371472 [1:59:49<27:39:54, 3.48it/s] 7%|▋ | 24747/371472 [1:59:49<28:19:21, 3.40it/s] 7%|▋ | 24748/371472 [1:59:50<27:45:04, 3.47it/s] 7%|▋ | 24749/371472 [1:59:50<28:26:15, 3.39it/s] 7%|▋ | 24750/371472 [1:59:50<28:08:03, 3.42it/s] 7%|▋ | 24751/371472 [1:59:51<30:30:34, 3.16it/s] 7%|▋ | 24752/371472 [1:59:51<29:09:19, 3.30it/s] 7%|▋ | 24753/371472 [1:59:51<27:18:08, 3.53it/s] 7%|▋ | 24754/371472 [1:59:51<27:07:36, 3.55it/s] 7%|▋ | 24755/371472 [1:59:52<27:14:05, 3.54it/s] 7%|▋ | 24756/371472 [1:59:52<26:46:46, 3.60it/s] 7%|▋ | 24757/371472 [1:59:52<26:27:02, 3.64it/s] 7%|▋ | 24758/371472 [1:59:53<27:39:39, 3.48it/s] 7%|▋ | 24759/371472 [1:59:53<27:15:09, 3.53it/s] 7%|▋ | 24760/371472 [1:59:53<28:43:39, 3.35it/s] {'loss': 4.5744, 'learning_rate': 9.404641341119179e-07, 'epoch': 1.07} + 7%|▋ | 24760/371472 [1:59:53<28:43:39, 3.35it/s] 7%|▋ | 24761/371472 [1:59:53<28:37:18, 3.36it/s] 7%|▋ | 24762/371472 [1:59:54<29:07:29, 3.31it/s] 7%|▋ | 24763/371472 [1:59:54<28:55:05, 3.33it/s] 7%|▋ | 24764/371472 [1:59:54<27:51:27, 3.46it/s] 7%|▋ | 24765/371472 [1:59:55<26:56:25, 3.57it/s] 7%|▋ | 24766/371472 [1:59:55<27:15:36, 3.53it/s] 7%|▋ | 24767/371472 [1:59:55<31:13:08, 3.08it/s] 7%|▋ | 24768/371472 [1:59:56<29:35:26, 3.25it/s] 7%|▋ | 24769/371472 [1:59:56<28:24:19, 3.39it/s] 7%|▋ | 24770/371472 [1:59:56<28:13:58, 3.41it/s] 7%|▋ | 24771/371472 [1:59:56<27:09:49, 3.55it/s] 7%|▋ | 24772/371472 [1:59:57<27:01:31, 3.56it/s] 7%|▋ | 24773/371472 [1:59:57<26:57:59, 3.57it/s] 7%|▋ | 24774/371472 [1:59:57<26:28:23, 3.64it/s] 7%|▋ | 24775/371472 [1:59:57<25:51:37, 3.72it/s] 7%|▋ | 24776/371472 [1:59:58<25:53:12, 3.72it/s] 7%|▋ | 24777/371472 [1:59:58<25:37:45, 3.76it/s] 7%|▋ | 24778/371472 [1:59:58<25:30:58, 3.77it/s] 7%|▋ | 24779/371472 [1:59:59<26:33:00, 3.63it/s] 7%|▋ | 24780/371472 [1:59:59<26:31:41, 3.63it/s] {'loss': 4.3126, 'learning_rate': 9.404156521364391e-07, 'epoch': 1.07} + 7%|▋ | 24780/371472 [1:59:59<26:31:41, 3.63it/s] 7%|▋ | 24781/371472 [1:59:59<26:55:51, 3.58it/s] 7%|▋ | 24782/371472 [1:59:59<27:19:34, 3.52it/s] 7%|▋ | 24783/371472 [2:00:00<28:17:23, 3.40it/s] 7%|▋ | 24784/371472 [2:00:00<26:50:21, 3.59it/s] 7%|▋ | 24785/371472 [2:00:00<28:02:14, 3.43it/s] 7%|▋ | 24786/371472 [2:00:01<27:57:22, 3.44it/s] 7%|▋ | 24787/371472 [2:00:01<28:00:50, 3.44it/s] 7%|▋ | 24788/371472 [2:00:01<27:10:10, 3.54it/s] 7%|▋ | 24789/371472 [2:00:01<26:47:11, 3.60it/s] 7%|▋ | 24790/371472 [2:00:02<27:58:46, 3.44it/s] 7%|▋ | 24791/371472 [2:00:02<31:10:44, 3.09it/s] 7%|▋ | 24792/371472 [2:00:02<29:05:45, 3.31it/s] 7%|▋ | 24793/371472 [2:00:03<28:35:58, 3.37it/s] 7%|▋ | 24794/371472 [2:00:03<28:32:05, 3.37it/s] 7%|▋ | 24795/371472 [2:00:03<27:58:42, 3.44it/s] 7%|▋ | 24796/371472 [2:00:03<27:44:05, 3.47it/s] 7%|▋ | 24797/371472 [2:00:04<28:01:18, 3.44it/s] 7%|▋ | 24798/371472 [2:00:04<27:04:55, 3.56it/s] 7%|▋ | 24799/371472 [2:00:04<26:17:49, 3.66it/s] 7%|▋ | 24800/371472 [2:00:05<26:31:57, 3.63it/s] {'loss': 4.3316, 'learning_rate': 9.403671701609601e-07, 'epoch': 1.07} + 7%|▋ | 24800/371472 [2:00:05<26:31:57, 3.63it/s] 7%|▋ | 24801/371472 [2:00:05<27:18:42, 3.53it/s] 7%|▋ | 24802/371472 [2:00:05<27:02:15, 3.56it/s] 7%|▋ | 24803/371472 [2:00:05<26:45:36, 3.60it/s] 7%|▋ | 24804/371472 [2:00:06<29:20:35, 3.28it/s] 7%|▋ | 24805/371472 [2:00:06<29:13:16, 3.30it/s] 7%|▋ | 24806/371472 [2:00:06<27:55:27, 3.45it/s] 7%|▋ | 24807/371472 [2:00:07<27:05:14, 3.56it/s] 7%|▋ | 24808/371472 [2:00:07<27:11:02, 3.54it/s] 7%|▋ | 24809/371472 [2:00:07<28:09:21, 3.42it/s] 7%|▋ | 24810/371472 [2:00:08<28:42:05, 3.36it/s] 7%|▋ | 24811/371472 [2:00:08<27:53:15, 3.45it/s] 7%|▋ | 24812/371472 [2:00:08<27:53:17, 3.45it/s] 7%|▋ | 24813/371472 [2:00:08<28:27:17, 3.38it/s] 7%|▋ | 24814/371472 [2:00:09<30:04:25, 3.20it/s] 7%|▋ | 24815/371472 [2:00:09<30:37:49, 3.14it/s] 7%|▋ | 24816/371472 [2:00:09<31:15:22, 3.08it/s] 7%|▋ | 24817/371472 [2:00:10<30:28:29, 3.16it/s] 7%|▋ | 24818/371472 [2:00:10<29:27:53, 3.27it/s] 7%|▋ | 24819/371472 [2:00:10<28:05:40, 3.43it/s] 7%|▋ | 24820/371472 [2:00:11<27:27:08, 3.51it/s] {'loss': 4.4101, 'learning_rate': 9.403186881854812e-07, 'epoch': 1.07} + 7%|▋ | 24820/371472 [2:00:11<27:27:08, 3.51it/s] 7%|▋ | 24821/371472 [2:00:11<27:21:43, 3.52it/s] 7%|▋ | 24822/371472 [2:00:11<28:06:12, 3.43it/s] 7%|▋ | 24823/371472 [2:00:11<27:26:55, 3.51it/s] 7%|▋ | 24824/371472 [2:00:12<27:40:45, 3.48it/s] 7%|▋ | 24825/371472 [2:00:12<30:03:46, 3.20it/s] 7%|▋ | 24826/371472 [2:00:12<29:47:31, 3.23it/s] 7%|▋ | 24827/371472 [2:00:13<29:04:14, 3.31it/s] 7%|▋ | 24828/371472 [2:00:13<28:18:46, 3.40it/s] 7%|▋ | 24829/371472 [2:00:13<28:29:58, 3.38it/s] 7%|▋ | 24830/371472 [2:00:14<28:05:06, 3.43it/s] 7%|▋ | 24831/371472 [2:00:14<27:09:24, 3.55it/s] 7%|▋ | 24832/371472 [2:00:14<26:54:57, 3.58it/s] 7%|▋ | 24833/371472 [2:00:14<26:22:31, 3.65it/s] 7%|▋ | 24834/371472 [2:00:15<25:53:11, 3.72it/s] 7%|▋ | 24835/371472 [2:00:15<26:41:03, 3.61it/s] 7%|▋ | 24836/371472 [2:00:15<27:42:15, 3.48it/s] 7%|▋ | 24837/371472 [2:00:15<26:46:00, 3.60it/s] 7%|▋ | 24838/371472 [2:00:16<29:42:40, 3.24it/s] 7%|▋ | 24839/371472 [2:00:16<28:58:39, 3.32it/s] 7%|▋ | 24840/371472 [2:00:16<28:14:43, 3.41it/s] {'loss': 4.5575, 'learning_rate': 9.402702062100023e-07, 'epoch': 1.07} + 7%|▋ | 24840/371472 [2:00:16<28:14:43, 3.41it/s] 7%|▋ | 24841/371472 [2:00:17<28:59:30, 3.32it/s] 7%|▋ | 24842/371472 [2:00:17<28:55:33, 3.33it/s] 7%|▋ | 24843/371472 [2:00:17<27:58:33, 3.44it/s] 7%|▋ | 24844/371472 [2:00:18<28:10:48, 3.42it/s] 7%|▋ | 24845/371472 [2:00:18<27:21:00, 3.52it/s] 7%|▋ | 24846/371472 [2:00:18<27:17:48, 3.53it/s] 7%|▋ | 24847/371472 [2:00:18<26:26:34, 3.64it/s] 7%|▋ | 24848/371472 [2:00:19<26:55:09, 3.58it/s] 7%|▋ | 24849/371472 [2:00:19<26:41:43, 3.61it/s] 7%|▋ | 24850/371472 [2:00:19<27:40:32, 3.48it/s] 7%|▋ | 24851/371472 [2:00:19<27:17:15, 3.53it/s] 7%|▋ | 24852/371472 [2:00:20<28:00:32, 3.44it/s] 7%|▋ | 24853/371472 [2:00:20<27:43:12, 3.47it/s] 7%|▋ | 24854/371472 [2:00:20<29:59:39, 3.21it/s] 7%|▋ | 24855/371472 [2:00:21<30:33:21, 3.15it/s] 7%|▋ | 24856/371472 [2:00:21<29:08:02, 3.30it/s] 7%|▋ | 24857/371472 [2:00:21<28:48:03, 3.34it/s] 7%|▋ | 24858/371472 [2:00:22<28:19:54, 3.40it/s] 7%|▋ | 24859/371472 [2:00:22<28:13:06, 3.41it/s] 7%|▋ | 24860/371472 [2:00:22<27:56:05, 3.45it/s] {'loss': 4.4639, 'learning_rate': 9.402217242345235e-07, 'epoch': 1.07} + 7%|▋ | 24860/371472 [2:00:22<27:56:05, 3.45it/s] 7%|▋ | 24861/371472 [2:00:22<27:41:13, 3.48it/s] 7%|▋ | 24862/371472 [2:00:23<27:09:39, 3.54it/s] 7%|▋ | 24863/371472 [2:00:23<27:24:37, 3.51it/s] 7%|▋ | 24864/371472 [2:00:23<28:15:22, 3.41it/s] 7%|▋ | 24865/371472 [2:00:24<27:11:34, 3.54it/s] 7%|▋ | 24866/371472 [2:00:24<26:39:32, 3.61it/s] 7%|▋ | 24867/371472 [2:00:24<28:08:18, 3.42it/s] 7%|▋ | 24868/371472 [2:00:24<27:59:25, 3.44it/s] 7%|▋ | 24869/371472 [2:00:25<26:37:09, 3.62it/s] 7%|▋ | 24870/371472 [2:00:25<27:23:38, 3.51it/s] 7%|▋ | 24871/371472 [2:00:25<26:48:41, 3.59it/s] 7%|▋ | 24872/371472 [2:00:26<30:18:09, 3.18it/s] 7%|▋ | 24873/371472 [2:00:26<29:09:27, 3.30it/s] 7%|▋ | 24874/371472 [2:00:26<28:47:01, 3.34it/s] 7%|▋ | 24875/371472 [2:00:27<28:07:41, 3.42it/s] 7%|▋ | 24876/371472 [2:00:27<29:08:45, 3.30it/s] 7%|▋ | 24877/371472 [2:00:27<28:25:19, 3.39it/s] 7%|▋ | 24878/371472 [2:00:27<28:11:24, 3.42it/s] 7%|▋ | 24879/371472 [2:00:28<27:29:36, 3.50it/s] 7%|▋ | 24880/371472 [2:00:28<26:59:03, 3.57it/s] {'loss': 4.5161, 'learning_rate': 9.401732422590445e-07, 'epoch': 1.07} + 7%|▋ | 24880/371472 [2:00:28<26:59:03, 3.57it/s] 7%|▋ | 24881/371472 [2:00:28<26:45:49, 3.60it/s] 7%|▋ | 24882/371472 [2:00:29<27:16:50, 3.53it/s] 7%|▋ | 24883/371472 [2:00:29<26:28:58, 3.64it/s] 7%|▋ | 24884/371472 [2:00:29<25:49:15, 3.73it/s] 7%|▋ | 24885/371472 [2:00:29<27:48:10, 3.46it/s] 7%|▋ | 24886/371472 [2:00:30<27:30:30, 3.50it/s] 7%|▋ | 24887/371472 [2:00:30<30:12:05, 3.19it/s] 7%|▋ | 24888/371472 [2:00:30<29:27:34, 3.27it/s] 7%|▋ | 24889/371472 [2:00:31<29:05:10, 3.31it/s] 7%|▋ | 24890/371472 [2:00:31<30:08:41, 3.19it/s] 7%|▋ | 24891/371472 [2:00:31<29:38:50, 3.25it/s] 7%|▋ | 24892/371472 [2:00:31<27:57:05, 3.44it/s] 7%|▋ | 24893/371472 [2:00:32<29:30:29, 3.26it/s] 7%|▋ | 24894/371472 [2:00:32<29:44:40, 3.24it/s] 7%|▋ | 24895/371472 [2:00:32<28:21:37, 3.39it/s] 7%|▋ | 24896/371472 [2:00:33<27:32:32, 3.50it/s] 7%|▋ | 24897/371472 [2:00:33<26:56:51, 3.57it/s] 7%|▋ | 24898/371472 [2:00:33<27:17:19, 3.53it/s] 7%|▋ | 24899/371472 [2:00:34<27:59:35, 3.44it/s] 7%|▋ | 24900/371472 [2:00:34<28:00:20, 3.44it/s] {'loss': 4.5473, 'learning_rate': 9.401247602835657e-07, 'epoch': 1.07} + 7%|▋ | 24900/371472 [2:00:34<28:00:20, 3.44it/s] 7%|▋ | 24901/371472 [2:00:34<28:42:55, 3.35it/s] 7%|▋ | 24902/371472 [2:00:34<28:30:46, 3.38it/s] 7%|▋ | 24903/371472 [2:00:35<27:18:13, 3.53it/s] 7%|▋ | 24904/371472 [2:00:35<27:27:21, 3.51it/s] 7%|▋ | 24905/371472 [2:00:35<30:10:05, 3.19it/s] 7%|▋ | 24906/371472 [2:00:36<30:08:53, 3.19it/s] 7%|▋ | 24907/371472 [2:00:36<30:31:34, 3.15it/s] 7%|▋ | 24908/371472 [2:00:36<28:59:54, 3.32it/s] 7%|▋ | 24909/371472 [2:00:37<27:15:23, 3.53it/s] 7%|▋ | 24910/371472 [2:00:37<28:25:58, 3.39it/s] 7%|▋ | 24911/371472 [2:00:37<27:35:27, 3.49it/s] 7%|▋ | 24912/371472 [2:00:37<27:47:24, 3.46it/s] 7%|▋ | 24913/371472 [2:00:38<28:25:37, 3.39it/s] 7%|▋ | 24914/371472 [2:00:38<27:55:11, 3.45it/s] 7%|▋ | 24915/371472 [2:00:38<27:23:18, 3.51it/s] 7%|▋ | 24916/371472 [2:00:39<30:28:40, 3.16it/s] 7%|▋ | 24917/371472 [2:00:39<30:15:06, 3.18it/s] 7%|▋ | 24918/371472 [2:00:39<31:25:22, 3.06it/s] 7%|▋ | 24919/371472 [2:00:40<30:09:08, 3.19it/s] 7%|▋ | 24920/371472 [2:00:40<32:32:09, 2.96it/s] {'loss': 4.3648, 'learning_rate': 9.400762783080868e-07, 'epoch': 1.07} + 7%|▋ | 24920/371472 [2:00:40<32:32:09, 2.96it/s] 7%|▋ | 24921/371472 [2:00:40<31:52:28, 3.02it/s] 7%|▋ | 24922/371472 [2:00:41<30:14:05, 3.18it/s] 7%|▋ | 24923/371472 [2:00:41<29:48:22, 3.23it/s] 7%|▋ | 24924/371472 [2:00:41<29:30:17, 3.26it/s] 7%|▋ | 24925/371472 [2:00:41<29:13:20, 3.29it/s] 7%|▋ | 24926/371472 [2:00:42<28:11:54, 3.41it/s] 7%|▋ | 24927/371472 [2:00:42<28:04:18, 3.43it/s] 7%|▋ | 24928/371472 [2:00:42<27:30:44, 3.50it/s] 7%|▋ | 24929/371472 [2:00:43<27:33:43, 3.49it/s] 7%|▋ | 24930/371472 [2:00:43<26:34:58, 3.62it/s] 7%|▋ | 24931/371472 [2:00:43<29:17:45, 3.29it/s] 7%|▋ | 24932/371472 [2:00:43<28:09:13, 3.42it/s] 7%|▋ | 24933/371472 [2:00:44<28:46:55, 3.34it/s] 7%|▋ | 24934/371472 [2:00:44<27:41:07, 3.48it/s] 7%|▋ | 24935/371472 [2:00:44<27:32:40, 3.49it/s] 7%|▋ | 24936/371472 [2:00:45<26:25:06, 3.64it/s] 7%|▋ | 24937/371472 [2:00:45<26:28:57, 3.63it/s] 7%|▋ | 24938/371472 [2:00:45<27:41:27, 3.48it/s] 7%|▋ | 24939/371472 [2:00:46<29:10:26, 3.30it/s] 7%|▋ | 24940/371472 [2:00:46<27:53:57, 3.45it/s] {'loss': 4.6174, 'learning_rate': 9.400277963326078e-07, 'epoch': 1.07} + 7%|▋ | 24940/371472 [2:00:46<27:53:57, 3.45it/s] 7%|▋ | 24941/371472 [2:00:46<28:19:56, 3.40it/s] 7%|▋ | 24942/371472 [2:00:46<27:47:20, 3.46it/s] 7%|▋ | 24943/371472 [2:00:47<26:46:23, 3.60it/s] 7%|▋ | 24944/371472 [2:00:47<29:42:09, 3.24it/s] 7%|▋ | 24945/371472 [2:00:47<28:53:43, 3.33it/s] 7%|▋ | 24946/371472 [2:00:48<29:18:33, 3.28it/s] 7%|▋ | 24947/371472 [2:00:48<30:25:46, 3.16it/s] 7%|▋ | 24948/371472 [2:00:48<29:52:03, 3.22it/s] 7%|▋ | 24949/371472 [2:00:49<30:09:40, 3.19it/s] 7%|▋ | 24950/371472 [2:00:49<29:25:52, 3.27it/s] 7%|▋ | 24951/371472 [2:00:49<28:15:58, 3.41it/s] 7%|▋ | 24952/371472 [2:00:49<27:55:29, 3.45it/s] 7%|▋ | 24953/371472 [2:00:50<27:16:00, 3.53it/s] 7%|▋ | 24954/371472 [2:00:50<27:45:42, 3.47it/s] 7%|▋ | 24955/371472 [2:00:50<27:16:37, 3.53it/s] 7%|▋ | 24956/371472 [2:00:50<26:28:01, 3.64it/s] 7%|▋ | 24957/371472 [2:00:51<26:08:29, 3.68it/s] 7%|▋ | 24958/371472 [2:00:51<26:42:17, 3.60it/s] 7%|▋ | 24959/371472 [2:00:51<26:57:03, 3.57it/s] 7%|▋ | 24960/371472 [2:00:52<26:46:12, 3.60it/s] {'loss': 4.697, 'learning_rate': 9.399793143571289e-07, 'epoch': 1.08} + 7%|▋ | 24960/371472 [2:00:52<26:46:12, 3.60it/s] 7%|▋ | 24961/371472 [2:00:52<27:04:49, 3.55it/s] 7%|▋ | 24962/371472 [2:00:52<29:43:53, 3.24it/s] 7%|▋ | 24963/371472 [2:00:53<30:15:11, 3.18it/s] 7%|▋ | 24964/371472 [2:00:53<29:23:19, 3.28it/s] 7%|▋ | 24965/371472 [2:00:53<28:10:49, 3.42it/s] 7%|▋ | 24966/371472 [2:00:53<27:28:51, 3.50it/s] 7%|▋ | 24967/371472 [2:00:54<27:46:38, 3.47it/s] 7%|▋ | 24968/371472 [2:00:54<27:36:56, 3.49it/s] 7%|▋ | 24969/371472 [2:00:54<27:21:52, 3.52it/s] 7%|▋ | 24970/371472 [2:00:55<26:28:35, 3.64it/s] 7%|▋ | 24971/371472 [2:00:55<27:33:22, 3.49it/s] 7%|▋ | 24972/371472 [2:00:55<27:32:30, 3.49it/s] 7%|▋ | 24973/371472 [2:00:55<27:06:22, 3.55it/s] 7%|▋ | 24974/371472 [2:00:56<26:24:08, 3.65it/s] 7%|▋ | 24975/371472 [2:00:56<29:01:46, 3.32it/s] 7%|▋ | 24976/371472 [2:00:56<29:50:55, 3.22it/s] 7%|▋ | 24977/371472 [2:00:57<30:01:09, 3.21it/s] 7%|▋ | 24978/371472 [2:00:57<29:08:36, 3.30it/s] 7%|▋ | 24979/371472 [2:00:57<28:44:55, 3.35it/s] 7%|▋ | 24980/371472 [2:00:58<32:10:39, 2.99it/s] {'loss': 4.4841, 'learning_rate': 9.399308323816501e-07, 'epoch': 1.08} + 7%|▋ | 24980/371472 [2:00:58<32:10:39, 2.99it/s] 7%|▋ | 24981/371472 [2:00:58<30:56:39, 3.11it/s] 7%|▋ | 24982/371472 [2:00:58<30:00:13, 3.21it/s] 7%|▋ | 24983/371472 [2:00:58<28:48:04, 3.34it/s] 7%|▋ | 24984/371472 [2:00:59<28:40:20, 3.36it/s] 7%|▋ | 24985/371472 [2:00:59<28:10:18, 3.42it/s] 7%|▋ | 24986/371472 [2:00:59<27:45:34, 3.47it/s] 7%|▋ | 24987/371472 [2:01:00<27:33:35, 3.49it/s] 7%|▋ | 24988/371472 [2:01:00<27:08:43, 3.55it/s] 7%|▋ | 24989/371472 [2:01:00<26:13:28, 3.67it/s] 7%|▋ | 24990/371472 [2:01:00<26:51:31, 3.58it/s] 7%|▋ | 24991/371472 [2:01:01<27:45:16, 3.47it/s] 7%|▋ | 24992/371472 [2:01:01<27:15:39, 3.53it/s] 7%|▋ | 24993/371472 [2:01:01<28:11:30, 3.41it/s] 7%|▋ | 24994/371472 [2:01:02<28:00:09, 3.44it/s] 7%|▋ | 24995/371472 [2:01:02<28:08:31, 3.42it/s] 7%|▋ | 24996/371472 [2:01:02<28:05:27, 3.43it/s] 7%|▋ | 24997/371472 [2:01:02<26:37:46, 3.61it/s] 7%|▋ | 24998/371472 [2:01:03<26:05:58, 3.69it/s] 7%|▋ | 24999/371472 [2:01:03<27:39:26, 3.48it/s] 7%|▋ | 25000/371472 [2:01:03<28:57:12, 3.32it/s] {'loss': 4.5219, 'learning_rate': 9.398823504061712e-07, 'epoch': 1.08} + 7%|▋ | 25000/371472 [2:01:03<28:57:12, 3.32it/s] 7%|▋ | 25001/371472 [2:01:04<27:47:40, 3.46it/s] 7%|▋ | 25002/371472 [2:01:04<27:49:14, 3.46it/s] 7%|▋ | 25003/371472 [2:01:04<28:42:36, 3.35it/s] 7%|▋ | 25004/371472 [2:01:05<29:17:13, 3.29it/s] 7%|▋ | 25005/371472 [2:01:05<28:54:56, 3.33it/s] 7%|▋ | 25006/371472 [2:01:05<27:53:51, 3.45it/s] 7%|▋ | 25007/371472 [2:01:05<26:54:11, 3.58it/s] 7%|▋ | 25008/371472 [2:01:06<26:18:50, 3.66it/s] 7%|▋ | 25009/371472 [2:01:06<26:57:30, 3.57it/s] 7%|▋ | 25010/371472 [2:01:06<27:06:21, 3.55it/s] 7%|▋ | 25011/371472 [2:01:06<27:14:34, 3.53it/s] 7%|▋ | 25012/371472 [2:01:07<27:03:29, 3.56it/s] 7%|▋ | 25013/371472 [2:01:07<26:48:25, 3.59it/s] 7%|▋ | 25014/371472 [2:01:07<26:45:56, 3.60it/s] 7%|▋ | 25015/371472 [2:01:08<26:48:33, 3.59it/s] 7%|▋ | 25016/371472 [2:01:08<26:17:21, 3.66it/s] 7%|▋ | 25017/371472 [2:01:08<27:24:29, 3.51it/s] 7%|▋ | 25018/371472 [2:01:08<27:09:41, 3.54it/s] 7%|▋ | 25019/371472 [2:01:09<28:28:38, 3.38it/s] 7%|▋ | 25020/371472 [2:01:09<28:07:35, 3.42it/s] {'loss': 4.6514, 'learning_rate': 9.398338684306922e-07, 'epoch': 1.08} + 7%|▋ | 25020/371472 [2:01:09<28:07:35, 3.42it/s] 7%|▋ | 25021/371472 [2:01:09<27:21:45, 3.52it/s] 7%|▋ | 25022/371472 [2:01:10<30:12:38, 3.19it/s] 7%|▋ | 25023/371472 [2:01:10<29:46:17, 3.23it/s] 7%|▋ | 25024/371472 [2:01:10<30:52:12, 3.12it/s] 7%|▋ | 25025/371472 [2:01:11<31:01:04, 3.10it/s] 7%|▋ | 25026/371472 [2:01:11<29:18:49, 3.28it/s] 7%|▋ | 25027/371472 [2:01:11<28:47:38, 3.34it/s] 7%|▋ | 25028/371472 [2:01:11<27:52:08, 3.45it/s] 7%|▋ | 25029/371472 [2:01:12<27:51:08, 3.46it/s] 7%|▋ | 25030/371472 [2:01:12<29:22:14, 3.28it/s] 7%|▋ | 25031/371472 [2:01:12<30:58:59, 3.11it/s] 7%|▋ | 25032/371472 [2:01:13<30:45:16, 3.13it/s] 7%|▋ | 25033/371472 [2:01:13<29:11:41, 3.30it/s] 7%|▋ | 25034/371472 [2:01:13<29:09:43, 3.30it/s] 7%|▋ | 25035/371472 [2:01:14<28:37:02, 3.36it/s] 7%|▋ | 25036/371472 [2:01:14<30:03:39, 3.20it/s] 7%|▋ | 25037/371472 [2:01:14<30:27:29, 3.16it/s] 7%|▋ | 25038/371472 [2:01:15<29:38:09, 3.25it/s] 7%|▋ | 25039/371472 [2:01:15<32:34:49, 2.95it/s] 7%|▋ | 25040/371472 [2:01:15<30:38:19, 3.14it/s] {'loss': 4.4543, 'learning_rate': 9.397853864552134e-07, 'epoch': 1.08} + 7%|▋ | 25040/371472 [2:01:15<30:38:19, 3.14it/s] 7%|▋ | 25041/371472 [2:01:16<29:21:57, 3.28it/s] 7%|▋ | 25042/371472 [2:01:16<28:24:17, 3.39it/s] 7%|▋ | 25043/371472 [2:01:16<28:19:38, 3.40it/s] 7%|▋ | 25044/371472 [2:01:16<27:52:19, 3.45it/s] 7%|▋ | 25045/371472 [2:01:17<27:24:00, 3.51it/s] 7%|▋ | 25046/371472 [2:01:17<28:59:42, 3.32it/s] 7%|▋ | 25047/371472 [2:01:17<27:20:49, 3.52it/s] 7%|▋ | 25048/371472 [2:01:18<27:14:42, 3.53it/s] 7%|▋ | 25049/371472 [2:01:18<26:54:33, 3.58it/s] 7%|▋ | 25050/371472 [2:01:18<29:37:48, 3.25it/s] 7%|▋ | 25051/371472 [2:01:18<28:06:57, 3.42it/s] 7%|▋ | 25052/371472 [2:01:19<28:36:03, 3.36it/s] 7%|▋ | 25053/371472 [2:01:19<27:40:06, 3.48it/s] 7%|▋ | 25054/371472 [2:01:19<27:54:52, 3.45it/s] 7%|▋ | 25055/371472 [2:01:20<26:58:06, 3.57it/s] 7%|▋ | 25056/371472 [2:01:20<27:30:32, 3.50it/s] 7%|▋ | 25057/371472 [2:01:20<27:19:06, 3.52it/s] 7%|▋ | 25058/371472 [2:01:20<28:33:36, 3.37it/s] 7%|▋ | 25059/371472 [2:01:21<27:31:19, 3.50it/s] 7%|▋ | 25060/371472 [2:01:21<26:45:37, 3.60it/s] {'loss': 4.5329, 'learning_rate': 9.397369044797345e-07, 'epoch': 1.08} + 7%|▋ | 25060/371472 [2:01:21<26:45:37, 3.60it/s] 7%|▋ | 25061/371472 [2:01:21<27:18:13, 3.52it/s] 7%|▋ | 25062/371472 [2:01:22<26:43:44, 3.60it/s] 7%|▋ | 25063/371472 [2:01:22<26:02:41, 3.69it/s] 7%|▋ | 25064/371472 [2:01:22<25:29:01, 3.78it/s] 7%|▋ | 25065/371472 [2:01:22<25:19:28, 3.80it/s] 7%|▋ | 25066/371472 [2:01:23<26:34:48, 3.62it/s] 7%|▋ | 25067/371472 [2:01:23<26:10:53, 3.68it/s] 7%|▋ | 25068/371472 [2:01:23<26:29:21, 3.63it/s] 7%|▋ | 25069/371472 [2:01:24<27:59:49, 3.44it/s] 7%|▋ | 25070/371472 [2:01:24<27:23:00, 3.51it/s] 7%|▋ | 25071/371472 [2:01:24<27:14:20, 3.53it/s] 7%|▋ | 25072/371472 [2:01:24<26:35:57, 3.62it/s] 7%|▋ | 25073/371472 [2:01:25<26:18:24, 3.66it/s] 7%|▋ | 25074/371472 [2:01:25<25:56:38, 3.71it/s] 7%|▋ | 25075/371472 [2:01:25<25:50:48, 3.72it/s] 7%|▋ | 25076/371472 [2:01:25<25:16:13, 3.81it/s] 7%|▋ | 25077/371472 [2:01:26<25:40:54, 3.75it/s] 7%|▋ | 25078/371472 [2:01:26<25:23:43, 3.79it/s] 7%|▋ | 25079/371472 [2:01:26<26:10:01, 3.68it/s] 7%|▋ | 25080/371472 [2:01:26<26:01:46, 3.70it/s] {'loss': 4.7501, 'learning_rate': 9.396884225042555e-07, 'epoch': 1.08} + 7%|▋ | 25080/371472 [2:01:26<26:01:46, 3.70it/s] 7%|▋ | 25081/371472 [2:01:27<25:37:35, 3.75it/s] 7%|▋ | 25082/371472 [2:01:27<26:27:35, 3.64it/s] 7%|▋ | 25083/371472 [2:01:27<27:27:38, 3.50it/s] 7%|▋ | 25084/371472 [2:01:28<27:00:17, 3.56it/s] 7%|▋ | 25085/371472 [2:01:28<26:22:11, 3.65it/s] 7%|▋ | 25086/371472 [2:01:28<26:42:13, 3.60it/s] 7%|▋ | 25087/371472 [2:01:28<26:32:36, 3.62it/s] 7%|▋ | 25088/371472 [2:01:29<27:53:02, 3.45it/s] 7%|▋ | 25089/371472 [2:01:29<27:26:13, 3.51it/s] 7%|▋ | 25090/371472 [2:01:29<27:01:14, 3.56it/s] 7%|▋ | 25091/371472 [2:01:30<26:26:31, 3.64it/s] 7%|▋ | 25092/371472 [2:01:30<26:28:10, 3.63it/s] 7%|▋ | 25093/371472 [2:01:30<26:19:10, 3.66it/s] 7%|▋ | 25094/371472 [2:01:30<28:07:13, 3.42it/s] 7%|▋ | 25095/371472 [2:01:31<29:04:53, 3.31it/s] 7%|▋ | 25096/371472 [2:01:31<29:10:55, 3.30it/s] 7%|▋ | 25097/371472 [2:01:31<28:54:56, 3.33it/s] 7%|▋ | 25098/371472 [2:01:32<28:30:39, 3.37it/s] 7%|▋ | 25099/371472 [2:01:32<27:24:07, 3.51it/s] 7%|▋ | 25100/371472 [2:01:32<27:22:31, 3.51it/s] {'loss': 4.2067, 'learning_rate': 9.396399405287766e-07, 'epoch': 1.08} + 7%|▋ | 25100/371472 [2:01:32<27:22:31, 3.51it/s] 7%|▋ | 25101/371472 [2:01:32<27:41:13, 3.48it/s] 7%|▋ | 25102/371472 [2:01:33<27:13:28, 3.53it/s] 7%|▋ | 25103/371472 [2:01:33<27:06:39, 3.55it/s] 7%|▋ | 25104/371472 [2:01:33<29:28:23, 3.26it/s] 7%|▋ | 25105/371472 [2:01:34<28:20:48, 3.39it/s] 7%|▋ | 25106/371472 [2:01:34<27:16:43, 3.53it/s] 7%|▋ | 25107/371472 [2:01:34<27:51:14, 3.45it/s] 7%|▋ | 25108/371472 [2:01:35<29:59:48, 3.21it/s] 7%|▋ | 25109/371472 [2:01:35<32:37:49, 2.95it/s] 7%|▋ | 25110/371472 [2:01:35<30:20:48, 3.17it/s] 7%|▋ | 25111/371472 [2:01:36<31:50:37, 3.02it/s] 7%|▋ | 25112/371472 [2:01:36<29:40:05, 3.24it/s] 7%|▋ | 25113/371472 [2:01:36<28:25:35, 3.38it/s] 7%|▋ | 25114/371472 [2:01:37<31:40:40, 3.04it/s] 7%|▋ | 25115/371472 [2:01:37<31:58:27, 3.01it/s] 7%|▋ | 25116/371472 [2:01:37<29:48:03, 3.23it/s] 7%|▋ | 25117/371472 [2:01:37<29:39:24, 3.24it/s] 7%|▋ | 25118/371472 [2:01:38<32:14:42, 2.98it/s] 7%|▋ | 25119/371472 [2:01:38<31:37:54, 3.04it/s] 7%|▋ | 25120/371472 [2:01:38<29:28:34, 3.26it/s] {'loss': 4.5308, 'learning_rate': 9.395914585532978e-07, 'epoch': 1.08} + 7%|▋ | 25120/371472 [2:01:38<29:28:34, 3.26it/s] 7%|▋ | 25121/371472 [2:01:39<28:04:53, 3.43it/s] 7%|▋ | 25122/371472 [2:01:39<27:04:59, 3.55it/s] 7%|▋ | 25123/371472 [2:01:39<26:55:41, 3.57it/s] 7%|▋ | 25124/371472 [2:01:39<26:37:46, 3.61it/s] 7%|▋ | 25125/371472 [2:01:40<25:55:56, 3.71it/s] 7%|▋ | 25126/371472 [2:01:40<26:28:45, 3.63it/s] 7%|▋ | 25127/371472 [2:01:40<25:49:53, 3.72it/s] 7%|▋ | 25128/371472 [2:01:41<27:20:14, 3.52it/s] 7%|▋ | 25129/371472 [2:01:41<26:54:40, 3.57it/s] 7%|▋ | 25130/371472 [2:01:41<26:59:11, 3.56it/s] 7%|▋ | 25131/371472 [2:01:41<26:29:17, 3.63it/s] 7%|▋ | 25132/371472 [2:01:42<26:12:55, 3.67it/s] 7%|▋ | 25133/371472 [2:01:42<26:52:40, 3.58it/s] 7%|▋ | 25134/371472 [2:01:42<26:28:55, 3.63it/s] 7%|▋ | 25135/371472 [2:01:42<26:02:44, 3.69it/s] 7%|▋ | 25136/371472 [2:01:43<27:29:24, 3.50it/s] 7%|▋ | 25137/371472 [2:01:43<27:35:07, 3.49it/s] 7%|▋ | 25138/371472 [2:01:43<27:36:33, 3.48it/s] 7%|▋ | 25139/371472 [2:01:44<27:54:13, 3.45it/s] 7%|▋ | 25140/371472 [2:01:44<27:02:45, 3.56it/s] {'loss': 4.4915, 'learning_rate': 9.395429765778189e-07, 'epoch': 1.08} + 7%|▋ | 25140/371472 [2:01:44<27:02:45, 3.56it/s] 7%|▋ | 25141/371472 [2:01:44<27:17:42, 3.52it/s] 7%|▋ | 25142/371472 [2:01:44<27:11:24, 3.54it/s] 7%|▋ | 25143/371472 [2:01:45<27:35:24, 3.49it/s] 7%|▋ | 25144/371472 [2:01:45<29:00:51, 3.32it/s] 7%|▋ | 25145/371472 [2:01:45<27:55:20, 3.45it/s] 7%|▋ | 25146/371472 [2:01:46<27:28:13, 3.50it/s] 7%|▋ | 25147/371472 [2:01:46<27:55:13, 3.45it/s] 7%|▋ | 25148/371472 [2:01:46<28:14:27, 3.41it/s] 7%|▋ | 25149/371472 [2:01:47<29:44:35, 3.23it/s] 7%|▋ | 25150/371472 [2:01:47<29:12:06, 3.29it/s] 7%|▋ | 25151/371472 [2:01:47<28:07:06, 3.42it/s] 7%|▋ | 25152/371472 [2:01:47<27:36:50, 3.48it/s] 7%|▋ | 25153/371472 [2:01:48<28:08:38, 3.42it/s] 7%|▋ | 25154/371472 [2:01:48<27:23:03, 3.51it/s] 7%|▋ | 25155/371472 [2:01:48<28:23:59, 3.39it/s] 7%|▋ | 25156/371472 [2:01:49<28:11:07, 3.41it/s] 7%|▋ | 25157/371472 [2:01:49<27:45:53, 3.46it/s] 7%|▋ | 25158/371472 [2:01:49<27:13:26, 3.53it/s] 7%|▋ | 25159/371472 [2:01:49<27:36:49, 3.48it/s] 7%|▋ | 25160/371472 [2:01:50<26:47:18, 3.59it/s] {'loss': 4.5847, 'learning_rate': 9.394944946023401e-07, 'epoch': 1.08} + 7%|▋ | 25160/371472 [2:01:50<26:47:18, 3.59it/s] 7%|▋ | 25161/371472 [2:01:50<27:03:40, 3.55it/s] 7%|▋ | 25162/371472 [2:01:50<26:41:47, 3.60it/s] 7%|▋ | 25163/371472 [2:01:51<25:46:01, 3.73it/s] 7%|▋ | 25164/371472 [2:01:51<28:12:00, 3.41it/s] 7%|▋ | 25165/371472 [2:01:51<28:59:35, 3.32it/s] 7%|▋ | 25166/371472 [2:01:51<27:44:25, 3.47it/s] 7%|▋ | 25167/371472 [2:01:52<26:55:05, 3.57it/s] 7%|▋ | 25168/371472 [2:01:52<26:53:44, 3.58it/s] 7%|▋ | 25169/371472 [2:01:52<25:57:50, 3.70it/s] 7%|▋ | 25170/371472 [2:01:53<26:05:43, 3.69it/s] 7%|▋ | 25171/371472 [2:01:53<27:47:39, 3.46it/s] 7%|▋ | 25172/371472 [2:01:53<27:13:21, 3.53it/s] 7%|▋ | 25173/371472 [2:01:53<28:13:04, 3.41it/s] 7%|▋ | 25174/371472 [2:01:54<27:50:15, 3.46it/s] 7%|▋ | 25175/371472 [2:01:54<27:21:02, 3.52it/s] 7%|▋ | 25176/371472 [2:01:54<27:31:29, 3.49it/s] 7%|▋ | 25177/371472 [2:01:55<27:50:33, 3.45it/s] 7%|▋ | 25178/371472 [2:01:55<27:46:32, 3.46it/s] 7%|▋ | 25179/371472 [2:01:55<28:18:12, 3.40it/s] 7%|▋ | 25180/371472 [2:01:55<28:31:15, 3.37it/s] {'loss': 4.4696, 'learning_rate': 9.394460126268612e-07, 'epoch': 1.08} + 7%|▋ | 25180/371472 [2:01:55<28:31:15, 3.37it/s] 7%|▋ | 25181/371472 [2:01:56<30:20:48, 3.17it/s] 7%|▋ | 25182/371472 [2:01:56<29:45:05, 3.23it/s] 7%|▋ | 25183/371472 [2:01:56<30:17:55, 3.17it/s] 7%|▋ | 25184/371472 [2:01:57<29:35:19, 3.25it/s] 7%|▋ | 25185/371472 [2:01:57<28:55:19, 3.33it/s] 7%|▋ | 25186/371472 [2:01:57<29:37:29, 3.25it/s] 7%|▋ | 25187/371472 [2:01:58<29:18:58, 3.28it/s] 7%|▋ | 25188/371472 [2:01:58<28:22:00, 3.39it/s] 7%|▋ | 25189/371472 [2:01:58<28:10:35, 3.41it/s] 7%|▋ | 25190/371472 [2:01:59<28:02:36, 3.43it/s] 7%|▋ | 25191/371472 [2:01:59<29:11:28, 3.30it/s] 7%|▋ | 25192/371472 [2:01:59<27:48:29, 3.46it/s] 7%|▋ | 25193/371472 [2:01:59<27:03:24, 3.56it/s] 7%|▋ | 25194/371472 [2:02:00<27:42:47, 3.47it/s] 7%|▋ | 25195/371472 [2:02:00<26:50:03, 3.58it/s] 7%|▋ | 25196/371472 [2:02:00<26:28:49, 3.63it/s] 7%|▋ | 25197/371472 [2:02:00<26:51:03, 3.58it/s] 7%|▋ | 25198/371472 [2:02:01<27:26:39, 3.50it/s] 7%|▋ | 25199/371472 [2:02:01<28:35:57, 3.36it/s] 7%|▋ | 25200/371472 [2:02:01<29:01:47, 3.31it/s] {'loss': 4.4294, 'learning_rate': 9.393975306513822e-07, 'epoch': 1.09} + 7%|▋ | 25200/371472 [2:02:01<29:01:47, 3.31it/s] 7%|▋ | 25201/371472 [2:02:02<27:39:41, 3.48it/s] 7%|▋ | 25202/371472 [2:02:02<27:05:16, 3.55it/s] 7%|▋ | 25203/371472 [2:02:02<28:02:19, 3.43it/s] 7%|▋ | 25204/371472 [2:02:03<28:17:33, 3.40it/s] 7%|▋ | 25205/371472 [2:02:03<29:04:06, 3.31it/s] 7%|▋ | 25206/371472 [2:02:03<28:06:16, 3.42it/s] 7%|▋ | 25207/371472 [2:02:03<27:52:16, 3.45it/s] 7%|▋ | 25208/371472 [2:02:04<28:20:13, 3.39it/s] 7%|▋ | 25209/371472 [2:02:04<27:15:17, 3.53it/s] 7%|▋ | 25210/371472 [2:02:04<26:41:34, 3.60it/s] 7%|▋ | 25211/371472 [2:02:05<27:40:56, 3.47it/s] 7%|▋ | 25212/371472 [2:02:05<28:11:32, 3.41it/s] 7%|▋ | 25213/371472 [2:02:05<29:58:05, 3.21it/s] 7%|▋ | 25214/371472 [2:02:05<28:55:41, 3.32it/s] 7%|▋ | 25215/371472 [2:02:06<28:03:24, 3.43it/s] 7%|▋ | 25216/371472 [2:02:06<26:57:38, 3.57it/s] 7%|▋ | 25217/371472 [2:02:06<26:17:43, 3.66it/s] 7%|▋ | 25218/371472 [2:02:07<26:03:28, 3.69it/s] 7%|▋ | 25219/371472 [2:02:07<26:09:49, 3.68it/s] 7%|▋ | 25220/371472 [2:02:07<25:57:26, 3.71it/s] {'loss': 4.4869, 'learning_rate': 9.393490486759033e-07, 'epoch': 1.09} + 7%|▋ | 25220/371472 [2:02:07<25:57:26, 3.71it/s] 7%|▋ | 25221/371472 [2:02:07<26:00:24, 3.70it/s] 7%|▋ | 25222/371472 [2:02:08<25:44:52, 3.74it/s] 7%|▋ | 25223/371472 [2:02:08<26:16:58, 3.66it/s] 7%|▋ | 25224/371472 [2:02:08<27:01:04, 3.56it/s] 7%|▋ | 25225/371472 [2:02:08<27:04:08, 3.55it/s] 7%|▋ | 25226/371472 [2:02:09<26:50:10, 3.58it/s] 7%|▋ | 25227/371472 [2:02:09<26:42:13, 3.60it/s] 7%|▋ | 25228/371472 [2:02:09<26:20:55, 3.65it/s] 7%|▋ | 25229/371472 [2:02:10<26:38:07, 3.61it/s] 7%|▋ | 25230/371472 [2:02:10<27:47:40, 3.46it/s] 7%|▋ | 25231/371472 [2:02:10<28:20:25, 3.39it/s] 7%|▋ | 25232/371472 [2:02:11<28:48:06, 3.34it/s] 7%|▋ | 25233/371472 [2:02:11<28:29:07, 3.38it/s] 7%|▋ | 25234/371472 [2:02:11<27:38:52, 3.48it/s] 7%|▋ | 25235/371472 [2:02:11<26:53:29, 3.58it/s] 7%|▋ | 25236/371472 [2:02:12<27:46:18, 3.46it/s] 7%|▋ | 25237/371472 [2:02:12<27:19:35, 3.52it/s] 7%|▋ | 25238/371472 [2:02:12<27:05:07, 3.55it/s] 7%|▋ | 25239/371472 [2:02:12<27:22:58, 3.51it/s] 7%|▋ | 25240/371472 [2:02:13<27:12:45, 3.53it/s] {'loss': 4.5689, 'learning_rate': 9.393005667004245e-07, 'epoch': 1.09} + 7%|▋ | 25240/371472 [2:02:13<27:12:45, 3.53it/s] 7%|▋ | 25241/371472 [2:02:13<26:48:16, 3.59it/s] 7%|▋ | 25242/371472 [2:02:13<28:24:29, 3.39it/s] 7%|▋ | 25243/371472 [2:02:14<27:37:00, 3.48it/s] 7%|▋ | 25244/371472 [2:02:14<27:37:45, 3.48it/s] 7%|▋ | 25245/371472 [2:02:14<27:23:00, 3.51it/s] 7%|▋ | 25246/371472 [2:02:14<26:53:30, 3.58it/s] 7%|▋ | 25247/371472 [2:02:15<27:38:49, 3.48it/s] 7%|▋ | 25248/371472 [2:02:15<27:54:28, 3.45it/s] 7%|▋ | 25249/371472 [2:02:15<31:42:35, 3.03it/s] 7%|▋ | 25250/371472 [2:02:16<31:44:18, 3.03it/s] 7%|▋ | 25251/371472 [2:02:16<34:18:12, 2.80it/s] 7%|▋ | 25252/371472 [2:02:17<33:13:22, 2.89it/s] 7%|▋ | 25253/371472 [2:02:17<30:20:52, 3.17it/s] 7%|▋ | 25254/371472 [2:02:17<29:13:12, 3.29it/s] 7%|▋ | 25255/371472 [2:02:17<28:50:06, 3.34it/s] 7%|▋ | 25256/371472 [2:02:18<30:00:35, 3.20it/s] 7%|▋ | 25257/371472 [2:02:18<29:03:32, 3.31it/s] 7%|▋ | 25258/371472 [2:02:18<28:46:04, 3.34it/s] 7%|▋ | 25259/371472 [2:02:19<29:28:34, 3.26it/s] 7%|▋ | 25260/371472 [2:02:19<28:02:46, 3.43it/s] {'loss': 4.4144, 'learning_rate': 9.392520847249455e-07, 'epoch': 1.09} + 7%|▋ | 25260/371472 [2:02:19<28:02:46, 3.43it/s] 7%|▋ | 25261/371472 [2:02:19<27:44:51, 3.47it/s] 7%|▋ | 25262/371472 [2:02:19<27:05:20, 3.55it/s] 7%|▋ | 25263/371472 [2:02:20<28:57:51, 3.32it/s] 7%|▋ | 25264/371472 [2:02:20<29:24:08, 3.27it/s] 7%|▋ | 25265/371472 [2:02:20<29:16:53, 3.28it/s] 7%|▋ | 25266/371472 [2:02:21<27:52:02, 3.45it/s] 7%|▋ | 25267/371472 [2:02:21<28:05:05, 3.42it/s] 7%|▋ | 25268/371472 [2:02:21<28:51:42, 3.33it/s] 7%|▋ | 25269/371472 [2:02:22<28:20:12, 3.39it/s] 7%|▋ | 25270/371472 [2:02:22<29:25:03, 3.27it/s] 7%|▋ | 25271/371472 [2:02:22<28:33:11, 3.37it/s] 7%|▋ | 25272/371472 [2:02:23<30:55:52, 3.11it/s] 7%|▋ | 25273/371472 [2:02:23<29:11:15, 3.29it/s] 7%|▋ | 25274/371472 [2:02:23<28:43:58, 3.35it/s] 7%|▋ | 25275/371472 [2:02:23<28:58:06, 3.32it/s] 7%|▋ | 25276/371472 [2:02:24<27:32:21, 3.49it/s] 7%|▋ | 25277/371472 [2:02:24<27:11:05, 3.54it/s] 7%|▋ | 25278/371472 [2:02:24<28:33:58, 3.37it/s] 7%|▋ | 25279/371472 [2:02:25<28:50:03, 3.34it/s] 7%|▋ | 25280/371472 [2:02:25<27:55:06, 3.44it/s] {'loss': 4.4279, 'learning_rate': 9.392036027494667e-07, 'epoch': 1.09} + 7%|▋ | 25280/371472 [2:02:25<27:55:06, 3.44it/s] 7%|▋ | 25281/371472 [2:02:25<29:59:49, 3.21it/s] 7%|▋ | 25282/371472 [2:02:25<29:00:22, 3.32it/s] 7%|▋ | 25283/371472 [2:02:26<29:20:39, 3.28it/s] 7%|▋ | 25284/371472 [2:02:26<28:16:08, 3.40it/s] 7%|▋ | 25285/371472 [2:02:26<27:43:40, 3.47it/s] 7%|▋ | 25286/371472 [2:02:27<26:36:04, 3.61it/s] 7%|▋ | 25287/371472 [2:02:27<28:10:54, 3.41it/s] 7%|▋ | 25288/371472 [2:02:27<29:18:50, 3.28it/s] 7%|▋ | 25289/371472 [2:02:27<28:46:50, 3.34it/s] 7%|▋ | 25290/371472 [2:02:28<29:34:27, 3.25it/s] 7%|▋ | 25291/371472 [2:02:28<28:38:50, 3.36it/s] 7%|▋ | 25292/371472 [2:02:28<28:22:36, 3.39it/s] 7%|▋ | 25293/371472 [2:02:29<27:55:11, 3.44it/s] 7%|▋ | 25294/371472 [2:02:29<26:57:21, 3.57it/s] 7%|▋ | 25295/371472 [2:02:29<27:01:59, 3.56it/s] 7%|▋ | 25296/371472 [2:02:29<26:37:02, 3.61it/s] 7%|▋ | 25297/371472 [2:02:30<26:26:37, 3.64it/s] 7%|▋ | 25298/371472 [2:02:30<26:14:15, 3.66it/s] 7%|▋ | 25299/371472 [2:02:30<26:30:23, 3.63it/s] 7%|▋ | 25300/371472 [2:02:31<26:13:36, 3.67it/s] {'loss': 4.5716, 'learning_rate': 9.391551207739878e-07, 'epoch': 1.09} + 7%|▋ | 25300/371472 [2:02:31<26:13:36, 3.67it/s] 7%|▋ | 25301/371472 [2:02:31<25:41:03, 3.74it/s] 7%|▋ | 25302/371472 [2:02:31<27:41:50, 3.47it/s] 7%|▋ | 25303/371472 [2:02:31<27:18:13, 3.52it/s] 7%|▋ | 25304/371472 [2:02:32<26:37:16, 3.61it/s] 7%|▋ | 25305/371472 [2:02:32<28:09:41, 3.41it/s] 7%|▋ | 25306/371472 [2:02:32<28:53:44, 3.33it/s] 7%|▋ | 25307/371472 [2:02:33<28:54:19, 3.33it/s] 7%|▋ | 25308/371472 [2:02:33<28:18:10, 3.40it/s] 7%|▋ | 25309/371472 [2:02:33<30:46:37, 3.12it/s] 7%|▋ | 25310/371472 [2:02:34<29:05:05, 3.31it/s] 7%|▋ | 25311/371472 [2:02:34<28:32:55, 3.37it/s] 7%|▋ | 25312/371472 [2:02:34<28:24:41, 3.38it/s] 7%|▋ | 25313/371472 [2:02:34<28:23:49, 3.39it/s] 7%|▋ | 25314/371472 [2:02:35<27:35:56, 3.48it/s] 7%|▋ | 25315/371472 [2:02:35<27:45:26, 3.46it/s] 7%|▋ | 25316/371472 [2:02:35<29:29:13, 3.26it/s] 7%|▋ | 25317/371472 [2:02:36<28:46:22, 3.34it/s] 7%|▋ | 25318/371472 [2:02:36<27:33:40, 3.49it/s] 7%|▋ | 25319/371472 [2:02:36<27:27:38, 3.50it/s] 7%|▋ | 25320/371472 [2:02:36<28:37:33, 3.36it/s] {'loss': 4.4915, 'learning_rate': 9.39106638798509e-07, 'epoch': 1.09} + 7%|▋ | 25320/371472 [2:02:36<28:37:33, 3.36it/s] 7%|▋ | 25321/371472 [2:02:37<29:19:01, 3.28it/s] 7%|▋ | 25322/371472 [2:02:37<28:36:14, 3.36it/s] 7%|▋ | 25323/371472 [2:02:37<29:10:03, 3.30it/s] 7%|▋ | 25324/371472 [2:02:38<34:08:56, 2.82it/s] 7%|▋ | 25325/371472 [2:02:38<33:00:04, 2.91it/s] 7%|▋ | 25326/371472 [2:02:38<30:55:55, 3.11it/s] 7%|▋ | 25327/371472 [2:02:39<29:47:54, 3.23it/s] 7%|▋ | 25328/371472 [2:02:39<28:26:00, 3.38it/s] 7%|▋ | 25329/371472 [2:02:39<28:06:16, 3.42it/s] 7%|▋ | 25330/371472 [2:02:40<28:06:04, 3.42it/s] 7%|▋ | 25331/371472 [2:02:40<28:00:06, 3.43it/s] 7%|▋ | 25332/371472 [2:02:40<28:52:21, 3.33it/s] 7%|▋ | 25333/371472 [2:02:40<28:05:18, 3.42it/s] 7%|▋ | 25334/371472 [2:02:41<28:25:13, 3.38it/s] 7%|▋ | 25335/371472 [2:02:41<31:16:56, 3.07it/s] 7%|▋ | 25336/371472 [2:02:41<29:48:45, 3.23it/s] 7%|▋ | 25337/371472 [2:02:42<28:12:37, 3.41it/s] 7%|▋ | 25338/371472 [2:02:42<27:11:18, 3.54it/s] 7%|▋ | 25339/371472 [2:02:42<26:50:43, 3.58it/s] 7%|▋ | 25340/371472 [2:02:42<25:58:20, 3.70it/s] {'loss': 4.3967, 'learning_rate': 9.390581568230299e-07, 'epoch': 1.09} + 7%|▋ | 25340/371472 [2:02:42<25:58:20, 3.70it/s] 7%|▋ | 25341/371472 [2:02:43<30:04:08, 3.20it/s] 7%|▋ | 25342/371472 [2:02:43<30:11:15, 3.19it/s] 7%|▋ | 25343/371472 [2:02:44<29:47:19, 3.23it/s] 7%|▋ | 25344/371472 [2:02:44<28:10:28, 3.41it/s] 7%|▋ | 25345/371472 [2:02:44<27:33:11, 3.49it/s] 7%|▋ | 25346/371472 [2:02:44<27:35:46, 3.48it/s] 7%|▋ | 25347/371472 [2:02:45<28:42:25, 3.35it/s] 7%|▋ | 25348/371472 [2:02:45<29:36:15, 3.25it/s] 7%|▋ | 25349/371472 [2:02:45<28:07:55, 3.42it/s] 7%|▋ | 25350/371472 [2:02:46<30:47:03, 3.12it/s] 7%|▋ | 25351/371472 [2:02:46<29:47:47, 3.23it/s] 7%|▋ | 25352/371472 [2:02:46<28:29:20, 3.37it/s] 7%|▋ | 25353/371472 [2:02:46<28:44:59, 3.34it/s] 7%|▋ | 25354/371472 [2:02:47<27:52:20, 3.45it/s] 7%|▋ | 25355/371472 [2:02:47<28:31:31, 3.37it/s] 7%|▋ | 25356/371472 [2:02:47<28:32:17, 3.37it/s] 7%|▋ | 25357/371472 [2:02:48<32:38:41, 2.95it/s] 7%|▋ | 25358/371472 [2:02:48<31:03:34, 3.10it/s] 7%|▋ | 25359/371472 [2:02:48<28:59:35, 3.32it/s] 7%|▋ | 25360/371472 [2:02:49<28:54:12, 3.33it/s] {'loss': 4.5609, 'learning_rate': 9.390096748475511e-07, 'epoch': 1.09} + 7%|▋ | 25360/371472 [2:02:49<28:54:12, 3.33it/s] 7%|▋ | 25361/371472 [2:02:49<29:19:18, 3.28it/s] 7%|▋ | 25362/371472 [2:02:49<33:03:04, 2.91it/s] 7%|▋ | 25363/371472 [2:02:50<32:57:47, 2.92it/s] 7%|▋ | 25364/371472 [2:02:50<33:00:38, 2.91it/s] 7%|▋ | 25365/371472 [2:02:50<33:36:25, 2.86it/s] 7%|▋ | 25366/371472 [2:02:51<31:30:09, 3.05it/s] 7%|▋ | 25367/371472 [2:02:51<30:55:30, 3.11it/s] 7%|▋ | 25368/371472 [2:02:51<30:08:30, 3.19it/s] 7%|▋ | 25369/371472 [2:02:52<29:55:08, 3.21it/s] 7%|▋ | 25370/371472 [2:02:52<28:56:31, 3.32it/s] 7%|▋ | 25371/371472 [2:02:52<30:37:02, 3.14it/s] 7%|▋ | 25372/371472 [2:02:53<29:18:14, 3.28it/s] 7%|▋ | 25373/371472 [2:02:53<28:21:32, 3.39it/s] 7%|▋ | 25374/371472 [2:02:53<28:55:16, 3.32it/s] 7%|▋ | 25375/371472 [2:02:53<29:27:45, 3.26it/s] 7%|▋ | 25376/371472 [2:02:54<28:34:57, 3.36it/s] 7%|▋ | 25377/371472 [2:02:54<28:21:07, 3.39it/s] 7%|▋ | 25378/371472 [2:02:54<27:43:49, 3.47it/s] 7%|▋ | 25379/371472 [2:02:55<26:55:20, 3.57it/s] 7%|▋ | 25380/371472 [2:02:55<29:22:53, 3.27it/s] {'loss': 4.4044, 'learning_rate': 9.389611928720722e-07, 'epoch': 1.09} + 7%|▋ | 25380/371472 [2:02:55<29:22:53, 3.27it/s] 7%|▋ | 25381/371472 [2:02:55<29:41:43, 3.24it/s] 7%|▋ | 25382/371472 [2:02:55<28:42:42, 3.35it/s] 7%|▋ | 25383/371472 [2:02:56<27:23:59, 3.51it/s] 7%|▋ | 25384/371472 [2:02:56<26:53:28, 3.57it/s] 7%|▋ | 25385/371472 [2:02:56<25:59:03, 3.70it/s] 7%|▋ | 25386/371472 [2:02:57<26:45:20, 3.59it/s] 7%|▋ | 25387/371472 [2:02:57<26:13:27, 3.67it/s] 7%|▋ | 25388/371472 [2:02:57<26:34:01, 3.62it/s] 7%|▋ | 25389/371472 [2:02:57<26:26:35, 3.64it/s] 7%|▋ | 25390/371472 [2:02:58<25:35:29, 3.76it/s] 7%|▋ | 25391/371472 [2:02:58<25:20:17, 3.79it/s] 7%|▋ | 25392/371472 [2:02:58<24:44:16, 3.89it/s] 7%|▋ | 25393/371472 [2:02:58<25:13:32, 3.81it/s] 7%|▋ | 25394/371472 [2:02:59<25:54:17, 3.71it/s] 7%|▋ | 25395/371472 [2:02:59<27:45:51, 3.46it/s] 7%|▋ | 25396/371472 [2:02:59<28:55:40, 3.32it/s] 7%|▋ | 25397/371472 [2:03:00<29:25:17, 3.27it/s] 7%|▋ | 25398/371472 [2:03:00<27:55:00, 3.44it/s] 7%|▋ | 25399/371472 [2:03:00<27:52:31, 3.45it/s] 7%|▋ | 25400/371472 [2:03:00<26:59:35, 3.56it/s] {'loss': 4.5284, 'learning_rate': 9.389127108965933e-07, 'epoch': 1.09} + 7%|▋ | 25400/371472 [2:03:00<26:59:35, 3.56it/s] 7%|▋ | 25401/371472 [2:03:01<28:41:50, 3.35it/s] 7%|▋ | 25402/371472 [2:03:01<27:10:19, 3.54it/s] 7%|▋ | 25403/371472 [2:03:01<26:55:14, 3.57it/s] 7%|▋ | 25404/371472 [2:03:02<28:44:46, 3.34it/s] 7%|▋ | 25405/371472 [2:03:02<27:11:05, 3.54it/s] 7%|▋ | 25406/371472 [2:03:02<26:43:14, 3.60it/s] 7%|▋ | 25407/371472 [2:03:03<29:59:17, 3.21it/s] 7%|▋ | 25408/371472 [2:03:03<28:44:49, 3.34it/s] 7%|▋ | 25409/371472 [2:03:03<28:33:39, 3.37it/s] 7%|▋ | 25410/371472 [2:03:03<28:37:19, 3.36it/s] 7%|▋ | 25411/371472 [2:03:04<27:36:52, 3.48it/s] 7%|▋ | 25412/371472 [2:03:04<28:12:00, 3.41it/s] 7%|▋ | 25413/371472 [2:03:04<27:47:02, 3.46it/s] 7%|▋ | 25414/371472 [2:03:05<27:53:15, 3.45it/s] 7%|▋ | 25415/371472 [2:03:05<29:34:51, 3.25it/s] 7%|▋ | 25416/371472 [2:03:05<28:56:55, 3.32it/s] 7%|▋ | 25417/371472 [2:03:06<30:51:32, 3.12it/s] 7%|▋ | 25418/371472 [2:03:06<29:23:36, 3.27it/s] 7%|▋ | 25419/371472 [2:03:06<28:20:57, 3.39it/s] 7%|▋ | 25420/371472 [2:03:06<27:50:05, 3.45it/s] {'loss': 4.558, 'learning_rate': 9.388642289211144e-07, 'epoch': 1.09} + 7%|▋ | 25420/371472 [2:03:06<27:50:05, 3.45it/s] 7%|▋ | 25421/371472 [2:03:07<26:44:15, 3.60it/s] 7%|▋ | 25422/371472 [2:03:07<26:48:55, 3.58it/s] 7%|▋ | 25423/371472 [2:03:07<26:42:45, 3.60it/s] 7%|▋ | 25424/371472 [2:03:07<27:28:45, 3.50it/s] 7%|▋ | 25425/371472 [2:03:08<27:23:00, 3.51it/s] 7%|▋ | 25426/371472 [2:03:08<27:46:41, 3.46it/s] 7%|▋ | 25427/371472 [2:03:08<28:07:58, 3.42it/s] 7%|▋ | 25428/371472 [2:03:09<27:45:18, 3.46it/s] 7%|▋ | 25429/371472 [2:03:09<26:34:47, 3.62it/s] 7%|▋ | 25430/371472 [2:03:09<27:45:47, 3.46it/s] 7%|▋ | 25431/371472 [2:03:09<27:23:29, 3.51it/s] 7%|▋ | 25432/371472 [2:03:10<26:23:43, 3.64it/s] 7%|▋ | 25433/371472 [2:03:10<26:27:03, 3.63it/s] 7%|▋ | 25434/371472 [2:03:10<26:31:25, 3.62it/s] 7%|▋ | 25435/371472 [2:03:11<26:06:51, 3.68it/s] 7%|▋ | 25436/371472 [2:03:11<29:04:14, 3.31it/s] 7%|▋ | 25437/371472 [2:03:11<28:17:46, 3.40it/s] 7%|▋ | 25438/371472 [2:03:11<27:08:59, 3.54it/s] 7%|▋ | 25439/371472 [2:03:12<28:29:42, 3.37it/s] 7%|▋ | 25440/371472 [2:03:12<28:51:17, 3.33it/s] {'loss': 4.647, 'learning_rate': 9.388157469456356e-07, 'epoch': 1.1} + 7%|▋ | 25440/371472 [2:03:12<28:51:17, 3.33it/s] 7%|▋ | 25441/371472 [2:03:12<28:28:14, 3.38it/s] 7%|▋ | 25442/371472 [2:03:13<28:25:37, 3.38it/s] 7%|▋ | 25443/371472 [2:03:13<27:48:47, 3.46it/s] 7%|▋ | 25444/371472 [2:03:13<27:54:15, 3.44it/s] 7%|▋ | 25445/371472 [2:03:14<28:02:14, 3.43it/s] 7%|▋ | 25446/371472 [2:03:14<27:02:49, 3.55it/s] 7%|▋ | 25447/371472 [2:03:14<28:23:19, 3.39it/s] 7%|▋ | 25448/371472 [2:03:14<28:10:10, 3.41it/s] 7%|▋ | 25449/371472 [2:03:15<30:06:32, 3.19it/s] 7%|▋ | 25450/371472 [2:03:15<28:04:29, 3.42it/s] 7%|▋ | 25451/371472 [2:03:15<27:53:13, 3.45it/s] 7%|▋ | 25452/371472 [2:03:16<27:19:29, 3.52it/s] 7%|▋ | 25453/371472 [2:03:16<27:32:48, 3.49it/s] 7%|▋ | 25454/371472 [2:03:16<27:42:42, 3.47it/s] 7%|▋ | 25455/371472 [2:03:16<27:06:39, 3.55it/s] 7%|▋ | 25456/371472 [2:03:17<26:23:10, 3.64it/s] 7%|▋ | 25457/371472 [2:03:17<26:19:32, 3.65it/s] 7%|▋ | 25458/371472 [2:03:17<26:00:51, 3.69it/s] 7%|▋ | 25459/371472 [2:03:17<25:48:10, 3.72it/s] 7%|▋ | 25460/371472 [2:03:18<26:22:07, 3.65it/s] {'loss': 4.4125, 'learning_rate': 9.387672649701566e-07, 'epoch': 1.1} + 7%|▋ | 25460/371472 [2:03:18<26:22:07, 3.65it/s] 7%|▋ | 25461/371472 [2:03:18<26:38:58, 3.61it/s] 7%|▋ | 25462/371472 [2:03:18<26:14:25, 3.66it/s] 7%|▋ | 25463/371472 [2:03:19<28:08:13, 3.42it/s] 7%|▋ | 25464/371472 [2:03:19<26:54:21, 3.57it/s] 7%|▋ | 25465/371472 [2:03:19<29:18:53, 3.28it/s] 7%|▋ | 25466/371472 [2:03:20<28:35:36, 3.36it/s] 7%|▋ | 25467/371472 [2:03:20<27:51:01, 3.45it/s] 7%|▋ | 25468/371472 [2:03:20<27:15:56, 3.53it/s] 7%|▋ | 25469/371472 [2:03:20<26:52:43, 3.58it/s] 7%|▋ | 25470/371472 [2:03:21<25:48:57, 3.72it/s] 7%|▋ | 25471/371472 [2:03:21<27:57:13, 3.44it/s] 7%|▋ | 25472/371472 [2:03:21<27:56:10, 3.44it/s] 7%|▋ | 25473/371472 [2:03:22<27:02:19, 3.55it/s] 7%|▋ | 25474/371472 [2:03:22<26:46:27, 3.59it/s] 7%|▋ | 25475/371472 [2:03:22<27:15:55, 3.52it/s] 7%|▋ | 25476/371472 [2:03:22<26:31:43, 3.62it/s] 7%|▋ | 25477/371472 [2:03:23<27:44:16, 3.46it/s] 7%|▋ | 25478/371472 [2:03:23<28:57:02, 3.32it/s] 7%|▋ | 25479/371472 [2:03:23<31:27:41, 3.05it/s] 7%|▋ | 25480/371472 [2:03:24<29:54:23, 3.21it/s] {'loss': 4.3079, 'learning_rate': 9.387187829946776e-07, 'epoch': 1.1} + 7%|▋ | 25480/371472 [2:03:24<29:54:23, 3.21it/s] 7%|▋ | 25481/371472 [2:03:24<28:34:59, 3.36it/s] 7%|▋ | 25482/371472 [2:03:24<27:32:17, 3.49it/s] 7%|▋ | 25483/371472 [2:03:24<27:14:29, 3.53it/s] 7%|▋ | 25484/371472 [2:03:25<26:56:18, 3.57it/s] 7%|▋ | 25485/371472 [2:03:25<27:22:44, 3.51it/s] 7%|▋ | 25486/371472 [2:03:25<27:21:27, 3.51it/s] 7%|▋ | 25487/371472 [2:03:26<27:48:55, 3.46it/s] 7%|▋ | 25488/371472 [2:03:26<27:32:04, 3.49it/s] 7%|▋ | 25489/371472 [2:03:26<30:16:20, 3.17it/s] 7%|▋ | 25490/371472 [2:03:27<28:56:56, 3.32it/s] 7%|▋ | 25491/371472 [2:03:27<28:52:27, 3.33it/s] 7%|▋ | 25492/371472 [2:03:27<28:09:42, 3.41it/s] 7%|▋ | 25493/371472 [2:03:27<27:32:48, 3.49it/s] 7%|▋ | 25494/371472 [2:03:28<32:08:57, 2.99it/s] 7%|▋ | 25495/371472 [2:03:28<31:23:46, 3.06it/s] 7%|▋ | 25496/371472 [2:03:28<29:49:29, 3.22it/s] 7%|▋ | 25497/371472 [2:03:29<31:33:35, 3.05it/s] 7%|▋ | 25498/371472 [2:03:29<30:00:26, 3.20it/s] 7%|▋ | 25499/371472 [2:03:29<28:40:02, 3.35it/s] 7%|▋ | 25500/371472 [2:03:30<31:31:47, 3.05it/s] {'loss': 4.5785, 'learning_rate': 9.386703010191988e-07, 'epoch': 1.1} + 7%|▋ | 25500/371472 [2:03:30<31:31:47, 3.05it/s] 7%|▋ | 25501/371472 [2:03:30<30:53:35, 3.11it/s] 7%|▋ | 25502/371472 [2:03:30<29:31:13, 3.26it/s] 7%|▋ | 25503/371472 [2:03:31<29:06:48, 3.30it/s] 7%|▋ | 25504/371472 [2:03:31<27:46:21, 3.46it/s] 7%|▋ | 25505/371472 [2:03:31<28:09:37, 3.41it/s] 7%|▋ | 25506/371472 [2:03:31<27:03:06, 3.55it/s] 7%|▋ | 25507/371472 [2:03:32<29:00:33, 3.31it/s] 7%|▋ | 25508/371472 [2:03:32<29:55:31, 3.21it/s] 7%|▋ | 25509/371472 [2:03:32<29:05:14, 3.30it/s] 7%|▋ | 25510/371472 [2:03:33<27:39:37, 3.47it/s] 7%|▋ | 25511/371472 [2:03:33<27:16:51, 3.52it/s] 7%|▋ | 25512/371472 [2:03:33<27:04:19, 3.55it/s] 7%|▋ | 25513/371472 [2:03:33<27:11:43, 3.53it/s] 7%|▋ | 25514/371472 [2:03:34<27:34:13, 3.49it/s] 7%|▋ | 25515/371472 [2:03:34<27:19:09, 3.52it/s] 7%|▋ | 25516/371472 [2:03:34<26:47:47, 3.59it/s] 7%|▋ | 25517/371472 [2:03:35<26:39:18, 3.61it/s] 7%|▋ | 25518/371472 [2:03:35<26:27:03, 3.63it/s] 7%|▋ | 25519/371472 [2:03:35<26:10:55, 3.67it/s] 7%|▋ | 25520/371472 [2:03:35<26:07:51, 3.68it/s] {'loss': 4.5369, 'learning_rate': 9.3862181904372e-07, 'epoch': 1.1} + 7%|▋ | 25520/371472 [2:03:35<26:07:51, 3.68it/s] 7%|▋ | 25521/371472 [2:03:36<26:26:16, 3.63it/s] 7%|▋ | 25522/371472 [2:03:36<26:12:57, 3.67it/s] 7%|▋ | 25523/371472 [2:03:36<25:50:14, 3.72it/s] 7%|▋ | 25524/371472 [2:03:36<26:43:51, 3.59it/s] 7%|▋ | 25525/371472 [2:03:37<26:33:21, 3.62it/s] 7%|▋ | 25526/371472 [2:03:37<28:07:05, 3.42it/s] 7%|▋ | 25527/371472 [2:03:37<28:40:48, 3.35it/s] 7%|▋ | 25528/371472 [2:03:38<28:13:33, 3.40it/s] 7%|▋ | 25529/371472 [2:03:38<27:28:28, 3.50it/s] 7%|▋ | 25530/371472 [2:03:38<27:12:09, 3.53it/s] 7%|▋ | 25531/371472 [2:03:39<29:26:31, 3.26it/s] 7%|▋ | 25532/371472 [2:03:39<29:58:02, 3.21it/s] 7%|▋ | 25533/371472 [2:03:39<28:56:31, 3.32it/s] 7%|▋ | 25534/371472 [2:03:39<27:53:22, 3.45it/s] 7%|▋ | 25535/371472 [2:03:40<27:19:45, 3.52it/s] 7%|▋ | 25536/371472 [2:03:40<26:22:37, 3.64it/s] 7%|▋ | 25537/371472 [2:03:40<26:14:21, 3.66it/s] 7%|▋ | 25538/371472 [2:03:41<26:12:09, 3.67it/s] 7%|▋ | 25539/371472 [2:03:41<26:09:33, 3.67it/s] 7%|▋ | 25540/371472 [2:03:41<26:13:33, 3.66it/s] {'loss': 4.4112, 'learning_rate': 9.38573337068241e-07, 'epoch': 1.1} + 7%|▋ | 25540/371472 [2:03:41<26:13:33, 3.66it/s] 7%|▋ | 25541/371472 [2:03:41<26:21:48, 3.64it/s] 7%|▋ | 25542/371472 [2:03:42<26:10:03, 3.67it/s] 7%|▋ | 25543/371472 [2:03:42<25:45:19, 3.73it/s] 7%|▋ | 25544/371472 [2:03:42<26:23:10, 3.64it/s] 7%|▋ | 25545/371472 [2:03:42<26:04:31, 3.69it/s] 7%|▋ | 25546/371472 [2:03:43<26:39:20, 3.60it/s] 7%|▋ | 25547/371472 [2:03:43<27:01:28, 3.56it/s] 7%|▋ | 25548/371472 [2:03:43<26:45:02, 3.59it/s] 7%|▋ | 25549/371472 [2:03:44<26:39:11, 3.61it/s] 7%|▋ | 25550/371472 [2:03:44<26:18:16, 3.65it/s] 7%|▋ | 25551/371472 [2:03:44<26:06:59, 3.68it/s] 7%|▋ | 25552/371472 [2:03:44<26:43:00, 3.60it/s] 7%|▋ | 25553/371472 [2:03:45<26:59:45, 3.56it/s] 7%|▋ | 25554/371472 [2:03:45<27:09:25, 3.54it/s] 7%|▋ | 25555/371472 [2:03:45<27:50:38, 3.45it/s] 7%|▋ | 25556/371472 [2:03:46<28:17:55, 3.40it/s] 7%|▋ | 25557/371472 [2:03:46<28:08:37, 3.41it/s] 7%|▋ | 25558/371472 [2:03:46<27:42:35, 3.47it/s] 7%|▋ | 25559/371472 [2:03:46<27:39:33, 3.47it/s] 7%|▋ | 25560/371472 [2:03:47<27:27:44, 3.50it/s] {'loss': 4.6571, 'learning_rate': 9.385248550927621e-07, 'epoch': 1.1} + 7%|▋ | 25560/371472 [2:03:47<27:27:44, 3.50it/s] 7%|▋ | 25561/371472 [2:03:47<27:02:38, 3.55it/s] 7%|▋ | 25562/371472 [2:03:47<26:24:22, 3.64it/s] 7%|▋ | 25563/371472 [2:03:48<27:14:43, 3.53it/s] 7%|▋ | 25564/371472 [2:03:48<29:17:19, 3.28it/s] 7%|▋ | 25565/371472 [2:03:48<28:12:21, 3.41it/s] 7%|▋ | 25566/371472 [2:03:48<27:51:43, 3.45it/s] 7%|▋ | 25567/371472 [2:03:49<29:57:21, 3.21it/s] 7%|▋ | 25568/371472 [2:03:49<29:25:57, 3.26it/s] 7%|▋ | 25569/371472 [2:03:49<29:29:15, 3.26it/s] 7%|▋ | 25570/371472 [2:03:50<29:09:29, 3.30it/s] 7%|▋ | 25571/371472 [2:03:50<27:53:41, 3.44it/s] 7%|▋ | 25572/371472 [2:03:50<27:51:10, 3.45it/s] 7%|▋ | 25573/371472 [2:03:51<28:04:10, 3.42it/s] 7%|▋ | 25574/371472 [2:03:51<27:25:57, 3.50it/s] 7%|▋ | 25575/371472 [2:03:51<27:01:23, 3.56it/s] 7%|▋ | 25576/371472 [2:03:51<26:52:39, 3.57it/s] 7%|▋ | 25577/371472 [2:03:52<28:07:57, 3.42it/s] 7%|▋ | 25578/371472 [2:03:52<27:35:06, 3.48it/s] 7%|▋ | 25579/371472 [2:03:52<27:39:52, 3.47it/s] 7%|▋ | 25580/371472 [2:03:52<26:34:16, 3.62it/s] {'loss': 4.2386, 'learning_rate': 9.384763731172833e-07, 'epoch': 1.1} + 7%|▋ | 25580/371472 [2:03:52<26:34:16, 3.62it/s] 7%|▋ | 25581/371472 [2:03:53<26:56:13, 3.57it/s] 7%|▋ | 25582/371472 [2:03:53<27:06:01, 3.55it/s] 7%|▋ | 25583/371472 [2:03:53<29:06:07, 3.30it/s] 7%|▋ | 25584/371472 [2:03:54<28:09:23, 3.41it/s] 7%|▋ | 25585/371472 [2:03:54<27:27:00, 3.50it/s] 7%|▋ | 25586/371472 [2:03:54<26:42:45, 3.60it/s] 7%|▋ | 25587/371472 [2:03:55<27:12:00, 3.53it/s] 7%|▋ | 25588/371472 [2:03:55<28:42:13, 3.35it/s] 7%|▋ | 25589/371472 [2:03:55<28:17:25, 3.40it/s] 7%|▋ | 25590/371472 [2:03:55<29:43:09, 3.23it/s] 7%|▋ | 25591/371472 [2:03:56<29:07:02, 3.30it/s] 7%|▋ | 25592/371472 [2:03:56<30:02:19, 3.20it/s] 7%|▋ | 25593/371472 [2:03:56<28:16:44, 3.40it/s] 7%|▋ | 25594/371472 [2:03:57<27:57:45, 3.44it/s] 7%|▋ | 25595/371472 [2:03:57<27:44:23, 3.46it/s] 7%|▋ | 25596/371472 [2:03:57<27:23:13, 3.51it/s] 7%|▋ | 25597/371472 [2:03:57<26:29:57, 3.63it/s] 7%|▋ | 25598/371472 [2:03:58<26:31:56, 3.62it/s] 7%|▋ | 25599/371472 [2:03:58<25:49:33, 3.72it/s] 7%|▋ | 25600/371472 [2:03:58<28:14:32, 3.40it/s] {'loss': 4.577, 'learning_rate': 9.384278911418043e-07, 'epoch': 1.1} + 7%|▋ | 25600/371472 [2:03:58<28:14:32, 3.40it/s] 7%|▋ | 25601/371472 [2:03:59<27:16:23, 3.52it/s] 7%|▋ | 25602/371472 [2:03:59<27:22:08, 3.51it/s] 7%|▋ | 25603/371472 [2:03:59<27:07:50, 3.54it/s] 7%|▋ | 25604/371472 [2:03:59<28:07:06, 3.42it/s] 7%|▋ | 25605/371472 [2:04:00<27:17:25, 3.52it/s] 7%|▋ | 25606/371472 [2:04:00<26:49:36, 3.58it/s] 7%|▋ | 25607/371472 [2:04:00<26:58:13, 3.56it/s] 7%|▋ | 25608/371472 [2:04:01<27:02:43, 3.55it/s] 7%|▋ | 25609/371472 [2:04:01<28:09:39, 3.41it/s] 7%|▋ | 25610/371472 [2:04:01<28:38:33, 3.35it/s] 7%|▋ | 25611/371472 [2:04:01<28:09:21, 3.41it/s] 7%|▋ | 25612/371472 [2:04:02<27:19:13, 3.52it/s] 7%|▋ | 25613/371472 [2:04:02<27:43:21, 3.47it/s] 7%|▋ | 25614/371472 [2:04:02<26:54:57, 3.57it/s] 7%|▋ | 25615/371472 [2:04:03<27:25:13, 3.50it/s] 7%|▋ | 25616/371472 [2:04:03<27:17:56, 3.52it/s] 7%|▋ | 25617/371472 [2:04:03<28:27:16, 3.38it/s] 7%|▋ | 25618/371472 [2:04:03<27:05:35, 3.55it/s] 7%|▋ | 25619/371472 [2:04:04<25:58:11, 3.70it/s] 7%|▋ | 25620/371472 [2:04:04<26:25:17, 3.64it/s] {'loss': 4.623, 'learning_rate': 9.383794091663254e-07, 'epoch': 1.1} + 7%|▋ | 25620/371472 [2:04:04<26:25:17, 3.64it/s] 7%|▋ | 25621/371472 [2:04:04<27:29:45, 3.49it/s] 7%|▋ | 25622/371472 [2:04:05<27:28:32, 3.50it/s] 7%|▋ | 25623/371472 [2:04:05<28:13:43, 3.40it/s] 7%|▋ | 25624/371472 [2:04:05<27:07:47, 3.54it/s] 7%|▋ | 25625/371472 [2:04:05<26:33:47, 3.62it/s] 7%|▋ | 25626/371472 [2:04:06<25:55:57, 3.70it/s] 7%|▋ | 25627/371472 [2:04:06<26:52:02, 3.58it/s] 7%|▋ | 25628/371472 [2:04:06<29:30:18, 3.26it/s] 7%|▋ | 25629/371472 [2:04:07<31:35:33, 3.04it/s] 7%|▋ | 25630/371472 [2:04:07<31:40:10, 3.03it/s] 7%|▋ | 25631/371472 [2:04:07<30:02:41, 3.20it/s] 7%|▋ | 25632/371472 [2:04:08<30:09:28, 3.19it/s] 7%|▋ | 25633/371472 [2:04:08<29:21:06, 3.27it/s] 7%|▋ | 25634/371472 [2:04:08<28:05:45, 3.42it/s] 7%|▋ | 25635/371472 [2:04:08<28:16:14, 3.40it/s] 7%|▋ | 25636/371472 [2:04:09<27:29:13, 3.49it/s] 7%|▋ | 25637/371472 [2:04:09<26:38:20, 3.61it/s] 7%|▋ | 25638/371472 [2:04:09<27:37:54, 3.48it/s] 7%|▋ | 25639/371472 [2:04:10<27:57:34, 3.44it/s] 7%|▋ | 25640/371472 [2:04:10<27:42:51, 3.47it/s] {'loss': 4.6539, 'learning_rate': 9.383309271908465e-07, 'epoch': 1.1} + 7%|▋ | 25640/371472 [2:04:10<27:42:51, 3.47it/s] 7%|▋ | 25641/371472 [2:04:10<32:53:32, 2.92it/s] 7%|▋ | 25642/371472 [2:04:11<31:00:03, 3.10it/s] 7%|▋ | 25643/371472 [2:04:11<29:16:19, 3.28it/s] 7%|▋ | 25644/371472 [2:04:11<28:28:38, 3.37it/s] 7%|▋ | 25645/371472 [2:04:11<27:18:38, 3.52it/s] 7%|▋ | 25646/371472 [2:04:12<29:02:37, 3.31it/s] 7%|▋ | 25647/371472 [2:04:12<28:03:53, 3.42it/s] 7%|▋ | 25648/371472 [2:04:12<26:47:27, 3.59it/s] 7%|▋ | 25649/371472 [2:04:13<27:44:20, 3.46it/s] 7%|▋ | 25650/371472 [2:04:13<27:43:24, 3.46it/s] 7%|▋ | 25651/371472 [2:04:13<27:22:40, 3.51it/s] 7%|▋ | 25652/371472 [2:04:13<27:44:42, 3.46it/s] 7%|▋ | 25653/371472 [2:04:14<29:18:34, 3.28it/s] 7%|▋ | 25654/371472 [2:04:14<29:20:08, 3.27it/s] 7%|▋ | 25655/371472 [2:04:14<30:53:21, 3.11it/s] 7%|▋ | 25656/371472 [2:04:15<29:06:52, 3.30it/s] 7%|▋ | 25657/371472 [2:04:15<27:40:58, 3.47it/s] 7%|▋ | 25658/371472 [2:04:15<26:55:19, 3.57it/s] 7%|▋ | 25659/371472 [2:04:16<26:45:21, 3.59it/s] 7%|▋ | 25660/371472 [2:04:16<26:15:08, 3.66it/s] {'loss': 4.5414, 'learning_rate': 9.382824452153677e-07, 'epoch': 1.11} + 7%|▋ | 25660/371472 [2:04:16<26:15:08, 3.66it/s] 7%|▋ | 25661/371472 [2:04:16<25:52:16, 3.71it/s] 7%|▋ | 25662/371472 [2:04:16<29:00:32, 3.31it/s] 7%|▋ | 25663/371472 [2:04:17<39:14:56, 2.45it/s] 7%|▋ | 25664/371472 [2:04:17<35:01:02, 2.74it/s] 7%|▋ | 25665/371472 [2:04:18<31:56:53, 3.01it/s] 7%|▋ | 25666/371472 [2:04:18<31:24:24, 3.06it/s] 7%|▋ | 25667/371472 [2:04:18<29:36:17, 3.24it/s] 7%|▋ | 25668/371472 [2:04:18<28:32:13, 3.37it/s] 7%|▋ | 25669/371472 [2:04:19<28:21:11, 3.39it/s] 7%|▋ | 25670/371472 [2:04:19<27:16:28, 3.52it/s] 7%|▋ | 25671/371472 [2:04:19<26:32:17, 3.62it/s] 7%|▋ | 25672/371472 [2:04:20<26:43:53, 3.59it/s] 7%|▋ | 25673/371472 [2:04:20<28:07:39, 3.41it/s] 7%|▋ | 25674/371472 [2:04:20<27:35:20, 3.48it/s] 7%|▋ | 25675/371472 [2:04:20<27:26:28, 3.50it/s] 7%|▋ | 25676/371472 [2:04:21<27:53:36, 3.44it/s] 7%|▋ | 25677/371472 [2:04:21<27:29:10, 3.49it/s] 7%|▋ | 25678/371472 [2:04:21<26:42:06, 3.60it/s] 7%|▋ | 25679/371472 [2:04:22<27:12:17, 3.53it/s] 7%|▋ | 25680/371472 [2:04:22<31:41:43, 3.03it/s] {'loss': 4.6351, 'learning_rate': 9.382339632398888e-07, 'epoch': 1.11} + 7%|▋ | 25680/371472 [2:04:22<31:41:43, 3.03it/s] 7%|▋ | 25681/371472 [2:04:22<29:39:03, 3.24it/s] 7%|▋ | 25682/371472 [2:04:23<28:42:15, 3.35it/s] 7%|▋ | 25683/371472 [2:04:23<28:47:52, 3.34it/s] 7%|▋ | 25684/371472 [2:04:23<28:17:52, 3.39it/s] 7%|▋ | 25685/371472 [2:04:23<29:10:25, 3.29it/s] 7%|▋ | 25686/371472 [2:04:24<28:14:35, 3.40it/s] 7%|▋ | 25687/371472 [2:04:24<27:46:21, 3.46it/s] 7%|▋ | 25688/371472 [2:04:24<27:12:11, 3.53it/s] 7%|▋ | 25689/371472 [2:04:25<26:22:41, 3.64it/s] 7%|▋ | 25690/371472 [2:04:25<26:17:27, 3.65it/s] 7%|▋ | 25691/371472 [2:04:25<26:36:39, 3.61it/s] 7%|▋ | 25692/371472 [2:04:25<25:45:53, 3.73it/s] 7%|▋ | 25693/371472 [2:04:26<28:33:38, 3.36it/s] 7%|▋ | 25694/371472 [2:04:26<28:36:52, 3.36it/s] 7%|▋ | 25695/371472 [2:04:26<30:23:14, 3.16it/s] 7%|▋ | 25696/371472 [2:04:27<28:29:55, 3.37it/s] 7%|▋ | 25697/371472 [2:04:27<28:55:28, 3.32it/s] 7%|▋ | 25698/371472 [2:04:27<28:08:07, 3.41it/s] 7%|▋ | 25699/371472 [2:04:27<27:56:58, 3.44it/s] 7%|▋ | 25700/371472 [2:04:28<27:13:06, 3.53it/s] {'loss': 4.5367, 'learning_rate': 9.3818548126441e-07, 'epoch': 1.11} + 7%|▋ | 25700/371472 [2:04:28<27:13:06, 3.53it/s] 7%|▋ | 25701/371472 [2:04:28<28:49:46, 3.33it/s] 7%|▋ | 25702/371472 [2:04:28<29:50:44, 3.22it/s] 7%|▋ | 25703/371472 [2:04:29<29:55:30, 3.21it/s] 7%|▋ | 25704/371472 [2:04:29<28:35:07, 3.36it/s] 7%|▋ | 25705/371472 [2:04:29<28:11:41, 3.41it/s] 7%|▋ | 25706/371472 [2:04:30<28:19:21, 3.39it/s] 7%|▋ | 25707/371472 [2:04:30<29:33:15, 3.25it/s] 7%|▋ | 25708/371472 [2:04:30<29:12:45, 3.29it/s] 7%|▋ | 25709/371472 [2:04:30<28:47:31, 3.34it/s] 7%|▋ | 25710/371472 [2:04:31<28:05:51, 3.42it/s] 7%|▋ | 25711/371472 [2:04:31<27:47:29, 3.46it/s] 7%|▋ | 25712/371472 [2:04:31<28:21:49, 3.39it/s] 7%|▋ | 25713/371472 [2:04:32<28:19:46, 3.39it/s] 7%|▋ | 25714/371472 [2:04:32<29:33:56, 3.25it/s] 7%|▋ | 25715/371472 [2:04:32<28:32:11, 3.37it/s] 7%|▋ | 25716/371472 [2:04:33<29:59:03, 3.20it/s] 7%|▋ | 25717/371472 [2:04:33<29:15:28, 3.28it/s] 7%|▋ | 25718/371472 [2:04:33<28:49:28, 3.33it/s] 7%|▋ | 25719/371472 [2:04:34<29:44:20, 3.23it/s] 7%|▋ | 25720/371472 [2:04:34<29:17:25, 3.28it/s] {'loss': 4.403, 'learning_rate': 9.381369992889309e-07, 'epoch': 1.11} + 7%|▋ | 25720/371472 [2:04:34<29:17:25, 3.28it/s] 7%|▋ | 25721/371472 [2:04:34<28:24:28, 3.38it/s] 7%|▋ | 25722/371472 [2:04:34<27:49:56, 3.45it/s] 7%|▋ | 25723/371472 [2:04:35<28:08:10, 3.41it/s] 7%|▋ | 25724/371472 [2:04:35<27:36:44, 3.48it/s] 7%|▋ | 25725/371472 [2:04:35<26:48:46, 3.58it/s] 7%|▋ | 25726/371472 [2:04:35<26:52:57, 3.57it/s] 7%|▋ | 25727/371472 [2:04:36<26:58:42, 3.56it/s] 7%|▋ | 25728/371472 [2:04:36<26:46:49, 3.59it/s] 7%|▋ | 25729/371472 [2:04:36<26:49:30, 3.58it/s] 7%|▋ | 25730/371472 [2:04:37<26:40:42, 3.60it/s] 7%|▋ | 25731/371472 [2:04:37<26:14:44, 3.66it/s] 7%|▋ | 25732/371472 [2:04:37<31:30:58, 3.05it/s] 7%|▋ | 25733/371472 [2:04:38<30:24:29, 3.16it/s] 7%|▋ | 25734/371472 [2:04:38<29:08:31, 3.30it/s] 7%|▋ | 25735/371472 [2:04:38<27:55:46, 3.44it/s] 7%|▋ | 25736/371472 [2:04:38<29:08:28, 3.30it/s] 7%|▋ | 25737/371472 [2:04:39<28:18:34, 3.39it/s] 7%|▋ | 25738/371472 [2:04:39<28:56:24, 3.32it/s] 7%|▋ | 25739/371472 [2:04:39<28:17:22, 3.39it/s] 7%|▋ | 25740/371472 [2:04:40<27:23:17, 3.51it/s] {'loss': 4.4065, 'learning_rate': 9.380885173134521e-07, 'epoch': 1.11} + 7%|▋ | 25740/371472 [2:04:40<27:23:17, 3.51it/s] 7%|▋ | 25741/371472 [2:04:40<27:46:06, 3.46it/s] 7%|▋ | 25742/371472 [2:04:40<30:48:16, 3.12it/s] 7%|▋ | 25743/371472 [2:04:41<29:41:42, 3.23it/s] 7%|▋ | 25744/371472 [2:04:41<29:14:01, 3.29it/s] 7%|▋ | 25745/371472 [2:04:41<28:14:25, 3.40it/s] 7%|▋ | 25746/371472 [2:04:41<27:26:49, 3.50it/s] 7%|▋ | 25747/371472 [2:04:42<27:21:23, 3.51it/s] 7%|▋ | 25748/371472 [2:04:42<26:40:47, 3.60it/s] 7%|▋ | 25749/371472 [2:04:42<27:12:28, 3.53it/s] 7%|▋ | 25750/371472 [2:04:43<27:43:26, 3.46it/s] 7%|▋ | 25751/371472 [2:04:43<26:43:01, 3.59it/s] 7%|▋ | 25752/371472 [2:04:43<26:59:02, 3.56it/s] 7%|▋ | 25753/371472 [2:04:43<27:31:12, 3.49it/s] 7%|▋ | 25754/371472 [2:04:44<27:25:19, 3.50it/s] 7%|▋ | 25755/371472 [2:04:44<27:14:24, 3.53it/s] 7%|▋ | 25756/371472 [2:04:44<27:12:48, 3.53it/s] 7%|▋ | 25757/371472 [2:04:45<28:27:56, 3.37it/s] 7%|▋ | 25758/371472 [2:04:45<27:40:49, 3.47it/s] 7%|▋ | 25759/371472 [2:04:45<26:49:53, 3.58it/s] 7%|▋ | 25760/371472 [2:04:45<27:43:14, 3.46it/s] {'loss': 4.4638, 'learning_rate': 9.380400353379732e-07, 'epoch': 1.11} + 7%|▋ | 25760/371472 [2:04:45<27:43:14, 3.46it/s] 7%|▋ | 25761/371472 [2:04:46<27:54:56, 3.44it/s] 7%|▋ | 25762/371472 [2:04:46<28:18:26, 3.39it/s] 7%|▋ | 25763/371472 [2:04:46<28:18:16, 3.39it/s] 7%|▋ | 25764/371472 [2:04:47<28:35:06, 3.36it/s] 7%|▋ | 25765/371472 [2:04:47<28:32:30, 3.36it/s] 7%|▋ | 25766/371472 [2:04:47<28:01:22, 3.43it/s] 7%|▋ | 25767/371472 [2:04:47<28:25:21, 3.38it/s] 7%|▋ | 25768/371472 [2:04:48<27:44:16, 3.46it/s] 7%|▋ | 25769/371472 [2:04:48<26:43:40, 3.59it/s] 7%|▋ | 25770/371472 [2:04:48<26:17:32, 3.65it/s] 7%|▋ | 25771/371472 [2:04:49<27:26:01, 3.50it/s] 7%|▋ | 25772/371472 [2:04:49<26:46:45, 3.59it/s] 7%|▋ | 25773/371472 [2:04:49<27:54:09, 3.44it/s] 7%|▋ | 25774/371472 [2:04:49<28:53:51, 3.32it/s] 7%|▋ | 25775/371472 [2:04:50<28:45:04, 3.34it/s] 7%|▋ | 25776/371472 [2:04:50<27:48:07, 3.45it/s] 7%|▋ | 25777/371472 [2:04:50<27:12:32, 3.53it/s] 7%|▋ | 25778/371472 [2:04:51<28:23:40, 3.38it/s] 7%|▋ | 25779/371472 [2:04:51<27:16:58, 3.52it/s] 7%|▋ | 25780/371472 [2:04:51<27:40:12, 3.47it/s] {'loss': 4.5257, 'learning_rate': 9.379915533624943e-07, 'epoch': 1.11} + 7%|▋ | 25780/371472 [2:04:51<27:40:12, 3.47it/s] 7%|▋ | 25781/371472 [2:04:51<27:26:37, 3.50it/s] 7%|▋ | 25782/371472 [2:04:52<26:48:47, 3.58it/s] 7%|▋ | 25783/371472 [2:04:52<26:37:15, 3.61it/s] 7%|▋ | 25784/371472 [2:04:52<26:28:18, 3.63it/s] 7%|▋ | 25785/371472 [2:04:53<26:21:52, 3.64it/s] 7%|▋ | 25786/371472 [2:04:53<25:53:29, 3.71it/s] 7%|▋ | 25787/371472 [2:04:53<26:35:15, 3.61it/s] 7%|▋ | 25788/371472 [2:04:53<26:57:01, 3.56it/s] 7%|▋ | 25789/371472 [2:04:54<26:52:48, 3.57it/s] 7%|▋ | 25790/371472 [2:04:54<26:52:50, 3.57it/s] 7%|▋ | 25791/371472 [2:04:54<27:44:51, 3.46it/s] 7%|▋ | 25792/371472 [2:04:55<29:46:56, 3.22it/s] 7%|▋ | 25793/371472 [2:04:55<28:16:41, 3.40it/s] 7%|▋ | 25794/371472 [2:04:55<27:46:44, 3.46it/s] 7%|▋ | 25795/371472 [2:04:55<27:58:24, 3.43it/s] 7%|▋ | 25796/371472 [2:04:56<27:38:30, 3.47it/s] 7%|▋ | 25797/371472 [2:04:56<26:41:52, 3.60it/s] 7%|▋ | 25798/371472 [2:04:56<28:19:29, 3.39it/s] 7%|▋ | 25799/371472 [2:04:57<29:16:01, 3.28it/s] 7%|▋ | 25800/371472 [2:04:57<28:01:32, 3.43it/s] {'loss': 4.568, 'learning_rate': 9.379430713870154e-07, 'epoch': 1.11} + 7%|▋ | 25800/371472 [2:04:57<28:01:32, 3.43it/s] 7%|▋ | 25801/371472 [2:04:57<32:15:31, 2.98it/s] 7%|▋ | 25802/371472 [2:04:58<33:33:58, 2.86it/s] 7%|▋ | 25803/371472 [2:04:58<30:38:22, 3.13it/s] 7%|▋ | 25804/371472 [2:04:58<31:02:39, 3.09it/s] 7%|▋ | 25805/371472 [2:04:59<29:23:48, 3.27it/s] 7%|▋ | 25806/371472 [2:04:59<29:33:00, 3.25it/s] 7%|▋ | 25807/371472 [2:04:59<28:31:58, 3.37it/s] 7%|▋ | 25808/371472 [2:04:59<27:35:11, 3.48it/s] 7%|▋ | 25809/371472 [2:05:00<27:35:31, 3.48it/s] 7%|▋ | 25810/371472 [2:05:00<26:46:38, 3.59it/s] 7%|▋ | 25811/371472 [2:05:00<27:12:59, 3.53it/s] 7%|▋ | 25812/371472 [2:05:01<26:55:23, 3.57it/s] 7%|▋ | 25813/371472 [2:05:01<26:46:10, 3.59it/s] 7%|▋ | 25814/371472 [2:05:01<26:30:55, 3.62it/s] 7%|▋ | 25815/371472 [2:05:01<26:45:42, 3.59it/s] 7%|▋ | 25816/371472 [2:05:02<26:20:35, 3.64it/s] 7%|▋ | 25817/371472 [2:05:02<26:28:08, 3.63it/s] 7%|▋ | 25818/371472 [2:05:02<26:27:00, 3.63it/s] 7%|▋ | 25819/371472 [2:05:02<26:37:44, 3.61it/s] 7%|▋ | 25820/371472 [2:05:03<26:50:35, 3.58it/s] {'loss': 4.4757, 'learning_rate': 9.378945894115366e-07, 'epoch': 1.11} + 7%|▋ | 25820/371472 [2:05:03<26:50:35, 3.58it/s] 7%|▋ | 25821/371472 [2:05:03<27:23:45, 3.50it/s] 7%|▋ | 25822/371472 [2:05:03<28:26:25, 3.38it/s] 7%|▋ | 25823/371472 [2:05:04<27:53:54, 3.44it/s] 7%|▋ | 25824/371472 [2:05:04<27:41:42, 3.47it/s] 7%|▋ | 25825/371472 [2:05:04<27:14:08, 3.53it/s] 7%|▋ | 25826/371472 [2:05:04<26:34:07, 3.61it/s] 7%|▋ | 25827/371472 [2:05:05<27:24:57, 3.50it/s] 7%|▋ | 25828/371472 [2:05:05<27:16:51, 3.52it/s] 7%|▋ | 25829/371472 [2:05:05<26:53:37, 3.57it/s] 7%|▋ | 25830/371472 [2:05:06<26:48:12, 3.58it/s] 7%|▋ | 25831/371472 [2:05:06<26:22:48, 3.64it/s] 7%|▋ | 25832/371472 [2:05:06<29:18:24, 3.28it/s] 7%|▋ | 25833/371472 [2:05:07<28:36:00, 3.36it/s] 7%|▋ | 25834/371472 [2:05:07<28:09:17, 3.41it/s] 7%|▋ | 25835/371472 [2:05:07<28:19:14, 3.39it/s] 7%|▋ | 25836/371472 [2:05:07<27:32:58, 3.48it/s] 7%|▋ | 25837/371472 [2:05:08<28:46:40, 3.34it/s] 7%|▋ | 25838/371472 [2:05:08<27:48:04, 3.45it/s] 7%|▋ | 25839/371472 [2:05:08<28:55:03, 3.32it/s] 7%|▋ | 25840/371472 [2:05:09<28:23:45, 3.38it/s] {'loss': 4.4528, 'learning_rate': 9.378461074360575e-07, 'epoch': 1.11} + 7%|▋ | 25840/371472 [2:05:09<28:23:45, 3.38it/s] 7%|▋ | 25841/371472 [2:05:09<27:52:28, 3.44it/s] 7%|▋ | 25842/371472 [2:05:09<27:19:32, 3.51it/s] 7%|▋ | 25843/371472 [2:05:09<27:10:36, 3.53it/s] 7%|▋ | 25844/371472 [2:05:10<26:46:24, 3.59it/s] 7%|▋ | 25845/371472 [2:05:10<27:27:45, 3.50it/s] 7%|▋ | 25846/371472 [2:05:10<27:54:32, 3.44it/s] 7%|▋ | 25847/371472 [2:05:11<27:08:12, 3.54it/s] 7%|▋ | 25848/371472 [2:05:11<26:35:06, 3.61it/s] 7%|▋ | 25849/371472 [2:05:11<26:27:57, 3.63it/s] 7%|▋ | 25850/371472 [2:05:11<28:55:53, 3.32it/s] 7%|▋ | 25851/371472 [2:05:12<31:01:51, 3.09it/s] 7%|▋ | 25852/371472 [2:05:12<29:33:42, 3.25it/s] 7%|▋ | 25853/371472 [2:05:12<28:28:12, 3.37it/s] 7%|▋ | 25854/371472 [2:05:13<27:18:31, 3.52it/s] 7%|▋ | 25855/371472 [2:05:13<26:43:28, 3.59it/s] 7%|▋ | 25856/371472 [2:05:13<27:08:28, 3.54it/s] 7%|▋ | 25857/371472 [2:05:13<27:07:15, 3.54it/s] 7%|▋ | 25858/371472 [2:05:14<27:23:36, 3.50it/s] 7%|▋ | 25859/371472 [2:05:14<27:02:25, 3.55it/s] 7%|▋ | 25860/371472 [2:05:14<26:19:09, 3.65it/s] {'loss': 4.5492, 'learning_rate': 9.377976254605787e-07, 'epoch': 1.11} + 7%|▋ | 25860/371472 [2:05:14<26:19:09, 3.65it/s] 7%|▋ | 25861/371472 [2:05:15<26:17:31, 3.65it/s] 7%|▋ | 25862/371472 [2:05:15<27:24:56, 3.50it/s] 7%|▋ | 25863/371472 [2:05:15<26:40:56, 3.60it/s] 7%|▋ | 25864/371472 [2:05:15<25:55:25, 3.70it/s] 7%|▋ | 25865/371472 [2:05:16<28:17:01, 3.39it/s] 7%|▋ | 25866/371472 [2:05:16<28:00:27, 3.43it/s] 7%|▋ | 25867/371472 [2:05:16<27:08:36, 3.54it/s] 7%|▋ | 25868/371472 [2:05:17<27:45:13, 3.46it/s] 7%|▋ | 25869/371472 [2:05:17<27:07:44, 3.54it/s] 7%|▋ | 25870/371472 [2:05:17<27:42:11, 3.47it/s] 7%|▋ | 25871/371472 [2:05:17<28:26:14, 3.38it/s] 7%|▋ | 25872/371472 [2:05:18<27:06:26, 3.54it/s] 7%|▋ | 25873/371472 [2:05:18<27:59:13, 3.43it/s] 7%|▋ | 25874/371472 [2:05:18<29:03:27, 3.30it/s] 7%|▋ | 25875/371472 [2:05:19<27:56:04, 3.44it/s] 7%|▋ | 25876/371472 [2:05:19<27:58:25, 3.43it/s] 7%|▋ | 25877/371472 [2:05:19<32:50:37, 2.92it/s] 7%|▋ | 25878/371472 [2:05:20<31:12:06, 3.08it/s] 7%|▋ | 25879/371472 [2:05:20<28:58:35, 3.31it/s] 7%|▋ | 25880/371472 [2:05:20<28:48:52, 3.33it/s] {'loss': 4.3896, 'learning_rate': 9.377491434850998e-07, 'epoch': 1.11} + 7%|▋ | 25880/371472 [2:05:20<28:48:52, 3.33it/s] 7%|▋ | 25881/371472 [2:05:21<29:06:10, 3.30it/s] 7%|▋ | 25882/371472 [2:05:21<28:14:43, 3.40it/s] 7%|▋ | 25883/371472 [2:05:21<27:19:30, 3.51it/s] 7%|▋ | 25884/371472 [2:05:21<26:56:38, 3.56it/s] 7%|▋ | 25885/371472 [2:05:22<28:24:54, 3.38it/s] 7%|▋ | 25886/371472 [2:05:22<29:31:22, 3.25it/s] 7%|▋ | 25887/371472 [2:05:22<28:15:56, 3.40it/s] 7%|▋ | 25888/371472 [2:05:23<28:25:44, 3.38it/s] 7%|▋ | 25889/371472 [2:05:23<27:48:28, 3.45it/s] 7%|▋ | 25890/371472 [2:05:23<27:43:22, 3.46it/s] 7%|▋ | 25891/371472 [2:05:23<27:49:04, 3.45it/s] 7%|▋ | 25892/371472 [2:05:24<27:25:58, 3.50it/s] 7%|▋ | 25893/371472 [2:05:24<27:39:00, 3.47it/s] 7%|▋ | 25894/371472 [2:05:24<27:07:25, 3.54it/s] 7%|▋ | 25895/371472 [2:05:25<26:17:27, 3.65it/s] 7%|▋ | 25896/371472 [2:05:25<26:33:39, 3.61it/s] 7%|▋ | 25897/371472 [2:05:25<26:37:43, 3.60it/s] 7%|▋ | 25898/371472 [2:05:25<26:39:55, 3.60it/s] 7%|▋ | 25899/371472 [2:05:26<27:04:58, 3.54it/s] 7%|▋ | 25900/371472 [2:05:26<26:26:29, 3.63it/s] {'loss': 4.6046, 'learning_rate': 9.37700661509621e-07, 'epoch': 1.12} + 7%|▋ | 25900/371472 [2:05:26<26:26:29, 3.63it/s] 7%|▋ | 25901/371472 [2:05:26<25:59:29, 3.69it/s] 7%|▋ | 25902/371472 [2:05:26<26:33:47, 3.61it/s] 7%|▋ | 25903/371472 [2:05:27<26:57:18, 3.56it/s] 7%|▋ | 25904/371472 [2:05:27<26:34:15, 3.61it/s] 7%|▋ | 25905/371472 [2:05:27<26:57:15, 3.56it/s] 7%|▋ | 25906/371472 [2:05:28<27:14:33, 3.52it/s] 7%|▋ | 25907/371472 [2:05:28<26:38:26, 3.60it/s] 7%|▋ | 25908/371472 [2:05:28<25:43:07, 3.73it/s] 7%|▋ | 25909/371472 [2:05:28<25:08:24, 3.82it/s] 7%|▋ | 25910/371472 [2:05:29<25:58:39, 3.70it/s] 7%|▋ | 25911/371472 [2:05:29<26:07:33, 3.67it/s] 7%|▋ | 25912/371472 [2:05:29<26:37:37, 3.60it/s] 7%|▋ | 25913/371472 [2:05:29<26:13:50, 3.66it/s] 7%|▋ | 25914/371472 [2:05:30<28:17:07, 3.39it/s] 7%|▋ | 25915/371472 [2:05:30<30:09:54, 3.18it/s] 7%|▋ | 25916/371472 [2:05:30<29:15:33, 3.28it/s] 7%|▋ | 25917/371472 [2:05:31<29:06:13, 3.30it/s] 7%|▋ | 25918/371472 [2:05:31<30:10:28, 3.18it/s] 7%|▋ | 25919/371472 [2:05:31<29:43:45, 3.23it/s] 7%|▋ | 25920/371472 [2:05:32<27:59:23, 3.43it/s] {'loss': 4.516, 'learning_rate': 9.37652179534142e-07, 'epoch': 1.12} + 7%|▋ | 25920/371472 [2:05:32<27:59:23, 3.43it/s] 7%|▋ | 25921/371472 [2:05:32<27:06:08, 3.54it/s] 7%|▋ | 25922/371472 [2:05:32<25:58:41, 3.69it/s] 7%|▋ | 25923/371472 [2:05:32<26:11:08, 3.67it/s] 7%|▋ | 25924/371472 [2:05:33<26:56:56, 3.56it/s] 7%|▋ | 25925/371472 [2:05:33<26:33:30, 3.61it/s] 7%|▋ | 25926/371472 [2:05:33<29:35:57, 3.24it/s] 7%|▋ | 25927/371472 [2:05:34<28:59:59, 3.31it/s] 7%|▋ | 25928/371472 [2:05:34<27:55:06, 3.44it/s] 7%|▋ | 25929/371472 [2:05:34<28:12:32, 3.40it/s] 7%|▋ | 25930/371472 [2:05:35<30:09:53, 3.18it/s] 7%|▋ | 25931/371472 [2:05:35<29:16:09, 3.28it/s] 7%|▋ | 25932/371472 [2:05:35<29:20:52, 3.27it/s] 7%|▋ | 25933/371472 [2:05:35<29:47:23, 3.22it/s] 7%|▋ | 25934/371472 [2:05:36<28:59:01, 3.31it/s] 7%|▋ | 25935/371472 [2:05:36<27:53:16, 3.44it/s] 7%|▋ | 25936/371472 [2:05:36<28:01:28, 3.42it/s] 7%|▋ | 25937/371472 [2:05:37<28:09:13, 3.41it/s] 7%|▋ | 25938/371472 [2:05:37<29:45:49, 3.22it/s] 7%|▋ | 25939/371472 [2:05:37<28:24:24, 3.38it/s] 7%|▋ | 25940/371472 [2:05:38<30:09:45, 3.18it/s] {'loss': 4.4047, 'learning_rate': 9.376036975586632e-07, 'epoch': 1.12} + 7%|▋ | 25940/371472 [2:05:38<30:09:45, 3.18it/s] 7%|▋ | 25941/371472 [2:05:38<29:48:44, 3.22it/s] 7%|▋ | 25942/371472 [2:05:38<28:30:46, 3.37it/s] 7%|▋ | 25943/371472 [2:05:38<28:53:31, 3.32it/s] 7%|▋ | 25944/371472 [2:05:39<27:46:09, 3.46it/s] 7%|▋ | 25945/371472 [2:05:39<27:54:26, 3.44it/s] 7%|▋ | 25946/371472 [2:05:39<27:34:21, 3.48it/s] 7%|▋ | 25947/371472 [2:05:40<26:45:08, 3.59it/s] 7%|▋ | 25948/371472 [2:05:40<26:47:18, 3.58it/s] 7%|▋ | 25949/371472 [2:05:40<26:41:48, 3.60it/s] 7%|▋ | 25950/371472 [2:05:40<26:13:09, 3.66it/s] 7%|▋ | 25951/371472 [2:05:41<26:10:41, 3.67it/s] 7%|▋ | 25952/371472 [2:05:41<27:05:28, 3.54it/s] 7%|▋ | 25953/371472 [2:05:41<26:20:51, 3.64it/s] 7%|▋ | 25954/371472 [2:05:42<27:27:38, 3.50it/s] 7%|▋ | 25955/371472 [2:05:42<30:27:24, 3.15it/s] 7%|▋ | 25956/371472 [2:05:42<28:44:55, 3.34it/s] 7%|▋ | 25957/371472 [2:05:43<29:27:41, 3.26it/s] 7%|▋ | 25958/371472 [2:05:43<28:26:22, 3.37it/s] 7%|▋ | 25959/371472 [2:05:43<27:50:11, 3.45it/s] 7%|▋ | 25960/371472 [2:05:43<27:23:41, 3.50it/s] {'loss': 4.442, 'learning_rate': 9.375552155831843e-07, 'epoch': 1.12} + 7%|▋ | 25960/371472 [2:05:43<27:23:41, 3.50it/s] 7%|▋ | 25961/371472 [2:05:44<26:39:25, 3.60it/s] 7%|▋ | 25962/371472 [2:05:44<27:55:53, 3.44it/s] 7%|▋ | 25963/371472 [2:05:44<26:50:49, 3.57it/s] 7%|▋ | 25964/371472 [2:05:44<26:35:13, 3.61it/s] 7%|▋ | 25965/371472 [2:05:45<26:46:02, 3.59it/s] 7%|▋ | 25966/371472 [2:05:45<27:28:24, 3.49it/s] 7%|▋ | 25967/371472 [2:05:45<29:27:56, 3.26it/s] 7%|▋ | 25968/371472 [2:05:46<32:39:36, 2.94it/s] 7%|▋ | 25969/371472 [2:05:46<31:21:51, 3.06it/s] 7%|▋ | 25970/371472 [2:05:46<29:57:10, 3.20it/s] 7%|▋ | 25971/371472 [2:05:47<30:27:05, 3.15it/s] 7%|▋ | 25972/371472 [2:05:47<29:13:30, 3.28it/s] 7%|▋ | 25973/371472 [2:05:47<29:05:55, 3.30it/s] 7%|▋ | 25974/371472 [2:05:48<27:24:18, 3.50it/s] 7%|▋ | 25975/371472 [2:05:48<27:17:41, 3.52it/s] 7%|▋ | 25976/371472 [2:05:48<27:42:44, 3.46it/s] 7%|▋ | 25977/371472 [2:05:48<29:37:06, 3.24it/s] 7%|▋ | 25978/371472 [2:05:49<29:21:45, 3.27it/s] 7%|▋ | 25979/371472 [2:05:49<28:32:21, 3.36it/s] 7%|▋ | 25980/371472 [2:05:49<28:02:57, 3.42it/s] {'loss': 4.5514, 'learning_rate': 9.375067336077054e-07, 'epoch': 1.12} + 7%|▋ | 25980/371472 [2:05:49<28:02:57, 3.42it/s] 7%|▋ | 25981/371472 [2:05:50<27:55:55, 3.44it/s] 7%|▋ | 25982/371472 [2:05:50<27:22:50, 3.50it/s] 7%|▋ | 25983/371472 [2:05:50<26:42:47, 3.59it/s] 7%|▋ | 25984/371472 [2:05:50<28:32:09, 3.36it/s] 7%|▋ | 25985/371472 [2:05:51<28:43:08, 3.34it/s] 7%|▋ | 25986/371472 [2:05:51<27:31:01, 3.49it/s] 7%|▋ | 25987/371472 [2:05:51<27:46:10, 3.46it/s] 7%|▋ | 25988/371472 [2:05:52<27:53:58, 3.44it/s] 7%|▋ | 25989/371472 [2:05:52<27:52:04, 3.44it/s] 7%|▋ | 25990/371472 [2:05:52<26:53:20, 3.57it/s] 7%|▋ | 25991/371472 [2:05:52<26:52:55, 3.57it/s] 7%|▋ | 25992/371472 [2:05:53<27:20:32, 3.51it/s] 7%|▋ | 25993/371472 [2:05:53<27:05:38, 3.54it/s] 7%|▋ | 25994/371472 [2:05:53<28:35:55, 3.36it/s] 7%|▋ | 25995/371472 [2:05:54<27:32:41, 3.48it/s] 7%|▋ | 25996/371472 [2:05:54<28:44:22, 3.34it/s] 7%|▋ | 25997/371472 [2:05:54<27:59:52, 3.43it/s] 7%|▋ | 25998/371472 [2:05:55<28:39:53, 3.35it/s] 7%|▋ | 25999/371472 [2:05:55<31:51:12, 3.01it/s] 7%|▋ | 26000/371472 [2:05:55<31:03:04, 3.09it/s] {'loss': 4.9406, 'learning_rate': 9.374582516322264e-07, 'epoch': 1.12} + 7%|▋ | 26000/371472 [2:05:55<31:03:04, 3.09it/s] 7%|▋ | 26001/371472 [2:05:56<29:09:17, 3.29it/s] 7%|▋ | 26002/371472 [2:05:56<28:07:05, 3.41it/s] 7%|▋ | 26003/371472 [2:05:56<28:21:58, 3.38it/s] 7%|▋ | 26004/371472 [2:05:56<27:58:18, 3.43it/s] 7%|▋ | 26005/371472 [2:05:57<29:29:25, 3.25it/s] 7%|▋ | 26006/371472 [2:05:57<27:58:46, 3.43it/s] 7%|▋ | 26007/371472 [2:05:57<27:02:52, 3.55it/s] 7%|▋ | 26008/371472 [2:05:57<26:41:43, 3.59it/s] 7%|▋ | 26009/371472 [2:05:58<28:28:54, 3.37it/s] 7%|▋ | 26010/371472 [2:05:58<27:24:38, 3.50it/s] 7%|▋ | 26011/371472 [2:05:58<27:18:40, 3.51it/s] 7%|▋ | 26012/371472 [2:05:59<29:19:30, 3.27it/s] 7%|▋ | 26013/371472 [2:05:59<28:17:54, 3.39it/s] 7%|▋ | 26014/371472 [2:05:59<28:01:29, 3.42it/s] 7%|▋ | 26015/371472 [2:06:00<27:38:13, 3.47it/s] 7%|▋ | 26016/371472 [2:06:00<28:18:00, 3.39it/s] 7%|▋ | 26017/371472 [2:06:00<27:08:50, 3.53it/s] 7%|▋ | 26018/371472 [2:06:00<26:53:44, 3.57it/s] 7%|▋ | 26019/371472 [2:06:01<28:22:44, 3.38it/s] 7%|▋ | 26020/371472 [2:06:01<29:28:19, 3.26it/s] {'loss': 4.5569, 'learning_rate': 9.374097696567476e-07, 'epoch': 1.12} + 7%|▋ | 26020/371472 [2:06:01<29:28:19, 3.26it/s] 7%|▋ | 26021/371472 [2:06:01<28:00:10, 3.43it/s] 7%|▋ | 26022/371472 [2:06:02<27:45:57, 3.46it/s] 7%|▋ | 26023/371472 [2:06:02<27:31:33, 3.49it/s] 7%|▋ | 26024/371472 [2:06:02<27:34:58, 3.48it/s] 7%|▋ | 26025/371472 [2:06:02<27:49:44, 3.45it/s] 7%|▋ | 26026/371472 [2:06:03<26:58:25, 3.56it/s] 7%|▋ | 26027/371472 [2:06:03<26:41:49, 3.59it/s] 7%|▋ | 26028/371472 [2:06:03<26:23:27, 3.64it/s] 7%|▋ | 26029/371472 [2:06:04<25:51:53, 3.71it/s] 7%|▋ | 26030/371472 [2:06:04<26:10:07, 3.67it/s] 7%|▋ | 26031/371472 [2:06:04<25:39:44, 3.74it/s] 7%|▋ | 26032/371472 [2:06:04<26:23:28, 3.64it/s] 7%|▋ | 26033/371472 [2:06:05<27:26:08, 3.50it/s] 7%|▋ | 26034/371472 [2:06:05<26:56:12, 3.56it/s] 7%|▋ | 26035/371472 [2:06:05<26:25:15, 3.63it/s] 7%|▋ | 26036/371472 [2:06:05<26:45:49, 3.59it/s] 7%|▋ | 26037/371472 [2:06:06<27:25:47, 3.50it/s] 7%|▋ | 26038/371472 [2:06:06<29:51:15, 3.21it/s] 7%|▋ | 26039/371472 [2:06:06<29:05:16, 3.30it/s] 7%|▋ | 26040/371472 [2:06:07<30:50:42, 3.11it/s] {'loss': 4.5373, 'learning_rate': 9.373612876812687e-07, 'epoch': 1.12} + 7%|▋ | 26040/371472 [2:06:07<30:50:42, 3.11it/s] 7%|▋ | 26041/371472 [2:06:07<30:27:39, 3.15it/s] 7%|▋ | 26042/371472 [2:06:07<29:01:14, 3.31it/s] 7%|▋ | 26043/371472 [2:06:08<29:05:47, 3.30it/s] 7%|▋ | 26044/371472 [2:06:08<30:48:50, 3.11it/s] 7%|▋ | 26045/371472 [2:06:08<30:44:37, 3.12it/s] 7%|▋ | 26046/371472 [2:06:09<29:37:45, 3.24it/s] 7%|▋ | 26047/371472 [2:06:09<30:45:47, 3.12it/s] 7%|▋ | 26048/371472 [2:06:09<29:45:36, 3.22it/s] 7%|▋ | 26049/371472 [2:06:10<29:14:14, 3.28it/s] 7%|▋ | 26050/371472 [2:06:10<29:28:51, 3.25it/s] 7%|▋ | 26051/371472 [2:06:10<28:21:49, 3.38it/s] 7%|▋ | 26052/371472 [2:06:10<27:27:25, 3.49it/s] 7%|▋ | 26053/371472 [2:06:11<27:28:57, 3.49it/s] 7%|▋ | 26054/371472 [2:06:11<28:11:44, 3.40it/s] 7%|▋ | 26055/371472 [2:06:11<27:46:39, 3.45it/s] 7%|▋ | 26056/371472 [2:06:12<27:38:59, 3.47it/s] 7%|▋ | 26057/371472 [2:06:12<26:35:43, 3.61it/s] 7%|▋ | 26058/371472 [2:06:12<27:08:45, 3.53it/s] 7%|▋ | 26059/371472 [2:06:12<26:32:18, 3.62it/s] 7%|▋ | 26060/371472 [2:06:13<26:07:08, 3.67it/s] {'loss': 4.4326, 'learning_rate': 9.373128057057899e-07, 'epoch': 1.12} + 7%|▋ | 26060/371472 [2:06:13<26:07:08, 3.67it/s] 7%|▋ | 26061/371472 [2:06:13<26:23:24, 3.64it/s] 7%|▋ | 26062/371472 [2:06:13<27:28:47, 3.49it/s] 7%|▋ | 26063/371472 [2:06:14<27:08:32, 3.53it/s] 7%|▋ | 26064/371472 [2:06:14<26:27:02, 3.63it/s] 7%|▋ | 26065/371472 [2:06:14<26:13:43, 3.66it/s] 7%|▋ | 26066/371472 [2:06:14<26:44:58, 3.59it/s] 7%|▋ | 26067/371472 [2:06:15<26:42:25, 3.59it/s] 7%|▋ | 26068/371472 [2:06:15<28:34:31, 3.36it/s] 7%|▋ | 26069/371472 [2:06:15<29:13:01, 3.28it/s] 7%|▋ | 26070/371472 [2:06:16<28:09:25, 3.41it/s] 7%|▋ | 26071/371472 [2:06:16<27:25:28, 3.50it/s] 7%|▋ | 26072/371472 [2:06:16<26:46:36, 3.58it/s] 7%|▋ | 26073/371472 [2:06:16<26:45:09, 3.59it/s] 7%|▋ | 26074/371472 [2:06:17<26:59:26, 3.55it/s] 7%|▋ | 26075/371472 [2:06:17<27:26:43, 3.50it/s] 7%|▋ | 26076/371472 [2:06:17<27:53:31, 3.44it/s] 7%|▋ | 26077/371472 [2:06:18<27:37:33, 3.47it/s] 7%|▋ | 26078/371472 [2:06:18<27:57:11, 3.43it/s] 7%|▋ | 26079/371472 [2:06:18<27:29:28, 3.49it/s] 7%|▋ | 26080/371472 [2:06:18<28:19:11, 3.39it/s] {'loss': 4.4376, 'learning_rate': 9.372643237303109e-07, 'epoch': 1.12} + 7%|▋ | 26080/371472 [2:06:18<28:19:11, 3.39it/s] 7%|▋ | 26081/371472 [2:06:19<29:14:57, 3.28it/s] 7%|▋ | 26082/371472 [2:06:19<28:32:35, 3.36it/s] 7%|▋ | 26083/371472 [2:06:19<29:15:20, 3.28it/s] 7%|▋ | 26084/371472 [2:06:20<27:51:34, 3.44it/s] 7%|▋ | 26085/371472 [2:06:20<27:21:31, 3.51it/s] 7%|▋ | 26086/371472 [2:06:20<26:37:00, 3.60it/s] 7%|▋ | 26087/371472 [2:06:21<29:41:32, 3.23it/s] 7%|▋ | 26088/371472 [2:06:21<29:29:04, 3.25it/s] 7%|▋ | 26089/371472 [2:06:21<28:23:10, 3.38it/s] 7%|▋ | 26090/371472 [2:06:21<29:39:01, 3.24it/s] 7%|▋ | 26091/371472 [2:06:22<28:26:03, 3.37it/s] 7%|▋ | 26092/371472 [2:06:22<28:16:52, 3.39it/s] 7%|▋ | 26093/371472 [2:06:22<27:34:20, 3.48it/s] 7%|▋ | 26094/371472 [2:06:23<27:25:58, 3.50it/s] 7%|▋ | 26095/371472 [2:06:23<29:59:21, 3.20it/s] 7%|▋ | 26096/371472 [2:06:23<31:44:49, 3.02it/s] 7%|▋ | 26097/371472 [2:06:24<31:17:20, 3.07it/s] 7%|▋ | 26098/371472 [2:06:24<29:44:41, 3.23it/s] 7%|▋ | 26099/371472 [2:06:24<31:28:23, 3.05it/s] 7%|▋ | 26100/371472 [2:06:25<30:08:00, 3.18it/s] {'loss': 4.5073, 'learning_rate': 9.372158417548319e-07, 'epoch': 1.12} + 7%|▋ | 26100/371472 [2:06:25<30:08:00, 3.18it/s] 7%|▋ | 26101/371472 [2:06:25<29:23:58, 3.26it/s] 7%|▋ | 26102/371472 [2:06:25<28:59:58, 3.31it/s] 7%|▋ | 26103/371472 [2:06:25<29:03:46, 3.30it/s] 7%|▋ | 26104/371472 [2:06:26<29:03:47, 3.30it/s] 7%|▋ | 26105/371472 [2:06:26<28:00:27, 3.43it/s] 7%|▋ | 26106/371472 [2:06:26<27:34:02, 3.48it/s] 7%|▋ | 26107/371472 [2:06:27<28:09:51, 3.41it/s] 7%|▋ | 26108/371472 [2:06:27<28:26:59, 3.37it/s] 7%|▋ | 26109/371472 [2:06:27<27:34:09, 3.48it/s] 7%|▋ | 26110/371472 [2:06:27<27:56:03, 3.43it/s] 7%|▋ | 26111/371472 [2:06:28<27:49:38, 3.45it/s] 7%|▋ | 26112/371472 [2:06:28<29:09:22, 3.29it/s] 7%|▋ | 26113/371472 [2:06:28<27:39:20, 3.47it/s] 7%|▋ | 26114/371472 [2:06:29<26:48:15, 3.58it/s] 7%|▋ | 26115/371472 [2:06:29<27:10:37, 3.53it/s] 7%|▋ | 26116/371472 [2:06:29<26:09:51, 3.67it/s] 7%|▋ | 26117/371472 [2:06:29<25:46:06, 3.72it/s] 7%|▋ | 26118/371472 [2:06:30<26:17:32, 3.65it/s] 7%|▋ | 26119/371472 [2:06:30<26:09:38, 3.67it/s] 7%|▋ | 26120/371472 [2:06:30<25:16:17, 3.80it/s] {'loss': 4.5078, 'learning_rate': 9.371673597793531e-07, 'epoch': 1.13} + 7%|▋ | 26120/371472 [2:06:30<25:16:17, 3.80it/s] 7%|▋ | 26121/371472 [2:06:31<27:15:16, 3.52it/s] 7%|▋ | 26122/371472 [2:06:31<27:04:30, 3.54it/s] 7%|▋ | 26123/371472 [2:06:31<27:53:24, 3.44it/s] 7%|▋ | 26124/371472 [2:06:31<27:10:34, 3.53it/s] 7%|▋ | 26125/371472 [2:06:32<26:27:47, 3.63it/s] 7%|▋ | 26126/371472 [2:06:32<25:50:59, 3.71it/s] 7%|▋ | 26127/371472 [2:06:32<25:17:39, 3.79it/s] 7%|▋ | 26128/371472 [2:06:32<25:18:41, 3.79it/s] 7%|▋ | 26129/371472 [2:06:33<25:33:49, 3.75it/s] 7%|▋ | 26130/371472 [2:06:33<26:40:57, 3.60it/s] 7%|▋ | 26131/371472 [2:06:33<26:10:08, 3.67it/s] 7%|▋ | 26132/371472 [2:06:34<26:42:01, 3.59it/s] 7%|▋ | 26133/371472 [2:06:34<30:17:58, 3.17it/s] 7%|▋ | 26134/371472 [2:06:34<30:30:41, 3.14it/s] 7%|▋ | 26135/371472 [2:06:35<29:42:10, 3.23it/s] 7%|▋ | 26136/371472 [2:06:35<30:24:40, 3.15it/s] 7%|▋ | 26137/371472 [2:06:35<30:38:43, 3.13it/s] 7%|▋ | 26138/371472 [2:06:35<28:47:50, 3.33it/s] 7%|▋ | 26139/371472 [2:06:36<29:53:33, 3.21it/s] 7%|▋ | 26140/371472 [2:06:36<29:27:43, 3.26it/s] {'loss': 4.3807, 'learning_rate': 9.371188778038741e-07, 'epoch': 1.13} + 7%|▋ | 26140/371472 [2:06:36<29:27:43, 3.26it/s] 7%|▋ | 26141/371472 [2:06:36<29:13:04, 3.28it/s] 7%|▋ | 26142/371472 [2:06:37<29:22:54, 3.26it/s] 7%|▋ | 26143/371472 [2:06:37<28:20:46, 3.38it/s] 7%|▋ | 26144/371472 [2:06:37<27:10:46, 3.53it/s] 7%|▋ | 26145/371472 [2:06:38<27:41:02, 3.46it/s] 7%|▋ | 26146/371472 [2:06:38<26:59:55, 3.55it/s] 7%|▋ | 26147/371472 [2:06:38<26:44:44, 3.59it/s] 7%|▋ | 26148/371472 [2:06:38<26:56:44, 3.56it/s] 7%|▋ | 26149/371472 [2:06:39<26:51:47, 3.57it/s] 7%|▋ | 26150/371472 [2:06:39<27:01:32, 3.55it/s] 7%|▋ | 26151/371472 [2:06:39<27:03:10, 3.55it/s] 7%|▋ | 26152/371472 [2:06:39<27:43:35, 3.46it/s] 7%|▋ | 26153/371472 [2:06:40<26:46:55, 3.58it/s] 7%|▋ | 26154/371472 [2:06:40<27:11:18, 3.53it/s] 7%|▋ | 26155/371472 [2:06:40<28:47:12, 3.33it/s] 7%|▋ | 26156/371472 [2:06:41<27:26:55, 3.49it/s] 7%|▋ | 26157/371472 [2:06:41<26:52:29, 3.57it/s] 7%|▋ | 26158/371472 [2:06:41<26:48:44, 3.58it/s] 7%|▋ | 26159/371472 [2:06:41<26:46:24, 3.58it/s] 7%|▋ | 26160/371472 [2:06:42<26:47:02, 3.58it/s] {'loss': 4.5271, 'learning_rate': 9.370703958283953e-07, 'epoch': 1.13} + 7%|▋ | 26160/371472 [2:06:42<26:47:02, 3.58it/s] 7%|▋ | 26161/371472 [2:06:42<26:51:11, 3.57it/s] 7%|▋ | 26162/371472 [2:06:42<28:11:04, 3.40it/s] 7%|▋ | 26163/371472 [2:06:43<28:15:23, 3.39it/s] 7%|▋ | 26164/371472 [2:06:43<27:27:11, 3.49it/s] 7%|▋ | 26165/371472 [2:06:43<27:17:03, 3.52it/s] 7%|▋ | 26166/371472 [2:06:43<27:15:44, 3.52it/s] 7%|▋ | 26167/371472 [2:06:44<27:31:03, 3.49it/s] 7%|▋ | 26168/371472 [2:06:44<26:18:45, 3.65it/s] 7%|▋ | 26169/371472 [2:06:44<26:15:50, 3.65it/s] 7%|▋ | 26170/371472 [2:06:45<28:05:36, 3.41it/s] 7%|▋ | 26171/371472 [2:06:45<27:47:39, 3.45it/s] 7%|▋ | 26172/371472 [2:06:45<27:17:55, 3.51it/s] 7%|▋ | 26173/371472 [2:06:45<26:34:28, 3.61it/s] 7%|▋ | 26174/371472 [2:06:46<27:15:44, 3.52it/s] 7%|▋ | 26175/371472 [2:06:46<27:27:17, 3.49it/s] 7%|▋ | 26176/371472 [2:06:46<27:15:39, 3.52it/s] 7%|▋ | 26177/371472 [2:06:47<29:26:35, 3.26it/s] 7%|▋ | 26178/371472 [2:06:47<28:43:07, 3.34it/s] 7%|▋ | 26179/371472 [2:06:47<28:46:51, 3.33it/s] 7%|▋ | 26180/371472 [2:06:48<29:34:08, 3.24it/s] {'loss': 4.5432, 'learning_rate': 9.370219138529164e-07, 'epoch': 1.13} + 7%|▋ | 26180/371472 [2:06:48<29:34:08, 3.24it/s] 7%|▋ | 26181/371472 [2:06:48<32:41:05, 2.93it/s] 7%|▋ | 26182/371472 [2:06:48<30:28:49, 3.15it/s] 7%|▋ | 26183/371472 [2:06:49<29:20:14, 3.27it/s] 7%|▋ | 26184/371472 [2:06:49<28:53:03, 3.32it/s] 7%|▋ | 26185/371472 [2:06:49<27:54:10, 3.44it/s] 7%|▋ | 26186/371472 [2:06:49<26:59:52, 3.55it/s] 7%|▋ | 26187/371472 [2:06:50<27:21:59, 3.50it/s] 7%|▋ | 26188/371472 [2:06:50<27:46:58, 3.45it/s] 7%|▋ | 26189/371472 [2:06:50<29:01:16, 3.30it/s] 7%|▋ | 26190/371472 [2:06:51<28:44:52, 3.34it/s] 7%|▋ | 26191/371472 [2:06:51<29:38:11, 3.24it/s] 7%|▋ | 26192/371472 [2:06:51<29:44:26, 3.22it/s] 7%|▋ | 26193/371472 [2:06:51<28:43:12, 3.34it/s] 7%|▋ | 26194/371472 [2:06:52<27:33:42, 3.48it/s] 7%|▋ | 26195/371472 [2:06:52<28:07:06, 3.41it/s] 7%|▋ | 26196/371472 [2:06:52<28:10:50, 3.40it/s] 7%|▋ | 26197/371472 [2:06:53<29:11:51, 3.28it/s] 7%|▋ | 26198/371472 [2:06:53<29:22:23, 3.27it/s] 7%|▋ | 26199/371472 [2:06:53<28:41:18, 3.34it/s] 7%|▋ | 26200/371472 [2:06:54<27:55:48, 3.43it/s] {'loss': 4.5553, 'learning_rate': 9.369734318774376e-07, 'epoch': 1.13} + 7%|▋ | 26200/371472 [2:06:54<27:55:48, 3.43it/s] 7%|▋ | 26201/371472 [2:06:54<27:44:07, 3.46it/s] 7%|▋ | 26202/371472 [2:06:54<27:49:19, 3.45it/s] 7%|▋ | 26203/371472 [2:06:54<27:43:33, 3.46it/s] 7%|▋ | 26204/371472 [2:06:55<27:00:05, 3.55it/s] 7%|▋ | 26205/371472 [2:06:55<26:08:57, 3.67it/s] 7%|▋ | 26206/371472 [2:06:55<27:46:30, 3.45it/s] 7%|▋ | 26207/371472 [2:06:56<27:11:05, 3.53it/s] 7%|▋ | 26208/371472 [2:06:56<27:55:13, 3.43it/s] 7%|▋ | 26209/371472 [2:06:56<26:48:24, 3.58it/s] 7%|▋ | 26210/371472 [2:06:56<27:25:28, 3.50it/s] 7%|▋ | 26211/371472 [2:06:57<28:58:54, 3.31it/s] 7%|▋ | 26212/371472 [2:06:57<27:40:56, 3.46it/s] 7%|▋ | 26213/371472 [2:06:57<26:17:07, 3.65it/s] 7%|▋ | 26214/371472 [2:06:58<26:33:03, 3.61it/s] 7%|▋ | 26215/371472 [2:06:58<28:33:40, 3.36it/s] 7%|▋ | 26216/371472 [2:06:58<28:03:06, 3.42it/s] 7%|▋ | 26217/371472 [2:06:58<27:16:06, 3.52it/s] 7%|▋ | 26218/371472 [2:06:59<27:07:51, 3.53it/s] 7%|▋ | 26219/371472 [2:06:59<26:36:06, 3.61it/s] 7%|▋ | 26220/371472 [2:06:59<28:24:06, 3.38it/s] {'loss': 4.6125, 'learning_rate': 9.369249499019586e-07, 'epoch': 1.13} + 7%|▋ | 26220/371472 [2:06:59<28:24:06, 3.38it/s] 7%|▋ | 26221/371472 [2:07:00<27:47:10, 3.45it/s] 7%|▋ | 26222/371472 [2:07:00<27:32:31, 3.48it/s] 7%|▋ | 26223/371472 [2:07:00<27:06:36, 3.54it/s] 7%|▋ | 26224/371472 [2:07:00<28:22:48, 3.38it/s] 7%|▋ | 26225/371472 [2:07:01<27:13:53, 3.52it/s] 7%|▋ | 26226/371472 [2:07:01<27:16:15, 3.52it/s] 7%|▋ | 26227/371472 [2:07:01<29:43:36, 3.23it/s] 7%|▋ | 26228/371472 [2:07:02<29:56:18, 3.20it/s] 7%|▋ | 26229/371472 [2:07:02<28:38:29, 3.35it/s] 7%|▋ | 26230/371472 [2:07:02<28:21:33, 3.38it/s] 7%|▋ | 26231/371472 [2:07:03<27:56:09, 3.43it/s] 7%|▋ | 26232/371472 [2:07:03<27:08:30, 3.53it/s] 7%|▋ | 26233/371472 [2:07:03<26:23:32, 3.63it/s] 7%|▋ | 26234/371472 [2:07:03<27:09:10, 3.53it/s] 7%|▋ | 26235/371472 [2:07:04<26:20:56, 3.64it/s] 7%|▋ | 26236/371472 [2:07:04<26:38:09, 3.60it/s] 7%|▋ | 26237/371472 [2:07:04<26:14:36, 3.65it/s] 7%|▋ | 26238/371472 [2:07:04<25:58:50, 3.69it/s] 7%|▋ | 26239/371472 [2:07:05<25:35:13, 3.75it/s] 7%|▋ | 26240/371472 [2:07:05<25:37:35, 3.74it/s] {'loss': 4.6217, 'learning_rate': 9.368764679264797e-07, 'epoch': 1.13} + 7%|▋ | 26240/371472 [2:07:05<25:37:35, 3.74it/s] 7%|▋ | 26241/371472 [2:07:05<25:02:48, 3.83it/s] 7%|▋ | 26242/371472 [2:07:05<25:24:14, 3.77it/s] 7%|▋ | 26243/371472 [2:07:06<26:22:53, 3.64it/s] 7%|▋ | 26244/371472 [2:07:06<25:37:36, 3.74it/s] 7%|▋ | 26245/371472 [2:07:06<26:01:11, 3.69it/s] 7%|▋ | 26246/371472 [2:07:07<28:13:40, 3.40it/s] 7%|▋ | 26247/371472 [2:07:07<27:29:04, 3.49it/s] 7%|▋ | 26248/371472 [2:07:07<27:19:58, 3.51it/s] 7%|▋ | 26249/371472 [2:07:07<26:58:38, 3.55it/s] 7%|▋ | 26250/371472 [2:07:08<26:34:23, 3.61it/s] 7%|▋ | 26251/371472 [2:07:08<26:34:24, 3.61it/s] 7%|▋ | 26252/371472 [2:07:08<26:39:11, 3.60it/s] 7%|▋ | 26253/371472 [2:07:09<26:17:50, 3.65it/s] 7%|▋ | 26254/371472 [2:07:09<26:51:51, 3.57it/s] 7%|▋ | 26255/371472 [2:07:09<26:37:21, 3.60it/s] 7%|▋ | 26256/371472 [2:07:09<26:26:48, 3.63it/s] 7%|▋ | 26257/371472 [2:07:10<27:32:41, 3.48it/s] 7%|▋ | 26258/371472 [2:07:10<26:53:07, 3.57it/s] 7%|▋ | 26259/371472 [2:07:10<28:15:33, 3.39it/s] 7%|▋ | 26260/371472 [2:07:11<28:13:50, 3.40it/s] {'loss': 4.5566, 'learning_rate': 9.368279859510008e-07, 'epoch': 1.13} + 7%|▋ | 26260/371472 [2:07:11<28:13:50, 3.40it/s] 7%|▋ | 26261/371472 [2:07:11<27:09:56, 3.53it/s] 7%|▋ | 26262/371472 [2:07:11<27:08:58, 3.53it/s] 7%|▋ | 26263/371472 [2:07:11<28:43:13, 3.34it/s] 7%|▋ | 26264/371472 [2:07:12<27:51:39, 3.44it/s] 7%|▋ | 26265/371472 [2:07:12<27:01:37, 3.55it/s] 7%|▋ | 26266/371472 [2:07:12<26:51:30, 3.57it/s] 7%|▋ | 26267/371472 [2:07:13<27:31:52, 3.48it/s] 7%|▋ | 26268/371472 [2:07:13<26:37:09, 3.60it/s] 7%|▋ | 26269/371472 [2:07:13<26:58:23, 3.56it/s] 7%|▋ | 26270/371472 [2:07:13<28:04:26, 3.42it/s] 7%|▋ | 26271/371472 [2:07:14<31:17:56, 3.06it/s] 7%|▋ | 26272/371472 [2:07:14<31:49:42, 3.01it/s] 7%|▋ | 26273/371472 [2:07:14<30:08:28, 3.18it/s] 7%|▋ | 26274/371472 [2:07:15<28:42:01, 3.34it/s] 7%|▋ | 26275/371472 [2:07:15<28:09:08, 3.41it/s] 7%|▋ | 26276/371472 [2:07:15<27:37:56, 3.47it/s] 7%|▋ | 26277/371472 [2:07:16<26:22:20, 3.64it/s] 7%|▋ | 26278/371472 [2:07:16<26:00:51, 3.69it/s] 7%|▋ | 26279/371472 [2:07:16<28:04:48, 3.41it/s] 7%|▋ | 26280/371472 [2:07:16<27:14:01, 3.52it/s] {'loss': 4.4153, 'learning_rate': 9.36779503975522e-07, 'epoch': 1.13} + 7%|▋ | 26280/371472 [2:07:16<27:14:01, 3.52it/s] 7%|▋ | 26281/371472 [2:07:17<27:51:57, 3.44it/s] 7%|▋ | 26282/371472 [2:07:17<27:25:30, 3.50it/s] 7%|▋ | 26283/371472 [2:07:17<26:49:28, 3.57it/s] 7%|▋ | 26284/371472 [2:07:17<26:03:57, 3.68it/s] 7%|▋ | 26285/371472 [2:07:18<26:38:11, 3.60it/s] 7%|▋ | 26286/371472 [2:07:18<27:27:36, 3.49it/s] 7%|▋ | 26287/371472 [2:07:18<27:45:48, 3.45it/s] 7%|▋ | 26288/371472 [2:07:19<26:47:12, 3.58it/s] 7%|▋ | 26289/371472 [2:07:19<26:49:58, 3.57it/s] 7%|▋ | 26290/371472 [2:07:19<29:10:00, 3.29it/s] 7%|▋ | 26291/371472 [2:07:20<28:16:22, 3.39it/s] 7%|▋ | 26292/371472 [2:07:20<27:28:06, 3.49it/s] 7%|▋ | 26293/371472 [2:07:20<26:35:29, 3.61it/s] 7%|▋ | 26294/371472 [2:07:20<29:54:24, 3.21it/s] 7%|▋ | 26295/371472 [2:07:21<28:07:12, 3.41it/s] 7%|▋ | 26296/371472 [2:07:21<27:41:30, 3.46it/s] 7%|▋ | 26297/371472 [2:07:21<27:18:21, 3.51it/s] 7%|▋ | 26298/371472 [2:07:22<27:19:52, 3.51it/s] 7%|▋ | 26299/371472 [2:07:22<26:30:05, 3.62it/s] 7%|▋ | 26300/371472 [2:07:22<28:26:35, 3.37it/s] {'loss': 4.3595, 'learning_rate': 9.36731022000043e-07, 'epoch': 1.13} + 7%|▋ | 26300/371472 [2:07:22<28:26:35, 3.37it/s] 7%|▋ | 26301/371472 [2:07:22<28:47:01, 3.33it/s] 7%|▋ | 26302/371472 [2:07:23<27:08:05, 3.53it/s] 7%|▋ | 26303/371472 [2:07:23<26:49:55, 3.57it/s] 7%|▋ | 26304/371472 [2:07:23<26:43:07, 3.59it/s] 7%|▋ | 26305/371472 [2:07:24<26:57:02, 3.56it/s] 7%|▋ | 26306/371472 [2:07:24<26:23:32, 3.63it/s] 7%|▋ | 26307/371472 [2:07:24<27:47:25, 3.45it/s] 7%|▋ | 26308/371472 [2:07:24<27:58:49, 3.43it/s] 7%|▋ | 26309/371472 [2:07:25<27:02:51, 3.54it/s] 7%|▋ | 26310/371472 [2:07:25<27:02:26, 3.55it/s] 7%|▋ | 26311/371472 [2:07:25<27:28:48, 3.49it/s] 7%|▋ | 26312/371472 [2:07:26<27:22:35, 3.50it/s] 7%|▋ | 26313/371472 [2:07:26<26:45:35, 3.58it/s] 7%|▋ | 26314/371472 [2:07:26<26:43:37, 3.59it/s] 7%|▋ | 26315/371472 [2:07:26<26:18:23, 3.64it/s] 7%|▋ | 26316/371472 [2:07:27<26:05:42, 3.67it/s] 7%|▋ | 26317/371472 [2:07:27<25:50:31, 3.71it/s] 7%|▋ | 26318/371472 [2:07:27<25:37:29, 3.74it/s] 7%|▋ | 26319/371472 [2:07:27<25:20:40, 3.78it/s] 7%|▋ | 26320/371472 [2:07:28<26:40:56, 3.59it/s] {'loss': 4.5201, 'learning_rate': 9.366825400245642e-07, 'epoch': 1.13} + 7%|▋ | 26320/371472 [2:07:28<26:40:56, 3.59it/s] 7%|▋ | 26321/371472 [2:07:28<27:59:19, 3.43it/s] 7%|▋ | 26322/371472 [2:07:28<28:01:14, 3.42it/s] 7%|▋ | 26323/371472 [2:07:29<28:12:28, 3.40it/s] 7%|▋ | 26324/371472 [2:07:29<28:11:17, 3.40it/s] 7%|▋ | 26325/371472 [2:07:29<29:03:31, 3.30it/s] 7%|▋ | 26326/371472 [2:07:30<27:58:13, 3.43it/s] 7%|▋ | 26327/371472 [2:07:30<27:35:34, 3.47it/s] 7%|▋ | 26328/371472 [2:07:30<27:28:18, 3.49it/s] 7%|▋ | 26329/371472 [2:07:30<27:03:55, 3.54it/s] 7%|▋ | 26330/371472 [2:07:31<26:20:04, 3.64it/s] 7%|▋ | 26331/371472 [2:07:31<25:57:28, 3.69it/s] 7%|▋ | 26332/371472 [2:07:31<26:06:09, 3.67it/s] 7%|▋ | 26333/371472 [2:07:31<25:47:24, 3.72it/s] 7%|▋ | 26334/371472 [2:07:32<25:48:56, 3.71it/s] 7%|▋ | 26335/371472 [2:07:32<26:38:56, 3.60it/s] 7%|▋ | 26336/371472 [2:07:32<27:25:22, 3.50it/s] 7%|▋ | 26337/371472 [2:07:33<26:43:03, 3.59it/s] 7%|▋ | 26338/371472 [2:07:33<27:00:31, 3.55it/s] 7%|▋ | 26339/371472 [2:07:33<26:08:04, 3.67it/s] 7%|▋ | 26340/371472 [2:07:33<28:55:53, 3.31it/s] {'loss': 4.7035, 'learning_rate': 9.366340580490853e-07, 'epoch': 1.13} + 7%|▋ | 26340/371472 [2:07:33<28:55:53, 3.31it/s] 7%|▋ | 26341/371472 [2:07:34<30:07:50, 3.18it/s] 7%|▋ | 26342/371472 [2:07:34<29:29:54, 3.25it/s] 7%|▋ | 26343/371472 [2:07:34<28:37:02, 3.35it/s] 7%|▋ | 26344/371472 [2:07:35<27:11:45, 3.53it/s] 7%|▋ | 26345/371472 [2:07:35<26:04:28, 3.68it/s] 7%|▋ | 26346/371472 [2:07:35<26:08:54, 3.67it/s] 7%|▋ | 26347/371472 [2:07:35<25:33:36, 3.75it/s] 7%|▋ | 26348/371472 [2:07:36<26:10:39, 3.66it/s] 7%|▋ | 26349/371472 [2:07:36<26:13:31, 3.66it/s] 7%|▋ | 26350/371472 [2:07:36<26:19:18, 3.64it/s] 7%|▋ | 26351/371472 [2:07:37<27:05:46, 3.54it/s] 7%|▋ | 26352/371472 [2:07:37<27:37:54, 3.47it/s] 7%|▋ | 26353/371472 [2:07:37<27:46:50, 3.45it/s] 7%|▋ | 26354/371472 [2:07:37<26:53:27, 3.56it/s] 7%|▋ | 26355/371472 [2:07:38<26:13:53, 3.65it/s] 7%|▋ | 26356/371472 [2:07:38<25:42:13, 3.73it/s] 7%|▋ | 26357/371472 [2:07:38<25:37:37, 3.74it/s] 7%|▋ | 26358/371472 [2:07:38<26:58:12, 3.55it/s] 7%|▋ | 26359/371472 [2:07:39<26:49:11, 3.57it/s] 7%|▋ | 26360/371472 [2:07:39<28:18:04, 3.39it/s] {'loss': 4.2116, 'learning_rate': 9.365855760736064e-07, 'epoch': 1.14} + 7%|▋ | 26360/371472 [2:07:39<28:18:04, 3.39it/s] 7%|▋ | 26361/371472 [2:07:39<27:32:05, 3.48it/s] 7%|▋ | 26362/371472 [2:07:40<26:39:35, 3.60it/s] 7%|▋ | 26363/371472 [2:07:40<29:24:01, 3.26it/s] 7%|▋ | 26364/371472 [2:07:40<29:27:30, 3.25it/s] 7%|▋ | 26365/371472 [2:07:41<28:56:12, 3.31it/s] 7%|▋ | 26366/371472 [2:07:41<29:33:44, 3.24it/s] 7%|▋ | 26367/371472 [2:07:41<28:41:49, 3.34it/s] 7%|▋ | 26368/371472 [2:07:41<29:11:20, 3.28it/s] 7%|▋ | 26369/371472 [2:07:42<27:51:33, 3.44it/s] 7%|▋ | 26370/371472 [2:07:42<28:05:24, 3.41it/s] 7%|▋ | 26371/371472 [2:07:42<27:17:24, 3.51it/s] 7%|▋ | 26372/371472 [2:07:43<26:55:42, 3.56it/s] 7%|▋ | 26373/371472 [2:07:43<26:25:19, 3.63it/s] 7%|▋ | 26374/371472 [2:07:43<26:34:07, 3.61it/s] 7%|▋ | 26375/371472 [2:07:43<28:26:32, 3.37it/s] 7%|▋ | 26376/371472 [2:07:44<28:42:48, 3.34it/s] 7%|▋ | 26377/371472 [2:07:44<28:53:41, 3.32it/s] 7%|▋ | 26378/371472 [2:07:44<29:12:01, 3.28it/s] 7%|▋ | 26379/371472 [2:07:45<27:57:58, 3.43it/s] 7%|▋ | 26380/371472 [2:07:45<28:10:10, 3.40it/s] {'loss': 4.2687, 'learning_rate': 9.365370940981274e-07, 'epoch': 1.14} + 7%|▋ | 26380/371472 [2:07:45<28:10:10, 3.40it/s] 7%|▋ | 26381/371472 [2:07:45<28:26:51, 3.37it/s] 7%|▋ | 26382/371472 [2:07:46<27:59:38, 3.42it/s] 7%|▋ | 26383/371472 [2:07:46<28:47:51, 3.33it/s] 7%|▋ | 26384/371472 [2:07:46<28:25:32, 3.37it/s] 7%|▋ | 26385/371472 [2:07:46<28:52:17, 3.32it/s] 7%|▋ | 26386/371472 [2:07:47<27:50:55, 3.44it/s] 7%|▋ | 26387/371472 [2:07:47<27:42:51, 3.46it/s] 7%|▋ | 26388/371472 [2:07:47<27:57:09, 3.43it/s] 7%|▋ | 26389/371472 [2:07:48<29:52:27, 3.21it/s] 7%|▋ | 26390/371472 [2:07:48<30:34:56, 3.13it/s] 7%|▋ | 26391/371472 [2:07:48<29:01:20, 3.30it/s] 7%|▋ | 26392/371472 [2:07:49<28:02:17, 3.42it/s] 7%|▋ | 26393/371472 [2:07:49<27:44:04, 3.46it/s] 7%|▋ | 26394/371472 [2:07:49<26:54:41, 3.56it/s] 7%|▋ | 26395/371472 [2:07:49<27:43:20, 3.46it/s] 7%|▋ | 26396/371472 [2:07:50<26:58:19, 3.55it/s] 7%|▋ | 26397/371472 [2:07:50<26:43:12, 3.59it/s] 7%|▋ | 26398/371472 [2:07:50<27:54:50, 3.43it/s] 7%|▋ | 26399/371472 [2:07:50<27:00:16, 3.55it/s] 7%|▋ | 26400/371472 [2:07:51<26:26:08, 3.63it/s] {'loss': 4.55, 'learning_rate': 9.364886121226486e-07, 'epoch': 1.14} + 7%|▋ | 26400/371472 [2:07:51<26:26:08, 3.63it/s] 7%|▋ | 26401/371472 [2:07:51<26:47:41, 3.58it/s] 7%|▋ | 26402/371472 [2:07:51<26:02:05, 3.68it/s] 7%|▋ | 26403/371472 [2:07:52<26:25:07, 3.63it/s] 7%|▋ | 26404/371472 [2:07:52<26:04:56, 3.67it/s] 7%|▋ | 26405/371472 [2:07:52<26:02:54, 3.68it/s] 7%|▋ | 26406/371472 [2:07:52<26:24:36, 3.63it/s] 7%|▋ | 26407/371472 [2:07:53<25:53:12, 3.70it/s] 7%|▋ | 26408/371472 [2:07:53<26:07:03, 3.67it/s] 7%|▋ | 26409/371472 [2:07:53<26:55:01, 3.56it/s] 7%|▋ | 26410/371472 [2:07:54<27:01:54, 3.55it/s] 7%|▋ | 26411/371472 [2:07:54<26:28:29, 3.62it/s] 7%|▋ | 26412/371472 [2:07:54<25:54:15, 3.70it/s] 7%|▋ | 26413/371472 [2:07:54<25:51:59, 3.71it/s] 7%|▋ | 26414/371472 [2:07:55<26:32:40, 3.61it/s] 7%|▋ | 26415/371472 [2:07:55<27:26:57, 3.49it/s] 7%|▋ | 26416/371472 [2:07:55<27:09:24, 3.53it/s] 7%|▋ | 26417/371472 [2:07:55<26:50:10, 3.57it/s] 7%|▋ | 26418/371472 [2:07:56<27:19:38, 3.51it/s] 7%|▋ | 26419/371472 [2:07:56<27:21:17, 3.50it/s] 7%|▋ | 26420/371472 [2:07:56<27:33:45, 3.48it/s] {'loss': 4.9486, 'learning_rate': 9.364401301471697e-07, 'epoch': 1.14} + 7%|▋ | 26420/371472 [2:07:56<27:33:45, 3.48it/s] 7%|▋ | 26421/371472 [2:07:57<26:22:18, 3.63it/s] 7%|▋ | 26422/371472 [2:07:57<28:06:32, 3.41it/s] 7%|▋ | 26423/371472 [2:07:57<26:49:19, 3.57it/s] 7%|▋ | 26424/371472 [2:07:57<26:48:28, 3.58it/s] 7%|▋ | 26425/371472 [2:07:58<26:56:55, 3.56it/s] 7%|▋ | 26426/371472 [2:07:58<27:03:05, 3.54it/s] 7%|▋ | 26427/371472 [2:07:58<28:05:13, 3.41it/s] 7%|▋ | 26428/371472 [2:07:59<27:15:44, 3.52it/s] 7%|▋ | 26429/371472 [2:07:59<27:25:18, 3.50it/s] 7%|▋ | 26430/371472 [2:07:59<26:30:21, 3.62it/s] 7%|▋ | 26431/371472 [2:07:59<26:18:49, 3.64it/s] 7%|▋ | 26432/371472 [2:08:00<26:06:39, 3.67it/s] 7%|▋ | 26433/371472 [2:08:00<27:30:47, 3.48it/s] 7%|▋ | 26434/371472 [2:08:00<28:41:36, 3.34it/s] 7%|▋ | 26435/371472 [2:08:01<28:18:18, 3.39it/s] 7%|▋ | 26436/371472 [2:08:01<28:19:14, 3.38it/s] 7%|▋ | 26437/371472 [2:08:01<28:37:21, 3.35it/s] 7%|▋ | 26438/371472 [2:08:02<29:53:22, 3.21it/s] 7%|▋ | 26439/371472 [2:08:02<28:59:26, 3.31it/s] 7%|▋ | 26440/371472 [2:08:02<27:32:15, 3.48it/s] {'loss': 4.3644, 'learning_rate': 9.363916481716909e-07, 'epoch': 1.14} + 7%|▋ | 26440/371472 [2:08:02<27:32:15, 3.48it/s] 7%|▋ | 26441/371472 [2:08:02<26:44:43, 3.58it/s] 7%|▋ | 26442/371472 [2:08:03<28:02:44, 3.42it/s] 7%|▋ | 26443/371472 [2:08:03<28:00:12, 3.42it/s] 7%|▋ | 26444/371472 [2:08:03<27:33:55, 3.48it/s] 7%|▋ | 26445/371472 [2:08:04<27:26:31, 3.49it/s] 7%|▋ | 26446/371472 [2:08:04<30:08:43, 3.18it/s] 7%|▋ | 26447/371472 [2:08:04<28:38:30, 3.35it/s] 7%|▋ | 26448/371472 [2:08:04<27:24:45, 3.50it/s] 7%|▋ | 26449/371472 [2:08:05<26:58:42, 3.55it/s] 7%|▋ | 26450/371472 [2:08:05<26:48:01, 3.58it/s] 7%|▋ | 26451/371472 [2:08:05<26:23:11, 3.63it/s] 7%|▋ | 26452/371472 [2:08:06<26:16:55, 3.65it/s] 7%|▋ | 26453/371472 [2:08:06<27:37:07, 3.47it/s] 7%|▋ | 26454/371472 [2:08:06<28:04:22, 3.41it/s] 7%|▋ | 26455/371472 [2:08:06<26:54:09, 3.56it/s] 7%|▋ | 26456/371472 [2:08:07<27:11:49, 3.52it/s] 7%|▋ | 26457/371472 [2:08:07<30:23:56, 3.15it/s] 7%|▋ | 26458/371472 [2:08:07<28:39:56, 3.34it/s] 7%|▋ | 26459/371472 [2:08:08<29:03:35, 3.30it/s] 7%|▋ | 26460/371472 [2:08:08<28:32:39, 3.36it/s] {'loss': 4.6457, 'learning_rate': 9.363431661962119e-07, 'epoch': 1.14} + 7%|▋ | 26460/371472 [2:08:08<28:32:39, 3.36it/s] 7%|▋ | 26461/371472 [2:08:08<28:13:27, 3.40it/s] 7%|▋ | 26462/371472 [2:08:09<28:27:15, 3.37it/s] 7%|▋ | 26463/371472 [2:08:09<27:48:20, 3.45it/s] 7%|▋ | 26464/371472 [2:08:09<27:18:12, 3.51it/s] 7%|▋ | 26465/371472 [2:08:09<28:59:46, 3.31it/s] 7%|▋ | 26466/371472 [2:08:10<28:26:22, 3.37it/s] 7%|▋ | 26467/371472 [2:08:10<27:39:20, 3.47it/s] 7%|▋ | 26468/371472 [2:08:10<27:04:42, 3.54it/s] 7%|▋ | 26469/371472 [2:08:11<26:50:23, 3.57it/s] 7%|▋ | 26470/371472 [2:08:11<26:44:12, 3.58it/s] 7%|▋ | 26471/371472 [2:08:11<26:34:51, 3.61it/s] 7%|▋ | 26472/371472 [2:08:11<27:28:49, 3.49it/s] 7%|▋ | 26473/371472 [2:08:12<28:57:37, 3.31it/s] 7%|▋ | 26474/371472 [2:08:12<30:02:48, 3.19it/s] 7%|▋ | 26475/371472 [2:08:12<28:37:09, 3.35it/s] 7%|▋ | 26476/371472 [2:08:13<26:56:19, 3.56it/s] 7%|▋ | 26477/371472 [2:08:13<27:20:26, 3.51it/s] 7%|▋ | 26478/371472 [2:08:13<26:34:44, 3.61it/s] 7%|▋ | 26479/371472 [2:08:13<27:38:48, 3.47it/s] 7%|▋ | 26480/371472 [2:08:14<30:31:56, 3.14it/s] {'loss': 4.5175, 'learning_rate': 9.36294684220733e-07, 'epoch': 1.14} + 7%|▋ | 26480/371472 [2:08:14<30:31:56, 3.14it/s] 7%|▋ | 26481/371472 [2:08:14<31:51:27, 3.01it/s] 7%|▋ | 26482/371472 [2:08:14<30:26:26, 3.15it/s] 7%|▋ | 26483/371472 [2:08:15<29:31:16, 3.25it/s] 7%|▋ | 26484/371472 [2:08:15<28:03:57, 3.41it/s] 7%|▋ | 26485/371472 [2:08:15<28:53:50, 3.32it/s] 7%|▋ | 26486/371472 [2:08:16<27:42:38, 3.46it/s] 7%|▋ | 26487/371472 [2:08:16<27:31:05, 3.48it/s] 7%|▋ | 26488/371472 [2:08:16<27:47:12, 3.45it/s] 7%|▋ | 26489/371472 [2:08:17<30:21:55, 3.16it/s] 7%|▋ | 26490/371472 [2:08:17<29:35:33, 3.24it/s] 7%|▋ | 26491/371472 [2:08:17<28:46:52, 3.33it/s] 7%|▋ | 26492/371472 [2:08:17<27:43:33, 3.46it/s] 7%|▋ | 26493/371472 [2:08:18<28:40:02, 3.34it/s] 7%|▋ | 26494/371472 [2:08:18<28:10:15, 3.40it/s] 7%|▋ | 26495/371472 [2:08:18<28:35:30, 3.35it/s] 7%|▋ | 26496/371472 [2:08:19<27:57:58, 3.43it/s] 7%|▋ | 26497/371472 [2:08:19<26:46:21, 3.58it/s] 7%|▋ | 26498/371472 [2:08:19<27:19:08, 3.51it/s] 7%|▋ | 26499/371472 [2:08:19<28:05:21, 3.41it/s] 7%|▋ | 26500/371472 [2:08:20<28:03:34, 3.42it/s] {'loss': 4.4466, 'learning_rate': 9.362462022452541e-07, 'epoch': 1.14} + 7%|▋ | 26500/371472 [2:08:20<28:03:34, 3.42it/s] 7%|▋ | 26501/371472 [2:08:20<27:03:19, 3.54it/s] 7%|▋ | 26502/371472 [2:08:20<28:11:43, 3.40it/s] 7%|▋ | 26503/371472 [2:08:21<27:49:33, 3.44it/s] 7%|▋ | 26504/371472 [2:08:21<27:58:34, 3.43it/s] 7%|▋ | 26505/371472 [2:08:21<28:51:40, 3.32it/s] 7%|▋ | 26506/371472 [2:08:21<27:18:31, 3.51it/s] 7%|▋ | 26507/371472 [2:08:22<26:36:02, 3.60it/s] 7%|▋ | 26508/371472 [2:08:22<26:35:40, 3.60it/s] 7%|▋ | 26509/371472 [2:08:22<26:31:42, 3.61it/s] 7%|▋ | 26510/371472 [2:08:23<28:37:49, 3.35it/s] 7%|▋ | 26511/371472 [2:08:23<27:56:39, 3.43it/s] 7%|▋ | 26512/371472 [2:08:23<29:44:34, 3.22it/s] 7%|▋ | 26513/371472 [2:08:23<28:23:44, 3.37it/s] 7%|▋ | 26514/371472 [2:08:24<27:40:37, 3.46it/s] 7%|▋ | 26515/371472 [2:08:24<27:20:46, 3.50it/s] 7%|▋ | 26516/371472 [2:08:24<26:36:34, 3.60it/s] 7%|▋ | 26517/371472 [2:08:25<27:02:35, 3.54it/s] 7%|▋ | 26518/371472 [2:08:25<26:44:28, 3.58it/s] 7%|▋ | 26519/371472 [2:08:25<28:02:02, 3.42it/s] 7%|▋ | 26520/371472 [2:08:25<27:28:12, 3.49it/s] {'loss': 4.4972, 'learning_rate': 9.361977202697752e-07, 'epoch': 1.14} + 7%|▋ | 26520/371472 [2:08:25<27:28:12, 3.49it/s] 7%|▋ | 26521/371472 [2:08:26<28:01:13, 3.42it/s] 7%|▋ | 26522/371472 [2:08:26<28:17:45, 3.39it/s] 7%|▋ | 26523/371472 [2:08:26<28:40:58, 3.34it/s] 7%|▋ | 26524/371472 [2:08:27<27:30:58, 3.48it/s] 7%|▋ | 26525/371472 [2:08:27<26:42:41, 3.59it/s] 7%|▋ | 26526/371472 [2:08:27<26:43:14, 3.59it/s] 7%|▋ | 26527/371472 [2:08:27<26:29:00, 3.62it/s] 7%|▋ | 26528/371472 [2:08:28<27:42:43, 3.46it/s] 7%|▋ | 26529/371472 [2:08:28<26:51:48, 3.57it/s] 7%|▋ | 26530/371472 [2:08:28<27:02:31, 3.54it/s] 7%|▋ | 26531/371472 [2:08:29<29:48:43, 3.21it/s] 7%|▋ | 26532/371472 [2:08:29<28:50:25, 3.32it/s] 7%|▋ | 26533/371472 [2:08:29<27:19:08, 3.51it/s] 7%|▋ | 26534/371472 [2:08:30<28:02:37, 3.42it/s] 7%|▋ | 26535/371472 [2:08:30<29:21:40, 3.26it/s] 7%|▋ | 26536/371472 [2:08:30<30:05:48, 3.18it/s] 7%|▋ | 26537/371472 [2:08:30<29:11:24, 3.28it/s] 7%|▋ | 26538/371472 [2:08:31<28:06:33, 3.41it/s] 7%|▋ | 26539/371472 [2:08:31<27:12:05, 3.52it/s] 7%|▋ | 26540/371472 [2:08:31<27:09:12, 3.53it/s] {'loss': 4.4487, 'learning_rate': 9.361492382942963e-07, 'epoch': 1.14} + 7%|▋ | 26540/371472 [2:08:31<27:09:12, 3.53it/s] 7%|▋ | 26541/371472 [2:08:32<27:48:32, 3.45it/s] 7%|▋ | 26542/371472 [2:08:32<28:04:26, 3.41it/s] 7%|▋ | 26543/371472 [2:08:32<27:24:03, 3.50it/s] 7%|▋ | 26544/371472 [2:08:32<27:03:31, 3.54it/s] 7%|▋ | 26545/371472 [2:08:33<27:28:22, 3.49it/s] 7%|▋ | 26546/371472 [2:08:33<35:55:55, 2.67it/s] 7%|▋ | 26547/371472 [2:08:34<33:51:00, 2.83it/s] 7%|▋ | 26548/371472 [2:08:34<31:04:45, 3.08it/s] 7%|▋ | 26549/371472 [2:08:34<29:34:24, 3.24it/s] 7%|▋ | 26550/371472 [2:08:34<28:34:30, 3.35it/s] 7%|▋ | 26551/371472 [2:08:35<28:28:04, 3.37it/s] 7%|▋ | 26552/371472 [2:08:35<26:52:15, 3.57it/s] 7%|▋ | 26553/371472 [2:08:35<27:20:40, 3.50it/s] 7%|▋ | 26554/371472 [2:08:36<28:49:43, 3.32it/s] 7%|▋ | 26555/371472 [2:08:36<27:58:49, 3.42it/s] 7%|▋ | 26556/371472 [2:08:36<27:00:16, 3.55it/s] 7%|▋ | 26557/371472 [2:08:36<28:18:12, 3.39it/s] 7%|▋ | 26558/371472 [2:08:37<28:33:52, 3.35it/s] 7%|▋ | 26559/371472 [2:08:37<28:53:03, 3.32it/s] 7%|▋ | 26560/371472 [2:08:37<27:47:11, 3.45it/s] {'loss': 4.3274, 'learning_rate': 9.361007563188174e-07, 'epoch': 1.14} + 7%|▋ | 26560/371472 [2:08:37<27:47:11, 3.45it/s] 7%|▋ | 26561/371472 [2:08:38<29:13:57, 3.28it/s] 7%|▋ | 26562/371472 [2:08:38<28:37:19, 3.35it/s] 7%|▋ | 26563/371472 [2:08:38<27:43:36, 3.46it/s] 7%|▋ | 26564/371472 [2:08:38<27:19:51, 3.51it/s] 7%|▋ | 26565/371472 [2:08:39<28:06:40, 3.41it/s] 7%|▋ | 26566/371472 [2:08:39<27:56:50, 3.43it/s] 7%|▋ | 26567/371472 [2:08:39<29:23:08, 3.26it/s] 7%|▋ | 26568/371472 [2:08:40<28:13:21, 3.39it/s] 7%|▋ | 26569/371472 [2:08:40<27:37:54, 3.47it/s] 7%|▋ | 26570/371472 [2:08:40<27:07:11, 3.53it/s] 7%|▋ | 26571/371472 [2:08:41<28:50:19, 3.32it/s] 7%|▋ | 26572/371472 [2:08:41<27:28:37, 3.49it/s] 7%|▋ | 26573/371472 [2:08:41<26:24:28, 3.63it/s] 7%|▋ | 26574/371472 [2:08:41<26:03:10, 3.68it/s] 7%|▋ | 26575/371472 [2:08:42<27:13:57, 3.52it/s] 7%|▋ | 26576/371472 [2:08:42<28:18:20, 3.38it/s] 7%|▋ | 26577/371472 [2:08:42<30:41:04, 3.12it/s] 7%|▋ | 26578/371472 [2:08:43<28:31:29, 3.36it/s] 7%|▋ | 26579/371472 [2:08:43<26:58:40, 3.55it/s] 7%|▋ | 26580/371472 [2:08:43<27:08:32, 3.53it/s] {'loss': 4.4503, 'learning_rate': 9.360522743433386e-07, 'epoch': 1.14} + 7%|▋ | 26580/371472 [2:08:43<27:08:32, 3.53it/s] 7%|▋ | 26581/371472 [2:08:43<27:48:23, 3.45it/s] 7%|▋ | 26582/371472 [2:08:44<27:30:07, 3.48it/s] 7%|▋ | 26583/371472 [2:08:44<27:40:57, 3.46it/s] 7%|▋ | 26584/371472 [2:08:44<27:34:37, 3.47it/s] 7%|▋ | 26585/371472 [2:08:45<29:33:35, 3.24it/s] 7%|▋ | 26586/371472 [2:08:45<29:25:36, 3.26it/s] 7%|▋ | 26587/371472 [2:08:45<28:58:25, 3.31it/s] 7%|▋ | 26588/371472 [2:08:46<27:14:34, 3.52it/s] 7%|▋ | 26589/371472 [2:08:46<27:09:02, 3.53it/s] 7%|▋ | 26590/371472 [2:08:46<28:13:25, 3.39it/s] 7%|▋ | 26591/371472 [2:08:46<27:16:52, 3.51it/s] 7%|▋ | 26592/371472 [2:08:47<26:05:55, 3.67it/s] 7%|▋ | 26593/371472 [2:08:47<26:17:18, 3.64it/s] 7%|▋ | 26594/371472 [2:08:47<26:46:12, 3.58it/s] 7%|▋ | 26595/371472 [2:08:47<26:33:59, 3.61it/s] 7%|▋ | 26596/371472 [2:08:48<27:06:21, 3.53it/s] 7%|▋ | 26597/371472 [2:08:48<27:44:33, 3.45it/s] 7%|▋ | 26598/371472 [2:08:48<28:14:51, 3.39it/s] 7%|▋ | 26599/371472 [2:08:49<28:28:45, 3.36it/s] 7%|▋ | 26600/371472 [2:08:49<27:47:18, 3.45it/s] {'loss': 4.3373, 'learning_rate': 9.360037923678596e-07, 'epoch': 1.15} + 7%|▋ | 26600/371472 [2:08:49<27:47:18, 3.45it/s] 7%|▋ | 26601/371472 [2:08:49<28:12:42, 3.40it/s] 7%|▋ | 26602/371472 [2:08:50<28:59:40, 3.30it/s] 7%|▋ | 26603/371472 [2:08:50<28:31:50, 3.36it/s] 7%|▋ | 26604/371472 [2:08:50<28:44:10, 3.33it/s] 7%|▋ | 26605/371472 [2:08:50<29:34:11, 3.24it/s] 7%|▋ | 26606/371472 [2:08:51<29:34:33, 3.24it/s] 7%|▋ | 26607/371472 [2:08:51<29:29:25, 3.25it/s] 7%|▋ | 26608/371472 [2:08:51<28:18:56, 3.38it/s] 7%|▋ | 26609/371472 [2:08:52<27:31:30, 3.48it/s] 7%|▋ | 26610/371472 [2:08:52<27:39:41, 3.46it/s] 7%|▋ | 26611/371472 [2:08:52<26:50:14, 3.57it/s] 7%|▋ | 26612/371472 [2:08:52<26:19:55, 3.64it/s] 7%|▋ | 26613/371472 [2:08:53<26:38:23, 3.60it/s] 7%|▋ | 26614/371472 [2:08:53<27:50:43, 3.44it/s] 7%|▋ | 26615/371472 [2:08:53<28:00:53, 3.42it/s] 7%|▋ | 26616/371472 [2:08:54<28:13:24, 3.39it/s] 7%|▋ | 26617/371472 [2:08:54<27:55:33, 3.43it/s] 7%|▋ | 26618/371472 [2:08:54<28:07:29, 3.41it/s] 7%|▋ | 26619/371472 [2:08:55<27:35:06, 3.47it/s] 7%|▋ | 26620/371472 [2:08:55<26:53:40, 3.56it/s] {'loss': 4.3123, 'learning_rate': 9.359553103923807e-07, 'epoch': 1.15} + 7%|▋ | 26620/371472 [2:08:55<26:53:40, 3.56it/s] 7%|▋ | 26621/371472 [2:08:55<26:55:35, 3.56it/s] 7%|▋ | 26622/371472 [2:08:55<25:54:15, 3.70it/s] 7%|▋ | 26623/371472 [2:08:56<25:29:22, 3.76it/s] 7%|▋ | 26624/371472 [2:08:56<25:45:54, 3.72it/s] 7%|▋ | 26625/371472 [2:08:56<26:33:03, 3.61it/s] 7%|▋ | 26626/371472 [2:08:56<27:16:24, 3.51it/s] 7%|▋ | 26627/371472 [2:08:57<28:05:40, 3.41it/s] 7%|▋ | 26628/371472 [2:08:57<28:07:16, 3.41it/s] 7%|▋ | 26629/371472 [2:08:57<27:47:59, 3.45it/s] 7%|▋ | 26630/371472 [2:08:58<27:31:09, 3.48it/s] 7%|▋ | 26631/371472 [2:08:58<26:53:48, 3.56it/s] 7%|▋ | 26632/371472 [2:08:58<28:05:01, 3.41it/s] 7%|▋ | 26633/371472 [2:08:59<29:26:50, 3.25it/s] 7%|▋ | 26634/371472 [2:08:59<29:49:37, 3.21it/s] 7%|▋ | 26635/371472 [2:08:59<29:36:14, 3.24it/s] 7%|▋ | 26636/371472 [2:08:59<28:37:10, 3.35it/s] 7%|▋ | 26637/371472 [2:09:00<28:17:25, 3.39it/s] 7%|▋ | 26638/371472 [2:09:00<27:21:26, 3.50it/s] 7%|▋ | 26639/371472 [2:09:00<26:18:04, 3.64it/s] 7%|▋ | 26640/371472 [2:09:01<27:30:43, 3.48it/s] {'loss': 4.4104, 'learning_rate': 9.359068284169018e-07, 'epoch': 1.15} + 7%|▋ | 26640/371472 [2:09:01<27:30:43, 3.48it/s] 7%|▋ | 26641/371472 [2:09:01<26:59:04, 3.55it/s] 7%|▋ | 26642/371472 [2:09:01<26:52:04, 3.57it/s] 7%|▋ | 26643/371472 [2:09:01<26:54:01, 3.56it/s] 7%|▋ | 26644/371472 [2:09:02<29:13:37, 3.28it/s] 7%|▋ | 26645/371472 [2:09:02<27:58:35, 3.42it/s] 7%|▋ | 26646/371472 [2:09:02<28:45:39, 3.33it/s] 7%|▋ | 26647/371472 [2:09:03<29:11:13, 3.28it/s] 7%|▋ | 26648/371472 [2:09:03<29:40:09, 3.23it/s] 7%|▋ | 26649/371472 [2:09:03<29:49:43, 3.21it/s] 7%|▋ | 26650/371472 [2:09:04<29:56:30, 3.20it/s] 7%|▋ | 26651/371472 [2:09:04<29:39:13, 3.23it/s] 7%|▋ | 26652/371472 [2:09:04<28:38:20, 3.34it/s] 7%|▋ | 26653/371472 [2:09:04<27:26:13, 3.49it/s] 7%|▋ | 26654/371472 [2:09:05<27:07:38, 3.53it/s] 7%|▋ | 26655/371472 [2:09:05<27:32:59, 3.48it/s] 7%|▋ | 26656/371472 [2:09:05<28:28:22, 3.36it/s] 7%|▋ | 26657/371472 [2:09:06<27:59:22, 3.42it/s] 7%|▋ | 26658/371472 [2:09:06<27:17:39, 3.51it/s] 7%|▋ | 26659/371472 [2:09:06<27:09:29, 3.53it/s] 7%|▋ | 26660/371472 [2:09:06<26:33:46, 3.61it/s] {'loss': 4.7109, 'learning_rate': 9.35858346441423e-07, 'epoch': 1.15} + 7%|▋ | 26660/371472 [2:09:06<26:33:46, 3.61it/s] 7%|▋ | 26661/371472 [2:09:07<26:42:04, 3.59it/s] 7%|▋ | 26662/371472 [2:09:07<26:05:28, 3.67it/s] 7%|▋ | 26663/371472 [2:09:07<25:28:07, 3.76it/s] 7%|▋ | 26664/371472 [2:09:07<25:04:32, 3.82it/s] 7%|▋ | 26665/371472 [2:09:08<26:36:31, 3.60it/s] 7%|▋ | 26666/371472 [2:09:08<25:59:29, 3.69it/s] 7%|▋ | 26667/371472 [2:09:08<25:01:52, 3.83it/s] 7%|▋ | 26668/371472 [2:09:09<24:58:52, 3.83it/s] 7%|▋ | 26669/371472 [2:09:09<26:01:05, 3.68it/s] 7%|▋ | 26670/371472 [2:09:09<26:28:43, 3.62it/s] 7%|▋ | 26671/371472 [2:09:09<26:28:51, 3.62it/s] 7%|▋ | 26672/371472 [2:09:10<25:57:03, 3.69it/s] 7%|▋ | 26673/371472 [2:09:10<26:17:15, 3.64it/s] 7%|▋ | 26674/371472 [2:09:10<26:00:31, 3.68it/s] 7%|▋ | 26675/371472 [2:09:10<25:33:08, 3.75it/s] 7%|▋ | 26676/371472 [2:09:11<25:27:53, 3.76it/s] 7%|▋ | 26677/371472 [2:09:11<27:38:30, 3.46it/s] 7%|▋ | 26678/371472 [2:09:11<29:12:37, 3.28it/s] 7%|▋ | 26679/371472 [2:09:12<28:39:40, 3.34it/s] 7%|▋ | 26680/371472 [2:09:12<29:09:44, 3.28it/s] {'loss': 4.5663, 'learning_rate': 9.35809864465944e-07, 'epoch': 1.15} + 7%|▋ | 26680/371472 [2:09:12<29:09:44, 3.28it/s] 7%|▋ | 26681/371472 [2:09:12<30:02:23, 3.19it/s] 7%|▋ | 26682/371472 [2:09:13<28:34:48, 3.35it/s] 7%|▋ | 26683/371472 [2:09:13<27:27:01, 3.49it/s] 7%|▋ | 26684/371472 [2:09:13<26:26:43, 3.62it/s] 7%|▋ | 26685/371472 [2:09:13<26:29:34, 3.62it/s] 7%|▋ | 26686/371472 [2:09:14<28:22:54, 3.37it/s] 7%|▋ | 26687/371472 [2:09:14<32:19:39, 2.96it/s] 7%|▋ | 26688/371472 [2:09:14<32:16:51, 2.97it/s] 7%|▋ | 26689/371472 [2:09:15<30:46:45, 3.11it/s] 7%|▋ | 26690/371472 [2:09:15<29:50:31, 3.21it/s] 7%|▋ | 26691/371472 [2:09:15<28:16:56, 3.39it/s] 7%|▋ | 26692/371472 [2:09:16<28:53:38, 3.31it/s] 7%|▋ | 26693/371472 [2:09:16<28:35:00, 3.35it/s] 7%|▋ | 26694/371472 [2:09:16<27:56:50, 3.43it/s] 7%|▋ | 26695/371472 [2:09:16<26:47:56, 3.57it/s] 7%|▋ | 26696/371472 [2:09:17<27:38:59, 3.46it/s] 7%|▋ | 26697/371472 [2:09:17<27:44:24, 3.45it/s] 7%|▋ | 26698/371472 [2:09:17<26:59:19, 3.55it/s] 7%|▋ | 26699/371472 [2:09:18<27:26:44, 3.49it/s] 7%|▋ | 26700/371472 [2:09:18<29:51:38, 3.21it/s] {'loss': 4.3707, 'learning_rate': 9.357613824904652e-07, 'epoch': 1.15} + 7%|▋ | 26700/371472 [2:09:18<29:51:38, 3.21it/s] 7%|▋ | 26701/371472 [2:09:18<28:54:04, 3.31it/s] 7%|▋ | 26702/371472 [2:09:19<27:32:21, 3.48it/s] 7%|▋ | 26703/371472 [2:09:19<28:39:37, 3.34it/s] 7%|▋ | 26704/371472 [2:09:19<30:14:11, 3.17it/s] 7%|▋ | 26705/371472 [2:09:20<33:43:41, 2.84it/s] 7%|▋ | 26706/371472 [2:09:20<32:40:26, 2.93it/s] 7%|▋ | 26707/371472 [2:09:20<30:18:04, 3.16it/s] 7%|▋ | 26708/371472 [2:09:20<28:44:34, 3.33it/s] 7%|▋ | 26709/371472 [2:09:21<27:37:22, 3.47it/s] 7%|▋ | 26710/371472 [2:09:21<26:17:11, 3.64it/s] 7%|▋ | 26711/371472 [2:09:21<26:03:56, 3.67it/s] 7%|▋ | 26712/371472 [2:09:22<29:02:05, 3.30it/s] 7%|▋ | 26713/371472 [2:09:22<27:47:41, 3.45it/s] 7%|▋ | 26714/371472 [2:09:22<28:10:11, 3.40it/s] 7%|▋ | 26715/371472 [2:09:22<27:48:30, 3.44it/s] 7%|▋ | 26716/371472 [2:09:23<27:37:57, 3.47it/s] 7%|▋ | 26717/371472 [2:09:23<28:42:25, 3.34it/s] 7%|▋ | 26718/371472 [2:09:23<29:12:55, 3.28it/s] 7%|▋ | 26719/371472 [2:09:24<28:16:11, 3.39it/s] 7%|▋ | 26720/371472 [2:09:24<27:26:04, 3.49it/s] {'loss': 4.3986, 'learning_rate': 9.357129005149863e-07, 'epoch': 1.15} + 7%|▋ | 26720/371472 [2:09:24<27:26:04, 3.49it/s] 7%|▋ | 26721/371472 [2:09:24<27:56:14, 3.43it/s] 7%|▋ | 26722/371472 [2:09:25<27:50:32, 3.44it/s] 7%|▋ | 26723/371472 [2:09:25<27:17:08, 3.51it/s] 7%|▋ | 26724/371472 [2:09:25<27:27:34, 3.49it/s] 7%|▋ | 26725/371472 [2:09:25<28:19:52, 3.38it/s] 7%|▋ | 26726/371472 [2:09:26<30:03:33, 3.19it/s] 7%|▋ | 26727/371472 [2:09:26<29:28:52, 3.25it/s] 7%|▋ | 26728/371472 [2:09:26<29:12:05, 3.28it/s] 7%|▋ | 26729/371472 [2:09:27<29:28:43, 3.25it/s] 7%|▋ | 26730/371472 [2:09:27<27:43:14, 3.45it/s] 7%|▋ | 26731/371472 [2:09:27<27:37:56, 3.47it/s] 7%|▋ | 26732/371472 [2:09:27<26:46:03, 3.58it/s] 7%|▋ | 26733/371472 [2:09:28<26:43:29, 3.58it/s] 7%|▋ | 26734/371472 [2:09:28<26:40:06, 3.59it/s] 7%|▋ | 26735/371472 [2:09:28<25:55:52, 3.69it/s] 7%|▋ | 26736/371472 [2:09:29<25:51:48, 3.70it/s] 7%|▋ | 26737/371472 [2:09:29<25:42:10, 3.73it/s] 7%|▋ | 26738/371472 [2:09:29<26:04:12, 3.67it/s] 7%|▋ | 26739/371472 [2:09:29<25:47:47, 3.71it/s] 7%|▋ | 26740/371472 [2:09:30<26:20:10, 3.64it/s] {'loss': 4.4001, 'learning_rate': 9.356644185395074e-07, 'epoch': 1.15} + 7%|▋ | 26740/371472 [2:09:30<26:20:10, 3.64it/s] 7%|▋ | 26741/371472 [2:09:30<29:18:53, 3.27it/s] 7%|▋ | 26742/371472 [2:09:30<30:25:22, 3.15it/s] 7%|▋ | 26743/371472 [2:09:31<29:34:26, 3.24it/s] 7%|▋ | 26744/371472 [2:09:31<30:08:07, 3.18it/s] 7%|▋ | 26745/371472 [2:09:31<29:02:21, 3.30it/s] 7%|▋ | 26746/371472 [2:09:32<28:09:54, 3.40it/s] 7%|▋ | 26747/371472 [2:09:32<27:15:13, 3.51it/s] 7%|▋ | 26748/371472 [2:09:32<28:26:22, 3.37it/s] 7%|▋ | 26749/371472 [2:09:32<28:05:00, 3.41it/s] 7%|▋ | 26750/371472 [2:09:33<28:10:56, 3.40it/s] 7%|▋ | 26751/371472 [2:09:33<27:09:41, 3.53it/s] 7%|▋ | 26752/371472 [2:09:33<28:23:51, 3.37it/s] 7%|▋ | 26753/371472 [2:09:34<28:03:46, 3.41it/s] 7%|▋ | 26754/371472 [2:09:34<29:20:20, 3.26it/s] 7%|▋ | 26755/371472 [2:09:34<28:10:24, 3.40it/s] 7%|▋ | 26756/371472 [2:09:34<27:12:54, 3.52it/s] 7%|▋ | 26757/371472 [2:09:35<26:48:49, 3.57it/s] 7%|▋ | 26758/371472 [2:09:35<27:02:57, 3.54it/s] 7%|▋ | 26759/371472 [2:09:35<28:16:50, 3.39it/s] 7%|▋ | 26760/371472 [2:09:36<28:07:07, 3.41it/s] {'loss': 4.4226, 'learning_rate': 9.356159365640284e-07, 'epoch': 1.15} + 7%|▋ | 26760/371472 [2:09:36<28:07:07, 3.41it/s] 7%|▋ | 26761/371472 [2:09:36<27:37:24, 3.47it/s] 7%|▋ | 26762/371472 [2:09:36<27:04:59, 3.54it/s] 7%|▋ | 26763/371472 [2:09:36<27:01:37, 3.54it/s] 7%|▋ | 26764/371472 [2:09:37<26:12:44, 3.65it/s] 7%|▋ | 26765/371472 [2:09:37<26:10:58, 3.66it/s] 7%|▋ | 26766/371472 [2:09:37<26:38:17, 3.59it/s] 7%|▋ | 26767/371472 [2:09:38<26:59:27, 3.55it/s] 7%|▋ | 26768/371472 [2:09:38<26:43:21, 3.58it/s] 7%|▋ | 26769/371472 [2:09:38<26:25:37, 3.62it/s] 7%|▋ | 26770/371472 [2:09:38<25:53:31, 3.70it/s] 7%|▋ | 26771/371472 [2:09:39<26:10:00, 3.66it/s] 7%|▋ | 26772/371472 [2:09:39<25:39:08, 3.73it/s] 7%|▋ | 26773/371472 [2:09:39<25:06:49, 3.81it/s] 7%|▋ | 26774/371472 [2:09:39<24:57:12, 3.84it/s] 7%|▋ | 26775/371472 [2:09:40<25:25:08, 3.77it/s] 7%|▋ | 26776/371472 [2:09:40<25:33:31, 3.75it/s] 7%|▋ | 26777/371472 [2:09:40<26:31:31, 3.61it/s] 7%|▋ | 26778/371472 [2:09:41<28:41:59, 3.34it/s] 7%|▋ | 26779/371472 [2:09:41<27:48:40, 3.44it/s] 7%|▋ | 26780/371472 [2:09:41<27:19:01, 3.51it/s] {'loss': 4.8085, 'learning_rate': 9.355674545885496e-07, 'epoch': 1.15} + 7%|▋ | 26780/371472 [2:09:41<27:19:01, 3.51it/s] 7%|▋ | 26781/371472 [2:09:41<27:09:32, 3.53it/s] 7%|▋ | 26782/371472 [2:09:42<27:45:02, 3.45it/s] 7%|▋ | 26783/371472 [2:09:42<27:37:19, 3.47it/s] 7%|▋ | 26784/371472 [2:09:42<27:49:04, 3.44it/s] 7%|▋ | 26785/371472 [2:09:43<28:07:48, 3.40it/s] 7%|▋ | 26786/371472 [2:09:43<30:01:34, 3.19it/s] 7%|▋ | 26787/371472 [2:09:43<30:33:00, 3.13it/s] 7%|▋ | 26788/371472 [2:09:44<29:11:57, 3.28it/s] 7%|▋ | 26789/371472 [2:09:44<28:14:01, 3.39it/s] 7%|▋ | 26790/371472 [2:09:44<26:52:27, 3.56it/s] 7%|▋ | 26791/371472 [2:09:44<28:00:33, 3.42it/s] 7%|▋ | 26792/371472 [2:09:45<27:56:07, 3.43it/s] 7%|▋ | 26793/371472 [2:09:45<27:08:54, 3.53it/s] 7%|▋ | 26794/371472 [2:09:45<26:55:46, 3.56it/s] 7%|▋ | 26795/371472 [2:09:45<26:13:02, 3.65it/s] 7%|▋ | 26796/371472 [2:09:46<26:13:15, 3.65it/s] 7%|▋ | 26797/371472 [2:09:46<26:16:45, 3.64it/s] 7%|▋ | 26798/371472 [2:09:46<26:09:39, 3.66it/s] 7%|▋ | 26799/371472 [2:09:47<31:48:38, 3.01it/s] 7%|▋ | 26800/371472 [2:09:47<30:15:32, 3.16it/s] {'loss': 4.5611, 'learning_rate': 9.355189726130707e-07, 'epoch': 1.15} + 7%|▋ | 26800/371472 [2:09:47<30:15:32, 3.16it/s] 7%|▋ | 26801/371472 [2:09:47<29:03:34, 3.29it/s] 7%|▋ | 26802/371472 [2:09:48<28:31:26, 3.36it/s] 7%|▋ | 26803/371472 [2:09:48<28:03:22, 3.41it/s] 7%|▋ | 26804/371472 [2:09:48<27:34:59, 3.47it/s] 7%|▋ | 26805/371472 [2:09:48<28:04:09, 3.41it/s] 7%|▋ | 26806/371472 [2:09:49<26:58:42, 3.55it/s] 7%|▋ | 26807/371472 [2:09:49<27:22:32, 3.50it/s] 7%|▋ | 26808/371472 [2:09:49<27:32:19, 3.48it/s] 7%|▋ | 26809/371472 [2:09:50<26:52:15, 3.56it/s] 7%|▋ | 26810/371472 [2:09:50<26:54:34, 3.56it/s] 7%|▋ | 26811/371472 [2:09:50<27:26:43, 3.49it/s] 7%|▋ | 26812/371472 [2:09:50<26:47:16, 3.57it/s] 7%|▋ | 26813/371472 [2:09:51<26:54:36, 3.56it/s] 7%|▋ | 26814/371472 [2:09:51<27:16:57, 3.51it/s] 7%|▋ | 26815/371472 [2:09:51<28:53:19, 3.31it/s] 7%|▋ | 26816/371472 [2:09:52<27:54:53, 3.43it/s] 7%|▋ | 26817/371472 [2:09:52<27:18:32, 3.51it/s] 7%|▋ | 26818/371472 [2:09:52<27:11:44, 3.52it/s] 7%|▋ | 26819/371472 [2:09:52<26:38:03, 3.59it/s] 7%|▋ | 26820/371472 [2:09:53<27:25:15, 3.49it/s] {'loss': 4.3543, 'learning_rate': 9.354704906375919e-07, 'epoch': 1.16} + 7%|▋ | 26820/371472 [2:09:53<27:25:15, 3.49it/s] 7%|▋ | 26821/371472 [2:09:53<28:53:35, 3.31it/s] 7%|▋ | 26822/371472 [2:09:53<29:16:31, 3.27it/s] 7%|▋ | 26823/371472 [2:09:54<28:15:31, 3.39it/s] 7%|▋ | 26824/371472 [2:09:54<27:45:14, 3.45it/s] 7%|▋ | 26825/371472 [2:09:54<28:57:24, 3.31it/s] 7%|▋ | 26826/371472 [2:09:55<28:38:54, 3.34it/s] 7%|▋ | 26827/371472 [2:09:55<30:06:35, 3.18it/s] 7%|▋ | 26828/371472 [2:09:55<29:04:24, 3.29it/s] 7%|▋ | 26829/371472 [2:09:55<28:38:41, 3.34it/s] 7%|▋ | 26830/371472 [2:09:56<29:14:24, 3.27it/s] 7%|▋ | 26831/371472 [2:09:56<30:11:13, 3.17it/s] 7%|▋ | 26832/371472 [2:09:56<28:56:15, 3.31it/s] 7%|▋ | 26833/371472 [2:09:57<30:38:06, 3.12it/s] 7%|▋ | 26834/371472 [2:09:57<29:04:11, 3.29it/s] 7%|▋ | 26835/371472 [2:09:57<27:59:28, 3.42it/s] 7%|▋ | 26836/371472 [2:09:58<27:04:09, 3.54it/s] 7%|▋ | 26837/371472 [2:09:58<29:33:52, 3.24it/s] 7%|▋ | 26838/371472 [2:09:58<29:46:54, 3.21it/s] 7%|▋ | 26839/371472 [2:09:58<28:28:09, 3.36it/s] 7%|▋ | 26840/371472 [2:09:59<28:24:01, 3.37it/s] {'loss': 4.4754, 'learning_rate': 9.354220086621129e-07, 'epoch': 1.16} + 7%|▋ | 26840/371472 [2:09:59<28:24:01, 3.37it/s] 7%|▋ | 26841/371472 [2:09:59<27:55:13, 3.43it/s] 7%|▋ | 26842/371472 [2:09:59<29:29:09, 3.25it/s] 7%|▋ | 26843/371472 [2:10:00<29:15:31, 3.27it/s] 7%|▋ | 26844/371472 [2:10:00<29:20:28, 3.26it/s] 7%|▋ | 26845/371472 [2:10:00<28:54:36, 3.31it/s] 7%|▋ | 26846/371472 [2:10:01<27:25:51, 3.49it/s] 7%|▋ | 26847/371472 [2:10:01<26:53:00, 3.56it/s] 7%|▋ | 26848/371472 [2:10:01<27:52:00, 3.44it/s] 7%|▋ | 26849/371472 [2:10:01<27:51:36, 3.44it/s] 7%|▋ | 26850/371472 [2:10:02<29:53:50, 3.20it/s] 7%|▋ | 26851/371472 [2:10:02<28:50:37, 3.32it/s] 7%|▋ | 26852/371472 [2:10:02<28:01:27, 3.42it/s] 7%|▋ | 26853/371472 [2:10:03<27:22:17, 3.50it/s] 7%|▋ | 26854/371472 [2:10:03<27:01:40, 3.54it/s] 7%|▋ | 26855/371472 [2:10:03<27:32:11, 3.48it/s] 7%|▋ | 26856/371472 [2:10:03<27:29:07, 3.48it/s] 7%|▋ | 26857/371472 [2:10:04<27:00:31, 3.54it/s] 7%|▋ | 26858/371472 [2:10:04<26:56:04, 3.55it/s] 7%|▋ | 26859/371472 [2:10:04<26:41:35, 3.59it/s] 7%|▋ | 26860/371472 [2:10:05<26:24:02, 3.63it/s] {'loss': 4.6213, 'learning_rate': 9.353735266866339e-07, 'epoch': 1.16} + 7%|▋ | 26860/371472 [2:10:05<26:24:02, 3.63it/s] 7%|▋ | 26861/371472 [2:10:05<26:05:06, 3.67it/s] 7%|▋ | 26862/371472 [2:10:05<27:23:57, 3.49it/s] 7%|▋ | 26863/371472 [2:10:05<27:55:51, 3.43it/s] 7%|▋ | 26864/371472 [2:10:06<27:40:16, 3.46it/s] 7%|▋ | 26865/371472 [2:10:06<26:52:52, 3.56it/s] 7%|▋ | 26866/371472 [2:10:06<26:43:51, 3.58it/s] 7%|▋ | 26867/371472 [2:10:07<27:56:01, 3.43it/s] 7%|▋ | 26868/371472 [2:10:07<28:04:37, 3.41it/s] 7%|▋ | 26869/371472 [2:10:07<27:46:18, 3.45it/s] 7%|▋ | 26870/371472 [2:10:07<26:56:10, 3.55it/s] 7%|▋ | 26871/371472 [2:10:08<26:22:27, 3.63it/s] 7%|▋ | 26872/371472 [2:10:08<27:12:28, 3.52it/s] 7%|▋ | 26873/371472 [2:10:08<26:51:57, 3.56it/s] 7%|▋ | 26874/371472 [2:10:09<26:22:39, 3.63it/s] 7%|▋ | 26875/371472 [2:10:09<29:59:31, 3.19it/s] 7%|▋ | 26876/371472 [2:10:09<28:53:39, 3.31it/s] 7%|▋ | 26877/371472 [2:10:09<27:30:12, 3.48it/s] 7%|▋ | 26878/371472 [2:10:10<28:43:28, 3.33it/s] 7%|▋ | 26879/371472 [2:10:10<27:26:06, 3.49it/s] 7%|▋ | 26880/371472 [2:10:10<26:38:11, 3.59it/s] {'loss': 4.3756, 'learning_rate': 9.353250447111551e-07, 'epoch': 1.16} + 7%|▋ | 26880/371472 [2:10:10<26:38:11, 3.59it/s] 7%|▋ | 26881/371472 [2:10:11<26:12:57, 3.65it/s] 7%|▋ | 26882/371472 [2:10:11<25:56:19, 3.69it/s] 7%|▋ | 26883/371472 [2:10:11<26:48:36, 3.57it/s] 7%|▋ | 26884/371472 [2:10:11<27:52:57, 3.43it/s] 7%|▋ | 26885/371472 [2:10:12<30:55:10, 3.10it/s] 7%|▋ | 26886/371472 [2:10:12<30:45:44, 3.11it/s] 7%|▋ | 26887/371472 [2:10:12<30:19:30, 3.16it/s] 7%|▋ | 26888/371472 [2:10:13<29:12:19, 3.28it/s] 7%|▋ | 26889/371472 [2:10:13<29:12:10, 3.28it/s] 7%|▋ | 26890/371472 [2:10:13<30:25:20, 3.15it/s] 7%|▋ | 26891/371472 [2:10:14<29:46:37, 3.21it/s] 7%|▋ | 26892/371472 [2:10:14<30:37:41, 3.13it/s] 7%|▋ | 26893/371472 [2:10:14<29:07:36, 3.29it/s] 7%|▋ | 26894/371472 [2:10:15<28:02:34, 3.41it/s] 7%|▋ | 26895/371472 [2:10:15<27:18:01, 3.51it/s] 7%|▋ | 26896/371472 [2:10:15<28:27:42, 3.36it/s] 7%|▋ | 26897/371472 [2:10:15<27:55:57, 3.43it/s] 7%|▋ | 26898/371472 [2:10:16<28:05:30, 3.41it/s] 7%|▋ | 26899/371472 [2:10:16<27:55:21, 3.43it/s] 7%|▋ | 26900/371472 [2:10:16<28:15:52, 3.39it/s] {'loss': 4.2725, 'learning_rate': 9.352765627356762e-07, 'epoch': 1.16} + 7%|▋ | 26900/371472 [2:10:16<28:15:52, 3.39it/s] 7%|▋ | 26901/371472 [2:10:17<27:18:20, 3.51it/s] 7%|▋ | 26902/371472 [2:10:17<27:33:33, 3.47it/s] 7%|▋ | 26903/371472 [2:10:17<27:02:58, 3.54it/s] 7%|▋ | 26904/371472 [2:10:17<27:31:09, 3.48it/s] 7%|▋ | 26905/371472 [2:10:18<28:31:47, 3.35it/s] 7%|▋ | 26906/371472 [2:10:18<27:35:04, 3.47it/s] 7%|▋ | 26907/371472 [2:10:18<26:36:11, 3.60it/s] 7%|▋ | 26908/371472 [2:10:19<27:29:54, 3.48it/s] 7%|▋ | 26909/371472 [2:10:19<27:21:09, 3.50it/s] 7%|▋ | 26910/371472 [2:10:19<28:10:11, 3.40it/s] 7%|▋ | 26911/371472 [2:10:19<27:28:57, 3.48it/s] 7%|▋ | 26912/371472 [2:10:20<29:31:41, 3.24it/s] 7%|▋ | 26913/371472 [2:10:20<30:02:54, 3.19it/s] 7%|▋ | 26914/371472 [2:10:20<28:45:38, 3.33it/s] 7%|▋ | 26915/371472 [2:10:21<27:55:10, 3.43it/s] 7%|▋ | 26916/371472 [2:10:21<27:30:26, 3.48it/s] 7%|▋ | 26917/371472 [2:10:21<27:54:29, 3.43it/s] 7%|▋ | 26918/371472 [2:10:22<26:43:22, 3.58it/s] 7%|▋ | 26919/371472 [2:10:22<25:42:03, 3.72it/s] 7%|▋ | 26920/371472 [2:10:22<26:40:35, 3.59it/s] {'loss': 4.4786, 'learning_rate': 9.352280807601973e-07, 'epoch': 1.16} + 7%|▋ | 26920/371472 [2:10:22<26:40:35, 3.59it/s] 7%|▋ | 26921/371472 [2:10:22<26:27:24, 3.62it/s] 7%|▋ | 26922/371472 [2:10:23<29:25:30, 3.25it/s] 7%|▋ | 26923/371472 [2:10:23<28:31:23, 3.36it/s] 7%|▋ | 26924/371472 [2:10:23<28:26:49, 3.36it/s] 7%|▋ | 26925/371472 [2:10:24<32:32:59, 2.94it/s] 7%|▋ | 26926/371472 [2:10:24<30:44:16, 3.11it/s] 7%|▋ | 26927/371472 [2:10:24<29:04:38, 3.29it/s] 7%|▋ | 26928/371472 [2:10:25<27:15:18, 3.51it/s] 7%|▋ | 26929/371472 [2:10:25<26:53:07, 3.56it/s] 7%|▋ | 26930/371472 [2:10:25<26:34:24, 3.60it/s] 7%|▋ | 26931/371472 [2:10:25<26:32:29, 3.61it/s] 7%|▋ | 26932/371472 [2:10:26<26:45:51, 3.58it/s] 7%|▋ | 26933/371472 [2:10:26<26:28:10, 3.62it/s] 7%|▋ | 26934/371472 [2:10:26<27:30:06, 3.48it/s] 7%|▋ | 26935/371472 [2:10:26<27:14:43, 3.51it/s] 7%|▋ | 26936/371472 [2:10:27<26:36:49, 3.60it/s] 7%|▋ | 26937/371472 [2:10:27<26:34:43, 3.60it/s] 7%|▋ | 26938/371472 [2:10:27<26:29:00, 3.61it/s] 7%|▋ | 26939/371472 [2:10:28<25:53:53, 3.70it/s] 7%|▋ | 26940/371472 [2:10:28<25:54:45, 3.69it/s] {'loss': 4.6271, 'learning_rate': 9.351795987847185e-07, 'epoch': 1.16} + 7%|▋ | 26940/371472 [2:10:28<25:54:45, 3.69it/s] 7%|▋ | 26941/371472 [2:10:28<26:44:26, 3.58it/s] 7%|▋ | 26942/371472 [2:10:28<27:18:20, 3.50it/s] 7%|▋ | 26943/371472 [2:10:29<27:06:26, 3.53it/s] 7%|▋ | 26944/371472 [2:10:29<26:22:19, 3.63it/s] 7%|▋ | 26945/371472 [2:10:29<26:56:26, 3.55it/s] 7%|▋ | 26946/371472 [2:10:30<26:50:34, 3.57it/s] 7%|▋ | 26947/371472 [2:10:30<26:56:20, 3.55it/s] 7%|▋ | 26948/371472 [2:10:30<26:48:27, 3.57it/s] 7%|▋ | 26949/371472 [2:10:30<25:46:24, 3.71it/s] 7%|▋ | 26950/371472 [2:10:31<26:03:43, 3.67it/s] 7%|▋ | 26951/371472 [2:10:31<27:43:03, 3.45it/s] 7%|▋ | 26952/371472 [2:10:31<28:00:24, 3.42it/s] 7%|▋ | 26953/371472 [2:10:32<28:43:30, 3.33it/s] 7%|▋ | 26954/371472 [2:10:32<29:20:57, 3.26it/s] 7%|▋ | 26955/371472 [2:10:32<28:29:58, 3.36it/s] 7%|▋ | 26956/371472 [2:10:32<28:27:18, 3.36it/s] 7%|▋ | 26957/371472 [2:10:33<27:47:31, 3.44it/s] 7%|▋ | 26958/371472 [2:10:33<26:56:11, 3.55it/s] 7%|▋ | 26959/371472 [2:10:33<28:00:34, 3.42it/s] 7%|▋ | 26960/371472 [2:10:34<26:55:54, 3.55it/s] {'loss': 4.6945, 'learning_rate': 9.351311168092397e-07, 'epoch': 1.16} + 7%|▋ | 26960/371472 [2:10:34<26:55:54, 3.55it/s] 7%|▋ | 26961/371472 [2:10:34<26:16:16, 3.64it/s] 7%|▋ | 26962/371472 [2:10:34<26:39:34, 3.59it/s] 7%|▋ | 26963/371472 [2:10:34<26:06:11, 3.67it/s] 7%|▋ | 26964/371472 [2:10:35<26:36:04, 3.60it/s] 7%|▋ | 26965/371472 [2:10:35<25:47:21, 3.71it/s] 7%|▋ | 26966/371472 [2:10:35<26:23:05, 3.63it/s] 7%|▋ | 26967/371472 [2:10:35<25:35:38, 3.74it/s] 7%|▋ | 26968/371472 [2:10:36<26:54:40, 3.56it/s] 7%|▋ | 26969/371472 [2:10:36<27:00:34, 3.54it/s] 7%|▋ | 26970/371472 [2:10:36<26:22:09, 3.63it/s] 7%|▋ | 26971/371472 [2:10:37<27:45:48, 3.45it/s] 7%|▋ | 26972/371472 [2:10:37<27:24:23, 3.49it/s] 7%|▋ | 26973/371472 [2:10:37<26:18:12, 3.64it/s] 7%|▋ | 26974/371472 [2:10:37<25:34:47, 3.74it/s] 7%|▋ | 26975/371472 [2:10:38<29:18:52, 3.26it/s] 7%|▋ | 26976/371472 [2:10:38<28:58:35, 3.30it/s] 7%|▋ | 26977/371472 [2:10:38<29:15:11, 3.27it/s] 7%|▋ | 26978/371472 [2:10:39<28:56:30, 3.31it/s] 7%|▋ | 26979/371472 [2:10:39<28:15:31, 3.39it/s] 7%|▋ | 26980/371472 [2:10:39<27:02:40, 3.54it/s] {'loss': 4.5041, 'learning_rate': 9.350826348337606e-07, 'epoch': 1.16} + 7%|▋ | 26980/371472 [2:10:39<27:02:40, 3.54it/s] 7%|▋ | 26981/371472 [2:10:40<29:20:24, 3.26it/s] 7%|▋ | 26982/371472 [2:10:40<28:41:02, 3.34it/s] 7%|▋ | 26983/371472 [2:10:40<28:04:11, 3.41it/s] 7%|▋ | 26984/371472 [2:10:41<30:53:07, 3.10it/s] 7%|▋ | 26985/371472 [2:10:41<30:04:00, 3.18it/s] 7%|▋ | 26986/371472 [2:10:41<32:11:33, 2.97it/s] 7%|▋ | 26987/371472 [2:10:42<30:38:24, 3.12it/s] 7%|▋ | 26988/371472 [2:10:42<29:36:51, 3.23it/s] 7%|▋ | 26989/371472 [2:10:42<30:10:26, 3.17it/s] 7%|▋ | 26990/371472 [2:10:42<29:11:26, 3.28it/s] 7%|▋ | 26991/371472 [2:10:43<28:55:38, 3.31it/s] 7%|▋ | 26992/371472 [2:10:43<27:50:11, 3.44it/s] 7%|▋ | 26993/371472 [2:10:43<27:07:44, 3.53it/s] 7%|▋ | 26994/371472 [2:10:44<26:34:48, 3.60it/s] 7%|▋ | 26995/371472 [2:10:44<27:07:55, 3.53it/s] 7%|▋ | 26996/371472 [2:10:44<28:20:16, 3.38it/s] 7%|▋ | 26997/371472 [2:10:44<28:03:49, 3.41it/s] 7%|▋ | 26998/371472 [2:10:45<26:58:10, 3.55it/s] 7%|▋ | 26999/371472 [2:10:45<28:00:09, 3.42it/s] 7%|▋ | 27000/371472 [2:10:45<30:21:11, 3.15it/s] {'loss': 4.6746, 'learning_rate': 9.350341528582817e-07, 'epoch': 1.16} + 7%|▋ | 27000/371472 [2:10:45<30:21:11, 3.15it/s] 7%|▋ | 27001/371472 [2:10:46<29:23:08, 3.26it/s] 7%|▋ | 27002/371472 [2:10:46<31:33:57, 3.03it/s] 7%|▋ | 27003/371472 [2:10:46<29:47:00, 3.21it/s] 7%|▋ | 27004/371472 [2:10:47<28:52:55, 3.31it/s] 7%|▋ | 27005/371472 [2:10:47<28:35:38, 3.35it/s] 7%|▋ | 27006/371472 [2:10:47<28:34:13, 3.35it/s] 7%|▋ | 27007/371472 [2:10:47<27:09:29, 3.52it/s] 7%|▋ | 27008/371472 [2:10:48<26:30:59, 3.61it/s] 7%|▋ | 27009/371472 [2:10:48<27:13:48, 3.51it/s] 7%|▋ | 27010/371472 [2:10:48<27:11:49, 3.52it/s] 7%|▋ | 27011/371472 [2:10:49<29:19:50, 3.26it/s] 7%|▋ | 27012/371472 [2:10:49<28:53:10, 3.31it/s] 7%|▋ | 27013/371472 [2:10:49<30:12:46, 3.17it/s] 7%|▋ | 27014/371472 [2:10:50<30:42:03, 3.12it/s] 7%|▋ | 27015/371472 [2:10:50<29:49:26, 3.21it/s] 7%|▋ | 27016/371472 [2:10:50<28:51:49, 3.31it/s] 7%|▋ | 27017/371472 [2:10:50<28:11:17, 3.39it/s] 7%|▋ | 27018/371472 [2:10:51<28:04:03, 3.41it/s] 7%|▋ | 27019/371472 [2:10:51<28:08:39, 3.40it/s] 7%|▋ | 27020/371472 [2:10:51<28:12:18, 3.39it/s] {'loss': 4.5183, 'learning_rate': 9.349856708828029e-07, 'epoch': 1.16} + 7%|▋ | 27020/371472 [2:10:51<28:12:18, 3.39it/s] 7%|▋ | 27021/371472 [2:10:52<27:54:56, 3.43it/s] 7%|▋ | 27022/371472 [2:10:52<27:55:46, 3.43it/s] 7%|▋ | 27023/371472 [2:10:52<27:37:43, 3.46it/s] 7%|▋ | 27024/371472 [2:10:52<27:30:51, 3.48it/s] 7%|▋ | 27025/371472 [2:10:53<27:12:31, 3.52it/s] 7%|▋ | 27026/371472 [2:10:53<27:45:13, 3.45it/s] 7%|▋ | 27027/371472 [2:10:53<27:29:31, 3.48it/s] 7%|▋ | 27028/371472 [2:10:54<28:02:29, 3.41it/s] 7%|▋ | 27029/371472 [2:10:54<27:30:14, 3.48it/s] 7%|▋ | 27030/371472 [2:10:54<27:00:07, 3.54it/s] 7%|▋ | 27031/371472 [2:10:55<29:02:45, 3.29it/s] 7%|▋ | 27032/371472 [2:10:55<29:29:28, 3.24it/s] 7%|▋ | 27033/371472 [2:10:55<29:20:45, 3.26it/s] 7%|▋ | 27034/371472 [2:10:55<28:48:54, 3.32it/s] 7%|▋ | 27035/371472 [2:10:56<30:58:14, 3.09it/s] 7%|▋ | 27036/371472 [2:10:56<29:06:40, 3.29it/s] 7%|▋ | 27037/371472 [2:10:56<28:49:42, 3.32it/s] 7%|▋ | 27038/371472 [2:10:57<28:29:14, 3.36it/s] 7%|▋ | 27039/371472 [2:10:57<28:09:59, 3.40it/s] 7%|▋ | 27040/371472 [2:10:57<28:24:46, 3.37it/s] {'loss': 4.5165, 'learning_rate': 9.34937188907324e-07, 'epoch': 1.16} + 7%|▋ | 27040/371472 [2:10:57<28:24:46, 3.37it/s] 7%|▋ | 27041/371472 [2:10:58<27:37:26, 3.46it/s] 7%|▋ | 27042/371472 [2:10:58<27:05:57, 3.53it/s] 7%|▋ | 27043/371472 [2:10:58<26:43:35, 3.58it/s] 7%|▋ | 27044/371472 [2:10:58<26:05:34, 3.67it/s] 7%|▋ | 27045/371472 [2:10:59<26:21:09, 3.63it/s] 7%|▋ | 27046/371472 [2:10:59<26:00:55, 3.68it/s] 7%|▋ | 27047/371472 [2:10:59<26:18:58, 3.64it/s] 7%|▋ | 27048/371472 [2:10:59<27:39:26, 3.46it/s] 7%|▋ | 27049/371472 [2:11:00<27:12:23, 3.52it/s] 7%|▋ | 27050/371472 [2:11:00<27:58:33, 3.42it/s] 7%|▋ | 27051/371472 [2:11:00<27:32:15, 3.47it/s] 7%|▋ | 27052/371472 [2:11:01<28:07:59, 3.40it/s] 7%|▋ | 27053/371472 [2:11:01<27:17:20, 3.51it/s] 7%|▋ | 27054/371472 [2:11:01<27:33:12, 3.47it/s] 7%|▋ | 27055/371472 [2:11:02<28:31:37, 3.35it/s] 7%|▋ | 27056/371472 [2:11:02<27:21:38, 3.50it/s] 7%|▋ | 27057/371472 [2:11:02<27:16:47, 3.51it/s] 7%|▋ | 27058/371472 [2:11:02<26:50:39, 3.56it/s] 7%|▋ | 27059/371472 [2:11:03<27:00:46, 3.54it/s] 7%|▋ | 27060/371472 [2:11:03<28:32:23, 3.35it/s] {'loss': 4.3108, 'learning_rate': 9.348887069318451e-07, 'epoch': 1.17} + 7%|▋ | 27060/371472 [2:11:03<28:32:23, 3.35it/s] 7%|▋ | 27061/371472 [2:11:03<28:12:10, 3.39it/s] 7%|▋ | 27062/371472 [2:11:04<27:28:25, 3.48it/s] 7%|▋ | 27063/371472 [2:11:04<28:49:15, 3.32it/s] 7%|▋ | 27064/371472 [2:11:04<28:24:18, 3.37it/s] 7%|▋ | 27065/371472 [2:11:04<27:24:31, 3.49it/s] 7%|▋ | 27066/371472 [2:11:05<28:29:28, 3.36it/s] 7%|▋ | 27067/371472 [2:11:05<29:55:59, 3.20it/s] 7%|▋ | 27068/371472 [2:11:05<28:38:48, 3.34it/s] 7%|▋ | 27069/371472 [2:11:06<29:22:21, 3.26it/s] 7%|▋ | 27070/371472 [2:11:06<29:42:33, 3.22it/s] 7%|▋ | 27071/371472 [2:11:06<28:29:37, 3.36it/s] 7%|▋ | 27072/371472 [2:11:07<28:33:23, 3.35it/s] 7%|▋ | 27073/371472 [2:11:07<28:15:03, 3.39it/s] 7%|▋ | 27074/371472 [2:11:07<27:22:07, 3.50it/s] 7%|▋ | 27075/371472 [2:11:07<27:00:45, 3.54it/s] 7%|▋ | 27076/371472 [2:11:08<26:17:56, 3.64it/s] 7%|▋ | 27077/371472 [2:11:08<26:55:29, 3.55it/s] 7%|▋ | 27078/371472 [2:11:08<26:33:19, 3.60it/s] 7%|▋ | 27079/371472 [2:11:08<26:15:33, 3.64it/s] 7%|▋ | 27080/371472 [2:11:09<26:53:48, 3.56it/s] {'loss': 4.321, 'learning_rate': 9.348402249563662e-07, 'epoch': 1.17} + 7%|▋ | 27080/371472 [2:11:09<26:53:48, 3.56it/s] 7%|▋ | 27081/371472 [2:11:09<26:40:54, 3.59it/s] 7%|▋ | 27082/371472 [2:11:09<25:40:14, 3.73it/s] 7%|▋ | 27083/371472 [2:11:10<25:59:59, 3.68it/s] 7%|▋ | 27084/371472 [2:11:10<27:08:27, 3.52it/s] 7%|▋ | 27085/371472 [2:11:10<27:55:45, 3.43it/s] 7%|▋ | 27086/371472 [2:11:10<28:01:31, 3.41it/s] 7%|▋ | 27087/371472 [2:11:11<27:32:49, 3.47it/s] 7%|▋ | 27088/371472 [2:11:11<27:46:11, 3.44it/s] 7%|▋ | 27089/371472 [2:11:11<28:18:58, 3.38it/s] 7%|▋ | 27090/371472 [2:11:12<27:49:52, 3.44it/s] 7%|▋ | 27091/371472 [2:11:12<27:38:09, 3.46it/s] 7%|▋ | 27092/371472 [2:11:12<27:19:25, 3.50it/s] 7%|▋ | 27093/371472 [2:11:12<26:07:26, 3.66it/s] 7%|▋ | 27094/371472 [2:11:13<27:24:58, 3.49it/s] 7%|▋ | 27095/371472 [2:11:13<27:51:22, 3.43it/s] 7%|▋ | 27096/371472 [2:11:13<28:43:04, 3.33it/s] 7%|▋ | 27097/371472 [2:11:14<28:18:43, 3.38it/s] 7%|▋ | 27098/371472 [2:11:14<27:34:53, 3.47it/s] 7%|▋ | 27099/371472 [2:11:14<26:41:59, 3.58it/s] 7%|▋ | 27100/371472 [2:11:15<28:02:29, 3.41it/s] {'loss': 4.3061, 'learning_rate': 9.347917429808873e-07, 'epoch': 1.17} + 7%|▋ | 27100/371472 [2:11:15<28:02:29, 3.41it/s] 7%|▋ | 27101/371472 [2:11:15<28:26:04, 3.36it/s] 7%|▋ | 27102/371472 [2:11:15<27:55:41, 3.43it/s] 7%|▋ | 27103/371472 [2:11:15<27:33:01, 3.47it/s] 7%|▋ | 27104/371472 [2:11:16<27:58:32, 3.42it/s] 7%|▋ | 27105/371472 [2:11:16<28:45:33, 3.33it/s] 7%|▋ | 27106/371472 [2:11:16<27:32:16, 3.47it/s] 7%|▋ | 27107/371472 [2:11:17<28:51:20, 3.32it/s] 7%|▋ | 27108/371472 [2:11:17<28:11:05, 3.39it/s] 7%|▋ | 27109/371472 [2:11:17<28:06:33, 3.40it/s] 7%|▋ | 27110/371472 [2:11:17<27:14:23, 3.51it/s] 7%|▋ | 27111/371472 [2:11:18<27:45:34, 3.45it/s] 7%|▋ | 27112/371472 [2:11:18<27:30:02, 3.48it/s] 7%|▋ | 27113/371472 [2:11:18<27:09:25, 3.52it/s] 7%|▋ | 27114/371472 [2:11:19<27:35:57, 3.47it/s] 7%|▋ | 27115/371472 [2:11:19<27:12:23, 3.52it/s] 7%|▋ | 27116/371472 [2:11:19<28:12:45, 3.39it/s] 7%|▋ | 27117/371472 [2:11:19<28:03:52, 3.41it/s] 7%|▋ | 27118/371472 [2:11:20<27:50:41, 3.44it/s] 7%|▋ | 27119/371472 [2:11:20<27:18:16, 3.50it/s] 7%|▋ | 27120/371472 [2:11:20<26:51:27, 3.56it/s] {'loss': 4.617, 'learning_rate': 9.347432610054083e-07, 'epoch': 1.17} + 7%|▋ | 27120/371472 [2:11:20<26:51:27, 3.56it/s] 7%|▋ | 27121/371472 [2:11:21<26:40:35, 3.59it/s] 7%|▋ | 27122/371472 [2:11:21<27:49:12, 3.44it/s] 7%|▋ | 27123/371472 [2:11:21<26:36:19, 3.60it/s] 7%|▋ | 27124/371472 [2:11:21<27:11:54, 3.52it/s] 7%|▋ | 27125/371472 [2:11:22<27:25:50, 3.49it/s] 7%|▋ | 27126/371472 [2:11:22<26:38:16, 3.59it/s] 7%|▋ | 27127/371472 [2:11:22<26:37:48, 3.59it/s] 7%|▋ | 27128/371472 [2:11:23<26:29:18, 3.61it/s] 7%|▋ | 27129/371472 [2:11:23<26:57:45, 3.55it/s] 7%|▋ | 27130/371472 [2:11:23<26:34:36, 3.60it/s] 7%|▋ | 27131/371472 [2:11:23<28:10:25, 3.40it/s] 7%|▋ | 27132/371472 [2:11:24<27:40:50, 3.46it/s] 7%|▋ | 27133/371472 [2:11:24<27:27:46, 3.48it/s] 7%|▋ | 27134/371472 [2:11:24<29:04:14, 3.29it/s] 7%|▋ | 27135/371472 [2:11:25<28:17:53, 3.38it/s] 7%|▋ | 27136/371472 [2:11:25<30:04:13, 3.18it/s] 7%|▋ | 27137/371472 [2:11:25<28:38:40, 3.34it/s] 7%|▋ | 27138/371472 [2:11:26<30:23:53, 3.15it/s] 7%|▋ | 27139/371472 [2:11:26<29:17:24, 3.27it/s] 7%|▋ | 27140/371472 [2:11:26<28:47:41, 3.32it/s] {'loss': 4.5026, 'learning_rate': 9.346947790299295e-07, 'epoch': 1.17} + 7%|▋ | 27140/371472 [2:11:26<28:47:41, 3.32it/s] 7%|▋ | 27141/371472 [2:11:26<28:40:19, 3.34it/s] 7%|▋ | 27142/371472 [2:11:27<28:35:41, 3.34it/s] 7%|▋ | 27143/371472 [2:11:27<27:42:36, 3.45it/s] 7%|▋ | 27144/371472 [2:11:27<26:46:15, 3.57it/s] 7%|▋ | 27145/371472 [2:11:28<28:38:03, 3.34it/s] 7%|▋ | 27146/371472 [2:11:28<27:54:42, 3.43it/s] 7%|▋ | 27147/371472 [2:11:28<28:00:01, 3.42it/s] 7%|▋ | 27148/371472 [2:11:28<27:21:48, 3.50it/s] 7%|▋ | 27149/371472 [2:11:29<27:15:13, 3.51it/s] 7%|▋ | 27150/371472 [2:11:29<26:56:18, 3.55it/s] 7%|▋ | 27151/371472 [2:11:29<26:51:27, 3.56it/s] 7%|▋ | 27152/371472 [2:11:30<26:27:41, 3.61it/s] 7%|▋ | 27153/371472 [2:11:30<26:17:15, 3.64it/s] 7%|▋ | 27154/371472 [2:11:30<27:58:28, 3.42it/s] 7%|▋ | 27155/371472 [2:11:30<27:00:15, 3.54it/s] 7%|▋ | 27156/371472 [2:11:31<26:32:46, 3.60it/s] 7%|▋ | 27157/371472 [2:11:31<26:41:54, 3.58it/s] 7%|�� | 27158/371472 [2:11:31<28:15:58, 3.38it/s] 7%|▋ | 27159/371472 [2:11:32<28:11:35, 3.39it/s] 7%|▋ | 27160/371472 [2:11:32<28:47:13, 3.32it/s] {'loss': 4.5797, 'learning_rate': 9.346462970544506e-07, 'epoch': 1.17} + 7%|▋ | 27160/371472 [2:11:32<28:47:13, 3.32it/s] 7%|▋ | 27161/371472 [2:11:32<28:51:54, 3.31it/s] 7%|▋ | 27162/371472 [2:11:33<28:17:14, 3.38it/s] 7%|▋ | 27163/371472 [2:11:33<28:07:27, 3.40it/s] 7%|▋ | 27164/371472 [2:11:33<29:15:04, 3.27it/s] 7%|▋ | 27165/371472 [2:11:33<29:53:09, 3.20it/s] 7%|▋ | 27166/371472 [2:11:34<30:18:17, 3.16it/s] 7%|▋ | 27167/371472 [2:11:34<29:00:51, 3.30it/s] 7%|▋ | 27168/371472 [2:11:34<28:44:31, 3.33it/s] 7%|▋ | 27169/371472 [2:11:35<28:27:22, 3.36it/s] 7%|▋ | 27170/371472 [2:11:35<28:14:09, 3.39it/s] 7%|▋ | 27171/371472 [2:11:35<27:27:34, 3.48it/s] 7%|▋ | 27172/371472 [2:11:36<27:15:52, 3.51it/s] 7%|▋ | 27173/371472 [2:11:36<28:53:29, 3.31it/s] 7%|▋ | 27174/371472 [2:11:36<28:17:08, 3.38it/s] 7%|▋ | 27175/371472 [2:11:36<27:00:52, 3.54it/s] 7%|▋ | 27176/371472 [2:11:37<26:43:17, 3.58it/s] 7%|▋ | 27177/371472 [2:11:37<28:23:52, 3.37it/s] 7%|▋ | 27178/371472 [2:11:37<28:56:05, 3.31it/s] 7%|▋ | 27179/371472 [2:11:38<27:28:43, 3.48it/s] 7%|▋ | 27180/371472 [2:11:38<26:51:08, 3.56it/s] {'loss': 4.5256, 'learning_rate': 9.345978150789717e-07, 'epoch': 1.17} + 7%|▋ | 27180/371472 [2:11:38<26:51:08, 3.56it/s] 7%|▋ | 27181/371472 [2:11:38<26:33:51, 3.60it/s] 7%|▋ | 27182/371472 [2:11:38<28:31:34, 3.35it/s] 7%|▋ | 27183/371472 [2:11:39<27:47:37, 3.44it/s] 7%|▋ | 27184/371472 [2:11:39<27:18:29, 3.50it/s] 7%|▋ | 27185/371472 [2:11:39<28:16:32, 3.38it/s] 7%|▋ | 27186/371472 [2:11:40<28:28:56, 3.36it/s] 7%|▋ | 27187/371472 [2:11:40<28:37:44, 3.34it/s] 7%|▋ | 27188/371472 [2:11:40<27:56:05, 3.42it/s] 7%|▋ | 27189/371472 [2:11:40<26:59:03, 3.54it/s] 7%|▋ | 27190/371472 [2:11:41<26:34:57, 3.60it/s] 7%|▋ | 27191/371472 [2:11:41<25:48:32, 3.71it/s] 7%|▋ | 27192/371472 [2:11:41<26:42:18, 3.58it/s] 7%|▋ | 27193/371472 [2:11:42<27:14:14, 3.51it/s] 7%|▋ | 27194/371472 [2:11:42<27:38:38, 3.46it/s] 7%|▋ | 27195/371472 [2:11:42<29:15:33, 3.27it/s] 7%|▋ | 27196/371472 [2:11:42<27:47:24, 3.44it/s] 7%|▋ | 27197/371472 [2:11:43<28:32:06, 3.35it/s] 7%|▋ | 27198/371472 [2:11:43<29:45:56, 3.21it/s] 7%|▋ | 27199/371472 [2:11:43<29:01:01, 3.30it/s] 7%|▋ | 27200/371472 [2:11:44<29:01:12, 3.30it/s] {'loss': 4.4388, 'learning_rate': 9.345493331034928e-07, 'epoch': 1.17} + 7%|▋ | 27200/371472 [2:11:44<29:01:12, 3.30it/s] 7%|▋ | 27201/371472 [2:11:44<28:57:10, 3.30it/s] 7%|▋ | 27202/371472 [2:11:44<28:33:44, 3.35it/s] 7%|▋ | 27203/371472 [2:11:45<28:24:23, 3.37it/s] 7%|▋ | 27204/371472 [2:11:45<28:03:41, 3.41it/s] 7%|▋ | 27205/371472 [2:11:45<27:43:41, 3.45it/s] 7%|▋ | 27206/371472 [2:11:45<27:01:19, 3.54it/s] 7%|▋ | 27207/371472 [2:11:46<26:33:01, 3.60it/s] 7%|▋ | 27208/371472 [2:11:46<26:20:28, 3.63it/s] 7%|▋ | 27209/371472 [2:11:46<27:27:03, 3.48it/s] 7%|▋ | 27210/371472 [2:11:47<27:30:16, 3.48it/s] 7%|▋ | 27211/371472 [2:11:47<27:26:59, 3.48it/s] 7%|▋ | 27212/371472 [2:11:47<28:33:33, 3.35it/s] 7%|▋ | 27213/371472 [2:11:47<28:37:09, 3.34it/s] 7%|▋ | 27214/371472 [2:11:48<27:35:35, 3.47it/s] 7%|▋ | 27215/371472 [2:11:48<26:26:15, 3.62it/s] 7%|▋ | 27216/371472 [2:11:48<27:13:26, 3.51it/s] 7%|▋ | 27217/371472 [2:11:49<26:25:13, 3.62it/s] 7%|▋ | 27218/371472 [2:11:49<26:14:47, 3.64it/s] 7%|▋ | 27219/371472 [2:11:49<26:38:07, 3.59it/s] 7%|▋ | 27220/371472 [2:11:49<28:09:22, 3.40it/s] {'loss': 4.523, 'learning_rate': 9.345008511280139e-07, 'epoch': 1.17} + 7%|▋ | 27220/371472 [2:11:49<28:09:22, 3.40it/s] 7%|▋ | 27221/371472 [2:11:50<28:37:16, 3.34it/s] 7%|▋ | 27222/371472 [2:11:50<27:51:31, 3.43it/s] 7%|▋ | 27223/371472 [2:11:50<26:40:23, 3.59it/s] 7%|▋ | 27224/371472 [2:11:51<28:17:17, 3.38it/s] 7%|▋ | 27225/371472 [2:11:51<27:09:28, 3.52it/s] 7%|▋ | 27226/371472 [2:11:51<26:31:36, 3.60it/s] 7%|▋ | 27227/371472 [2:11:51<28:41:30, 3.33it/s] 7%|▋ | 27228/371472 [2:11:52<27:23:17, 3.49it/s] 7%|▋ | 27229/371472 [2:11:52<29:13:39, 3.27it/s] 7%|▋ | 27230/371472 [2:11:52<28:43:16, 3.33it/s] 7%|▋ | 27231/371472 [2:11:53<31:54:43, 3.00it/s] 7%|▋ | 27232/371472 [2:11:53<30:51:00, 3.10it/s] 7%|▋ | 27233/371472 [2:11:53<31:51:17, 3.00it/s] 7%|▋ | 27234/371472 [2:11:54<32:08:00, 2.98it/s] 7%|▋ | 27235/371472 [2:11:54<31:44:31, 3.01it/s] 7%|▋ | 27236/371472 [2:11:54<30:46:10, 3.11it/s] 7%|▋ | 27237/371472 [2:11:55<29:49:34, 3.21it/s] 7%|▋ | 27238/371472 [2:11:55<28:39:01, 3.34it/s] 7%|▋ | 27239/371472 [2:11:55<29:31:34, 3.24it/s] 7%|▋ | 27240/371472 [2:11:56<30:18:27, 3.15it/s] {'loss': 4.4003, 'learning_rate': 9.34452369152535e-07, 'epoch': 1.17} + 7%|▋ | 27240/371472 [2:11:56<30:18:27, 3.15it/s] 7%|▋ | 27241/371472 [2:11:56<29:22:55, 3.25it/s] 7%|▋ | 27242/371472 [2:11:56<28:47:21, 3.32it/s] 7%|▋ | 27243/371472 [2:11:56<27:29:42, 3.48it/s] 7%|▋ | 27244/371472 [2:11:57<27:49:50, 3.44it/s] 7%|▋ | 27245/371472 [2:11:57<26:44:15, 3.58it/s] 7%|▋ | 27246/371472 [2:11:57<30:21:07, 3.15it/s] 7%|▋ | 27247/371472 [2:11:58<30:02:14, 3.18it/s] 7%|▋ | 27248/371472 [2:11:58<28:24:54, 3.37it/s] 7%|▋ | 27249/371472 [2:11:58<27:52:37, 3.43it/s] 7%|▋ | 27250/371472 [2:11:58<26:21:12, 3.63it/s] 7%|▋ | 27251/371472 [2:11:59<26:49:03, 3.57it/s] 7%|▋ | 27252/371472 [2:11:59<26:18:35, 3.63it/s] 7%|▋ | 27253/371472 [2:11:59<27:40:27, 3.46it/s] 7%|▋ | 27254/371472 [2:12:00<27:05:01, 3.53it/s] 7%|▋ | 27255/371472 [2:12:00<30:43:58, 3.11it/s] 7%|▋ | 27256/371472 [2:12:00<29:44:26, 3.21it/s] 7%|▋ | 27257/371472 [2:12:01<32:04:39, 2.98it/s] 7%|▋ | 27258/371472 [2:12:01<29:46:24, 3.21it/s] 7%|▋ | 27259/371472 [2:12:01<29:57:54, 3.19it/s] 7%|▋ | 27260/371472 [2:12:02<28:10:26, 3.39it/s] {'loss': 4.1231, 'learning_rate': 9.344038871770561e-07, 'epoch': 1.17} + 7%|▋ | 27260/371472 [2:12:02<28:10:26, 3.39it/s] 7%|▋ | 27261/371472 [2:12:02<27:43:38, 3.45it/s] 7%|▋ | 27262/371472 [2:12:02<27:37:04, 3.46it/s] 7%|▋ | 27263/371472 [2:12:02<26:45:12, 3.57it/s] 7%|▋ | 27264/371472 [2:12:03<26:04:02, 3.67it/s] 7%|▋ | 27265/371472 [2:12:03<26:34:53, 3.60it/s] 7%|▋ | 27266/371472 [2:12:03<26:44:52, 3.57it/s] 7%|▋ | 27267/371472 [2:12:03<26:25:14, 3.62it/s] 7%|▋ | 27268/371472 [2:12:04<26:11:34, 3.65it/s] 7%|▋ | 27269/371472 [2:12:04<26:25:15, 3.62it/s] 7%|▋ | 27270/371472 [2:12:05<33:47:09, 2.83it/s] 7%|▋ | 27271/371472 [2:12:05<31:36:36, 3.02it/s] 7%|▋ | 27272/371472 [2:12:05<29:55:07, 3.20it/s] 7%|▋ | 27273/371472 [2:12:06<34:16:48, 2.79it/s] 7%|▋ | 27274/371472 [2:12:06<32:18:14, 2.96it/s] 7%|▋ | 27275/371472 [2:12:06<31:16:50, 3.06it/s] 7%|▋ | 27276/371472 [2:12:06<29:22:46, 3.25it/s] 7%|▋ | 27277/371472 [2:12:07<28:55:49, 3.30it/s] 7%|▋ | 27278/371472 [2:12:07<28:48:53, 3.32it/s] 7%|▋ | 27279/371472 [2:12:07<30:06:47, 3.17it/s] 7%|▋ | 27280/371472 [2:12:08<28:53:15, 3.31it/s] {'loss': 4.4005, 'learning_rate': 9.343554052015772e-07, 'epoch': 1.18} + 7%|▋ | 27280/371472 [2:12:08<28:53:15, 3.31it/s] 7%|▋ | 27281/371472 [2:12:08<28:16:00, 3.38it/s] 7%|▋ | 27282/371472 [2:12:08<27:28:28, 3.48it/s] 7%|▋ | 27283/371472 [2:12:08<27:37:19, 3.46it/s] 7%|▋ | 27284/371472 [2:12:09<26:45:20, 3.57it/s] 7%|▋ | 27285/371472 [2:12:09<26:32:26, 3.60it/s] 7%|▋ | 27286/371472 [2:12:09<26:26:19, 3.62it/s] 7%|▋ | 27287/371472 [2:12:10<26:09:12, 3.66it/s] 7%|▋ | 27288/371472 [2:12:10<26:53:01, 3.56it/s] 7%|▋ | 27289/371472 [2:12:10<27:43:20, 3.45it/s] 7%|▋ | 27290/371472 [2:12:10<27:47:54, 3.44it/s] 7%|▋ | 27291/371472 [2:12:11<26:45:06, 3.57it/s] 7%|▋ | 27292/371472 [2:12:11<28:04:38, 3.41it/s] 7%|▋ | 27293/371472 [2:12:11<27:51:06, 3.43it/s] 7%|▋ | 27294/371472 [2:12:12<28:04:40, 3.40it/s] 7%|▋ | 27295/371472 [2:12:12<28:02:28, 3.41it/s] 7%|▋ | 27296/371472 [2:12:12<28:37:58, 3.34it/s] 7%|▋ | 27297/371472 [2:12:13<29:35:53, 3.23it/s] 7%|▋ | 27298/371472 [2:12:13<30:28:03, 3.14it/s] 7%|▋ | 27299/371472 [2:12:13<30:07:58, 3.17it/s] 7%|▋ | 27300/371472 [2:12:14<29:41:43, 3.22it/s] {'loss': 4.5794, 'learning_rate': 9.343069232260983e-07, 'epoch': 1.18} + 7%|▋ | 27300/371472 [2:12:14<29:41:43, 3.22it/s] 7%|▋ | 27301/371472 [2:12:14<29:45:01, 3.21it/s] 7%|▋ | 27302/371472 [2:12:14<29:41:47, 3.22it/s] 7%|▋ | 27303/371472 [2:12:14<28:12:08, 3.39it/s] 7%|▋ | 27304/371472 [2:12:15<28:03:59, 3.41it/s] 7%|▋ | 27305/371472 [2:12:15<27:39:17, 3.46it/s] 7%|▋ | 27306/371472 [2:12:15<27:15:59, 3.51it/s] 7%|▋ | 27307/371472 [2:12:15<26:28:44, 3.61it/s] 7%|▋ | 27308/371472 [2:12:16<28:05:34, 3.40it/s] 7%|▋ | 27309/371472 [2:12:16<28:57:51, 3.30it/s] 7%|▋ | 27310/371472 [2:12:16<27:59:02, 3.42it/s] 7%|▋ | 27311/371472 [2:12:17<28:58:40, 3.30it/s] 7%|▋ | 27312/371472 [2:12:17<27:24:58, 3.49it/s] 7%|▋ | 27313/371472 [2:12:17<26:45:14, 3.57it/s] 7%|▋ | 27314/371472 [2:12:18<26:18:00, 3.63it/s] 7%|▋ | 27315/371472 [2:12:18<25:58:13, 3.68it/s] 7%|▋ | 27316/371472 [2:12:18<26:22:52, 3.62it/s] 7%|▋ | 27317/371472 [2:12:18<26:07:36, 3.66it/s] 7%|▋ | 27318/371472 [2:12:19<27:04:18, 3.53it/s] 7%|▋ | 27319/371472 [2:12:19<26:26:00, 3.62it/s] 7%|▋ | 27320/371472 [2:12:19<26:59:05, 3.54it/s] {'loss': 4.4316, 'learning_rate': 9.342584412506195e-07, 'epoch': 1.18} + 7%|▋ | 27320/371472 [2:12:19<26:59:05, 3.54it/s] 7%|▋ | 27321/371472 [2:12:19<26:36:01, 3.59it/s] 7%|▋ | 27322/371472 [2:12:20<29:20:34, 3.26it/s] 7%|▋ | 27323/371472 [2:12:20<29:19:25, 3.26it/s] 7%|▋ | 27324/371472 [2:12:20<28:42:40, 3.33it/s] 7%|▋ | 27325/371472 [2:12:21<27:32:54, 3.47it/s] 7%|▋ | 27326/371472 [2:12:21<27:19:25, 3.50it/s] 7%|▋ | 27327/371472 [2:12:21<26:42:58, 3.58it/s] 7%|▋ | 27328/371472 [2:12:22<26:39:53, 3.59it/s] 7%|▋ | 27329/371472 [2:12:22<26:37:55, 3.59it/s] 7%|▋ | 27330/371472 [2:12:22<27:34:52, 3.47it/s] 7%|▋ | 27331/371472 [2:12:22<26:59:17, 3.54it/s] 7%|▋ | 27332/371472 [2:12:23<26:34:52, 3.60it/s] 7%|▋ | 27333/371472 [2:12:23<27:38:15, 3.46it/s] 7%|▋ | 27334/371472 [2:12:23<27:02:03, 3.54it/s] 7%|▋ | 27335/371472 [2:12:24<27:30:42, 3.47it/s] 7%|▋ | 27336/371472 [2:12:24<27:32:38, 3.47it/s] 7%|▋ | 27337/371472 [2:12:24<27:44:14, 3.45it/s] 7%|▋ | 27338/371472 [2:12:24<27:18:55, 3.50it/s] 7%|▋ | 27339/371472 [2:12:25<28:56:00, 3.30it/s] 7%|▋ | 27340/371472 [2:12:25<28:40:35, 3.33it/s] {'loss': 4.5104, 'learning_rate': 9.342099592751406e-07, 'epoch': 1.18} + 7%|▋ | 27340/371472 [2:12:25<28:40:35, 3.33it/s] 7%|▋ | 27341/371472 [2:12:25<28:24:45, 3.36it/s] 7%|▋ | 27342/371472 [2:12:26<27:22:31, 3.49it/s] 7%|▋ | 27343/371472 [2:12:26<27:42:28, 3.45it/s] 7%|▋ | 27344/371472 [2:12:26<26:42:45, 3.58it/s] 7%|▋ | 27345/371472 [2:12:26<26:16:33, 3.64it/s] 7%|▋ | 27346/371472 [2:12:27<25:29:41, 3.75it/s] 7%|▋ | 27347/371472 [2:12:27<26:00:15, 3.68it/s] 7%|▋ | 27348/371472 [2:12:27<25:40:28, 3.72it/s] 7%|▋ | 27349/371472 [2:12:27<26:42:20, 3.58it/s] 7%|▋ | 27350/371472 [2:12:28<28:34:36, 3.35it/s] 7%|▋ | 27351/371472 [2:12:28<29:14:03, 3.27it/s] 7%|▋ | 27352/371472 [2:12:28<30:15:27, 3.16it/s] 7%|▋ | 27353/371472 [2:12:29<28:26:33, 3.36it/s] 7%|▋ | 27354/371472 [2:12:29<27:42:32, 3.45it/s] 7%|▋ | 27355/371472 [2:12:29<27:22:25, 3.49it/s] 7%|▋ | 27356/371472 [2:12:30<30:25:49, 3.14it/s] 7%|▋ | 27357/371472 [2:12:30<29:53:25, 3.20it/s] 7%|▋ | 27358/371472 [2:12:30<29:32:05, 3.24it/s] 7%|▋ | 27359/371472 [2:12:31<28:53:31, 3.31it/s] 7%|▋ | 27360/371472 [2:12:31<30:02:04, 3.18it/s] {'loss': 4.6388, 'learning_rate': 9.341614772996617e-07, 'epoch': 1.18} + 7%|▋ | 27360/371472 [2:12:31<30:02:04, 3.18it/s] 7%|▋ | 27361/371472 [2:12:31<29:53:30, 3.20it/s] 7%|▋ | 27362/371472 [2:12:32<29:43:48, 3.22it/s] 7%|▋ | 27363/371472 [2:12:32<28:15:02, 3.38it/s] 7%|▋ | 27364/371472 [2:12:32<27:35:07, 3.47it/s] 7%|▋ | 27365/371472 [2:12:32<27:24:47, 3.49it/s] 7%|▋ | 27366/371472 [2:12:33<26:10:14, 3.65it/s] 7%|▋ | 27367/371472 [2:12:33<26:45:23, 3.57it/s] 7%|▋ | 27368/371472 [2:12:33<26:54:01, 3.55it/s] 7%|▋ | 27369/371472 [2:12:33<27:57:04, 3.42it/s] 7%|▋ | 27370/371472 [2:12:34<27:38:59, 3.46it/s] 7%|�� | 27371/371472 [2:12:34<27:03:36, 3.53it/s] 7%|▋ | 27372/371472 [2:12:34<30:21:35, 3.15it/s] 7%|▋ | 27373/371472 [2:12:35<29:49:41, 3.20it/s] 7%|▋ | 27374/371472 [2:12:35<29:51:03, 3.20it/s] 7%|▋ | 27375/371472 [2:12:35<29:46:02, 3.21it/s] 7%|▋ | 27376/371472 [2:12:36<28:30:09, 3.35it/s] 7%|▋ | 27377/371472 [2:12:36<28:32:28, 3.35it/s] 7%|▋ | 27378/371472 [2:12:36<31:08:53, 3.07it/s] 7%|▋ | 27379/371472 [2:12:37<29:29:34, 3.24it/s] 7%|▋ | 27380/371472 [2:12:37<28:37:07, 3.34it/s] {'loss': 4.5311, 'learning_rate': 9.341129953241827e-07, 'epoch': 1.18} + 7%|▋ | 27380/371472 [2:12:37<28:37:07, 3.34it/s] 7%|▋ | 27381/371472 [2:12:37<27:41:28, 3.45it/s] 7%|▋ | 27382/371472 [2:12:37<29:00:31, 3.29it/s] 7%|▋ | 27383/371472 [2:12:38<30:33:45, 3.13it/s] 7%|▋ | 27384/371472 [2:12:38<29:41:48, 3.22it/s] 7%|▋ | 27385/371472 [2:12:38<29:58:23, 3.19it/s] 7%|▋ | 27386/371472 [2:12:39<28:47:45, 3.32it/s] 7%|▋ | 27387/371472 [2:12:39<28:16:11, 3.38it/s] 7%|▋ | 27388/371472 [2:12:39<27:40:21, 3.45it/s] 7%|▋ | 27389/371472 [2:12:40<27:25:57, 3.48it/s] 7%|▋ | 27390/371472 [2:12:40<27:08:36, 3.52it/s] 7%|▋ | 27391/371472 [2:12:40<28:46:11, 3.32it/s] 7%|▋ | 27392/371472 [2:12:40<27:15:17, 3.51it/s] 7%|▋ | 27393/371472 [2:12:41<27:54:49, 3.42it/s] 7%|▋ | 27394/371472 [2:12:41<28:02:32, 3.41it/s] 7%|▋ | 27395/371472 [2:12:41<27:41:30, 3.45it/s] 7%|▋ | 27396/371472 [2:12:42<29:32:43, 3.23it/s] 7%|▋ | 27397/371472 [2:12:42<28:11:49, 3.39it/s] 7%|▋ | 27398/371472 [2:12:42<27:29:28, 3.48it/s] 7%|▋ | 27399/371472 [2:12:42<26:56:47, 3.55it/s] 7%|▋ | 27400/371472 [2:12:43<26:33:55, 3.60it/s] {'loss': 4.2994, 'learning_rate': 9.340645133487039e-07, 'epoch': 1.18} + 7%|▋ | 27400/371472 [2:12:43<26:33:55, 3.60it/s] 7%|▋ | 27401/371472 [2:12:43<28:36:56, 3.34it/s] 7%|▋ | 27402/371472 [2:12:43<29:10:40, 3.28it/s] 7%|▋ | 27403/371472 [2:12:44<28:39:02, 3.34it/s] 7%|▋ | 27404/371472 [2:12:44<28:11:41, 3.39it/s] 7%|▋ | 27405/371472 [2:12:44<28:46:50, 3.32it/s] 7%|▋ | 27406/371472 [2:12:45<28:30:59, 3.35it/s] 7%|▋ | 27407/371472 [2:12:45<27:40:27, 3.45it/s] 7%|▋ | 27408/371472 [2:12:45<27:50:24, 3.43it/s] 7%|▋ | 27409/371472 [2:12:45<29:20:56, 3.26it/s] 7%|▋ | 27410/371472 [2:12:46<27:49:39, 3.43it/s] 7%|▋ | 27411/371472 [2:12:46<27:16:40, 3.50it/s] 7%|▋ | 27412/371472 [2:12:46<26:47:37, 3.57it/s] 7%|▋ | 27413/371472 [2:12:47<27:46:34, 3.44it/s] 7%|▋ | 27414/371472 [2:12:47<27:31:03, 3.47it/s] 7%|▋ | 27415/371472 [2:12:47<26:51:58, 3.56it/s] 7%|▋ | 27416/371472 [2:12:47<27:39:49, 3.45it/s] 7%|▋ | 27417/371472 [2:12:48<27:33:33, 3.47it/s] 7%|▋ | 27418/371472 [2:12:48<27:07:50, 3.52it/s] 7%|▋ | 27419/371472 [2:12:48<26:34:15, 3.60it/s] 7%|▋ | 27420/371472 [2:12:49<27:10:01, 3.52it/s] {'loss': 4.2871, 'learning_rate': 9.34016031373225e-07, 'epoch': 1.18} + 7%|▋ | 27420/371472 [2:12:49<27:10:01, 3.52it/s] 7%|▋ | 27421/371472 [2:12:49<26:40:51, 3.58it/s] 7%|▋ | 27422/371472 [2:12:49<26:11:55, 3.65it/s] 7%|▋ | 27423/371472 [2:12:49<27:31:21, 3.47it/s] 7%|▋ | 27424/371472 [2:12:50<27:56:20, 3.42it/s] 7%|▋ | 27425/371472 [2:12:50<28:58:32, 3.30it/s] 7%|▋ | 27426/371472 [2:12:50<28:22:27, 3.37it/s] 7%|▋ | 27427/371472 [2:12:51<29:38:01, 3.22it/s] 7%|▋ | 27428/371472 [2:12:51<28:16:57, 3.38it/s] 7%|▋ | 27429/371472 [2:12:51<29:19:06, 3.26it/s] 7%|▋ | 27430/371472 [2:12:52<28:04:58, 3.40it/s] 7%|▋ | 27431/371472 [2:12:52<28:41:44, 3.33it/s] 7%|▋ | 27432/371472 [2:12:52<28:01:30, 3.41it/s] 7%|▋ | 27433/371472 [2:12:52<27:15:33, 3.51it/s] 7%|▋ | 27434/371472 [2:12:53<29:29:06, 3.24it/s] 7%|▋ | 27435/371472 [2:12:53<28:19:12, 3.37it/s] 7%|▋ | 27436/371472 [2:12:53<28:45:12, 3.32it/s] 7%|▋ | 27437/371472 [2:12:54<27:44:12, 3.45it/s] 7%|▋ | 27438/371472 [2:12:54<28:08:57, 3.39it/s] 7%|▋ | 27439/371472 [2:12:54<27:02:51, 3.53it/s] 7%|▋ | 27440/371472 [2:12:54<26:51:06, 3.56it/s] {'loss': 4.5998, 'learning_rate': 9.339675493977461e-07, 'epoch': 1.18} + 7%|▋ | 27440/371472 [2:12:54<26:51:06, 3.56it/s] 7%|▋ | 27441/371472 [2:12:55<25:59:37, 3.68it/s] 7%|▋ | 27442/371472 [2:12:55<25:07:48, 3.80it/s] 7%|▋ | 27443/371472 [2:12:55<27:00:36, 3.54it/s] 7%|▋ | 27444/371472 [2:12:56<26:47:57, 3.57it/s] 7%|▋ | 27445/371472 [2:12:56<26:41:08, 3.58it/s] 7%|▋ | 27446/371472 [2:12:56<26:41:46, 3.58it/s] 7%|▋ | 27447/371472 [2:12:56<27:06:37, 3.52it/s] 7%|▋ | 27448/371472 [2:12:57<26:27:24, 3.61it/s] 7%|▋ | 27449/371472 [2:12:57<26:14:08, 3.64it/s] 7%|▋ | 27450/371472 [2:12:57<26:48:27, 3.56it/s] 7%|▋ | 27451/371472 [2:12:57<26:25:12, 3.62it/s] 7%|▋ | 27452/371472 [2:12:58<26:32:14, 3.60it/s] 7%|▋ | 27453/371472 [2:12:58<29:34:45, 3.23it/s] 7%|▋ | 27454/371472 [2:12:58<30:58:00, 3.09it/s] 7%|▋ | 27455/371472 [2:12:59<31:35:19, 3.03it/s] 7%|▋ | 27456/371472 [2:12:59<30:11:06, 3.17it/s] 7%|▋ | 27457/371472 [2:12:59<30:02:36, 3.18it/s] 7%|▋ | 27458/371472 [2:13:00<30:00:58, 3.18it/s] 7%|▋ | 27459/371472 [2:13:00<29:01:15, 3.29it/s] 7%|▋ | 27460/371472 [2:13:00<28:03:19, 3.41it/s] {'loss': 4.6689, 'learning_rate': 9.339190674222672e-07, 'epoch': 1.18} + 7%|▋ | 27460/371472 [2:13:00<28:03:19, 3.41it/s] 7%|▋ | 27461/371472 [2:13:01<27:52:49, 3.43it/s] 7%|▋ | 27462/371472 [2:13:01<30:18:45, 3.15it/s] 7%|▋ | 27463/371472 [2:13:01<30:10:26, 3.17it/s] 7%|▋ | 27464/371472 [2:13:02<28:42:36, 3.33it/s] 7%|▋ | 27465/371472 [2:13:02<27:14:49, 3.51it/s] 7%|▋ | 27466/371472 [2:13:02<26:39:07, 3.59it/s] 7%|▋ | 27467/371472 [2:13:02<26:55:41, 3.55it/s] 7%|▋ | 27468/371472 [2:13:03<27:31:53, 3.47it/s] 7%|▋ | 27469/371472 [2:13:03<30:32:48, 3.13it/s] 7%|▋ | 27470/371472 [2:13:03<31:36:56, 3.02it/s] 7%|▋ | 27471/371472 [2:13:04<29:47:53, 3.21it/s] 7%|▋ | 27472/371472 [2:13:04<28:16:14, 3.38it/s] 7%|▋ | 27473/371472 [2:13:04<27:39:05, 3.46it/s] 7%|▋ | 27474/371472 [2:13:04<26:23:49, 3.62it/s] 7%|▋ | 27475/371472 [2:13:05<26:08:25, 3.66it/s] 7%|▋ | 27476/371472 [2:13:05<27:29:53, 3.47it/s] 7%|▋ | 27477/371472 [2:13:05<27:38:05, 3.46it/s] 7%|▋ | 27478/371472 [2:13:06<27:46:17, 3.44it/s] 7%|▋ | 27479/371472 [2:13:06<27:40:49, 3.45it/s] 7%|▋ | 27480/371472 [2:13:06<27:05:56, 3.53it/s] {'loss': 4.5299, 'learning_rate': 9.338705854467885e-07, 'epoch': 1.18} + 7%|▋ | 27480/371472 [2:13:06<27:05:56, 3.53it/s] 7%|▋ | 27481/371472 [2:13:06<27:25:27, 3.48it/s] 7%|▋ | 27482/371472 [2:13:07<26:22:50, 3.62it/s] 7%|▋ | 27483/371472 [2:13:07<27:36:21, 3.46it/s] 7%|▋ | 27484/371472 [2:13:07<26:44:04, 3.57it/s] 7%|▋ | 27485/371472 [2:13:08<26:32:48, 3.60it/s] 7%|▋ | 27486/371472 [2:13:08<25:35:08, 3.73it/s] 7%|▋ | 27487/371472 [2:13:08<25:48:17, 3.70it/s] 7%|▋ | 27488/371472 [2:13:08<26:12:32, 3.65it/s] 7%|▋ | 27489/371472 [2:13:09<26:49:39, 3.56it/s] 7%|▋ | 27490/371472 [2:13:09<26:56:54, 3.55it/s] 7%|▋ | 27491/371472 [2:13:09<27:16:25, 3.50it/s] 7%|▋ | 27492/371472 [2:13:10<28:20:13, 3.37it/s] 7%|▋ | 27493/371472 [2:13:10<27:50:22, 3.43it/s] 7%|▋ | 27494/371472 [2:13:10<28:32:30, 3.35it/s] 7%|▋ | 27495/371472 [2:13:10<28:30:51, 3.35it/s] 7%|▋ | 27496/371472 [2:13:11<28:26:09, 3.36it/s] 7%|▋ | 27497/371472 [2:13:11<28:06:37, 3.40it/s] 7%|▋ | 27498/371472 [2:13:11<28:55:33, 3.30it/s] 7%|▋ | 27499/371472 [2:13:12<28:33:47, 3.35it/s] 7%|▋ | 27500/371472 [2:13:12<28:15:26, 3.38it/s] {'loss': 4.5076, 'learning_rate': 9.338221034713093e-07, 'epoch': 1.18} + 7%|▋ | 27500/371472 [2:13:12<28:15:26, 3.38it/s] 7%|▋ | 27501/371472 [2:13:12<27:52:18, 3.43it/s] 7%|▋ | 27502/371472 [2:13:13<28:13:23, 3.39it/s] 7%|▋ | 27503/371472 [2:13:13<29:43:50, 3.21it/s] 7%|▋ | 27504/371472 [2:13:13<28:21:16, 3.37it/s] 7%|▋ | 27505/371472 [2:13:13<28:36:16, 3.34it/s] 7%|▋ | 27506/371472 [2:13:14<27:50:10, 3.43it/s] 7%|▋ | 27507/371472 [2:13:14<28:33:13, 3.35it/s] 7%|▋ | 27508/371472 [2:13:14<28:56:18, 3.30it/s] 7%|▋ | 27509/371472 [2:13:15<27:54:38, 3.42it/s] 7%|▋ | 27510/371472 [2:13:15<27:01:08, 3.54it/s] 7%|▋ | 27511/371472 [2:13:15<26:30:53, 3.60it/s] 7%|▋ | 27512/371472 [2:13:15<26:20:53, 3.63it/s] 7%|▋ | 27513/371472 [2:13:16<27:44:47, 3.44it/s] 7%|▋ | 27514/371472 [2:13:16<29:21:07, 3.26it/s] 7%|▋ | 27515/371472 [2:13:16<28:50:39, 3.31it/s] 7%|▋ | 27516/371472 [2:13:17<27:41:45, 3.45it/s] 7%|▋ | 27517/371472 [2:13:17<26:44:37, 3.57it/s] 7%|▋ | 27518/371472 [2:13:17<26:06:03, 3.66it/s] 7%|▋ | 27519/371472 [2:13:17<26:25:01, 3.62it/s] 7%|▋ | 27520/371472 [2:13:18<26:44:34, 3.57it/s] {'loss': 4.6751, 'learning_rate': 9.337736214958305e-07, 'epoch': 1.19} + 7%|▋ | 27520/371472 [2:13:18<26:44:34, 3.57it/s] 7%|▋ | 27521/371472 [2:13:18<26:21:07, 3.63it/s] 7%|▋ | 27522/371472 [2:13:18<27:48:22, 3.44it/s] 7%|▋ | 27523/371472 [2:13:19<27:37:36, 3.46it/s] 7%|▋ | 27524/371472 [2:13:19<26:52:11, 3.56it/s] 7%|▋ | 27525/371472 [2:13:19<26:38:31, 3.59it/s] 7%|▋ | 27526/371472 [2:13:19<26:52:23, 3.56it/s] 7%|▋ | 27527/371472 [2:13:20<26:28:41, 3.61it/s] 7%|▋ | 27528/371472 [2:13:20<26:05:40, 3.66it/s] 7%|▋ | 27529/371472 [2:13:20<27:00:22, 3.54it/s] 7%|▋ | 27530/371472 [2:13:21<27:39:30, 3.45it/s] 7%|▋ | 27531/371472 [2:13:21<27:34:07, 3.47it/s] 7%|▋ | 27532/371472 [2:13:21<27:16:34, 3.50it/s] 7%|▋ | 27533/371472 [2:13:21<26:27:27, 3.61it/s] 7%|▋ | 27534/371472 [2:13:22<26:34:06, 3.60it/s] 7%|▋ | 27535/371472 [2:13:22<26:34:46, 3.59it/s] 7%|▋ | 27536/371472 [2:13:22<27:05:33, 3.53it/s] 7%|▋ | 27537/371472 [2:13:22<26:39:09, 3.58it/s] 7%|▋ | 27538/371472 [2:13:23<26:33:47, 3.60it/s] 7%|▋ | 27539/371472 [2:13:23<25:49:41, 3.70it/s] 7%|▋ | 27540/371472 [2:13:23<26:17:00, 3.63it/s] {'loss': 4.4489, 'learning_rate': 9.337251395203516e-07, 'epoch': 1.19} + 7%|▋ | 27540/371472 [2:13:23<26:17:00, 3.63it/s] 7%|▋ | 27541/371472 [2:13:24<26:52:54, 3.55it/s] 7%|▋ | 27542/371472 [2:13:24<28:14:13, 3.38it/s] 7%|▋ | 27543/371472 [2:13:24<28:55:51, 3.30it/s] 7%|▋ | 27544/371472 [2:13:25<27:49:46, 3.43it/s] 7%|▋ | 27545/371472 [2:13:25<28:38:18, 3.34it/s] 7%|▋ | 27546/371472 [2:13:25<27:48:34, 3.44it/s] 7%|▋ | 27547/371472 [2:13:25<29:23:54, 3.25it/s] 7%|▋ | 27548/371472 [2:13:26<28:35:14, 3.34it/s] 7%|▋ | 27549/371472 [2:13:26<27:38:49, 3.46it/s] 7%|▋ | 27550/371472 [2:13:26<28:46:43, 3.32it/s] 7%|▋ | 27551/371472 [2:13:27<29:10:19, 3.27it/s] 7%|▋ | 27552/371472 [2:13:27<28:57:22, 3.30it/s] 7%|▋ | 27553/371472 [2:13:27<29:54:40, 3.19it/s] 7%|▋ | 27554/371472 [2:13:28<29:27:39, 3.24it/s] 7%|▋ | 27555/371472 [2:13:28<28:51:29, 3.31it/s] 7%|▋ | 27556/371472 [2:13:28<28:21:18, 3.37it/s] 7%|▋ | 27557/371472 [2:13:28<27:39:48, 3.45it/s] 7%|▋ | 27558/371472 [2:13:29<28:01:15, 3.41it/s] 7%|▋ | 27559/371472 [2:13:29<27:58:23, 3.42it/s] 7%|▋ | 27560/371472 [2:13:29<27:49:35, 3.43it/s] {'loss': 4.5556, 'learning_rate': 9.336766575448728e-07, 'epoch': 1.19} + 7%|▋ | 27560/371472 [2:13:29<27:49:35, 3.43it/s] 7%|▋ | 27561/371472 [2:13:30<28:11:04, 3.39it/s] 7%|▋ | 27562/371472 [2:13:30<27:47:18, 3.44it/s] 7%|▋ | 27563/371472 [2:13:30<26:58:53, 3.54it/s] 7%|▋ | 27564/371472 [2:13:30<26:10:12, 3.65it/s] 7%|▋ | 27565/371472 [2:13:31<28:25:32, 3.36it/s] 7%|▋ | 27566/371472 [2:13:31<27:43:54, 3.44it/s] 7%|▋ | 27567/371472 [2:13:31<27:25:29, 3.48it/s] 7%|▋ | 27568/371472 [2:13:32<26:21:59, 3.62it/s] 7%|▋ | 27569/371472 [2:13:32<27:09:03, 3.52it/s] 7%|▋ | 27570/371472 [2:13:32<27:09:02, 3.52it/s] 7%|▋ | 27571/371472 [2:13:32<26:54:49, 3.55it/s] 7%|▋ | 27572/371472 [2:13:33<26:43:54, 3.57it/s] 7%|▋ | 27573/371472 [2:13:33<25:47:12, 3.70it/s] 7%|▋ | 27574/371472 [2:13:33<25:39:57, 3.72it/s] 7%|▋ | 27575/371472 [2:13:34<28:02:25, 3.41it/s] 7%|▋ | 27576/371472 [2:13:34<29:50:48, 3.20it/s] 7%|▋ | 27577/371472 [2:13:34<32:14:44, 2.96it/s] 7%|▋ | 27578/371472 [2:13:35<31:39:00, 3.02it/s] 7%|▋ | 27579/371472 [2:13:35<30:32:29, 3.13it/s] 7%|▋ | 27580/371472 [2:13:35<29:20:14, 3.26it/s] {'loss': 4.543, 'learning_rate': 9.336281755693938e-07, 'epoch': 1.19} + 7%|▋ | 27580/371472 [2:13:35<29:20:14, 3.26it/s] 7%|▋ | 27581/371472 [2:13:36<30:28:08, 3.14it/s] 7%|▋ | 27582/371472 [2:13:36<28:34:55, 3.34it/s] 7%|▋ | 27583/371472 [2:13:36<27:35:06, 3.46it/s] 7%|�� | 27584/371472 [2:13:36<27:32:29, 3.47it/s] 7%|▋ | 27585/371472 [2:13:37<26:41:38, 3.58it/s] 7%|▋ | 27586/371472 [2:13:37<26:49:14, 3.56it/s] 7%|▋ | 27587/371472 [2:13:37<26:38:13, 3.59it/s] 7%|▋ | 27588/371472 [2:13:37<26:32:02, 3.60it/s] 7%|▋ | 27589/371472 [2:13:38<29:15:57, 3.26it/s] 7%|▋ | 27590/371472 [2:13:38<28:03:28, 3.40it/s] 7%|▋ | 27591/371472 [2:13:38<28:52:10, 3.31it/s] 7%|▋ | 27592/371472 [2:13:39<28:18:47, 3.37it/s] 7%|▋ | 27593/371472 [2:13:39<26:59:07, 3.54it/s] 7%|▋ | 27594/371472 [2:13:39<27:32:31, 3.47it/s] 7%|▋ | 27595/371472 [2:13:40<29:04:09, 3.29it/s] 7%|▋ | 27596/371472 [2:13:40<28:19:26, 3.37it/s] 7%|▋ | 27597/371472 [2:13:40<27:19:01, 3.50it/s] 7%|▋ | 27598/371472 [2:13:40<28:25:05, 3.36it/s] 7%|▋ | 27599/371472 [2:13:41<27:55:05, 3.42it/s] 7%|▋ | 27600/371472 [2:13:41<26:52:07, 3.56it/s] {'loss': 4.6117, 'learning_rate': 9.33579693593915e-07, 'epoch': 1.19} + 7%|▋ | 27600/371472 [2:13:41<26:52:07, 3.56it/s] 7%|▋ | 27601/371472 [2:13:41<26:36:07, 3.59it/s] 7%|▋ | 27602/371472 [2:13:42<28:56:42, 3.30it/s] 7%|▋ | 27603/371472 [2:13:42<28:00:54, 3.41it/s] 7%|▋ | 27604/371472 [2:13:42<29:02:40, 3.29it/s] 7%|▋ | 27605/371472 [2:13:42<28:22:03, 3.37it/s] 7%|▋ | 27606/371472 [2:13:43<28:49:25, 3.31it/s] 7%|▋ | 27607/371472 [2:13:43<28:04:29, 3.40it/s] 7%|▋ | 27608/371472 [2:13:43<28:21:41, 3.37it/s] 7%|▋ | 27609/371472 [2:13:44<27:29:37, 3.47it/s] 7%|▋ | 27610/371472 [2:13:44<26:58:39, 3.54it/s] 7%|▋ | 27611/371472 [2:13:44<27:52:39, 3.43it/s] 7%|▋ | 27612/371472 [2:13:45<29:46:10, 3.21it/s] 7%|▋ | 27613/371472 [2:13:45<29:00:34, 3.29it/s] 7%|▋ | 27614/371472 [2:13:45<28:15:38, 3.38it/s] 7%|▋ | 27615/371472 [2:13:45<29:11:25, 3.27it/s] 7%|▋ | 27616/371472 [2:13:46<28:17:25, 3.38it/s] 7%|▋ | 27617/371472 [2:13:46<27:25:50, 3.48it/s] 7%|▋ | 27618/371472 [2:13:46<27:06:49, 3.52it/s] 7%|▋ | 27619/371472 [2:13:47<27:00:12, 3.54it/s] 7%|▋ | 27620/371472 [2:13:47<26:23:12, 3.62it/s] {'loss': 4.4452, 'learning_rate': 9.33531211618436e-07, 'epoch': 1.19} + 7%|▋ | 27620/371472 [2:13:47<26:23:12, 3.62it/s] 7%|▋ | 27621/371472 [2:13:47<27:53:24, 3.42it/s] 7%|▋ | 27622/371472 [2:13:47<27:32:24, 3.47it/s] 7%|▋ | 27623/371472 [2:13:48<27:48:50, 3.43it/s] 7%|▋ | 27624/371472 [2:13:48<28:28:50, 3.35it/s] 7%|▋ | 27625/371472 [2:13:48<29:06:50, 3.28it/s] 7%|▋ | 27626/371472 [2:13:49<27:39:29, 3.45it/s] 7%|▋ | 27627/371472 [2:13:49<27:08:02, 3.52it/s] 7%|▋ | 27628/371472 [2:13:49<26:54:46, 3.55it/s] 7%|▋ | 27629/371472 [2:13:49<26:26:11, 3.61it/s] 7%|▋ | 27630/371472 [2:13:50<25:55:46, 3.68it/s] 7%|▋ | 27631/371472 [2:13:50<26:41:15, 3.58it/s] 7%|▋ | 27632/371472 [2:13:50<26:05:21, 3.66it/s] 7%|▋ | 27633/371472 [2:13:51<25:34:58, 3.73it/s] 7%|▋ | 27634/371472 [2:13:51<25:04:31, 3.81it/s] 7%|▋ | 27635/371472 [2:13:51<25:50:33, 3.70it/s] 7%|▋ | 27636/371472 [2:13:51<25:52:38, 3.69it/s] 7%|▋ | 27637/371472 [2:13:52<26:15:51, 3.64it/s] 7%|▋ | 27638/371472 [2:13:52<26:57:35, 3.54it/s] 7%|▋ | 27639/371472 [2:13:52<27:42:15, 3.45it/s] 7%|▋ | 27640/371472 [2:13:52<26:42:55, 3.58it/s] {'loss': 4.4695, 'learning_rate': 9.33482729642957e-07, 'epoch': 1.19} + 7%|▋ | 27640/371472 [2:13:52<26:42:55, 3.58it/s] 7%|▋ | 27641/371472 [2:13:53<28:06:18, 3.40it/s] 7%|▋ | 27642/371472 [2:13:53<27:23:57, 3.49it/s] 7%|▋ | 27643/371472 [2:13:53<28:54:37, 3.30it/s] 7%|▋ | 27644/371472 [2:13:54<29:02:22, 3.29it/s] 7%|▋ | 27645/371472 [2:13:54<29:21:17, 3.25it/s] 7%|▋ | 27646/371472 [2:13:54<28:41:32, 3.33it/s] 7%|▋ | 27647/371472 [2:13:55<28:13:44, 3.38it/s] 7%|▋ | 27648/371472 [2:13:55<28:32:11, 3.35it/s] 7%|▋ | 27649/371472 [2:13:55<31:32:42, 3.03it/s] 7%|▋ | 27650/371472 [2:13:56<29:40:43, 3.22it/s] 7%|▋ | 27651/371472 [2:13:56<30:37:52, 3.12it/s] 7%|▋ | 27652/371472 [2:13:56<28:51:49, 3.31it/s] 7%|▋ | 27653/371472 [2:13:56<27:41:22, 3.45it/s] 7%|▋ | 27654/371472 [2:13:57<26:49:55, 3.56it/s] 7%|▋ | 27655/371472 [2:13:57<26:38:29, 3.58it/s] 7%|▋ | 27656/371472 [2:13:57<28:21:44, 3.37it/s] 7%|▋ | 27657/371472 [2:13:58<30:50:53, 3.10it/s] 7%|▋ | 27658/371472 [2:13:58<29:13:44, 3.27it/s] 7%|▋ | 27659/371472 [2:13:58<27:50:51, 3.43it/s] 7%|▋ | 27660/371472 [2:13:59<27:25:21, 3.48it/s] {'loss': 4.4768, 'learning_rate': 9.334342476674782e-07, 'epoch': 1.19} + 7%|▋ | 27660/371472 [2:13:59<27:25:21, 3.48it/s] 7%|▋ | 27661/371472 [2:13:59<27:36:39, 3.46it/s] 7%|▋ | 27662/371472 [2:13:59<26:41:12, 3.58it/s] 7%|▋ | 27663/371472 [2:13:59<27:44:57, 3.44it/s] 7%|▋ | 27664/371472 [2:14:00<27:40:32, 3.45it/s] 7%|▋ | 27665/371472 [2:14:00<27:18:23, 3.50it/s] 7%|▋ | 27666/371472 [2:14:00<26:43:48, 3.57it/s] 7%|▋ | 27667/371472 [2:14:01<27:29:37, 3.47it/s] 7%|▋ | 27668/371472 [2:14:01<29:28:44, 3.24it/s] 7%|▋ | 27669/371472 [2:14:01<28:54:10, 3.30it/s] 7%|▋ | 27670/371472 [2:14:01<28:02:48, 3.41it/s] 7%|▋ | 27671/371472 [2:14:02<27:41:25, 3.45it/s] 7%|▋ | 27672/371472 [2:14:02<28:58:15, 3.30it/s] 7%|▋ | 27673/371472 [2:14:02<27:59:34, 3.41it/s] 7%|▋ | 27674/371472 [2:14:03<27:56:04, 3.42it/s] 7%|▋ | 27675/371472 [2:14:03<27:59:44, 3.41it/s] 7%|▋ | 27676/371472 [2:14:03<27:41:09, 3.45it/s] 7%|▋ | 27677/371472 [2:14:03<27:07:13, 3.52it/s] 7%|▋ | 27678/371472 [2:14:04<25:57:31, 3.68it/s] 7%|▋ | 27679/371472 [2:14:04<25:43:12, 3.71it/s] 7%|▋ | 27680/371472 [2:14:04<25:16:29, 3.78it/s] {'loss': 4.4135, 'learning_rate': 9.333857656919994e-07, 'epoch': 1.19} + 7%|▋ | 27680/371472 [2:14:04<25:16:29, 3.78it/s] 7%|▋ | 27681/371472 [2:14:05<27:10:29, 3.51it/s] 7%|▋ | 27682/371472 [2:14:05<26:49:33, 3.56it/s] 7%|▋ | 27683/371472 [2:14:05<27:46:08, 3.44it/s] 7%|▋ | 27684/371472 [2:14:05<26:55:35, 3.55it/s] 7%|▋ | 27685/371472 [2:14:06<27:12:51, 3.51it/s] 7%|▋ | 27686/371472 [2:14:06<27:25:38, 3.48it/s] 7%|▋ | 27687/371472 [2:14:06<26:32:36, 3.60it/s] 7%|▋ | 27688/371472 [2:14:07<26:27:07, 3.61it/s] 7%|▋ | 27689/371472 [2:14:07<26:01:01, 3.67it/s] 7%|▋ | 27690/371472 [2:14:07<26:23:55, 3.62it/s] 7%|▋ | 27691/371472 [2:14:07<27:15:27, 3.50it/s] 7%|▋ | 27692/371472 [2:14:08<27:04:44, 3.53it/s] 7%|▋ | 27693/371472 [2:14:08<27:21:11, 3.49it/s] 7%|▋ | 27694/371472 [2:14:08<26:54:02, 3.55it/s] 7%|▋ | 27695/371472 [2:14:08<26:30:40, 3.60it/s] 7%|▋ | 27696/371472 [2:14:09<26:04:42, 3.66it/s] 7%|▋ | 27697/371472 [2:14:09<26:16:56, 3.63it/s] 7%|▋ | 27698/371472 [2:14:09<28:56:13, 3.30it/s] 7%|▋ | 27699/371472 [2:14:10<29:12:23, 3.27it/s] 7%|▋ | 27700/371472 [2:14:10<28:27:22, 3.36it/s] {'loss': 4.4234, 'learning_rate': 9.333372837165205e-07, 'epoch': 1.19} + 7%|▋ | 27700/371472 [2:14:10<28:27:22, 3.36it/s] 7%|▋ | 27701/371472 [2:14:10<27:22:03, 3.49it/s] 7%|▋ | 27702/371472 [2:14:11<29:34:11, 3.23it/s] 7%|▋ | 27703/371472 [2:14:11<31:01:12, 3.08it/s] 7%|▋ | 27704/371472 [2:14:11<30:46:03, 3.10it/s] 7%|▋ | 27705/371472 [2:14:12<29:31:35, 3.23it/s] 7%|▋ | 27706/371472 [2:14:12<29:06:00, 3.28it/s] 7%|▋ | 27707/371472 [2:14:12<29:20:30, 3.25it/s] 7%|▋ | 27708/371472 [2:14:13<31:08:31, 3.07it/s] 7%|▋ | 27709/371472 [2:14:13<29:40:39, 3.22it/s] 7%|▋ | 27710/371472 [2:14:13<28:47:50, 3.32it/s] 7%|▋ | 27711/371472 [2:14:13<28:07:30, 3.40it/s] 7%|▋ | 27712/371472 [2:14:14<27:09:57, 3.52it/s] 7%|▋ | 27713/371472 [2:14:14<26:26:39, 3.61it/s] 7%|▋ | 27714/371472 [2:14:14<26:49:57, 3.56it/s] 7%|▋ | 27715/371472 [2:14:14<27:34:18, 3.46it/s] 7%|▋ | 27716/371472 [2:14:15<27:04:37, 3.53it/s] 7%|▋ | 27717/371472 [2:14:15<26:18:24, 3.63it/s] 7%|▋ | 27718/371472 [2:14:15<26:09:54, 3.65it/s] 7%|▋ | 27719/371472 [2:14:16<26:23:20, 3.62it/s] 7%|▋ | 27720/371472 [2:14:16<26:48:38, 3.56it/s] {'loss': 4.7231, 'learning_rate': 9.332888017410416e-07, 'epoch': 1.19} + 7%|▋ | 27720/371472 [2:14:16<26:48:38, 3.56it/s] 7%|▋ | 27721/371472 [2:14:16<27:26:40, 3.48it/s] 7%|▋ | 27722/371472 [2:14:16<28:39:19, 3.33it/s] 7%|▋ | 27723/371472 [2:14:17<28:04:43, 3.40it/s] 7%|▋ | 27724/371472 [2:14:17<28:52:35, 3.31it/s] 7%|▋ | 27725/371472 [2:14:17<27:36:09, 3.46it/s] 7%|▋ | 27726/371472 [2:14:18<27:15:04, 3.50it/s] 7%|▋ | 27727/371472 [2:14:18<27:15:16, 3.50it/s] 7%|▋ | 27728/371472 [2:14:18<27:22:56, 3.49it/s] 7%|▋ | 27729/371472 [2:14:18<27:18:18, 3.50it/s] 7%|▋ | 27730/371472 [2:14:19<27:35:56, 3.46it/s] 7%|▋ | 27731/371472 [2:14:19<28:13:01, 3.38it/s] 7%|▋ | 27732/371472 [2:14:19<27:31:24, 3.47it/s] 7%|▋ | 27733/371472 [2:14:20<26:56:26, 3.54it/s] 7%|▋ | 27734/371472 [2:14:20<27:16:45, 3.50it/s] 7%|▋ | 27735/371472 [2:14:20<26:37:58, 3.59it/s] 7%|▋ | 27736/371472 [2:14:20<26:32:42, 3.60it/s] 7%|▋ | 27737/371472 [2:14:21<29:14:20, 3.27it/s] 7%|▋ | 27738/371472 [2:14:21<28:01:13, 3.41it/s] 7%|▋ | 27739/371472 [2:14:21<26:57:12, 3.54it/s] 7%|▋ | 27740/371472 [2:14:22<26:32:30, 3.60it/s] {'loss': 4.5024, 'learning_rate': 9.332403197655627e-07, 'epoch': 1.19} + 7%|▋ | 27740/371472 [2:14:22<26:32:30, 3.60it/s] 7%|▋ | 27741/371472 [2:14:22<25:55:53, 3.68it/s] 7%|▋ | 27742/371472 [2:14:22<25:35:03, 3.73it/s] 7%|▋ | 27743/371472 [2:14:22<25:53:32, 3.69it/s] 7%|▋ | 27744/371472 [2:14:23<26:00:59, 3.67it/s] 7%|▋ | 27745/371472 [2:14:23<26:58:30, 3.54it/s] 7%|▋ | 27746/371472 [2:14:23<26:34:01, 3.59it/s] 7%|▋ | 27747/371472 [2:14:24<26:59:23, 3.54it/s] 7%|▋ | 27748/371472 [2:14:24<25:55:13, 3.68it/s] 7%|▋ | 27749/371472 [2:14:24<26:40:24, 3.58it/s] 7%|▋ | 27750/371472 [2:14:24<28:00:05, 3.41it/s] 7%|▋ | 27751/371472 [2:14:25<28:28:44, 3.35it/s] 7%|▋ | 27752/371472 [2:14:25<27:51:40, 3.43it/s] 7%|▋ | 27753/371472 [2:14:25<28:57:49, 3.30it/s] 7%|▋ | 27754/371472 [2:14:26<28:49:22, 3.31it/s] 7%|▋ | 27755/371472 [2:14:26<27:39:42, 3.45it/s] 7%|▋ | 27756/371472 [2:14:26<27:24:04, 3.48it/s] 7%|▋ | 27757/371472 [2:14:27<31:02:33, 3.08it/s] 7%|▋ | 27758/371472 [2:14:27<29:00:00, 3.29it/s] 7%|▋ | 27759/371472 [2:14:27<29:13:46, 3.27it/s] 7%|▋ | 27760/371472 [2:14:27<28:29:03, 3.35it/s] {'loss': 4.3848, 'learning_rate': 9.331918377900837e-07, 'epoch': 1.2} + 7%|▋ | 27760/371472 [2:14:27<28:29:03, 3.35it/s] 7%|▋ | 27761/371472 [2:14:28<29:50:20, 3.20it/s] 7%|▋ | 27762/371472 [2:14:28<29:01:20, 3.29it/s] 7%|▋ | 27763/371472 [2:14:28<28:30:54, 3.35it/s] 7%|▋ | 27764/371472 [2:14:29<28:31:16, 3.35it/s] 7%|▋ | 27765/371472 [2:14:29<28:18:54, 3.37it/s] 7%|▋ | 27766/371472 [2:14:29<29:09:56, 3.27it/s] 7%|▋ | 27767/371472 [2:14:30<28:45:59, 3.32it/s] 7%|▋ | 27768/371472 [2:14:30<27:52:19, 3.43it/s] 7%|▋ | 27769/371472 [2:14:30<27:06:59, 3.52it/s] 7%|▋ | 27770/371472 [2:14:30<27:05:45, 3.52it/s] 7%|▋ | 27771/371472 [2:14:31<26:33:31, 3.59it/s] 7%|▋ | 27772/371472 [2:14:31<26:40:58, 3.58it/s] 7%|▋ | 27773/371472 [2:14:31<26:10:32, 3.65it/s] 7%|▋ | 27774/371472 [2:14:31<26:30:39, 3.60it/s] 7%|▋ | 27775/371472 [2:14:32<26:20:44, 3.62it/s] 7%|▋ | 27776/371472 [2:14:32<26:06:51, 3.66it/s] 7%|▋ | 27777/371472 [2:14:32<26:37:06, 3.59it/s] 7%|▋ | 27778/371472 [2:14:33<27:54:04, 3.42it/s] 7%|▋ | 27779/371472 [2:14:33<27:26:59, 3.48it/s] 7%|▋ | 27780/371472 [2:14:33<27:44:20, 3.44it/s] {'loss': 4.4045, 'learning_rate': 9.331433558146049e-07, 'epoch': 1.2} + 7%|▋ | 27780/371472 [2:14:33<27:44:20, 3.44it/s] 7%|▋ | 27781/371472 [2:14:33<27:05:26, 3.52it/s] 7%|▋ | 27782/371472 [2:14:34<26:32:55, 3.60it/s] 7%|▋ | 27783/371472 [2:14:34<26:07:17, 3.65it/s] 7%|▋ | 27784/371472 [2:14:34<27:14:21, 3.50it/s] 7%|▋ | 27785/371472 [2:14:35<26:59:28, 3.54it/s] 7%|▋ | 27786/371472 [2:14:35<26:19:20, 3.63it/s] 7%|▋ | 27787/371472 [2:14:35<28:50:48, 3.31it/s] 7%|▋ | 27788/371472 [2:14:35<27:51:16, 3.43it/s] 7%|▋ | 27789/371472 [2:14:36<27:06:40, 3.52it/s] 7%|▋ | 27790/371472 [2:14:36<27:20:18, 3.49it/s] 7%|▋ | 27791/371472 [2:14:36<28:16:50, 3.38it/s] 7%|▋ | 27792/371472 [2:14:37<27:20:01, 3.49it/s] 7%|▋ | 27793/371472 [2:14:37<28:33:30, 3.34it/s] 7%|▋ | 27794/371472 [2:14:37<28:19:48, 3.37it/s] 7%|▋ | 27795/371472 [2:14:38<27:25:21, 3.48it/s] 7%|▋ | 27796/371472 [2:14:38<27:10:25, 3.51it/s] 7%|▋ | 27797/371472 [2:14:38<28:00:20, 3.41it/s] 7%|▋ | 27798/371472 [2:14:38<29:05:15, 3.28it/s] 7%|▋ | 27799/371472 [2:14:39<28:48:05, 3.31it/s] 7%|▋ | 27800/371472 [2:14:39<28:55:15, 3.30it/s] {'loss': 4.5017, 'learning_rate': 9.33094873839126e-07, 'epoch': 1.2} + 7%|▋ | 27800/371472 [2:14:39<28:55:15, 3.30it/s] 7%|▋ | 27801/371472 [2:14:39<28:33:03, 3.34it/s] 7%|▋ | 27802/371472 [2:14:40<28:20:55, 3.37it/s] 7%|▋ | 27803/371472 [2:14:40<30:04:39, 3.17it/s] 7%|▋ | 27804/371472 [2:14:40<29:05:06, 3.28it/s] 7%|▋ | 27805/371472 [2:14:41<29:27:27, 3.24it/s] 7%|▋ | 27806/371472 [2:14:41<29:04:55, 3.28it/s] 7%|▋ | 27807/371472 [2:14:41<28:20:58, 3.37it/s] 7%|▋ | 27808/371472 [2:14:41<28:26:02, 3.36it/s] 7%|▋ | 27809/371472 [2:14:42<28:20:37, 3.37it/s] 7%|▋ | 27810/371472 [2:14:42<28:15:16, 3.38it/s] 7%|▋ | 27811/371472 [2:14:42<27:57:03, 3.42it/s] 7%|▋ | 27812/371472 [2:14:43<27:45:32, 3.44it/s] 7%|▋ | 27813/371472 [2:14:43<27:11:15, 3.51it/s] 7%|▋ | 27814/371472 [2:14:43<27:40:22, 3.45it/s] 7%|▋ | 27815/371472 [2:14:43<27:18:28, 3.50it/s] 7%|▋ | 27816/371472 [2:14:44<27:15:22, 3.50it/s] 7%|▋ | 27817/371472 [2:14:44<31:35:35, 3.02it/s] 7%|▋ | 27818/371472 [2:14:45<31:15:36, 3.05it/s] 7%|▋ | 27819/371472 [2:14:45<29:52:56, 3.19it/s] 7%|▋ | 27820/371472 [2:14:45<28:37:59, 3.33it/s] {'loss': 4.397, 'learning_rate': 9.330463918636471e-07, 'epoch': 1.2} + 7%|▋ | 27820/371472 [2:14:45<28:37:59, 3.33it/s] 7%|▋ | 27821/371472 [2:14:45<27:44:09, 3.44it/s] 7%|▋ | 27822/371472 [2:14:46<26:25:00, 3.61it/s] 7%|▋ | 27823/371472 [2:14:46<26:46:02, 3.57it/s] 7%|▋ | 27824/371472 [2:14:46<27:24:55, 3.48it/s] 7%|▋ | 27825/371472 [2:14:46<27:33:59, 3.46it/s] 7%|▋ | 27826/371472 [2:14:47<26:52:02, 3.55it/s] 7%|▋ | 27827/371472 [2:14:47<26:37:07, 3.59it/s] 7%|▋ | 27828/371472 [2:14:47<28:01:44, 3.41it/s] 7%|▋ | 27829/371472 [2:14:48<27:43:47, 3.44it/s] 7%|▋ | 27830/371472 [2:14:48<26:38:45, 3.58it/s] 7%|▋ | 27831/371472 [2:14:48<26:05:48, 3.66it/s] 7%|▋ | 27832/371472 [2:14:48<26:05:29, 3.66it/s] 7%|▋ | 27833/371472 [2:14:49<27:04:26, 3.53it/s] 7%|▋ | 27834/371472 [2:14:49<27:10:35, 3.51it/s] 7%|▋ | 27835/371472 [2:14:49<28:21:39, 3.37it/s] 7%|▋ | 27836/371472 [2:14:50<26:52:24, 3.55it/s] 7%|▋ | 27837/371472 [2:14:50<26:19:12, 3.63it/s] 7%|▋ | 27838/371472 [2:14:50<26:55:23, 3.55it/s] 7%|▋ | 27839/371472 [2:14:50<29:29:14, 3.24it/s] 7%|▋ | 27840/371472 [2:14:51<30:21:14, 3.14it/s] {'loss': 4.4905, 'learning_rate': 9.329979098881682e-07, 'epoch': 1.2} + 7%|▋ | 27840/371472 [2:14:51<30:21:14, 3.14it/s] 7%|▋ | 27841/371472 [2:14:51<29:08:35, 3.28it/s] 7%|▋ | 27842/371472 [2:14:51<28:17:43, 3.37it/s] 7%|▋ | 27843/371472 [2:14:52<28:00:30, 3.41it/s] 7%|▋ | 27844/371472 [2:14:52<30:57:05, 3.08it/s] 7%|▋ | 27845/371472 [2:14:52<29:06:17, 3.28it/s] 7%|▋ | 27846/371472 [2:14:53<32:18:14, 2.95it/s] 7%|▋ | 27847/371472 [2:14:53<32:33:51, 2.93it/s] 7%|▋ | 27848/371472 [2:14:53<31:02:59, 3.07it/s] 7%|▋ | 27849/371472 [2:14:54<30:08:46, 3.17it/s] 7%|▋ | 27850/371472 [2:14:54<29:31:58, 3.23it/s] 7%|▋ | 27851/371472 [2:14:54<28:32:15, 3.34it/s] 7%|▋ | 27852/371472 [2:14:55<28:28:13, 3.35it/s] 7%|▋ | 27853/371472 [2:14:55<26:55:05, 3.55it/s] 7%|▋ | 27854/371472 [2:14:55<27:03:45, 3.53it/s] 7%|▋ | 27855/371472 [2:14:55<26:43:26, 3.57it/s] 7%|▋ | 27856/371472 [2:14:56<27:17:58, 3.50it/s] 7%|▋ | 27857/371472 [2:14:56<26:34:39, 3.59it/s] 7%|▋ | 27858/371472 [2:14:56<28:52:01, 3.31it/s] 7%|▋ | 27859/371472 [2:14:57<27:39:45, 3.45it/s] 7%|▋ | 27860/371472 [2:14:57<27:56:18, 3.42it/s] {'loss': 4.3495, 'learning_rate': 9.329494279126894e-07, 'epoch': 1.2} + 7%|▋ | 27860/371472 [2:14:57<27:56:18, 3.42it/s] 8%|▊ | 27861/371472 [2:14:57<26:48:46, 3.56it/s] 8%|▊ | 27862/371472 [2:14:57<26:12:42, 3.64it/s] 8%|▊ | 27863/371472 [2:14:58<26:21:58, 3.62it/s] 8%|▊ | 27864/371472 [2:14:58<26:55:28, 3.54it/s] 8%|▊ | 27865/371472 [2:14:58<26:29:11, 3.60it/s] 8%|▊ | 27866/371472 [2:14:58<26:28:59, 3.60it/s] 8%|▊ | 27867/371472 [2:14:59<26:02:33, 3.66it/s] 8%|▊ | 27868/371472 [2:14:59<25:28:02, 3.75it/s] 8%|▊ | 27869/371472 [2:14:59<25:43:24, 3.71it/s] 8%|▊ | 27870/371472 [2:15:00<26:31:56, 3.60it/s] 8%|▊ | 27871/371472 [2:15:00<25:40:22, 3.72it/s] 8%|▊ | 27872/371472 [2:15:00<26:45:00, 3.57it/s] 8%|▊ | 27873/371472 [2:15:00<26:48:06, 3.56it/s] 8%|▊ | 27874/371472 [2:15:01<27:37:00, 3.46it/s] 8%|▊ | 27875/371472 [2:15:01<28:08:06, 3.39it/s] 8%|▊ | 27876/371472 [2:15:01<27:24:59, 3.48it/s] 8%|▊ | 27877/371472 [2:15:02<26:48:37, 3.56it/s] 8%|▊ | 27878/371472 [2:15:02<26:47:32, 3.56it/s] 8%|▊ | 27879/371472 [2:15:02<26:15:25, 3.63it/s] 8%|▊ | 27880/371472 [2:15:02<26:51:46, 3.55it/s] {'loss': 4.4544, 'learning_rate': 9.329009459372103e-07, 'epoch': 1.2} + 8%|▊ | 27880/371472 [2:15:02<26:51:46, 3.55it/s] 8%|▊ | 27881/371472 [2:15:03<26:51:23, 3.55it/s] 8%|▊ | 27882/371472 [2:15:03<26:37:58, 3.58it/s] 8%|▊ | 27883/371472 [2:15:03<26:40:35, 3.58it/s] 8%|▊ | 27884/371472 [2:15:04<30:08:20, 3.17it/s] 8%|▊ | 27885/371472 [2:15:04<29:11:13, 3.27it/s] 8%|▊ | 27886/371472 [2:15:04<29:41:40, 3.21it/s] 8%|▊ | 27887/371472 [2:15:05<29:31:20, 3.23it/s] 8%|▊ | 27888/371472 [2:15:05<27:58:44, 3.41it/s] 8%|▊ | 27889/371472 [2:15:05<28:27:29, 3.35it/s] 8%|▊ | 27890/371472 [2:15:05<28:43:45, 3.32it/s] 8%|▊ | 27891/371472 [2:15:06<28:12:14, 3.38it/s] 8%|▊ | 27892/371472 [2:15:06<27:17:28, 3.50it/s] 8%|▊ | 27893/371472 [2:15:06<27:59:09, 3.41it/s] 8%|▊ | 27894/371472 [2:15:06<27:03:27, 3.53it/s] 8%|▊ | 27895/371472 [2:15:07<27:31:09, 3.47it/s] 8%|▊ | 27896/371472 [2:15:07<27:02:42, 3.53it/s] 8%|▊ | 27897/371472 [2:15:07<26:00:13, 3.67it/s] 8%|▊ | 27898/371472 [2:15:08<27:16:23, 3.50it/s] 8%|▊ | 27899/371472 [2:15:08<26:28:22, 3.61it/s] 8%|▊ | 27900/371472 [2:15:08<27:22:39, 3.49it/s] {'loss': 4.5536, 'learning_rate': 9.328524639617315e-07, 'epoch': 1.2} + 8%|▊ | 27900/371472 [2:15:08<27:22:39, 3.49it/s] 8%|▊ | 27901/371472 [2:15:08<26:51:57, 3.55it/s] 8%|▊ | 27902/371472 [2:15:09<28:07:17, 3.39it/s] 8%|▊ | 27903/371472 [2:15:09<29:36:10, 3.22it/s] 8%|▊ | 27904/371472 [2:15:09<28:28:17, 3.35it/s] 8%|▊ | 27905/371472 [2:15:10<27:56:06, 3.42it/s] 8%|▊ | 27906/371472 [2:15:10<27:43:11, 3.44it/s] 8%|▊ | 27907/371472 [2:15:10<26:47:40, 3.56it/s] 8%|▊ | 27908/371472 [2:15:11<28:04:13, 3.40it/s] 8%|▊ | 27909/371472 [2:15:11<27:03:27, 3.53it/s] 8%|▊ | 27910/371472 [2:15:11<27:00:34, 3.53it/s] 8%|▊ | 27911/371472 [2:15:11<27:15:30, 3.50it/s] 8%|▊ | 27912/371472 [2:15:12<26:43:08, 3.57it/s] 8%|▊ | 27913/371472 [2:15:12<26:38:57, 3.58it/s] 8%|▊ | 27914/371472 [2:15:12<27:10:14, 3.51it/s] 8%|▊ | 27915/371472 [2:15:13<27:05:13, 3.52it/s] 8%|▊ | 27916/371472 [2:15:13<27:27:54, 3.47it/s] 8%|▊ | 27917/371472 [2:15:13<26:59:32, 3.54it/s] 8%|▊ | 27918/371472 [2:15:13<27:53:49, 3.42it/s] 8%|▊ | 27919/371472 [2:15:14<28:54:27, 3.30it/s] 8%|▊ | 27920/371472 [2:15:14<28:40:43, 3.33it/s] {'loss': 4.534, 'learning_rate': 9.328039819862526e-07, 'epoch': 1.2} + 8%|▊ | 27920/371472 [2:15:14<28:40:43, 3.33it/s] 8%|▊ | 27921/371472 [2:15:14<28:24:10, 3.36it/s] 8%|▊ | 27922/371472 [2:15:15<29:14:39, 3.26it/s] 8%|▊ | 27923/371472 [2:15:15<27:58:57, 3.41it/s] 8%|▊ | 27924/371472 [2:15:15<29:40:08, 3.22it/s] 8%|▊ | 27925/371472 [2:15:16<28:19:46, 3.37it/s] 8%|▊ | 27926/371472 [2:15:16<27:48:17, 3.43it/s] 8%|▊ | 27927/371472 [2:15:16<26:51:28, 3.55it/s] 8%|▊ | 27928/371472 [2:15:16<26:48:10, 3.56it/s] 8%|▊ | 27929/371472 [2:15:17<28:01:11, 3.41it/s] 8%|▊ | 27930/371472 [2:15:17<29:04:23, 3.28it/s] 8%|▊ | 27931/371472 [2:15:17<27:57:51, 3.41it/s] 8%|▊ | 27932/371472 [2:15:18<26:51:13, 3.55it/s] 8%|▊ | 27933/371472 [2:15:18<26:38:18, 3.58it/s] 8%|▊ | 27934/371472 [2:15:18<25:45:41, 3.70it/s] 8%|▊ | 27935/371472 [2:15:18<26:46:13, 3.56it/s] 8%|▊ | 27936/371472 [2:15:19<26:20:57, 3.62it/s] 8%|▊ | 27937/371472 [2:15:19<26:08:57, 3.65it/s] 8%|▊ | 27938/371472 [2:15:19<26:31:06, 3.60it/s] 8%|▊ | 27939/371472 [2:15:19<26:38:24, 3.58it/s] 8%|▊ | 27940/371472 [2:15:20<26:42:18, 3.57it/s] {'loss': 4.6825, 'learning_rate': 9.327555000107738e-07, 'epoch': 1.2} + 8%|▊ | 27940/371472 [2:15:20<26:42:18, 3.57it/s] 8%|▊ | 27941/371472 [2:15:20<27:08:07, 3.52it/s] 8%|▊ | 27942/371472 [2:15:20<29:15:22, 3.26it/s] 8%|▊ | 27943/371472 [2:15:21<28:22:13, 3.36it/s] 8%|▊ | 27944/371472 [2:15:21<27:44:13, 3.44it/s] 8%|▊ | 27945/371472 [2:15:21<26:40:29, 3.58it/s] 8%|▊ | 27946/371472 [2:15:21<26:23:07, 3.62it/s] 8%|▊ | 27947/371472 [2:15:22<27:03:05, 3.53it/s] 8%|▊ | 27948/371472 [2:15:22<28:38:28, 3.33it/s] 8%|▊ | 27949/371472 [2:15:22<29:16:08, 3.26it/s] 8%|▊ | 27950/371472 [2:15:23<29:57:44, 3.18it/s] 8%|▊ | 27951/371472 [2:15:23<28:57:37, 3.29it/s] 8%|▊ | 27952/371472 [2:15:23<28:54:31, 3.30it/s] 8%|▊ | 27953/371472 [2:15:24<28:11:50, 3.38it/s] 8%|▊ | 27954/371472 [2:15:24<27:29:07, 3.47it/s] 8%|▊ | 27955/371472 [2:15:24<26:59:11, 3.54it/s] 8%|▊ | 27956/371472 [2:15:24<27:04:01, 3.53it/s] 8%|▊ | 27957/371472 [2:15:25<29:01:01, 3.29it/s] 8%|▊ | 27958/371472 [2:15:25<30:15:54, 3.15it/s] 8%|▊ | 27959/371472 [2:15:25<29:20:16, 3.25it/s] 8%|▊ | 27960/371472 [2:15:26<28:34:54, 3.34it/s] {'loss': 4.7677, 'learning_rate': 9.327070180352948e-07, 'epoch': 1.2} + 8%|▊ | 27960/371472 [2:15:26<28:34:54, 3.34it/s] 8%|▊ | 27961/371472 [2:15:26<28:34:21, 3.34it/s] 8%|▊ | 27962/371472 [2:15:26<27:39:49, 3.45it/s] 8%|▊ | 27963/371472 [2:15:27<27:00:53, 3.53it/s] 8%|▊ | 27964/371472 [2:15:27<26:50:08, 3.56it/s] 8%|▊ | 27965/371472 [2:15:27<29:20:50, 3.25it/s] 8%|▊ | 27966/371472 [2:15:27<29:52:05, 3.19it/s] 8%|▊ | 27967/371472 [2:15:28<30:30:05, 3.13it/s] 8%|▊ | 27968/371472 [2:15:28<32:42:13, 2.92it/s] 8%|▊ | 27969/371472 [2:15:28<29:50:16, 3.20it/s] 8%|▊ | 27970/371472 [2:15:29<29:38:26, 3.22it/s] 8%|▊ | 27971/371472 [2:15:29<28:39:49, 3.33it/s] 8%|▊ | 27972/371472 [2:15:29<27:38:24, 3.45it/s] 8%|▊ | 27973/371472 [2:15:30<27:06:59, 3.52it/s] 8%|▊ | 27974/371472 [2:15:30<26:24:45, 3.61it/s] 8%|▊ | 27975/371472 [2:15:30<27:00:11, 3.53it/s] 8%|▊ | 27976/371472 [2:15:30<26:36:19, 3.59it/s] 8%|▊ | 27977/371472 [2:15:31<26:23:55, 3.61it/s] 8%|▊ | 27978/371472 [2:15:31<27:01:51, 3.53it/s] 8%|▊ | 27979/371472 [2:15:31<27:29:56, 3.47it/s] 8%|▊ | 27980/371472 [2:15:32<26:53:59, 3.55it/s] {'loss': 4.5732, 'learning_rate': 9.32658536059816e-07, 'epoch': 1.21} + 8%|▊ | 27980/371472 [2:15:32<26:53:59, 3.55it/s] 8%|▊ | 27981/371472 [2:15:32<28:22:02, 3.36it/s] 8%|▊ | 27982/371472 [2:15:32<27:01:57, 3.53it/s] 8%|▊ | 27983/371472 [2:15:32<26:50:03, 3.56it/s] 8%|▊ | 27984/371472 [2:15:33<26:26:21, 3.61it/s] 8%|▊ | 27985/371472 [2:15:33<26:11:58, 3.64it/s] 8%|▊ | 27986/371472 [2:15:33<26:26:09, 3.61it/s] 8%|▊ | 27987/371472 [2:15:33<25:39:35, 3.72it/s] 8%|▊ | 27988/371472 [2:15:34<26:03:37, 3.66it/s] 8%|▊ | 27989/371472 [2:15:34<26:10:59, 3.64it/s] 8%|▊ | 27990/371472 [2:15:34<27:04:52, 3.52it/s] 8%|▊ | 27991/371472 [2:15:35<27:16:07, 3.50it/s] 8%|▊ | 27992/371472 [2:15:35<28:42:15, 3.32it/s] 8%|▊ | 27993/371472 [2:15:35<28:35:40, 3.34it/s] 8%|▊ | 27994/371472 [2:15:36<28:25:31, 3.36it/s] 8%|▊ | 27995/371472 [2:15:36<27:50:29, 3.43it/s] 8%|▊ | 27996/371472 [2:15:36<28:24:20, 3.36it/s] 8%|▊ | 27997/371472 [2:15:37<29:50:00, 3.20it/s] 8%|▊ | 27998/371472 [2:15:37<28:27:02, 3.35it/s] 8%|▊ | 27999/371472 [2:15:37<27:30:35, 3.47it/s] 8%|▊ | 28000/371472 [2:15:37<27:54:00, 3.42it/s] {'loss': 4.4788, 'learning_rate': 9.326100540843371e-07, 'epoch': 1.21} + 8%|▊ | 28000/371472 [2:15:37<27:54:00, 3.42it/s] 8%|▊ | 28001/371472 [2:15:38<27:04:50, 3.52it/s] 8%|▊ | 28002/371472 [2:15:38<27:12:41, 3.51it/s] 8%|▊ | 28003/371472 [2:15:38<28:32:46, 3.34it/s] 8%|▊ | 28004/371472 [2:15:38<27:48:38, 3.43it/s] 8%|▊ | 28005/371472 [2:15:39<27:51:08, 3.43it/s] 8%|▊ | 28006/371472 [2:15:39<27:21:51, 3.49it/s] 8%|▊ | 28007/371472 [2:15:39<26:57:36, 3.54it/s] 8%|▊ | 28008/371472 [2:15:40<27:13:22, 3.50it/s] 8%|▊ | 28009/371472 [2:15:40<27:13:59, 3.50it/s] 8%|▊ | 28010/371472 [2:15:40<26:36:09, 3.59it/s] 8%|▊ | 28011/371472 [2:15:40<26:28:24, 3.60it/s] 8%|▊ | 28012/371472 [2:15:41<25:43:25, 3.71it/s] 8%|▊ | 28013/371472 [2:15:41<26:42:28, 3.57it/s] 8%|▊ | 28014/371472 [2:15:41<26:18:39, 3.63it/s] 8%|▊ | 28015/371472 [2:15:42<25:47:57, 3.70it/s] 8%|▊ | 28016/371472 [2:15:42<25:21:56, 3.76it/s] 8%|▊ | 28017/371472 [2:15:42<26:21:19, 3.62it/s] 8%|▊ | 28018/371472 [2:15:42<26:48:34, 3.56it/s] 8%|▊ | 28019/371472 [2:15:43<28:17:08, 3.37it/s] 8%|▊ | 28020/371472 [2:15:43<27:35:49, 3.46it/s] {'loss': 4.4752, 'learning_rate': 9.325615721088582e-07, 'epoch': 1.21} + 8%|▊ | 28020/371472 [2:15:43<27:35:49, 3.46it/s] 8%|▊ | 28021/371472 [2:15:43<28:48:49, 3.31it/s] 8%|▊ | 28022/371472 [2:15:44<30:16:07, 3.15it/s] 8%|▊ | 28023/371472 [2:15:44<28:41:31, 3.33it/s] 8%|▊ | 28024/371472 [2:15:44<27:40:01, 3.45it/s] 8%|▊ | 28025/371472 [2:15:44<27:51:25, 3.42it/s] 8%|▊ | 28026/371472 [2:15:45<27:35:00, 3.46it/s] 8%|▊ | 28027/371472 [2:15:45<27:39:51, 3.45it/s] 8%|▊ | 28028/371472 [2:15:45<27:54:20, 3.42it/s] 8%|▊ | 28029/371472 [2:15:46<27:12:16, 3.51it/s] 8%|▊ | 28030/371472 [2:15:46<26:25:30, 3.61it/s] 8%|▊ | 28031/371472 [2:15:46<27:21:40, 3.49it/s] 8%|▊ | 28032/371472 [2:15:47<27:52:08, 3.42it/s] 8%|▊ | 28033/371472 [2:15:47<28:01:47, 3.40it/s] 8%|▊ | 28034/371472 [2:15:47<28:34:34, 3.34it/s] 8%|▊ | 28035/371472 [2:15:47<28:21:37, 3.36it/s] 8%|▊ | 28036/371472 [2:15:48<28:24:23, 3.36it/s] 8%|▊ | 28037/371472 [2:15:48<27:28:56, 3.47it/s] 8%|▊ | 28038/371472 [2:15:48<28:29:52, 3.35it/s] 8%|▊ | 28039/371472 [2:15:49<28:18:18, 3.37it/s] 8%|▊ | 28040/371472 [2:15:49<29:20:39, 3.25it/s] {'loss': 4.5123, 'learning_rate': 9.325130901333792e-07, 'epoch': 1.21} + 8%|▊ | 28040/371472 [2:15:49<29:20:39, 3.25it/s] 8%|▊ | 28041/371472 [2:15:49<27:50:50, 3.43it/s] 8%|▊ | 28042/371472 [2:15:49<28:13:12, 3.38it/s] 8%|▊ | 28043/371472 [2:15:50<30:06:48, 3.17it/s] 8%|▊ | 28044/371472 [2:15:50<28:10:57, 3.38it/s] 8%|▊ | 28045/371472 [2:15:50<31:23:35, 3.04it/s] 8%|▊ | 28046/371472 [2:15:51<30:54:33, 3.09it/s] 8%|▊ | 28047/371472 [2:15:51<29:49:44, 3.20it/s] 8%|▊ | 28048/371472 [2:15:51<29:59:33, 3.18it/s] 8%|▊ | 28049/371472 [2:15:52<29:03:57, 3.28it/s] 8%|▊ | 28050/371472 [2:15:52<28:07:35, 3.39it/s] 8%|▊ | 28051/371472 [2:15:52<29:56:42, 3.19it/s] 8%|▊ | 28052/371472 [2:15:53<30:05:59, 3.17it/s] 8%|▊ | 28053/371472 [2:15:53<28:06:59, 3.39it/s] 8%|▊ | 28054/371472 [2:15:53<27:19:04, 3.49it/s] 8%|▊ | 28055/371472 [2:15:53<28:17:55, 3.37it/s] 8%|▊ | 28056/371472 [2:15:54<27:20:37, 3.49it/s] 8%|▊ | 28057/371472 [2:15:54<28:01:02, 3.40it/s] 8%|▊ | 28058/371472 [2:15:54<29:05:57, 3.28it/s] 8%|▊ | 28059/371472 [2:15:55<30:48:07, 3.10it/s] 8%|▊ | 28060/371472 [2:15:55<30:10:28, 3.16it/s] {'loss': 4.2612, 'learning_rate': 9.324646081579004e-07, 'epoch': 1.21} + 8%|▊ | 28060/371472 [2:15:55<30:10:28, 3.16it/s] 8%|▊ | 28061/371472 [2:15:55<29:37:15, 3.22it/s] 8%|▊ | 28062/371472 [2:15:56<30:09:11, 3.16it/s] 8%|▊ | 28063/371472 [2:15:56<29:25:06, 3.24it/s] 8%|▊ | 28064/371472 [2:15:56<28:52:41, 3.30it/s] 8%|▊ | 28065/371472 [2:15:57<30:05:51, 3.17it/s] 8%|▊ | 28066/371472 [2:15:57<28:02:46, 3.40it/s] 8%|▊ | 28067/371472 [2:15:57<28:16:42, 3.37it/s] 8%|▊ | 28068/371472 [2:15:57<27:13:41, 3.50it/s] 8%|▊ | 28069/371472 [2:15:58<26:54:15, 3.55it/s] 8%|▊ | 28070/371472 [2:15:58<27:31:01, 3.47it/s] 8%|▊ | 28071/371472 [2:15:58<27:09:08, 3.51it/s] 8%|▊ | 28072/371472 [2:15:59<29:05:25, 3.28it/s] 8%|▊ | 28073/371472 [2:15:59<29:44:05, 3.21it/s] 8%|▊ | 28074/371472 [2:15:59<28:07:56, 3.39it/s] 8%|▊ | 28075/371472 [2:15:59<28:07:34, 3.39it/s] 8%|▊ | 28076/371472 [2:16:00<27:03:00, 3.53it/s] 8%|▊ | 28077/371472 [2:16:00<26:37:22, 3.58it/s] 8%|▊ | 28078/371472 [2:16:00<27:29:59, 3.47it/s] 8%|▊ | 28079/371472 [2:16:01<27:11:22, 3.51it/s] 8%|▊ | 28080/371472 [2:16:01<28:18:01, 3.37it/s] {'loss': 4.7906, 'learning_rate': 9.324161261824215e-07, 'epoch': 1.21} + 8%|▊ | 28080/371472 [2:16:01<28:18:01, 3.37it/s] 8%|▊ | 28081/371472 [2:16:01<27:09:15, 3.51it/s] 8%|▊ | 28082/371472 [2:16:01<27:01:21, 3.53it/s] 8%|▊ | 28083/371472 [2:16:02<27:09:34, 3.51it/s] 8%|▊ | 28084/371472 [2:16:02<26:27:53, 3.60it/s] 8%|▊ | 28085/371472 [2:16:02<27:07:05, 3.52it/s] 8%|▊ | 28086/371472 [2:16:03<27:22:57, 3.48it/s] 8%|▊ | 28087/371472 [2:16:03<27:53:04, 3.42it/s] 8%|▊ | 28088/371472 [2:16:03<27:36:45, 3.45it/s] 8%|▊ | 28089/371472 [2:16:03<26:57:31, 3.54it/s] 8%|▊ | 28090/371472 [2:16:04<27:29:59, 3.47it/s] 8%|▊ | 28091/371472 [2:16:04<26:32:04, 3.59it/s] 8%|▊ | 28092/371472 [2:16:04<27:00:03, 3.53it/s] 8%|▊ | 28093/371472 [2:16:05<26:57:00, 3.54it/s] 8%|▊ | 28094/371472 [2:16:05<27:24:47, 3.48it/s] 8%|▊ | 28095/371472 [2:16:05<27:20:13, 3.49it/s] 8%|▊ | 28096/371472 [2:16:05<27:58:41, 3.41it/s] 8%|▊ | 28097/371472 [2:16:06<27:24:37, 3.48it/s] 8%|▊ | 28098/371472 [2:16:06<29:38:21, 3.22it/s] 8%|▊ | 28099/371472 [2:16:06<28:07:45, 3.39it/s] 8%|▊ | 28100/371472 [2:16:07<27:36:51, 3.45it/s] {'loss': 4.4646, 'learning_rate': 9.323676442069426e-07, 'epoch': 1.21} + 8%|▊ | 28100/371472 [2:16:07<27:36:51, 3.45it/s] 8%|▊ | 28101/371472 [2:16:07<26:36:25, 3.58it/s] 8%|▊ | 28102/371472 [2:16:07<27:34:28, 3.46it/s] 8%|▊ | 28103/371472 [2:16:08<27:19:32, 3.49it/s] 8%|▊ | 28104/371472 [2:16:08<28:34:48, 3.34it/s] 8%|▊ | 28105/371472 [2:16:08<27:42:49, 3.44it/s] 8%|▊ | 28106/371472 [2:16:08<27:16:43, 3.50it/s] 8%|▊ | 28107/371472 [2:16:09<28:12:53, 3.38it/s] 8%|▊ | 28108/371472 [2:16:09<27:32:01, 3.46it/s] 8%|▊ | 28109/371472 [2:16:09<26:03:48, 3.66it/s] 8%|▊ | 28110/371472 [2:16:09<25:47:40, 3.70it/s] 8%|▊ | 28111/371472 [2:16:10<26:26:22, 3.61it/s] 8%|▊ | 28112/371472 [2:16:10<25:52:52, 3.69it/s] 8%|▊ | 28113/371472 [2:16:10<26:14:22, 3.63it/s] 8%|▊ | 28114/371472 [2:16:11<26:36:52, 3.58it/s] 8%|▊ | 28115/371472 [2:16:11<25:37:04, 3.72it/s] 8%|▊ | 28116/371472 [2:16:11<26:05:01, 3.66it/s] 8%|▊ | 28117/371472 [2:16:11<25:39:11, 3.72it/s] 8%|▊ | 28118/371472 [2:16:12<25:11:15, 3.79it/s] 8%|▊ | 28119/371472 [2:16:12<26:24:56, 3.61it/s] 8%|▊ | 28120/371472 [2:16:12<28:04:02, 3.40it/s] {'loss': 4.2376, 'learning_rate': 9.323191622314637e-07, 'epoch': 1.21} + 8%|▊ | 28120/371472 [2:16:12<28:04:02, 3.40it/s] 8%|▊ | 28121/371472 [2:16:13<30:08:03, 3.17it/s] 8%|▊ | 28122/371472 [2:16:13<29:47:50, 3.20it/s] 8%|▊ | 28123/371472 [2:16:13<28:38:45, 3.33it/s] 8%|▊ | 28124/371472 [2:16:13<27:51:08, 3.42it/s] 8%|▊ | 28125/371472 [2:16:14<29:50:35, 3.20it/s] 8%|▊ | 28126/371472 [2:16:14<28:11:46, 3.38it/s] 8%|▊ | 28127/371472 [2:16:14<28:05:21, 3.40it/s] 8%|▊ | 28128/371472 [2:16:15<28:18:27, 3.37it/s] 8%|▊ | 28129/371472 [2:16:15<27:40:09, 3.45it/s] 8%|▊ | 28130/371472 [2:16:15<27:32:03, 3.46it/s] 8%|▊ | 28131/371472 [2:16:16<27:18:40, 3.49it/s] 8%|▊ | 28132/371472 [2:16:16<26:40:41, 3.57it/s] 8%|▊ | 28133/371472 [2:16:16<26:20:19, 3.62it/s] 8%|▊ | 28134/371472 [2:16:16<27:03:53, 3.52it/s] 8%|▊ | 28135/371472 [2:16:17<27:35:03, 3.46it/s] 8%|▊ | 28136/371472 [2:16:17<27:53:46, 3.42it/s] 8%|▊ | 28137/371472 [2:16:17<27:23:54, 3.48it/s] 8%|▊ | 28138/371472 [2:16:18<27:36:30, 3.45it/s] 8%|▊ | 28139/371472 [2:16:18<27:07:30, 3.52it/s] 8%|▊ | 28140/371472 [2:16:18<26:42:41, 3.57it/s] {'loss': 4.5163, 'learning_rate': 9.322706802559848e-07, 'epoch': 1.21} + 8%|▊ | 28140/371472 [2:16:18<26:42:41, 3.57it/s] 8%|▊ | 28141/371472 [2:16:18<26:33:30, 3.59it/s] 8%|▊ | 28142/371472 [2:16:19<25:51:48, 3.69it/s] 8%|▊ | 28143/371472 [2:16:19<25:31:43, 3.74it/s] 8%|▊ | 28144/371472 [2:16:19<25:54:08, 3.68it/s] 8%|▊ | 28145/371472 [2:16:20<28:05:00, 3.40it/s] 8%|▊ | 28146/371472 [2:16:20<27:23:52, 3.48it/s] 8%|▊ | 28147/371472 [2:16:20<28:49:41, 3.31it/s] 8%|▊ | 28148/371472 [2:16:20<28:25:21, 3.36it/s] 8%|▊ | 28149/371472 [2:16:21<29:21:40, 3.25it/s] 8%|▊ | 28150/371472 [2:16:21<29:24:57, 3.24it/s] 8%|▊ | 28151/371472 [2:16:21<29:28:23, 3.24it/s] 8%|▊ | 28152/371472 [2:16:22<30:00:15, 3.18it/s] 8%|▊ | 28153/371472 [2:16:22<29:28:17, 3.24it/s] 8%|▊ | 28154/371472 [2:16:22<28:20:10, 3.37it/s] 8%|▊ | 28155/371472 [2:16:23<28:40:01, 3.33it/s] 8%|▊ | 28156/371472 [2:16:23<27:23:11, 3.48it/s] 8%|▊ | 28157/371472 [2:16:23<27:12:27, 3.51it/s] 8%|▊ | 28158/371472 [2:16:23<27:01:36, 3.53it/s] 8%|▊ | 28159/371472 [2:16:24<26:40:21, 3.58it/s] 8%|▊ | 28160/371472 [2:16:24<26:49:56, 3.55it/s] {'loss': 4.3873, 'learning_rate': 9.322221982805059e-07, 'epoch': 1.21} + 8%|▊ | 28160/371472 [2:16:24<26:49:56, 3.55it/s] 8%|▊ | 28161/371472 [2:16:24<27:06:54, 3.52it/s] 8%|▊ | 28162/371472 [2:16:24<26:46:55, 3.56it/s] 8%|▊ | 28163/371472 [2:16:25<26:50:11, 3.55it/s] 8%|▊ | 28164/371472 [2:16:25<28:04:25, 3.40it/s] 8%|▊ | 28165/371472 [2:16:25<28:22:51, 3.36it/s] 8%|▊ | 28166/371472 [2:16:26<28:34:49, 3.34it/s] 8%|▊ | 28167/371472 [2:16:26<28:12:27, 3.38it/s] 8%|▊ | 28168/371472 [2:16:26<27:09:34, 3.51it/s] 8%|▊ | 28169/371472 [2:16:27<26:59:58, 3.53it/s] 8%|▊ | 28170/371472 [2:16:27<26:54:21, 3.54it/s] 8%|▊ | 28171/371472 [2:16:27<28:20:05, 3.37it/s] 8%|▊ | 28172/371472 [2:16:27<28:59:26, 3.29it/s] 8%|▊ | 28173/371472 [2:16:28<28:22:34, 3.36it/s] 8%|▊ | 28174/371472 [2:16:28<28:08:21, 3.39it/s] 8%|▊ | 28175/371472 [2:16:28<28:06:03, 3.39it/s] 8%|▊ | 28176/371472 [2:16:29<28:26:53, 3.35it/s] 8%|▊ | 28177/371472 [2:16:29<27:34:36, 3.46it/s] 8%|▊ | 28178/371472 [2:16:29<26:42:35, 3.57it/s] 8%|▊ | 28179/371472 [2:16:29<26:48:06, 3.56it/s] 8%|▊ | 28180/371472 [2:16:30<26:58:35, 3.53it/s] {'loss': 4.4932, 'learning_rate': 9.321737163050269e-07, 'epoch': 1.21} + 8%|▊ | 28180/371472 [2:16:30<26:58:35, 3.53it/s] 8%|▊ | 28181/371472 [2:16:30<27:21:34, 3.49it/s] 8%|▊ | 28182/371472 [2:16:30<30:05:29, 3.17it/s] 8%|▊ | 28183/371472 [2:16:31<28:45:27, 3.32it/s] 8%|▊ | 28184/371472 [2:16:31<27:58:07, 3.41it/s] 8%|▊ | 28185/371472 [2:16:31<30:58:29, 3.08it/s] 8%|▊ | 28186/371472 [2:16:32<29:30:14, 3.23it/s] 8%|▊ | 28187/371472 [2:16:32<28:13:43, 3.38it/s] 8%|▊ | 28188/371472 [2:16:32<30:12:29, 3.16it/s] 8%|▊ | 28189/371472 [2:16:33<29:21:13, 3.25it/s] 8%|▊ | 28190/371472 [2:16:33<28:13:57, 3.38it/s] 8%|▊ | 28191/371472 [2:16:33<26:57:48, 3.54it/s] 8%|▊ | 28192/371472 [2:16:33<26:58:28, 3.54it/s] 8%|▊ | 28193/371472 [2:16:34<26:16:20, 3.63it/s] 8%|▊ | 28194/371472 [2:16:34<25:22:47, 3.76it/s] 8%|▊ | 28195/371472 [2:16:34<25:41:02, 3.71it/s] 8%|▊ | 28196/371472 [2:16:34<27:48:00, 3.43it/s] 8%|▊ | 28197/371472 [2:16:35<28:53:32, 3.30it/s] 8%|▊ | 28198/371472 [2:16:35<28:11:13, 3.38it/s] 8%|▊ | 28199/371472 [2:16:35<28:33:41, 3.34it/s] 8%|▊ | 28200/371472 [2:16:36<29:10:46, 3.27it/s] {'loss': 4.4182, 'learning_rate': 9.321252343295481e-07, 'epoch': 1.21} + 8%|▊ | 28200/371472 [2:16:36<29:10:46, 3.27it/s] 8%|▊ | 28201/371472 [2:16:36<29:20:46, 3.25it/s] 8%|▊ | 28202/371472 [2:16:36<29:26:45, 3.24it/s] 8%|▊ | 28203/371472 [2:16:37<28:11:39, 3.38it/s] 8%|▊ | 28204/371472 [2:16:37<31:23:07, 3.04it/s] 8%|▊ | 28205/371472 [2:16:37<30:26:06, 3.13it/s] 8%|▊ | 28206/371472 [2:16:38<29:47:15, 3.20it/s] 8%|▊ | 28207/371472 [2:16:38<28:21:58, 3.36it/s] 8%|▊ | 28208/371472 [2:16:38<31:02:11, 3.07it/s] 8%|▊ | 28209/371472 [2:16:39<29:52:28, 3.19it/s] 8%|▊ | 28210/371472 [2:16:39<28:52:49, 3.30it/s] 8%|▊ | 28211/371472 [2:16:39<27:56:08, 3.41it/s] 8%|▊ | 28212/371472 [2:16:39<27:29:29, 3.47it/s] 8%|▊ | 28213/371472 [2:16:40<27:31:09, 3.46it/s] 8%|▊ | 28214/371472 [2:16:40<28:07:52, 3.39it/s] 8%|▊ | 28215/371472 [2:16:40<28:23:13, 3.36it/s] 8%|▊ | 28216/371472 [2:16:41<27:50:16, 3.43it/s] 8%|▊ | 28217/371472 [2:16:41<26:52:30, 3.55it/s] 8%|▊ | 28218/371472 [2:16:41<26:38:14, 3.58it/s] 8%|▊ | 28219/371472 [2:16:41<26:10:18, 3.64it/s] 8%|▊ | 28220/371472 [2:16:42<25:49:12, 3.69it/s] {'loss': 4.4323, 'learning_rate': 9.320767523540693e-07, 'epoch': 1.22} + 8%|▊ | 28220/371472 [2:16:42<25:49:12, 3.69it/s] 8%|▊ | 28221/371472 [2:16:42<26:04:02, 3.66it/s] 8%|▊ | 28222/371472 [2:16:42<25:59:26, 3.67it/s] 8%|▊ | 28223/371472 [2:16:42<25:37:38, 3.72it/s] 8%|▊ | 28224/371472 [2:16:43<26:03:47, 3.66it/s] 8%|▊ | 28225/371472 [2:16:43<27:52:44, 3.42it/s] 8%|▊ | 28226/371472 [2:16:43<28:05:08, 3.39it/s] 8%|▊ | 28227/371472 [2:16:44<27:34:41, 3.46it/s] 8%|▊ | 28228/371472 [2:16:44<27:34:18, 3.46it/s] 8%|▊ | 28229/371472 [2:16:44<27:17:12, 3.49it/s] 8%|▊ | 28230/371472 [2:16:44<27:23:38, 3.48it/s] 8%|▊ | 28231/371472 [2:16:45<26:51:12, 3.55it/s] 8%|▊ | 28232/371472 [2:16:45<27:10:08, 3.51it/s] 8%|▊ | 28233/371472 [2:16:45<26:19:57, 3.62it/s] 8%|▊ | 28234/371472 [2:16:46<28:04:06, 3.40it/s] 8%|▊ | 28235/371472 [2:16:46<30:02:33, 3.17it/s] 8%|▊ | 28236/371472 [2:16:46<28:24:30, 3.36it/s] 8%|▊ | 28237/371472 [2:16:47<28:28:12, 3.35it/s] 8%|▊ | 28238/371472 [2:16:47<28:07:08, 3.39it/s] 8%|▊ | 28239/371472 [2:16:47<31:11:39, 3.06it/s] 8%|▊ | 28240/371472 [2:16:47<29:27:05, 3.24it/s] {'loss': 4.5085, 'learning_rate': 9.320282703785904e-07, 'epoch': 1.22} + 8%|▊ | 28240/371472 [2:16:48<29:27:05, 3.24it/s] 8%|▊ | 28241/371472 [2:16:48<28:24:35, 3.36it/s] 8%|▊ | 28242/371472 [2:16:48<31:22:14, 3.04it/s] 8%|▊ | 28243/371472 [2:16:48<30:01:18, 3.18it/s] 8%|▊ | 28244/371472 [2:16:49<29:00:28, 3.29it/s] 8%|▊ | 28245/371472 [2:16:49<29:11:15, 3.27it/s] 8%|▊ | 28246/371472 [2:16:49<29:08:13, 3.27it/s] 8%|▊ | 28247/371472 [2:16:50<29:00:48, 3.29it/s] 8%|▊ | 28248/371472 [2:16:50<29:21:37, 3.25it/s] 8%|▊ | 28249/371472 [2:16:50<29:20:40, 3.25it/s] 8%|▊ | 28250/371472 [2:16:51<27:56:22, 3.41it/s] 8%|▊ | 28251/371472 [2:16:51<27:05:05, 3.52it/s] 8%|▊ | 28252/371472 [2:16:51<26:19:38, 3.62it/s] 8%|▊ | 28253/371472 [2:16:51<26:05:53, 3.65it/s] 8%|▊ | 28254/371472 [2:16:52<26:24:29, 3.61it/s] 8%|▊ | 28255/371472 [2:16:52<25:47:09, 3.70it/s] 8%|▊ | 28256/371472 [2:16:52<26:13:06, 3.64it/s] 8%|▊ | 28257/371472 [2:16:52<25:59:55, 3.67it/s] 8%|▊ | 28258/371472 [2:16:53<25:21:49, 3.76it/s] 8%|▊ | 28259/371472 [2:16:53<25:29:52, 3.74it/s] 8%|▊ | 28260/371472 [2:16:53<28:51:23, 3.30it/s] {'loss': 4.2675, 'learning_rate': 9.319797884031113e-07, 'epoch': 1.22} + 8%|▊ | 28260/371472 [2:16:53<28:51:23, 3.30it/s] 8%|▊ | 28261/371472 [2:16:54<29:09:26, 3.27it/s] 8%|▊ | 28262/371472 [2:16:54<28:36:32, 3.33it/s] 8%|▊ | 28263/371472 [2:16:54<28:01:20, 3.40it/s] 8%|▊ | 28264/371472 [2:16:54<27:13:17, 3.50it/s] 8%|▊ | 28265/371472 [2:16:55<26:23:08, 3.61it/s] 8%|▊ | 28266/371472 [2:16:55<25:35:55, 3.72it/s] 8%|▊ | 28267/371472 [2:16:55<25:53:07, 3.68it/s] 8%|▊ | 28268/371472 [2:16:56<25:18:17, 3.77it/s] 8%|▊ | 28269/371472 [2:16:56<25:58:00, 3.67it/s] 8%|▊ | 28270/371472 [2:16:56<26:56:14, 3.54it/s] 8%|▊ | 28271/371472 [2:16:56<26:10:30, 3.64it/s] 8%|▊ | 28272/371472 [2:16:57<28:18:51, 3.37it/s] 8%|▊ | 28273/371472 [2:16:57<31:44:51, 3.00it/s] 8%|▊ | 28274/371472 [2:16:57<30:09:05, 3.16it/s] 8%|▊ | 28275/371472 [2:16:58<28:46:41, 3.31it/s] 8%|▊ | 28276/371472 [2:16:58<29:55:06, 3.19it/s] 8%|▊ | 28277/371472 [2:16:58<28:34:28, 3.34it/s] 8%|▊ | 28278/371472 [2:16:59<27:16:21, 3.50it/s] 8%|▊ | 28279/371472 [2:16:59<26:17:59, 3.62it/s] 8%|▊ | 28280/371472 [2:16:59<26:02:07, 3.66it/s] {'loss': 4.4825, 'learning_rate': 9.319313064276325e-07, 'epoch': 1.22} + 8%|▊ | 28280/371472 [2:16:59<26:02:07, 3.66it/s] 8%|▊ | 28281/371472 [2:16:59<26:12:00, 3.64it/s] 8%|▊ | 28282/371472 [2:17:00<25:57:45, 3.67it/s] 8%|▊ | 28283/371472 [2:17:00<25:56:57, 3.67it/s] 8%|▊ | 28284/371472 [2:17:00<26:09:16, 3.64it/s] 8%|▊ | 28285/371472 [2:17:00<26:10:23, 3.64it/s] 8%|▊ | 28286/371472 [2:17:01<26:13:19, 3.64it/s] 8%|▊ | 28287/371472 [2:17:01<26:43:12, 3.57it/s] 8%|▊ | 28288/371472 [2:17:01<26:07:34, 3.65it/s] 8%|▊ | 28289/371472 [2:17:02<26:08:01, 3.65it/s] 8%|▊ | 28290/371472 [2:17:02<26:38:59, 3.58it/s] 8%|▊ | 28291/371472 [2:17:02<26:59:28, 3.53it/s] 8%|▊ | 28292/371472 [2:17:02<26:56:24, 3.54it/s] 8%|▊ | 28293/371472 [2:17:03<26:13:56, 3.63it/s] 8%|▊ | 28294/371472 [2:17:03<25:46:19, 3.70it/s] 8%|▊ | 28295/371472 [2:17:03<27:02:05, 3.53it/s] 8%|▊ | 28296/371472 [2:17:03<26:54:29, 3.54it/s] 8%|▊ | 28297/371472 [2:17:04<26:34:23, 3.59it/s] 8%|▊ | 28298/371472 [2:17:04<26:17:15, 3.63it/s] 8%|▊ | 28299/371472 [2:17:04<26:30:15, 3.60it/s] 8%|▊ | 28300/371472 [2:17:05<26:40:27, 3.57it/s] {'loss': 4.7004, 'learning_rate': 9.318828244521536e-07, 'epoch': 1.22} + 8%|▊ | 28300/371472 [2:17:05<26:40:27, 3.57it/s] 8%|▊ | 28301/371472 [2:17:05<25:57:45, 3.67it/s] 8%|▊ | 28302/371472 [2:17:05<25:58:05, 3.67it/s] 8%|▊ | 28303/371472 [2:17:05<27:14:57, 3.50it/s] 8%|▊ | 28304/371472 [2:17:06<27:04:08, 3.52it/s] 8%|▊ | 28305/371472 [2:17:06<29:03:29, 3.28it/s] 8%|▊ | 28306/371472 [2:17:06<31:11:01, 3.06it/s] 8%|▊ | 28307/371472 [2:17:07<30:08:19, 3.16it/s] 8%|▊ | 28308/371472 [2:17:07<29:19:21, 3.25it/s] 8%|▊ | 28309/371472 [2:17:07<28:38:53, 3.33it/s] 8%|▊ | 28310/371472 [2:17:08<28:38:35, 3.33it/s] 8%|▊ | 28311/371472 [2:17:08<28:57:09, 3.29it/s] 8%|▊ | 28312/371472 [2:17:08<29:01:11, 3.28it/s] 8%|▊ | 28313/371472 [2:17:09<28:54:55, 3.30it/s] 8%|▊ | 28314/371472 [2:17:09<28:40:08, 3.32it/s] 8%|▊ | 28315/371472 [2:17:09<27:42:42, 3.44it/s] 8%|▊ | 28316/371472 [2:17:09<27:35:44, 3.45it/s] 8%|▊ | 28317/371472 [2:17:10<27:13:36, 3.50it/s] 8%|▊ | 28318/371472 [2:17:10<28:53:58, 3.30it/s] 8%|▊ | 28319/371472 [2:17:10<28:00:34, 3.40it/s] 8%|▊ | 28320/371472 [2:17:11<26:36:42, 3.58it/s] {'loss': 4.1579, 'learning_rate': 9.318343424766748e-07, 'epoch': 1.22} + 8%|▊ | 28320/371472 [2:17:11<26:36:42, 3.58it/s] 8%|▊ | 28321/371472 [2:17:11<26:16:47, 3.63it/s] 8%|▊ | 28322/371472 [2:17:11<26:30:55, 3.59it/s] 8%|▊ | 28323/371472 [2:17:11<28:40:14, 3.32it/s] 8%|▊ | 28324/371472 [2:17:12<27:22:33, 3.48it/s] 8%|▊ | 28325/371472 [2:17:12<27:11:20, 3.51it/s] 8%|▊ | 28326/371472 [2:17:12<29:42:12, 3.21it/s] 8%|▊ | 28327/371472 [2:17:13<28:41:09, 3.32it/s] 8%|▊ | 28328/371472 [2:17:13<29:41:32, 3.21it/s] 8%|▊ | 28329/371472 [2:17:13<29:01:39, 3.28it/s] 8%|▊ | 28330/371472 [2:17:14<28:25:07, 3.35it/s] 8%|▊ | 28331/371472 [2:17:14<28:40:12, 3.32it/s] 8%|▊ | 28332/371472 [2:17:14<27:55:26, 3.41it/s] 8%|▊ | 28333/371472 [2:17:14<26:50:58, 3.55it/s] 8%|▊ | 28334/371472 [2:17:15<27:42:36, 3.44it/s] 8%|▊ | 28335/371472 [2:17:15<27:30:20, 3.47it/s] 8%|▊ | 28336/371472 [2:17:15<27:28:54, 3.47it/s] 8%|▊ | 28337/371472 [2:17:16<28:42:31, 3.32it/s] 8%|▊ | 28338/371472 [2:17:16<27:22:10, 3.48it/s] 8%|▊ | 28339/371472 [2:17:16<26:31:07, 3.59it/s] 8%|▊ | 28340/371472 [2:17:16<25:59:19, 3.67it/s] {'loss': 4.4969, 'learning_rate': 9.317858605011958e-07, 'epoch': 1.22} + 8%|▊ | 28340/371472 [2:17:16<25:59:19, 3.67it/s] 8%|▊ | 28341/371472 [2:17:17<25:49:59, 3.69it/s] 8%|▊ | 28342/371472 [2:17:17<26:34:11, 3.59it/s] 8%|▊ | 28343/371472 [2:17:17<27:12:40, 3.50it/s] 8%|▊ | 28344/371472 [2:17:18<28:44:15, 3.32it/s] 8%|▊ | 28345/371472 [2:17:18<32:21:10, 2.95it/s] 8%|▊ | 28346/371472 [2:17:18<30:28:48, 3.13it/s] 8%|▊ | 28347/371472 [2:17:19<28:48:48, 3.31it/s] 8%|▊ | 28348/371472 [2:17:19<28:35:28, 3.33it/s] 8%|▊ | 28349/371472 [2:17:19<28:17:31, 3.37it/s] 8%|▊ | 28350/371472 [2:17:19<27:22:24, 3.48it/s] 8%|▊ | 28351/371472 [2:17:20<26:50:20, 3.55it/s] 8%|▊ | 28352/371472 [2:17:20<26:21:10, 3.62it/s] 8%|▊ | 28353/371472 [2:17:20<26:01:28, 3.66it/s] 8%|▊ | 28354/371472 [2:17:20<26:21:40, 3.62it/s] 8%|▊ | 28355/371472 [2:17:21<25:51:22, 3.69it/s] 8%|▊ | 28356/371472 [2:17:21<25:28:21, 3.74it/s] 8%|▊ | 28357/371472 [2:17:21<26:51:59, 3.55it/s] 8%|▊ | 28358/371472 [2:17:22<26:57:44, 3.53it/s] 8%|▊ | 28359/371472 [2:17:22<26:59:17, 3.53it/s] 8%|▊ | 28360/371472 [2:17:22<26:00:28, 3.66it/s] {'loss': 4.8679, 'learning_rate': 9.31737378525717e-07, 'epoch': 1.22} + 8%|▊ | 28360/371472 [2:17:22<26:00:28, 3.66it/s] 8%|▊ | 28361/371472 [2:17:22<26:21:51, 3.62it/s] 8%|▊ | 28362/371472 [2:17:23<26:32:54, 3.59it/s] 8%|▊ | 28363/371472 [2:17:23<26:31:52, 3.59it/s] 8%|▊ | 28364/371472 [2:17:23<27:22:02, 3.48it/s] 8%|▊ | 28365/371472 [2:17:24<27:28:42, 3.47it/s] 8%|▊ | 28366/371472 [2:17:24<27:09:43, 3.51it/s] 8%|▊ | 28367/371472 [2:17:24<27:16:03, 3.50it/s] 8%|▊ | 28368/371472 [2:17:24<28:37:02, 3.33it/s] 8%|▊ | 28369/371472 [2:17:25<28:29:51, 3.34it/s] 8%|▊ | 28370/371472 [2:17:25<27:12:38, 3.50it/s] 8%|▊ | 28371/371472 [2:17:25<27:16:25, 3.49it/s] 8%|▊ | 28372/371472 [2:17:26<26:39:59, 3.57it/s] 8%|▊ | 28373/371472 [2:17:26<27:22:52, 3.48it/s] 8%|▊ | 28374/371472 [2:17:26<28:02:09, 3.40it/s] 8%|▊ | 28375/371472 [2:17:26<27:35:14, 3.45it/s] 8%|▊ | 28376/371472 [2:17:27<28:21:32, 3.36it/s] 8%|▊ | 28377/371472 [2:17:27<27:57:26, 3.41it/s] 8%|▊ | 28378/371472 [2:17:27<27:20:09, 3.49it/s] 8%|▊ | 28379/371472 [2:17:28<27:35:10, 3.45it/s] 8%|▊ | 28380/371472 [2:17:28<28:42:25, 3.32it/s] {'loss': 4.6333, 'learning_rate': 9.316888965502381e-07, 'epoch': 1.22} + 8%|▊ | 28380/371472 [2:17:28<28:42:25, 3.32it/s] 8%|▊ | 28381/371472 [2:17:28<28:06:24, 3.39it/s] 8%|▊ | 28382/371472 [2:17:28<27:43:53, 3.44it/s] 8%|▊ | 28383/371472 [2:17:29<27:00:44, 3.53it/s] 8%|▊ | 28384/371472 [2:17:29<27:01:34, 3.53it/s] 8%|▊ | 28385/371472 [2:17:29<28:28:21, 3.35it/s] 8%|▊ | 28386/371472 [2:17:30<27:56:57, 3.41it/s] 8%|▊ | 28387/371472 [2:17:30<27:24:25, 3.48it/s] 8%|▊ | 28388/371472 [2:17:30<27:07:14, 3.51it/s] 8%|▊ | 28389/371472 [2:17:30<26:21:49, 3.61it/s] 8%|▊ | 28390/371472 [2:17:31<27:08:23, 3.51it/s] 8%|▊ | 28391/371472 [2:17:31<27:40:12, 3.44it/s] 8%|▊ | 28392/371472 [2:17:31<27:07:20, 3.51it/s] 8%|▊ | 28393/371472 [2:17:32<27:58:26, 3.41it/s] 8%|▊ | 28394/371472 [2:17:32<27:25:06, 3.48it/s] 8%|▊ | 28395/371472 [2:17:32<28:57:14, 3.29it/s] 8%|▊ | 28396/371472 [2:17:33<29:37:36, 3.22it/s] 8%|▊ | 28397/371472 [2:17:33<30:19:17, 3.14it/s] 8%|▊ | 28398/371472 [2:17:33<31:08:42, 3.06it/s] 8%|▊ | 28399/371472 [2:17:34<29:40:14, 3.21it/s] 8%|▊ | 28400/371472 [2:17:34<28:00:12, 3.40it/s] {'loss': 4.6174, 'learning_rate': 9.316404145747591e-07, 'epoch': 1.22} + 8%|▊ | 28400/371472 [2:17:34<28:00:12, 3.40it/s] 8%|▊ | 28401/371472 [2:17:34<29:43:48, 3.21it/s] 8%|▊ | 28402/371472 [2:17:34<28:20:31, 3.36it/s] 8%|▊ | 28403/371472 [2:17:35<29:18:01, 3.25it/s] 8%|▊ | 28404/371472 [2:17:35<28:37:56, 3.33it/s] 8%|▊ | 28405/371472 [2:17:35<29:30:00, 3.23it/s] 8%|▊ | 28406/371472 [2:17:36<30:18:18, 3.14it/s] 8%|▊ | 28407/371472 [2:17:36<29:23:22, 3.24it/s] 8%|▊ | 28408/371472 [2:17:36<27:41:55, 3.44it/s] 8%|▊ | 28409/371472 [2:17:37<26:32:11, 3.59it/s] 8%|▊ | 28410/371472 [2:17:37<27:05:13, 3.52it/s] 8%|▊ | 28411/371472 [2:17:37<26:18:04, 3.62it/s] 8%|▊ | 28412/371472 [2:17:37<26:03:29, 3.66it/s] 8%|▊ | 28413/371472 [2:17:38<28:58:33, 3.29it/s] 8%|▊ | 28414/371472 [2:17:38<27:56:56, 3.41it/s] 8%|▊ | 28415/371472 [2:17:38<28:38:23, 3.33it/s] 8%|▊ | 28416/371472 [2:17:39<30:47:55, 3.09it/s] 8%|▊ | 28417/371472 [2:17:39<29:49:21, 3.20it/s] 8%|▊ | 28418/371472 [2:17:39<29:03:24, 3.28it/s] 8%|▊ | 28419/371472 [2:17:40<28:03:35, 3.40it/s] 8%|▊ | 28420/371472 [2:17:40<27:05:14, 3.52it/s] {'loss': 4.3189, 'learning_rate': 9.315919325992802e-07, 'epoch': 1.22} + 8%|▊ | 28420/371472 [2:17:40<27:05:14, 3.52it/s] 8%|▊ | 28421/371472 [2:17:40<27:47:28, 3.43it/s] 8%|▊ | 28422/371472 [2:17:40<28:20:16, 3.36it/s] 8%|▊ | 28423/371472 [2:17:41<27:20:24, 3.49it/s] 8%|▊ | 28424/371472 [2:17:41<28:28:17, 3.35it/s] 8%|▊ | 28425/371472 [2:17:41<28:10:54, 3.38it/s] 8%|▊ | 28426/371472 [2:17:42<27:42:20, 3.44it/s] 8%|▊ | 28427/371472 [2:17:42<27:39:08, 3.45it/s] 8%|▊ | 28428/371472 [2:17:42<26:49:41, 3.55it/s] 8%|▊ | 28429/371472 [2:17:42<27:17:59, 3.49it/s] 8%|▊ | 28430/371472 [2:17:43<28:03:54, 3.40it/s] 8%|▊ | 28431/371472 [2:17:43<27:45:19, 3.43it/s] 8%|▊ | 28432/371472 [2:17:43<27:55:15, 3.41it/s] 8%|▊ | 28433/371472 [2:17:44<27:49:24, 3.42it/s] 8%|▊ | 28434/371472 [2:17:44<28:29:28, 3.34it/s] 8%|▊ | 28435/371472 [2:17:44<28:21:45, 3.36it/s] 8%|▊ | 28436/371472 [2:17:44<27:22:00, 3.48it/s] 8%|▊ | 28437/371472 [2:17:45<27:38:20, 3.45it/s] 8%|▊ | 28438/371472 [2:17:45<28:09:38, 3.38it/s] 8%|▊ | 28439/371472 [2:17:45<27:57:04, 3.41it/s] 8%|▊ | 28440/371472 [2:17:46<27:24:50, 3.48it/s] {'loss': 4.2966, 'learning_rate': 9.315434506238014e-07, 'epoch': 1.22} + 8%|▊ | 28440/371472 [2:17:46<27:24:50, 3.48it/s] 8%|▊ | 28441/371472 [2:17:46<26:54:17, 3.54it/s] 8%|▊ | 28442/371472 [2:17:46<26:21:15, 3.62it/s] 8%|▊ | 28443/371472 [2:17:46<25:46:41, 3.70it/s] 8%|▊ | 28444/371472 [2:17:47<25:29:23, 3.74it/s] 8%|▊ | 28445/371472 [2:17:47<26:18:31, 3.62it/s] 8%|▊ | 28446/371472 [2:17:47<26:30:33, 3.59it/s] 8%|▊ | 28447/371472 [2:17:48<28:56:32, 3.29it/s] 8%|▊ | 28448/371472 [2:17:48<28:20:24, 3.36it/s] 8%|▊ | 28449/371472 [2:17:48<27:45:28, 3.43it/s] 8%|▊ | 28450/371472 [2:17:48<26:53:32, 3.54it/s] 8%|▊ | 28451/371472 [2:17:49<28:12:12, 3.38it/s] 8%|▊ | 28452/371472 [2:17:49<27:22:37, 3.48it/s] 8%|▊ | 28453/371472 [2:17:49<26:26:18, 3.60it/s] 8%|▊ | 28454/371472 [2:17:50<27:19:03, 3.49it/s] 8%|▊ | 28455/371472 [2:17:50<26:37:34, 3.58it/s] 8%|▊ | 28456/371472 [2:17:50<26:17:46, 3.62it/s] 8%|▊ | 28457/371472 [2:17:50<25:48:23, 3.69it/s] 8%|▊ | 28458/371472 [2:17:51<27:24:42, 3.48it/s] 8%|▊ | 28459/371472 [2:17:51<26:26:11, 3.60it/s] 8%|▊ | 28460/371472 [2:17:51<26:08:52, 3.64it/s] {'loss': 4.5531, 'learning_rate': 9.314949686483225e-07, 'epoch': 1.23} + 8%|▊ | 28460/371472 [2:17:51<26:08:52, 3.64it/s] 8%|▊ | 28461/371472 [2:17:52<27:01:42, 3.53it/s] 8%|▊ | 28462/371472 [2:17:52<26:52:38, 3.55it/s] 8%|▊ | 28463/371472 [2:17:52<31:15:11, 3.05it/s] 8%|▊ | 28464/371472 [2:17:53<29:27:35, 3.23it/s] 8%|▊ | 28465/371472 [2:17:53<28:11:34, 3.38it/s] 8%|▊ | 28466/371472 [2:17:53<29:10:13, 3.27it/s] 8%|▊ | 28467/371472 [2:17:53<28:40:21, 3.32it/s] 8%|▊ | 28468/371472 [2:17:54<29:29:45, 3.23it/s] 8%|▊ | 28469/371472 [2:17:54<28:42:08, 3.32it/s] 8%|▊ | 28470/371472 [2:17:54<28:42:48, 3.32it/s] 8%|▊ | 28471/371472 [2:17:55<28:09:30, 3.38it/s] 8%|▊ | 28472/371472 [2:17:55<31:06:30, 3.06it/s] 8%|▊ | 28473/371472 [2:17:55<30:34:50, 3.12it/s] 8%|▊ | 28474/371472 [2:17:56<30:27:15, 3.13it/s] 8%|▊ | 28475/371472 [2:17:56<28:35:28, 3.33it/s] 8%|▊ | 28476/371472 [2:17:56<27:05:35, 3.52it/s] 8%|▊ | 28477/371472 [2:17:56<26:40:49, 3.57it/s] 8%|▊ | 28478/371472 [2:17:57<27:34:11, 3.46it/s] 8%|▊ | 28479/371472 [2:17:57<28:45:58, 3.31it/s] 8%|▊ | 28480/371472 [2:17:57<29:17:06, 3.25it/s] {'loss': 4.4412, 'learning_rate': 9.314464866728436e-07, 'epoch': 1.23} + 8%|▊ | 28480/371472 [2:17:57<29:17:06, 3.25it/s] 8%|▊ | 28481/371472 [2:17:58<28:15:18, 3.37it/s] 8%|▊ | 28482/371472 [2:17:58<28:19:22, 3.36it/s] 8%|▊ | 28483/371472 [2:17:58<27:37:03, 3.45it/s] 8%|▊ | 28484/371472 [2:17:58<27:48:57, 3.43it/s] 8%|▊ | 28485/371472 [2:17:59<27:54:38, 3.41it/s] 8%|▊ | 28486/371472 [2:17:59<29:04:38, 3.28it/s] 8%|▊ | 28487/371472 [2:17:59<29:47:12, 3.20it/s] 8%|▊ | 28488/371472 [2:18:00<29:27:30, 3.23it/s] 8%|▊ | 28489/371472 [2:18:00<28:06:09, 3.39it/s] 8%|▊ | 28490/371472 [2:18:00<26:53:52, 3.54it/s] 8%|▊ | 28491/371472 [2:18:01<27:17:55, 3.49it/s] 8%|▊ | 28492/371472 [2:18:01<27:02:24, 3.52it/s] 8%|▊ | 28493/371472 [2:18:01<26:51:14, 3.55it/s] 8%|▊ | 28494/371472 [2:18:01<26:28:41, 3.60it/s] 8%|▊ | 28495/371472 [2:18:02<25:32:41, 3.73it/s] 8%|▊ | 28496/371472 [2:18:02<25:10:49, 3.78it/s] 8%|▊ | 28497/371472 [2:18:02<25:22:33, 3.75it/s] 8%|▊ | 28498/371472 [2:18:02<25:45:57, 3.70it/s] 8%|▊ | 28499/371472 [2:18:03<26:19:40, 3.62it/s] 8%|▊ | 28500/371472 [2:18:03<26:02:06, 3.66it/s] {'loss': 4.3971, 'learning_rate': 9.313980046973647e-07, 'epoch': 1.23} + 8%|▊ | 28500/371472 [2:18:03<26:02:06, 3.66it/s] 8%|▊ | 28501/371472 [2:18:03<26:24:51, 3.61it/s] 8%|▊ | 28502/371472 [2:18:04<26:33:51, 3.59it/s] 8%|▊ | 28503/371472 [2:18:04<26:52:19, 3.55it/s] 8%|▊ | 28504/371472 [2:18:04<25:51:28, 3.68it/s] 8%|▊ | 28505/371472 [2:18:04<26:13:28, 3.63it/s] 8%|▊ | 28506/371472 [2:18:05<27:38:08, 3.45it/s] 8%|▊ | 28507/371472 [2:18:05<27:12:52, 3.50it/s] 8%|▊ | 28508/371472 [2:18:05<27:47:05, 3.43it/s] 8%|▊ | 28509/371472 [2:18:06<28:15:56, 3.37it/s] 8%|▊ | 28510/371472 [2:18:06<27:39:50, 3.44it/s] 8%|▊ | 28511/371472 [2:18:06<28:07:19, 3.39it/s] 8%|▊ | 28512/371472 [2:18:06<28:29:02, 3.34it/s] 8%|▊ | 28513/371472 [2:18:07<28:19:43, 3.36it/s] 8%|▊ | 28514/371472 [2:18:07<28:41:45, 3.32it/s] 8%|▊ | 28515/371472 [2:18:07<27:51:34, 3.42it/s] 8%|▊ | 28516/371472 [2:18:08<27:54:58, 3.41it/s] 8%|▊ | 28517/371472 [2:18:08<29:16:24, 3.25it/s] 8%|▊ | 28518/371472 [2:18:08<27:59:17, 3.40it/s] 8%|▊ | 28519/371472 [2:18:09<29:07:07, 3.27it/s] 8%|▊ | 28520/371472 [2:18:09<28:07:35, 3.39it/s] {'loss': 4.7037, 'learning_rate': 9.313495227218858e-07, 'epoch': 1.23} + 8%|▊ | 28520/371472 [2:18:09<28:07:35, 3.39it/s] 8%|▊ | 28521/371472 [2:18:09<29:58:28, 3.18it/s] 8%|▊ | 28522/371472 [2:18:10<29:03:54, 3.28it/s] 8%|▊ | 28523/371472 [2:18:10<29:29:36, 3.23it/s] 8%|▊ | 28524/371472 [2:18:10<28:41:29, 3.32it/s] 8%|▊ | 28525/371472 [2:18:10<27:29:09, 3.47it/s] 8%|▊ | 28526/371472 [2:18:11<26:16:44, 3.63it/s] 8%|▊ | 28527/371472 [2:18:11<26:25:52, 3.60it/s] 8%|▊ | 28528/371472 [2:18:11<26:03:56, 3.65it/s] 8%|▊ | 28529/371472 [2:18:11<25:58:37, 3.67it/s] 8%|▊ | 28530/371472 [2:18:12<26:14:43, 3.63it/s] 8%|▊ | 28531/371472 [2:18:12<26:57:19, 3.53it/s] 8%|▊ | 28532/371472 [2:18:12<26:42:36, 3.57it/s] 8%|▊ | 28533/371472 [2:18:13<26:39:17, 3.57it/s] 8%|▊ | 28534/371472 [2:18:13<26:43:27, 3.56it/s] 8%|▊ | 28535/371472 [2:18:13<26:46:41, 3.56it/s] 8%|▊ | 28536/371472 [2:18:13<27:05:39, 3.52it/s] 8%|▊ | 28537/371472 [2:18:14<27:10:10, 3.51it/s] 8%|▊ | 28538/371472 [2:18:14<26:41:37, 3.57it/s] 8%|▊ | 28539/371472 [2:18:14<26:40:43, 3.57it/s] 8%|▊ | 28540/371472 [2:18:15<25:53:00, 3.68it/s] {'loss': 4.6395, 'learning_rate': 9.313010407464069e-07, 'epoch': 1.23} + 8%|▊ | 28540/371472 [2:18:15<25:53:00, 3.68it/s] 8%|▊ | 28541/371472 [2:18:15<27:24:18, 3.48it/s] 8%|▊ | 28542/371472 [2:18:15<29:42:32, 3.21it/s] 8%|▊ | 28543/371472 [2:18:15<28:57:44, 3.29it/s] 8%|▊ | 28544/371472 [2:18:16<29:51:27, 3.19it/s] 8%|▊ | 28545/371472 [2:18:16<28:38:08, 3.33it/s] 8%|▊ | 28546/371472 [2:18:16<29:37:36, 3.22it/s] 8%|▊ | 28547/371472 [2:18:17<29:02:02, 3.28it/s] 8%|▊ | 28548/371472 [2:18:17<29:07:08, 3.27it/s] 8%|▊ | 28549/371472 [2:18:17<29:18:12, 3.25it/s] 8%|▊ | 28550/371472 [2:18:18<29:15:06, 3.26it/s] 8%|▊ | 28551/371472 [2:18:18<27:30:11, 3.46it/s] 8%|▊ | 28552/371472 [2:18:18<26:50:39, 3.55it/s] 8%|▊ | 28553/371472 [2:18:18<26:47:05, 3.56it/s] 8%|▊ | 28554/371472 [2:18:19<29:28:20, 3.23it/s] 8%|▊ | 28555/371472 [2:18:19<29:40:00, 3.21it/s] 8%|▊ | 28556/371472 [2:18:19<29:06:08, 3.27it/s] 8%|▊ | 28557/371472 [2:18:20<28:45:16, 3.31it/s] 8%|▊ | 28558/371472 [2:18:20<27:11:08, 3.50it/s] 8%|▊ | 28559/371472 [2:18:20<25:59:06, 3.67it/s] 8%|▊ | 28560/371472 [2:18:20<25:57:29, 3.67it/s] {'loss': 4.5955, 'learning_rate': 9.31252558770928e-07, 'epoch': 1.23} + 8%|▊ | 28560/371472 [2:18:20<25:57:29, 3.67it/s] 8%|▊ | 28561/371472 [2:18:21<26:11:32, 3.64it/s] 8%|▊ | 28562/371472 [2:18:21<26:30:53, 3.59it/s] 8%|▊ | 28563/371472 [2:18:21<27:48:43, 3.42it/s] 8%|▊ | 28564/371472 [2:18:22<27:01:47, 3.52it/s] 8%|▊ | 28565/371472 [2:18:22<26:42:00, 3.57it/s] 8%|▊ | 28566/371472 [2:18:22<26:41:13, 3.57it/s] 8%|▊ | 28567/371472 [2:18:22<26:47:38, 3.55it/s] 8%|▊ | 28568/371472 [2:18:23<26:30:55, 3.59it/s] 8%|▊ | 28569/371472 [2:18:23<26:19:44, 3.62it/s] 8%|▊ | 28570/371472 [2:18:23<26:51:46, 3.55it/s] 8%|▊ | 28571/371472 [2:18:24<26:23:55, 3.61it/s] 8%|▊ | 28572/371472 [2:18:24<28:00:49, 3.40it/s] 8%|▊ | 28573/371472 [2:18:24<28:15:25, 3.37it/s] 8%|▊ | 28574/371472 [2:18:24<28:06:26, 3.39it/s] 8%|▊ | 28575/371472 [2:18:25<27:53:43, 3.41it/s] 8%|▊ | 28576/371472 [2:18:25<27:59:12, 3.40it/s] 8%|▊ | 28577/371472 [2:18:25<27:57:10, 3.41it/s] 8%|▊ | 28578/371472 [2:18:26<28:16:22, 3.37it/s] 8%|▊ | 28579/371472 [2:18:26<27:55:12, 3.41it/s] 8%|▊ | 28580/371472 [2:18:26<26:55:27, 3.54it/s] {'loss': 4.3676, 'learning_rate': 9.312040767954491e-07, 'epoch': 1.23} + 8%|▊ | 28580/371472 [2:18:26<26:55:27, 3.54it/s] 8%|▊ | 28581/371472 [2:18:27<29:38:11, 3.21it/s] 8%|▊ | 28582/371472 [2:18:27<31:03:53, 3.07it/s] 8%|▊ | 28583/371472 [2:18:27<30:25:34, 3.13it/s] 8%|▊ | 28584/371472 [2:18:28<31:19:14, 3.04it/s] 8%|▊ | 28585/371472 [2:18:28<30:24:31, 3.13it/s] 8%|▊ | 28586/371472 [2:18:28<30:58:15, 3.08it/s] 8%|▊ | 28587/371472 [2:18:29<29:26:51, 3.23it/s] 8%|▊ | 28588/371472 [2:18:29<28:48:28, 3.31it/s] 8%|▊ | 28589/371472 [2:18:29<28:00:32, 3.40it/s] 8%|▊ | 28590/371472 [2:18:29<27:06:59, 3.51it/s] 8%|▊ | 28591/371472 [2:18:30<26:23:29, 3.61it/s] 8%|▊ | 28592/371472 [2:18:30<28:09:26, 3.38it/s] 8%|▊ | 28593/371472 [2:18:30<27:16:04, 3.49it/s] 8%|▊ | 28594/371472 [2:18:30<27:13:10, 3.50it/s] 8%|▊ | 28595/371472 [2:18:31<26:47:19, 3.56it/s] 8%|▊ | 28596/371472 [2:18:31<27:29:57, 3.46it/s] 8%|▊ | 28597/371472 [2:18:31<27:03:23, 3.52it/s] 8%|▊ | 28598/371472 [2:18:32<27:08:17, 3.51it/s] 8%|▊ | 28599/371472 [2:18:32<27:43:01, 3.44it/s] 8%|▊ | 28600/371472 [2:18:32<26:49:27, 3.55it/s] {'loss': 4.6028, 'learning_rate': 9.311555948199703e-07, 'epoch': 1.23} + 8%|▊ | 28600/371472 [2:18:32<26:49:27, 3.55it/s] 8%|▊ | 28601/371472 [2:18:32<25:57:16, 3.67it/s] 8%|▊ | 28602/371472 [2:18:33<29:41:00, 3.21it/s] 8%|▊ | 28603/371472 [2:18:33<29:27:07, 3.23it/s] 8%|▊ | 28604/371472 [2:18:33<29:12:04, 3.26it/s] 8%|▊ | 28605/371472 [2:18:34<28:07:37, 3.39it/s] 8%|▊ | 28606/371472 [2:18:34<28:26:00, 3.35it/s] 8%|▊ | 28607/371472 [2:18:34<29:04:14, 3.28it/s] 8%|▊ | 28608/371472 [2:18:35<28:34:33, 3.33it/s] 8%|▊ | 28609/371472 [2:18:35<28:36:27, 3.33it/s] 8%|▊ | 28610/371472 [2:18:35<27:05:38, 3.52it/s] 8%|▊ | 28611/371472 [2:18:36<29:26:59, 3.23it/s] 8%|▊ | 28612/371472 [2:18:36<28:59:49, 3.28it/s] 8%|▊ | 28613/371472 [2:18:36<29:18:20, 3.25it/s] 8%|▊ | 28614/371472 [2:18:36<28:37:01, 3.33it/s] 8%|▊ | 28615/371472 [2:18:37<30:06:05, 3.16it/s] 8%|▊ | 28616/371472 [2:18:37<31:16:56, 3.04it/s] 8%|▊ | 28617/371472 [2:18:37<31:43:20, 3.00it/s] 8%|▊ | 28618/371472 [2:18:38<29:54:54, 3.18it/s] 8%|▊ | 28619/371472 [2:18:38<28:24:48, 3.35it/s] 8%|▊ | 28620/371472 [2:18:38<27:32:22, 3.46it/s] {'loss': 4.3803, 'learning_rate': 9.311071128444914e-07, 'epoch': 1.23} + 8%|▊ | 28620/371472 [2:18:38<27:32:22, 3.46it/s] 8%|▊ | 28621/371472 [2:18:39<28:09:01, 3.38it/s] 8%|▊ | 28622/371472 [2:18:39<27:46:39, 3.43it/s] 8%|▊ | 28623/371472 [2:18:39<27:05:26, 3.52it/s] 8%|▊ | 28624/371472 [2:18:39<26:31:36, 3.59it/s] 8%|▊ | 28625/371472 [2:18:40<26:23:34, 3.61it/s] 8%|▊ | 28626/371472 [2:18:40<26:18:06, 3.62it/s] 8%|▊ | 28627/371472 [2:18:40<26:38:39, 3.57it/s] 8%|▊ | 28628/371472 [2:18:41<26:08:36, 3.64it/s] 8%|▊ | 28629/371472 [2:18:41<26:56:44, 3.53it/s] 8%|▊ | 28630/371472 [2:18:41<26:05:51, 3.65it/s] 8%|▊ | 28631/371472 [2:18:41<25:30:08, 3.73it/s] 8%|▊ | 28632/371472 [2:18:42<26:24:11, 3.61it/s] 8%|▊ | 28633/371472 [2:18:42<26:24:59, 3.61it/s] 8%|▊ | 28634/371472 [2:18:42<29:33:20, 3.22it/s] 8%|▊ | 28635/371472 [2:18:43<28:21:21, 3.36it/s] 8%|▊ | 28636/371472 [2:18:43<27:43:42, 3.43it/s] 8%|▊ | 28637/371472 [2:18:43<27:18:02, 3.49it/s] 8%|▊ | 28638/371472 [2:18:43<27:22:55, 3.48it/s] 8%|▊ | 28639/371472 [2:18:44<26:27:16, 3.60it/s] 8%|▊ | 28640/371472 [2:18:44<28:01:40, 3.40it/s] {'loss': 4.583, 'learning_rate': 9.310586308690123e-07, 'epoch': 1.23} + 8%|▊ | 28640/371472 [2:18:44<28:01:40, 3.40it/s] 8%|▊ | 28641/371472 [2:18:44<26:49:54, 3.55it/s] 8%|▊ | 28642/371472 [2:18:45<26:44:04, 3.56it/s] 8%|▊ | 28643/371472 [2:18:45<26:43:48, 3.56it/s] 8%|▊ | 28644/371472 [2:18:45<26:20:41, 3.61it/s] 8%|▊ | 28645/371472 [2:18:45<26:27:30, 3.60it/s] 8%|▊ | 28646/371472 [2:18:46<25:56:24, 3.67it/s] 8%|▊ | 28647/371472 [2:18:46<28:16:32, 3.37it/s] 8%|▊ | 28648/371472 [2:18:46<29:25:12, 3.24it/s] 8%|▊ | 28649/371472 [2:18:47<27:34:22, 3.45it/s] 8%|▊ | 28650/371472 [2:18:47<29:40:15, 3.21it/s] 8%|▊ | 28651/371472 [2:18:47<28:18:37, 3.36it/s] 8%|▊ | 28652/371472 [2:18:47<26:54:08, 3.54it/s] 8%|▊ | 28653/371472 [2:18:48<27:14:34, 3.50it/s] 8%|▊ | 28654/371472 [2:18:48<26:49:04, 3.55it/s] 8%|▊ | 28655/371472 [2:18:48<27:41:31, 3.44it/s] 8%|▊ | 28656/371472 [2:18:49<27:35:04, 3.45it/s] 8%|▊ | 28657/371472 [2:18:49<26:45:32, 3.56it/s] 8%|▊ | 28658/371472 [2:18:49<29:25:13, 3.24it/s] 8%|▊ | 28659/371472 [2:18:49<28:26:21, 3.35it/s] 8%|▊ | 28660/371472 [2:18:50<27:52:14, 3.42it/s] {'loss': 4.5455, 'learning_rate': 9.310101488935335e-07, 'epoch': 1.23} + 8%|▊ | 28660/371472 [2:18:50<27:52:14, 3.42it/s] 8%|▊ | 28661/371472 [2:18:50<26:39:14, 3.57it/s] 8%|▊ | 28662/371472 [2:18:50<28:05:45, 3.39it/s] 8%|▊ | 28663/371472 [2:18:51<27:32:18, 3.46it/s] 8%|▊ | 28664/371472 [2:18:51<27:42:07, 3.44it/s] 8%|▊ | 28665/371472 [2:18:51<28:35:05, 3.33it/s] 8%|▊ | 28666/371472 [2:18:52<27:32:06, 3.46it/s] 8%|▊ | 28667/371472 [2:18:52<26:48:30, 3.55it/s] 8%|▊ | 28668/371472 [2:18:52<26:07:29, 3.64it/s] 8%|▊ | 28669/371472 [2:18:52<25:58:19, 3.67it/s] 8%|▊ | 28670/371472 [2:18:53<25:49:10, 3.69it/s] 8%|▊ | 28671/371472 [2:18:53<26:27:30, 3.60it/s] 8%|▊ | 28672/371472 [2:18:53<26:34:53, 3.58it/s] 8%|▊ | 28673/371472 [2:18:53<27:26:22, 3.47it/s] 8%|▊ | 28674/371472 [2:18:54<28:32:54, 3.34it/s] 8%|▊ | 28675/371472 [2:18:54<27:47:41, 3.43it/s] 8%|▊ | 28676/371472 [2:18:54<26:36:14, 3.58it/s] 8%|▊ | 28677/371472 [2:18:55<27:19:22, 3.49it/s] 8%|▊ | 28678/371472 [2:18:55<27:32:25, 3.46it/s] 8%|▊ | 28679/371472 [2:18:55<28:38:45, 3.32it/s] 8%|▊ | 28680/371472 [2:18:56<28:14:59, 3.37it/s] {'loss': 4.4278, 'learning_rate': 9.309616669180547e-07, 'epoch': 1.24} + 8%|▊ | 28680/371472 [2:18:56<28:14:59, 3.37it/s] 8%|▊ | 28681/371472 [2:18:56<27:37:37, 3.45it/s] 8%|▊ | 28682/371472 [2:18:56<26:31:58, 3.59it/s] 8%|▊ | 28683/371472 [2:18:56<30:01:10, 3.17it/s] 8%|▊ | 28684/371472 [2:18:57<29:07:59, 3.27it/s] 8%|▊ | 28685/371472 [2:18:57<27:47:23, 3.43it/s] 8%|▊ | 28686/371472 [2:18:57<27:21:21, 3.48it/s] 8%|▊ | 28687/371472 [2:18:58<26:34:51, 3.58it/s] 8%|▊ | 28688/371472 [2:18:58<28:38:05, 3.33it/s] 8%|▊ | 28689/371472 [2:18:58<27:36:18, 3.45it/s] 8%|▊ | 28690/371472 [2:18:58<27:19:33, 3.48it/s] 8%|▊ | 28691/371472 [2:18:59<27:42:33, 3.44it/s] 8%|▊ | 28692/371472 [2:18:59<26:45:25, 3.56it/s] 8%|▊ | 28693/371472 [2:18:59<27:23:00, 3.48it/s] 8%|▊ | 28694/371472 [2:19:00<27:07:51, 3.51it/s] 8%|▊ | 28695/371472 [2:19:00<27:49:46, 3.42it/s] 8%|▊ | 28696/371472 [2:19:00<26:41:31, 3.57it/s] 8%|▊ | 28697/371472 [2:19:00<26:30:29, 3.59it/s] 8%|▊ | 28698/371472 [2:19:01<27:18:35, 3.49it/s] 8%|▊ | 28699/371472 [2:19:01<26:40:18, 3.57it/s] 8%|▊ | 28700/371472 [2:19:01<26:28:53, 3.60it/s] {'loss': 4.3653, 'learning_rate': 9.309131849425757e-07, 'epoch': 1.24} + 8%|▊ | 28700/371472 [2:19:01<26:28:53, 3.60it/s] 8%|▊ | 28701/371472 [2:19:02<26:44:26, 3.56it/s] 8%|▊ | 28702/371472 [2:19:02<28:52:16, 3.30it/s] 8%|▊ | 28703/371472 [2:19:02<30:40:06, 3.10it/s] 8%|▊ | 28704/371472 [2:19:03<30:13:20, 3.15it/s] 8%|▊ | 28705/371472 [2:19:03<28:54:28, 3.29it/s] 8%|▊ | 28706/371472 [2:19:03<30:18:02, 3.14it/s] 8%|▊ | 28707/371472 [2:19:03<30:05:13, 3.16it/s] 8%|▊ | 28708/371472 [2:19:04<31:11:27, 3.05it/s] 8%|▊ | 28709/371472 [2:19:04<29:28:40, 3.23it/s] 8%|▊ | 28710/371472 [2:19:04<28:17:38, 3.37it/s] 8%|▊ | 28711/371472 [2:19:05<28:37:17, 3.33it/s] 8%|▊ | 28712/371472 [2:19:05<27:49:18, 3.42it/s] 8%|▊ | 28713/371472 [2:19:05<28:21:36, 3.36it/s] 8%|▊ | 28714/371472 [2:19:06<27:06:58, 3.51it/s] 8%|▊ | 28715/371472 [2:19:06<26:24:25, 3.61it/s] 8%|▊ | 28716/371472 [2:19:06<26:13:20, 3.63it/s] 8%|▊ | 28717/371472 [2:19:06<25:52:14, 3.68it/s] 8%|▊ | 28718/371472 [2:19:07<27:53:01, 3.41it/s] 8%|▊ | 28719/371472 [2:19:07<27:16:05, 3.49it/s] 8%|▊ | 28720/371472 [2:19:07<27:45:55, 3.43it/s] {'loss': 4.642, 'learning_rate': 9.308647029670968e-07, 'epoch': 1.24} + 8%|▊ | 28720/371472 [2:19:07<27:45:55, 3.43it/s] 8%|▊ | 28721/371472 [2:19:08<27:51:38, 3.42it/s] 8%|▊ | 28722/371472 [2:19:08<27:32:01, 3.46it/s] 8%|▊ | 28723/371472 [2:19:08<27:37:40, 3.45it/s] 8%|▊ | 28724/371472 [2:19:08<26:51:05, 3.55it/s] 8%|▊ | 28725/371472 [2:19:09<27:33:41, 3.45it/s] 8%|▊ | 28726/371472 [2:19:09<27:01:51, 3.52it/s] 8%|▊ | 28727/371472 [2:19:09<27:13:51, 3.50it/s] 8%|▊ | 28728/371472 [2:19:10<27:07:28, 3.51it/s] 8%|▊ | 28729/371472 [2:19:10<26:42:56, 3.56it/s] 8%|▊ | 28730/371472 [2:19:10<30:48:49, 3.09it/s] 8%|▊ | 28731/371472 [2:19:10<29:37:31, 3.21it/s] 8%|▊ | 28732/371472 [2:19:11<28:11:28, 3.38it/s] 8%|▊ | 28733/371472 [2:19:11<27:33:41, 3.45it/s] 8%|▊ | 28734/371472 [2:19:11<26:54:19, 3.54it/s] 8%|▊ | 28735/371472 [2:19:12<26:20:52, 3.61it/s] 8%|▊ | 28736/371472 [2:19:12<25:36:14, 3.72it/s] 8%|▊ | 28737/371472 [2:19:12<26:28:46, 3.60it/s] 8%|▊ | 28738/371472 [2:19:12<26:48:57, 3.55it/s] 8%|▊ | 28739/371472 [2:19:13<28:01:02, 3.40it/s] 8%|▊ | 28740/371472 [2:19:13<27:03:25, 3.52it/s] {'loss': 4.4583, 'learning_rate': 9.308162209916181e-07, 'epoch': 1.24} + 8%|▊ | 28740/371472 [2:19:13<27:03:25, 3.52it/s] 8%|▊ | 28741/371472 [2:19:13<27:36:52, 3.45it/s] 8%|▊ | 28742/371472 [2:19:14<27:15:43, 3.49it/s] 8%|▊ | 28743/371472 [2:19:14<26:55:49, 3.54it/s] 8%|▊ | 28744/371472 [2:19:14<26:31:55, 3.59it/s] 8%|▊ | 28745/371472 [2:19:14<26:41:23, 3.57it/s] 8%|▊ | 28746/371472 [2:19:15<27:01:29, 3.52it/s] 8%|▊ | 28747/371472 [2:19:15<26:08:34, 3.64it/s] 8%|▊ | 28748/371472 [2:19:15<27:06:56, 3.51it/s] 8%|▊ | 28749/371472 [2:19:16<27:19:39, 3.48it/s] 8%|▊ | 28750/371472 [2:19:16<28:36:45, 3.33it/s] 8%|▊ | 28751/371472 [2:19:16<30:41:53, 3.10it/s] 8%|▊ | 28752/371472 [2:19:17<30:39:17, 3.11it/s] 8%|▊ | 28753/371472 [2:19:17<30:37:05, 3.11it/s] 8%|▊ | 28754/371472 [2:19:17<29:33:33, 3.22it/s] 8%|▊ | 28755/371472 [2:19:18<30:34:31, 3.11it/s] 8%|▊ | 28756/371472 [2:19:18<29:17:10, 3.25it/s] 8%|▊ | 28757/371472 [2:19:18<27:36:31, 3.45it/s] 8%|▊ | 28758/371472 [2:19:18<27:33:29, 3.45it/s] 8%|▊ | 28759/371472 [2:19:19<27:52:12, 3.42it/s] 8%|▊ | 28760/371472 [2:19:19<27:13:40, 3.50it/s] {'loss': 4.3084, 'learning_rate': 9.307677390161391e-07, 'epoch': 1.24} + 8%|▊ | 28760/371472 [2:19:19<27:13:40, 3.50it/s] 8%|▊ | 28761/371472 [2:19:19<28:38:26, 3.32it/s] 8%|▊ | 28762/371472 [2:19:19<27:08:29, 3.51it/s] 8%|▊ | 28763/371472 [2:19:20<27:59:55, 3.40it/s] 8%|▊ | 28764/371472 [2:19:20<27:44:41, 3.43it/s] 8%|▊ | 28765/371472 [2:19:20<28:49:40, 3.30it/s] 8%|▊ | 28766/371472 [2:19:21<27:59:02, 3.40it/s] 8%|▊ | 28767/371472 [2:19:21<29:03:44, 3.28it/s] 8%|▊ | 28768/371472 [2:19:21<28:08:01, 3.38it/s] 8%|▊ | 28769/371472 [2:19:22<27:14:43, 3.49it/s] 8%|▊ | 28770/371472 [2:19:22<27:54:43, 3.41it/s] 8%|▊ | 28771/371472 [2:19:22<27:59:29, 3.40it/s] 8%|▊ | 28772/371472 [2:19:22<27:51:28, 3.42it/s] 8%|▊ | 28773/371472 [2:19:23<28:19:18, 3.36it/s] 8%|▊ | 28774/371472 [2:19:23<28:22:00, 3.36it/s] 8%|▊ | 28775/371472 [2:19:23<27:48:55, 3.42it/s] 8%|▊ | 28776/371472 [2:19:24<27:48:20, 3.42it/s] 8%|▊ | 28777/371472 [2:19:24<27:31:02, 3.46it/s] 8%|▊ | 28778/371472 [2:19:24<27:11:00, 3.50it/s] 8%|▊ | 28779/371472 [2:19:24<26:32:22, 3.59it/s] 8%|▊ | 28780/371472 [2:19:25<26:20:14, 3.61it/s] {'loss': 4.2672, 'learning_rate': 9.307192570406601e-07, 'epoch': 1.24} + 8%|▊ | 28780/371472 [2:19:25<26:20:14, 3.61it/s] 8%|▊ | 28781/371472 [2:19:25<26:16:49, 3.62it/s] 8%|▊ | 28782/371472 [2:19:25<27:29:35, 3.46it/s] 8%|▊ | 28783/371472 [2:19:26<29:04:16, 3.27it/s] 8%|▊ | 28784/371472 [2:19:26<28:11:34, 3.38it/s] 8%|▊ | 28785/371472 [2:19:26<27:36:10, 3.45it/s] 8%|▊ | 28786/371472 [2:19:26<27:09:30, 3.51it/s] 8%|▊ | 28787/371472 [2:19:27<27:00:05, 3.53it/s] 8%|▊ | 28788/371472 [2:19:27<25:50:02, 3.68it/s] 8%|▊ | 28789/371472 [2:19:27<26:01:36, 3.66it/s] 8%|▊ | 28790/371472 [2:19:28<27:34:55, 3.45it/s] 8%|▊ | 28791/371472 [2:19:28<29:02:16, 3.28it/s] 8%|▊ | 28792/371472 [2:19:28<28:23:07, 3.35it/s] 8%|▊ | 28793/371472 [2:19:29<27:58:43, 3.40it/s] 8%|▊ | 28794/371472 [2:19:29<30:02:17, 3.17it/s] 8%|▊ | 28795/371472 [2:19:29<28:51:44, 3.30it/s] 8%|▊ | 28796/371472 [2:19:30<30:32:13, 3.12it/s] 8%|▊ | 28797/371472 [2:19:30<30:00:58, 3.17it/s] 8%|▊ | 28798/371472 [2:19:30<30:05:51, 3.16it/s] 8%|▊ | 28799/371472 [2:19:30<30:21:04, 3.14it/s] 8%|▊ | 28800/371472 [2:19:31<30:13:01, 3.15it/s] {'loss': 4.5146, 'learning_rate': 9.306707750651812e-07, 'epoch': 1.24} + 8%|▊ | 28800/371472 [2:19:31<30:13:01, 3.15it/s] 8%|▊ | 28801/371472 [2:19:31<29:04:50, 3.27it/s] 8%|▊ | 28802/371472 [2:19:31<27:33:34, 3.45it/s] 8%|▊ | 28803/371472 [2:19:32<27:56:06, 3.41it/s] 8%|▊ | 28804/371472 [2:19:32<27:13:10, 3.50it/s] 8%|▊ | 28805/371472 [2:19:32<26:28:35, 3.60it/s] 8%|▊ | 28806/371472 [2:19:32<26:50:11, 3.55it/s] 8%|▊ | 28807/371472 [2:19:33<26:13:20, 3.63it/s] 8%|▊ | 28808/371472 [2:19:33<25:26:42, 3.74it/s] 8%|▊ | 28809/371472 [2:19:33<26:02:52, 3.65it/s] 8%|▊ | 28810/371472 [2:19:33<25:32:48, 3.73it/s] 8%|▊ | 28811/371472 [2:19:34<26:37:13, 3.58it/s] 8%|▊ | 28812/371472 [2:19:34<26:57:20, 3.53it/s] 8%|▊ | 28813/371472 [2:19:34<26:13:41, 3.63it/s] 8%|▊ | 28814/371472 [2:19:35<26:09:11, 3.64it/s] 8%|▊ | 28815/371472 [2:19:35<25:39:27, 3.71it/s] 8%|▊ | 28816/371472 [2:19:35<25:47:18, 3.69it/s] 8%|▊ | 28817/371472 [2:19:35<26:44:53, 3.56it/s] 8%|▊ | 28818/371472 [2:19:36<26:07:34, 3.64it/s] 8%|▊ | 28819/371472 [2:19:36<26:02:28, 3.66it/s] 8%|▊ | 28820/371472 [2:19:36<25:03:52, 3.80it/s] {'loss': 4.5027, 'learning_rate': 9.306222930897024e-07, 'epoch': 1.24} + 8%|▊ | 28820/371472 [2:19:36<25:03:52, 3.80it/s] 8%|▊ | 28821/371472 [2:19:36<24:58:03, 3.81it/s] 8%|▊ | 28822/371472 [2:19:37<24:19:02, 3.91it/s] 8%|▊ | 28823/371472 [2:19:37<24:28:33, 3.89it/s] 8%|▊ | 28824/371472 [2:19:37<25:36:10, 3.72it/s] 8%|▊ | 28825/371472 [2:19:38<27:12:43, 3.50it/s] 8%|▊ | 28826/371472 [2:19:38<26:46:34, 3.55it/s] 8%|▊ | 28827/371472 [2:19:38<27:34:52, 3.45it/s] 8%|▊ | 28828/371472 [2:19:38<27:44:31, 3.43it/s] 8%|▊ | 28829/371472 [2:19:39<28:24:12, 3.35it/s] 8%|▊ | 28830/371472 [2:19:39<28:03:32, 3.39it/s] 8%|▊ | 28831/371472 [2:19:39<28:12:13, 3.37it/s] 8%|▊ | 28832/371472 [2:19:40<28:15:53, 3.37it/s] 8%|▊ | 28833/371472 [2:19:40<27:18:56, 3.48it/s] 8%|▊ | 28834/371472 [2:19:40<27:30:45, 3.46it/s] 8%|▊ | 28835/371472 [2:19:41<27:11:39, 3.50it/s] 8%|▊ | 28836/371472 [2:19:41<27:07:29, 3.51it/s] 8%|▊ | 28837/371472 [2:19:41<26:16:42, 3.62it/s] 8%|▊ | 28838/371472 [2:19:41<27:06:57, 3.51it/s] 8%|▊ | 28839/371472 [2:19:42<26:39:06, 3.57it/s] 8%|▊ | 28840/371472 [2:19:42<26:59:43, 3.53it/s] {'loss': 4.4661, 'learning_rate': 9.305738111142235e-07, 'epoch': 1.24} + 8%|▊ | 28840/371472 [2:19:42<26:59:43, 3.53it/s] 8%|▊ | 28841/371472 [2:19:42<26:56:46, 3.53it/s] 8%|▊ | 28842/371472 [2:19:42<26:53:37, 3.54it/s] 8%|▊ | 28843/371472 [2:19:43<26:47:18, 3.55it/s] 8%|▊ | 28844/371472 [2:19:43<26:28:23, 3.60it/s] 8%|▊ | 28845/371472 [2:19:43<26:46:29, 3.55it/s] 8%|▊ | 28846/371472 [2:19:44<27:05:38, 3.51it/s] 8%|▊ | 28847/371472 [2:19:44<26:21:36, 3.61it/s] 8%|▊ | 28848/371472 [2:19:44<25:58:05, 3.67it/s] 8%|▊ | 28849/371472 [2:19:44<26:13:19, 3.63it/s] 8%|▊ | 28850/371472 [2:19:45<28:30:01, 3.34it/s] 8%|▊ | 28851/371472 [2:19:45<26:54:04, 3.54it/s] 8%|▊ | 28852/371472 [2:19:45<27:04:38, 3.51it/s] 8%|▊ | 28853/371472 [2:19:46<28:41:00, 3.32it/s] 8%|▊ | 28854/371472 [2:19:46<27:41:24, 3.44it/s] 8%|▊ | 28855/371472 [2:19:46<26:55:10, 3.54it/s] 8%|▊ | 28856/371472 [2:19:46<26:40:40, 3.57it/s] 8%|▊ | 28857/371472 [2:19:47<26:11:59, 3.63it/s] 8%|▊ | 28858/371472 [2:19:47<27:35:37, 3.45it/s] 8%|▊ | 28859/371472 [2:19:47<27:20:38, 3.48it/s] 8%|▊ | 28860/371472 [2:19:48<26:50:44, 3.55it/s] {'loss': 4.7078, 'learning_rate': 9.305253291387446e-07, 'epoch': 1.24} + 8%|▊ | 28860/371472 [2:19:48<26:50:44, 3.55it/s] 8%|▊ | 28861/371472 [2:19:48<26:18:30, 3.62it/s] 8%|▊ | 28862/371472 [2:19:48<26:31:00, 3.59it/s] 8%|▊ | 28863/371472 [2:19:48<25:55:09, 3.67it/s] 8%|▊ | 28864/371472 [2:19:49<27:41:50, 3.44it/s] 8%|▊ | 28865/371472 [2:19:49<26:35:25, 3.58it/s] 8%|▊ | 28866/371472 [2:19:49<25:55:39, 3.67it/s] 8%|▊ | 28867/371472 [2:19:50<27:07:28, 3.51it/s] 8%|▊ | 28868/371472 [2:19:50<26:29:38, 3.59it/s] 8%|▊ | 28869/371472 [2:19:50<27:24:17, 3.47it/s] 8%|▊ | 28870/371472 [2:19:51<34:45:57, 2.74it/s] 8%|▊ | 28871/371472 [2:19:51<32:05:37, 2.97it/s] 8%|▊ | 28872/371472 [2:19:51<29:56:34, 3.18it/s] 8%|▊ | 28873/371472 [2:19:52<34:44:28, 2.74it/s] 8%|▊ | 28874/371472 [2:19:52<32:46:59, 2.90it/s] 8%|▊ | 28875/371472 [2:19:52<30:20:29, 3.14it/s] 8%|▊ | 28876/371472 [2:19:53<29:26:53, 3.23it/s] 8%|▊ | 28877/371472 [2:19:53<29:42:08, 3.20it/s] 8%|▊ | 28878/371472 [2:19:53<28:21:03, 3.36it/s] 8%|▊ | 28879/371472 [2:19:53<28:04:32, 3.39it/s] 8%|▊ | 28880/371472 [2:19:54<26:44:34, 3.56it/s] {'loss': 4.4902, 'learning_rate': 9.304768471632657e-07, 'epoch': 1.24} + 8%|▊ | 28880/371472 [2:19:54<26:44:34, 3.56it/s] 8%|▊ | 28881/371472 [2:19:54<26:13:10, 3.63it/s] 8%|▊ | 28882/371472 [2:19:54<27:16:41, 3.49it/s] 8%|▊ | 28883/371472 [2:19:55<27:58:07, 3.40it/s] 8%|▊ | 28884/371472 [2:19:55<27:25:27, 3.47it/s] 8%|▊ | 28885/371472 [2:19:55<26:33:59, 3.58it/s] 8%|▊ | 28886/371472 [2:19:55<26:10:11, 3.64it/s] 8%|▊ | 28887/371472 [2:19:56<26:42:09, 3.56it/s] 8%|▊ | 28888/371472 [2:19:56<26:44:58, 3.56it/s] 8%|▊ | 28889/371472 [2:19:56<26:12:05, 3.63it/s] 8%|▊ | 28890/371472 [2:19:56<25:35:28, 3.72it/s] 8%|▊ | 28891/371472 [2:19:57<26:21:43, 3.61it/s] 8%|▊ | 28892/371472 [2:19:57<26:52:30, 3.54it/s] 8%|▊ | 28893/371472 [2:19:57<26:27:43, 3.60it/s] 8%|▊ | 28894/371472 [2:19:58<26:19:15, 3.62it/s] 8%|▊ | 28895/371472 [2:19:58<27:10:23, 3.50it/s] 8%|▊ | 28896/371472 [2:19:58<27:45:34, 3.43it/s] 8%|▊ | 28897/371472 [2:19:58<27:38:34, 3.44it/s] 8%|▊ | 28898/371472 [2:19:59<26:49:16, 3.55it/s] 8%|▊ | 28899/371472 [2:19:59<26:50:34, 3.55it/s] 8%|▊ | 28900/371472 [2:19:59<26:31:12, 3.59it/s] {'loss': 4.5009, 'learning_rate': 9.304283651877868e-07, 'epoch': 1.24} + 8%|▊ | 28900/371472 [2:19:59<26:31:12, 3.59it/s] 8%|▊ | 28901/371472 [2:20:00<26:15:55, 3.62it/s] 8%|▊ | 28902/371472 [2:20:00<26:20:13, 3.61it/s] 8%|▊ | 28903/371472 [2:20:00<26:33:39, 3.58it/s] 8%|▊ | 28904/371472 [2:20:00<25:30:28, 3.73it/s] 8%|▊ | 28905/371472 [2:20:01<26:46:38, 3.55it/s] 8%|▊ | 28906/371472 [2:20:01<26:55:42, 3.53it/s] 8%|▊ | 28907/371472 [2:20:01<28:15:51, 3.37it/s] 8%|▊ | 28908/371472 [2:20:02<27:31:06, 3.46it/s] 8%|▊ | 28909/371472 [2:20:02<30:47:42, 3.09it/s] 8%|▊ | 28910/371472 [2:20:02<30:31:16, 3.12it/s] 8%|▊ | 28911/371472 [2:20:03<31:07:29, 3.06it/s] 8%|▊ | 28912/371472 [2:20:03<29:15:01, 3.25it/s] 8%|▊ | 28913/371472 [2:20:03<28:15:07, 3.37it/s] 8%|▊ | 28914/371472 [2:20:03<28:52:19, 3.30it/s] 8%|▊ | 28915/371472 [2:20:04<27:30:25, 3.46it/s] 8%|▊ | 28916/371472 [2:20:04<29:09:47, 3.26it/s] 8%|▊ | 28917/371472 [2:20:04<28:27:44, 3.34it/s] 8%|▊ | 28918/371472 [2:20:05<29:56:27, 3.18it/s] 8%|▊ | 28919/371472 [2:20:05<28:43:21, 3.31it/s] 8%|▊ | 28920/371472 [2:20:05<28:48:54, 3.30it/s] {'loss': 4.5814, 'learning_rate': 9.303798832123079e-07, 'epoch': 1.25} + 8%|▊ | 28920/371472 [2:20:05<28:48:54, 3.30it/s] 8%|▊ | 28921/371472 [2:20:06<28:58:22, 3.28it/s] 8%|▊ | 28922/371472 [2:20:06<28:43:45, 3.31it/s] 8%|▊ | 28923/371472 [2:20:06<28:01:14, 3.40it/s] 8%|▊ | 28924/371472 [2:20:07<29:31:50, 3.22it/s] 8%|▊ | 28925/371472 [2:20:07<28:35:33, 3.33it/s] 8%|▊ | 28926/371472 [2:20:07<27:29:26, 3.46it/s] 8%|▊ | 28927/371472 [2:20:07<29:14:29, 3.25it/s] 8%|▊ | 28928/371472 [2:20:08<27:42:29, 3.43it/s] 8%|▊ | 28929/371472 [2:20:08<27:41:42, 3.44it/s] 8%|▊ | 28930/371472 [2:20:08<27:25:54, 3.47it/s] 8%|▊ | 28931/371472 [2:20:09<27:44:51, 3.43it/s] 8%|▊ | 28932/371472 [2:20:09<27:23:32, 3.47it/s] 8%|▊ | 28933/371472 [2:20:09<27:55:31, 3.41it/s] 8%|▊ | 28934/371472 [2:20:10<33:04:30, 2.88it/s] 8%|▊ | 28935/371472 [2:20:10<32:43:44, 2.91it/s] 8%|▊ | 28936/371472 [2:20:10<31:28:39, 3.02it/s] 8%|▊ | 28937/371472 [2:20:10<29:30:31, 3.22it/s] 8%|▊ | 28938/371472 [2:20:11<30:21:38, 3.13it/s] 8%|▊ | 28939/371472 [2:20:11<30:14:29, 3.15it/s] 8%|▊ | 28940/371472 [2:20:11<29:29:56, 3.23it/s] {'loss': 4.3019, 'learning_rate': 9.30331401236829e-07, 'epoch': 1.25} + 8%|▊ | 28940/371472 [2:20:11<29:29:56, 3.23it/s] 8%|▊ | 28941/371472 [2:20:12<29:36:42, 3.21it/s] 8%|▊ | 28942/371472 [2:20:12<28:40:17, 3.32it/s] 8%|▊ | 28943/371472 [2:20:12<29:33:44, 3.22it/s] 8%|▊ | 28944/371472 [2:20:13<31:47:28, 2.99it/s] 8%|▊ | 28945/371472 [2:20:13<30:27:16, 3.12it/s] 8%|▊ | 28946/371472 [2:20:13<30:51:04, 3.08it/s] 8%|▊ | 28947/371472 [2:20:14<30:13:41, 3.15it/s] 8%|▊ | 28948/371472 [2:20:14<28:58:41, 3.28it/s] 8%|▊ | 28949/371472 [2:20:14<27:46:14, 3.43it/s] 8%|▊ | 28950/371472 [2:20:14<27:07:56, 3.51it/s] 8%|▊ | 28951/371472 [2:20:15<27:58:49, 3.40it/s] 8%|▊ | 28952/371472 [2:20:15<27:03:17, 3.52it/s] 8%|▊ | 28953/371472 [2:20:15<26:07:53, 3.64it/s] 8%|▊ | 28954/371472 [2:20:16<27:34:32, 3.45it/s] 8%|▊ | 28955/371472 [2:20:16<26:33:19, 3.58it/s] 8%|▊ | 28956/371472 [2:20:16<26:09:33, 3.64it/s] 8%|▊ | 28957/371472 [2:20:16<26:23:40, 3.60it/s] 8%|▊ | 28958/371472 [2:20:17<27:35:07, 3.45it/s] 8%|▊ | 28959/371472 [2:20:17<27:49:37, 3.42it/s] 8%|▊ | 28960/371472 [2:20:17<27:14:29, 3.49it/s] {'loss': 4.5484, 'learning_rate': 9.302829192613501e-07, 'epoch': 1.25} + 8%|▊ | 28960/371472 [2:20:17<27:14:29, 3.49it/s] 8%|▊ | 28961/371472 [2:20:18<27:02:43, 3.52it/s] 8%|▊ | 28962/371472 [2:20:18<27:08:32, 3.51it/s] 8%|▊ | 28963/371472 [2:20:18<26:57:00, 3.53it/s] 8%|▊ | 28964/371472 [2:20:18<26:59:30, 3.52it/s] 8%|▊ | 28965/371472 [2:20:19<26:58:20, 3.53it/s] 8%|▊ | 28966/371472 [2:20:19<26:31:51, 3.59it/s] 8%|▊ | 28967/371472 [2:20:19<27:08:45, 3.50it/s] 8%|▊ | 28968/371472 [2:20:20<26:54:47, 3.54it/s] 8%|▊ | 28969/371472 [2:20:20<26:39:09, 3.57it/s] 8%|▊ | 28970/371472 [2:20:20<27:32:45, 3.45it/s] 8%|▊ | 28971/371472 [2:20:20<27:27:16, 3.47it/s] 8%|▊ | 28972/371472 [2:20:21<29:01:32, 3.28it/s] 8%|▊ | 28973/371472 [2:20:21<29:58:52, 3.17it/s] 8%|▊ | 28974/371472 [2:20:21<28:58:18, 3.28it/s] 8%|▊ | 28975/371472 [2:20:22<28:24:36, 3.35it/s] 8%|▊ | 28976/371472 [2:20:22<29:41:53, 3.20it/s] 8%|▊ | 28977/371472 [2:20:22<29:54:09, 3.18it/s] 8%|▊ | 28978/371472 [2:20:23<28:58:50, 3.28it/s] 8%|▊ | 28979/371472 [2:20:23<28:13:25, 3.37it/s] 8%|▊ | 28980/371472 [2:20:23<28:54:07, 3.29it/s] {'loss': 4.2874, 'learning_rate': 9.302344372858713e-07, 'epoch': 1.25} + 8%|▊ | 28980/371472 [2:20:23<28:54:07, 3.29it/s] 8%|▊ | 28981/371472 [2:20:23<27:44:54, 3.43it/s] 8%|▊ | 28982/371472 [2:20:24<26:49:52, 3.55it/s] 8%|▊ | 28983/371472 [2:20:24<27:02:46, 3.52it/s] 8%|▊ | 28984/371472 [2:20:24<26:55:15, 3.53it/s] 8%|▊ | 28985/371472 [2:20:25<26:29:56, 3.59it/s] 8%|▊ | 28986/371472 [2:20:25<25:45:15, 3.69it/s] 8%|▊ | 28987/371472 [2:20:25<27:20:40, 3.48it/s] 8%|▊ | 28988/371472 [2:20:25<26:50:51, 3.54it/s] 8%|▊ | 28989/371472 [2:20:26<27:25:49, 3.47it/s] 8%|▊ | 28990/371472 [2:20:26<27:18:53, 3.48it/s] 8%|▊ | 28991/371472 [2:20:26<26:28:22, 3.59it/s] 8%|▊ | 28992/371472 [2:20:27<26:21:37, 3.61it/s] 8%|▊ | 28993/371472 [2:20:27<27:32:54, 3.45it/s] 8%|▊ | 28994/371472 [2:20:27<28:03:22, 3.39it/s] 8%|▊ | 28995/371472 [2:20:27<26:46:23, 3.55it/s] 8%|▊ | 28996/371472 [2:20:28<28:09:47, 3.38it/s] 8%|▊ | 28997/371472 [2:20:28<27:17:48, 3.49it/s] 8%|▊ | 28998/371472 [2:20:28<26:33:29, 3.58it/s] 8%|▊ | 28999/371472 [2:20:29<25:44:32, 3.70it/s] 8%|▊ | 29000/371472 [2:20:29<26:27:45, 3.59it/s] {'loss': 4.6251, 'learning_rate': 9.301859553103923e-07, 'epoch': 1.25} + 8%|▊ | 29000/371472 [2:20:29<26:27:45, 3.59it/s] 8%|▊ | 29001/371472 [2:20:29<26:47:54, 3.55it/s] 8%|▊ | 29002/371472 [2:20:29<27:34:51, 3.45it/s] 8%|▊ | 29003/371472 [2:20:30<28:49:34, 3.30it/s] 8%|▊ | 29004/371472 [2:20:30<28:25:50, 3.35it/s] 8%|▊ | 29005/371472 [2:20:30<30:41:56, 3.10it/s] 8%|▊ | 29006/371472 [2:20:31<30:33:35, 3.11it/s] 8%|▊ | 29007/371472 [2:20:31<28:49:47, 3.30it/s] 8%|▊ | 29008/371472 [2:20:31<28:34:35, 3.33it/s] 8%|▊ | 29009/371472 [2:20:32<27:39:39, 3.44it/s] 8%|▊ | 29010/371472 [2:20:32<27:58:08, 3.40it/s] 8%|▊ | 29011/371472 [2:20:32<26:25:39, 3.60it/s] 8%|▊ | 29012/371472 [2:20:32<25:59:38, 3.66it/s] 8%|▊ | 29013/371472 [2:20:33<26:05:31, 3.65it/s] 8%|▊ | 29014/371472 [2:20:33<27:24:13, 3.47it/s] 8%|▊ | 29015/371472 [2:20:33<27:56:36, 3.40it/s] 8%|▊ | 29016/371472 [2:20:34<26:32:53, 3.58it/s] 8%|▊ | 29017/371472 [2:20:34<25:42:13, 3.70it/s] 8%|▊ | 29018/371472 [2:20:34<25:35:48, 3.72it/s] 8%|▊ | 29019/371472 [2:20:34<26:25:21, 3.60it/s] 8%|▊ | 29020/371472 [2:20:35<26:27:16, 3.60it/s] {'loss': 4.4941, 'learning_rate': 9.301374733349134e-07, 'epoch': 1.25} + 8%|▊ | 29020/371472 [2:20:35<26:27:16, 3.60it/s] 8%|▊ | 29021/371472 [2:20:35<28:01:50, 3.39it/s] 8%|▊ | 29022/371472 [2:20:35<27:22:09, 3.48it/s] 8%|▊ | 29023/371472 [2:20:36<28:12:14, 3.37it/s] 8%|▊ | 29024/371472 [2:20:36<28:10:36, 3.38it/s] 8%|▊ | 29025/371472 [2:20:36<28:19:04, 3.36it/s] 8%|▊ | 29026/371472 [2:20:36<29:28:18, 3.23it/s] 8%|▊ | 29027/371472 [2:20:37<31:37:30, 3.01it/s] 8%|▊ | 29028/371472 [2:20:37<31:05:47, 3.06it/s] 8%|▊ | 29029/371472 [2:20:37<30:32:19, 3.11it/s] 8%|▊ | 29030/371472 [2:20:38<30:02:04, 3.17it/s] 8%|▊ | 29031/371472 [2:20:38<30:01:54, 3.17it/s] 8%|▊ | 29032/371472 [2:20:38<29:30:18, 3.22it/s] 8%|▊ | 29033/371472 [2:20:39<28:45:20, 3.31it/s] 8%|▊ | 29034/371472 [2:20:39<28:02:37, 3.39it/s] 8%|▊ | 29035/371472 [2:20:39<27:32:21, 3.45it/s] 8%|▊ | 29036/371472 [2:20:40<29:41:15, 3.20it/s] 8%|▊ | 29037/371472 [2:20:40<28:24:45, 3.35it/s] 8%|▊ | 29038/371472 [2:20:40<27:46:31, 3.42it/s] 8%|▊ | 29039/371472 [2:20:40<27:32:07, 3.45it/s] 8%|▊ | 29040/371472 [2:20:41<27:51:23, 3.41it/s] {'loss': 4.2387, 'learning_rate': 9.300889913594345e-07, 'epoch': 1.25} + 8%|▊ | 29040/371472 [2:20:41<27:51:23, 3.41it/s] 8%|▊ | 29041/371472 [2:20:41<26:58:28, 3.53it/s] 8%|▊ | 29042/371472 [2:20:41<26:21:07, 3.61it/s] 8%|▊ | 29043/371472 [2:20:42<25:13:36, 3.77it/s] 8%|▊ | 29044/371472 [2:20:42<24:59:15, 3.81it/s] 8%|▊ | 29045/371472 [2:20:42<26:49:12, 3.55it/s] 8%|▊ | 29046/371472 [2:20:42<27:14:58, 3.49it/s] 8%|▊ | 29047/371472 [2:20:43<27:15:19, 3.49it/s] 8%|▊ | 29048/371472 [2:20:43<26:09:41, 3.64it/s] 8%|▊ | 29049/371472 [2:20:43<26:07:56, 3.64it/s] 8%|▊ | 29050/371472 [2:20:44<27:38:32, 3.44it/s] 8%|▊ | 29051/371472 [2:20:44<27:38:47, 3.44it/s] 8%|▊ | 29052/371472 [2:20:44<28:27:16, 3.34it/s] 8%|▊ | 29053/371472 [2:20:44<27:27:15, 3.46it/s] 8%|▊ | 29054/371472 [2:20:45<29:08:26, 3.26it/s] 8%|▊ | 29055/371472 [2:20:45<28:38:09, 3.32it/s] 8%|▊ | 29056/371472 [2:20:45<28:17:53, 3.36it/s] 8%|▊ | 29057/371472 [2:20:46<26:48:33, 3.55it/s] 8%|▊ | 29058/371472 [2:20:46<26:03:42, 3.65it/s] 8%|▊ | 29059/371472 [2:20:46<28:59:38, 3.28it/s] 8%|▊ | 29060/371472 [2:20:47<31:00:15, 3.07it/s] {'loss': 4.4437, 'learning_rate': 9.300405093839557e-07, 'epoch': 1.25} + 8%|▊ | 29060/371472 [2:20:47<31:00:15, 3.07it/s] 8%|▊ | 29061/371472 [2:20:47<29:24:50, 3.23it/s] 8%|▊ | 29062/371472 [2:20:47<29:13:11, 3.26it/s] 8%|▊ | 29063/371472 [2:20:47<27:53:24, 3.41it/s] 8%|▊ | 29064/371472 [2:20:48<27:12:34, 3.50it/s] 8%|▊ | 29065/371472 [2:20:48<29:20:51, 3.24it/s] 8%|▊ | 29066/371472 [2:20:48<30:35:16, 3.11it/s] 8%|▊ | 29067/371472 [2:20:49<29:31:51, 3.22it/s] 8%|▊ | 29068/371472 [2:20:49<29:26:13, 3.23it/s] 8%|▊ | 29069/371472 [2:20:49<28:27:09, 3.34it/s] 8%|▊ | 29070/371472 [2:20:50<27:34:26, 3.45it/s] 8%|▊ | 29071/371472 [2:20:50<26:55:46, 3.53it/s] 8%|▊ | 29072/371472 [2:20:50<27:22:34, 3.47it/s] 8%|▊ | 29073/371472 [2:20:50<27:02:29, 3.52it/s] 8%|▊ | 29074/371472 [2:20:51<26:27:09, 3.60it/s] 8%|▊ | 29075/371472 [2:20:51<25:50:46, 3.68it/s] 8%|▊ | 29076/371472 [2:20:51<25:38:10, 3.71it/s] 8%|▊ | 29077/371472 [2:20:51<27:21:45, 3.48it/s] 8%|▊ | 29078/371472 [2:20:52<26:46:49, 3.55it/s] 8%|▊ | 29079/371472 [2:20:52<27:43:12, 3.43it/s] 8%|▊ | 29080/371472 [2:20:52<27:44:47, 3.43it/s] {'loss': 4.5546, 'learning_rate': 9.299920274084767e-07, 'epoch': 1.25} + 8%|▊ | 29080/371472 [2:20:52<27:44:47, 3.43it/s] 8%|▊ | 29081/371472 [2:20:53<28:40:58, 3.32it/s] 8%|▊ | 29082/371472 [2:20:53<27:38:48, 3.44it/s] 8%|▊ | 29083/371472 [2:20:53<27:22:28, 3.47it/s] 8%|▊ | 29084/371472 [2:20:53<26:25:30, 3.60it/s] 8%|▊ | 29085/371472 [2:20:54<25:34:48, 3.72it/s] 8%|▊ | 29086/371472 [2:20:54<26:20:19, 3.61it/s] 8%|▊ | 29087/371472 [2:20:54<27:44:16, 3.43it/s] 8%|▊ | 29088/371472 [2:20:55<27:06:16, 3.51it/s] 8%|▊ | 29089/371472 [2:20:55<26:28:46, 3.59it/s] 8%|▊ | 29090/371472 [2:20:55<25:35:15, 3.72it/s] 8%|▊ | 29091/371472 [2:20:55<26:50:30, 3.54it/s] 8%|▊ | 29092/371472 [2:20:56<26:18:07, 3.62it/s] 8%|▊ | 29093/371472 [2:20:56<26:57:25, 3.53it/s] 8%|▊ | 29094/371472 [2:20:56<27:14:37, 3.49it/s] 8%|▊ | 29095/371472 [2:20:57<27:19:01, 3.48it/s] 8%|▊ | 29096/371472 [2:20:57<26:28:05, 3.59it/s] 8%|▊ | 29097/371472 [2:20:57<26:14:15, 3.62it/s] 8%|▊ | 29098/371472 [2:20:57<27:27:48, 3.46it/s] 8%|▊ | 29099/371472 [2:20:58<28:22:08, 3.35it/s] 8%|▊ | 29100/371472 [2:20:58<28:48:56, 3.30it/s] {'loss': 4.5387, 'learning_rate': 9.299435454329979e-07, 'epoch': 1.25} + 8%|▊ | 29100/371472 [2:20:58<28:48:56, 3.30it/s] 8%|▊ | 29101/371472 [2:20:58<27:51:07, 3.41it/s] 8%|▊ | 29102/371472 [2:20:59<28:11:51, 3.37it/s] 8%|▊ | 29103/371472 [2:20:59<31:17:08, 3.04it/s] 8%|▊ | 29104/371472 [2:20:59<29:25:10, 3.23it/s] 8%|▊ | 29105/371472 [2:21:00<30:13:31, 3.15it/s] 8%|▊ | 29106/371472 [2:21:00<29:09:48, 3.26it/s] 8%|▊ | 29107/371472 [2:21:00<28:04:35, 3.39it/s] 8%|▊ | 29108/371472 [2:21:00<26:36:19, 3.57it/s] 8%|▊ | 29109/371472 [2:21:01<25:50:31, 3.68it/s] 8%|▊ | 29110/371472 [2:21:01<26:35:31, 3.58it/s] 8%|▊ | 29111/371472 [2:21:01<26:33:55, 3.58it/s] 8%|▊ | 29112/371472 [2:21:02<25:49:52, 3.68it/s] 8%|▊ | 29113/371472 [2:21:02<26:55:07, 3.53it/s] 8%|▊ | 29114/371472 [2:21:02<26:56:38, 3.53it/s] 8%|▊ | 29115/371472 [2:21:02<27:09:23, 3.50it/s] 8%|▊ | 29116/371472 [2:21:03<26:17:58, 3.62it/s] 8%|▊ | 29117/371472 [2:21:03<26:05:27, 3.64it/s] 8%|▊ | 29118/371472 [2:21:03<26:29:26, 3.59it/s] 8%|▊ | 29119/371472 [2:21:04<28:07:27, 3.38it/s] 8%|▊ | 29120/371472 [2:21:04<31:31:55, 3.02it/s] {'loss': 4.2704, 'learning_rate': 9.29895063457519e-07, 'epoch': 1.25} + 8%|▊ | 29120/371472 [2:21:04<31:31:55, 3.02it/s] 8%|▊ | 29121/371472 [2:21:04<30:37:44, 3.10it/s] 8%|▊ | 29122/371472 [2:21:05<29:43:35, 3.20it/s] 8%|▊ | 29123/371472 [2:21:05<29:42:28, 3.20it/s] 8%|▊ | 29124/371472 [2:21:05<29:03:39, 3.27it/s] 8%|▊ | 29125/371472 [2:21:06<29:58:24, 3.17it/s] 8%|▊ | 29126/371472 [2:21:06<30:39:34, 3.10it/s] 8%|▊ | 29127/371472 [2:21:06<29:53:28, 3.18it/s] 8%|▊ | 29128/371472 [2:21:06<28:43:30, 3.31it/s] 8%|▊ | 29129/371472 [2:21:07<28:02:42, 3.39it/s] 8%|▊ | 29130/371472 [2:21:07<28:56:45, 3.29it/s] 8%|▊ | 29131/371472 [2:21:07<28:09:56, 3.38it/s] 8%|▊ | 29132/371472 [2:21:08<27:11:26, 3.50it/s] 8%|▊ | 29133/371472 [2:21:08<28:43:34, 3.31it/s] 8%|▊ | 29134/371472 [2:21:08<27:28:06, 3.46it/s] 8%|▊ | 29135/371472 [2:21:08<27:13:36, 3.49it/s] 8%|▊ | 29136/371472 [2:21:09<26:54:12, 3.53it/s] 8%|▊ | 29137/371472 [2:21:09<27:42:39, 3.43it/s] 8%|▊ | 29138/371472 [2:21:09<27:10:21, 3.50it/s] 8%|▊ | 29139/371472 [2:21:10<26:45:12, 3.55it/s] 8%|▊ | 29140/371472 [2:21:10<26:04:08, 3.65it/s] {'loss': 4.4503, 'learning_rate': 9.298465814820402e-07, 'epoch': 1.26} + 8%|▊ | 29140/371472 [2:21:10<26:04:08, 3.65it/s] 8%|▊ | 29141/371472 [2:21:10<25:20:09, 3.75it/s] 8%|▊ | 29142/371472 [2:21:10<25:40:56, 3.70it/s] 8%|▊ | 29143/371472 [2:21:11<26:07:20, 3.64it/s] 8%|▊ | 29144/371472 [2:21:11<25:30:53, 3.73it/s] 8%|▊ | 29145/371472 [2:21:11<25:44:50, 3.69it/s] 8%|▊ | 29146/371472 [2:21:11<26:41:24, 3.56it/s] 8%|▊ | 29147/371472 [2:21:12<26:22:01, 3.61it/s] 8%|▊ | 29148/371472 [2:21:12<25:54:05, 3.67it/s] 8%|▊ | 29149/371472 [2:21:12<25:29:41, 3.73it/s] 8%|▊ | 29150/371472 [2:21:13<27:07:40, 3.51it/s] 8%|▊ | 29151/371472 [2:21:13<28:09:06, 3.38it/s] 8%|▊ | 29152/371472 [2:21:13<29:37:53, 3.21it/s] 8%|▊ | 29153/371472 [2:21:13<27:47:01, 3.42it/s] 8%|▊ | 29154/371472 [2:21:14<27:40:11, 3.44it/s] 8%|▊ | 29155/371472 [2:21:14<26:47:21, 3.55it/s] 8%|▊ | 29156/371472 [2:21:14<27:55:12, 3.41it/s] 8%|▊ | 29157/371472 [2:21:15<27:13:33, 3.49it/s] 8%|▊ | 29158/371472 [2:21:15<27:13:41, 3.49it/s] 8%|▊ | 29159/371472 [2:21:15<29:35:31, 3.21it/s] 8%|▊ | 29160/371472 [2:21:16<28:03:55, 3.39it/s] {'loss': 4.4924, 'learning_rate': 9.297980995065611e-07, 'epoch': 1.26} + 8%|▊ | 29160/371472 [2:21:16<28:03:55, 3.39it/s] 8%|▊ | 29161/371472 [2:21:16<27:09:40, 3.50it/s] 8%|▊ | 29162/371472 [2:21:16<26:26:29, 3.60it/s] 8%|▊ | 29163/371472 [2:21:16<25:44:13, 3.69it/s] 8%|▊ | 29164/371472 [2:21:17<27:31:07, 3.46it/s] 8%|▊ | 29165/371472 [2:21:17<27:27:55, 3.46it/s] 8%|▊ | 29166/371472 [2:21:17<26:58:20, 3.53it/s] 8%|▊ | 29167/371472 [2:21:17<26:44:29, 3.56it/s] 8%|▊ | 29168/371472 [2:21:18<26:34:52, 3.58it/s] 8%|▊ | 29169/371472 [2:21:18<25:53:41, 3.67it/s] 8%|▊ | 29170/371472 [2:21:18<25:04:17, 3.79it/s] 8%|▊ | 29171/371472 [2:21:19<25:16:06, 3.76it/s] 8%|▊ | 29172/371472 [2:21:19<27:34:25, 3.45it/s] 8%|▊ | 29173/371472 [2:21:19<27:16:21, 3.49it/s] 8%|▊ | 29174/371472 [2:21:19<26:42:59, 3.56it/s] 8%|▊ | 29175/371472 [2:21:20<26:44:53, 3.55it/s] 8%|▊ | 29176/371472 [2:21:20<26:21:19, 3.61it/s] 8%|▊ | 29177/371472 [2:21:20<25:35:08, 3.72it/s] 8%|▊ | 29178/371472 [2:21:21<26:15:34, 3.62it/s] 8%|▊ | 29179/371472 [2:21:21<25:15:07, 3.77it/s] 8%|▊ | 29180/371472 [2:21:21<25:20:31, 3.75it/s] {'loss': 4.8439, 'learning_rate': 9.297496175310823e-07, 'epoch': 1.26} + 8%|▊ | 29180/371472 [2:21:21<25:20:31, 3.75it/s] 8%|▊ | 29181/371472 [2:21:21<25:39:51, 3.70it/s] 8%|▊ | 29182/371472 [2:21:22<26:18:25, 3.61it/s] 8%|▊ | 29183/371472 [2:21:22<27:43:00, 3.43it/s] 8%|▊ | 29184/371472 [2:21:22<26:46:45, 3.55it/s] 8%|▊ | 29185/371472 [2:21:23<27:39:34, 3.44it/s] 8%|▊ | 29186/371472 [2:21:23<26:42:43, 3.56it/s] 8%|▊ | 29187/371472 [2:21:23<26:31:37, 3.58it/s] 8%|▊ | 29188/371472 [2:21:23<27:32:16, 3.45it/s] 8%|▊ | 29189/371472 [2:21:24<27:37:40, 3.44it/s] 8%|▊ | 29190/371472 [2:21:24<29:10:10, 3.26it/s] 8%|▊ | 29191/371472 [2:21:24<28:39:45, 3.32it/s] 8%|▊ | 29192/371472 [2:21:25<29:28:57, 3.22it/s] 8%|▊ | 29193/371472 [2:21:25<30:25:09, 3.13it/s] 8%|▊ | 29194/371472 [2:21:25<28:48:39, 3.30it/s] 8%|▊ | 29195/371472 [2:21:26<31:40:02, 3.00it/s] 8%|▊ | 29196/371472 [2:21:26<29:40:57, 3.20it/s] 8%|▊ | 29197/371472 [2:21:26<28:34:16, 3.33it/s] 8%|▊ | 29198/371472 [2:21:27<30:49:35, 3.08it/s] 8%|▊ | 29199/371472 [2:21:27<34:34:50, 2.75it/s] 8%|▊ | 29200/371472 [2:21:27<31:29:08, 3.02it/s] {'loss': 4.4959, 'learning_rate': 9.297011355556034e-07, 'epoch': 1.26} + 8%|▊ | 29200/371472 [2:21:27<31:29:08, 3.02it/s] 8%|▊ | 29201/371472 [2:21:28<30:26:46, 3.12it/s] 8%|▊ | 29202/371472 [2:21:28<31:44:42, 2.99it/s] 8%|▊ | 29203/371472 [2:21:28<30:35:58, 3.11it/s] 8%|▊ | 29204/371472 [2:21:28<28:53:37, 3.29it/s] 8%|▊ | 29205/371472 [2:21:29<27:17:40, 3.48it/s] 8%|▊ | 29206/371472 [2:21:29<26:04:38, 3.65it/s] 8%|▊ | 29207/371472 [2:21:29<26:49:56, 3.54it/s] 8%|▊ | 29208/371472 [2:21:30<27:07:24, 3.51it/s] 8%|▊ | 29209/371472 [2:21:30<29:41:27, 3.20it/s] 8%|▊ | 29210/371472 [2:21:30<28:21:18, 3.35it/s] 8%|▊ | 29211/371472 [2:21:30<27:55:12, 3.41it/s] 8%|▊ | 29212/371472 [2:21:31<26:50:14, 3.54it/s] 8%|▊ | 29213/371472 [2:21:31<26:15:16, 3.62it/s] 8%|▊ | 29214/371472 [2:21:31<27:54:27, 3.41it/s] 8%|▊ | 29215/371472 [2:21:32<31:23:44, 3.03it/s] 8%|▊ | 29216/371472 [2:21:32<31:00:34, 3.07it/s] 8%|▊ | 29217/371472 [2:21:32<29:02:09, 3.27it/s] 8%|▊ | 29218/371472 [2:21:33<27:27:00, 3.46it/s] 8%|▊ | 29219/371472 [2:21:33<26:28:22, 3.59it/s] 8%|▊ | 29220/371472 [2:21:33<26:13:12, 3.63it/s] {'loss': 4.6758, 'learning_rate': 9.296526535801246e-07, 'epoch': 1.26} + 8%|▊ | 29220/371472 [2:21:33<26:13:12, 3.63it/s] 8%|▊ | 29221/371472 [2:21:33<26:36:54, 3.57it/s] 8%|▊ | 29222/371472 [2:21:34<27:47:40, 3.42it/s] 8%|▊ | 29223/371472 [2:21:34<27:30:36, 3.46it/s] 8%|▊ | 29224/371472 [2:21:34<27:18:31, 3.48it/s] 8%|▊ | 29225/371472 [2:21:35<27:09:08, 3.50it/s] 8%|▊ | 29226/371472 [2:21:35<28:18:54, 3.36it/s] 8%|▊ | 29227/371472 [2:21:35<27:04:34, 3.51it/s] 8%|▊ | 29228/371472 [2:21:35<27:36:02, 3.44it/s] 8%|▊ | 29229/371472 [2:21:36<26:42:10, 3.56it/s] 8%|▊ | 29230/371472 [2:21:36<26:22:13, 3.61it/s] 8%|▊ | 29231/371472 [2:21:36<26:50:12, 3.54it/s] 8%|▊ | 29232/371472 [2:21:37<26:42:37, 3.56it/s] 8%|▊ | 29233/371472 [2:21:37<27:37:33, 3.44it/s] 8%|▊ | 29234/371472 [2:21:37<28:38:20, 3.32it/s] 8%|▊ | 29235/371472 [2:21:37<27:59:10, 3.40it/s] 8%|▊ | 29236/371472 [2:21:38<26:56:20, 3.53it/s] 8%|▊ | 29237/371472 [2:21:38<26:10:36, 3.63it/s] 8%|▊ | 29238/371472 [2:21:38<27:35:59, 3.44it/s] 8%|▊ | 29239/371472 [2:21:39<26:40:30, 3.56it/s] 8%|▊ | 29240/371472 [2:21:39<25:50:19, 3.68it/s] {'loss': 4.8718, 'learning_rate': 9.296041716046456e-07, 'epoch': 1.26} + 8%|▊ | 29240/371472 [2:21:39<25:50:19, 3.68it/s] 8%|▊ | 29241/371472 [2:21:39<29:07:27, 3.26it/s] 8%|▊ | 29242/371472 [2:21:39<28:22:24, 3.35it/s] 8%|▊ | 29243/371472 [2:21:40<28:32:53, 3.33it/s] 8%|▊ | 29244/371472 [2:21:40<27:39:52, 3.44it/s] 8%|▊ | 29245/371472 [2:21:40<26:43:43, 3.56it/s] 8%|▊ | 29246/371472 [2:21:41<26:37:00, 3.57it/s] 8%|▊ | 29247/371472 [2:21:41<26:29:53, 3.59it/s] 8%|▊ | 29248/371472 [2:21:41<25:49:10, 3.68it/s] 8%|▊ | 29249/371472 [2:21:41<25:56:05, 3.67it/s] 8%|▊ | 29250/371472 [2:21:42<25:50:32, 3.68it/s] 8%|▊ | 29251/371472 [2:21:42<26:09:25, 3.63it/s] 8%|▊ | 29252/371472 [2:21:42<27:16:22, 3.49it/s] 8%|▊ | 29253/371472 [2:21:43<27:04:31, 3.51it/s] 8%|▊ | 29254/371472 [2:21:43<27:10:12, 3.50it/s] 8%|▊ | 29255/371472 [2:21:43<26:46:11, 3.55it/s] 8%|▊ | 29256/371472 [2:21:43<27:11:34, 3.50it/s] 8%|▊ | 29257/371472 [2:21:44<26:30:45, 3.59it/s] 8%|▊ | 29258/371472 [2:21:44<27:04:23, 3.51it/s] 8%|▊ | 29259/371472 [2:21:44<28:41:09, 3.31it/s] 8%|▊ | 29260/371472 [2:21:45<28:20:21, 3.35it/s] {'loss': 6.2251, 'learning_rate': 9.295556896291667e-07, 'epoch': 1.26} + 8%|▊ | 29260/371472 [2:21:45<28:20:21, 3.35it/s] 8%|▊ | 29261/371472 [2:21:45<27:26:12, 3.46it/s] 8%|▊ | 29262/371472 [2:21:45<26:44:54, 3.55it/s] 8%|▊ | 29263/371472 [2:21:45<25:57:29, 3.66it/s] 8%|▊ | 29264/371472 [2:21:46<26:37:33, 3.57it/s] 8%|▊ | 29265/371472 [2:21:46<26:56:00, 3.53it/s] 8%|▊ | 29266/371472 [2:21:46<28:52:21, 3.29it/s] 8%|▊ | 29267/371472 [2:21:47<27:40:36, 3.43it/s] 8%|▊ | 29268/371472 [2:21:47<27:01:50, 3.52it/s] 8%|▊ | 29269/371472 [2:21:47<27:24:26, 3.47it/s] 8%|▊ | 29270/371472 [2:21:47<27:41:33, 3.43it/s] 8%|▊ | 29271/371472 [2:21:48<27:20:31, 3.48it/s] 8%|▊ | 29272/371472 [2:21:48<26:31:21, 3.58it/s] 8%|▊ | 29273/371472 [2:21:48<29:17:33, 3.25it/s] 8%|▊ | 29274/371472 [2:21:49<29:25:52, 3.23it/s] 8%|▊ | 29275/371472 [2:21:49<28:52:50, 3.29it/s] 8%|▊ | 29276/371472 [2:21:49<27:44:46, 3.43it/s] 8%|▊ | 29277/371472 [2:21:49<27:14:34, 3.49it/s] 8%|▊ | 29278/371472 [2:21:50<28:31:42, 3.33it/s] 8%|▊ | 29279/371472 [2:21:50<27:13:32, 3.49it/s] 8%|▊ | 29280/371472 [2:21:50<26:47:07, 3.55it/s] {'loss': 7.7667, 'learning_rate': 9.295072076536878e-07, 'epoch': 1.26} + 8%|▊ | 29280/371472 [2:21:50<26:47:07, 3.55it/s] 8%|▊ | 29281/371472 [2:21:51<26:28:03, 3.59it/s] 8%|▊ | 29282/371472 [2:21:51<26:21:41, 3.61it/s] 8%|▊ | 29283/371472 [2:21:51<26:31:43, 3.58it/s] 8%|▊ | 29284/371472 [2:21:51<25:33:15, 3.72it/s] 8%|▊ | 29285/371472 [2:21:52<26:57:05, 3.53it/s] 8%|▊ | 29286/371472 [2:21:52<27:01:30, 3.52it/s] 8%|▊ | 29287/371472 [2:21:52<27:32:06, 3.45it/s] 8%|▊ | 29288/371472 [2:21:53<28:03:04, 3.39it/s] 8%|▊ | 29289/371472 [2:21:53<28:02:40, 3.39it/s] 8%|▊ | 29290/371472 [2:21:53<27:49:48, 3.42it/s] 8%|▊ | 29291/371472 [2:21:54<28:30:17, 3.33it/s] 8%|▊ | 29292/371472 [2:21:54<28:48:49, 3.30it/s] 8%|▊ | 29293/371472 [2:21:54<29:10:12, 3.26it/s] 8%|▊ | 29294/371472 [2:21:54<29:47:10, 3.19it/s] 8%|▊ | 29295/371472 [2:21:55<29:33:25, 3.22it/s] 8%|▊ | 29296/371472 [2:21:55<28:11:27, 3.37it/s] 8%|▊ | 29297/371472 [2:21:55<27:21:39, 3.47it/s] 8%|▊ | 29298/371472 [2:21:56<26:49:26, 3.54it/s] 8%|▊ | 29299/371472 [2:21:56<26:02:02, 3.65it/s] 8%|▊ | 29300/371472 [2:21:56<26:03:17, 3.65it/s] {'loss': 7.3613, 'learning_rate': 9.29458725678209e-07, 'epoch': 1.26} + 8%|▊ | 29300/371472 [2:21:56<26:03:17, 3.65it/s] 8%|▊ | 29301/371472 [2:21:56<27:25:04, 3.47it/s] 8%|▊ | 29302/371472 [2:21:57<28:39:34, 3.32it/s] 8%|▊ | 29303/371472 [2:21:57<29:08:41, 3.26it/s] 8%|▊ | 29304/371472 [2:21:57<29:22:26, 3.24it/s] 8%|▊ | 29305/371472 [2:21:58<28:26:49, 3.34it/s] 8%|▊ | 29306/371472 [2:21:58<27:57:57, 3.40it/s] 8%|▊ | 29307/371472 [2:21:58<27:18:23, 3.48it/s] 8%|▊ | 29308/371472 [2:21:58<26:50:46, 3.54it/s] 8%|▊ | 29309/371472 [2:21:59<25:48:40, 3.68it/s] 8%|▊ | 29310/371472 [2:21:59<25:52:34, 3.67it/s] 8%|▊ | 29311/371472 [2:21:59<26:47:45, 3.55it/s] 8%|▊ | 29312/371472 [2:22:00<29:37:43, 3.21it/s] 8%|▊ | 29313/371472 [2:22:00<29:10:56, 3.26it/s] 8%|▊ | 29314/371472 [2:22:00<28:11:23, 3.37it/s] 8%|▊ | 29315/371472 [2:22:01<29:40:57, 3.20it/s] 8%|▊ | 29316/371472 [2:22:01<29:00:41, 3.28it/s] 8%|▊ | 29317/371472 [2:22:01<29:16:42, 3.25it/s] 8%|▊ | 29318/371472 [2:22:01<28:07:42, 3.38it/s] 8%|▊ | 29319/371472 [2:22:02<27:31:03, 3.45it/s] 8%|▊ | 29320/371472 [2:22:02<26:18:20, 3.61it/s] {'loss': 7.3938, 'learning_rate': 9.2941024370273e-07, 'epoch': 1.26} + 8%|▊ | 29320/371472 [2:22:02<26:18:20, 3.61it/s] 8%|▊ | 29321/371472 [2:22:02<26:28:23, 3.59it/s] 8%|▊ | 29322/371472 [2:22:03<26:06:32, 3.64it/s] 8%|▊ | 29323/371472 [2:22:03<25:47:53, 3.68it/s] 8%|▊ | 29324/371472 [2:22:03<25:27:58, 3.73it/s] 8%|▊ | 29325/371472 [2:22:03<26:07:56, 3.64it/s] 8%|▊ | 29326/371472 [2:22:04<26:01:55, 3.65it/s] 8%|▊ | 29327/371472 [2:22:04<26:01:20, 3.65it/s] 8%|▊ | 29328/371472 [2:22:04<26:20:47, 3.61it/s] 8%|▊ | 29329/371472 [2:22:05<27:01:10, 3.52it/s] 8%|▊ | 29330/371472 [2:22:05<26:12:39, 3.63it/s] 8%|▊ | 29331/371472 [2:22:05<27:43:46, 3.43it/s] 8%|▊ | 29332/371472 [2:22:05<27:14:47, 3.49it/s] 8%|▊ | 29333/371472 [2:22:06<26:47:46, 3.55it/s] 8%|▊ | 29334/371472 [2:22:06<26:40:11, 3.56it/s] 8%|▊ | 29335/371472 [2:22:06<27:41:27, 3.43it/s] 8%|▊ | 29336/371472 [2:22:06<26:51:34, 3.54it/s] 8%|▊ | 29337/371472 [2:22:07<26:56:01, 3.53it/s] 8%|▊ | 29338/371472 [2:22:07<27:01:13, 3.52it/s] 8%|▊ | 29339/371472 [2:22:07<26:23:07, 3.60it/s] 8%|▊ | 29340/371472 [2:22:08<26:52:25, 3.54it/s] {'loss': 7.4492, 'learning_rate': 9.293617617272511e-07, 'epoch': 1.26} + 8%|▊ | 29340/371472 [2:22:08<26:52:25, 3.54it/s] 8%|▊ | 29341/371472 [2:22:08<27:08:04, 3.50it/s] 8%|▊ | 29342/371472 [2:22:08<28:27:29, 3.34it/s] 8%|▊ | 29343/371472 [2:22:08<27:07:03, 3.50it/s] 8%|▊ | 29344/371472 [2:22:09<27:48:44, 3.42it/s] 8%|▊ | 29345/371472 [2:22:09<28:01:03, 3.39it/s] 8%|▊ | 29346/371472 [2:22:09<26:58:57, 3.52it/s] 8%|▊ | 29347/371472 [2:22:10<26:39:23, 3.57it/s] 8%|▊ | 29348/371472 [2:22:10<26:33:50, 3.58it/s] 8%|▊ | 29349/371472 [2:22:10<28:30:18, 3.33it/s] 8%|▊ | 29350/371472 [2:22:11<28:13:37, 3.37it/s] 8%|▊ | 29351/371472 [2:22:11<27:08:06, 3.50it/s] 8%|▊ | 29352/371472 [2:22:11<27:18:21, 3.48it/s] 8%|▊ | 29353/371472 [2:22:11<27:10:09, 3.50it/s] 8%|▊ | 29354/371472 [2:22:12<27:59:51, 3.39it/s] 8%|▊ | 29355/371472 [2:22:12<28:10:37, 3.37it/s] 8%|▊ | 29356/371472 [2:22:12<27:44:18, 3.43it/s] 8%|▊ | 29357/371472 [2:22:13<27:05:27, 3.51it/s] 8%|▊ | 29358/371472 [2:22:13<26:31:42, 3.58it/s] 8%|▊ | 29359/371472 [2:22:13<26:02:24, 3.65it/s] 8%|▊ | 29360/371472 [2:22:13<27:01:30, 3.52it/s] {'loss': 7.3229, 'learning_rate': 9.293132797517723e-07, 'epoch': 1.26} + 8%|▊ | 29360/371472 [2:22:13<27:01:30, 3.52it/s] 8%|▊ | 29361/371472 [2:22:14<27:52:54, 3.41it/s] 8%|▊ | 29362/371472 [2:22:14<26:51:31, 3.54it/s] 8%|▊ | 29363/371472 [2:22:14<26:21:33, 3.61it/s] 8%|▊ | 29364/371472 [2:22:15<27:55:59, 3.40it/s] 8%|▊ | 29365/371472 [2:22:15<29:35:48, 3.21it/s] 8%|▊ | 29366/371472 [2:22:15<28:16:21, 3.36it/s] 8%|▊ | 29367/371472 [2:22:15<28:12:26, 3.37it/s] 8%|▊ | 29368/371472 [2:22:16<26:54:51, 3.53it/s] 8%|▊ | 29369/371472 [2:22:16<28:06:34, 3.38it/s] 8%|▊ | 29370/371472 [2:22:16<29:21:07, 3.24it/s] 8%|▊ | 29371/371472 [2:22:17<27:54:48, 3.40it/s] 8%|▊ | 29372/371472 [2:22:17<26:55:37, 3.53it/s] 8%|▊ | 29373/371472 [2:22:17<27:58:55, 3.40it/s] 8%|▊ | 29374/371472 [2:22:18<28:10:50, 3.37it/s] 8%|▊ | 29375/371472 [2:22:18<27:53:13, 3.41it/s] 8%|▊ | 29376/371472 [2:22:18<27:15:06, 3.49it/s] 8%|▊ | 29377/371472 [2:22:18<29:31:29, 3.22it/s] 8%|▊ | 29378/371472 [2:22:19<28:55:52, 3.28it/s] 8%|▊ | 29379/371472 [2:22:19<28:44:27, 3.31it/s] 8%|▊ | 29380/371472 [2:22:19<27:43:26, 3.43it/s] {'loss': 7.4968, 'learning_rate': 9.292647977762933e-07, 'epoch': 1.27} + 8%|▊ | 29380/371472 [2:22:19<27:43:26, 3.43it/s] 8%|▊ | 29381/371472 [2:22:20<26:34:17, 3.58it/s] 8%|▊ | 29382/371472 [2:22:20<26:18:44, 3.61it/s] 8%|▊ | 29383/371472 [2:22:20<27:01:56, 3.52it/s] 8%|▊ | 29384/371472 [2:22:20<27:04:09, 3.51it/s] 8%|▊ | 29385/371472 [2:22:21<26:12:49, 3.62it/s] 8%|▊ | 29386/371472 [2:22:21<26:03:48, 3.65it/s] 8%|▊ | 29387/371472 [2:22:21<26:29:16, 3.59it/s] 8%|▊ | 29388/371472 [2:22:22<26:27:38, 3.59it/s] 8%|▊ | 29389/371472 [2:22:22<26:45:21, 3.55it/s] 8%|▊ | 29390/371472 [2:22:22<27:25:39, 3.46it/s] 8%|▊ | 29391/371472 [2:22:22<29:59:38, 3.17it/s] 8%|▊ | 29392/371472 [2:22:23<29:32:42, 3.22it/s] 8%|▊ | 29393/371472 [2:22:23<28:35:49, 3.32it/s] 8%|▊ | 29394/371472 [2:22:23<28:22:46, 3.35it/s] 8%|▊ | 29395/371472 [2:22:24<28:37:23, 3.32it/s] 8%|▊ | 29396/371472 [2:22:24<26:49:52, 3.54it/s] 8%|▊ | 29397/371472 [2:22:24<27:00:08, 3.52it/s] 8%|▊ | 29398/371472 [2:22:24<26:23:55, 3.60it/s] 8%|▊ | 29399/371472 [2:22:25<25:54:15, 3.67it/s] 8%|▊ | 29400/371472 [2:22:25<26:19:26, 3.61it/s] {'loss': 7.4763, 'learning_rate': 9.292163158008144e-07, 'epoch': 1.27} + 8%|▊ | 29400/371472 [2:22:25<26:19:26, 3.61it/s] 8%|▊ | 29401/371472 [2:22:25<27:02:49, 3.51it/s] 8%|▊ | 29402/371472 [2:22:26<26:34:40, 3.58it/s] 8%|▊ | 29403/371472 [2:22:26<26:39:19, 3.56it/s] 8%|▊ | 29404/371472 [2:22:26<26:39:36, 3.56it/s] 8%|▊ | 29405/371472 [2:22:26<27:50:25, 3.41it/s] 8%|▊ | 29406/371472 [2:22:27<27:19:33, 3.48it/s] 8%|▊ | 29407/371472 [2:22:27<28:08:00, 3.38it/s] 8%|▊ | 29408/371472 [2:22:27<26:56:56, 3.53it/s] 8%|▊ | 29409/371472 [2:22:28<26:42:52, 3.56it/s] 8%|▊ | 29410/371472 [2:22:28<26:58:10, 3.52it/s] 8%|▊ | 29411/371472 [2:22:28<26:09:04, 3.63it/s] 8%|▊ | 29412/371472 [2:22:28<27:39:54, 3.43it/s] 8%|▊ | 29413/371472 [2:22:29<27:41:45, 3.43it/s] 8%|▊ | 29414/371472 [2:22:29<27:21:10, 3.47it/s] 8%|▊ | 29415/371472 [2:22:29<27:34:02, 3.45it/s] 8%|▊ | 29416/371472 [2:22:30<26:45:10, 3.55it/s] 8%|▊ | 29417/371472 [2:22:30<26:34:17, 3.58it/s] 8%|▊ | 29418/371472 [2:22:30<27:17:38, 3.48it/s] 8%|▊ | 29419/371472 [2:22:30<27:14:16, 3.49it/s] 8%|▊ | 29420/371472 [2:22:31<29:38:32, 3.21it/s] {'loss': 7.0305, 'learning_rate': 9.291678338253355e-07, 'epoch': 1.27} + 8%|▊ | 29420/371472 [2:22:31<29:38:32, 3.21it/s] 8%|▊ | 29421/371472 [2:22:31<28:48:07, 3.30it/s] 8%|▊ | 29422/371472 [2:22:31<29:10:29, 3.26it/s] 8%|▊ | 29423/371472 [2:22:32<27:44:48, 3.42it/s] 8%|▊ | 29424/371472 [2:22:32<28:33:13, 3.33it/s] 8%|▊ | 29425/371472 [2:22:32<27:53:11, 3.41it/s] 8%|▊ | 29426/371472 [2:22:33<26:24:23, 3.60it/s] 8%|▊ | 29427/371472 [2:22:33<26:08:19, 3.63it/s] 8%|▊ | 29428/371472 [2:22:33<25:50:54, 3.68it/s] 8%|▊ | 29429/371472 [2:22:33<26:01:09, 3.65it/s] 8%|▊ | 29430/371472 [2:22:34<25:52:19, 3.67it/s] 8%|▊ | 29431/371472 [2:22:34<28:28:20, 3.34it/s] 8%|▊ | 29432/371472 [2:22:34<27:16:15, 3.48it/s] 8%|▊ | 29433/371472 [2:22:34<26:25:22, 3.60it/s] 8%|▊ | 29434/371472 [2:22:35<26:58:20, 3.52it/s] 8%|▊ | 29435/371472 [2:22:35<27:12:54, 3.49it/s] 8%|▊ | 29436/371472 [2:22:35<26:43:36, 3.55it/s] 8%|▊ | 29437/371472 [2:22:36<26:17:36, 3.61it/s] 8%|▊ | 29438/371472 [2:22:36<25:38:52, 3.70it/s] 8%|▊ | 29439/371472 [2:22:36<24:50:23, 3.82it/s] 8%|▊ | 29440/371472 [2:22:36<24:15:32, 3.92it/s] {'loss': 7.4039, 'learning_rate': 9.291193518498567e-07, 'epoch': 1.27} + 8%|▊ | 29440/371472 [2:22:36<24:15:32, 3.92it/s] 8%|▊ | 29441/371472 [2:22:37<25:30:45, 3.72it/s] 8%|▊ | 29442/371472 [2:22:37<26:05:30, 3.64it/s] 8%|▊ | 29443/371472 [2:22:37<27:42:34, 3.43it/s] 8%|▊ | 29444/371472 [2:22:38<27:08:01, 3.50it/s] 8%|▊ | 29445/371472 [2:22:38<28:25:33, 3.34it/s] 8%|▊ | 29446/371472 [2:22:38<27:24:44, 3.47it/s] 8%|▊ | 29447/371472 [2:22:39<30:49:42, 3.08it/s] 8%|▊ | 29448/371472 [2:22:39<29:42:00, 3.20it/s] 8%|▊ | 29449/371472 [2:22:39<32:45:59, 2.90it/s] 8%|▊ | 29450/371472 [2:22:40<31:33:24, 3.01it/s] 8%|▊ | 29451/371472 [2:22:40<30:11:29, 3.15it/s] 8%|▊ | 29452/371472 [2:22:40<28:58:00, 3.28it/s] 8%|▊ | 29453/371472 [2:22:40<28:16:38, 3.36it/s] 8%|▊ | 29454/371472 [2:22:41<27:05:54, 3.51it/s] 8%|▊ | 29455/371472 [2:22:41<28:35:29, 3.32it/s] 8%|▊ | 29456/371472 [2:22:41<28:57:04, 3.28it/s] 8%|▊ | 29457/371472 [2:22:42<27:45:57, 3.42it/s] 8%|▊ | 29458/371472 [2:22:42<27:59:45, 3.39it/s] 8%|▊ | 29459/371472 [2:22:42<27:11:51, 3.49it/s] 8%|▊ | 29460/371472 [2:22:42<27:01:30, 3.52it/s] {'loss': 7.437, 'learning_rate': 9.290708698743777e-07, 'epoch': 1.27} + 8%|▊ | 29460/371472 [2:22:42<27:01:30, 3.52it/s] 8%|▊ | 29461/371472 [2:22:43<27:00:10, 3.52it/s] 8%|▊ | 29462/371472 [2:22:43<26:44:15, 3.55it/s] 8%|▊ | 29463/371472 [2:22:43<26:44:46, 3.55it/s] 8%|▊ | 29464/371472 [2:22:44<26:40:07, 3.56it/s] 8%|▊ | 29465/371472 [2:22:44<26:14:22, 3.62it/s] 8%|▊ | 29466/371472 [2:22:44<25:42:18, 3.70it/s] 8%|▊ | 29467/371472 [2:22:44<25:43:30, 3.69it/s] 8%|▊ | 29468/371472 [2:22:45<29:02:00, 3.27it/s] 8%|▊ | 29469/371472 [2:22:45<28:01:27, 3.39it/s] 8%|▊ | 29470/371472 [2:22:45<27:51:13, 3.41it/s] 8%|▊ | 29471/371472 [2:22:46<27:36:43, 3.44it/s] 8%|▊ | 29472/371472 [2:22:46<27:43:40, 3.43it/s] 8%|▊ | 29473/371472 [2:22:46<28:05:31, 3.38it/s] 8%|▊ | 29474/371472 [2:22:46<28:22:50, 3.35it/s] 8%|▊ | 29475/371472 [2:22:47<30:07:38, 3.15it/s] 8%|▊ | 29476/371472 [2:22:47<29:51:57, 3.18it/s] 8%|▊ | 29477/371472 [2:22:47<28:26:06, 3.34it/s] 8%|▊ | 29478/371472 [2:22:48<28:58:34, 3.28it/s] 8%|▊ | 29479/371472 [2:22:48<27:36:31, 3.44it/s] 8%|▊ | 29480/371472 [2:22:48<27:16:34, 3.48it/s] {'loss': 7.1294, 'learning_rate': 9.290223878988989e-07, 'epoch': 1.27} + 8%|▊ | 29480/371472 [2:22:48<27:16:34, 3.48it/s] 8%|▊ | 29481/371472 [2:22:49<29:25:55, 3.23it/s] 8%|▊ | 29482/371472 [2:22:49<28:56:05, 3.28it/s] 8%|▊ | 29483/371472 [2:22:49<28:21:01, 3.35it/s] 8%|▊ | 29484/371472 [2:22:49<28:32:18, 3.33it/s] 8%|▊ | 29485/371472 [2:22:50<28:05:29, 3.38it/s] 8%|▊ | 29486/371472 [2:22:50<26:34:32, 3.57it/s] 8%|▊ | 29487/371472 [2:22:50<25:25:15, 3.74it/s] 8%|▊ | 29488/371472 [2:22:50<25:26:19, 3.73it/s] 8%|▊ | 29489/371472 [2:22:51<27:52:42, 3.41it/s] 8%|▊ | 29490/371472 [2:22:51<27:55:50, 3.40it/s] 8%|▊ | 29491/371472 [2:22:51<27:35:59, 3.44it/s] 8%|▊ | 29492/371472 [2:22:52<27:07:32, 3.50it/s] 8%|▊ | 29493/371472 [2:22:52<26:52:43, 3.53it/s] 8%|▊ | 29494/371472 [2:22:52<27:03:37, 3.51it/s] 8%|▊ | 29495/371472 [2:22:53<28:10:04, 3.37it/s] 8%|▊ | 29496/371472 [2:22:53<27:42:58, 3.43it/s] 8%|▊ | 29497/371472 [2:22:53<27:22:18, 3.47it/s] 8%|▊ | 29498/371472 [2:22:53<26:42:14, 3.56it/s] 8%|▊ | 29499/371472 [2:22:54<26:26:02, 3.59it/s] 8%|▊ | 29500/371472 [2:22:54<28:14:26, 3.36it/s] {'loss': 7.3842, 'learning_rate': 9.2897390592342e-07, 'epoch': 1.27} + 8%|▊ | 29500/371472 [2:22:54<28:14:26, 3.36it/s] 8%|▊ | 29501/371472 [2:22:54<26:55:48, 3.53it/s] 8%|▊ | 29502/371472 [2:22:55<26:29:16, 3.59it/s] 8%|▊ | 29503/371472 [2:22:55<26:08:47, 3.63it/s] 8%|▊ | 29504/371472 [2:22:55<26:03:51, 3.64it/s] 8%|▊ | 29505/371472 [2:22:55<26:50:52, 3.54it/s] 8%|▊ | 29506/371472 [2:22:56<26:40:53, 3.56it/s] 8%|▊ | 29507/371472 [2:22:56<29:49:02, 3.19it/s] 8%|▊ | 29508/371472 [2:22:56<28:05:09, 3.38it/s] 8%|▊ | 29509/371472 [2:22:57<27:30:21, 3.45it/s] 8%|▊ | 29510/371472 [2:22:57<27:12:11, 3.49it/s] 8%|▊ | 29511/371472 [2:22:57<27:38:32, 3.44it/s] 8%|▊ | 29512/371472 [2:22:57<27:46:16, 3.42it/s] 8%|▊ | 29513/371472 [2:22:58<29:08:58, 3.26it/s] 8%|▊ | 29514/371472 [2:22:58<27:51:12, 3.41it/s] 8%|▊ | 29515/371472 [2:22:58<26:20:41, 3.61it/s] 8%|▊ | 29516/371472 [2:22:59<27:07:18, 3.50it/s] 8%|▊ | 29517/371472 [2:22:59<26:08:20, 3.63it/s] 8%|▊ | 29518/371472 [2:22:59<26:48:55, 3.54it/s] 8%|▊ | 29519/371472 [2:23:00<29:07:05, 3.26it/s] 8%|▊ | 29520/371472 [2:23:00<29:13:02, 3.25it/s] {'loss': 7.0857, 'learning_rate': 9.289254239479412e-07, 'epoch': 1.27} + 8%|▊ | 29520/371472 [2:23:00<29:13:02, 3.25it/s] 8%|▊ | 29521/371472 [2:23:00<29:20:01, 3.24it/s] 8%|▊ | 29522/371472 [2:23:00<28:53:19, 3.29it/s] 8%|▊ | 29523/371472 [2:23:01<31:47:48, 2.99it/s] 8%|▊ | 29524/371472 [2:23:01<30:21:47, 3.13it/s] 8%|▊ | 29525/371472 [2:23:01<29:55:06, 3.17it/s] 8%|▊ | 29526/371472 [2:23:02<29:43:25, 3.20it/s] 8%|▊ | 29527/371472 [2:23:02<28:42:12, 3.31it/s] 8%|▊ | 29528/371472 [2:23:02<28:06:21, 3.38it/s] 8%|▊ | 29529/371472 [2:23:03<27:55:47, 3.40it/s] 8%|▊ | 29530/371472 [2:23:03<27:23:02, 3.47it/s] 8%|▊ | 29531/371472 [2:23:03<29:41:08, 3.20it/s] 8%|▊ | 29532/371472 [2:23:04<28:42:25, 3.31it/s] 8%|▊ | 29533/371472 [2:23:04<27:13:46, 3.49it/s] 8%|▊ | 29534/371472 [2:23:04<26:04:19, 3.64it/s] 8%|▊ | 29535/371472 [2:23:04<25:46:27, 3.69it/s] 8%|▊ | 29536/371472 [2:23:05<26:22:25, 3.60it/s] 8%|▊ | 29537/371472 [2:23:05<26:04:49, 3.64it/s] 8%|▊ | 29538/371472 [2:23:05<26:51:00, 3.54it/s] 8%|▊ | 29539/371472 [2:23:05<28:07:34, 3.38it/s] 8%|▊ | 29540/371472 [2:23:06<27:20:40, 3.47it/s] {'loss': 6.5619, 'learning_rate': 9.288769419724621e-07, 'epoch': 1.27} + 8%|▊ | 29540/371472 [2:23:06<27:20:40, 3.47it/s] 8%|▊ | 29541/371472 [2:23:06<26:56:30, 3.53it/s] 8%|▊ | 29542/371472 [2:23:06<27:16:18, 3.48it/s] 8%|▊ | 29543/371472 [2:23:07<26:28:30, 3.59it/s] 8%|▊ | 29544/371472 [2:23:07<27:44:49, 3.42it/s] 8%|▊ | 29545/371472 [2:23:07<28:02:33, 3.39it/s] 8%|▊ | 29546/371472 [2:23:07<27:58:43, 3.39it/s] 8%|▊ | 29547/371472 [2:23:08<26:53:47, 3.53it/s] 8%|▊ | 29548/371472 [2:23:08<26:48:26, 3.54it/s] 8%|▊ | 29549/371472 [2:23:08<27:06:35, 3.50it/s] 8%|▊ | 29550/371472 [2:23:09<27:35:57, 3.44it/s] 8%|▊ | 29551/371472 [2:23:09<27:45:21, 3.42it/s] 8%|▊ | 29552/371472 [2:23:09<28:53:52, 3.29it/s] 8%|▊ | 29553/371472 [2:23:09<27:37:59, 3.44it/s] 8%|▊ | 29554/371472 [2:23:10<29:15:28, 3.25it/s] 8%|▊ | 29555/371472 [2:23:10<27:41:04, 3.43it/s] 8%|▊ | 29556/371472 [2:23:10<27:41:44, 3.43it/s] 8%|▊ | 29557/371472 [2:23:11<27:00:01, 3.52it/s] 8%|▊ | 29558/371472 [2:23:11<26:07:04, 3.64it/s] 8%|▊ | 29559/371472 [2:23:11<25:36:31, 3.71it/s] 8%|▊ | 29560/371472 [2:23:11<25:26:25, 3.73it/s] {'loss': 6.7763, 'learning_rate': 9.288284599969833e-07, 'epoch': 1.27} + 8%|▊ | 29560/371472 [2:23:11<25:26:25, 3.73it/s] 8%|▊ | 29561/371472 [2:23:12<26:35:07, 3.57it/s] 8%|▊ | 29562/371472 [2:23:12<27:10:29, 3.49it/s] 8%|▊ | 29563/371472 [2:23:12<27:30:55, 3.45it/s] 8%|▊ | 29564/371472 [2:23:13<26:52:21, 3.53it/s] 8%|▊ | 29565/371472 [2:23:13<32:19:45, 2.94it/s] 8%|▊ | 29566/371472 [2:23:13<30:24:05, 3.12it/s] 8%|▊ | 29567/371472 [2:23:14<29:14:24, 3.25it/s] 8%|▊ | 29568/371472 [2:23:14<27:46:50, 3.42it/s] 8%|▊ | 29569/371472 [2:23:14<27:48:16, 3.42it/s] 8%|▊ | 29570/371472 [2:23:14<28:14:25, 3.36it/s] 8%|▊ | 29571/371472 [2:23:15<28:24:52, 3.34it/s] 8%|▊ | 29572/371472 [2:23:15<28:07:42, 3.38it/s] 8%|▊ | 29573/371472 [2:23:15<28:27:29, 3.34it/s] 8%|▊ | 29574/371472 [2:23:16<27:49:12, 3.41it/s] 8%|▊ | 29575/371472 [2:23:16<27:15:44, 3.48it/s] 8%|▊ | 29576/371472 [2:23:16<28:37:31, 3.32it/s] 8%|▊ | 29577/371472 [2:23:17<30:22:22, 3.13it/s] 8%|▊ | 29578/371472 [2:23:17<30:36:08, 3.10it/s] 8%|▊ | 29579/371472 [2:23:17<29:21:27, 3.23it/s] 8%|▊ | 29580/371472 [2:23:18<31:14:13, 3.04it/s] {'loss': 6.9909, 'learning_rate': 9.287799780215045e-07, 'epoch': 1.27} + 8%|▊ | 29580/371472 [2:23:18<31:14:13, 3.04it/s] 8%|▊ | 29581/371472 [2:23:18<31:37:10, 3.00it/s] 8%|▊ | 29582/371472 [2:23:18<29:36:25, 3.21it/s] 8%|▊ | 29583/371472 [2:23:18<28:26:22, 3.34it/s] 8%|▊ | 29584/371472 [2:23:19<28:05:41, 3.38it/s] 8%|▊ | 29585/371472 [2:23:19<28:10:26, 3.37it/s] 8%|▊ | 29586/371472 [2:23:19<27:53:53, 3.40it/s] 8%|▊ | 29587/371472 [2:23:20<26:35:30, 3.57it/s] 8%|▊ | 29588/371472 [2:23:20<27:11:53, 3.49it/s] 8%|▊ | 29589/371472 [2:23:20<26:36:19, 3.57it/s] 8%|▊ | 29590/371472 [2:23:20<26:36:57, 3.57it/s] 8%|▊ | 29591/371472 [2:23:21<26:33:13, 3.58it/s] 8%|▊ | 29592/371472 [2:23:21<26:43:01, 3.55it/s] 8%|▊ | 29593/371472 [2:23:21<26:01:21, 3.65it/s] 8%|▊ | 29594/371472 [2:23:22<25:34:33, 3.71it/s] 8%|▊ | 29595/371472 [2:23:22<26:11:14, 3.63it/s] 8%|▊ | 29596/371472 [2:23:22<25:40:01, 3.70it/s] 8%|▊ | 29597/371472 [2:23:22<25:18:43, 3.75it/s] 8%|▊ | 29598/371472 [2:23:23<26:15:31, 3.62it/s] 8%|▊ | 29599/371472 [2:23:23<25:15:38, 3.76it/s] 8%|▊ | 29600/371472 [2:23:23<26:15:45, 3.62it/s] {'loss': 6.7757, 'learning_rate': 9.287314960460256e-07, 'epoch': 1.27} + 8%|▊ | 29600/371472 [2:23:23<26:15:45, 3.62it/s] 8%|▊ | 29601/371472 [2:23:23<26:52:31, 3.53it/s] 8%|▊ | 29602/371472 [2:23:24<26:20:07, 3.61it/s] 8%|▊ | 29603/371472 [2:23:24<26:08:38, 3.63it/s] 8%|▊ | 29604/371472 [2:23:24<27:04:23, 3.51it/s] 8%|▊ | 29605/371472 [2:23:25<26:19:33, 3.61it/s] 8%|▊ | 29606/371472 [2:23:25<26:08:19, 3.63it/s] 8%|▊ | 29607/371472 [2:23:25<25:56:29, 3.66it/s] 8%|▊ | 29608/371472 [2:23:25<26:31:28, 3.58it/s] 8%|▊ | 29609/371472 [2:23:26<26:35:40, 3.57it/s] 8%|▊ | 29610/371472 [2:23:26<27:38:25, 3.44it/s] 8%|▊ | 29611/371472 [2:23:26<27:21:40, 3.47it/s] 8%|▊ | 29612/371472 [2:23:27<27:06:28, 3.50it/s] 8%|▊ | 29613/371472 [2:23:27<27:24:06, 3.47it/s] 8%|▊ | 29614/371472 [2:23:27<26:51:22, 3.54it/s] 8%|▊ | 29615/371472 [2:23:27<26:36:58, 3.57it/s] 8%|▊ | 29616/371472 [2:23:28<25:57:07, 3.66it/s] 8%|▊ | 29617/371472 [2:23:28<27:01:07, 3.51it/s] 8%|▊ | 29618/371472 [2:23:28<26:49:41, 3.54it/s] 8%|▊ | 29619/371472 [2:23:29<34:16:11, 2.77it/s] 8%|▊ | 29620/371472 [2:23:29<34:56:50, 2.72it/s] {'loss': 7.1912, 'learning_rate': 9.286830140705466e-07, 'epoch': 1.28} + 8%|▊ | 29620/371472 [2:23:29<34:56:50, 2.72it/s] 8%|▊ | 29621/371472 [2:23:29<32:08:00, 2.96it/s] 8%|▊ | 29622/371472 [2:23:30<31:30:55, 3.01it/s] 8%|▊ | 29623/371472 [2:23:30<32:39:18, 2.91it/s] 8%|▊ | 29624/371472 [2:23:30<29:41:56, 3.20it/s] 8%|▊ | 29625/371472 [2:23:31<28:21:15, 3.35it/s] 8%|▊ | 29626/371472 [2:23:31<27:54:51, 3.40it/s] 8%|▊ | 29627/371472 [2:23:31<27:02:32, 3.51it/s] 8%|▊ | 29628/371472 [2:23:31<26:48:10, 3.54it/s] 8%|▊ | 29629/371472 [2:23:32<26:34:07, 3.57it/s] 8%|▊ | 29630/371472 [2:23:32<26:45:13, 3.55it/s] 8%|▊ | 29631/371472 [2:23:32<25:56:02, 3.66it/s] 8%|▊ | 29632/371472 [2:23:33<25:52:42, 3.67it/s] 8%|▊ | 29633/371472 [2:23:33<28:42:20, 3.31it/s] 8%|▊ | 29634/371472 [2:23:33<28:10:34, 3.37it/s] 8%|▊ | 29635/371472 [2:23:34<28:13:07, 3.36it/s] 8%|▊ | 29636/371472 [2:23:34<28:21:36, 3.35it/s] 8%|▊ | 29637/371472 [2:23:34<28:53:07, 3.29it/s] 8%|▊ | 29638/371472 [2:23:34<28:24:24, 3.34it/s] 8%|▊ | 29639/371472 [2:23:35<28:43:40, 3.31it/s] 8%|▊ | 29640/371472 [2:23:35<29:07:57, 3.26it/s] {'loss': 7.0042, 'learning_rate': 9.286345320950678e-07, 'epoch': 1.28} + 8%|▊ | 29640/371472 [2:23:35<29:07:57, 3.26it/s] 8%|▊ | 29641/371472 [2:23:35<29:31:57, 3.22it/s] 8%|▊ | 29642/371472 [2:23:36<28:33:37, 3.32it/s] 8%|▊ | 29643/371472 [2:23:36<27:28:16, 3.46it/s] 8%|▊ | 29644/371472 [2:23:36<28:24:15, 3.34it/s] 8%|▊ | 29645/371472 [2:23:37<27:47:41, 3.42it/s] 8%|▊ | 29646/371472 [2:23:37<26:26:57, 3.59it/s] 8%|▊ | 29647/371472 [2:23:37<26:04:14, 3.64it/s] 8%|▊ | 29648/371472 [2:23:37<26:37:24, 3.57it/s] 8%|▊ | 29649/371472 [2:23:38<26:02:01, 3.65it/s] 8%|▊ | 29650/371472 [2:23:38<25:48:43, 3.68it/s] 8%|▊ | 29651/371472 [2:23:38<26:15:39, 3.62it/s] 8%|▊ | 29652/371472 [2:23:38<27:55:58, 3.40it/s] 8%|▊ | 29653/371472 [2:23:39<27:56:51, 3.40it/s] 8%|▊ | 29654/371472 [2:23:39<28:24:22, 3.34it/s] 8%|▊ | 29655/371472 [2:23:39<27:53:35, 3.40it/s] 8%|▊ | 29656/371472 [2:23:40<27:09:34, 3.50it/s] 8%|▊ | 29657/371472 [2:23:40<27:05:59, 3.50it/s] 8%|▊ | 29658/371472 [2:23:40<27:54:47, 3.40it/s] 8%|▊ | 29659/371472 [2:23:41<28:05:05, 3.38it/s] 8%|▊ | 29660/371472 [2:23:41<27:25:08, 3.46it/s] {'loss': 6.9697, 'learning_rate': 9.285860501195888e-07, 'epoch': 1.28} + 8%|▊ | 29660/371472 [2:23:41<27:25:08, 3.46it/s] 8%|▊ | 29661/371472 [2:23:41<26:42:16, 3.56it/s] 8%|▊ | 29662/371472 [2:23:41<26:02:34, 3.65it/s] 8%|▊ | 29663/371472 [2:23:42<26:08:52, 3.63it/s] 8%|▊ | 29664/371472 [2:23:42<25:55:28, 3.66it/s] 8%|▊ | 29665/371472 [2:23:42<27:11:58, 3.49it/s] 8%|▊ | 29666/371472 [2:23:42<28:00:55, 3.39it/s] 8%|▊ | 29667/371472 [2:23:43<27:16:34, 3.48it/s] 8%|▊ | 29668/371472 [2:23:43<26:30:49, 3.58it/s] 8%|▊ | 29669/371472 [2:23:43<27:04:08, 3.51it/s] 8%|▊ | 29670/371472 [2:23:44<26:23:56, 3.60it/s] 8%|▊ | 29671/371472 [2:23:44<26:02:57, 3.64it/s] 8%|▊ | 29672/371472 [2:23:44<26:41:00, 3.56it/s] 8%|▊ | 29673/371472 [2:23:44<27:11:34, 3.49it/s] 8%|▊ | 29674/371472 [2:23:45<26:47:31, 3.54it/s] 8%|▊ | 29675/371472 [2:23:45<28:04:12, 3.38it/s] 8%|▊ | 29676/371472 [2:23:45<28:17:26, 3.36it/s] 8%|▊ | 29677/371472 [2:23:46<28:25:24, 3.34it/s] 8%|▊ | 29678/371472 [2:23:46<28:37:27, 3.32it/s] 8%|▊ | 29679/371472 [2:23:46<28:15:52, 3.36it/s] 8%|▊ | 29680/371472 [2:23:47<30:00:11, 3.16it/s] {'loss': 7.4797, 'learning_rate': 9.285375681441099e-07, 'epoch': 1.28} + 8%|▊ | 29680/371472 [2:23:47<30:00:11, 3.16it/s] 8%|▊ | 29681/371472 [2:23:47<30:13:44, 3.14it/s] 8%|▊ | 29682/371472 [2:23:47<29:48:12, 3.19it/s] 8%|▊ | 29683/371472 [2:23:48<29:03:10, 3.27it/s] 8%|▊ | 29684/371472 [2:23:48<32:36:59, 2.91it/s] 8%|▊ | 29685/371472 [2:23:48<30:35:03, 3.10it/s] 8%|▊ | 29686/371472 [2:23:48<29:14:31, 3.25it/s] 8%|▊ | 29687/371472 [2:23:49<28:21:57, 3.35it/s] 8%|▊ | 29688/371472 [2:23:49<27:53:37, 3.40it/s] 8%|▊ | 29689/371472 [2:23:49<27:08:14, 3.50it/s] 8%|▊ | 29690/371472 [2:23:50<27:47:24, 3.42it/s] 8%|▊ | 29691/371472 [2:23:50<27:22:29, 3.47it/s] 8%|▊ | 29692/371472 [2:23:50<26:55:00, 3.53it/s] 8%|▊ | 29693/371472 [2:23:50<27:15:32, 3.48it/s] 8%|▊ | 29694/371472 [2:23:51<26:27:38, 3.59it/s] 8%|▊ | 29695/371472 [2:23:51<25:39:56, 3.70it/s] 8%|▊ | 29696/371472 [2:23:51<28:18:52, 3.35it/s] 8%|▊ | 29697/371472 [2:23:52<27:33:11, 3.45it/s] 8%|▊ | 29698/371472 [2:23:52<27:04:44, 3.51it/s] 8%|▊ | 29699/371472 [2:23:52<28:41:16, 3.31it/s] 8%|▊ | 29700/371472 [2:23:53<27:41:49, 3.43it/s] {'loss': 7.2872, 'learning_rate': 9.28489086168631e-07, 'epoch': 1.28} + 8%|▊ | 29700/371472 [2:23:53<27:41:49, 3.43it/s] 8%|▊ | 29701/371472 [2:23:53<26:44:16, 3.55it/s] 8%|▊ | 29702/371472 [2:23:53<26:56:10, 3.52it/s] 8%|▊ | 29703/371472 [2:23:53<27:09:57, 3.49it/s] 8%|▊ | 29704/371472 [2:23:54<27:04:17, 3.51it/s] 8%|▊ | 29705/371472 [2:23:54<28:47:06, 3.30it/s] 8%|▊ | 29706/371472 [2:23:54<27:43:00, 3.43it/s] 8%|▊ | 29707/371472 [2:23:55<28:22:33, 3.35it/s] 8%|▊ | 29708/371472 [2:23:55<27:33:59, 3.44it/s] 8%|▊ | 29709/371472 [2:23:55<27:08:20, 3.50it/s] 8%|▊ | 29710/371472 [2:23:55<27:33:51, 3.44it/s] 8%|▊ | 29711/371472 [2:23:56<26:35:30, 3.57it/s] 8%|▊ | 29712/371472 [2:23:56<25:59:18, 3.65it/s] 8%|▊ | 29713/371472 [2:23:56<26:02:09, 3.65it/s] 8%|▊ | 29714/371472 [2:23:57<27:48:32, 3.41it/s] 8%|▊ | 29715/371472 [2:23:57<26:43:04, 3.55it/s] 8%|▊ | 29716/371472 [2:23:57<28:05:34, 3.38it/s] 8%|▊ | 29717/371472 [2:23:57<27:48:44, 3.41it/s] 8%|▊ | 29718/371472 [2:23:58<28:17:25, 3.36it/s] 8%|▊ | 29719/371472 [2:23:58<27:45:30, 3.42it/s] 8%|▊ | 29720/371472 [2:23:58<27:14:55, 3.48it/s] {'loss': 7.1162, 'learning_rate': 9.284406041931522e-07, 'epoch': 1.28} + 8%|▊ | 29720/371472 [2:23:58<27:14:55, 3.48it/s] 8%|▊ | 29721/371472 [2:23:59<27:23:41, 3.47it/s] 8%|▊ | 29722/371472 [2:23:59<27:41:25, 3.43it/s] 8%|▊ | 29723/371472 [2:23:59<27:54:47, 3.40it/s] 8%|▊ | 29724/371472 [2:23:59<27:09:31, 3.50it/s] 8%|▊ | 29725/371472 [2:24:00<27:20:26, 3.47it/s] 8%|▊ | 29726/371472 [2:24:00<26:38:14, 3.56it/s] 8%|▊ | 29727/371472 [2:24:00<27:09:52, 3.49it/s] 8%|▊ | 29728/371472 [2:24:01<26:33:12, 3.57it/s] 8%|▊ | 29729/371472 [2:24:01<26:47:01, 3.54it/s] 8%|▊ | 29730/371472 [2:24:01<25:57:18, 3.66it/s] 8%|▊ | 29731/371472 [2:24:01<26:07:16, 3.63it/s] 8%|▊ | 29732/371472 [2:24:02<27:58:08, 3.39it/s] 8%|▊ | 29733/371472 [2:24:02<26:52:58, 3.53it/s] 8%|▊ | 29734/371472 [2:24:02<26:23:29, 3.60it/s] 8%|▊ | 29735/371472 [2:24:03<27:19:25, 3.47it/s] 8%|▊ | 29736/371472 [2:24:03<26:24:33, 3.59it/s] 8%|▊ | 29737/371472 [2:24:03<26:25:06, 3.59it/s] 8%|▊ | 29738/371472 [2:24:03<26:44:48, 3.55it/s] 8%|▊ | 29739/371472 [2:24:04<27:32:29, 3.45it/s] 8%|▊ | 29740/371472 [2:24:04<27:12:31, 3.49it/s] {'loss': 7.2223, 'learning_rate': 9.283921222176733e-07, 'epoch': 1.28} + 8%|▊ | 29740/371472 [2:24:04<27:12:31, 3.49it/s] 8%|▊ | 29741/371472 [2:24:04<26:50:03, 3.54it/s] 8%|▊ | 29742/371472 [2:24:05<26:45:33, 3.55it/s] 8%|▊ | 29743/371472 [2:24:05<26:31:06, 3.58it/s] 8%|▊ | 29744/371472 [2:24:05<27:59:00, 3.39it/s] 8%|▊ | 29745/371472 [2:24:05<29:11:32, 3.25it/s] 8%|▊ | 29746/371472 [2:24:06<28:27:52, 3.33it/s] 8%|▊ | 29747/371472 [2:24:06<28:55:30, 3.28it/s] 8%|▊ | 29748/371472 [2:24:06<28:05:53, 3.38it/s] 8%|▊ | 29749/371472 [2:24:07<27:48:06, 3.41it/s] 8%|▊ | 29750/371472 [2:24:07<27:16:30, 3.48it/s] 8%|▊ | 29751/371472 [2:24:07<26:39:23, 3.56it/s] 8%|▊ | 29752/371472 [2:24:07<27:23:21, 3.47it/s] 8%|▊ | 29753/371472 [2:24:08<28:15:04, 3.36it/s] 8%|▊ | 29754/371472 [2:24:08<27:50:07, 3.41it/s] 8%|▊ | 29755/371472 [2:24:08<29:02:13, 3.27it/s] 8%|▊ | 29756/371472 [2:24:09<28:20:02, 3.35it/s] 8%|▊ | 29757/371472 [2:24:09<27:59:16, 3.39it/s] 8%|▊ | 29758/371472 [2:24:09<27:33:12, 3.44it/s] 8%|▊ | 29759/371472 [2:24:10<28:42:42, 3.31it/s] 8%|▊ | 29760/371472 [2:24:10<29:42:27, 3.20it/s] {'loss': 7.2239, 'learning_rate': 9.283436402421944e-07, 'epoch': 1.28} + 8%|▊ | 29760/371472 [2:24:10<29:42:27, 3.20it/s] 8%|▊ | 29761/371472 [2:24:10<29:06:17, 3.26it/s] 8%|▊ | 29762/371472 [2:24:10<27:15:56, 3.48it/s] 8%|▊ | 29763/371472 [2:24:11<27:51:49, 3.41it/s] 8%|▊ | 29764/371472 [2:24:11<26:44:46, 3.55it/s] 8%|▊ | 29765/371472 [2:24:11<27:01:53, 3.51it/s] 8%|▊ | 29766/371472 [2:24:12<27:27:30, 3.46it/s] 8%|▊ | 29767/371472 [2:24:12<28:21:09, 3.35it/s] 8%|▊ | 29768/371472 [2:24:12<28:20:00, 3.35it/s] 8%|▊ | 29769/371472 [2:24:12<28:00:43, 3.39it/s] 8%|▊ | 29770/371472 [2:24:13<27:17:54, 3.48it/s] 8%|▊ | 29771/371472 [2:24:13<27:17:41, 3.48it/s] 8%|▊ | 29772/371472 [2:24:13<28:16:58, 3.36it/s] 8%|▊ | 29773/371472 [2:24:14<26:53:27, 3.53it/s] 8%|▊ | 29774/371472 [2:24:14<26:05:46, 3.64it/s] 8%|▊ | 29775/371472 [2:24:14<25:20:28, 3.75it/s] 8%|▊ | 29776/371472 [2:24:14<25:29:25, 3.72it/s] 8%|▊ | 29777/371472 [2:24:15<25:21:06, 3.74it/s] 8%|▊ | 29778/371472 [2:24:15<25:45:16, 3.69it/s] 8%|▊ | 29779/371472 [2:24:15<25:27:08, 3.73it/s] 8%|▊ | 29780/371472 [2:24:15<25:19:02, 3.75it/s] {'loss': 7.0478, 'learning_rate': 9.282951582667155e-07, 'epoch': 1.28} + 8%|▊ | 29780/371472 [2:24:15<25:19:02, 3.75it/s] 8%|▊ | 29781/371472 [2:24:16<25:17:08, 3.75it/s] 8%|▊ | 29782/371472 [2:24:16<25:08:25, 3.78it/s] 8%|▊ | 29783/371472 [2:24:16<25:23:57, 3.74it/s] 8%|▊ | 29784/371472 [2:24:17<25:25:28, 3.73it/s] 8%|▊ | 29785/371472 [2:24:17<25:30:00, 3.72it/s] 8%|▊ | 29786/371472 [2:24:17<28:08:57, 3.37it/s] 8%|▊ | 29787/371472 [2:24:18<29:16:43, 3.24it/s] 8%|▊ | 29788/371472 [2:24:18<28:10:22, 3.37it/s] 8%|▊ | 29789/371472 [2:24:18<28:07:21, 3.37it/s] 8%|▊ | 29790/371472 [2:24:18<28:59:04, 3.27it/s] 8%|▊ | 29791/371472 [2:24:19<29:00:18, 3.27it/s] 8%|▊ | 29792/371472 [2:24:19<28:27:04, 3.34it/s] 8%|▊ | 29793/371472 [2:24:19<27:20:00, 3.47it/s] 8%|▊ | 29794/371472 [2:24:20<27:04:49, 3.50it/s] 8%|▊ | 29795/371472 [2:24:20<26:16:40, 3.61it/s] 8%|▊ | 29796/371472 [2:24:20<26:50:29, 3.54it/s] 8%|▊ | 29797/371472 [2:24:20<26:44:02, 3.55it/s] 8%|▊ | 29798/371472 [2:24:21<26:15:25, 3.61it/s] 8%|▊ | 29799/371472 [2:24:21<25:47:10, 3.68it/s] 8%|▊ | 29800/371472 [2:24:21<25:47:24, 3.68it/s] {'loss': 7.0737, 'learning_rate': 9.282466762912365e-07, 'epoch': 1.28} + 8%|▊ | 29800/371472 [2:24:21<25:47:24, 3.68it/s] 8%|▊ | 29801/371472 [2:24:21<26:35:12, 3.57it/s] 8%|▊ | 29802/371472 [2:24:22<26:44:35, 3.55it/s] 8%|▊ | 29803/371472 [2:24:22<26:41:42, 3.56it/s] 8%|▊ | 29804/371472 [2:24:22<27:17:03, 3.48it/s] 8%|▊ | 29805/371472 [2:24:23<28:46:01, 3.30it/s] 8%|▊ | 29806/371472 [2:24:23<27:53:45, 3.40it/s] 8%|▊ | 29807/371472 [2:24:23<27:45:27, 3.42it/s] 8%|▊ | 29808/371472 [2:24:23<27:06:49, 3.50it/s] 8%|▊ | 29809/371472 [2:24:24<26:56:30, 3.52it/s] 8%|▊ | 29810/371472 [2:24:24<26:18:26, 3.61it/s] 8%|▊ | 29811/371472 [2:24:24<25:33:21, 3.71it/s] 8%|▊ | 29812/371472 [2:24:25<25:06:13, 3.78it/s] 8%|▊ | 29813/371472 [2:24:25<27:36:45, 3.44it/s] 8%|▊ | 29814/371472 [2:24:25<27:48:29, 3.41it/s] 8%|▊ | 29815/371472 [2:24:25<26:36:37, 3.57it/s] 8%|▊ | 29816/371472 [2:24:26<27:00:42, 3.51it/s] 8%|▊ | 29817/371472 [2:24:26<26:47:07, 3.54it/s] 8%|▊ | 29818/371472 [2:24:26<26:07:05, 3.63it/s] 8%|▊ | 29819/371472 [2:24:27<26:00:20, 3.65it/s] 8%|▊ | 29820/371472 [2:24:27<26:40:16, 3.56it/s] {'loss': 6.9948, 'learning_rate': 9.281981943157577e-07, 'epoch': 1.28} + 8%|▊ | 29820/371472 [2:24:27<26:40:16, 3.56it/s] 8%|▊ | 29821/371472 [2:24:27<27:00:23, 3.51it/s] 8%|▊ | 29822/371472 [2:24:27<26:37:41, 3.56it/s] 8%|▊ | 29823/371472 [2:24:28<27:24:42, 3.46it/s] 8%|▊ | 29824/371472 [2:24:28<30:21:32, 3.13it/s] 8%|▊ | 29825/371472 [2:24:28<29:49:31, 3.18it/s] 8%|▊ | 29826/371472 [2:24:29<29:16:00, 3.24it/s] 8%|▊ | 29827/371472 [2:24:29<27:12:40, 3.49it/s] 8%|▊ | 29828/371472 [2:24:29<28:16:01, 3.36it/s] 8%|▊ | 29829/371472 [2:24:30<28:12:41, 3.36it/s] 8%|▊ | 29830/371472 [2:24:30<27:40:41, 3.43it/s] 8%|▊ | 29831/371472 [2:24:30<27:09:28, 3.49it/s] 8%|▊ | 29832/371472 [2:24:30<28:57:20, 3.28it/s] 8%|▊ | 29833/371472 [2:24:31<28:08:11, 3.37it/s] 8%|▊ | 29834/371472 [2:24:31<27:28:14, 3.45it/s] 8%|▊ | 29835/371472 [2:24:31<28:15:49, 3.36it/s] 8%|▊ | 29836/371472 [2:24:32<27:30:49, 3.45it/s] 8%|▊ | 29837/371472 [2:24:32<27:22:18, 3.47it/s] 8%|▊ | 29838/371472 [2:24:32<28:15:13, 3.36it/s] 8%|▊ | 29839/371472 [2:24:32<27:04:29, 3.51it/s] 8%|▊ | 29840/371472 [2:24:33<27:15:59, 3.48it/s] {'loss': 6.7393, 'learning_rate': 9.281497123402788e-07, 'epoch': 1.29} + 8%|▊ | 29840/371472 [2:24:33<27:15:59, 3.48it/s] 8%|▊ | 29841/371472 [2:24:33<27:54:16, 3.40it/s] 8%|▊ | 29842/371472 [2:24:33<27:30:39, 3.45it/s] 8%|▊ | 29843/371472 [2:24:34<27:42:38, 3.42it/s] 8%|▊ | 29844/371472 [2:24:34<26:29:15, 3.58it/s] 8%|▊ | 29845/371472 [2:24:34<26:33:28, 3.57it/s] 8%|▊ | 29846/371472 [2:24:34<26:09:14, 3.63it/s] 8%|▊ | 29847/371472 [2:24:35<25:35:19, 3.71it/s] 8%|▊ | 29848/371472 [2:24:35<25:02:25, 3.79it/s] 8%|▊ | 29849/371472 [2:24:35<26:01:59, 3.65it/s] 8%|▊ | 29850/371472 [2:24:36<26:17:05, 3.61it/s] 8%|▊ | 29851/371472 [2:24:36<26:14:10, 3.62it/s] 8%|▊ | 29852/371472 [2:24:36<25:21:57, 3.74it/s] 8%|▊ | 29853/371472 [2:24:36<25:07:01, 3.78it/s] 8%|▊ | 29854/371472 [2:24:37<25:32:35, 3.72it/s] 8%|▊ | 29855/371472 [2:24:37<25:36:23, 3.71it/s] 8%|▊ | 29856/371472 [2:24:37<25:46:58, 3.68it/s] 8%|▊ | 29857/371472 [2:24:37<27:07:28, 3.50it/s] 8%|▊ | 29858/371472 [2:24:38<28:32:27, 3.32it/s] 8%|▊ | 29859/371472 [2:24:38<28:51:16, 3.29it/s] 8%|▊ | 29860/371472 [2:24:38<27:59:30, 3.39it/s] {'loss': 7.1649, 'learning_rate': 9.281012303647999e-07, 'epoch': 1.29} + 8%|▊ | 29860/371472 [2:24:38<27:59:30, 3.39it/s] 8%|▊ | 29861/371472 [2:24:39<28:50:06, 3.29it/s] 8%|▊ | 29862/371472 [2:24:39<27:58:13, 3.39it/s] 8%|▊ | 29863/371472 [2:24:39<29:10:12, 3.25it/s] 8%|▊ | 29864/371472 [2:24:40<30:29:16, 3.11it/s] 8%|▊ | 29865/371472 [2:24:40<30:04:23, 3.16it/s] 8%|▊ | 29866/371472 [2:24:40<29:31:21, 3.21it/s] 8%|▊ | 29867/371472 [2:24:41<29:15:32, 3.24it/s] 8%|▊ | 29868/371472 [2:24:41<28:53:53, 3.28it/s] 8%|▊ | 29869/371472 [2:24:41<28:57:12, 3.28it/s] 8%|▊ | 29870/371472 [2:24:41<29:09:32, 3.25it/s] 8%|▊ | 29871/371472 [2:24:42<28:27:55, 3.33it/s] 8%|▊ | 29872/371472 [2:24:42<28:11:54, 3.37it/s] 8%|▊ | 29873/371472 [2:24:42<27:03:49, 3.51it/s] 8%|▊ | 29874/371472 [2:24:43<26:50:57, 3.53it/s] 8%|▊ | 29875/371472 [2:24:43<25:39:18, 3.70it/s] 8%|▊ | 29876/371472 [2:24:43<26:13:50, 3.62it/s] 8%|▊ | 29877/371472 [2:24:43<26:06:21, 3.63it/s] 8%|▊ | 29878/371472 [2:24:44<26:43:59, 3.55it/s] 8%|▊ | 29879/371472 [2:24:44<27:57:28, 3.39it/s] 8%|▊ | 29880/371472 [2:24:44<27:07:16, 3.50it/s] {'loss': 7.1114, 'learning_rate': 9.28052748389321e-07, 'epoch': 1.29} + 8%|▊ | 29880/371472 [2:24:44<27:07:16, 3.50it/s] 8%|▊ | 29881/371472 [2:24:45<27:13:04, 3.49it/s] 8%|▊ | 29882/371472 [2:24:45<27:09:57, 3.49it/s] 8%|▊ | 29883/371472 [2:24:45<26:08:09, 3.63it/s] 8%|▊ | 29884/371472 [2:24:45<27:11:58, 3.49it/s] 8%|▊ | 29885/371472 [2:24:46<28:49:56, 3.29it/s] 8%|▊ | 29886/371472 [2:24:46<28:56:28, 3.28it/s] 8%|▊ | 29887/371472 [2:24:46<27:43:24, 3.42it/s] 8%|▊ | 29888/371472 [2:24:47<28:36:30, 3.32it/s] 8%|▊ | 29889/371472 [2:24:47<29:48:49, 3.18it/s] 8%|▊ | 29890/371472 [2:24:47<28:31:51, 3.33it/s] 8%|▊ | 29891/371472 [2:24:48<27:23:38, 3.46it/s] 8%|▊ | 29892/371472 [2:24:48<27:02:57, 3.51it/s] 8%|▊ | 29893/371472 [2:24:48<27:16:48, 3.48it/s] 8%|▊ | 29894/371472 [2:24:48<28:44:22, 3.30it/s] 8%|▊ | 29895/371472 [2:24:49<27:56:06, 3.40it/s] 8%|▊ | 29896/371472 [2:24:49<28:09:53, 3.37it/s] 8%|▊ | 29897/371472 [2:24:49<27:34:07, 3.44it/s] 8%|▊ | 29898/371472 [2:24:50<27:06:44, 3.50it/s] 8%|▊ | 29899/371472 [2:24:50<26:25:12, 3.59it/s] 8%|▊ | 29900/371472 [2:24:50<27:42:00, 3.43it/s] {'loss': 6.8673, 'learning_rate': 9.280042664138422e-07, 'epoch': 1.29} + 8%|▊ | 29900/371472 [2:24:50<27:42:00, 3.43it/s] 8%|▊ | 29901/371472 [2:24:50<27:11:10, 3.49it/s] 8%|▊ | 29902/371472 [2:24:51<27:46:50, 3.42it/s] 8%|▊ | 29903/371472 [2:24:51<27:55:37, 3.40it/s] 8%|▊ | 29904/371472 [2:24:51<27:58:58, 3.39it/s] 8%|▊ | 29905/371472 [2:24:52<26:46:34, 3.54it/s] 8%|▊ | 29906/371472 [2:24:52<28:27:35, 3.33it/s] 8%|▊ | 29907/371472 [2:24:52<27:58:31, 3.39it/s] 8%|▊ | 29908/371472 [2:24:52<26:57:12, 3.52it/s] 8%|▊ | 29909/371472 [2:24:53<26:33:59, 3.57it/s] 8%|▊ | 29910/371472 [2:24:53<25:49:57, 3.67it/s] 8%|▊ | 29911/371472 [2:24:53<27:11:49, 3.49it/s] 8%|▊ | 29912/371472 [2:24:54<26:01:59, 3.64it/s] 8%|▊ | 29913/371472 [2:24:54<27:03:38, 3.51it/s] 8%|▊ | 29914/371472 [2:24:54<27:03:22, 3.51it/s] 8%|▊ | 29915/371472 [2:24:54<28:11:04, 3.37it/s] 8%|▊ | 29916/371472 [2:24:55<27:31:45, 3.45it/s] 8%|▊ | 29917/371472 [2:24:55<28:52:06, 3.29it/s] 8%|▊ | 29918/371472 [2:24:55<28:11:58, 3.36it/s] 8%|▊ | 29919/371472 [2:24:56<29:21:14, 3.23it/s] 8%|▊ | 29920/371472 [2:24:56<29:47:00, 3.19it/s] {'loss': 6.8635, 'learning_rate': 9.279557844383631e-07, 'epoch': 1.29} + 8%|▊ | 29920/371472 [2:24:56<29:47:00, 3.19it/s] 8%|▊ | 29921/371472 [2:24:56<28:10:05, 3.37it/s] 8%|▊ | 29922/371472 [2:24:57<27:12:21, 3.49it/s] 8%|▊ | 29923/371472 [2:24:57<26:51:21, 3.53it/s] 8%|▊ | 29924/371472 [2:24:57<27:13:08, 3.49it/s] 8%|▊ | 29925/371472 [2:24:57<27:29:19, 3.45it/s] 8%|▊ | 29926/371472 [2:24:58<26:43:11, 3.55it/s] 8%|▊ | 29927/371472 [2:24:58<28:23:58, 3.34it/s] 8%|▊ | 29928/371472 [2:24:58<29:52:15, 3.18it/s] 8%|▊ | 29929/371472 [2:24:59<28:22:47, 3.34it/s] 8%|▊ | 29930/371472 [2:24:59<27:39:52, 3.43it/s] 8%|▊ | 29931/371472 [2:24:59<27:46:18, 3.42it/s] 8%|▊ | 29932/371472 [2:24:59<27:13:39, 3.48it/s] 8%|▊ | 29933/371472 [2:25:00<27:10:03, 3.49it/s] 8%|▊ | 29934/371472 [2:25:00<26:47:55, 3.54it/s] 8%|▊ | 29935/371472 [2:25:00<25:54:26, 3.66it/s] 8%|▊ | 29936/371472 [2:25:01<27:43:42, 3.42it/s] 8%|▊ | 29937/371472 [2:25:01<27:22:23, 3.47it/s] 8%|▊ | 29938/371472 [2:25:01<27:00:16, 3.51it/s] 8%|▊ | 29939/371472 [2:25:02<29:59:19, 3.16it/s] 8%|▊ | 29940/371472 [2:25:02<30:11:37, 3.14it/s] {'loss': 6.8289, 'learning_rate': 9.279073024628843e-07, 'epoch': 1.29} + 8%|▊ | 29940/371472 [2:25:02<30:11:37, 3.14it/s] 8%|▊ | 29941/371472 [2:25:02<31:19:17, 3.03it/s] 8%|▊ | 29942/371472 [2:25:03<29:46:30, 3.19it/s] 8%|▊ | 29943/371472 [2:25:03<29:21:36, 3.23it/s] 8%|▊ | 29944/371472 [2:25:03<28:43:06, 3.30it/s] 8%|▊ | 29945/371472 [2:25:03<28:15:45, 3.36it/s] 8%|▊ | 29946/371472 [2:25:04<27:34:35, 3.44it/s] 8%|▊ | 29947/371472 [2:25:04<28:18:49, 3.35it/s] 8%|▊ | 29948/371472 [2:25:04<27:45:40, 3.42it/s] 8%|▊ | 29949/371472 [2:25:05<27:17:19, 3.48it/s] 8%|▊ | 29950/371472 [2:25:05<26:46:48, 3.54it/s] 8%|▊ | 29951/371472 [2:25:05<26:12:36, 3.62it/s] 8%|▊ | 29952/371472 [2:25:05<26:15:00, 3.61it/s] 8%|▊ | 29953/371472 [2:25:06<28:24:36, 3.34it/s] 8%|▊ | 29954/371472 [2:25:06<28:21:08, 3.35it/s] 8%|▊ | 29955/371472 [2:25:06<27:23:48, 3.46it/s] 8%|▊ | 29956/371472 [2:25:07<29:03:57, 3.26it/s] 8%|▊ | 29957/371472 [2:25:07<27:33:02, 3.44it/s] 8%|▊ | 29958/371472 [2:25:07<30:05:59, 3.15it/s] 8%|▊ | 29959/371472 [2:25:08<30:27:14, 3.12it/s] 8%|▊ | 29960/371472 [2:25:08<28:40:37, 3.31it/s] {'loss': 7.1112, 'learning_rate': 9.278588204874054e-07, 'epoch': 1.29} + 8%|▊ | 29960/371472 [2:25:08<28:40:37, 3.31it/s] 8%|▊ | 29961/371472 [2:25:08<28:47:58, 3.29it/s] 8%|▊ | 29962/371472 [2:25:08<27:24:11, 3.46it/s] 8%|▊ | 29963/371472 [2:25:09<27:22:01, 3.47it/s] 8%|▊ | 29964/371472 [2:25:09<29:37:39, 3.20it/s] 8%|▊ | 29965/371472 [2:25:09<27:47:12, 3.41it/s] 8%|▊ | 29966/371472 [2:25:10<27:11:37, 3.49it/s] 8%|▊ | 29967/371472 [2:25:10<27:11:22, 3.49it/s] 8%|▊ | 29968/371472 [2:25:10<26:22:07, 3.60it/s] 8%|▊ | 29969/371472 [2:25:10<26:20:38, 3.60it/s] 8%|▊ | 29970/371472 [2:25:11<26:18:25, 3.61it/s] 8%|▊ | 29971/371472 [2:25:11<26:21:07, 3.60it/s] 8%|▊ | 29972/371472 [2:25:11<26:25:37, 3.59it/s] 8%|▊ | 29973/371472 [2:25:12<26:28:20, 3.58it/s] 8%|▊ | 29974/371472 [2:25:12<27:10:34, 3.49it/s] 8%|▊ | 29975/371472 [2:25:12<26:58:50, 3.52it/s] 8%|▊ | 29976/371472 [2:25:12<26:44:54, 3.55it/s] 8%|▊ | 29977/371472 [2:25:13<26:45:12, 3.55it/s] 8%|▊ | 29978/371472 [2:25:13<28:59:54, 3.27it/s] 8%|▊ | 29979/371472 [2:25:13<29:03:41, 3.26it/s] 8%|▊ | 29980/371472 [2:25:14<28:36:36, 3.32it/s] {'loss': 7.1843, 'learning_rate': 9.278103385119265e-07, 'epoch': 1.29} + 8%|▊ | 29980/371472 [2:25:14<28:36:36, 3.32it/s] 8%|▊ | 29981/371472 [2:25:14<29:29:22, 3.22it/s] 8%|▊ | 29982/371472 [2:25:14<28:47:05, 3.30it/s] 8%|▊ | 29983/371472 [2:25:14<27:44:17, 3.42it/s] 8%|▊ | 29984/371472 [2:25:15<27:06:09, 3.50it/s] 8%|▊ | 29985/371472 [2:25:15<28:58:58, 3.27it/s] 8%|▊ | 29986/371472 [2:25:15<28:08:34, 3.37it/s] 8%|▊ | 29987/371472 [2:25:16<27:56:26, 3.39it/s] 8%|▊ | 29988/371472 [2:25:16<26:47:38, 3.54it/s] 8%|▊ | 29989/371472 [2:25:16<27:08:07, 3.50it/s] 8%|▊ | 29990/371472 [2:25:16<26:40:26, 3.56it/s] 8%|▊ | 29991/371472 [2:25:17<26:23:08, 3.59it/s] 8%|▊ | 29992/371472 [2:25:17<26:35:50, 3.57it/s] 8%|▊ | 29993/371472 [2:25:17<26:34:28, 3.57it/s] 8%|▊ | 29994/371472 [2:25:18<26:13:19, 3.62it/s] 8%|▊ | 29995/371472 [2:25:18<26:39:33, 3.56it/s] 8%|▊ | 29996/371472 [2:25:18<26:08:45, 3.63it/s] 8%|▊ | 29997/371472 [2:25:18<25:39:13, 3.70it/s] 8%|▊ | 29998/371472 [2:25:19<25:42:19, 3.69it/s] 8%|▊ | 29999/371472 [2:25:19<28:02:35, 3.38it/s] 8%|▊ | 30000/371472 [2:25:19<29:18:40, 3.24it/s] {'loss': 7.1475, 'learning_rate': 9.277618565364475e-07, 'epoch': 1.29} + 8%|▊ | 30000/371472 [2:25:19<29:18:40, 3.24it/s] 8%|▊ | 30001/371472 [2:25:20<28:29:01, 3.33it/s] 8%|▊ | 30002/371472 [2:25:20<27:45:16, 3.42it/s] 8%|▊ | 30003/371472 [2:25:20<28:58:59, 3.27it/s] 8%|▊ | 30004/371472 [2:25:21<29:35:05, 3.21it/s] 8%|▊ | 30005/371472 [2:25:21<29:05:29, 3.26it/s] 8%|▊ | 30006/371472 [2:25:21<28:28:37, 3.33it/s] 8%|▊ | 30007/371472 [2:25:21<28:22:15, 3.34it/s] 8%|▊ | 30008/371472 [2:25:22<27:18:40, 3.47it/s] 8%|▊ | 30009/371472 [2:25:22<27:44:37, 3.42it/s] 8%|▊ | 30010/371472 [2:25:22<26:40:29, 3.56it/s] 8%|▊ | 30011/371472 [2:25:23<26:49:23, 3.54it/s] 8%|▊ | 30012/371472 [2:25:23<27:06:40, 3.50it/s] 8%|▊ | 30013/371472 [2:25:23<26:57:19, 3.52it/s] 8%|▊ | 30014/371472 [2:25:23<27:15:00, 3.48it/s] 8%|▊ | 30015/371472 [2:25:24<27:20:24, 3.47it/s] 8%|▊ | 30016/371472 [2:25:24<26:49:47, 3.54it/s] 8%|▊ | 30017/371472 [2:25:24<27:54:29, 3.40it/s] 8%|▊ | 30018/371472 [2:25:25<27:57:39, 3.39it/s] 8%|▊ | 30019/371472 [2:25:25<28:23:09, 3.34it/s] 8%|▊ | 30020/371472 [2:25:25<28:15:50, 3.36it/s] {'loss': 7.0125, 'learning_rate': 9.277133745609688e-07, 'epoch': 1.29} + 8%|▊ | 30020/371472 [2:25:25<28:15:50, 3.36it/s] 8%|▊ | 30021/371472 [2:25:25<27:19:26, 3.47it/s] 8%|▊ | 30022/371472 [2:25:26<26:23:47, 3.59it/s] 8%|▊ | 30023/371472 [2:25:26<26:38:08, 3.56it/s] 8%|▊ | 30024/371472 [2:25:26<26:21:38, 3.60it/s] 8%|▊ | 30025/371472 [2:25:27<26:24:58, 3.59it/s] 8%|▊ | 30026/371472 [2:25:27<25:39:22, 3.70it/s] 8%|▊ | 30027/371472 [2:25:27<25:42:08, 3.69it/s] 8%|▊ | 30028/371472 [2:25:27<25:58:06, 3.65it/s] 8%|▊ | 30029/371472 [2:25:28<27:30:58, 3.45it/s] 8%|▊ | 30030/371472 [2:25:28<27:12:23, 3.49it/s] 8%|▊ | 30031/371472 [2:25:28<28:51:00, 3.29it/s] 8%|▊ | 30032/371472 [2:25:29<28:37:25, 3.31it/s] 8%|▊ | 30033/371472 [2:25:29<27:42:53, 3.42it/s] 8%|▊ | 30034/371472 [2:25:29<27:46:43, 3.41it/s] 8%|▊ | 30035/371472 [2:25:29<27:42:23, 3.42it/s] 8%|▊ | 30036/371472 [2:25:30<26:36:41, 3.56it/s] 8%|▊ | 30037/371472 [2:25:30<27:49:35, 3.41it/s] 8%|▊ | 30038/371472 [2:25:30<27:21:17, 3.47it/s] 8%|▊ | 30039/371472 [2:25:31<26:45:13, 3.55it/s] 8%|▊ | 30040/371472 [2:25:31<29:27:40, 3.22it/s] {'loss': 7.2633, 'learning_rate': 9.276648925854898e-07, 'epoch': 1.29} + 8%|▊ | 30040/371472 [2:25:31<29:27:40, 3.22it/s] 8%|▊ | 30041/371472 [2:25:31<27:53:35, 3.40it/s] 8%|▊ | 30042/371472 [2:25:32<27:12:52, 3.48it/s] 8%|▊ | 30043/371472 [2:25:32<27:11:18, 3.49it/s] 8%|▊ | 30044/371472 [2:25:32<27:16:56, 3.48it/s] 8%|▊ | 30045/371472 [2:25:32<26:57:17, 3.52it/s] 8%|▊ | 30046/371472 [2:25:33<28:18:06, 3.35it/s] 8%|▊ | 30047/371472 [2:25:33<27:15:05, 3.48it/s] 8%|▊ | 30048/371472 [2:25:33<27:53:25, 3.40it/s] 8%|▊ | 30049/371472 [2:25:34<26:54:15, 3.53it/s] 8%|▊ | 30050/371472 [2:25:34<27:08:05, 3.50it/s] 8%|▊ | 30051/371472 [2:25:34<27:46:42, 3.41it/s] 8%|▊ | 30052/371472 [2:25:34<27:07:25, 3.50it/s] 8%|▊ | 30053/371472 [2:25:35<26:21:51, 3.60it/s] 8%|▊ | 30054/371472 [2:25:35<25:49:43, 3.67it/s] 8%|▊ | 30055/371472 [2:25:35<25:19:45, 3.74it/s] 8%|▊ | 30056/371472 [2:25:35<25:40:39, 3.69it/s] 8%|▊ | 30057/371472 [2:25:36<25:38:39, 3.70it/s] 8%|▊ | 30058/371472 [2:25:36<26:48:43, 3.54it/s] 8%|▊ | 30059/371472 [2:25:36<26:11:01, 3.62it/s] 8%|▊ | 30060/371472 [2:25:37<25:26:28, 3.73it/s] {'loss': 6.62, 'learning_rate': 9.276164106100109e-07, 'epoch': 1.29} + 8%|▊ | 30060/371472 [2:25:37<25:26:28, 3.73it/s] 8%|▊ | 30061/371472 [2:25:37<25:58:47, 3.65it/s] 8%|▊ | 30062/371472 [2:25:37<25:39:18, 3.70it/s] 8%|▊ | 30063/371472 [2:25:37<26:22:20, 3.60it/s] 8%|▊ | 30064/371472 [2:25:38<26:10:42, 3.62it/s] 8%|▊ | 30065/371472 [2:25:38<27:47:24, 3.41it/s] 8%|▊ | 30066/371472 [2:25:38<28:10:22, 3.37it/s] 8%|▊ | 30067/371472 [2:25:39<27:14:09, 3.48it/s] 8%|▊ | 30068/371472 [2:25:39<27:23:26, 3.46it/s] 8%|▊ | 30069/371472 [2:25:39<27:06:23, 3.50it/s] 8%|▊ | 30070/371472 [2:25:39<26:52:04, 3.53it/s] 8%|▊ | 30071/371472 [2:25:40<28:13:14, 3.36it/s] 8%|▊ | 30072/371472 [2:25:40<28:34:16, 3.32it/s] 8%|▊ | 30073/371472 [2:25:40<27:27:07, 3.45it/s] 8%|▊ | 30074/371472 [2:25:41<26:31:49, 3.57it/s] 8%|▊ | 30075/371472 [2:25:41<26:28:31, 3.58it/s] 8%|▊ | 30076/371472 [2:25:41<25:53:34, 3.66it/s] 8%|▊ | 30077/371472 [2:25:41<26:25:43, 3.59it/s] 8%|▊ | 30078/371472 [2:25:42<26:17:14, 3.61it/s] 8%|▊ | 30079/371472 [2:25:42<26:59:07, 3.51it/s] 8%|▊ | 30080/371472 [2:25:42<26:23:03, 3.59it/s] {'loss': 7.1395, 'learning_rate': 9.27567928634532e-07, 'epoch': 1.3} + 8%|▊ | 30080/371472 [2:25:42<26:23:03, 3.59it/s] 8%|▊ | 30081/371472 [2:25:43<30:01:32, 3.16it/s] 8%|▊ | 30082/371472 [2:25:43<28:17:33, 3.35it/s] 8%|▊ | 30083/371472 [2:25:43<27:44:25, 3.42it/s] 8%|▊ | 30084/371472 [2:25:44<29:23:01, 3.23it/s] 8%|▊ | 30085/371472 [2:25:44<29:28:30, 3.22it/s] 8%|▊ | 30086/371472 [2:25:44<30:22:00, 3.12it/s] 8%|▊ | 30087/371472 [2:25:44<28:48:46, 3.29it/s] 8%|▊ | 30088/371472 [2:25:45<28:59:27, 3.27it/s] 8%|▊ | 30089/371472 [2:25:45<27:18:31, 3.47it/s] 8%|▊ | 30090/371472 [2:25:45<28:40:40, 3.31it/s] 8%|▊ | 30091/371472 [2:25:46<27:33:57, 3.44it/s] 8%|▊ | 30092/371472 [2:25:46<27:07:18, 3.50it/s] 8%|▊ | 30093/371472 [2:25:46<27:03:55, 3.50it/s] 8%|▊ | 30094/371472 [2:25:46<27:29:36, 3.45it/s] 8%|▊ | 30095/371472 [2:25:47<27:10:07, 3.49it/s] 8%|▊ | 30096/371472 [2:25:47<26:50:40, 3.53it/s] 8%|▊ | 30097/371472 [2:25:47<25:47:32, 3.68it/s] 8%|▊ | 30098/371472 [2:25:48<25:52:40, 3.66it/s] 8%|▊ | 30099/371472 [2:25:48<25:37:19, 3.70it/s] 8%|▊ | 30100/371472 [2:25:48<25:33:25, 3.71it/s] {'loss': 7.0973, 'learning_rate': 9.275194466590532e-07, 'epoch': 1.3} + 8%|▊ | 30100/371472 [2:25:48<25:33:25, 3.71it/s] 8%|▊ | 30101/371472 [2:25:48<25:46:14, 3.68it/s] 8%|▊ | 30102/371472 [2:25:49<25:56:45, 3.65it/s] 8%|▊ | 30103/371472 [2:25:49<26:20:36, 3.60it/s] 8%|▊ | 30104/371472 [2:25:49<26:31:11, 3.58it/s] 8%|▊ | 30105/371472 [2:25:50<28:43:30, 3.30it/s] 8%|▊ | 30106/371472 [2:25:50<28:28:34, 3.33it/s] 8%|▊ | 30107/371472 [2:25:50<27:55:33, 3.40it/s] 8%|▊ | 30108/371472 [2:25:50<26:52:55, 3.53it/s] 8%|▊ | 30109/371472 [2:25:51<27:31:09, 3.45it/s] 8%|▊ | 30110/371472 [2:25:51<27:26:52, 3.45it/s] 8%|▊ | 30111/371472 [2:25:51<27:38:16, 3.43it/s] 8%|▊ | 30112/371472 [2:25:52<27:00:36, 3.51it/s] 8%|▊ | 30113/371472 [2:25:52<27:06:52, 3.50it/s] 8%|▊ | 30114/371472 [2:25:52<27:08:39, 3.49it/s] 8%|▊ | 30115/371472 [2:25:52<27:44:53, 3.42it/s] 8%|▊ | 30116/371472 [2:25:53<28:33:19, 3.32it/s] 8%|▊ | 30117/371472 [2:25:53<27:14:06, 3.48it/s] 8%|▊ | 30118/371472 [2:25:53<27:26:44, 3.45it/s] 8%|▊ | 30119/371472 [2:25:54<27:46:13, 3.41it/s] 8%|▊ | 30120/371472 [2:25:54<27:01:21, 3.51it/s] {'loss': 7.0828, 'learning_rate': 9.274709646835743e-07, 'epoch': 1.3} + 8%|▊ | 30120/371472 [2:25:54<27:01:21, 3.51it/s] 8%|▊ | 30121/371472 [2:25:54<26:52:48, 3.53it/s] 8%|▊ | 30122/371472 [2:25:54<26:55:48, 3.52it/s] 8%|▊ | 30123/371472 [2:25:55<26:16:22, 3.61it/s] 8%|▊ | 30124/371472 [2:25:55<26:32:58, 3.57it/s] 8%|▊ | 30125/371472 [2:25:55<26:10:08, 3.62it/s] 8%|▊ | 30126/371472 [2:25:56<26:26:59, 3.58it/s] 8%|▊ | 30127/371472 [2:25:56<27:52:49, 3.40it/s] 8%|▊ | 30128/371472 [2:25:56<27:19:31, 3.47it/s] 8%|▊ | 30129/371472 [2:25:56<27:52:11, 3.40it/s] 8%|▊ | 30130/371472 [2:25:57<27:46:24, 3.41it/s] 8%|▊ | 30131/371472 [2:25:57<27:19:31, 3.47it/s] 8%|▊ | 30132/371472 [2:25:57<27:06:14, 3.50it/s] 8%|▊ | 30133/371472 [2:25:58<27:08:57, 3.49it/s] 8%|▊ | 30134/371472 [2:25:58<27:36:14, 3.43it/s] 8%|▊ | 30135/371472 [2:25:58<28:34:00, 3.32it/s] 8%|▊ | 30136/371472 [2:25:58<27:17:48, 3.47it/s] 8%|▊ | 30137/371472 [2:25:59<27:17:34, 3.47it/s] 8%|▊ | 30138/371472 [2:25:59<26:42:30, 3.55it/s] 8%|▊ | 30139/371472 [2:25:59<28:42:39, 3.30it/s] 8%|▊ | 30140/371472 [2:26:00<30:02:27, 3.16it/s] {'loss': 7.079, 'learning_rate': 9.274224827080954e-07, 'epoch': 1.3} + 8%|▊ | 30140/371472 [2:26:00<30:02:27, 3.16it/s] 8%|▊ | 30141/371472 [2:26:00<28:14:13, 3.36it/s] 8%|▊ | 30142/371472 [2:26:00<29:36:59, 3.20it/s] 8%|▊ | 30143/371472 [2:26:01<28:52:19, 3.28it/s] 8%|▊ | 30144/371472 [2:26:01<28:22:41, 3.34it/s] 8%|▊ | 30145/371472 [2:26:01<30:20:31, 3.12it/s] 8%|▊ | 30146/371472 [2:26:02<29:45:34, 3.19it/s] 8%|▊ | 30147/371472 [2:26:02<29:28:01, 3.22it/s] 8%|▊ | 30148/371472 [2:26:02<28:32:31, 3.32it/s] 8%|▊ | 30149/371472 [2:26:02<29:09:59, 3.25it/s] 8%|▊ | 30150/371472 [2:26:03<28:19:04, 3.35it/s] 8%|▊ | 30151/371472 [2:26:03<28:22:39, 3.34it/s] 8%|▊ | 30152/371472 [2:26:03<29:39:56, 3.20it/s] 8%|▊ | 30153/371472 [2:26:04<28:42:22, 3.30it/s] 8%|▊ | 30154/371472 [2:26:04<27:47:58, 3.41it/s] 8%|▊ | 30155/371472 [2:26:04<27:38:53, 3.43it/s] 8%|▊ | 30156/371472 [2:26:05<27:01:37, 3.51it/s] 8%|▊ | 30157/371472 [2:26:05<28:20:45, 3.34it/s] 8%|▊ | 30158/371472 [2:26:05<28:11:20, 3.36it/s] 8%|▊ | 30159/371472 [2:26:05<27:05:10, 3.50it/s] 8%|▊ | 30160/371472 [2:26:06<26:59:18, 3.51it/s] {'loss': 6.8067, 'learning_rate': 9.273740007326165e-07, 'epoch': 1.3} + 8%|▊ | 30160/371472 [2:26:06<26:59:18, 3.51it/s] 8%|▊ | 30161/371472 [2:26:06<26:38:22, 3.56it/s] 8%|▊ | 30162/371472 [2:26:06<27:27:10, 3.45it/s] 8%|▊ | 30163/371472 [2:26:07<27:01:58, 3.51it/s] 8%|▊ | 30164/371472 [2:26:07<27:30:39, 3.45it/s] 8%|▊ | 30165/371472 [2:26:07<28:18:23, 3.35it/s] 8%|▊ | 30166/371472 [2:26:07<27:36:28, 3.43it/s] 8%|▊ | 30167/371472 [2:26:08<27:17:28, 3.47it/s] 8%|▊ | 30168/371472 [2:26:08<26:18:36, 3.60it/s] 8%|▊ | 30169/371472 [2:26:08<26:27:46, 3.58it/s] 8%|▊ | 30170/371472 [2:26:09<26:04:53, 3.63it/s] 8%|▊ | 30171/371472 [2:26:09<28:06:34, 3.37it/s] 8%|▊ | 30172/371472 [2:26:09<28:05:07, 3.38it/s] 8%|▊ | 30173/371472 [2:26:09<27:00:14, 3.51it/s] 8%|▊ | 30174/371472 [2:26:10<26:26:27, 3.59it/s] 8%|▊ | 30175/371472 [2:26:10<25:24:09, 3.73it/s] 8%|▊ | 30176/371472 [2:26:10<24:59:25, 3.79it/s] 8%|▊ | 30177/371472 [2:26:10<25:06:45, 3.78it/s] 8%|▊ | 30178/371472 [2:26:11<25:05:18, 3.78it/s] 8%|▊ | 30179/371472 [2:26:11<25:21:51, 3.74it/s] 8%|▊ | 30180/371472 [2:26:11<25:12:30, 3.76it/s] {'loss': 6.7055, 'learning_rate': 9.273255187571376e-07, 'epoch': 1.3} + 8%|▊ | 30180/371472 [2:26:11<25:12:30, 3.76it/s] 8%|▊ | 30181/371472 [2:26:12<26:21:40, 3.60it/s] 8%|▊ | 30182/371472 [2:26:12<26:37:21, 3.56it/s] 8%|▊ | 30183/371472 [2:26:12<26:53:29, 3.53it/s] 8%|▊ | 30184/371472 [2:26:12<26:41:02, 3.55it/s] 8%|▊ | 30185/371472 [2:26:13<28:16:10, 3.35it/s] 8%|▊ | 30186/371472 [2:26:13<27:43:31, 3.42it/s] 8%|▊ | 30187/371472 [2:26:13<27:47:00, 3.41it/s] 8%|▊ | 30188/371472 [2:26:14<28:02:43, 3.38it/s] 8%|▊ | 30189/371472 [2:26:14<29:25:39, 3.22it/s] 8%|▊ | 30190/371472 [2:26:14<28:50:27, 3.29it/s] 8%|▊ | 30191/371472 [2:26:15<30:01:39, 3.16it/s] 8%|▊ | 30192/371472 [2:26:15<29:03:06, 3.26it/s] 8%|▊ | 30193/371472 [2:26:15<27:30:11, 3.45it/s] 8%|▊ | 30194/371472 [2:26:15<27:06:13, 3.50it/s] 8%|▊ | 30195/371472 [2:26:16<27:44:39, 3.42it/s] 8%|▊ | 30196/371472 [2:26:16<28:19:54, 3.35it/s] 8%|▊ | 30197/371472 [2:26:16<29:47:22, 3.18it/s] 8%|▊ | 30198/371472 [2:26:17<28:04:41, 3.38it/s] 8%|▊ | 30199/371472 [2:26:17<28:58:57, 3.27it/s] 8%|▊ | 30200/371472 [2:26:17<30:04:22, 3.15it/s] {'loss': 7.0344, 'learning_rate': 9.272770367816587e-07, 'epoch': 1.3} + 8%|▊ | 30200/371472 [2:26:17<30:04:22, 3.15it/s] 8%|▊ | 30201/371472 [2:26:18<30:52:07, 3.07it/s] 8%|▊ | 30202/371472 [2:26:18<29:02:55, 3.26it/s] 8%|▊ | 30203/371472 [2:26:18<28:10:38, 3.36it/s] 8%|▊ | 30204/371472 [2:26:18<27:35:10, 3.44it/s] 8%|▊ | 30205/371472 [2:26:19<26:42:48, 3.55it/s] 8%|▊ | 30206/371472 [2:26:19<27:16:59, 3.47it/s] 8%|▊ | 30207/371472 [2:26:19<26:28:53, 3.58it/s] 8%|▊ | 30208/371472 [2:26:20<27:42:50, 3.42it/s] 8%|▊ | 30209/371472 [2:26:20<26:56:44, 3.52it/s] 8%|▊ | 30210/371472 [2:26:20<27:33:04, 3.44it/s] 8%|▊ | 30211/371472 [2:26:20<27:11:32, 3.49it/s] 8%|▊ | 30212/371472 [2:26:21<28:33:51, 3.32it/s] 8%|▊ | 30213/371472 [2:26:21<27:37:30, 3.43it/s] 8%|▊ | 30214/371472 [2:26:21<28:34:10, 3.32it/s] 8%|▊ | 30215/371472 [2:26:22<28:35:28, 3.32it/s] 8%|▊ | 30216/371472 [2:26:22<29:05:36, 3.26it/s] 8%|▊ | 30217/371472 [2:26:22<29:09:24, 3.25it/s] 8%|▊ | 30218/371472 [2:26:23<28:20:43, 3.34it/s] 8%|▊ | 30219/371472 [2:26:23<28:38:46, 3.31it/s] 8%|▊ | 30220/371472 [2:26:23<29:09:33, 3.25it/s] {'loss': 7.1736, 'learning_rate': 9.272285548061798e-07, 'epoch': 1.3} + 8%|▊ | 30220/371472 [2:26:23<29:09:33, 3.25it/s] 8%|▊ | 30221/371472 [2:26:24<30:34:46, 3.10it/s] 8%|▊ | 30222/371472 [2:26:24<29:34:32, 3.21it/s] 8%|▊ | 30223/371472 [2:26:24<28:54:21, 3.28it/s] 8%|▊ | 30224/371472 [2:26:24<27:44:09, 3.42it/s] 8%|▊ | 30225/371472 [2:26:25<27:29:26, 3.45it/s] 8%|▊ | 30226/371472 [2:26:25<28:42:55, 3.30it/s] 8%|▊ | 30227/371472 [2:26:25<27:23:07, 3.46it/s] 8%|▊ | 30228/371472 [2:26:26<26:48:09, 3.54it/s] 8%|▊ | 30229/371472 [2:26:26<26:09:35, 3.62it/s] 8%|▊ | 30230/371472 [2:26:26<25:55:02, 3.66it/s] 8%|▊ | 30231/371472 [2:26:26<25:43:01, 3.69it/s] 8%|▊ | 30232/371472 [2:26:27<27:26:57, 3.45it/s] 8%|▊ | 30233/371472 [2:26:27<26:50:42, 3.53it/s] 8%|▊ | 30234/371472 [2:26:27<26:37:06, 3.56it/s] 8%|▊ | 30235/371472 [2:26:27<25:58:46, 3.65it/s] 8%|▊ | 30236/371472 [2:26:28<27:08:29, 3.49it/s] 8%|▊ | 30237/371472 [2:26:28<28:35:37, 3.31it/s] 8%|▊ | 30238/371472 [2:26:28<28:09:06, 3.37it/s] 8%|▊ | 30239/371472 [2:26:29<28:04:43, 3.38it/s] 8%|▊ | 30240/371472 [2:26:29<27:53:18, 3.40it/s] {'loss': 7.212, 'learning_rate': 9.271800728307009e-07, 'epoch': 1.3} + 8%|▊ | 30240/371472 [2:26:29<27:53:18, 3.40it/s] 8%|▊ | 30241/371472 [2:26:29<27:18:05, 3.47it/s] 8%|▊ | 30242/371472 [2:26:30<27:44:53, 3.42it/s] 8%|▊ | 30243/371472 [2:26:30<27:32:44, 3.44it/s] 8%|▊ | 30244/371472 [2:26:30<30:38:11, 3.09it/s] 8%|▊ | 30245/371472 [2:26:31<29:47:57, 3.18it/s] 8%|▊ | 30246/371472 [2:26:31<28:12:45, 3.36it/s] 8%|▊ | 30247/371472 [2:26:31<29:26:05, 3.22it/s] 8%|▊ | 30248/371472 [2:26:31<29:33:55, 3.21it/s] 8%|▊ | 30249/371472 [2:26:32<30:54:59, 3.07it/s] 8%|▊ | 30250/371472 [2:26:32<29:14:22, 3.24it/s] 8%|▊ | 30251/371472 [2:26:32<28:28:47, 3.33it/s] 8%|▊ | 30252/371472 [2:26:33<28:57:03, 3.27it/s] 8%|▊ | 30253/371472 [2:26:33<28:16:51, 3.35it/s] 8%|▊ | 30254/371472 [2:26:33<28:08:15, 3.37it/s] 8%|▊ | 30255/371472 [2:26:34<28:14:01, 3.36it/s] 8%|▊ | 30256/371472 [2:26:34<27:42:22, 3.42it/s] 8%|▊ | 30257/371472 [2:26:34<27:03:59, 3.50it/s] 8%|▊ | 30258/371472 [2:26:34<27:11:07, 3.49it/s] 8%|▊ | 30259/371472 [2:26:35<26:53:05, 3.53it/s] 8%|▊ | 30260/371472 [2:26:35<27:34:44, 3.44it/s] {'loss': 6.8573, 'learning_rate': 9.271315908552221e-07, 'epoch': 1.3} + 8%|▊ | 30260/371472 [2:26:35<27:34:44, 3.44it/s] 8%|▊ | 30261/371472 [2:26:35<28:07:05, 3.37it/s] 8%|▊ | 30262/371472 [2:26:36<28:32:22, 3.32it/s] 8%|▊ | 30263/371472 [2:26:36<30:31:17, 3.11it/s] 8%|▊ | 30264/371472 [2:26:36<30:36:54, 3.10it/s] 8%|▊ | 30265/371472 [2:26:37<31:41:17, 2.99it/s] 8%|▊ | 30266/371472 [2:26:37<30:51:48, 3.07it/s] 8%|▊ | 30267/371472 [2:26:37<28:37:02, 3.31it/s] 8%|▊ | 30268/371472 [2:26:38<27:44:43, 3.42it/s] 8%|▊ | 30269/371472 [2:26:38<27:04:26, 3.50it/s] 8%|▊ | 30270/371472 [2:26:38<28:51:25, 3.28it/s] 8%|▊ | 30271/371472 [2:26:38<30:29:05, 3.11it/s] 8%|▊ | 30272/371472 [2:26:39<28:27:20, 3.33it/s] 8%|▊ | 30273/371472 [2:26:39<27:22:15, 3.46it/s] 8%|▊ | 30274/371472 [2:26:39<27:31:33, 3.44it/s] 8%|▊ | 30275/371472 [2:26:40<26:41:01, 3.55it/s] 8%|▊ | 30276/371472 [2:26:40<30:31:48, 3.10it/s] 8%|▊ | 30277/371472 [2:26:40<28:43:39, 3.30it/s] 8%|▊ | 30278/371472 [2:26:41<28:08:36, 3.37it/s] 8%|▊ | 30279/371472 [2:26:41<27:29:37, 3.45it/s] 8%|▊ | 30280/371472 [2:26:41<26:57:39, 3.52it/s] {'loss': 6.196, 'learning_rate': 9.270831088797431e-07, 'epoch': 1.3} + 8%|▊ | 30280/371472 [2:26:41<26:57:39, 3.52it/s] 8%|▊ | 30281/371472 [2:26:41<27:10:37, 3.49it/s] 8%|▊ | 30282/371472 [2:26:42<26:56:49, 3.52it/s] 8%|▊ | 30283/371472 [2:26:42<26:22:32, 3.59it/s] 8%|▊ | 30284/371472 [2:26:42<27:23:05, 3.46it/s] 8%|▊ | 30285/371472 [2:26:42<27:23:55, 3.46it/s] 8%|▊ | 30286/371472 [2:26:43<27:24:53, 3.46it/s] 8%|▊ | 30287/371472 [2:26:43<26:43:18, 3.55it/s] 8%|▊ | 30288/371472 [2:26:43<26:50:30, 3.53it/s] 8%|▊ | 30289/371472 [2:26:44<26:38:57, 3.56it/s] 8%|▊ | 30290/371472 [2:26:44<26:58:42, 3.51it/s] 8%|▊ | 30291/371472 [2:26:44<27:01:54, 3.51it/s] 8%|▊ | 30292/371472 [2:26:45<27:48:49, 3.41it/s] 8%|▊ | 30293/371472 [2:26:45<27:00:19, 3.51it/s] 8%|▊ | 30294/371472 [2:26:45<27:30:43, 3.44it/s] 8%|▊ | 30295/371472 [2:26:45<28:09:47, 3.37it/s] 8%|▊ | 30296/371472 [2:26:46<27:32:35, 3.44it/s] 8%|▊ | 30297/371472 [2:26:46<26:52:02, 3.53it/s] 8%|▊ | 30298/371472 [2:26:46<25:53:24, 3.66it/s] 8%|▊ | 30299/371472 [2:26:46<25:50:53, 3.67it/s] 8%|▊ | 30300/371472 [2:26:47<26:19:07, 3.60it/s] {'loss': 6.7491, 'learning_rate': 9.270346269042642e-07, 'epoch': 1.31} + 8%|▊ | 30300/371472 [2:26:47<26:19:07, 3.60it/s] 8%|▊ | 30301/371472 [2:26:47<27:37:28, 3.43it/s] 8%|▊ | 30302/371472 [2:26:47<26:57:48, 3.51it/s] 8%|▊ | 30303/371472 [2:26:48<28:03:46, 3.38it/s] 8%|▊ | 30304/371472 [2:26:48<27:19:48, 3.47it/s] 8%|▊ | 30305/371472 [2:26:48<28:24:47, 3.34it/s] 8%|▊ | 30306/371472 [2:26:49<38:06:23, 2.49it/s] 8%|▊ | 30307/371472 [2:26:49<35:37:07, 2.66it/s] 8%|▊ | 30308/371472 [2:26:49<32:48:28, 2.89it/s] 8%|▊ | 30309/371472 [2:26:50<30:50:32, 3.07it/s] 8%|▊ | 30310/371472 [2:26:50<29:39:30, 3.20it/s] 8%|▊ | 30311/371472 [2:26:50<28:20:50, 3.34it/s] 8%|▊ | 30312/371472 [2:26:51<27:06:51, 3.50it/s] 8%|▊ | 30313/371472 [2:26:51<27:08:36, 3.49it/s] 8%|▊ | 30314/371472 [2:26:51<29:18:00, 3.23it/s] 8%|▊ | 30315/371472 [2:26:51<28:34:29, 3.32it/s] 8%|▊ | 30316/371472 [2:26:52<27:54:39, 3.40it/s] 8%|▊ | 30317/371472 [2:26:52<27:51:10, 3.40it/s] 8%|▊ | 30318/371472 [2:26:52<27:29:15, 3.45it/s] 8%|▊ | 30319/371472 [2:26:53<27:34:42, 3.44it/s] 8%|▊ | 30320/371472 [2:26:53<28:03:12, 3.38it/s] {'loss': 6.6946, 'learning_rate': 9.269861449287853e-07, 'epoch': 1.31} + 8%|▊ | 30320/371472 [2:26:53<28:03:12, 3.38it/s] 8%|▊ | 30321/371472 [2:26:53<28:14:51, 3.35it/s] 8%|▊ | 30322/371472 [2:26:54<29:01:15, 3.27it/s] 8%|▊ | 30323/371472 [2:26:54<28:22:32, 3.34it/s] 8%|▊ | 30324/371472 [2:26:54<27:30:34, 3.44it/s] 8%|▊ | 30325/371472 [2:26:54<27:15:59, 3.48it/s] 8%|▊ | 30326/371472 [2:26:55<27:15:00, 3.48it/s] 8%|▊ | 30327/371472 [2:26:55<27:37:30, 3.43it/s] 8%|▊ | 30328/371472 [2:26:55<27:10:29, 3.49it/s] 8%|▊ | 30329/371472 [2:26:56<28:14:25, 3.36it/s] 8%|▊ | 30330/371472 [2:26:56<27:14:03, 3.48it/s] 8%|▊ | 30331/371472 [2:26:56<26:30:11, 3.58it/s] 8%|▊ | 30332/371472 [2:26:56<25:41:52, 3.69it/s] 8%|▊ | 30333/371472 [2:26:57<27:57:45, 3.39it/s] 8%|▊ | 30334/371472 [2:26:57<27:07:09, 3.49it/s] 8%|▊ | 30335/371472 [2:26:57<27:07:41, 3.49it/s] 8%|▊ | 30336/371472 [2:26:58<26:44:34, 3.54it/s] 8%|▊ | 30337/371472 [2:26:58<29:52:31, 3.17it/s] 8%|▊ | 30338/371472 [2:26:58<28:53:00, 3.28it/s] 8%|▊ | 30339/371472 [2:26:58<27:48:14, 3.41it/s] 8%|▊ | 30340/371472 [2:26:59<28:01:44, 3.38it/s] {'loss': 6.5269, 'learning_rate': 9.269376629533064e-07, 'epoch': 1.31} + 8%|▊ | 30340/371472 [2:26:59<28:01:44, 3.38it/s] 8%|▊ | 30341/371472 [2:26:59<27:27:45, 3.45it/s] 8%|▊ | 30342/371472 [2:26:59<26:45:41, 3.54it/s] 8%|▊ | 30343/371472 [2:27:00<28:55:23, 3.28it/s] 8%|▊ | 30344/371472 [2:27:00<29:08:50, 3.25it/s] 8%|▊ | 30345/371472 [2:27:00<27:59:54, 3.38it/s] 8%|▊ | 30346/371472 [2:27:01<26:56:52, 3.52it/s] 8%|▊ | 30347/371472 [2:27:01<26:31:32, 3.57it/s] 8%|▊ | 30348/371472 [2:27:01<25:52:21, 3.66it/s] 8%|▊ | 30349/371472 [2:27:01<30:21:55, 3.12it/s] 8%|▊ | 30350/371472 [2:27:02<28:47:03, 3.29it/s] 8%|▊ | 30351/371472 [2:27:02<28:47:48, 3.29it/s] 8%|▊ | 30352/371472 [2:27:02<29:53:27, 3.17it/s] 8%|▊ | 30353/371472 [2:27:03<28:50:30, 3.29it/s] 8%|▊ | 30354/371472 [2:27:03<27:31:20, 3.44it/s] 8%|▊ | 30355/371472 [2:27:03<27:32:46, 3.44it/s] 8%|▊ | 30356/371472 [2:27:04<28:15:02, 3.35it/s] 8%|▊ | 30357/371472 [2:27:04<27:35:17, 3.43it/s] 8%|▊ | 30358/371472 [2:27:04<30:01:55, 3.16it/s] 8%|▊ | 30359/371472 [2:27:04<28:57:18, 3.27it/s] 8%|▊ | 30360/371472 [2:27:05<28:21:54, 3.34it/s] {'loss': 6.6064, 'learning_rate': 9.268891809778275e-07, 'epoch': 1.31} + 8%|▊ | 30360/371472 [2:27:05<28:21:54, 3.34it/s] 8%|▊ | 30361/371472 [2:27:05<27:44:44, 3.42it/s] 8%|▊ | 30362/371472 [2:27:05<28:20:54, 3.34it/s] 8%|▊ | 30363/371472 [2:27:06<27:42:55, 3.42it/s] 8%|▊ | 30364/371472 [2:27:06<27:06:29, 3.50it/s] 8%|▊ | 30365/371472 [2:27:06<26:55:19, 3.52it/s] 8%|▊ | 30366/371472 [2:27:06<26:42:57, 3.55it/s] 8%|▊ | 30367/371472 [2:27:07<26:48:22, 3.53it/s] 8%|▊ | 30368/371472 [2:27:07<27:04:47, 3.50it/s] 8%|▊ | 30369/371472 [2:27:07<25:59:54, 3.64it/s] 8%|▊ | 30370/371472 [2:27:08<25:46:01, 3.68it/s] 8%|▊ | 30371/371472 [2:27:08<25:27:23, 3.72it/s] 8%|▊ | 30372/371472 [2:27:08<25:14:56, 3.75it/s] 8%|▊ | 30373/371472 [2:27:08<25:26:24, 3.72it/s] 8%|▊ | 30374/371472 [2:27:09<25:27:18, 3.72it/s] 8%|▊ | 30375/371472 [2:27:09<25:21:05, 3.74it/s] 8%|▊ | 30376/371472 [2:27:09<25:33:46, 3.71it/s] 8%|▊ | 30377/371472 [2:27:09<25:48:01, 3.67it/s] 8%|▊ | 30378/371472 [2:27:10<25:56:39, 3.65it/s] 8%|▊ | 30379/371472 [2:27:10<25:33:03, 3.71it/s] 8%|▊ | 30380/371472 [2:27:10<28:12:33, 3.36it/s] {'loss': 7.2097, 'learning_rate': 9.268406990023487e-07, 'epoch': 1.31} + 8%|▊ | 30380/371472 [2:27:10<28:12:33, 3.36it/s] 8%|▊ | 30381/371472 [2:27:11<28:27:00, 3.33it/s] 8%|▊ | 30382/371472 [2:27:11<28:47:22, 3.29it/s] 8%|▊ | 30383/371472 [2:27:11<28:55:19, 3.28it/s] 8%|▊ | 30384/371472 [2:27:12<29:28:34, 3.21it/s] 8%|▊ | 30385/371472 [2:27:12<29:09:00, 3.25it/s] 8%|▊ | 30386/371472 [2:27:12<28:37:34, 3.31it/s] 8%|▊ | 30387/371472 [2:27:12<28:28:45, 3.33it/s] 8%|▊ | 30388/371472 [2:27:13<31:26:30, 3.01it/s] 8%|▊ | 30389/371472 [2:27:13<33:12:17, 2.85it/s] 8%|▊ | 30390/371472 [2:27:14<32:17:08, 2.93it/s] 8%|▊ | 30391/371472 [2:27:14<30:38:41, 3.09it/s] 8%|▊ | 30392/371472 [2:27:14<29:42:16, 3.19it/s] 8%|▊ | 30393/371472 [2:27:14<28:48:20, 3.29it/s] 8%|▊ | 30394/371472 [2:27:15<28:30:57, 3.32it/s] 8%|▊ | 30395/371472 [2:27:15<27:39:45, 3.42it/s] 8%|▊ | 30396/371472 [2:27:15<28:24:32, 3.33it/s] 8%|▊ | 30397/371472 [2:27:16<27:49:11, 3.41it/s] 8%|▊ | 30398/371472 [2:27:16<26:24:51, 3.59it/s] 8%|▊ | 30399/371472 [2:27:16<26:23:44, 3.59it/s] 8%|▊ | 30400/371472 [2:27:16<25:56:43, 3.65it/s] {'loss': 6.6234, 'learning_rate': 9.267922170268698e-07, 'epoch': 1.31} + 8%|▊ | 30400/371472 [2:27:16<25:56:43, 3.65it/s] 8%|▊ | 30401/371472 [2:27:17<27:10:39, 3.49it/s] 8%|▊ | 30402/371472 [2:27:17<28:24:11, 3.34it/s] 8%|▊ | 30403/371472 [2:27:17<28:06:13, 3.37it/s] 8%|▊ | 30404/371472 [2:27:18<26:59:42, 3.51it/s] 8%|▊ | 30405/371472 [2:27:18<27:07:55, 3.49it/s] 8%|▊ | 30406/371472 [2:27:18<28:11:32, 3.36it/s] 8%|▊ | 30407/371472 [2:27:18<27:25:32, 3.45it/s] 8%|▊ | 30408/371472 [2:27:19<27:01:55, 3.50it/s] 8%|▊ | 30409/371472 [2:27:19<26:26:23, 3.58it/s] 8%|▊ | 30410/371472 [2:27:19<25:41:01, 3.69it/s] 8%|▊ | 30411/371472 [2:27:20<25:51:32, 3.66it/s] 8%|▊ | 30412/371472 [2:27:20<25:46:53, 3.67it/s] 8%|▊ | 30413/371472 [2:27:20<25:33:22, 3.71it/s] 8%|▊ | 30414/371472 [2:27:20<26:47:28, 3.54it/s] 8%|▊ | 30415/371472 [2:27:21<26:29:03, 3.58it/s] 8%|▊ | 30416/371472 [2:27:21<25:51:41, 3.66it/s] 8%|▊ | 30417/371472 [2:27:21<25:46:32, 3.68it/s] 8%|▊ | 30418/371472 [2:27:21<25:25:59, 3.72it/s] 8%|▊ | 30419/371472 [2:27:22<27:38:11, 3.43it/s] 8%|▊ | 30420/371472 [2:27:22<27:18:19, 3.47it/s] {'loss': 7.0382, 'learning_rate': 9.267437350513908e-07, 'epoch': 1.31} + 8%|▊ | 30420/371472 [2:27:22<27:18:19, 3.47it/s] 8%|▊ | 30421/371472 [2:27:22<28:22:15, 3.34it/s] 8%|▊ | 30422/371472 [2:27:23<27:34:59, 3.43it/s] 8%|▊ | 30423/371472 [2:27:23<28:09:57, 3.36it/s] 8%|▊ | 30424/371472 [2:27:23<30:52:47, 3.07it/s] 8%|▊ | 30425/371472 [2:27:24<30:46:53, 3.08it/s] 8%|▊ | 30426/371472 [2:27:24<32:18:45, 2.93it/s] 8%|▊ | 30427/371472 [2:27:24<30:44:12, 3.08it/s] 8%|▊ | 30428/371472 [2:27:25<28:47:43, 3.29it/s] 8%|▊ | 30429/371472 [2:27:25<28:50:12, 3.29it/s] 8%|▊ | 30430/371472 [2:27:25<28:41:15, 3.30it/s] 8%|▊ | 30431/371472 [2:27:26<28:10:05, 3.36it/s] 8%|▊ | 30432/371472 [2:27:26<30:14:31, 3.13it/s] 8%|▊ | 30433/371472 [2:27:26<30:13:08, 3.13it/s] 8%|▊ | 30434/371472 [2:27:27<30:22:47, 3.12it/s] 8%|▊ | 30435/371472 [2:27:27<30:42:41, 3.08it/s] 8%|▊ | 30436/371472 [2:27:27<30:54:26, 3.07it/s] 8%|▊ | 30437/371472 [2:27:27<29:01:38, 3.26it/s] 8%|▊ | 30438/371472 [2:27:28<27:57:14, 3.39it/s] 8%|▊ | 30439/371472 [2:27:28<28:58:43, 3.27it/s] 8%|▊ | 30440/371472 [2:27:28<29:02:57, 3.26it/s] {'loss': 6.7611, 'learning_rate': 9.266952530759119e-07, 'epoch': 1.31} + 8%|▊ | 30440/371472 [2:27:28<29:02:57, 3.26it/s] 8%|▊ | 30441/371472 [2:27:29<27:49:15, 3.41it/s] 8%|▊ | 30442/371472 [2:27:29<27:42:27, 3.42it/s] 8%|▊ | 30443/371472 [2:27:29<26:23:02, 3.59it/s] 8%|▊ | 30444/371472 [2:27:29<25:51:56, 3.66it/s] 8%|▊ | 30445/371472 [2:27:30<26:59:21, 3.51it/s] 8%|▊ | 30446/371472 [2:27:30<26:55:59, 3.52it/s] 8%|▊ | 30447/371472 [2:27:30<27:43:07, 3.42it/s] 8%|▊ | 30448/371472 [2:27:31<27:31:39, 3.44it/s] 8%|▊ | 30449/371472 [2:27:31<26:51:47, 3.53it/s] 8%|▊ | 30450/371472 [2:27:31<28:22:25, 3.34it/s] 8%|▊ | 30451/371472 [2:27:31<27:46:30, 3.41it/s] 8%|▊ | 30452/371472 [2:27:32<28:25:57, 3.33it/s] 8%|▊ | 30453/371472 [2:27:32<27:39:28, 3.42it/s] 8%|▊ | 30454/371472 [2:27:32<27:34:59, 3.43it/s] 8%|▊ | 30455/371472 [2:27:33<28:00:18, 3.38it/s] 8%|▊ | 30456/371472 [2:27:33<27:09:29, 3.49it/s] 8%|▊ | 30457/371472 [2:27:33<26:31:07, 3.57it/s] 8%|▊ | 30458/371472 [2:27:34<29:29:07, 3.21it/s] 8%|▊ | 30459/371472 [2:27:34<28:09:25, 3.36it/s] 8%|▊ | 30460/371472 [2:27:34<27:14:43, 3.48it/s] {'loss': 6.6191, 'learning_rate': 9.26646771100433e-07, 'epoch': 1.31} + 8%|▊ | 30460/371472 [2:27:34<27:14:43, 3.48it/s] 8%|▊ | 30461/371472 [2:27:34<26:41:59, 3.55it/s] 8%|▊ | 30462/371472 [2:27:35<26:44:33, 3.54it/s] 8%|▊ | 30463/371472 [2:27:35<27:00:46, 3.51it/s] 8%|▊ | 30464/371472 [2:27:35<27:05:50, 3.50it/s] 8%|▊ | 30465/371472 [2:27:36<27:28:32, 3.45it/s] 8%|▊ | 30466/371472 [2:27:36<26:56:16, 3.52it/s] 8%|▊ | 30467/371472 [2:27:36<26:45:05, 3.54it/s] 8%|▊ | 30468/371472 [2:27:36<27:01:51, 3.50it/s] 8%|▊ | 30469/371472 [2:27:37<27:08:58, 3.49it/s] 8%|▊ | 30470/371472 [2:27:37<26:48:34, 3.53it/s] 8%|▊ | 30471/371472 [2:27:37<26:34:53, 3.56it/s] 8%|▊ | 30472/371472 [2:27:38<28:30:12, 3.32it/s] 8%|▊ | 30473/371472 [2:27:38<27:24:43, 3.46it/s] 8%|▊ | 30474/371472 [2:27:38<26:41:43, 3.55it/s] 8%|▊ | 30475/371472 [2:27:38<26:17:29, 3.60it/s] 8%|▊ | 30476/371472 [2:27:39<26:14:59, 3.61it/s] 8%|▊ | 30477/371472 [2:27:39<26:41:32, 3.55it/s] 8%|▊ | 30478/371472 [2:27:39<26:18:53, 3.60it/s] 8%|▊ | 30479/371472 [2:27:39<26:06:13, 3.63it/s] 8%|▊ | 30480/371472 [2:27:40<26:24:32, 3.59it/s] {'loss': 6.8269, 'learning_rate': 9.265982891249542e-07, 'epoch': 1.31} + 8%|▊ | 30480/371472 [2:27:40<26:24:32, 3.59it/s] 8%|▊ | 30481/371472 [2:27:40<25:46:08, 3.68it/s] 8%|▊ | 30482/371472 [2:27:40<25:36:07, 3.70it/s] 8%|▊ | 30483/371472 [2:27:41<27:11:24, 3.48it/s] 8%|▊ | 30484/371472 [2:27:41<27:15:02, 3.48it/s] 8%|▊ | 30485/371472 [2:27:41<26:17:52, 3.60it/s] 8%|▊ | 30486/371472 [2:27:41<26:05:01, 3.63it/s] 8%|▊ | 30487/371472 [2:27:42<26:19:16, 3.60it/s] 8%|▊ | 30488/371472 [2:27:42<26:48:00, 3.53it/s] 8%|▊ | 30489/371472 [2:27:42<27:19:37, 3.47it/s] 8%|▊ | 30490/371472 [2:27:43<27:03:08, 3.50it/s] 8%|▊ | 30491/371472 [2:27:43<26:46:41, 3.54it/s] 8%|▊ | 30492/371472 [2:27:43<27:18:24, 3.47it/s] 8%|▊ | 30493/371472 [2:27:43<27:15:27, 3.47it/s] 8%|▊ | 30494/371472 [2:27:44<26:56:42, 3.52it/s] 8%|▊ | 30495/371472 [2:27:44<32:30:49, 2.91it/s] 8%|▊ | 30496/371472 [2:27:44<30:42:57, 3.08it/s] 8%|▊ | 30497/371472 [2:27:45<28:36:37, 3.31it/s] 8%|▊ | 30498/371472 [2:27:45<27:33:11, 3.44it/s] 8%|▊ | 30499/371472 [2:27:45<26:55:56, 3.52it/s] 8%|▊ | 30500/371472 [2:27:46<27:31:39, 3.44it/s] {'loss': 6.4991, 'learning_rate': 9.265498071494753e-07, 'epoch': 1.31} + 8%|▊ | 30500/371472 [2:27:46<27:31:39, 3.44it/s] 8%|▊ | 30501/371472 [2:27:46<26:27:04, 3.58it/s] 8%|▊ | 30502/371472 [2:27:46<25:58:18, 3.65it/s] 8%|▊ | 30503/371472 [2:27:46<26:19:07, 3.60it/s] 8%|▊ | 30504/371472 [2:27:47<26:22:06, 3.59it/s] 8%|▊ | 30505/371472 [2:27:47<28:19:59, 3.34it/s] 8%|▊ | 30506/371472 [2:27:47<27:59:16, 3.38it/s] 8%|▊ | 30507/371472 [2:27:48<27:16:48, 3.47it/s] 8%|▊ | 30508/371472 [2:27:48<26:38:02, 3.56it/s] 8%|▊ | 30509/371472 [2:27:48<25:56:11, 3.65it/s] 8%|▊ | 30510/371472 [2:27:48<26:12:27, 3.61it/s] 8%|▊ | 30511/371472 [2:27:49<27:36:42, 3.43it/s] 8%|▊ | 30512/371472 [2:27:49<26:58:28, 3.51it/s] 8%|▊ | 30513/371472 [2:27:49<27:01:32, 3.50it/s] 8%|▊ | 30514/371472 [2:27:49<26:01:57, 3.64it/s] 8%|▊ | 30515/371472 [2:27:50<27:06:12, 3.49it/s] 8%|▊ | 30516/371472 [2:27:50<27:43:56, 3.42it/s] 8%|▊ | 30517/371472 [2:27:50<27:39:32, 3.42it/s] 8%|▊ | 30518/371472 [2:27:51<28:34:34, 3.31it/s] 8%|▊ | 30519/371472 [2:27:51<27:40:18, 3.42it/s] 8%|▊ | 30520/371472 [2:27:51<28:25:42, 3.33it/s] {'loss': 6.8496, 'learning_rate': 9.265013251739965e-07, 'epoch': 1.31} + 8%|▊ | 30520/371472 [2:27:51<28:25:42, 3.33it/s] 8%|▊ | 30521/371472 [2:27:52<27:51:35, 3.40it/s] 8%|▊ | 30522/371472 [2:27:52<27:15:41, 3.47it/s] 8%|▊ | 30523/371472 [2:27:52<28:01:27, 3.38it/s] 8%|▊ | 30524/371472 [2:27:52<26:56:38, 3.51it/s] 8%|▊ | 30525/371472 [2:27:53<26:11:45, 3.62it/s] 8%|▊ | 30526/371472 [2:27:53<27:40:06, 3.42it/s] 8%|▊ | 30527/371472 [2:27:53<29:02:23, 3.26it/s] 8%|▊ | 30528/371472 [2:27:54<27:57:57, 3.39it/s] 8%|▊ | 30529/371472 [2:27:54<27:17:52, 3.47it/s] 8%|▊ | 30530/371472 [2:27:54<28:14:24, 3.35it/s] 8%|▊ | 30531/371472 [2:27:54<27:18:34, 3.47it/s] 8%|▊ | 30532/371472 [2:27:55<27:03:32, 3.50it/s] 8%|▊ | 30533/371472 [2:27:55<26:12:42, 3.61it/s] 8%|▊ | 30534/371472 [2:27:55<25:53:03, 3.66it/s] 8%|▊ | 30535/371472 [2:27:56<28:35:21, 3.31it/s] 8%|▊ | 30536/371472 [2:27:56<29:27:34, 3.21it/s] 8%|▊ | 30537/371472 [2:27:56<29:01:47, 3.26it/s] 8%|▊ | 30538/371472 [2:27:57<27:59:22, 3.38it/s] 8%|▊ | 30539/371472 [2:27:57<28:25:22, 3.33it/s] 8%|▊ | 30540/371472 [2:27:57<29:34:37, 3.20it/s] {'loss': 6.6252, 'learning_rate': 9.264528431985175e-07, 'epoch': 1.32} + 8%|▊ | 30540/371472 [2:27:57<29:34:37, 3.20it/s] 8%|▊ | 30541/371472 [2:27:58<30:26:12, 3.11it/s] 8%|▊ | 30542/371472 [2:27:58<30:56:53, 3.06it/s] 8%|▊ | 30543/371472 [2:27:58<32:26:53, 2.92it/s] 8%|▊ | 30544/371472 [2:27:59<32:06:21, 2.95it/s] 8%|▊ | 30545/371472 [2:27:59<32:20:48, 2.93it/s] 8%|▊ | 30546/371472 [2:27:59<31:05:43, 3.05it/s] 8%|▊ | 30547/371472 [2:28:00<29:15:31, 3.24it/s] 8%|▊ | 30548/371472 [2:28:00<28:29:06, 3.32it/s] 8%|▊ | 30549/371472 [2:28:00<28:00:48, 3.38it/s] 8%|▊ | 30550/371472 [2:28:00<27:17:12, 3.47it/s] 8%|▊ | 30551/371472 [2:28:01<26:00:11, 3.64it/s] 8%|▊ | 30552/371472 [2:28:01<26:08:16, 3.62it/s] 8%|▊ | 30553/371472 [2:28:01<28:41:42, 3.30it/s] 8%|▊ | 30554/371472 [2:28:02<29:10:48, 3.25it/s] 8%|▊ | 30555/371472 [2:28:02<28:06:10, 3.37it/s] 8%|▊ | 30556/371472 [2:28:02<28:54:23, 3.28it/s] 8%|▊ | 30557/371472 [2:28:02<29:11:36, 3.24it/s] 8%|▊ | 30558/371472 [2:28:03<28:12:22, 3.36it/s] 8%|▊ | 30559/371472 [2:28:03<28:03:24, 3.38it/s] 8%|▊ | 30560/371472 [2:28:03<28:06:36, 3.37it/s] {'loss': 6.5231, 'learning_rate': 9.264043612230386e-07, 'epoch': 1.32} + 8%|▊ | 30560/371472 [2:28:03<28:06:36, 3.37it/s] 8%|▊ | 30561/371472 [2:28:04<27:32:48, 3.44it/s] 8%|▊ | 30562/371472 [2:28:04<27:04:57, 3.50it/s] 8%|▊ | 30563/371472 [2:28:04<26:48:16, 3.53it/s] 8%|▊ | 30564/371472 [2:28:04<26:26:16, 3.58it/s] 8%|▊ | 30565/371472 [2:28:05<26:45:17, 3.54it/s] 8%|▊ | 30566/371472 [2:28:05<27:29:46, 3.44it/s] 8%|▊ | 30567/371472 [2:28:05<26:38:49, 3.55it/s] 8%|▊ | 30568/371472 [2:28:06<26:36:09, 3.56it/s] 8%|▊ | 30569/371472 [2:28:06<26:01:08, 3.64it/s] 8%|▊ | 30570/371472 [2:28:06<26:17:00, 3.60it/s] 8%|▊ | 30571/371472 [2:28:06<25:57:09, 3.65it/s] 8%|▊ | 30572/371472 [2:28:07<25:09:44, 3.76it/s] 8%|▊ | 30573/371472 [2:28:07<27:16:28, 3.47it/s] 8%|▊ | 30574/371472 [2:28:07<27:12:24, 3.48it/s] 8%|▊ | 30575/371472 [2:28:08<26:16:51, 3.60it/s] 8%|▊ | 30576/371472 [2:28:08<26:54:00, 3.52it/s] 8%|▊ | 30577/371472 [2:28:08<26:44:48, 3.54it/s] 8%|▊ | 30578/371472 [2:28:08<26:30:45, 3.57it/s] 8%|▊ | 30579/371472 [2:28:09<26:11:39, 3.62it/s] 8%|▊ | 30580/371472 [2:28:09<25:46:07, 3.67it/s] {'loss': 6.6693, 'learning_rate': 9.263558792475596e-07, 'epoch': 1.32} + 8%|▊ | 30580/371472 [2:28:09<25:46:07, 3.67it/s] 8%|▊ | 30581/371472 [2:28:09<25:40:03, 3.69it/s] 8%|▊ | 30582/371472 [2:28:09<25:51:45, 3.66it/s] 8%|▊ | 30583/371472 [2:28:10<25:50:04, 3.67it/s] 8%|▊ | 30584/371472 [2:28:10<26:12:23, 3.61it/s] 8%|▊ | 30585/371472 [2:28:10<25:23:54, 3.73it/s] 8%|▊ | 30586/371472 [2:28:11<25:00:26, 3.79it/s] 8%|▊ | 30587/371472 [2:28:11<25:32:23, 3.71it/s] 8%|▊ | 30588/371472 [2:28:11<24:52:57, 3.81it/s] 8%|▊ | 30589/371472 [2:28:11<28:25:51, 3.33it/s] 8%|▊ | 30590/371472 [2:28:12<28:06:41, 3.37it/s] 8%|▊ | 30591/371472 [2:28:12<27:16:36, 3.47it/s] 8%|▊ | 30592/371472 [2:28:12<27:48:10, 3.41it/s] 8%|▊ | 30593/371472 [2:28:13<29:00:19, 3.26it/s] 8%|▊ | 30594/371472 [2:28:13<27:29:14, 3.44it/s] 8%|▊ | 30595/371472 [2:28:13<28:25:35, 3.33it/s] 8%|▊ | 30596/371472 [2:28:13<28:26:59, 3.33it/s] 8%|▊ | 30597/371472 [2:28:14<29:00:47, 3.26it/s] 8%|▊ | 30598/371472 [2:28:14<28:54:37, 3.28it/s] 8%|▊ | 30599/371472 [2:28:14<28:47:49, 3.29it/s] 8%|▊ | 30600/371472 [2:28:15<28:08:48, 3.36it/s] {'loss': 6.6069, 'learning_rate': 9.263073972720808e-07, 'epoch': 1.32} + 8%|▊ | 30600/371472 [2:28:15<28:08:48, 3.36it/s] 8%|▊ | 30601/371472 [2:28:15<27:44:57, 3.41it/s] 8%|▊ | 30602/371472 [2:28:15<29:25:55, 3.22it/s] 8%|▊ | 30603/371472 [2:28:16<28:43:05, 3.30it/s] 8%|▊ | 30604/371472 [2:28:16<28:54:07, 3.28it/s] 8%|▊ | 30605/371472 [2:28:16<27:27:21, 3.45it/s] 8%|▊ | 30606/371472 [2:28:16<27:13:21, 3.48it/s] 8%|▊ | 30607/371472 [2:28:17<27:53:37, 3.39it/s] 8%|▊ | 30608/371472 [2:28:17<26:47:32, 3.53it/s] 8%|▊ | 30609/371472 [2:28:17<28:18:24, 3.34it/s] 8%|▊ | 30610/371472 [2:28:18<29:10:02, 3.25it/s] 8%|▊ | 30611/371472 [2:28:18<29:41:48, 3.19it/s] 8%|▊ | 30612/371472 [2:28:18<28:15:58, 3.35it/s] 8%|▊ | 30613/371472 [2:28:19<27:22:26, 3.46it/s] 8%|▊ | 30614/371472 [2:28:19<30:06:31, 3.14it/s] 8%|▊ | 30615/371472 [2:28:19<29:06:25, 3.25it/s] 8%|▊ | 30616/371472 [2:28:20<28:50:04, 3.28it/s] 8%|▊ | 30617/371472 [2:28:20<28:12:05, 3.36it/s] 8%|▊ | 30618/371472 [2:28:20<29:11:01, 3.24it/s] 8%|▊ | 30619/371472 [2:28:20<27:50:08, 3.40it/s] 8%|▊ | 30620/371472 [2:28:21<28:08:42, 3.36it/s] {'loss': 6.7363, 'learning_rate': 9.262589152966019e-07, 'epoch': 1.32} + 8%|▊ | 30620/371472 [2:28:21<28:08:42, 3.36it/s] 8%|▊ | 30621/371472 [2:28:21<29:42:05, 3.19it/s] 8%|▊ | 30622/371472 [2:28:21<30:04:55, 3.15it/s] 8%|▊ | 30623/371472 [2:28:22<28:50:33, 3.28it/s] 8%|▊ | 30624/371472 [2:28:22<28:04:05, 3.37it/s] 8%|▊ | 30625/371472 [2:28:22<27:26:27, 3.45it/s] 8%|▊ | 30626/371472 [2:28:23<30:25:03, 3.11it/s] 8%|▊ | 30627/371472 [2:28:23<31:13:17, 3.03it/s] 8%|▊ | 30628/371472 [2:28:23<31:50:02, 2.97it/s] 8%|▊ | 30629/371472 [2:28:24<30:23:05, 3.12it/s] 8%|▊ | 30630/371472 [2:28:24<28:38:52, 3.30it/s] 8%|▊ | 30631/371472 [2:28:24<28:39:58, 3.30it/s] 8%|▊ | 30632/371472 [2:28:24<29:12:52, 3.24it/s] 8%|▊ | 30633/371472 [2:28:25<30:12:14, 3.13it/s] 8%|▊ | 30634/371472 [2:28:25<29:00:15, 3.26it/s] 8%|▊ | 30635/371472 [2:28:25<28:00:06, 3.38it/s] 8%|▊ | 30636/371472 [2:28:26<27:28:44, 3.45it/s] 8%|▊ | 30637/371472 [2:28:26<26:34:07, 3.56it/s] 8%|▊ | 30638/371472 [2:28:26<26:29:15, 3.57it/s] 8%|▊ | 30639/371472 [2:28:27<28:23:39, 3.33it/s] 8%|▊ | 30640/371472 [2:28:27<28:01:40, 3.38it/s] {'loss': 6.4996, 'learning_rate': 9.262104333211231e-07, 'epoch': 1.32} + 8%|▊ | 30640/371472 [2:28:27<28:01:40, 3.38it/s] 8%|▊ | 30641/371472 [2:28:27<27:41:30, 3.42it/s] 8%|▊ | 30642/371472 [2:28:27<26:28:58, 3.57it/s] 8%|▊ | 30643/371472 [2:28:28<27:29:27, 3.44it/s] 8%|▊ | 30644/371472 [2:28:28<27:27:55, 3.45it/s] 8%|▊ | 30645/371472 [2:28:28<27:38:13, 3.43it/s] 8%|▊ | 30646/371472 [2:28:29<26:47:58, 3.53it/s] 8%|▊ | 30647/371472 [2:28:29<26:17:41, 3.60it/s] 8%|▊ | 30648/371472 [2:28:29<26:08:25, 3.62it/s] 8%|▊ | 30649/371472 [2:28:29<25:48:56, 3.67it/s] 8%|▊ | 30650/371472 [2:28:30<26:19:48, 3.60it/s] 8%|▊ | 30651/371472 [2:28:30<26:19:40, 3.60it/s] 8%|▊ | 30652/371472 [2:28:30<26:04:38, 3.63it/s] 8%|▊ | 30653/371472 [2:28:30<25:41:14, 3.69it/s] 8%|▊ | 30654/371472 [2:28:31<25:37:07, 3.70it/s] 8%|▊ | 30655/371472 [2:28:31<26:53:03, 3.52it/s] 8%|▊ | 30656/371472 [2:28:31<27:18:11, 3.47it/s] 8%|▊ | 30657/371472 [2:28:32<26:58:47, 3.51it/s] 8%|▊ | 30658/371472 [2:28:32<26:22:12, 3.59it/s] 8%|▊ | 30659/371472 [2:28:32<26:50:53, 3.53it/s] 8%|▊ | 30660/371472 [2:28:32<26:41:03, 3.55it/s] {'loss': 6.7116, 'learning_rate': 9.261619513456441e-07, 'epoch': 1.32} + 8%|▊ | 30660/371472 [2:28:32<26:41:03, 3.55it/s] 8%|▊ | 30661/371472 [2:28:33<26:32:30, 3.57it/s] 8%|▊ | 30662/371472 [2:28:33<26:50:34, 3.53it/s] 8%|▊ | 30663/371472 [2:28:33<27:10:46, 3.48it/s] 8%|▊ | 30664/371472 [2:28:34<26:47:44, 3.53it/s] 8%|▊ | 30665/371472 [2:28:34<26:36:17, 3.56it/s] 8%|▊ | 30666/371472 [2:28:34<25:44:26, 3.68it/s] 8%|▊ | 30667/371472 [2:28:34<29:49:06, 3.17it/s] 8%|▊ | 30668/371472 [2:28:35<28:03:13, 3.37it/s] 8%|▊ | 30669/371472 [2:28:35<29:48:06, 3.18it/s] 8%|▊ | 30670/371472 [2:28:35<28:38:12, 3.31it/s] 8%|▊ | 30671/371472 [2:28:36<30:04:20, 3.15it/s] 8%|▊ | 30672/371472 [2:28:36<28:18:48, 3.34it/s] 8%|▊ | 30673/371472 [2:28:36<26:50:32, 3.53it/s] 8%|▊ | 30674/371472 [2:28:36<26:08:18, 3.62it/s] 8%|▊ | 30675/371472 [2:28:37<25:18:50, 3.74it/s] 8%|▊ | 30676/371472 [2:28:37<29:10:26, 3.24it/s] 8%|▊ | 30677/371472 [2:28:37<27:44:06, 3.41it/s] 8%|▊ | 30678/371472 [2:28:38<27:49:21, 3.40it/s] 8%|▊ | 30679/371472 [2:28:38<26:40:14, 3.55it/s] 8%|▊ | 30680/371472 [2:28:38<26:04:21, 3.63it/s] {'loss': 6.7801, 'learning_rate': 9.261134693701652e-07, 'epoch': 1.32} + 8%|▊ | 30680/371472 [2:28:38<26:04:21, 3.63it/s] 8%|▊ | 30681/371472 [2:28:38<25:54:34, 3.65it/s] 8%|▊ | 30682/371472 [2:28:39<26:35:06, 3.56it/s] 8%|▊ | 30683/371472 [2:28:39<26:51:01, 3.53it/s] 8%|▊ | 30684/371472 [2:28:39<25:44:00, 3.68it/s] 8%|▊ | 30685/371472 [2:28:40<26:01:58, 3.64it/s] 8%|▊ | 30686/371472 [2:28:40<25:24:47, 3.72it/s] 8%|▊ | 30687/371472 [2:28:40<26:20:38, 3.59it/s] 8%|▊ | 30688/371472 [2:28:40<26:41:09, 3.55it/s] 8%|▊ | 30689/371472 [2:28:41<26:36:28, 3.56it/s] 8%|▊ | 30690/371472 [2:28:41<26:41:25, 3.55it/s] 8%|▊ | 30691/371472 [2:28:41<27:04:22, 3.50it/s] 8%|▊ | 30692/371472 [2:28:42<26:16:23, 3.60it/s] 8%|▊ | 30693/371472 [2:28:42<25:22:24, 3.73it/s] 8%|▊ | 30694/371472 [2:28:42<25:52:12, 3.66it/s] 8%|▊ | 30695/371472 [2:28:42<25:56:27, 3.65it/s] 8%|▊ | 30696/371472 [2:28:43<25:41:25, 3.68it/s] 8%|▊ | 30697/371472 [2:28:43<26:38:52, 3.55it/s] 8%|▊ | 30698/371472 [2:28:43<26:48:15, 3.53it/s] 8%|▊ | 30699/371472 [2:28:43<26:07:28, 3.62it/s] 8%|▊ | 30700/371472 [2:28:44<26:01:57, 3.64it/s] {'loss': 6.8897, 'learning_rate': 9.260649873946863e-07, 'epoch': 1.32} + 8%|▊ | 30700/371472 [2:28:44<26:01:57, 3.64it/s] 8%|▊ | 30701/371472 [2:28:44<26:01:11, 3.64it/s] 8%|▊ | 30702/371472 [2:28:44<25:24:32, 3.73it/s] 8%|▊ | 30703/371472 [2:28:45<25:11:49, 3.76it/s] 8%|▊ | 30704/371472 [2:28:45<26:07:47, 3.62it/s] 8%|▊ | 30705/371472 [2:28:45<26:02:05, 3.64it/s] 8%|▊ | 30706/371472 [2:28:45<25:39:06, 3.69it/s] 8%|▊ | 30707/371472 [2:28:46<25:54:16, 3.65it/s] 8%|▊ | 30708/371472 [2:28:46<28:40:02, 3.30it/s] 8%|▊ | 30709/371472 [2:28:46<27:23:12, 3.46it/s] 8%|▊ | 30710/371472 [2:28:47<26:51:23, 3.52it/s] 8%|▊ | 30711/371472 [2:28:47<27:26:47, 3.45it/s] 8%|▊ | 30712/371472 [2:28:47<26:43:06, 3.54it/s] 8%|▊ | 30713/371472 [2:28:47<25:56:33, 3.65it/s] 8%|▊ | 30714/371472 [2:28:48<25:35:14, 3.70it/s] 8%|▊ | 30715/371472 [2:28:48<25:47:29, 3.67it/s] 8%|▊ | 30716/371472 [2:28:48<26:14:42, 3.61it/s] 8%|▊ | 30717/371472 [2:28:48<25:44:39, 3.68it/s] 8%|▊ | 30718/371472 [2:28:49<26:19:33, 3.60it/s] 8%|▊ | 30719/371472 [2:28:49<27:18:45, 3.47it/s] 8%|▊ | 30720/371472 [2:28:49<28:26:54, 3.33it/s] {'loss': 6.7371, 'learning_rate': 9.260165054192075e-07, 'epoch': 1.32} + 8%|▊ | 30720/371472 [2:28:49<28:26:54, 3.33it/s] 8%|▊ | 30721/371472 [2:28:50<28:39:39, 3.30it/s] 8%|▊ | 30722/371472 [2:28:50<28:49:10, 3.28it/s] 8%|▊ | 30723/371472 [2:28:50<28:18:35, 3.34it/s] 8%|▊ | 30724/371472 [2:28:51<27:40:14, 3.42it/s] 8%|▊ | 30725/371472 [2:28:51<26:13:59, 3.61it/s] 8%|▊ | 30726/371472 [2:28:51<25:21:26, 3.73it/s] 8%|▊ | 30727/371472 [2:28:51<26:54:05, 3.52it/s] 8%|▊ | 30728/371472 [2:28:52<27:00:50, 3.50it/s] 8%|▊ | 30729/371472 [2:28:52<26:55:49, 3.51it/s] 8%|▊ | 30730/371472 [2:28:52<27:52:07, 3.40it/s] 8%|▊ | 30731/371472 [2:28:53<27:12:56, 3.48it/s] 8%|▊ | 30732/371472 [2:28:53<26:19:55, 3.59it/s] 8%|▊ | 30733/371472 [2:28:53<26:14:16, 3.61it/s] 8%|▊ | 30734/371472 [2:28:53<25:48:31, 3.67it/s] 8%|▊ | 30735/371472 [2:28:54<27:01:02, 3.50it/s] 8%|▊ | 30736/371472 [2:28:54<26:13:56, 3.61it/s] 8%|▊ | 30737/371472 [2:28:54<26:12:40, 3.61it/s] 8%|▊ | 30738/371472 [2:28:54<26:21:17, 3.59it/s] 8%|▊ | 30739/371472 [2:28:55<26:12:15, 3.61it/s] 8%|▊ | 30740/371472 [2:28:55<26:44:50, 3.54it/s] {'loss': 6.5691, 'learning_rate': 9.259680234437285e-07, 'epoch': 1.32} + 8%|▊ | 30740/371472 [2:28:55<26:44:50, 3.54it/s] 8%|▊ | 30741/371472 [2:28:55<26:29:43, 3.57it/s] 8%|▊ | 30742/371472 [2:28:56<26:01:18, 3.64it/s] 8%|▊ | 30743/371472 [2:28:56<27:52:17, 3.40it/s] 8%|▊ | 30744/371472 [2:28:56<27:31:25, 3.44it/s] 8%|▊ | 30745/371472 [2:28:56<27:26:00, 3.45it/s] 8%|▊ | 30746/371472 [2:28:57<29:54:52, 3.16it/s] 8%|▊ | 30747/371472 [2:28:57<30:37:54, 3.09it/s] 8%|▊ | 30748/371472 [2:28:57<29:38:29, 3.19it/s] 8%|▊ | 30749/371472 [2:28:58<29:00:02, 3.26it/s] 8%|▊ | 30750/371472 [2:28:58<27:17:04, 3.47it/s] 8%|▊ | 30751/371472 [2:28:58<26:38:55, 3.55it/s] 8%|▊ | 30752/371472 [2:28:59<26:05:19, 3.63it/s] 8%|▊ | 30753/371472 [2:28:59<28:43:12, 3.30it/s] 8%|▊ | 30754/371472 [2:28:59<28:07:29, 3.37it/s] 8%|▊ | 30755/371472 [2:28:59<27:58:50, 3.38it/s] 8%|▊ | 30756/371472 [2:29:00<29:11:02, 3.24it/s] 8%|▊ | 30757/371472 [2:29:00<28:57:03, 3.27it/s] 8%|▊ | 30758/371472 [2:29:00<28:14:51, 3.35it/s] 8%|▊ | 30759/371472 [2:29:01<28:06:07, 3.37it/s] 8%|▊ | 30760/371472 [2:29:01<28:18:11, 3.34it/s] {'loss': 6.881, 'learning_rate': 9.259195414682497e-07, 'epoch': 1.32} + 8%|▊ | 30760/371472 [2:29:01<28:18:11, 3.34it/s] 8%|▊ | 30761/371472 [2:29:01<28:44:30, 3.29it/s] 8%|▊ | 30762/371472 [2:29:02<27:40:47, 3.42it/s] 8%|▊ | 30763/371472 [2:29:02<28:51:19, 3.28it/s] 8%|▊ | 30764/371472 [2:29:02<28:14:40, 3.35it/s] 8%|▊ | 30765/371472 [2:29:02<27:37:06, 3.43it/s] 8%|▊ | 30766/371472 [2:29:03<27:52:43, 3.39it/s] 8%|▊ | 30767/371472 [2:29:03<27:29:04, 3.44it/s] 8%|▊ | 30768/371472 [2:29:03<27:28:01, 3.45it/s] 8%|▊ | 30769/371472 [2:29:04<26:40:53, 3.55it/s] 8%|▊ | 30770/371472 [2:29:04<26:19:10, 3.60it/s] 8%|▊ | 30771/371472 [2:29:04<26:14:30, 3.61it/s] 8%|▊ | 30772/371472 [2:29:04<26:40:42, 3.55it/s] 8%|▊ | 30773/371472 [2:29:05<26:49:47, 3.53it/s] 8%|▊ | 30774/371472 [2:29:05<27:04:50, 3.49it/s] 8%|▊ | 30775/371472 [2:29:05<27:25:05, 3.45it/s] 8%|▊ | 30776/371472 [2:29:06<27:06:06, 3.49it/s] 8%|▊ | 30777/371472 [2:29:06<27:15:47, 3.47it/s] 8%|▊ | 30778/371472 [2:29:06<26:45:54, 3.54it/s] 8%|▊ | 30779/371472 [2:29:06<26:07:38, 3.62it/s] 8%|▊ | 30780/371472 [2:29:07<25:44:49, 3.68it/s] {'loss': 6.8269, 'learning_rate': 9.258710594927708e-07, 'epoch': 1.33} + 8%|▊ | 30780/371472 [2:29:07<25:44:49, 3.68it/s] 8%|▊ | 30781/371472 [2:29:07<26:38:51, 3.55it/s] 8%|▊ | 30782/371472 [2:29:07<26:50:16, 3.53it/s] 8%|▊ | 30783/371472 [2:29:08<26:16:45, 3.60it/s] 8%|▊ | 30784/371472 [2:29:08<27:43:50, 3.41it/s] 8%|▊ | 30785/371472 [2:29:08<26:43:36, 3.54it/s] 8%|▊ | 30786/371472 [2:29:08<26:24:44, 3.58it/s] 8%|▊ | 30787/371472 [2:29:09<28:25:22, 3.33it/s] 8%|▊ | 30788/371472 [2:29:09<27:07:33, 3.49it/s] 8%|▊ | 30789/371472 [2:29:09<27:34:02, 3.43it/s] 8%|▊ | 30790/371472 [2:29:10<26:31:47, 3.57it/s] 8%|▊ | 30791/371472 [2:29:10<26:00:48, 3.64it/s] 8%|▊ | 30792/371472 [2:29:10<25:42:00, 3.68it/s] 8%|▊ | 30793/371472 [2:29:10<26:37:46, 3.55it/s] 8%|▊ | 30794/371472 [2:29:11<26:10:55, 3.61it/s] 8%|▊ | 30795/371472 [2:29:11<25:50:07, 3.66it/s] 8%|▊ | 30796/371472 [2:29:11<27:06:49, 3.49it/s] 8%|▊ | 30797/371472 [2:29:12<27:31:24, 3.44it/s] 8%|▊ | 30798/371472 [2:29:12<26:37:39, 3.55it/s] 8%|▊ | 30799/371472 [2:29:12<27:02:32, 3.50it/s] 8%|▊ | 30800/371472 [2:29:12<27:16:22, 3.47it/s] {'loss': 6.5559, 'learning_rate': 9.258225775172919e-07, 'epoch': 1.33} + 8%|▊ | 30800/371472 [2:29:12<27:16:22, 3.47it/s] 8%|▊ | 30801/371472 [2:29:13<27:16:39, 3.47it/s] 8%|▊ | 30802/371472 [2:29:13<27:14:23, 3.47it/s] 8%|▊ | 30803/371472 [2:29:13<28:07:16, 3.37it/s] 8%|▊ | 30804/371472 [2:29:14<28:50:24, 3.28it/s] 8%|▊ | 30805/371472 [2:29:14<28:26:19, 3.33it/s] 8%|▊ | 30806/371472 [2:29:14<31:20:23, 3.02it/s] 8%|▊ | 30807/371472 [2:29:15<30:57:59, 3.06it/s] 8%|▊ | 30808/371472 [2:29:15<28:39:00, 3.30it/s] 8%|▊ | 30809/371472 [2:29:15<29:10:15, 3.24it/s] 8%|▊ | 30810/371472 [2:29:15<28:41:43, 3.30it/s] 8%|▊ | 30811/371472 [2:29:16<28:06:50, 3.37it/s] 8%|▊ | 30812/371472 [2:29:16<27:39:24, 3.42it/s] 8%|▊ | 30813/371472 [2:29:16<27:39:50, 3.42it/s] 8%|▊ | 30814/371472 [2:29:17<26:50:15, 3.53it/s] 8%|▊ | 30815/371472 [2:29:17<26:23:01, 3.59it/s] 8%|▊ | 30816/371472 [2:29:17<27:42:07, 3.42it/s] 8%|▊ | 30817/371472 [2:29:18<27:46:59, 3.41it/s] 8%|▊ | 30818/371472 [2:29:18<29:54:07, 3.16it/s] 8%|▊ | 30819/371472 [2:29:18<28:52:16, 3.28it/s] 8%|▊ | 30820/371472 [2:29:18<27:29:35, 3.44it/s] {'loss': 6.5314, 'learning_rate': 9.257740955418129e-07, 'epoch': 1.33} + 8%|▊ | 30820/371472 [2:29:18<27:29:35, 3.44it/s] 8%|▊ | 30821/371472 [2:29:19<27:55:54, 3.39it/s] 8%|▊ | 30822/371472 [2:29:19<28:17:42, 3.34it/s] 8%|▊ | 30823/371472 [2:29:19<27:49:13, 3.40it/s] 8%|▊ | 30824/371472 [2:29:20<26:25:24, 3.58it/s] 8%|▊ | 30825/371472 [2:29:20<26:41:00, 3.55it/s] 8%|▊ | 30826/371472 [2:29:20<25:55:48, 3.65it/s] 8%|▊ | 30827/371472 [2:29:20<25:38:14, 3.69it/s] 8%|▊ | 30828/371472 [2:29:21<25:42:42, 3.68it/s] 8%|▊ | 30829/371472 [2:29:21<27:45:39, 3.41it/s] 8%|▊ | 30830/371472 [2:29:21<26:44:44, 3.54it/s] 8%|▊ | 30831/371472 [2:29:22<26:51:26, 3.52it/s] 8%|▊ | 30832/371472 [2:29:22<25:44:55, 3.67it/s] 8%|▊ | 30833/371472 [2:29:22<25:54:14, 3.65it/s] 8%|▊ | 30834/371472 [2:29:22<27:26:31, 3.45it/s] 8%|▊ | 30835/371472 [2:29:23<26:06:02, 3.63it/s] 8%|▊ | 30836/371472 [2:29:23<26:15:38, 3.60it/s] 8%|▊ | 30837/371472 [2:29:23<26:01:52, 3.63it/s] 8%|▊ | 30838/371472 [2:29:24<28:12:18, 3.35it/s] 8%|▊ | 30839/371472 [2:29:24<28:41:01, 3.30it/s] 8%|▊ | 30840/371472 [2:29:24<26:56:44, 3.51it/s] {'loss': 6.7018, 'learning_rate': 9.257256135663341e-07, 'epoch': 1.33} + 8%|▊ | 30840/371472 [2:29:24<26:56:44, 3.51it/s] 8%|▊ | 30841/371472 [2:29:24<26:47:06, 3.53it/s] 8%|▊ | 30842/371472 [2:29:25<26:08:10, 3.62it/s] 8%|▊ | 30843/371472 [2:29:25<26:25:34, 3.58it/s] 8%|▊ | 30844/371472 [2:29:25<26:08:25, 3.62it/s] 8%|▊ | 30845/371472 [2:29:25<27:10:48, 3.48it/s] 8%|▊ | 30846/371472 [2:29:26<26:38:25, 3.55it/s] 8%|▊ | 30847/371472 [2:29:26<28:13:07, 3.35it/s] 8%|▊ | 30848/371472 [2:29:26<30:19:32, 3.12it/s] 8%|▊ | 30849/371472 [2:29:27<29:31:09, 3.21it/s] 8%|▊ | 30850/371472 [2:29:27<28:05:15, 3.37it/s] 8%|▊ | 30851/371472 [2:29:27<28:08:31, 3.36it/s] 8%|▊ | 30852/371472 [2:29:28<27:16:17, 3.47it/s] 8%|▊ | 30853/371472 [2:29:28<27:50:24, 3.40it/s] 8%|▊ | 30854/371472 [2:29:28<27:32:30, 3.44it/s] 8%|▊ | 30855/371472 [2:29:28<27:42:49, 3.41it/s] 8%|▊ | 30856/371472 [2:29:29<27:19:09, 3.46it/s] 8%|▊ | 30857/371472 [2:29:29<27:26:04, 3.45it/s] 8%|▊ | 30858/371472 [2:29:29<26:56:12, 3.51it/s] 8%|▊ | 30859/371472 [2:29:30<27:54:45, 3.39it/s] 8%|▊ | 30860/371472 [2:29:30<27:01:20, 3.50it/s] {'loss': 6.5448, 'learning_rate': 9.256771315908552e-07, 'epoch': 1.33} + 8%|▊ | 30860/371472 [2:29:30<27:01:20, 3.50it/s] 8%|▊ | 30861/371472 [2:29:30<26:46:55, 3.53it/s] 8%|▊ | 30862/371472 [2:29:30<26:49:45, 3.53it/s] 8%|▊ | 30863/371472 [2:29:31<28:39:51, 3.30it/s] 8%|▊ | 30864/371472 [2:29:31<28:08:55, 3.36it/s] 8%|▊ | 30865/371472 [2:29:31<27:04:24, 3.49it/s] 8%|▊ | 30866/371472 [2:29:32<27:28:24, 3.44it/s] 8%|▊ | 30867/371472 [2:29:32<26:36:53, 3.55it/s] 8%|▊ | 30868/371472 [2:29:32<26:32:05, 3.57it/s] 8%|▊ | 30869/371472 [2:29:32<27:04:09, 3.50it/s] 8%|▊ | 30870/371472 [2:29:33<26:40:09, 3.55it/s] 8%|▊ | 30871/371472 [2:29:33<27:11:40, 3.48it/s] 8%|▊ | 30872/371472 [2:29:33<27:54:30, 3.39it/s] 8%|▊ | 30873/371472 [2:29:34<28:33:33, 3.31it/s] 8%|▊ | 30874/371472 [2:29:34<27:16:16, 3.47it/s] 8%|▊ | 30875/371472 [2:29:34<26:07:43, 3.62it/s] 8%|▊ | 30876/371472 [2:29:35<28:02:33, 3.37it/s] 8%|▊ | 30877/371472 [2:29:35<28:16:34, 3.35it/s] 8%|▊ | 30878/371472 [2:29:35<27:27:53, 3.44it/s] 8%|▊ | 30879/371472 [2:29:35<26:56:11, 3.51it/s] 8%|▊ | 30880/371472 [2:29:36<26:18:35, 3.60it/s] {'loss': 6.63, 'learning_rate': 9.256286496153762e-07, 'epoch': 1.33} + 8%|▊ | 30880/371472 [2:29:36<26:18:35, 3.60it/s] 8%|▊ | 30881/371472 [2:29:36<27:20:40, 3.46it/s] 8%|▊ | 30882/371472 [2:29:36<27:14:05, 3.47it/s] 8%|▊ | 30883/371472 [2:29:37<28:08:53, 3.36it/s] 8%|▊ | 30884/371472 [2:29:37<28:30:04, 3.32it/s] 8%|▊ | 30885/371472 [2:29:37<27:38:09, 3.42it/s] 8%|▊ | 30886/371472 [2:29:37<27:46:07, 3.41it/s] 8%|▊ | 30887/371472 [2:29:38<26:52:47, 3.52it/s] 8%|▊ | 30888/371472 [2:29:38<26:34:31, 3.56it/s] 8%|▊ | 30889/371472 [2:29:38<27:02:13, 3.50it/s] 8%|▊ | 30890/371472 [2:29:39<27:44:22, 3.41it/s] 8%|▊ | 30891/371472 [2:29:39<28:35:46, 3.31it/s] 8%|▊ | 30892/371472 [2:29:39<28:32:18, 3.32it/s] 8%|▊ | 30893/371472 [2:29:39<28:04:27, 3.37it/s] 8%|▊ | 30894/371472 [2:29:40<27:18:14, 3.46it/s] 8%|▊ | 30895/371472 [2:29:40<26:39:23, 3.55it/s] 8%|▊ | 30896/371472 [2:29:40<26:20:50, 3.59it/s] 8%|▊ | 30897/371472 [2:29:41<28:59:20, 3.26it/s] 8%|▊ | 30898/371472 [2:29:41<28:55:03, 3.27it/s] 8%|▊ | 30899/371472 [2:29:41<27:57:35, 3.38it/s] 8%|▊ | 30900/371472 [2:29:42<29:20:31, 3.22it/s] {'loss': 6.676, 'learning_rate': 9.255801676398974e-07, 'epoch': 1.33} + 8%|▊ | 30900/371472 [2:29:42<29:20:31, 3.22it/s] 8%|▊ | 30901/371472 [2:29:42<29:15:39, 3.23it/s] 8%|▊ | 30902/371472 [2:29:42<29:09:52, 3.24it/s] 8%|▊ | 30903/371472 [2:29:42<27:56:00, 3.39it/s] 8%|▊ | 30904/371472 [2:29:43<27:37:06, 3.43it/s] 8%|▊ | 30905/371472 [2:29:43<27:11:52, 3.48it/s] 8%|▊ | 30906/371472 [2:29:43<28:15:42, 3.35it/s] 8%|▊ | 30907/371472 [2:29:44<27:19:21, 3.46it/s] 8%|▊ | 30908/371472 [2:29:44<26:40:13, 3.55it/s] 8%|▊ | 30909/371472 [2:29:44<26:37:14, 3.55it/s] 8%|▊ | 30910/371472 [2:29:44<26:22:52, 3.59it/s] 8%|▊ | 30911/371472 [2:29:45<25:44:05, 3.68it/s] 8%|▊ | 30912/371472 [2:29:45<26:52:23, 3.52it/s] 8%|▊ | 30913/371472 [2:29:45<26:07:02, 3.62it/s] 8%|▊ | 30914/371472 [2:29:46<26:58:57, 3.51it/s] 8%|▊ | 30915/371472 [2:29:46<28:55:28, 3.27it/s] 8%|▊ | 30916/371472 [2:29:46<29:53:28, 3.16it/s] 8%|▊ | 30917/371472 [2:29:47<28:04:48, 3.37it/s] 8%|▊ | 30918/371472 [2:29:47<28:45:32, 3.29it/s] 8%|▊ | 30919/371472 [2:29:47<28:27:56, 3.32it/s] 8%|▊ | 30920/371472 [2:29:47<27:43:43, 3.41it/s] {'loss': 6.5936, 'learning_rate': 9.255316856644186e-07, 'epoch': 1.33} + 8%|▊ | 30920/371472 [2:29:47<27:43:43, 3.41it/s] 8%|▊ | 30921/371472 [2:29:48<28:04:09, 3.37it/s] 8%|▊ | 30922/371472 [2:29:48<27:37:26, 3.42it/s] 8%|▊ | 30923/371472 [2:29:48<27:11:03, 3.48it/s] 8%|▊ | 30924/371472 [2:29:49<28:38:57, 3.30it/s] 8%|▊ | 30925/371472 [2:29:49<27:44:05, 3.41it/s] 8%|▊ | 30926/371472 [2:29:49<28:00:00, 3.38it/s] 8%|▊ | 30927/371472 [2:29:49<26:31:17, 3.57it/s] 8%|▊ | 30928/371472 [2:29:50<26:14:19, 3.61it/s] 8%|▊ | 30929/371472 [2:29:50<25:57:35, 3.64it/s] 8%|▊ | 30930/371472 [2:29:50<25:26:45, 3.72it/s] 8%|▊ | 30931/371472 [2:29:51<26:17:40, 3.60it/s] 8%|▊ | 30932/371472 [2:29:51<28:25:56, 3.33it/s] 8%|▊ | 30933/371472 [2:29:51<27:42:18, 3.41it/s] 8%|▊ | 30934/371472 [2:29:51<26:22:16, 3.59it/s] 8%|▊ | 30935/371472 [2:29:52<25:38:40, 3.69it/s] 8%|▊ | 30936/371472 [2:29:52<25:51:56, 3.66it/s] 8%|▊ | 30937/371472 [2:29:52<25:46:52, 3.67it/s] 8%|▊ | 30938/371472 [2:29:52<25:25:01, 3.72it/s] 8%|▊ | 30939/371472 [2:29:53<25:58:04, 3.64it/s] 8%|▊ | 30940/371472 [2:29:53<27:31:24, 3.44it/s] {'loss': 6.4651, 'learning_rate': 9.254832036889396e-07, 'epoch': 1.33} + 8%|▊ | 30940/371472 [2:29:53<27:31:24, 3.44it/s] 8%|▊ | 30941/371472 [2:29:53<28:32:43, 3.31it/s] 8%|▊ | 30942/371472 [2:29:54<27:02:55, 3.50it/s] 8%|▊ | 30943/371472 [2:29:54<30:08:46, 3.14it/s] 8%|▊ | 30944/371472 [2:29:54<28:50:18, 3.28it/s] 8%|▊ | 30945/371472 [2:29:55<27:21:55, 3.46it/s] 8%|▊ | 30946/371472 [2:29:55<27:50:51, 3.40it/s] 8%|▊ | 30947/371472 [2:29:55<30:04:25, 3.15it/s] 8%|▊ | 30948/371472 [2:29:56<28:55:50, 3.27it/s] 8%|▊ | 30949/371472 [2:29:56<27:26:22, 3.45it/s] 8%|▊ | 30950/371472 [2:29:56<27:58:04, 3.38it/s] 8%|▊ | 30951/371472 [2:29:56<28:29:30, 3.32it/s] 8%|▊ | 30952/371472 [2:29:57<28:34:04, 3.31it/s] 8%|▊ | 30953/371472 [2:29:57<27:48:34, 3.40it/s] 8%|▊ | 30954/371472 [2:29:57<28:12:02, 3.35it/s] 8%|▊ | 30955/371472 [2:29:58<27:27:04, 3.45it/s] 8%|▊ | 30956/371472 [2:29:58<27:42:39, 3.41it/s] 8%|▊ | 30957/371472 [2:29:58<26:53:36, 3.52it/s] 8%|▊ | 30958/371472 [2:29:58<26:55:43, 3.51it/s] 8%|▊ | 30959/371472 [2:29:59<26:58:59, 3.51it/s] 8%|▊ | 30960/371472 [2:29:59<26:40:07, 3.55it/s] {'loss': 6.8695, 'learning_rate': 9.254347217134606e-07, 'epoch': 1.33} + 8%|▊ | 30960/371472 [2:29:59<26:40:07, 3.55it/s] 8%|▊ | 30961/371472 [2:29:59<26:08:15, 3.62it/s] 8%|▊ | 30962/371472 [2:30:00<27:16:44, 3.47it/s] 8%|▊ | 30963/371472 [2:30:00<27:14:54, 3.47it/s] 8%|▊ | 30964/371472 [2:30:00<26:55:54, 3.51it/s] 8%|▊ | 30965/371472 [2:30:00<26:25:04, 3.58it/s] 8%|▊ | 30966/371472 [2:30:01<25:56:06, 3.65it/s] 8%|▊ | 30967/371472 [2:30:01<25:44:46, 3.67it/s] 8%|▊ | 30968/371472 [2:30:01<27:03:48, 3.49it/s] 8%|▊ | 30969/371472 [2:30:01<26:13:33, 3.61it/s] 8%|▊ | 30970/371472 [2:30:02<25:54:24, 3.65it/s] 8%|▊ | 30971/371472 [2:30:02<25:57:41, 3.64it/s] 8%|▊ | 30972/371472 [2:30:02<25:41:42, 3.68it/s] 8%|▊ | 30973/371472 [2:30:03<25:22:01, 3.73it/s] 8%|▊ | 30974/371472 [2:30:03<26:15:42, 3.60it/s] 8%|▊ | 30975/371472 [2:30:03<27:42:05, 3.41it/s] 8%|▊ | 30976/371472 [2:30:04<29:37:55, 3.19it/s] 8%|▊ | 30977/371472 [2:30:04<29:56:01, 3.16it/s] 8%|▊ | 30978/371472 [2:30:04<29:05:02, 3.25it/s] 8%|▊ | 30979/371472 [2:30:05<30:21:27, 3.12it/s] 8%|▊ | 30980/371472 [2:30:05<29:19:45, 3.22it/s] {'loss': 6.7311, 'learning_rate': 9.253862397379818e-07, 'epoch': 1.33} + 8%|▊ | 30980/371472 [2:30:05<29:19:45, 3.22it/s] 8%|▊ | 30981/371472 [2:30:05<28:50:59, 3.28it/s] 8%|▊ | 30982/371472 [2:30:05<27:56:04, 3.39it/s] 8%|▊ | 30983/371472 [2:30:06<27:14:15, 3.47it/s] 8%|▊ | 30984/371472 [2:30:06<28:08:11, 3.36it/s] 8%|▊ | 30985/371472 [2:30:06<27:37:50, 3.42it/s] 8%|▊ | 30986/371472 [2:30:07<27:36:42, 3.43it/s] 8%|▊ | 30987/371472 [2:30:07<27:01:48, 3.50it/s] 8%|▊ | 30988/371472 [2:30:07<26:55:33, 3.51it/s] 8%|▊ | 30989/371472 [2:30:07<27:16:46, 3.47it/s] 8%|▊ | 30990/371472 [2:30:08<26:12:42, 3.61it/s] 8%|▊ | 30991/371472 [2:30:08<25:36:25, 3.69it/s] 8%|▊ | 30992/371472 [2:30:08<26:29:36, 3.57it/s] 8%|▊ | 30993/371472 [2:30:09<27:42:57, 3.41it/s] 8%|▊ | 30994/371472 [2:30:09<29:44:04, 3.18it/s] 8%|▊ | 30995/371472 [2:30:09<29:43:09, 3.18it/s] 8%|▊ | 30996/371472 [2:30:09<28:46:58, 3.29it/s] 8%|▊ | 30997/371472 [2:30:10<27:08:31, 3.48it/s] 8%|▊ | 30998/371472 [2:30:10<26:55:22, 3.51it/s] 8%|▊ | 30999/371472 [2:30:10<26:48:39, 3.53it/s] 8%|▊ | 31000/371472 [2:30:11<26:52:01, 3.52it/s] {'loss': 6.6761, 'learning_rate': 9.253377577625029e-07, 'epoch': 1.34} + 8%|▊ | 31000/371472 [2:30:11<26:52:01, 3.52it/s] 8%|▊ | 31001/371472 [2:30:11<26:54:06, 3.52it/s] 8%|▊ | 31002/371472 [2:30:11<27:13:44, 3.47it/s] 8%|▊ | 31003/371472 [2:30:11<27:08:05, 3.49it/s] 8%|▊ | 31004/371472 [2:30:12<27:01:41, 3.50it/s] 8%|▊ | 31005/371472 [2:30:12<27:00:57, 3.50it/s] 8%|▊ | 31006/371472 [2:30:12<26:46:48, 3.53it/s] 8%|▊ | 31007/371472 [2:30:13<26:28:29, 3.57it/s] 8%|▊ | 31008/371472 [2:30:13<26:19:40, 3.59it/s] 8%|▊ | 31009/371472 [2:30:13<25:45:22, 3.67it/s] 8%|▊ | 31010/371472 [2:30:13<25:26:26, 3.72it/s] 8%|▊ | 31011/371472 [2:30:14<25:22:44, 3.73it/s] 8%|▊ | 31012/371472 [2:30:14<25:31:34, 3.70it/s] 8%|▊ | 31013/371472 [2:30:14<26:30:05, 3.57it/s] 8%|▊ | 31014/371472 [2:30:14<25:58:08, 3.64it/s] 8%|▊ | 31015/371472 [2:30:15<25:18:49, 3.74it/s] 8%|▊ | 31016/371472 [2:30:15<25:28:03, 3.71it/s] 8%|▊ | 31017/371472 [2:30:15<26:26:14, 3.58it/s] 8%|▊ | 31018/371472 [2:30:16<25:44:26, 3.67it/s] 8%|▊ | 31019/371472 [2:30:16<25:27:15, 3.72it/s] 8%|▊ | 31020/371472 [2:30:16<26:04:32, 3.63it/s] {'loss': 6.9313, 'learning_rate': 9.252892757870241e-07, 'epoch': 1.34} + 8%|▊ | 31020/371472 [2:30:16<26:04:32, 3.63it/s] 8%|▊ | 31021/371472 [2:30:16<26:14:44, 3.60it/s] 8%|▊ | 31022/371472 [2:30:17<25:49:46, 3.66it/s] 8%|▊ | 31023/371472 [2:30:17<25:28:02, 3.71it/s] 8%|▊ | 31024/371472 [2:30:17<25:35:29, 3.70it/s] 8%|▊ | 31025/371472 [2:30:18<27:46:04, 3.41it/s] 8%|▊ | 31026/371472 [2:30:18<27:03:05, 3.50it/s] 8%|▊ | 31027/371472 [2:30:18<26:32:50, 3.56it/s] 8%|▊ | 31028/371472 [2:30:18<26:16:37, 3.60it/s] 8%|▊ | 31029/371472 [2:30:19<26:11:26, 3.61it/s] 8%|▊ | 31030/371472 [2:30:19<25:19:26, 3.73it/s] 8%|▊ | 31031/371472 [2:30:19<26:17:06, 3.60it/s] 8%|▊ | 31032/371472 [2:30:19<25:47:28, 3.67it/s] 8%|▊ | 31033/371472 [2:30:20<25:35:24, 3.70it/s] 8%|▊ | 31034/371472 [2:30:20<25:38:57, 3.69it/s] 8%|▊ | 31035/371472 [2:30:20<25:48:11, 3.66it/s] 8%|▊ | 31036/371472 [2:30:20<25:17:18, 3.74it/s] 8%|▊ | 31037/371472 [2:30:21<25:12:06, 3.75it/s] 8%|▊ | 31038/371472 [2:30:21<27:14:33, 3.47it/s] 8%|▊ | 31039/371472 [2:30:21<26:44:01, 3.54it/s] 8%|▊ | 31040/371472 [2:30:22<27:23:39, 3.45it/s] {'loss': 6.7279, 'learning_rate': 9.252407938115451e-07, 'epoch': 1.34} + 8%|▊ | 31040/371472 [2:30:22<27:23:39, 3.45it/s] 8%|▊ | 31041/371472 [2:30:22<26:51:57, 3.52it/s] 8%|▊ | 31042/371472 [2:30:22<26:46:35, 3.53it/s] 8%|▊ | 31043/371472 [2:30:22<26:39:04, 3.55it/s] 8%|▊ | 31044/371472 [2:30:23<29:16:55, 3.23it/s] 8%|▊ | 31045/371472 [2:30:23<27:27:31, 3.44it/s] 8%|▊ | 31046/371472 [2:30:23<27:27:43, 3.44it/s] 8%|▊ | 31047/371472 [2:30:24<27:46:32, 3.40it/s] 8%|▊ | 31048/371472 [2:30:24<27:19:15, 3.46it/s] 8%|▊ | 31049/371472 [2:30:24<26:57:09, 3.51it/s] 8%|▊ | 31050/371472 [2:30:25<27:02:56, 3.50it/s] 8%|▊ | 31051/371472 [2:30:25<26:55:44, 3.51it/s] 8%|▊ | 31052/371472 [2:30:25<28:52:38, 3.27it/s] 8%|▊ | 31053/371472 [2:30:25<27:49:36, 3.40it/s] 8%|▊ | 31054/371472 [2:30:26<29:41:17, 3.19it/s] 8%|▊ | 31055/371472 [2:30:26<29:15:07, 3.23it/s] 8%|▊ | 31056/371472 [2:30:26<28:04:21, 3.37it/s] 8%|▊ | 31057/371472 [2:30:27<27:03:58, 3.49it/s] 8%|▊ | 31058/371472 [2:30:27<29:46:40, 3.18it/s] 8%|▊ | 31059/371472 [2:30:27<29:10:54, 3.24it/s] 8%|▊ | 31060/371472 [2:30:28<27:47:30, 3.40it/s] {'loss': 6.5361, 'learning_rate': 9.251923118360662e-07, 'epoch': 1.34} + 8%|▊ | 31060/371472 [2:30:28<27:47:30, 3.40it/s] 8%|▊ | 31061/371472 [2:30:28<27:14:04, 3.47it/s] 8%|▊ | 31062/371472 [2:30:28<26:49:57, 3.52it/s] 8%|▊ | 31063/371472 [2:30:28<26:25:30, 3.58it/s] 8%|▊ | 31064/371472 [2:30:29<26:41:01, 3.54it/s] 8%|▊ | 31065/371472 [2:30:29<28:43:31, 3.29it/s] 8%|▊ | 31066/371472 [2:30:29<27:54:00, 3.39it/s] 8%|▊ | 31067/371472 [2:30:30<28:00:32, 3.38it/s] 8%|▊ | 31068/371472 [2:30:30<27:42:45, 3.41it/s] 8%|▊ | 31069/371472 [2:30:30<26:56:37, 3.51it/s] 8%|▊ | 31070/371472 [2:30:30<27:33:26, 3.43it/s] 8%|▊ | 31071/371472 [2:30:31<28:58:59, 3.26it/s] 8%|▊ | 31072/371472 [2:30:31<28:27:10, 3.32it/s] 8%|▊ | 31073/371472 [2:30:31<28:38:12, 3.30it/s] 8%|▊ | 31074/371472 [2:30:32<27:30:46, 3.44it/s] 8%|▊ | 31075/371472 [2:30:32<28:09:32, 3.36it/s] 8%|▊ | 31076/371472 [2:30:32<28:56:34, 3.27it/s] 8%|▊ | 31077/371472 [2:30:33<35:12:12, 2.69it/s] 8%|▊ | 31078/371472 [2:30:33<32:15:06, 2.93it/s] 8%|▊ | 31079/371472 [2:30:33<30:51:22, 3.06it/s] 8%|▊ | 31080/371472 [2:30:34<29:01:11, 3.26it/s] {'loss': 6.6605, 'learning_rate': 9.251438298605873e-07, 'epoch': 1.34} + 8%|▊ | 31080/371472 [2:30:34<29:01:11, 3.26it/s] 8%|▊ | 31081/371472 [2:30:34<27:48:14, 3.40it/s] 8%|▊ | 31082/371472 [2:30:34<27:30:52, 3.44it/s] 8%|▊ | 31083/371472 [2:30:34<26:43:58, 3.54it/s] 8%|▊ | 31084/371472 [2:30:35<26:02:59, 3.63it/s] 8%|▊ | 31085/371472 [2:30:35<27:44:51, 3.41it/s] 8%|▊ | 31086/371472 [2:30:35<27:35:56, 3.43it/s] 8%|▊ | 31087/371472 [2:30:36<26:36:25, 3.55it/s] 8%|▊ | 31088/371472 [2:30:36<26:51:27, 3.52it/s] 8%|▊ | 31089/371472 [2:30:36<27:08:28, 3.48it/s] 8%|▊ | 31090/371472 [2:30:36<26:57:21, 3.51it/s] 8%|▊ | 31091/371472 [2:30:37<26:13:06, 3.61it/s] 8%|▊ | 31092/371472 [2:30:37<26:10:57, 3.61it/s] 8%|▊ | 31093/371472 [2:30:37<26:29:07, 3.57it/s] 8%|▊ | 31094/371472 [2:30:38<27:18:17, 3.46it/s] 8%|▊ | 31095/371472 [2:30:38<27:24:43, 3.45it/s] 8%|▊ | 31096/371472 [2:30:38<27:26:00, 3.45it/s] 8%|▊ | 31097/371472 [2:30:38<27:04:10, 3.49it/s] 8%|▊ | 31098/371472 [2:30:39<27:30:37, 3.44it/s] 8%|▊ | 31099/371472 [2:30:39<27:28:07, 3.44it/s] 8%|▊ | 31100/371472 [2:30:39<27:33:15, 3.43it/s] {'loss': 6.0599, 'learning_rate': 9.250953478851085e-07, 'epoch': 1.34} + 8%|▊ | 31100/371472 [2:30:39<27:33:15, 3.43it/s] 8%|▊ | 31101/371472 [2:30:40<27:16:28, 3.47it/s] 8%|▊ | 31102/371472 [2:30:40<27:10:52, 3.48it/s] 8%|▊ | 31103/371472 [2:30:40<28:45:56, 3.29it/s] 8%|▊ | 31104/371472 [2:30:41<28:01:14, 3.37it/s] 8%|▊ | 31105/371472 [2:30:41<28:05:15, 3.37it/s] 8%|▊ | 31106/371472 [2:30:41<28:36:01, 3.31it/s] 8%|▊ | 31107/371472 [2:30:41<27:04:30, 3.49it/s] 8%|▊ | 31108/371472 [2:30:42<26:04:56, 3.62it/s] 8%|▊ | 31109/371472 [2:30:42<25:36:59, 3.69it/s] 8%|▊ | 31110/371472 [2:30:42<25:34:34, 3.70it/s] 8%|▊ | 31111/371472 [2:30:42<25:35:55, 3.69it/s] 8%|▊ | 31112/371472 [2:30:43<29:01:01, 3.26it/s] 8%|▊ | 31113/371472 [2:30:43<28:42:52, 3.29it/s] 8%|▊ | 31114/371472 [2:30:43<28:30:30, 3.32it/s] 8%|▊ | 31115/371472 [2:30:44<28:48:05, 3.28it/s] 8%|▊ | 31116/371472 [2:30:44<31:26:51, 3.01it/s] 8%|▊ | 31117/371472 [2:30:44<29:54:02, 3.16it/s] 8%|▊ | 31118/371472 [2:30:45<28:25:10, 3.33it/s] 8%|▊ | 31119/371472 [2:30:45<27:34:57, 3.43it/s] 8%|▊ | 31120/371472 [2:30:45<28:21:38, 3.33it/s] {'loss': 4.8999, 'learning_rate': 9.250468659096295e-07, 'epoch': 1.34} + 8%|▊ | 31120/371472 [2:30:45<28:21:38, 3.33it/s] 8%|▊ | 31121/371472 [2:30:46<28:27:30, 3.32it/s] 8%|▊ | 31122/371472 [2:30:46<27:18:54, 3.46it/s] 8%|▊ | 31123/371472 [2:30:46<29:01:24, 3.26it/s] 8%|▊ | 31124/371472 [2:30:46<28:48:40, 3.28it/s] 8%|▊ | 31125/371472 [2:30:47<28:06:44, 3.36it/s] 8%|▊ | 31126/371472 [2:30:47<28:23:47, 3.33it/s] 8%|▊ | 31127/371472 [2:30:47<27:54:11, 3.39it/s] 8%|▊ | 31128/371472 [2:30:48<29:03:09, 3.25it/s] 8%|▊ | 31129/371472 [2:30:48<27:48:01, 3.40it/s] 8%|▊ | 31130/371472 [2:30:48<28:22:54, 3.33it/s] 8%|▊ | 31131/371472 [2:30:49<28:40:04, 3.30it/s] 8%|▊ | 31132/371472 [2:30:49<27:45:54, 3.40it/s] 8%|▊ | 31133/371472 [2:30:49<26:35:14, 3.56it/s] 8%|▊ | 31134/371472 [2:30:49<25:52:55, 3.65it/s] 8%|▊ | 31135/371472 [2:30:50<25:48:32, 3.66it/s] 8%|▊ | 31136/371472 [2:30:50<25:22:59, 3.72it/s] 8%|▊ | 31137/371472 [2:30:50<25:04:19, 3.77it/s] 8%|▊ | 31138/371472 [2:30:50<27:16:26, 3.47it/s] 8%|▊ | 31139/371472 [2:30:51<28:06:20, 3.36it/s] 8%|▊ | 31140/371472 [2:30:51<27:02:44, 3.50it/s] {'loss': 4.8532, 'learning_rate': 9.249983839341507e-07, 'epoch': 1.34} + 8%|▊ | 31140/371472 [2:30:51<27:02:44, 3.50it/s] 8%|▊ | 31141/371472 [2:30:51<27:55:24, 3.39it/s] 8%|▊ | 31142/371472 [2:30:52<26:52:18, 3.52it/s] 8%|▊ | 31143/371472 [2:30:52<26:26:16, 3.58it/s] 8%|▊ | 31144/371472 [2:30:52<26:41:53, 3.54it/s] 8%|▊ | 31145/371472 [2:30:52<26:33:41, 3.56it/s] 8%|▊ | 31146/371472 [2:30:53<27:51:30, 3.39it/s] 8%|▊ | 31147/371472 [2:30:53<27:51:07, 3.39it/s] 8%|▊ | 31148/371472 [2:30:53<27:17:12, 3.46it/s] 8%|▊ | 31149/371472 [2:30:54<27:36:08, 3.42it/s] 8%|▊ | 31150/371472 [2:30:54<27:17:24, 3.46it/s] 8%|▊ | 31151/371472 [2:30:54<26:58:20, 3.50it/s] 8%|▊ | 31152/371472 [2:30:55<27:07:21, 3.49it/s] 8%|▊ | 31153/371472 [2:30:55<28:21:54, 3.33it/s] 8%|▊ | 31154/371472 [2:30:55<27:29:57, 3.44it/s] 8%|▊ | 31155/371472 [2:30:55<27:03:09, 3.49it/s] 8%|▊ | 31156/371472 [2:30:56<26:23:16, 3.58it/s] 8%|▊ | 31157/371472 [2:30:56<26:40:04, 3.54it/s] 8%|▊ | 31158/371472 [2:30:56<28:00:26, 3.38it/s] 8%|▊ | 31159/371472 [2:30:57<26:57:18, 3.51it/s] 8%|▊ | 31160/371472 [2:30:57<28:28:50, 3.32it/s] {'loss': 4.5862, 'learning_rate': 9.249499019586718e-07, 'epoch': 1.34} + 8%|▊ | 31160/371472 [2:30:57<28:28:50, 3.32it/s] 8%|▊ | 31161/371472 [2:30:57<30:30:07, 3.10it/s] 8%|▊ | 31162/371472 [2:30:57<29:00:26, 3.26it/s] 8%|▊ | 31163/371472 [2:30:58<28:46:18, 3.29it/s] 8%|▊ | 31164/371472 [2:30:58<28:36:02, 3.31it/s] 8%|▊ | 31165/371472 [2:30:58<29:20:23, 3.22it/s] 8%|▊ | 31166/371472 [2:30:59<28:42:48, 3.29it/s] 8%|▊ | 31167/371472 [2:30:59<28:03:58, 3.37it/s] 8%|▊ | 31168/371472 [2:30:59<27:29:15, 3.44it/s] 8%|▊ | 31169/371472 [2:31:00<26:59:09, 3.50it/s] 8%|▊ | 31170/371472 [2:31:00<26:22:33, 3.58it/s] 8%|▊ | 31171/371472 [2:31:00<26:31:39, 3.56it/s] 8%|▊ | 31172/371472 [2:31:00<25:58:34, 3.64it/s] 8%|▊ | 31173/371472 [2:31:01<27:10:18, 3.48it/s] 8%|▊ | 31174/371472 [2:31:01<26:22:49, 3.58it/s] 8%|▊ | 31175/371472 [2:31:01<27:22:05, 3.45it/s] 8%|▊ | 31176/371472 [2:31:02<27:01:13, 3.50it/s] 8%|▊ | 31177/371472 [2:31:02<25:52:56, 3.65it/s] 8%|▊ | 31178/371472 [2:31:02<25:23:30, 3.72it/s] 8%|▊ | 31179/371472 [2:31:02<27:35:43, 3.43it/s] 8%|▊ | 31180/371472 [2:31:03<27:13:51, 3.47it/s] {'loss': 4.7988, 'learning_rate': 9.249014199831928e-07, 'epoch': 1.34} + 8%|▊ | 31180/371472 [2:31:03<27:13:51, 3.47it/s] 8%|▊ | 31181/371472 [2:31:03<29:18:51, 3.22it/s] 8%|▊ | 31182/371472 [2:31:03<28:21:41, 3.33it/s] 8%|▊ | 31183/371472 [2:31:04<30:19:26, 3.12it/s] 8%|▊ | 31184/371472 [2:31:04<28:41:30, 3.29it/s] 8%|▊ | 31185/371472 [2:31:04<28:10:49, 3.35it/s] 8%|▊ | 31186/371472 [2:31:04<27:18:43, 3.46it/s] 8%|▊ | 31187/371472 [2:31:05<26:54:50, 3.51it/s] 8%|▊ | 31188/371472 [2:31:05<27:30:10, 3.44it/s] 8%|▊ | 31189/371472 [2:31:05<29:50:41, 3.17it/s] 8%|▊ | 31190/371472 [2:31:06<29:09:56, 3.24it/s] 8%|▊ | 31191/371472 [2:31:06<27:50:36, 3.39it/s] 8%|▊ | 31192/371472 [2:31:06<28:06:43, 3.36it/s] 8%|▊ | 31193/371472 [2:31:07<27:42:47, 3.41it/s] 8%|▊ | 31194/371472 [2:31:07<28:13:16, 3.35it/s] 8%|▊ | 31195/371472 [2:31:07<31:09:07, 3.03it/s] 8%|▊ | 31196/371472 [2:31:08<30:21:17, 3.11it/s] 8%|▊ | 31197/371472 [2:31:08<29:13:57, 3.23it/s] 8%|▊ | 31198/371472 [2:31:08<28:54:22, 3.27it/s] 8%|▊ | 31199/371472 [2:31:08<28:34:05, 3.31it/s] 8%|▊ | 31200/371472 [2:31:09<27:39:41, 3.42it/s] {'loss': 4.4932, 'learning_rate': 9.248529380077139e-07, 'epoch': 1.34} + 8%|▊ | 31200/371472 [2:31:09<27:39:41, 3.42it/s] 8%|▊ | 31201/371472 [2:31:09<26:50:54, 3.52it/s] 8%|▊ | 31202/371472 [2:31:09<25:45:24, 3.67it/s] 8%|▊ | 31203/371472 [2:31:10<25:35:19, 3.69it/s] 8%|▊ | 31204/371472 [2:31:10<25:09:57, 3.76it/s] 8%|▊ | 31205/371472 [2:31:10<24:49:49, 3.81it/s] 8%|▊ | 31206/371472 [2:31:10<27:37:38, 3.42it/s] 8%|▊ | 31207/371472 [2:31:11<26:32:45, 3.56it/s] 8%|▊ | 31208/371472 [2:31:11<26:12:15, 3.61it/s] 8%|▊ | 31209/371472 [2:31:11<26:17:34, 3.59it/s] 8%|▊ | 31210/371472 [2:31:11<26:04:40, 3.62it/s] 8%|▊ | 31211/371472 [2:31:12<26:19:49, 3.59it/s] 8%|▊ | 31212/371472 [2:31:12<26:42:41, 3.54it/s] 8%|▊ | 31213/371472 [2:31:12<28:12:40, 3.35it/s] 8%|▊ | 31214/371472 [2:31:13<29:17:54, 3.23it/s] 8%|▊ | 31215/371472 [2:31:13<28:49:41, 3.28it/s] 8%|▊ | 31216/371472 [2:31:13<30:46:35, 3.07it/s] 8%|▊ | 31217/371472 [2:31:14<29:53:52, 3.16it/s] 8%|▊ | 31218/371472 [2:31:14<29:55:01, 3.16it/s] 8%|▊ | 31219/371472 [2:31:14<28:12:59, 3.35it/s] 8%|▊ | 31220/371472 [2:31:14<27:20:46, 3.46it/s] {'loss': 4.8881, 'learning_rate': 9.248044560322351e-07, 'epoch': 1.34} + 8%|▊ | 31220/371472 [2:31:15<27:20:46, 3.46it/s] 8%|▊ | 31221/371472 [2:31:15<26:16:56, 3.60it/s] 8%|▊ | 31222/371472 [2:31:15<26:37:26, 3.55it/s] 8%|▊ | 31223/371472 [2:31:15<28:23:37, 3.33it/s] 8%|▊ | 31224/371472 [2:31:16<28:24:30, 3.33it/s] 8%|▊ | 31225/371472 [2:31:16<27:22:39, 3.45it/s] 8%|▊ | 31226/371472 [2:31:16<27:22:51, 3.45it/s] 8%|▊ | 31227/371472 [2:31:16<26:17:38, 3.59it/s] 8%|▊ | 31228/371472 [2:31:17<26:12:09, 3.61it/s] 8%|▊ | 31229/371472 [2:31:17<27:00:26, 3.50it/s] 8%|▊ | 31230/371472 [2:31:17<26:21:34, 3.59it/s] 8%|▊ | 31231/371472 [2:31:18<26:01:01, 3.63it/s] 8%|▊ | 31232/371472 [2:31:18<27:06:40, 3.49it/s] 8%|▊ | 31233/371472 [2:31:18<26:45:44, 3.53it/s] 8%|▊ | 31234/371472 [2:31:18<26:06:58, 3.62it/s] 8%|▊ | 31235/371472 [2:31:19<26:17:02, 3.60it/s] 8%|▊ | 31236/371472 [2:31:19<25:50:44, 3.66it/s] 8%|▊ | 31237/371472 [2:31:19<25:32:39, 3.70it/s] 8%|▊ | 31238/371472 [2:31:20<25:36:01, 3.69it/s] 8%|▊ | 31239/371472 [2:31:20<25:39:47, 3.68it/s] 8%|▊ | 31240/371472 [2:31:20<26:23:39, 3.58it/s] {'loss': 5.7502, 'learning_rate': 9.247559740567562e-07, 'epoch': 1.35} + 8%|▊ | 31240/371472 [2:31:20<26:23:39, 3.58it/s] 8%|▊ | 31241/371472 [2:31:20<26:27:36, 3.57it/s] 8%|▊ | 31242/371472 [2:31:21<25:58:17, 3.64it/s] 8%|▊ | 31243/371472 [2:31:21<25:28:21, 3.71it/s] 8%|▊ | 31244/371472 [2:31:21<27:16:14, 3.47it/s] 8%|▊ | 31245/371472 [2:31:21<26:05:09, 3.62it/s] 8%|▊ | 31246/371472 [2:31:22<28:55:11, 3.27it/s] 8%|▊ | 31247/371472 [2:31:22<28:07:29, 3.36it/s] 8%|▊ | 31248/371472 [2:31:22<27:27:13, 3.44it/s] 8%|▊ | 31249/371472 [2:31:23<27:37:02, 3.42it/s] 8%|▊ | 31250/371472 [2:31:23<26:41:12, 3.54it/s] 8%|▊ | 31251/371472 [2:31:23<27:09:33, 3.48it/s] 8%|▊ | 31252/371472 [2:31:24<26:36:40, 3.55it/s] 8%|▊ | 31253/371472 [2:31:24<27:45:32, 3.40it/s] 8%|▊ | 31254/371472 [2:31:24<28:00:00, 3.38it/s] 8%|▊ | 31255/371472 [2:31:24<27:27:05, 3.44it/s] 8%|▊ | 31256/371472 [2:31:25<27:17:11, 3.46it/s] 8%|▊ | 31257/371472 [2:31:25<29:13:13, 3.23it/s] 8%|▊ | 31258/371472 [2:31:25<29:15:22, 3.23it/s] 8%|▊ | 31259/371472 [2:31:26<28:48:22, 3.28it/s] 8%|▊ | 31260/371472 [2:31:26<27:55:05, 3.39it/s] {'loss': 5.3209, 'learning_rate': 9.247074920812773e-07, 'epoch': 1.35} + 8%|▊ | 31260/371472 [2:31:26<27:55:05, 3.39it/s] 8%|▊ | 31261/371472 [2:31:26<27:40:06, 3.42it/s] 8%|▊ | 31262/371472 [2:31:27<27:24:15, 3.45it/s] 8%|▊ | 31263/371472 [2:31:27<28:19:18, 3.34it/s] 8%|▊ | 31264/371472 [2:31:27<28:22:05, 3.33it/s] 8%|▊ | 31265/371472 [2:31:27<27:15:27, 3.47it/s] 8%|▊ | 31266/371472 [2:31:28<25:58:52, 3.64it/s] 8%|▊ | 31267/371472 [2:31:28<27:19:58, 3.46it/s] 8%|▊ | 31268/371472 [2:31:28<28:09:54, 3.36it/s] 8%|▊ | 31269/371472 [2:31:29<27:52:19, 3.39it/s] 8%|▊ | 31270/371472 [2:31:29<28:08:19, 3.36it/s] 8%|▊ | 31271/371472 [2:31:29<27:15:25, 3.47it/s] 8%|▊ | 31272/371472 [2:31:29<26:01:30, 3.63it/s] 8%|▊ | 31273/371472 [2:31:30<27:48:09, 3.40it/s] 8%|▊ | 31274/371472 [2:31:30<28:23:02, 3.33it/s] 8%|▊ | 31275/371472 [2:31:30<27:51:07, 3.39it/s] 8%|▊ | 31276/371472 [2:31:31<26:32:38, 3.56it/s] 8%|▊ | 31277/371472 [2:31:31<25:56:00, 3.64it/s] 8%|▊ | 31278/371472 [2:31:31<25:38:45, 3.68it/s] 8%|▊ | 31279/371472 [2:31:31<26:15:42, 3.60it/s] 8%|▊ | 31280/371472 [2:31:32<26:02:23, 3.63it/s] {'loss': 5.0991, 'learning_rate': 9.246590101057984e-07, 'epoch': 1.35} + 8%|▊ | 31280/371472 [2:31:32<26:02:23, 3.63it/s] 8%|▊ | 31281/371472 [2:31:32<25:43:34, 3.67it/s] 8%|▊ | 31282/371472 [2:31:32<25:29:42, 3.71it/s] 8%|▊ | 31283/371472 [2:31:32<25:22:53, 3.72it/s] 8%|▊ | 31284/371472 [2:31:33<25:20:16, 3.73it/s] 8%|▊ | 31285/371472 [2:31:33<25:09:47, 3.76it/s] 8%|▊ | 31286/371472 [2:31:33<25:51:07, 3.66it/s] 8%|▊ | 31287/371472 [2:31:34<25:58:19, 3.64it/s] 8%|▊ | 31288/371472 [2:31:34<28:39:34, 3.30it/s] 8%|▊ | 31289/371472 [2:31:34<27:26:20, 3.44it/s] 8%|▊ | 31290/371472 [2:31:34<26:33:40, 3.56it/s] 8%|▊ | 31291/371472 [2:31:35<25:47:31, 3.66it/s] 8%|▊ | 31292/371472 [2:31:35<26:01:45, 3.63it/s] 8%|▊ | 31293/371472 [2:31:35<25:48:57, 3.66it/s] 8%|▊ | 31294/371472 [2:31:36<26:47:19, 3.53it/s] 8%|▊ | 31295/371472 [2:31:36<26:46:22, 3.53it/s] 8%|▊ | 31296/371472 [2:31:36<28:45:36, 3.29it/s] 8%|▊ | 31297/371472 [2:31:36<27:48:00, 3.40it/s] 8%|▊ | 31298/371472 [2:31:37<27:54:59, 3.38it/s] 8%|▊ | 31299/371472 [2:31:37<27:14:19, 3.47it/s] 8%|▊ | 31300/371472 [2:31:37<27:12:26, 3.47it/s] {'loss': 4.9333, 'learning_rate': 9.246105281303196e-07, 'epoch': 1.35} + 8%|▊ | 31300/371472 [2:31:37<27:12:26, 3.47it/s] 8%|▊ | 31301/371472 [2:31:38<28:14:47, 3.35it/s] 8%|▊ | 31302/371472 [2:31:38<29:44:58, 3.18it/s] 8%|▊ | 31303/371472 [2:31:38<28:27:14, 3.32it/s] 8%|▊ | 31304/371472 [2:31:39<29:02:03, 3.25it/s] 8%|▊ | 31305/371472 [2:31:39<28:27:03, 3.32it/s] 8%|▊ | 31306/371472 [2:31:39<27:58:59, 3.38it/s] 8%|▊ | 31307/371472 [2:31:39<28:23:26, 3.33it/s] 8%|▊ | 31308/371472 [2:31:40<27:05:44, 3.49it/s] 8%|▊ | 31309/371472 [2:31:40<26:44:11, 3.53it/s] 8%|▊ | 31310/371472 [2:31:40<29:16:39, 3.23it/s] 8%|▊ | 31311/371472 [2:31:41<28:04:08, 3.37it/s] 8%|▊ | 31312/371472 [2:31:41<26:59:19, 3.50it/s] 8%|▊ | 31313/371472 [2:31:41<27:52:18, 3.39it/s] 8%|▊ | 31314/371472 [2:31:42<28:47:35, 3.28it/s] 8%|▊ | 31315/371472 [2:31:42<27:33:53, 3.43it/s] 8%|▊ | 31316/371472 [2:31:42<27:43:30, 3.41it/s] 8%|▊ | 31317/371472 [2:31:42<27:01:42, 3.50it/s] 8%|▊ | 31318/371472 [2:31:43<27:44:23, 3.41it/s] 8%|▊ | 31319/371472 [2:31:43<27:45:31, 3.40it/s] 8%|▊ | 31320/371472 [2:31:43<26:34:02, 3.56it/s] {'loss': 5.1862, 'learning_rate': 9.245620461548406e-07, 'epoch': 1.35} + 8%|▊ | 31320/371472 [2:31:43<26:34:02, 3.56it/s] 8%|▊ | 31321/371472 [2:31:43<25:59:01, 3.64it/s] 8%|▊ | 31322/371472 [2:31:44<26:04:26, 3.62it/s] 8%|▊ | 31323/371472 [2:31:44<25:38:41, 3.68it/s] 8%|▊ | 31324/371472 [2:31:44<26:26:30, 3.57it/s] 8%|▊ | 31325/371472 [2:31:45<26:13:59, 3.60it/s] 8%|▊ | 31326/371472 [2:31:45<25:54:36, 3.65it/s] 8%|▊ | 31327/371472 [2:31:45<26:36:24, 3.55it/s] 8%|▊ | 31328/371472 [2:31:45<25:45:57, 3.67it/s] 8%|▊ | 31329/371472 [2:31:46<28:42:02, 3.29it/s] 8%|▊ | 31330/371472 [2:31:46<27:55:55, 3.38it/s] 8%|▊ | 31331/371472 [2:31:46<27:33:56, 3.43it/s] 8%|▊ | 31332/371472 [2:31:47<28:14:36, 3.35it/s] 8%|▊ | 31333/371472 [2:31:47<28:13:28, 3.35it/s] 8%|▊ | 31334/371472 [2:31:47<27:32:42, 3.43it/s] 8%|▊ | 31335/371472 [2:31:48<28:31:53, 3.31it/s] 8%|▊ | 31336/371472 [2:31:48<27:29:55, 3.44it/s] 8%|▊ | 31337/371472 [2:31:48<26:57:40, 3.50it/s] 8%|▊ | 31338/371472 [2:31:48<29:21:23, 3.22it/s] 8%|▊ | 31339/371472 [2:31:49<33:40:56, 2.81it/s] 8%|▊ | 31340/371472 [2:31:49<30:42:58, 3.08it/s] {'loss': 5.2048, 'learning_rate': 9.245135641793617e-07, 'epoch': 1.35} + 8%|▊ | 31340/371472 [2:31:49<30:42:58, 3.08it/s] 8%|▊ | 31341/371472 [2:31:50<30:37:57, 3.08it/s] 8%|▊ | 31342/371472 [2:31:50<30:15:55, 3.12it/s] 8%|▊ | 31343/371472 [2:31:50<30:49:01, 3.07it/s] 8%|▊ | 31344/371472 [2:31:50<28:45:06, 3.29it/s] 8%|▊ | 31345/371472 [2:31:51<30:42:44, 3.08it/s] 8%|▊ | 31346/371472 [2:31:51<29:22:56, 3.22it/s] 8%|▊ | 31347/371472 [2:31:51<27:58:59, 3.38it/s] 8%|▊ | 31348/371472 [2:31:52<27:46:36, 3.40it/s] 8%|▊ | 31349/371472 [2:31:52<27:26:02, 3.44it/s] 8%|▊ | 31350/371472 [2:31:52<28:10:55, 3.35it/s] 8%|▊ | 31351/371472 [2:31:53<27:54:36, 3.39it/s] 8%|▊ | 31352/371472 [2:31:53<27:33:45, 3.43it/s] 8%|▊ | 31353/371472 [2:31:53<27:43:57, 3.41it/s] 8%|▊ | 31354/371472 [2:31:53<27:07:12, 3.48it/s] 8%|▊ | 31355/371472 [2:31:54<26:23:39, 3.58it/s] 8%|▊ | 31356/371472 [2:31:54<26:57:38, 3.50it/s] 8%|▊ | 31357/371472 [2:31:54<27:25:18, 3.45it/s] 8%|▊ | 31358/371472 [2:31:54<26:17:59, 3.59it/s] 8%|▊ | 31359/371472 [2:31:55<25:59:55, 3.63it/s] 8%|▊ | 31360/371472 [2:31:55<25:48:05, 3.66it/s] {'loss': 5.2122, 'learning_rate': 9.244650822038829e-07, 'epoch': 1.35} + 8%|▊ | 31360/371472 [2:31:55<25:48:05, 3.66it/s] 8%|▊ | 31361/371472 [2:31:55<26:14:21, 3.60it/s] 8%|▊ | 31362/371472 [2:31:56<25:41:44, 3.68it/s] 8%|▊ | 31363/371472 [2:31:56<25:16:03, 3.74it/s] 8%|▊ | 31364/371472 [2:31:56<25:27:16, 3.71it/s] 8%|▊ | 31365/371472 [2:31:56<25:21:03, 3.73it/s] 8%|▊ | 31366/371472 [2:31:57<26:38:05, 3.55it/s] 8%|▊ | 31367/371472 [2:31:57<27:28:04, 3.44it/s] 8%|▊ | 31368/371472 [2:31:57<26:56:59, 3.51it/s] 8%|▊ | 31369/371472 [2:31:58<26:31:33, 3.56it/s] 8%|▊ | 31370/371472 [2:31:58<26:35:43, 3.55it/s] 8%|▊ | 31371/371472 [2:31:58<25:52:50, 3.65it/s] 8%|▊ | 31372/371472 [2:31:58<26:29:42, 3.57it/s] 8%|▊ | 31373/371472 [2:31:59<26:14:15, 3.60it/s] 8%|▊ | 31374/371472 [2:31:59<27:03:51, 3.49it/s] 8%|▊ | 31375/371472 [2:31:59<27:09:07, 3.48it/s] 8%|▊ | 31376/371472 [2:32:00<27:20:47, 3.45it/s] 8%|▊ | 31377/371472 [2:32:00<28:57:05, 3.26it/s] 8%|▊ | 31378/371472 [2:32:00<27:20:18, 3.46it/s] 8%|▊ | 31379/371472 [2:32:00<26:46:02, 3.53it/s] 8%|▊ | 31380/371472 [2:32:01<26:00:54, 3.63it/s] {'loss': 5.1965, 'learning_rate': 9.24416600228404e-07, 'epoch': 1.35} + 8%|▊ | 31380/371472 [2:32:01<26:00:54, 3.63it/s] 8%|▊ | 31381/371472 [2:32:01<26:05:29, 3.62it/s] 8%|▊ | 31382/371472 [2:32:01<28:07:02, 3.36it/s] 8%|▊ | 31383/371472 [2:32:02<26:29:37, 3.57it/s] 8%|▊ | 31384/371472 [2:32:02<27:21:03, 3.45it/s] 8%|▊ | 31385/371472 [2:32:02<26:45:44, 3.53it/s] 8%|▊ | 31386/371472 [2:32:02<28:00:10, 3.37it/s] 8%|▊ | 31387/371472 [2:32:03<27:27:46, 3.44it/s] 8%|▊ | 31388/371472 [2:32:03<27:59:58, 3.37it/s] 8%|▊ | 31389/371472 [2:32:03<29:40:15, 3.18it/s] 8%|▊ | 31390/371472 [2:32:04<30:27:05, 3.10it/s] 8%|▊ | 31391/371472 [2:32:04<30:12:39, 3.13it/s] 8%|▊ | 31392/371472 [2:32:04<28:48:49, 3.28it/s] 8%|▊ | 31393/371472 [2:32:05<29:02:57, 3.25it/s] 8%|▊ | 31394/371472 [2:32:05<31:55:36, 2.96it/s] 8%|▊ | 31395/371472 [2:32:05<29:47:27, 3.17it/s] 8%|▊ | 31396/371472 [2:32:06<28:23:17, 3.33it/s] 8%|▊ | 31397/371472 [2:32:06<27:27:37, 3.44it/s] 8%|▊ | 31398/371472 [2:32:06<27:48:37, 3.40it/s] 8%|▊ | 31399/371472 [2:32:06<29:38:06, 3.19it/s] 8%|▊ | 31400/371472 [2:32:07<28:42:49, 3.29it/s] {'loss': 5.2912, 'learning_rate': 9.243681182529251e-07, 'epoch': 1.35} + 8%|▊ | 31400/371472 [2:32:07<28:42:49, 3.29it/s] 8%|▊ | 31401/371472 [2:32:07<28:11:04, 3.35it/s] 8%|▊ | 31402/371472 [2:32:07<30:50:55, 3.06it/s] 8%|▊ | 31403/371472 [2:32:08<29:57:17, 3.15it/s] 8%|▊ | 31404/371472 [2:32:08<28:06:35, 3.36it/s] 8%|▊ | 31405/371472 [2:32:08<27:14:54, 3.47it/s] 8%|▊ | 31406/371472 [2:32:09<27:02:15, 3.49it/s] 8%|▊ | 31407/371472 [2:32:09<27:35:56, 3.42it/s] 8%|▊ | 31408/371472 [2:32:09<28:03:11, 3.37it/s] 8%|▊ | 31409/371472 [2:32:09<28:06:10, 3.36it/s] 8%|▊ | 31410/371472 [2:32:10<28:02:59, 3.37it/s] 8%|▊ | 31411/371472 [2:32:10<27:46:46, 3.40it/s] 8%|▊ | 31412/371472 [2:32:10<27:26:47, 3.44it/s] 8%|▊ | 31413/371472 [2:32:11<26:34:36, 3.55it/s] 8%|▊ | 31414/371472 [2:32:11<26:36:00, 3.55it/s] 8%|▊ | 31415/371472 [2:32:11<27:13:27, 3.47it/s] 8%|▊ | 31416/371472 [2:32:11<26:31:39, 3.56it/s] 8%|▊ | 31417/371472 [2:32:12<27:07:33, 3.48it/s] 8%|▊ | 31418/371472 [2:32:12<26:44:46, 3.53it/s] 8%|▊ | 31419/371472 [2:32:12<27:14:04, 3.47it/s] 8%|▊ | 31420/371472 [2:32:13<29:11:08, 3.24it/s] {'loss': 4.911, 'learning_rate': 9.243196362774461e-07, 'epoch': 1.35} + 8%|▊ | 31420/371472 [2:32:13<29:11:08, 3.24it/s] 8%|▊ | 31421/371472 [2:32:13<27:47:37, 3.40it/s] 8%|▊ | 31422/371472 [2:32:13<27:16:41, 3.46it/s] 8%|▊ | 31423/371472 [2:32:13<27:45:21, 3.40it/s] 8%|▊ | 31424/371472 [2:32:14<26:43:23, 3.53it/s] 8%|▊ | 31425/371472 [2:32:14<28:15:13, 3.34it/s] 8%|▊ | 31426/371472 [2:32:14<29:14:49, 3.23it/s] 8%|▊ | 31427/371472 [2:32:15<28:54:09, 3.27it/s] 8%|▊ | 31428/371472 [2:32:15<28:01:18, 3.37it/s] 8%|▊ | 31429/371472 [2:32:15<26:52:59, 3.51it/s] 8%|▊ | 31430/371472 [2:32:16<29:06:18, 3.25it/s] 8%|▊ | 31431/371472 [2:32:16<29:20:21, 3.22it/s] 8%|▊ | 31432/371472 [2:32:16<27:59:03, 3.38it/s] 8%|▊ | 31433/371472 [2:32:17<30:30:40, 3.10it/s] 8%|▊ | 31434/371472 [2:32:17<30:17:36, 3.12it/s] 8%|▊ | 31435/371472 [2:32:17<28:08:57, 3.36it/s] 8%|▊ | 31436/371472 [2:32:17<27:40:05, 3.41it/s] 8%|▊ | 31437/371472 [2:32:18<26:30:05, 3.56it/s] 8%|▊ | 31438/371472 [2:32:18<26:04:32, 3.62it/s] 8%|▊ | 31439/371472 [2:32:18<26:34:22, 3.55it/s] 8%|▊ | 31440/371472 [2:32:18<25:43:54, 3.67it/s] {'loss': 4.8417, 'learning_rate': 9.242711543019672e-07, 'epoch': 1.35} + 8%|▊ | 31440/371472 [2:32:18<25:43:54, 3.67it/s] 8%|▊ | 31441/371472 [2:32:19<29:16:20, 3.23it/s] 8%|▊ | 31442/371472 [2:32:19<29:10:30, 3.24it/s] 8%|▊ | 31443/371472 [2:32:19<28:33:57, 3.31it/s] 8%|▊ | 31444/371472 [2:32:20<28:12:45, 3.35it/s] 8%|▊ | 31445/371472 [2:32:20<29:00:14, 3.26it/s] 8%|▊ | 31446/371472 [2:32:20<28:16:55, 3.34it/s] 8%|▊ | 31447/371472 [2:32:21<27:36:01, 3.42it/s] 8%|▊ | 31448/371472 [2:32:21<26:51:50, 3.52it/s] 8%|▊ | 31449/371472 [2:32:21<27:21:27, 3.45it/s] 8%|▊ | 31450/371472 [2:32:21<26:57:43, 3.50it/s] 8%|▊ | 31451/371472 [2:32:22<26:56:39, 3.51it/s] 8%|▊ | 31452/371472 [2:32:22<26:25:02, 3.58it/s] 8%|▊ | 31453/371472 [2:32:22<26:15:01, 3.60it/s] 8%|▊ | 31454/371472 [2:32:23<25:49:22, 3.66it/s] 8%|▊ | 31455/371472 [2:32:23<27:30:40, 3.43it/s] 8%|▊ | 31456/371472 [2:32:23<27:54:11, 3.38it/s] 8%|▊ | 31457/371472 [2:32:23<27:58:39, 3.38it/s] 8%|▊ | 31458/371472 [2:32:24<29:24:36, 3.21it/s] 8%|▊ | 31459/371472 [2:32:24<30:21:28, 3.11it/s] 8%|▊ | 31460/371472 [2:32:24<29:47:38, 3.17it/s] {'loss': 5.1662, 'learning_rate': 9.242226723264884e-07, 'epoch': 1.36} + 8%|▊ | 31460/371472 [2:32:24<29:47:38, 3.17it/s] 8%|▊ | 31461/371472 [2:32:25<28:55:13, 3.27it/s] 8%|▊ | 31462/371472 [2:32:25<29:53:00, 3.16it/s] 8%|▊ | 31463/371472 [2:32:25<28:10:26, 3.35it/s] 8%|▊ | 31464/371472 [2:32:26<27:42:41, 3.41it/s] 8%|▊ | 31465/371472 [2:32:26<27:08:01, 3.48it/s] 8%|▊ | 31466/371472 [2:32:26<26:25:41, 3.57it/s] 8%|▊ | 31467/371472 [2:32:27<28:17:59, 3.34it/s] 8%|▊ | 31468/371472 [2:32:27<28:08:24, 3.36it/s] 8%|▊ | 31469/371472 [2:32:27<26:38:58, 3.54it/s] 8%|▊ | 31470/371472 [2:32:27<25:46:47, 3.66it/s] 8%|▊ | 31471/371472 [2:32:28<25:59:59, 3.63it/s] 8%|▊ | 31472/371472 [2:32:28<29:38:01, 3.19it/s] 8%|▊ | 31473/371472 [2:32:28<28:34:24, 3.31it/s] 8%|▊ | 31474/371472 [2:32:29<28:49:06, 3.28it/s] 8%|▊ | 31475/371472 [2:32:29<28:56:38, 3.26it/s] 8%|▊ | 31476/371472 [2:32:29<28:55:07, 3.27it/s] 8%|▊ | 31477/371472 [2:32:29<28:10:17, 3.35it/s] 8%|▊ | 31478/371472 [2:32:30<28:11:18, 3.35it/s] 8%|▊ | 31479/371472 [2:32:30<27:26:01, 3.44it/s] 8%|▊ | 31480/371472 [2:32:30<27:01:54, 3.49it/s] {'loss': 5.1467, 'learning_rate': 9.241741903510094e-07, 'epoch': 1.36} + 8%|▊ | 31480/371472 [2:32:30<27:01:54, 3.49it/s] 8%|▊ | 31481/371472 [2:32:31<26:27:54, 3.57it/s] 8%|▊ | 31482/371472 [2:32:31<26:01:06, 3.63it/s] 8%|▊ | 31483/371472 [2:32:31<25:37:24, 3.69it/s] 8%|▊ | 31484/371472 [2:32:31<25:11:16, 3.75it/s] 8%|▊ | 31485/371472 [2:32:32<25:33:28, 3.70it/s] 8%|▊ | 31486/371472 [2:32:32<25:38:13, 3.68it/s] 8%|▊ | 31487/371472 [2:32:32<26:21:07, 3.58it/s] 8%|▊ | 31488/371472 [2:32:32<25:41:50, 3.68it/s] 8%|▊ | 31489/371472 [2:32:33<27:36:53, 3.42it/s] 8%|▊ | 31490/371472 [2:32:33<26:28:42, 3.57it/s] 8%|▊ | 31491/371472 [2:32:33<26:12:36, 3.60it/s] 8%|▊ | 31492/371472 [2:32:34<25:46:54, 3.66it/s] 8%|▊ | 31493/371472 [2:32:34<29:25:22, 3.21it/s] 8%|▊ | 31494/371472 [2:32:34<28:02:47, 3.37it/s] 8%|▊ | 31495/371472 [2:32:35<27:35:08, 3.42it/s] 8%|▊ | 31496/371472 [2:32:35<27:19:00, 3.46it/s] 8%|▊ | 31497/371472 [2:32:35<27:56:29, 3.38it/s] 8%|▊ | 31498/371472 [2:32:35<28:00:30, 3.37it/s] 8%|▊ | 31499/371472 [2:32:36<27:24:40, 3.45it/s] 8%|▊ | 31500/371472 [2:32:36<27:57:59, 3.38it/s] {'loss': 5.3874, 'learning_rate': 9.241257083755305e-07, 'epoch': 1.36} + 8%|▊ | 31500/371472 [2:32:36<27:57:59, 3.38it/s] 8%|▊ | 31501/371472 [2:32:36<27:06:05, 3.48it/s] 8%|▊ | 31502/371472 [2:32:37<26:27:22, 3.57it/s] 8%|▊ | 31503/371472 [2:32:37<26:11:57, 3.60it/s] 8%|▊ | 31504/371472 [2:32:37<25:46:51, 3.66it/s] 8%|▊ | 31505/371472 [2:32:37<25:10:24, 3.75it/s] 8%|▊ | 31506/371472 [2:32:38<26:36:39, 3.55it/s] 8%|▊ | 31507/371472 [2:32:38<26:59:49, 3.50it/s] 8%|▊ | 31508/371472 [2:32:38<26:19:36, 3.59it/s] 8%|▊ | 31509/371472 [2:32:39<26:27:16, 3.57it/s] 8%|▊ | 31510/371472 [2:32:39<26:29:22, 3.56it/s] 8%|▊ | 31511/371472 [2:32:39<25:58:59, 3.63it/s] 8%|▊ | 31512/371472 [2:32:39<25:31:54, 3.70it/s] 8%|▊ | 31513/371472 [2:32:40<26:17:07, 3.59it/s] 8%|▊ | 31514/371472 [2:32:40<26:36:49, 3.55it/s] 8%|▊ | 31515/371472 [2:32:40<26:22:51, 3.58it/s] 8%|▊ | 31516/371472 [2:32:40<26:18:47, 3.59it/s] 8%|▊ | 31517/371472 [2:32:41<28:31:52, 3.31it/s] 8%|▊ | 31518/371472 [2:32:41<28:33:53, 3.31it/s] 8%|▊ | 31519/371472 [2:32:41<28:02:31, 3.37it/s] 8%|▊ | 31520/371472 [2:32:42<27:31:25, 3.43it/s] {'loss': 5.8458, 'learning_rate': 9.240772264000517e-07, 'epoch': 1.36} + 8%|▊ | 31520/371472 [2:32:42<27:31:25, 3.43it/s] 8%|▊ | 31521/371472 [2:32:42<26:57:31, 3.50it/s] 8%|▊ | 31522/371472 [2:32:42<26:01:15, 3.63it/s] 8%|▊ | 31523/371472 [2:32:42<26:29:20, 3.56it/s] 8%|▊ | 31524/371472 [2:32:43<26:40:42, 3.54it/s] 8%|▊ | 31525/371472 [2:32:43<27:13:52, 3.47it/s] 8%|▊ | 31526/371472 [2:32:43<27:06:27, 3.48it/s] 8%|▊ | 31527/371472 [2:32:44<26:25:37, 3.57it/s] 8%|▊ | 31528/371472 [2:32:44<26:09:59, 3.61it/s] 8%|▊ | 31529/371472 [2:32:44<27:11:24, 3.47it/s] 8%|▊ | 31530/371472 [2:32:44<26:48:55, 3.52it/s] 8%|▊ | 31531/371472 [2:32:45<26:44:50, 3.53it/s] 8%|▊ | 31532/371472 [2:32:45<26:44:45, 3.53it/s] 8%|▊ | 31533/371472 [2:32:45<27:43:46, 3.41it/s] 8%|▊ | 31534/371472 [2:32:46<27:03:09, 3.49it/s] 8%|▊ | 31535/371472 [2:32:46<26:57:26, 3.50it/s] 8%|▊ | 31536/371472 [2:32:46<28:17:25, 3.34it/s] 8%|▊ | 31537/371472 [2:32:47<26:51:09, 3.52it/s] 8%|▊ | 31538/371472 [2:32:47<26:26:36, 3.57it/s] 8%|▊ | 31539/371472 [2:32:47<25:38:08, 3.68it/s] 8%|▊ | 31540/371472 [2:32:47<25:40:12, 3.68it/s] {'loss': 6.3314, 'learning_rate': 9.240287444245728e-07, 'epoch': 1.36} + 8%|▊ | 31540/371472 [2:32:47<25:40:12, 3.68it/s] 8%|▊ | 31541/371472 [2:32:48<26:12:18, 3.60it/s] 8%|▊ | 31542/371472 [2:32:48<26:45:16, 3.53it/s] 8%|▊ | 31543/371472 [2:32:48<28:05:08, 3.36it/s] 8%|▊ | 31544/371472 [2:32:48<27:22:20, 3.45it/s] 8%|▊ | 31545/371472 [2:32:49<26:40:35, 3.54it/s] 8%|▊ | 31546/371472 [2:32:49<26:09:21, 3.61it/s] 8%|▊ | 31547/371472 [2:32:49<28:26:07, 3.32it/s] 8%|▊ | 31548/371472 [2:32:50<28:54:50, 3.27it/s] 8%|▊ | 31549/371472 [2:32:50<27:19:17, 3.46it/s] 8%|▊ | 31550/371472 [2:32:50<27:08:08, 3.48it/s] 8%|▊ | 31551/371472 [2:32:50<26:19:27, 3.59it/s] 8%|▊ | 31552/371472 [2:32:51<26:02:34, 3.63it/s] 8%|▊ | 31553/371472 [2:32:51<27:08:58, 3.48it/s] 8%|▊ | 31554/371472 [2:32:51<28:13:09, 3.35it/s] 8%|▊ | 31555/371472 [2:32:52<27:42:38, 3.41it/s] 8%|▊ | 31556/371472 [2:32:52<27:40:59, 3.41it/s] 8%|▊ | 31557/371472 [2:32:52<26:53:30, 3.51it/s] 8%|▊ | 31558/371472 [2:32:52<26:01:58, 3.63it/s] 8%|▊ | 31559/371472 [2:32:53<26:06:30, 3.62it/s] 8%|▊ | 31560/371472 [2:32:53<25:35:24, 3.69it/s] {'loss': 6.3855, 'learning_rate': 9.23980262449094e-07, 'epoch': 1.36} + 8%|▊ | 31560/371472 [2:32:53<25:35:24, 3.69it/s] 8%|▊ | 31561/371472 [2:32:53<26:23:17, 3.58it/s] 8%|▊ | 31562/371472 [2:32:54<25:43:05, 3.67it/s] 8%|▊ | 31563/371472 [2:32:54<25:50:45, 3.65it/s] 8%|▊ | 31564/371472 [2:32:54<25:33:41, 3.69it/s] 8%|▊ | 31565/371472 [2:32:54<25:54:53, 3.64it/s] 8%|▊ | 31566/371472 [2:32:55<25:40:41, 3.68it/s] 8%|▊ | 31567/371472 [2:32:55<27:00:04, 3.50it/s] 8%|▊ | 31568/371472 [2:32:55<27:04:11, 3.49it/s] 8%|▊ | 31569/371472 [2:32:56<27:07:11, 3.48it/s] 8%|▊ | 31570/371472 [2:32:56<26:51:49, 3.51it/s] 8%|▊ | 31571/371472 [2:32:56<27:48:59, 3.39it/s] 8%|▊ | 31572/371472 [2:32:56<28:07:58, 3.36it/s] 8%|▊ | 31573/371472 [2:32:57<27:36:44, 3.42it/s] 8%|▊ | 31574/371472 [2:32:57<27:39:38, 3.41it/s] 8%|▊ | 31575/371472 [2:32:57<27:33:53, 3.43it/s] 9%|▊ | 31576/371472 [2:32:58<27:11:00, 3.47it/s] 9%|▊ | 31577/371472 [2:32:58<27:04:17, 3.49it/s] 9%|▊ | 31578/371472 [2:32:58<26:59:08, 3.50it/s] 9%|▊ | 31579/371472 [2:32:58<27:21:40, 3.45it/s] 9%|▊ | 31580/371472 [2:32:59<30:16:42, 3.12it/s] {'loss': 6.4171, 'learning_rate': 9.239317804736149e-07, 'epoch': 1.36} + 9%|▊ | 31580/371472 [2:32:59<30:16:42, 3.12it/s] 9%|▊ | 31581/371472 [2:32:59<29:10:53, 3.24it/s] 9%|▊ | 31582/371472 [2:32:59<29:22:22, 3.21it/s] 9%|▊ | 31583/371472 [2:33:00<27:28:40, 3.44it/s] 9%|▊ | 31584/371472 [2:33:00<29:47:47, 3.17it/s] 9%|▊ | 31585/371472 [2:33:00<30:17:29, 3.12it/s] 9%|▊ | 31586/371472 [2:33:01<28:51:07, 3.27it/s] 9%|▊ | 31587/371472 [2:33:01<28:13:08, 3.35it/s] 9%|▊ | 31588/371472 [2:33:01<27:26:40, 3.44it/s] 9%|▊ | 31589/371472 [2:33:01<26:21:15, 3.58it/s] 9%|▊ | 31590/371472 [2:33:02<25:44:17, 3.67it/s] 9%|▊ | 31591/371472 [2:33:02<25:26:03, 3.71it/s] 9%|▊ | 31592/371472 [2:33:02<26:06:18, 3.62it/s] 9%|▊ | 31593/371472 [2:33:03<27:53:40, 3.38it/s] 9%|▊ | 31594/371472 [2:33:03<29:41:57, 3.18it/s] 9%|▊ | 31595/371472 [2:33:03<29:33:45, 3.19it/s] 9%|▊ | 31596/371472 [2:33:04<31:32:45, 2.99it/s] 9%|▊ | 31597/371472 [2:33:04<29:38:14, 3.19it/s] 9%|▊ | 31598/371472 [2:33:04<27:54:03, 3.38it/s] 9%|▊ | 31599/371472 [2:33:05<28:02:02, 3.37it/s] 9%|▊ | 31600/371472 [2:33:05<27:59:35, 3.37it/s] {'loss': 6.7412, 'learning_rate': 9.238832984981361e-07, 'epoch': 1.36} + 9%|▊ | 31600/371472 [2:33:05<27:59:35, 3.37it/s] 9%|▊ | 31601/371472 [2:33:05<27:11:29, 3.47it/s] 9%|▊ | 31602/371472 [2:33:05<26:23:52, 3.58it/s] 9%|▊ | 31603/371472 [2:33:06<26:10:24, 3.61it/s] 9%|▊ | 31604/371472 [2:33:06<26:04:48, 3.62it/s] 9%|▊ | 31605/371472 [2:33:06<26:28:51, 3.57it/s] 9%|▊ | 31606/371472 [2:33:06<26:03:48, 3.62it/s] 9%|▊ | 31607/371472 [2:33:07<26:07:44, 3.61it/s] 9%|▊ | 31608/371472 [2:33:07<26:42:45, 3.53it/s] 9%|▊ | 31609/371472 [2:33:07<28:08:25, 3.35it/s] 9%|▊ | 31610/371472 [2:33:08<27:14:45, 3.46it/s] 9%|▊ | 31611/371472 [2:33:08<26:04:22, 3.62it/s] 9%|▊ | 31612/371472 [2:33:08<25:27:22, 3.71it/s] 9%|▊ | 31613/371472 [2:33:08<25:18:34, 3.73it/s] 9%|▊ | 31614/371472 [2:33:09<24:53:40, 3.79it/s] 9%|▊ | 31615/371472 [2:33:09<25:32:40, 3.70it/s] 9%|▊ | 31616/371472 [2:33:09<26:31:09, 3.56it/s] 9%|▊ | 31617/371472 [2:33:10<27:23:19, 3.45it/s] 9%|▊ | 31618/371472 [2:33:10<26:15:18, 3.60it/s] 9%|▊ | 31619/371472 [2:33:10<26:29:35, 3.56it/s] 9%|▊ | 31620/371472 [2:33:10<26:18:18, 3.59it/s] {'loss': 6.7147, 'learning_rate': 9.238348165226572e-07, 'epoch': 1.36} + 9%|▊ | 31620/371472 [2:33:10<26:18:18, 3.59it/s] 9%|▊ | 31621/371472 [2:33:11<25:58:14, 3.63it/s] 9%|▊ | 31622/371472 [2:33:11<27:15:58, 3.46it/s] 9%|▊ | 31623/371472 [2:33:11<26:45:42, 3.53it/s] 9%|▊ | 31624/371472 [2:33:12<26:59:05, 3.50it/s] 9%|▊ | 31625/371472 [2:33:12<26:26:49, 3.57it/s] 9%|▊ | 31626/371472 [2:33:12<27:49:32, 3.39it/s] 9%|▊ | 31627/371472 [2:33:12<26:45:54, 3.53it/s] 9%|▊ | 31628/371472 [2:33:13<27:08:59, 3.48it/s] 9%|▊ | 31629/371472 [2:33:13<27:44:59, 3.40it/s] 9%|▊ | 31630/371472 [2:33:13<27:44:37, 3.40it/s] 9%|▊ | 31631/371472 [2:33:14<28:51:07, 3.27it/s] 9%|▊ | 31632/371472 [2:33:14<26:56:23, 3.50it/s] 9%|▊ | 31633/371472 [2:33:14<26:15:14, 3.60it/s] 9%|▊ | 31634/371472 [2:33:15<30:05:49, 3.14it/s] 9%|▊ | 31635/371472 [2:33:15<30:12:20, 3.13it/s] 9%|▊ | 31636/371472 [2:33:15<37:23:05, 2.53it/s] 9%|▊ | 31637/371472 [2:33:16<33:31:05, 2.82it/s] 9%|▊ | 31638/371472 [2:33:16<34:22:58, 2.75it/s] 9%|▊ | 31639/371472 [2:33:16<31:59:12, 2.95it/s] 9%|▊ | 31640/371472 [2:33:17<30:24:49, 3.10it/s] {'loss': 6.4155, 'learning_rate': 9.237863345471783e-07, 'epoch': 1.36} + 9%|▊ | 31640/371472 [2:33:17<30:24:49, 3.10it/s] 9%|▊ | 31641/371472 [2:33:17<28:46:12, 3.28it/s] 9%|▊ | 31642/371472 [2:33:17<28:06:10, 3.36it/s] 9%|▊ | 31643/371472 [2:33:18<35:20:55, 2.67it/s] 9%|▊ | 31644/371472 [2:33:18<32:16:41, 2.92it/s] 9%|▊ | 31645/371472 [2:33:18<30:09:13, 3.13it/s] 9%|▊ | 31646/371472 [2:33:19<29:25:58, 3.21it/s] 9%|▊ | 31647/371472 [2:33:19<27:58:35, 3.37it/s] 9%|▊ | 31648/371472 [2:33:19<26:38:39, 3.54it/s] 9%|▊ | 31649/371472 [2:33:19<27:09:40, 3.48it/s] 9%|▊ | 31650/371472 [2:33:20<27:04:44, 3.49it/s] 9%|▊ | 31651/371472 [2:33:20<26:49:22, 3.52it/s] 9%|▊ | 31652/371472 [2:33:20<26:19:39, 3.59it/s] 9%|▊ | 31653/371472 [2:33:20<25:58:14, 3.63it/s] 9%|▊ | 31654/371472 [2:33:21<26:25:25, 3.57it/s] 9%|▊ | 31655/371472 [2:33:21<26:45:54, 3.53it/s] 9%|▊ | 31656/371472 [2:33:21<26:37:50, 3.54it/s] 9%|▊ | 31657/371472 [2:33:22<25:53:56, 3.64it/s] 9%|▊ | 31658/371472 [2:33:22<26:12:41, 3.60it/s] 9%|▊ | 31659/371472 [2:33:22<25:25:03, 3.71it/s] 9%|▊ | 31660/371472 [2:33:22<25:24:34, 3.71it/s] {'loss': 6.4637, 'learning_rate': 9.237378525716994e-07, 'epoch': 1.36} + 9%|▊ | 31660/371472 [2:33:22<25:24:34, 3.71it/s] 9%|▊ | 31661/371472 [2:33:23<25:36:07, 3.69it/s] 9%|▊ | 31662/371472 [2:33:23<25:32:41, 3.70it/s] 9%|▊ | 31663/371472 [2:33:23<25:16:30, 3.73it/s] 9%|▊ | 31664/371472 [2:33:23<26:25:38, 3.57it/s] 9%|▊ | 31665/371472 [2:33:24<25:56:16, 3.64it/s] 9%|▊ | 31666/371472 [2:33:24<25:25:26, 3.71it/s] 9%|▊ | 31667/371472 [2:33:24<26:05:00, 3.62it/s] 9%|▊ | 31668/371472 [2:33:25<25:52:11, 3.65it/s] 9%|▊ | 31669/371472 [2:33:25<26:08:14, 3.61it/s] 9%|▊ | 31670/371472 [2:33:25<26:13:18, 3.60it/s] 9%|▊ | 31671/371472 [2:33:25<25:31:04, 3.70it/s] 9%|▊ | 31672/371472 [2:33:26<27:09:19, 3.48it/s] 9%|▊ | 31673/371472 [2:33:26<26:59:20, 3.50it/s] 9%|▊ | 31674/371472 [2:33:26<26:45:58, 3.53it/s] 9%|▊ | 31675/371472 [2:33:27<28:40:38, 3.29it/s] 9%|▊ | 31676/371472 [2:33:27<28:13:32, 3.34it/s] 9%|▊ | 31677/371472 [2:33:27<27:17:19, 3.46it/s] 9%|▊ | 31678/371472 [2:33:27<27:59:45, 3.37it/s] 9%|▊ | 31679/371472 [2:33:28<26:57:56, 3.50it/s] 9%|▊ | 31680/371472 [2:33:28<27:17:19, 3.46it/s] {'loss': 6.6824, 'learning_rate': 9.236893705962206e-07, 'epoch': 1.36} + 9%|▊ | 31680/371472 [2:33:28<27:17:19, 3.46it/s] 9%|▊ | 31681/371472 [2:33:28<28:20:43, 3.33it/s] 9%|▊ | 31682/371472 [2:33:29<27:25:44, 3.44it/s] 9%|▊ | 31683/371472 [2:33:29<27:16:32, 3.46it/s] 9%|▊ | 31684/371472 [2:33:29<28:40:03, 3.29it/s] 9%|▊ | 31685/371472 [2:33:30<28:04:09, 3.36it/s] 9%|▊ | 31686/371472 [2:33:30<27:15:25, 3.46it/s] 9%|▊ | 31687/371472 [2:33:30<27:07:27, 3.48it/s] 9%|▊ | 31688/371472 [2:33:30<26:58:42, 3.50it/s] 9%|▊ | 31689/371472 [2:33:31<27:03:00, 3.49it/s] 9%|▊ | 31690/371472 [2:33:31<26:58:57, 3.50it/s] 9%|▊ | 31691/371472 [2:33:31<27:42:11, 3.41it/s] 9%|▊ | 31692/371472 [2:33:32<26:57:17, 3.50it/s] 9%|▊ | 31693/371472 [2:33:32<28:14:04, 3.34it/s] 9%|▊ | 31694/371472 [2:33:32<27:08:32, 3.48it/s] 9%|▊ | 31695/371472 [2:33:32<26:17:15, 3.59it/s] 9%|▊ | 31696/371472 [2:33:33<26:26:09, 3.57it/s] 9%|▊ | 31697/371472 [2:33:33<26:53:28, 3.51it/s] 9%|▊ | 31698/371472 [2:33:33<26:29:33, 3.56it/s] 9%|▊ | 31699/371472 [2:33:34<26:38:04, 3.54it/s] 9%|▊ | 31700/371472 [2:33:34<27:23:19, 3.45it/s] {'loss': 6.4347, 'learning_rate': 9.236408886207416e-07, 'epoch': 1.37} + 9%|▊ | 31700/371472 [2:33:34<27:23:19, 3.45it/s] 9%|▊ | 31701/371472 [2:33:34<26:13:49, 3.60it/s] 9%|▊ | 31702/371472 [2:33:34<25:35:48, 3.69it/s] 9%|▊ | 31703/371472 [2:33:35<25:40:34, 3.68it/s] 9%|▊ | 31704/371472 [2:33:35<26:44:11, 3.53it/s] 9%|▊ | 31705/371472 [2:33:35<25:53:08, 3.65it/s] 9%|▊ | 31706/371472 [2:33:35<25:20:49, 3.72it/s] 9%|▊ | 31707/371472 [2:33:36<24:45:19, 3.81it/s] 9%|▊ | 31708/371472 [2:33:36<25:32:29, 3.70it/s] 9%|▊ | 31709/371472 [2:33:36<25:36:55, 3.68it/s] 9%|▊ | 31710/371472 [2:33:37<28:55:19, 3.26it/s] 9%|▊ | 31711/371472 [2:33:37<27:38:26, 3.41it/s] 9%|▊ | 31712/371472 [2:33:37<26:39:28, 3.54it/s] 9%|▊ | 31713/371472 [2:33:37<27:17:27, 3.46it/s] 9%|▊ | 31714/371472 [2:33:38<27:08:32, 3.48it/s] 9%|▊ | 31715/371472 [2:33:38<27:11:36, 3.47it/s] 9%|▊ | 31716/371472 [2:33:38<26:45:51, 3.53it/s] 9%|▊ | 31717/371472 [2:33:39<27:54:23, 3.38it/s] 9%|▊ | 31718/371472 [2:33:39<26:57:41, 3.50it/s] 9%|▊ | 31719/371472 [2:33:39<26:53:09, 3.51it/s] 9%|▊ | 31720/371472 [2:33:39<26:18:28, 3.59it/s] {'loss': 6.7485, 'learning_rate': 9.235924066452627e-07, 'epoch': 1.37} + 9%|▊ | 31720/371472 [2:33:39<26:18:28, 3.59it/s] 9%|▊ | 31721/371472 [2:33:40<26:13:18, 3.60it/s] 9%|▊ | 31722/371472 [2:33:40<27:44:21, 3.40it/s] 9%|▊ | 31723/371472 [2:33:40<27:23:43, 3.44it/s] 9%|▊ | 31724/371472 [2:33:41<25:58:51, 3.63it/s] 9%|▊ | 31725/371472 [2:33:41<26:44:15, 3.53it/s] 9%|▊ | 31726/371472 [2:33:41<26:48:01, 3.52it/s] 9%|▊ | 31727/371472 [2:33:41<27:22:14, 3.45it/s] 9%|▊ | 31728/371472 [2:33:42<26:57:38, 3.50it/s] 9%|▊ | 31729/371472 [2:33:42<28:39:08, 3.29it/s] 9%|▊ | 31730/371472 [2:33:42<30:12:13, 3.12it/s] 9%|▊ | 31731/371472 [2:33:43<28:55:02, 3.26it/s] 9%|▊ | 31732/371472 [2:33:43<31:35:35, 2.99it/s] 9%|▊ | 31733/371472 [2:33:43<31:24:23, 3.00it/s] 9%|▊ | 31734/371472 [2:33:44<30:10:34, 3.13it/s] 9%|▊ | 31735/371472 [2:33:44<28:59:43, 3.25it/s] 9%|▊ | 31736/371472 [2:33:44<29:24:50, 3.21it/s] 9%|▊ | 31737/371472 [2:33:45<28:27:12, 3.32it/s] 9%|▊ | 31738/371472 [2:33:45<28:20:31, 3.33it/s] 9%|▊ | 31739/371472 [2:33:45<27:40:42, 3.41it/s] 9%|▊ | 31740/371472 [2:33:45<27:03:59, 3.49it/s] {'loss': 6.6971, 'learning_rate': 9.235439246697838e-07, 'epoch': 1.37} + 9%|▊ | 31740/371472 [2:33:45<27:03:59, 3.49it/s] 9%|▊ | 31741/371472 [2:33:46<26:30:50, 3.56it/s] 9%|▊ | 31742/371472 [2:33:46<25:31:43, 3.70it/s] 9%|▊ | 31743/371472 [2:33:46<25:10:37, 3.75it/s] 9%|▊ | 31744/371472 [2:33:46<24:56:58, 3.78it/s] 9%|▊ | 31745/371472 [2:33:47<27:29:37, 3.43it/s] 9%|▊ | 31746/371472 [2:33:47<26:50:21, 3.52it/s] 9%|▊ | 31747/371472 [2:33:47<29:55:43, 3.15it/s] 9%|▊ | 31748/371472 [2:33:48<28:44:56, 3.28it/s] 9%|▊ | 31749/371472 [2:33:48<27:07:40, 3.48it/s] 9%|▊ | 31750/371472 [2:33:48<26:22:02, 3.58it/s] 9%|▊ | 31751/371472 [2:33:49<26:14:13, 3.60it/s] 9%|▊ | 31752/371472 [2:33:49<27:29:33, 3.43it/s] 9%|▊ | 31753/371472 [2:33:49<26:42:27, 3.53it/s] 9%|▊ | 31754/371472 [2:33:49<27:04:31, 3.49it/s] 9%|▊ | 31755/371472 [2:33:50<26:14:04, 3.60it/s] 9%|▊ | 31756/371472 [2:33:50<25:46:53, 3.66it/s] 9%|▊ | 31757/371472 [2:33:50<26:10:26, 3.61it/s] 9%|▊ | 31758/371472 [2:33:50<25:26:46, 3.71it/s] 9%|▊ | 31759/371472 [2:33:51<24:56:46, 3.78it/s] 9%|▊ | 31760/371472 [2:33:51<26:15:39, 3.59it/s] {'loss': 6.9485, 'learning_rate': 9.23495442694305e-07, 'epoch': 1.37} + 9%|▊ | 31760/371472 [2:33:51<26:15:39, 3.59it/s] 9%|▊ | 31761/371472 [2:33:51<25:37:51, 3.68it/s] 9%|▊ | 31762/371472 [2:33:52<25:52:57, 3.65it/s] 9%|▊ | 31763/371472 [2:33:52<25:37:34, 3.68it/s] 9%|▊ | 31764/371472 [2:33:52<26:03:55, 3.62it/s] 9%|▊ | 31765/371472 [2:33:52<26:01:28, 3.63it/s] 9%|▊ | 31766/371472 [2:33:53<26:01:33, 3.63it/s] 9%|▊ | 31767/371472 [2:33:53<26:09:09, 3.61it/s] 9%|▊ | 31768/371472 [2:33:53<27:02:45, 3.49it/s] 9%|▊ | 31769/371472 [2:33:54<25:58:30, 3.63it/s] 9%|▊ | 31770/371472 [2:33:54<27:53:04, 3.38it/s] 9%|▊ | 31771/371472 [2:33:54<26:42:09, 3.53it/s] 9%|▊ | 31772/371472 [2:33:54<26:26:01, 3.57it/s] 9%|▊ | 31773/371472 [2:33:55<25:45:51, 3.66it/s] 9%|▊ | 31774/371472 [2:33:55<25:22:57, 3.72it/s] 9%|▊ | 31775/371472 [2:33:55<25:55:22, 3.64it/s] 9%|▊ | 31776/371472 [2:33:56<27:31:17, 3.43it/s] 9%|▊ | 31777/371472 [2:33:56<26:37:42, 3.54it/s] 9%|▊ | 31778/371472 [2:33:56<25:54:46, 3.64it/s] 9%|▊ | 31779/371472 [2:33:56<26:07:20, 3.61it/s] 9%|▊ | 31780/371472 [2:33:57<25:48:30, 3.66it/s] {'loss': 6.6041, 'learning_rate': 9.234469607188259e-07, 'epoch': 1.37} + 9%|▊ | 31780/371472 [2:33:57<25:48:30, 3.66it/s] 9%|▊ | 31781/371472 [2:33:57<26:56:39, 3.50it/s] 9%|▊ | 31782/371472 [2:33:57<28:26:58, 3.32it/s] 9%|▊ | 31783/371472 [2:33:58<28:38:30, 3.29it/s] 9%|▊ | 31784/371472 [2:33:58<28:45:27, 3.28it/s] 9%|▊ | 31785/371472 [2:33:58<28:00:33, 3.37it/s] 9%|▊ | 31786/371472 [2:33:59<30:02:05, 3.14it/s] 9%|▊ | 31787/371472 [2:33:59<28:05:42, 3.36it/s] 9%|▊ | 31788/371472 [2:33:59<27:20:32, 3.45it/s] 9%|▊ | 31789/371472 [2:33:59<26:49:24, 3.52it/s] 9%|▊ | 31790/371472 [2:34:00<29:19:05, 3.22it/s] 9%|▊ | 31791/371472 [2:34:00<27:51:13, 3.39it/s] 9%|▊ | 31792/371472 [2:34:00<27:00:46, 3.49it/s] 9%|▊ | 31793/371472 [2:34:01<27:59:30, 3.37it/s] 9%|▊ | 31794/371472 [2:34:01<27:39:28, 3.41it/s] 9%|▊ | 31795/371472 [2:34:01<28:10:23, 3.35it/s] 9%|▊ | 31796/371472 [2:34:01<27:53:22, 3.38it/s] 9%|▊ | 31797/371472 [2:34:02<27:53:43, 3.38it/s] 9%|▊ | 31798/371472 [2:34:02<28:58:13, 3.26it/s] 9%|▊ | 31799/371472 [2:34:02<28:34:50, 3.30it/s] 9%|▊ | 31800/371472 [2:34:03<27:55:57, 3.38it/s] {'loss': 6.6304, 'learning_rate': 9.233984787433472e-07, 'epoch': 1.37} + 9%|▊ | 31800/371472 [2:34:03<27:55:57, 3.38it/s] 9%|▊ | 31801/371472 [2:34:03<27:06:18, 3.48it/s] 9%|▊ | 31802/371472 [2:34:03<27:53:45, 3.38it/s] 9%|▊ | 31803/371472 [2:34:03<27:49:29, 3.39it/s] 9%|▊ | 31804/371472 [2:34:04<28:11:00, 3.35it/s] 9%|▊ | 31805/371472 [2:34:04<28:07:38, 3.35it/s] 9%|▊ | 31806/371472 [2:34:04<27:57:54, 3.37it/s] 9%|▊ | 31807/371472 [2:34:05<27:09:45, 3.47it/s] 9%|▊ | 31808/371472 [2:34:05<26:48:04, 3.52it/s] 9%|▊ | 31809/371472 [2:34:05<26:07:16, 3.61it/s] 9%|▊ | 31810/371472 [2:34:05<26:48:47, 3.52it/s] 9%|▊ | 31811/371472 [2:34:06<28:21:00, 3.33it/s] 9%|▊ | 31812/371472 [2:34:06<28:05:47, 3.36it/s] 9%|▊ | 31813/371472 [2:34:06<28:50:07, 3.27it/s] 9%|▊ | 31814/371472 [2:34:07<27:23:58, 3.44it/s] 9%|▊ | 31815/371472 [2:34:07<26:53:28, 3.51it/s] 9%|▊ | 31816/371472 [2:34:07<27:22:11, 3.45it/s] 9%|▊ | 31817/371472 [2:34:08<26:54:21, 3.51it/s] 9%|▊ | 31818/371472 [2:34:08<26:59:42, 3.50it/s] 9%|▊ | 31819/371472 [2:34:08<26:48:44, 3.52it/s] 9%|▊ | 31820/371472 [2:34:08<26:55:04, 3.51it/s] {'loss': 6.5932, 'learning_rate': 9.233499967678682e-07, 'epoch': 1.37} + 9%|▊ | 31820/371472 [2:34:08<26:55:04, 3.51it/s] 9%|▊ | 31821/371472 [2:34:09<26:33:30, 3.55it/s] 9%|▊ | 31822/371472 [2:34:09<26:11:05, 3.60it/s] 9%|▊ | 31823/371472 [2:34:09<26:01:29, 3.63it/s] 9%|▊ | 31824/371472 [2:34:09<25:13:37, 3.74it/s] 9%|▊ | 31825/371472 [2:34:10<26:04:01, 3.62it/s] 9%|▊ | 31826/371472 [2:34:10<25:09:19, 3.75it/s] 9%|▊ | 31827/371472 [2:34:10<25:12:23, 3.74it/s] 9%|▊ | 31828/371472 [2:34:11<25:22:26, 3.72it/s] 9%|▊ | 31829/371472 [2:34:11<25:45:23, 3.66it/s] 9%|▊ | 31830/371472 [2:34:11<27:08:41, 3.48it/s] 9%|▊ | 31831/371472 [2:34:11<27:11:20, 3.47it/s] 9%|▊ | 31832/371472 [2:34:12<27:23:41, 3.44it/s] 9%|▊ | 31833/371472 [2:34:12<27:53:22, 3.38it/s] 9%|▊ | 31834/371472 [2:34:12<26:51:34, 3.51it/s] 9%|▊ | 31835/371472 [2:34:13<26:35:59, 3.55it/s] 9%|▊ | 31836/371472 [2:34:13<27:26:26, 3.44it/s] 9%|▊ | 31837/371472 [2:34:13<27:13:32, 3.47it/s] 9%|▊ | 31838/371472 [2:34:14<29:03:29, 3.25it/s] 9%|▊ | 31839/371472 [2:34:14<28:24:43, 3.32it/s] 9%|▊ | 31840/371472 [2:34:14<29:29:07, 3.20it/s] {'loss': 6.6949, 'learning_rate': 9.233015147923894e-07, 'epoch': 1.37} + 9%|▊ | 31840/371472 [2:34:14<29:29:07, 3.20it/s] 9%|▊ | 31841/371472 [2:34:14<29:00:48, 3.25it/s] 9%|▊ | 31842/371472 [2:34:15<27:53:30, 3.38it/s] 9%|▊ | 31843/371472 [2:34:15<28:04:05, 3.36it/s] 9%|▊ | 31844/371472 [2:34:15<29:19:36, 3.22it/s] 9%|▊ | 31845/371472 [2:34:16<29:22:08, 3.21it/s] 9%|▊ | 31846/371472 [2:34:16<28:02:50, 3.36it/s] 9%|▊ | 31847/371472 [2:34:16<27:33:44, 3.42it/s] 9%|▊ | 31848/371472 [2:34:16<26:57:14, 3.50it/s] 9%|▊ | 31849/371472 [2:34:17<27:45:27, 3.40it/s] 9%|▊ | 31850/371472 [2:34:17<27:54:11, 3.38it/s] 9%|▊ | 31851/371472 [2:34:17<27:38:41, 3.41it/s] 9%|▊ | 31852/371472 [2:34:18<27:33:39, 3.42it/s] 9%|▊ | 31853/371472 [2:34:18<27:48:36, 3.39it/s] 9%|▊ | 31854/371472 [2:34:18<26:49:43, 3.52it/s] 9%|▊ | 31855/371472 [2:34:18<26:02:13, 3.62it/s] 9%|▊ | 31856/371472 [2:34:19<26:48:22, 3.52it/s] 9%|▊ | 31857/371472 [2:34:19<26:39:02, 3.54it/s] 9%|▊ | 31858/371472 [2:34:19<27:05:59, 3.48it/s] 9%|▊ | 31859/371472 [2:34:20<26:37:50, 3.54it/s] 9%|▊ | 31860/371472 [2:34:20<25:55:04, 3.64it/s] {'loss': 6.7267, 'learning_rate': 9.232530328169104e-07, 'epoch': 1.37} + 9%|▊ | 31860/371472 [2:34:20<25:55:04, 3.64it/s] 9%|▊ | 31861/371472 [2:34:20<26:34:59, 3.55it/s] 9%|▊ | 31862/371472 [2:34:20<25:43:53, 3.67it/s] 9%|▊ | 31863/371472 [2:34:21<26:26:29, 3.57it/s] 9%|▊ | 31864/371472 [2:34:21<27:11:34, 3.47it/s] 9%|▊ | 31865/371472 [2:34:21<27:00:47, 3.49it/s] 9%|▊ | 31866/371472 [2:34:22<26:22:01, 3.58it/s] 9%|▊ | 31867/371472 [2:34:22<27:12:24, 3.47it/s] 9%|▊ | 31868/371472 [2:34:22<27:10:16, 3.47it/s] 9%|▊ | 31869/371472 [2:34:22<27:45:21, 3.40it/s] 9%|▊ | 31870/371472 [2:34:23<27:34:22, 3.42it/s] 9%|▊ | 31871/371472 [2:34:23<26:48:52, 3.52it/s] 9%|▊ | 31872/371472 [2:34:23<28:51:56, 3.27it/s] 9%|▊ | 31873/371472 [2:34:24<28:23:09, 3.32it/s] 9%|▊ | 31874/371472 [2:34:24<27:47:14, 3.39it/s] 9%|▊ | 31875/371472 [2:34:24<26:30:00, 3.56it/s] 9%|▊ | 31876/371472 [2:34:24<26:08:30, 3.61it/s] 9%|▊ | 31877/371472 [2:34:25<25:30:04, 3.70it/s] 9%|▊ | 31878/371472 [2:34:25<25:50:15, 3.65it/s] 9%|▊ | 31879/371472 [2:34:25<26:01:42, 3.62it/s] 9%|▊ | 31880/371472 [2:34:26<26:49:30, 3.52it/s] {'loss': 6.6524, 'learning_rate': 9.232045508414316e-07, 'epoch': 1.37} + 9%|▊ | 31880/371472 [2:34:26<26:49:30, 3.52it/s] 9%|▊ | 31881/371472 [2:34:26<26:10:02, 3.60it/s] 9%|▊ | 31882/371472 [2:34:26<25:28:39, 3.70it/s] 9%|▊ | 31883/371472 [2:34:26<27:09:31, 3.47it/s] 9%|▊ | 31884/371472 [2:34:27<26:12:34, 3.60it/s] 9%|▊ | 31885/371472 [2:34:27<25:40:09, 3.67it/s] 9%|▊ | 31886/371472 [2:34:27<25:14:05, 3.74it/s] 9%|▊ | 31887/371472 [2:34:27<25:07:09, 3.76it/s] 9%|▊ | 31888/371472 [2:34:28<25:46:30, 3.66it/s] 9%|▊ | 31889/371472 [2:34:28<27:35:59, 3.42it/s] 9%|▊ | 31890/371472 [2:34:28<26:54:46, 3.50it/s] 9%|▊ | 31891/371472 [2:34:29<26:25:40, 3.57it/s] 9%|▊ | 31892/371472 [2:34:29<26:52:15, 3.51it/s] 9%|▊ | 31893/371472 [2:34:29<26:44:25, 3.53it/s] 9%|▊ | 31894/371472 [2:34:29<25:52:01, 3.65it/s] 9%|▊ | 31895/371472 [2:34:30<25:16:10, 3.73it/s] 9%|▊ | 31896/371472 [2:34:30<25:04:39, 3.76it/s] 9%|▊ | 31897/371472 [2:34:30<25:01:02, 3.77it/s] 9%|▊ | 31898/371472 [2:34:31<25:53:19, 3.64it/s] 9%|▊ | 31899/371472 [2:34:31<25:36:32, 3.68it/s] 9%|▊ | 31900/371472 [2:34:31<27:04:20, 3.48it/s] {'loss': 6.4257, 'learning_rate': 9.231560688659527e-07, 'epoch': 1.37} + 9%|▊ | 31900/371472 [2:34:31<27:04:20, 3.48it/s] 9%|▊ | 31901/371472 [2:34:31<26:42:07, 3.53it/s] 9%|▊ | 31902/371472 [2:34:32<26:07:34, 3.61it/s] 9%|▊ | 31903/371472 [2:34:32<26:55:25, 3.50it/s] 9%|▊ | 31904/371472 [2:34:32<26:36:11, 3.55it/s] 9%|▊ | 31905/371472 [2:34:33<26:00:35, 3.63it/s] 9%|▊ | 31906/371472 [2:34:33<25:09:40, 3.75it/s] 9%|▊ | 31907/371472 [2:34:33<24:51:58, 3.79it/s] 9%|▊ | 31908/371472 [2:34:33<26:34:52, 3.55it/s] 9%|▊ | 31909/371472 [2:34:34<27:42:45, 3.40it/s] 9%|▊ | 31910/371472 [2:34:34<28:59:17, 3.25it/s] 9%|▊ | 31911/371472 [2:34:34<28:50:12, 3.27it/s] 9%|▊ | 31912/371472 [2:34:35<29:14:16, 3.23it/s] 9%|▊ | 31913/371472 [2:34:35<27:57:45, 3.37it/s] 9%|▊ | 31914/371472 [2:34:35<26:55:27, 3.50it/s] 9%|▊ | 31915/371472 [2:34:35<26:02:53, 3.62it/s] 9%|▊ | 31916/371472 [2:34:36<26:45:17, 3.53it/s] 9%|▊ | 31917/371472 [2:34:36<25:30:25, 3.70it/s] 9%|▊ | 31918/371472 [2:34:36<24:58:25, 3.78it/s] 9%|▊ | 31919/371472 [2:34:36<25:16:15, 3.73it/s] 9%|▊ | 31920/371472 [2:34:37<25:21:14, 3.72it/s] {'loss': 6.6167, 'learning_rate': 9.231075868904739e-07, 'epoch': 1.37} + 9%|▊ | 31920/371472 [2:34:37<25:21:14, 3.72it/s] 9%|▊ | 31921/371472 [2:34:37<25:35:58, 3.68it/s] 9%|▊ | 31922/371472 [2:34:37<26:04:08, 3.62it/s] 9%|▊ | 31923/371472 [2:34:38<25:08:49, 3.75it/s] 9%|▊ | 31924/371472 [2:34:38<24:54:01, 3.79it/s] 9%|▊ | 31925/371472 [2:34:38<25:03:08, 3.76it/s] 9%|▊ | 31926/371472 [2:34:38<25:16:03, 3.73it/s] 9%|▊ | 31927/371472 [2:34:39<26:14:28, 3.59it/s] 9%|▊ | 31928/371472 [2:34:39<27:26:49, 3.44it/s] 9%|▊ | 31929/371472 [2:34:39<26:02:20, 3.62it/s] 9%|▊ | 31930/371472 [2:34:39<25:22:04, 3.72it/s] 9%|▊ | 31931/371472 [2:34:40<24:41:02, 3.82it/s] 9%|▊ | 31932/371472 [2:34:40<26:34:44, 3.55it/s] 9%|▊ | 31933/371472 [2:34:40<26:01:07, 3.62it/s] 9%|▊ | 31934/371472 [2:34:41<26:09:35, 3.61it/s] 9%|▊ | 31935/371472 [2:34:41<25:55:04, 3.64it/s] 9%|▊ | 31936/371472 [2:34:41<25:24:45, 3.71it/s] 9%|▊ | 31937/371472 [2:34:41<26:36:48, 3.54it/s] 9%|▊ | 31938/371472 [2:34:42<26:20:38, 3.58it/s] 9%|▊ | 31939/371472 [2:34:42<26:58:04, 3.50it/s] 9%|▊ | 31940/371472 [2:34:42<26:40:15, 3.54it/s] {'loss': 6.3268, 'learning_rate': 9.230591049149949e-07, 'epoch': 1.38} + 9%|▊ | 31940/371472 [2:34:42<26:40:15, 3.54it/s] 9%|▊ | 31941/371472 [2:34:43<25:38:26, 3.68it/s] 9%|▊ | 31942/371472 [2:34:43<25:38:20, 3.68it/s] 9%|▊ | 31943/371472 [2:34:43<25:55:10, 3.64it/s] 9%|▊ | 31944/371472 [2:34:43<26:29:31, 3.56it/s] 9%|▊ | 31945/371472 [2:34:44<26:08:27, 3.61it/s] 9%|▊ | 31946/371472 [2:34:44<26:15:04, 3.59it/s] 9%|▊ | 31947/371472 [2:34:44<25:46:52, 3.66it/s] 9%|▊ | 31948/371472 [2:34:44<25:44:32, 3.66it/s] 9%|▊ | 31949/371472 [2:34:45<27:26:39, 3.44it/s] 9%|▊ | 31950/371472 [2:34:45<27:08:50, 3.47it/s] 9%|▊ | 31951/371472 [2:34:45<27:50:50, 3.39it/s] 9%|▊ | 31952/371472 [2:34:46<27:23:17, 3.44it/s] 9%|▊ | 31953/371472 [2:34:46<27:05:45, 3.48it/s] 9%|▊ | 31954/371472 [2:34:46<25:56:48, 3.63it/s] 9%|▊ | 31955/371472 [2:34:46<25:45:29, 3.66it/s] 9%|▊ | 31956/371472 [2:34:47<26:34:20, 3.55it/s] 9%|▊ | 31957/371472 [2:34:47<27:16:18, 3.46it/s] 9%|▊ | 31958/371472 [2:34:47<27:09:36, 3.47it/s] 9%|▊ | 31959/371472 [2:34:48<27:05:00, 3.48it/s] 9%|▊ | 31960/371472 [2:34:48<28:01:12, 3.37it/s] {'loss': 6.8016, 'learning_rate': 9.230106229395159e-07, 'epoch': 1.38} + 9%|▊ | 31960/371472 [2:34:48<28:01:12, 3.37it/s] 9%|▊ | 31961/371472 [2:34:48<27:38:14, 3.41it/s] 9%|▊ | 31962/371472 [2:34:49<27:07:09, 3.48it/s] 9%|▊ | 31963/371472 [2:34:49<26:29:41, 3.56it/s] 9%|▊ | 31964/371472 [2:34:49<26:39:56, 3.54it/s] 9%|▊ | 31965/371472 [2:34:49<27:02:09, 3.49it/s] 9%|▊ | 31966/371472 [2:34:50<26:24:35, 3.57it/s] 9%|▊ | 31967/371472 [2:34:50<27:25:36, 3.44it/s] 9%|▊ | 31968/371472 [2:34:50<26:37:09, 3.54it/s] 9%|▊ | 31969/371472 [2:34:51<27:50:34, 3.39it/s] 9%|▊ | 31970/371472 [2:34:51<27:22:49, 3.44it/s] 9%|▊ | 31971/371472 [2:34:51<29:13:39, 3.23it/s] 9%|▊ | 31972/371472 [2:34:51<28:03:58, 3.36it/s] 9%|▊ | 31973/371472 [2:34:52<27:50:04, 3.39it/s] 9%|▊ | 31974/371472 [2:34:52<27:04:01, 3.48it/s] 9%|▊ | 31975/371472 [2:34:52<26:47:08, 3.52it/s] 9%|▊ | 31976/371472 [2:34:53<26:30:29, 3.56it/s] 9%|▊ | 31977/371472 [2:34:53<28:10:23, 3.35it/s] 9%|▊ | 31978/371472 [2:34:53<28:02:44, 3.36it/s] 9%|▊ | 31979/371472 [2:34:53<27:59:35, 3.37it/s] 9%|▊ | 31980/371472 [2:34:54<26:49:50, 3.51it/s] {'loss': 6.6805, 'learning_rate': 9.229621409640371e-07, 'epoch': 1.38} + 9%|▊ | 31980/371472 [2:34:54<26:49:50, 3.51it/s] 9%|▊ | 31981/371472 [2:34:54<26:59:40, 3.49it/s] 9%|▊ | 31982/371472 [2:34:54<26:31:49, 3.55it/s] 9%|▊ | 31983/371472 [2:34:55<28:58:58, 3.25it/s] 9%|▊ | 31984/371472 [2:34:55<27:58:35, 3.37it/s] 9%|▊ | 31985/371472 [2:34:55<27:53:32, 3.38it/s] 9%|▊ | 31986/371472 [2:34:56<27:44:20, 3.40it/s] 9%|▊ | 31987/371472 [2:34:56<28:03:17, 3.36it/s] 9%|▊ | 31988/371472 [2:34:56<26:49:03, 3.52it/s] 9%|▊ | 31989/371472 [2:34:56<26:25:02, 3.57it/s] 9%|▊ | 31990/371472 [2:34:57<25:51:14, 3.65it/s] 9%|▊ | 31991/371472 [2:34:57<25:00:14, 3.77it/s] 9%|▊ | 31992/371472 [2:34:57<25:28:05, 3.70it/s] 9%|▊ | 31993/371472 [2:34:57<25:53:43, 3.64it/s] 9%|▊ | 31994/371472 [2:34:58<26:58:14, 3.50it/s] 9%|▊ | 31995/371472 [2:34:58<27:42:28, 3.40it/s] 9%|▊ | 31996/371472 [2:34:58<27:01:13, 3.49it/s] 9%|▊ | 31997/371472 [2:34:59<26:52:05, 3.51it/s] 9%|▊ | 31998/371472 [2:34:59<28:37:58, 3.29it/s] 9%|▊ | 31999/371472 [2:34:59<27:56:38, 3.37it/s] 9%|▊ | 32000/371472 [2:35:00<27:39:54, 3.41it/s] {'loss': 6.501, 'learning_rate': 9.229136589885583e-07, 'epoch': 1.38} + 9%|▊ | 32000/371472 [2:35:00<27:39:54, 3.41it/s] 9%|▊ | 32001/371472 [2:35:00<27:01:34, 3.49it/s] 9%|▊ | 32002/371472 [2:35:00<26:40:25, 3.54it/s] 9%|▊ | 32003/371472 [2:35:00<26:26:37, 3.57it/s] 9%|▊ | 32004/371472 [2:35:01<25:22:17, 3.72it/s] 9%|▊ | 32005/371472 [2:35:01<26:17:08, 3.59it/s] 9%|▊ | 32006/371472 [2:35:01<25:52:47, 3.64it/s] 9%|▊ | 32007/371472 [2:35:01<25:18:20, 3.73it/s] 9%|▊ | 32008/371472 [2:35:02<25:33:14, 3.69it/s] 9%|▊ | 32009/371472 [2:35:02<25:50:17, 3.65it/s] 9%|▊ | 32010/371472 [2:35:02<25:32:05, 3.69it/s] 9%|▊ | 32011/371472 [2:35:02<25:52:28, 3.64it/s] 9%|▊ | 32012/371472 [2:35:03<28:54:59, 3.26it/s] 9%|▊ | 32013/371472 [2:35:03<27:11:09, 3.47it/s] 9%|▊ | 32014/371472 [2:35:03<28:30:40, 3.31it/s] 9%|▊ | 32015/371472 [2:35:04<29:52:34, 3.16it/s] 9%|▊ | 32016/371472 [2:35:04<28:52:20, 3.27it/s] 9%|▊ | 32017/371472 [2:35:04<28:03:30, 3.36it/s] 9%|▊ | 32018/371472 [2:35:05<26:52:36, 3.51it/s] 9%|▊ | 32019/371472 [2:35:05<28:14:33, 3.34it/s] 9%|▊ | 32020/371472 [2:35:05<28:51:15, 3.27it/s] {'loss': 6.529, 'learning_rate': 9.228651770130793e-07, 'epoch': 1.38} + 9%|▊ | 32020/371472 [2:35:05<28:51:15, 3.27it/s] 9%|▊ | 32021/371472 [2:35:06<28:39:22, 3.29it/s] 9%|▊ | 32022/371472 [2:35:06<27:41:05, 3.41it/s] 9%|▊ | 32023/371472 [2:35:06<28:17:36, 3.33it/s] 9%|▊ | 32024/371472 [2:35:06<27:59:03, 3.37it/s] 9%|▊ | 32025/371472 [2:35:07<26:52:54, 3.51it/s] 9%|▊ | 32026/371472 [2:35:07<27:04:55, 3.48it/s] 9%|▊ | 32027/371472 [2:35:07<26:48:47, 3.52it/s] 9%|▊ | 32028/371472 [2:35:08<26:40:56, 3.53it/s] 9%|▊ | 32029/371472 [2:35:08<27:18:51, 3.45it/s] 9%|▊ | 32030/371472 [2:35:08<26:53:50, 3.51it/s] 9%|▊ | 32031/371472 [2:35:08<28:37:38, 3.29it/s] 9%|▊ | 32032/371472 [2:35:09<28:16:35, 3.33it/s] 9%|▊ | 32033/371472 [2:35:09<28:08:27, 3.35it/s] 9%|▊ | 32034/371472 [2:35:09<27:41:55, 3.40it/s] 9%|▊ | 32035/371472 [2:35:10<28:06:05, 3.36it/s] 9%|▊ | 32036/371472 [2:35:10<28:42:45, 3.28it/s] 9%|▊ | 32037/371472 [2:35:10<27:52:14, 3.38it/s] 9%|▊ | 32038/371472 [2:35:11<28:55:14, 3.26it/s] 9%|▊ | 32039/371472 [2:35:11<30:35:30, 3.08it/s] 9%|▊ | 32040/371472 [2:35:11<29:13:47, 3.23it/s] {'loss': 6.2686, 'learning_rate': 9.228166950376004e-07, 'epoch': 1.38} + 9%|▊ | 32040/371472 [2:35:11<29:13:47, 3.23it/s] 9%|▊ | 32041/371472 [2:35:12<29:02:17, 3.25it/s] 9%|▊ | 32042/371472 [2:35:12<28:30:03, 3.31it/s] 9%|▊ | 32043/371472 [2:35:12<28:32:50, 3.30it/s] 9%|▊ | 32044/371472 [2:35:12<30:08:10, 3.13it/s] 9%|▊ | 32045/371472 [2:35:13<29:00:45, 3.25it/s] 9%|▊ | 32046/371472 [2:35:13<27:37:07, 3.41it/s] 9%|▊ | 32047/371472 [2:35:13<27:17:23, 3.45it/s] 9%|▊ | 32048/371472 [2:35:14<26:16:06, 3.59it/s] 9%|▊ | 32049/371472 [2:35:14<26:39:30, 3.54it/s] 9%|▊ | 32050/371472 [2:35:14<29:42:41, 3.17it/s] 9%|▊ | 32051/371472 [2:35:15<28:20:03, 3.33it/s] 9%|▊ | 32052/371472 [2:35:15<29:01:46, 3.25it/s] 9%|▊ | 32053/371472 [2:35:15<29:20:51, 3.21it/s] 9%|▊ | 32054/371472 [2:35:15<28:33:58, 3.30it/s] 9%|▊ | 32055/371472 [2:35:16<27:41:54, 3.40it/s] 9%|▊ | 32056/371472 [2:35:16<28:29:58, 3.31it/s] 9%|▊ | 32057/371472 [2:35:16<27:32:58, 3.42it/s] 9%|▊ | 32058/371472 [2:35:17<27:13:14, 3.46it/s] 9%|▊ | 32059/371472 [2:35:17<27:00:58, 3.49it/s] 9%|▊ | 32060/371472 [2:35:17<26:25:56, 3.57it/s] {'loss': 6.5076, 'learning_rate': 9.227682130621216e-07, 'epoch': 1.38} + 9%|▊ | 32060/371472 [2:35:17<26:25:56, 3.57it/s] 9%|▊ | 32061/371472 [2:35:17<26:50:57, 3.51it/s] 9%|▊ | 32062/371472 [2:35:18<26:03:46, 3.62it/s] 9%|▊ | 32063/371472 [2:35:18<25:07:40, 3.75it/s] 9%|▊ | 32064/371472 [2:35:18<25:10:06, 3.75it/s] 9%|▊ | 32065/371472 [2:35:19<27:11:00, 3.47it/s] 9%|▊ | 32066/371472 [2:35:19<27:46:15, 3.39it/s] 9%|▊ | 32067/371472 [2:35:19<27:09:53, 3.47it/s] 9%|▊ | 32068/371472 [2:35:19<26:48:59, 3.52it/s] 9%|▊ | 32069/371472 [2:35:20<28:02:56, 3.36it/s] 9%|▊ | 32070/371472 [2:35:20<26:51:13, 3.51it/s] 9%|▊ | 32071/371472 [2:35:20<26:01:24, 3.62it/s] 9%|▊ | 32072/371472 [2:35:20<25:16:31, 3.73it/s] 9%|▊ | 32073/371472 [2:35:21<25:06:17, 3.76it/s] 9%|▊ | 32074/371472 [2:35:21<25:01:18, 3.77it/s] 9%|▊ | 32075/371472 [2:35:21<26:23:30, 3.57it/s] 9%|▊ | 32076/371472 [2:35:22<26:13:51, 3.59it/s] 9%|▊ | 32077/371472 [2:35:22<26:15:07, 3.59it/s] 9%|▊ | 32078/371472 [2:35:22<26:01:46, 3.62it/s] 9%|▊ | 32079/371472 [2:35:22<26:12:19, 3.60it/s] 9%|▊ | 32080/371472 [2:35:23<27:18:56, 3.45it/s] {'loss': 6.3802, 'learning_rate': 9.227197310866425e-07, 'epoch': 1.38} + 9%|▊ | 32080/371472 [2:35:23<27:18:56, 3.45it/s] 9%|▊ | 32081/371472 [2:35:23<27:06:44, 3.48it/s] 9%|▊ | 32082/371472 [2:35:23<27:11:03, 3.47it/s] 9%|▊ | 32083/371472 [2:35:24<26:51:47, 3.51it/s] 9%|▊ | 32084/371472 [2:35:24<28:41:59, 3.28it/s] 9%|▊ | 32085/371472 [2:35:24<27:49:19, 3.39it/s] 9%|▊ | 32086/371472 [2:35:25<29:37:27, 3.18it/s] 9%|▊ | 32087/371472 [2:35:25<29:41:08, 3.18it/s] 9%|▊ | 32088/371472 [2:35:25<28:26:48, 3.31it/s] 9%|▊ | 32089/371472 [2:35:26<30:29:01, 3.09it/s] 9%|▊ | 32090/371472 [2:35:26<29:55:27, 3.15it/s] 9%|▊ | 32091/371472 [2:35:26<28:29:02, 3.31it/s] 9%|▊ | 32092/371472 [2:35:26<27:44:40, 3.40it/s] 9%|▊ | 32093/371472 [2:35:27<26:51:19, 3.51it/s] 9%|▊ | 32094/371472 [2:35:27<26:39:36, 3.54it/s] 9%|▊ | 32095/371472 [2:35:27<26:32:57, 3.55it/s] 9%|▊ | 32096/371472 [2:35:27<25:55:26, 3.64it/s] 9%|▊ | 32097/371472 [2:35:28<25:50:58, 3.65it/s] 9%|▊ | 32098/371472 [2:35:28<26:51:02, 3.51it/s] 9%|▊ | 32099/371472 [2:35:28<27:14:29, 3.46it/s] 9%|▊ | 32100/371472 [2:35:29<27:46:07, 3.39it/s] {'loss': 6.35, 'learning_rate': 9.226712491111637e-07, 'epoch': 1.38} + 9%|▊ | 32100/371472 [2:35:29<27:46:07, 3.39it/s] 9%|▊ | 32101/371472 [2:35:29<27:08:51, 3.47it/s] 9%|▊ | 32102/371472 [2:35:29<27:30:34, 3.43it/s] 9%|▊ | 32103/371472 [2:35:30<28:21:04, 3.33it/s] 9%|▊ | 32104/371472 [2:35:30<27:30:19, 3.43it/s] 9%|▊ | 32105/371472 [2:35:30<26:56:09, 3.50it/s] 9%|▊ | 32106/371472 [2:35:30<27:36:57, 3.41it/s] 9%|▊ | 32107/371472 [2:35:31<27:11:51, 3.47it/s] 9%|▊ | 32108/371472 [2:35:31<28:01:34, 3.36it/s] 9%|▊ | 32109/371472 [2:35:31<27:49:16, 3.39it/s] 9%|▊ | 32110/371472 [2:35:32<28:58:34, 3.25it/s] 9%|▊ | 32111/371472 [2:35:32<27:52:57, 3.38it/s] 9%|▊ | 32112/371472 [2:35:32<28:40:36, 3.29it/s] 9%|▊ | 32113/371472 [2:35:32<26:52:30, 3.51it/s] 9%|▊ | 32114/371472 [2:35:33<25:39:39, 3.67it/s] 9%|▊ | 32115/371472 [2:35:33<25:09:15, 3.75it/s] 9%|▊ | 32116/371472 [2:35:33<26:15:25, 3.59it/s] 9%|▊ | 32117/371472 [2:35:34<26:47:19, 3.52it/s] 9%|▊ | 32118/371472 [2:35:34<26:22:10, 3.57it/s] 9%|▊ | 32119/371472 [2:35:34<25:59:16, 3.63it/s] 9%|▊ | 32120/371472 [2:35:34<25:45:41, 3.66it/s] {'loss': 6.6141, 'learning_rate': 9.226227671356848e-07, 'epoch': 1.38} + 9%|▊ | 32120/371472 [2:35:34<25:45:41, 3.66it/s] 9%|▊ | 32121/371472 [2:35:35<25:07:06, 3.75it/s] 9%|▊ | 32122/371472 [2:35:35<29:10:48, 3.23it/s] 9%|▊ | 32123/371472 [2:35:35<28:42:38, 3.28it/s] 9%|▊ | 32124/371472 [2:35:36<27:31:39, 3.42it/s] 9%|▊ | 32125/371472 [2:35:36<26:50:37, 3.51it/s] 9%|▊ | 32126/371472 [2:35:36<27:22:30, 3.44it/s] 9%|▊ | 32127/371472 [2:35:36<27:31:21, 3.42it/s] 9%|▊ | 32128/371472 [2:35:37<29:36:39, 3.18it/s] 9%|▊ | 32129/371472 [2:35:37<28:55:28, 3.26it/s] 9%|▊ | 32130/371472 [2:35:37<28:11:05, 3.34it/s] 9%|▊ | 32131/371472 [2:35:38<27:30:51, 3.43it/s] 9%|▊ | 32132/371472 [2:35:38<26:14:35, 3.59it/s] 9%|▊ | 32133/371472 [2:35:38<29:12:20, 3.23it/s] 9%|▊ | 32134/371472 [2:35:39<28:39:28, 3.29it/s] 9%|▊ | 32135/371472 [2:35:39<28:51:08, 3.27it/s] 9%|▊ | 32136/371472 [2:35:39<29:09:24, 3.23it/s] 9%|▊ | 32137/371472 [2:35:39<28:35:43, 3.30it/s] 9%|▊ | 32138/371472 [2:35:40<27:53:58, 3.38it/s] 9%|▊ | 32139/371472 [2:35:40<27:05:56, 3.48it/s] 9%|▊ | 32140/371472 [2:35:40<27:38:56, 3.41it/s] {'loss': 6.3192, 'learning_rate': 9.22574285160206e-07, 'epoch': 1.38} + 9%|▊ | 32140/371472 [2:35:40<27:38:56, 3.41it/s] 9%|▊ | 32141/371472 [2:35:41<27:14:46, 3.46it/s] 9%|▊ | 32142/371472 [2:35:41<26:34:02, 3.55it/s] 9%|▊ | 32143/371472 [2:35:41<26:41:53, 3.53it/s] 9%|▊ | 32144/371472 [2:35:41<26:38:08, 3.54it/s] 9%|▊ | 32145/371472 [2:35:42<26:21:25, 3.58it/s] 9%|▊ | 32146/371472 [2:35:42<25:49:55, 3.65it/s] 9%|▊ | 32147/371472 [2:35:42<26:23:13, 3.57it/s] 9%|▊ | 32148/371472 [2:35:43<26:34:57, 3.55it/s] 9%|▊ | 32149/371472 [2:35:43<27:22:22, 3.44it/s] 9%|▊ | 32150/371472 [2:35:43<26:45:51, 3.52it/s] 9%|▊ | 32151/371472 [2:35:43<27:26:03, 3.44it/s] 9%|▊ | 32152/371472 [2:35:44<26:39:46, 3.54it/s] 9%|▊ | 32153/371472 [2:35:44<26:23:24, 3.57it/s] 9%|▊ | 32154/371472 [2:35:44<27:39:43, 3.41it/s] 9%|▊ | 32155/371472 [2:35:45<26:58:29, 3.49it/s] 9%|▊ | 32156/371472 [2:35:45<27:02:19, 3.49it/s] 9%|▊ | 32157/371472 [2:35:45<27:34:15, 3.42it/s] 9%|▊ | 32158/371472 [2:35:45<27:08:17, 3.47it/s] 9%|▊ | 32159/371472 [2:35:46<26:36:18, 3.54it/s] 9%|▊ | 32160/371472 [2:35:46<25:40:50, 3.67it/s] {'loss': 6.6553, 'learning_rate': 9.22525803184727e-07, 'epoch': 1.39} + 9%|▊ | 32160/371472 [2:35:46<25:40:50, 3.67it/s] 9%|▊ | 32161/371472 [2:35:46<26:47:26, 3.52it/s] 9%|▊ | 32162/371472 [2:35:47<26:16:13, 3.59it/s] 9%|▊ | 32163/371472 [2:35:47<26:35:07, 3.55it/s] 9%|▊ | 32164/371472 [2:35:47<27:20:55, 3.45it/s] 9%|▊ | 32165/371472 [2:35:47<26:09:14, 3.60it/s] 9%|▊ | 32166/371472 [2:35:48<26:52:02, 3.51it/s] 9%|▊ | 32167/371472 [2:35:48<26:43:12, 3.53it/s] 9%|▊ | 32168/371472 [2:35:48<28:06:59, 3.35it/s] 9%|▊ | 32169/371472 [2:35:49<27:53:20, 3.38it/s] 9%|▊ | 32170/371472 [2:35:49<27:14:13, 3.46it/s] 9%|▊ | 32171/371472 [2:35:49<26:53:27, 3.50it/s] 9%|▊ | 32172/371472 [2:35:49<26:36:05, 3.54it/s] 9%|▊ | 32173/371472 [2:35:50<26:05:48, 3.61it/s] 9%|▊ | 32174/371472 [2:35:50<27:33:48, 3.42it/s] 9%|▊ | 32175/371472 [2:35:50<31:15:30, 3.02it/s] 9%|▊ | 32176/371472 [2:35:51<30:31:04, 3.09it/s] 9%|▊ | 32177/371472 [2:35:51<32:43:29, 2.88it/s] 9%|▊ | 32178/371472 [2:35:51<30:33:25, 3.08it/s] 9%|▊ | 32179/371472 [2:35:52<28:39:26, 3.29it/s] 9%|▊ | 32180/371472 [2:35:52<28:07:28, 3.35it/s] {'loss': 6.4769, 'learning_rate': 9.224773212092482e-07, 'epoch': 1.39} + 9%|▊ | 32180/371472 [2:35:52<28:07:28, 3.35it/s] 9%|▊ | 32181/371472 [2:35:52<27:10:59, 3.47it/s] 9%|▊ | 32182/371472 [2:35:53<29:29:54, 3.19it/s] 9%|▊ | 32183/371472 [2:35:53<29:36:25, 3.18it/s] 9%|▊ | 32184/371472 [2:35:53<29:15:01, 3.22it/s] 9%|▊ | 32185/371472 [2:35:53<28:23:45, 3.32it/s] 9%|▊ | 32186/371472 [2:35:54<28:21:34, 3.32it/s] 9%|▊ | 32187/371472 [2:35:54<27:01:15, 3.49it/s] 9%|▊ | 32188/371472 [2:35:54<26:50:41, 3.51it/s] 9%|▊ | 32189/371472 [2:35:55<25:48:24, 3.65it/s] 9%|▊ | 32190/371472 [2:35:55<26:11:09, 3.60it/s] 9%|▊ | 32191/371472 [2:35:55<26:31:29, 3.55it/s] 9%|▊ | 32192/371472 [2:35:55<26:42:10, 3.53it/s] 9%|▊ | 32193/371472 [2:35:56<27:01:39, 3.49it/s] 9%|▊ | 32194/371472 [2:35:56<26:22:19, 3.57it/s] 9%|▊ | 32195/371472 [2:35:56<26:52:03, 3.51it/s] 9%|▊ | 32196/371472 [2:35:57<26:40:09, 3.53it/s] 9%|▊ | 32197/371472 [2:35:57<26:18:36, 3.58it/s] 9%|▊ | 32198/371472 [2:35:57<25:53:38, 3.64it/s] 9%|▊ | 32199/371472 [2:35:57<25:48:10, 3.65it/s] 9%|▊ | 32200/371472 [2:35:58<25:16:21, 3.73it/s] {'loss': 6.4383, 'learning_rate': 9.224288392337692e-07, 'epoch': 1.39} + 9%|▊ | 32200/371472 [2:35:58<25:16:21, 3.73it/s] 9%|▊ | 32201/371472 [2:35:58<26:00:22, 3.62it/s] 9%|▊ | 32202/371472 [2:35:58<26:40:40, 3.53it/s] 9%|▊ | 32203/371472 [2:35:59<26:57:35, 3.50it/s] 9%|▊ | 32204/371472 [2:35:59<27:07:30, 3.47it/s] 9%|▊ | 32205/371472 [2:35:59<26:43:58, 3.53it/s] 9%|▊ | 32206/371472 [2:35:59<26:37:40, 3.54it/s] 9%|▊ | 32207/371472 [2:36:00<25:39:19, 3.67it/s] 9%|▊ | 32208/371472 [2:36:00<25:32:58, 3.69it/s] 9%|▊ | 32209/371472 [2:36:00<28:22:14, 3.32it/s] 9%|▊ | 32210/371472 [2:36:01<27:51:24, 3.38it/s] 9%|▊ | 32211/371472 [2:36:01<28:02:07, 3.36it/s] 9%|▊ | 32212/371472 [2:36:01<27:17:02, 3.45it/s] 9%|▊ | 32213/371472 [2:36:01<26:12:29, 3.60it/s] 9%|▊ | 32214/371472 [2:36:02<25:50:37, 3.65it/s] 9%|▊ | 32215/371472 [2:36:02<25:28:52, 3.70it/s] 9%|▊ | 32216/371472 [2:36:02<26:39:47, 3.53it/s] 9%|▊ | 32217/371472 [2:36:02<26:47:43, 3.52it/s] 9%|▊ | 32218/371472 [2:36:03<27:09:12, 3.47it/s] 9%|▊ | 32219/371472 [2:36:03<26:49:52, 3.51it/s] 9%|▊ | 32220/371472 [2:36:03<26:59:27, 3.49it/s] {'loss': 6.4079, 'learning_rate': 9.223803572582904e-07, 'epoch': 1.39} + 9%|▊ | 32220/371472 [2:36:03<26:59:27, 3.49it/s] 9%|▊ | 32221/371472 [2:36:04<26:33:15, 3.55it/s] 9%|▊ | 32222/371472 [2:36:04<25:46:30, 3.66it/s] 9%|▊ | 32223/371472 [2:36:04<25:47:51, 3.65it/s] 9%|▊ | 32224/371472 [2:36:04<25:25:26, 3.71it/s] 9%|▊ | 32225/371472 [2:36:05<26:03:26, 3.62it/s] 9%|▊ | 32226/371472 [2:36:05<27:47:28, 3.39it/s] 9%|▊ | 32227/371472 [2:36:05<27:33:23, 3.42it/s] 9%|▊ | 32228/371472 [2:36:06<26:42:36, 3.53it/s] 9%|▊ | 32229/371472 [2:36:06<27:04:00, 3.48it/s] 9%|▊ | 32230/371472 [2:36:06<26:04:55, 3.61it/s] 9%|▊ | 32231/371472 [2:36:06<27:01:31, 3.49it/s] 9%|▊ | 32232/371472 [2:36:07<25:52:30, 3.64it/s] 9%|▊ | 32233/371472 [2:36:07<25:47:47, 3.65it/s] 9%|▊ | 32234/371472 [2:36:07<26:21:04, 3.58it/s] 9%|▊ | 32235/371472 [2:36:08<26:23:45, 3.57it/s] 9%|▊ | 32236/371472 [2:36:08<26:08:14, 3.61it/s] 9%|▊ | 32237/371472 [2:36:08<27:08:39, 3.47it/s] 9%|▊ | 32238/371472 [2:36:08<26:32:33, 3.55it/s] 9%|▊ | 32239/371472 [2:36:09<25:43:42, 3.66it/s] 9%|▊ | 32240/371472 [2:36:09<27:07:52, 3.47it/s] {'loss': 6.6363, 'learning_rate': 9.223318752828114e-07, 'epoch': 1.39} + 9%|▊ | 32240/371472 [2:36:09<27:07:52, 3.47it/s] 9%|▊ | 32241/371472 [2:36:09<26:47:39, 3.52it/s] 9%|▊ | 32242/371472 [2:36:10<27:04:11, 3.48it/s] 9%|▊ | 32243/371472 [2:36:10<27:39:03, 3.41it/s] 9%|▊ | 32244/371472 [2:36:10<29:37:10, 3.18it/s] 9%|▊ | 32245/371472 [2:36:11<30:05:51, 3.13it/s] 9%|▊ | 32246/371472 [2:36:11<28:32:45, 3.30it/s] 9%|▊ | 32247/371472 [2:36:11<27:13:59, 3.46it/s] 9%|▊ | 32248/371472 [2:36:11<27:51:08, 3.38it/s] 9%|▊ | 32249/371472 [2:36:12<26:48:16, 3.52it/s] 9%|▊ | 32250/371472 [2:36:12<25:48:36, 3.65it/s] 9%|▊ | 32251/371472 [2:36:12<27:02:41, 3.48it/s] 9%|▊ | 32252/371472 [2:36:12<26:03:54, 3.62it/s] 9%|▊ | 32253/371472 [2:36:13<26:06:54, 3.61it/s] 9%|▊ | 32254/371472 [2:36:13<26:33:35, 3.55it/s] 9%|▊ | 32255/371472 [2:36:13<28:46:14, 3.28it/s] 9%|▊ | 32256/371472 [2:36:14<29:53:13, 3.15it/s] 9%|▊ | 32257/371472 [2:36:14<28:31:01, 3.30it/s] 9%|▊ | 32258/371472 [2:36:14<28:18:53, 3.33it/s] 9%|▊ | 32259/371472 [2:36:15<27:52:40, 3.38it/s] 9%|▊ | 32260/371472 [2:36:15<29:57:19, 3.15it/s] {'loss': 6.5362, 'learning_rate': 9.222833933073326e-07, 'epoch': 1.39} + 9%|▊ | 32260/371472 [2:36:15<29:57:19, 3.15it/s] 9%|▊ | 32261/371472 [2:36:15<29:51:33, 3.16it/s] 9%|▊ | 32262/371472 [2:36:16<28:15:01, 3.34it/s] 9%|▊ | 32263/371472 [2:36:16<29:24:32, 3.20it/s] 9%|▊ | 32264/371472 [2:36:16<28:40:27, 3.29it/s] 9%|▊ | 32265/371472 [2:36:16<28:12:04, 3.34it/s] 9%|▊ | 32266/371472 [2:36:17<27:43:50, 3.40it/s] 9%|▊ | 32267/371472 [2:36:17<27:19:01, 3.45it/s] 9%|▊ | 32268/371472 [2:36:17<26:51:04, 3.51it/s] 9%|▊ | 32269/371472 [2:36:18<26:49:08, 3.51it/s] 9%|▊ | 32270/371472 [2:36:18<26:30:29, 3.55it/s] 9%|▊ | 32271/371472 [2:36:18<25:57:47, 3.63it/s] 9%|▊ | 32272/371472 [2:36:18<25:13:00, 3.74it/s] 9%|▊ | 32273/371472 [2:36:19<25:08:40, 3.75it/s] 9%|▊ | 32274/371472 [2:36:19<25:40:07, 3.67it/s] 9%|▊ | 32275/371472 [2:36:19<25:06:10, 3.75it/s] 9%|▊ | 32276/371472 [2:36:19<27:29:51, 3.43it/s] 9%|▊ | 32277/371472 [2:36:20<29:13:13, 3.22it/s] 9%|▊ | 32278/371472 [2:36:20<28:06:31, 3.35it/s] 9%|▊ | 32279/371472 [2:36:20<27:12:58, 3.46it/s] 9%|▊ | 32280/371472 [2:36:21<26:17:15, 3.58it/s] {'loss': 6.6245, 'learning_rate': 9.222349113318537e-07, 'epoch': 1.39} + 9%|▊ | 32280/371472 [2:36:21<26:17:15, 3.58it/s] 9%|▊ | 32281/371472 [2:36:21<27:39:38, 3.41it/s] 9%|▊ | 32282/371472 [2:36:21<27:30:20, 3.43it/s] 9%|▊ | 32283/371472 [2:36:22<27:29:20, 3.43it/s] 9%|▊ | 32284/371472 [2:36:22<28:48:32, 3.27it/s] 9%|▊ | 32285/371472 [2:36:22<27:47:40, 3.39it/s] 9%|▊ | 32286/371472 [2:36:22<27:18:35, 3.45it/s] 9%|▊ | 32287/371472 [2:36:23<26:46:16, 3.52it/s] 9%|▊ | 32288/371472 [2:36:23<26:07:37, 3.61it/s] 9%|▊ | 32289/371472 [2:36:23<28:19:31, 3.33it/s] 9%|▊ | 32290/371472 [2:36:24<27:48:34, 3.39it/s] 9%|▊ | 32291/371472 [2:36:24<28:48:48, 3.27it/s] 9%|▊ | 32292/371472 [2:36:24<28:30:51, 3.30it/s] 9%|▊ | 32293/371472 [2:36:24<26:58:41, 3.49it/s] 9%|▊ | 32294/371472 [2:36:25<26:03:43, 3.62it/s] 9%|▊ | 32295/371472 [2:36:25<25:31:17, 3.69it/s] 9%|▊ | 32296/371472 [2:36:25<25:22:28, 3.71it/s] 9%|▊ | 32297/371472 [2:36:26<26:09:01, 3.60it/s] 9%|▊ | 32298/371472 [2:36:26<25:50:24, 3.65it/s] 9%|▊ | 32299/371472 [2:36:26<25:26:50, 3.70it/s] 9%|▊ | 32300/371472 [2:36:26<25:16:06, 3.73it/s] {'loss': 6.6903, 'learning_rate': 9.22186429356375e-07, 'epoch': 1.39} + 9%|▊ | 32300/371472 [2:36:26<25:16:06, 3.73it/s] 9%|▊ | 32301/371472 [2:36:27<26:14:08, 3.59it/s] 9%|▊ | 32302/371472 [2:36:27<26:09:53, 3.60it/s] 9%|▊ | 32303/371472 [2:36:27<25:50:28, 3.65it/s] 9%|▊ | 32304/371472 [2:36:27<25:53:29, 3.64it/s] 9%|▊ | 32305/371472 [2:36:28<25:51:35, 3.64it/s] 9%|▊ | 32306/371472 [2:36:28<27:51:14, 3.38it/s] 9%|▊ | 32307/371472 [2:36:28<28:14:43, 3.34it/s] 9%|▊ | 32308/371472 [2:36:29<27:04:54, 3.48it/s] 9%|▊ | 32309/371472 [2:36:29<27:31:09, 3.42it/s] 9%|▊ | 32310/371472 [2:36:29<27:38:01, 3.41it/s] 9%|▊ | 32311/371472 [2:36:30<27:38:52, 3.41it/s] 9%|▊ | 32312/371472 [2:36:30<26:37:49, 3.54it/s] 9%|▊ | 32313/371472 [2:36:30<26:04:35, 3.61it/s] 9%|▊ | 32314/371472 [2:36:30<25:11:35, 3.74it/s] 9%|▊ | 32315/371472 [2:36:31<25:22:05, 3.71it/s] 9%|▊ | 32316/371472 [2:36:31<25:07:09, 3.75it/s] 9%|▊ | 32317/371472 [2:36:31<24:54:16, 3.78it/s] 9%|▊ | 32318/371472 [2:36:31<26:16:34, 3.59it/s] 9%|▊ | 32319/371472 [2:36:32<26:16:01, 3.59it/s] 9%|▊ | 32320/371472 [2:36:32<25:37:01, 3.68it/s] {'loss': 6.7031, 'learning_rate': 9.221379473808959e-07, 'epoch': 1.39} + 9%|▊ | 32320/371472 [2:36:32<25:37:01, 3.68it/s] 9%|▊ | 32321/371472 [2:36:32<25:12:11, 3.74it/s] 9%|▊ | 32322/371472 [2:36:32<25:01:13, 3.77it/s] 9%|▊ | 32323/371472 [2:36:33<25:00:36, 3.77it/s] 9%|▊ | 32324/371472 [2:36:33<25:44:51, 3.66it/s] 9%|▊ | 32325/371472 [2:36:33<25:45:41, 3.66it/s] 9%|▊ | 32326/371472 [2:36:34<25:20:17, 3.72it/s] 9%|▊ | 32327/371472 [2:36:34<27:59:36, 3.37it/s] 9%|▊ | 32328/371472 [2:36:34<27:59:13, 3.37it/s] 9%|▊ | 32329/371472 [2:36:34<27:05:14, 3.48it/s] 9%|▊ | 32330/371472 [2:36:35<26:13:25, 3.59it/s] 9%|▊ | 32331/371472 [2:36:35<26:09:06, 3.60it/s] 9%|▊ | 32332/371472 [2:36:35<27:33:05, 3.42it/s] 9%|▊ | 32333/371472 [2:36:36<29:26:51, 3.20it/s] 9%|▊ | 32334/371472 [2:36:36<29:14:28, 3.22it/s] 9%|▊ | 32335/371472 [2:36:36<30:53:02, 3.05it/s] 9%|▊ | 32336/371472 [2:36:37<30:46:14, 3.06it/s] 9%|▊ | 32337/371472 [2:36:37<29:55:03, 3.15it/s] 9%|▊ | 32338/371472 [2:36:37<31:10:30, 3.02it/s] 9%|▊ | 32339/371472 [2:36:38<30:03:49, 3.13it/s] 9%|▊ | 32340/371472 [2:36:38<28:03:04, 3.36it/s] {'loss': 6.4752, 'learning_rate': 9.22089465405417e-07, 'epoch': 1.39} + 9%|▊ | 32340/371472 [2:36:38<28:03:04, 3.36it/s] 9%|▊ | 32341/371472 [2:36:38<27:20:35, 3.45it/s] 9%|▊ | 32342/371472 [2:36:38<26:49:13, 3.51it/s] 9%|▊ | 32343/371472 [2:36:39<26:57:18, 3.49it/s] 9%|▊ | 32344/371472 [2:36:39<26:16:37, 3.58it/s] 9%|▊ | 32345/371472 [2:36:39<26:39:59, 3.53it/s] 9%|▊ | 32346/371472 [2:36:40<26:19:52, 3.58it/s] 9%|▊ | 32347/371472 [2:36:40<30:22:43, 3.10it/s] 9%|▊ | 32348/371472 [2:36:40<28:19:51, 3.33it/s] 9%|▊ | 32349/371472 [2:36:41<28:07:30, 3.35it/s] 9%|▊ | 32350/371472 [2:36:41<27:14:48, 3.46it/s] 9%|▊ | 32351/371472 [2:36:41<28:16:26, 3.33it/s] 9%|▊ | 32352/371472 [2:36:41<27:50:59, 3.38it/s] 9%|▊ | 32353/371472 [2:36:42<27:15:48, 3.46it/s] 9%|▊ | 32354/371472 [2:36:42<26:24:31, 3.57it/s] 9%|▊ | 32355/371472 [2:36:42<26:22:17, 3.57it/s] 9%|▊ | 32356/371472 [2:36:43<27:20:39, 3.44it/s] 9%|▊ | 32357/371472 [2:36:43<27:59:01, 3.37it/s] 9%|▊ | 32358/371472 [2:36:43<31:24:41, 3.00it/s] 9%|▊ | 32359/371472 [2:36:44<30:25:06, 3.10it/s] 9%|▊ | 32360/371472 [2:36:44<28:42:22, 3.28it/s] {'loss': 6.403, 'learning_rate': 9.220409834299381e-07, 'epoch': 1.39} + 9%|▊ | 32360/371472 [2:36:44<28:42:22, 3.28it/s] 9%|▊ | 32361/371472 [2:36:44<27:22:13, 3.44it/s] 9%|▊ | 32362/371472 [2:36:44<26:35:00, 3.54it/s] 9%|▊ | 32363/371472 [2:36:45<25:29:25, 3.70it/s] 9%|▊ | 32364/371472 [2:36:45<24:55:10, 3.78it/s] 9%|▊ | 32365/371472 [2:36:45<24:54:55, 3.78it/s] 9%|▊ | 32366/371472 [2:36:45<25:23:26, 3.71it/s] 9%|▊ | 32367/371472 [2:36:46<27:02:12, 3.48it/s] 9%|▊ | 32368/371472 [2:36:46<26:05:56, 3.61it/s] 9%|▊ | 32369/371472 [2:36:46<25:30:58, 3.69it/s] 9%|▊ | 32370/371472 [2:36:46<25:18:34, 3.72it/s] 9%|▊ | 32371/371472 [2:36:47<26:43:25, 3.52it/s] 9%|▊ | 32372/371472 [2:36:47<25:52:23, 3.64it/s] 9%|▊ | 32373/371472 [2:36:47<28:07:37, 3.35it/s] 9%|▊ | 32374/371472 [2:36:48<26:37:31, 3.54it/s] 9%|▊ | 32375/371472 [2:36:48<26:32:47, 3.55it/s] 9%|▊ | 32376/371472 [2:36:48<27:30:02, 3.43it/s] 9%|▊ | 32377/371472 [2:36:49<27:11:26, 3.46it/s] 9%|▊ | 32378/371472 [2:36:49<26:17:15, 3.58it/s] 9%|▊ | 32379/371472 [2:36:49<26:16:53, 3.58it/s] 9%|▊ | 32380/371472 [2:36:49<26:08:11, 3.60it/s] {'loss': 6.3745, 'learning_rate': 9.219925014544593e-07, 'epoch': 1.39} + 9%|▊ | 32380/371472 [2:36:49<26:08:11, 3.60it/s] 9%|▊ | 32381/371472 [2:36:50<25:33:07, 3.69it/s] 9%|▊ | 32382/371472 [2:36:50<25:11:32, 3.74it/s] 9%|▊ | 32383/371472 [2:36:50<25:13:32, 3.73it/s] 9%|▊ | 32384/371472 [2:36:50<25:21:15, 3.71it/s] 9%|▊ | 32385/371472 [2:36:51<26:23:10, 3.57it/s] 9%|▊ | 32386/371472 [2:36:51<27:19:27, 3.45it/s] 9%|▊ | 32387/371472 [2:36:51<26:22:19, 3.57it/s] 9%|▊ | 32388/371472 [2:36:52<25:56:37, 3.63it/s] 9%|▊ | 32389/371472 [2:36:52<25:46:56, 3.65it/s] 9%|▊ | 32390/371472 [2:36:52<26:10:28, 3.60it/s] 9%|▊ | 32391/371472 [2:36:52<29:04:30, 3.24it/s] 9%|▊ | 32392/371472 [2:36:53<28:22:33, 3.32it/s] 9%|▊ | 32393/371472 [2:36:53<29:06:48, 3.24it/s] 9%|▊ | 32394/371472 [2:36:53<29:19:15, 3.21it/s] 9%|▊ | 32395/371472 [2:36:54<28:01:16, 3.36it/s] 9%|▊ | 32396/371472 [2:36:54<28:01:45, 3.36it/s] 9%|▊ | 32397/371472 [2:36:54<29:18:57, 3.21it/s] 9%|▊ | 32398/371472 [2:36:55<29:38:10, 3.18it/s] 9%|▊ | 32399/371472 [2:36:55<28:05:55, 3.35it/s] 9%|▊ | 32400/371472 [2:36:55<26:52:38, 3.50it/s] {'loss': 6.3431, 'learning_rate': 9.219440194789803e-07, 'epoch': 1.4} + 9%|▊ | 32400/371472 [2:36:55<26:52:38, 3.50it/s] 9%|▊ | 32401/371472 [2:36:55<25:58:51, 3.63it/s] 9%|▊ | 32402/371472 [2:36:56<27:38:11, 3.41it/s] 9%|▊ | 32403/371472 [2:36:56<27:52:27, 3.38it/s] 9%|▊ | 32404/371472 [2:36:56<27:05:08, 3.48it/s] 9%|▊ | 32405/371472 [2:36:57<27:11:42, 3.46it/s] 9%|▊ | 32406/371472 [2:36:57<26:16:50, 3.58it/s] 9%|▊ | 32407/371472 [2:36:57<27:06:39, 3.47it/s] 9%|▊ | 32408/371472 [2:36:57<26:34:50, 3.54it/s] 9%|▊ | 32409/371472 [2:36:58<26:58:27, 3.49it/s] 9%|▊ | 32410/371472 [2:36:58<26:30:33, 3.55it/s] 9%|▊ | 32411/371472 [2:36:58<26:33:27, 3.55it/s] 9%|▊ | 32412/371472 [2:36:59<25:59:38, 3.62it/s] 9%|▊ | 32413/371472 [2:36:59<26:05:01, 3.61it/s] 9%|▊ | 32414/371472 [2:36:59<26:07:50, 3.60it/s] 9%|▊ | 32415/371472 [2:36:59<26:55:25, 3.50it/s] 9%|▊ | 32416/371472 [2:37:00<29:32:27, 3.19it/s] 9%|▊ | 32417/371472 [2:37:00<28:30:31, 3.30it/s] 9%|▊ | 32418/371472 [2:37:00<28:20:14, 3.32it/s] 9%|▊ | 32419/371472 [2:37:01<28:03:26, 3.36it/s] 9%|▊ | 32420/371472 [2:37:01<28:08:02, 3.35it/s] {'loss': 6.1962, 'learning_rate': 9.218955375035015e-07, 'epoch': 1.4} + 9%|▊ | 32420/371472 [2:37:01<28:08:02, 3.35it/s] 9%|▊ | 32421/371472 [2:37:01<26:39:34, 3.53it/s] 9%|▊ | 32422/371472 [2:37:01<25:56:30, 3.63it/s] 9%|▊ | 32423/371472 [2:37:02<25:59:44, 3.62it/s] 9%|▊ | 32424/371472 [2:37:02<25:34:57, 3.68it/s] 9%|▊ | 32425/371472 [2:37:02<24:56:15, 3.78it/s] 9%|▊ | 32426/371472 [2:37:03<25:01:51, 3.76it/s] 9%|▊ | 32427/371472 [2:37:03<25:36:53, 3.68it/s] 9%|▊ | 32428/371472 [2:37:03<25:49:49, 3.65it/s] 9%|▊ | 32429/371472 [2:37:03<25:40:08, 3.67it/s] 9%|▊ | 32430/371472 [2:37:04<25:29:39, 3.69it/s] 9%|▊ | 32431/371472 [2:37:04<25:32:41, 3.69it/s] 9%|▊ | 32432/371472 [2:37:04<26:29:58, 3.55it/s] 9%|▊ | 32433/371472 [2:37:04<26:14:01, 3.59it/s] 9%|▊ | 32434/371472 [2:37:05<26:03:11, 3.61it/s] 9%|▊ | 32435/371472 [2:37:05<26:03:51, 3.61it/s] 9%|▊ | 32436/371472 [2:37:05<26:52:32, 3.50it/s] 9%|▊ | 32437/371472 [2:37:06<26:30:32, 3.55it/s] 9%|▊ | 32438/371472 [2:37:06<27:14:30, 3.46it/s] 9%|▊ | 32439/371472 [2:37:06<27:17:38, 3.45it/s] 9%|▊ | 32440/371472 [2:37:06<27:57:55, 3.37it/s] {'loss': 6.4415, 'learning_rate': 9.218470555280226e-07, 'epoch': 1.4} + 9%|▊ | 32440/371472 [2:37:06<27:57:55, 3.37it/s] 9%|▊ | 32441/371472 [2:37:07<28:25:08, 3.31it/s] 9%|▊ | 32442/371472 [2:37:07<27:40:29, 3.40it/s] 9%|▊ | 32443/371472 [2:37:07<26:25:57, 3.56it/s] 9%|▊ | 32444/371472 [2:37:08<27:29:31, 3.43it/s] 9%|▊ | 32445/371472 [2:37:08<28:45:50, 3.27it/s] 9%|▊ | 32446/371472 [2:37:08<28:08:52, 3.35it/s] 9%|▊ | 32447/371472 [2:37:09<30:26:55, 3.09it/s] 9%|▊ | 32448/371472 [2:37:09<28:35:30, 3.29it/s] 9%|▊ | 32449/371472 [2:37:09<27:54:13, 3.37it/s] 9%|▊ | 32450/371472 [2:37:09<26:54:55, 3.50it/s] 9%|▊ | 32451/371472 [2:37:10<26:19:19, 3.58it/s] 9%|▊ | 32452/371472 [2:37:10<28:13:10, 3.34it/s] 9%|▊ | 32453/371472 [2:37:10<28:02:53, 3.36it/s] 9%|▊ | 32454/371472 [2:37:11<27:34:02, 3.42it/s] 9%|▊ | 32455/371472 [2:37:11<27:04:39, 3.48it/s] 9%|▊ | 32456/371472 [2:37:11<27:33:31, 3.42it/s] 9%|▊ | 32457/371472 [2:37:11<26:53:57, 3.50it/s] 9%|▊ | 32458/371472 [2:37:12<26:08:34, 3.60it/s] 9%|▊ | 32459/371472 [2:37:12<27:14:01, 3.46it/s] 9%|▊ | 32460/371472 [2:37:12<27:51:17, 3.38it/s] {'loss': 6.5485, 'learning_rate': 9.217985735525436e-07, 'epoch': 1.4} + 9%|▊ | 32460/371472 [2:37:12<27:51:17, 3.38it/s] 9%|▊ | 32461/371472 [2:37:13<26:50:52, 3.51it/s] 9%|▊ | 32462/371472 [2:37:13<25:59:00, 3.62it/s] 9%|▊ | 32463/371472 [2:37:13<26:35:18, 3.54it/s] 9%|▊ | 32464/371472 [2:37:13<26:29:06, 3.56it/s] 9%|▊ | 32465/371472 [2:37:14<28:58:07, 3.25it/s] 9%|▊ | 32466/371472 [2:37:14<27:03:39, 3.48it/s] 9%|▊ | 32467/371472 [2:37:14<26:59:51, 3.49it/s] 9%|▊ | 32468/371472 [2:37:15<27:15:17, 3.46it/s] 9%|▊ | 32469/371472 [2:37:15<26:46:08, 3.52it/s] 9%|▊ | 32470/371472 [2:37:15<26:20:04, 3.58it/s] 9%|▊ | 32471/371472 [2:37:15<26:01:39, 3.62it/s] 9%|▊ | 32472/371472 [2:37:16<27:11:58, 3.46it/s] 9%|▊ | 32473/371472 [2:37:16<26:41:34, 3.53it/s] 9%|▊ | 32474/371472 [2:37:16<25:46:43, 3.65it/s] 9%|▊ | 32475/371472 [2:37:17<25:34:59, 3.68it/s] 9%|▊ | 32476/371472 [2:37:17<25:41:26, 3.67it/s] 9%|▊ | 32477/371472 [2:37:17<25:39:33, 3.67it/s] 9%|▊ | 32478/371472 [2:37:17<27:20:05, 3.44it/s] 9%|▊ | 32479/371472 [2:37:18<26:48:01, 3.51it/s] 9%|▊ | 32480/371472 [2:37:18<27:04:46, 3.48it/s] {'loss': 6.7211, 'learning_rate': 9.217500915770647e-07, 'epoch': 1.4} + 9%|▊ | 32480/371472 [2:37:18<27:04:46, 3.48it/s] 9%|▊ | 32481/371472 [2:37:18<28:15:42, 3.33it/s] 9%|▊ | 32482/371472 [2:37:19<28:25:21, 3.31it/s] 9%|▊ | 32483/371472 [2:37:19<28:59:37, 3.25it/s] 9%|▊ | 32484/371472 [2:37:19<28:28:40, 3.31it/s] 9%|▊ | 32485/371472 [2:37:20<27:37:37, 3.41it/s] 9%|▊ | 32486/371472 [2:37:20<26:59:07, 3.49it/s] 9%|▊ | 32487/371472 [2:37:20<26:18:26, 3.58it/s] 9%|▊ | 32488/371472 [2:37:20<25:51:03, 3.64it/s] 9%|▊ | 32489/371472 [2:37:21<26:02:56, 3.61it/s] 9%|▊ | 32490/371472 [2:37:21<26:08:39, 3.60it/s] 9%|▊ | 32491/371472 [2:37:22<38:20:56, 2.46it/s] 9%|▊ | 32492/371472 [2:37:22<33:46:14, 2.79it/s] 9%|▊ | 32493/371472 [2:37:22<31:38:15, 2.98it/s] 9%|▊ | 32494/371472 [2:37:22<29:35:28, 3.18it/s] 9%|▊ | 32495/371472 [2:37:23<28:43:14, 3.28it/s] 9%|▊ | 32496/371472 [2:37:23<29:15:02, 3.22it/s] 9%|▊ | 32497/371472 [2:37:23<29:08:58, 3.23it/s] 9%|▊ | 32498/371472 [2:37:24<27:17:01, 3.45it/s] 9%|▊ | 32499/371472 [2:37:24<30:13:30, 3.12it/s] 9%|▊ | 32500/371472 [2:37:24<29:00:50, 3.25it/s] {'loss': 6.5814, 'learning_rate': 9.217016096015858e-07, 'epoch': 1.4} + 9%|▊ | 32500/371472 [2:37:24<29:00:50, 3.25it/s] 9%|▊ | 32501/371472 [2:37:25<28:34:54, 3.29it/s] 9%|▊ | 32502/371472 [2:37:25<27:46:25, 3.39it/s] 9%|▊ | 32503/371472 [2:37:25<27:20:22, 3.44it/s] 9%|▉ | 32504/371472 [2:37:25<27:53:47, 3.38it/s] 9%|▉ | 32505/371472 [2:37:26<26:56:41, 3.49it/s] 9%|▉ | 32506/371472 [2:37:26<25:54:26, 3.63it/s] 9%|▉ | 32507/371472 [2:37:26<27:01:58, 3.48it/s] 9%|▉ | 32508/371472 [2:37:26<26:30:55, 3.55it/s] 9%|▉ | 32509/371472 [2:37:27<26:23:11, 3.57it/s] 9%|▉ | 32510/371472 [2:37:27<27:11:54, 3.46it/s] 9%|▉ | 32511/371472 [2:37:27<26:56:11, 3.50it/s] 9%|▉ | 32512/371472 [2:37:28<27:27:54, 3.43it/s] 9%|▉ | 32513/371472 [2:37:28<27:45:48, 3.39it/s] 9%|▉ | 32514/371472 [2:37:28<26:25:53, 3.56it/s] 9%|▉ | 32515/371472 [2:37:29<27:41:03, 3.40it/s] 9%|▉ | 32516/371472 [2:37:29<26:22:58, 3.57it/s] 9%|▉ | 32517/371472 [2:37:29<26:13:42, 3.59it/s] 9%|▉ | 32518/371472 [2:37:29<27:24:57, 3.43it/s] 9%|▉ | 32519/371472 [2:37:30<28:35:08, 3.29it/s] 9%|▉ | 32520/371472 [2:37:30<28:18:08, 3.33it/s] {'loss': 6.4893, 'learning_rate': 9.21653127626107e-07, 'epoch': 1.4} + 9%|▉ | 32520/371472 [2:37:30<28:18:08, 3.33it/s] 9%|▉ | 32521/371472 [2:37:30<27:55:15, 3.37it/s] 9%|▉ | 32522/371472 [2:37:31<30:33:45, 3.08it/s] 9%|▉ | 32523/371472 [2:37:31<29:23:55, 3.20it/s] 9%|▉ | 32524/371472 [2:37:31<30:23:28, 3.10it/s] 9%|▉ | 32525/371472 [2:37:32<28:54:30, 3.26it/s] 9%|▉ | 32526/371472 [2:37:32<27:06:48, 3.47it/s] 9%|▉ | 32527/371472 [2:37:32<27:23:58, 3.44it/s] 9%|▉ | 32528/371472 [2:37:32<26:37:48, 3.54it/s] 9%|▉ | 32529/371472 [2:37:33<27:40:02, 3.40it/s] 9%|▉ | 32530/371472 [2:37:33<27:07:18, 3.47it/s] 9%|▉ | 32531/371472 [2:37:33<26:55:52, 3.50it/s] 9%|▉ | 32532/371472 [2:37:34<26:25:05, 3.56it/s] 9%|▉ | 32533/371472 [2:37:34<26:12:33, 3.59it/s] 9%|▉ | 32534/371472 [2:37:34<25:25:16, 3.70it/s] 9%|▉ | 32535/371472 [2:37:34<25:56:43, 3.63it/s] 9%|▉ | 32536/371472 [2:37:35<26:09:40, 3.60it/s] 9%|▉ | 32537/371472 [2:37:35<26:24:01, 3.57it/s] 9%|▉ | 32538/371472 [2:37:35<27:59:50, 3.36it/s] 9%|▉ | 32539/371472 [2:37:36<28:10:55, 3.34it/s] 9%|▉ | 32540/371472 [2:37:36<28:14:07, 3.33it/s] {'loss': 6.5722, 'learning_rate': 9.216046456506281e-07, 'epoch': 1.4} + 9%|▉ | 32540/371472 [2:37:36<28:14:07, 3.33it/s] 9%|▉ | 32541/371472 [2:37:36<27:59:54, 3.36it/s] 9%|▉ | 32542/371472 [2:37:36<27:02:32, 3.48it/s] 9%|▉ | 32543/371472 [2:37:37<26:13:26, 3.59it/s] 9%|▉ | 32544/371472 [2:37:37<26:43:51, 3.52it/s] 9%|▉ | 32545/371472 [2:37:37<25:47:34, 3.65it/s] 9%|▉ | 32546/371472 [2:37:38<27:27:16, 3.43it/s] 9%|▉ | 32547/371472 [2:37:38<29:15:13, 3.22it/s] 9%|▉ | 32548/371472 [2:37:38<30:55:57, 3.04it/s] 9%|▉ | 32549/371472 [2:37:39<29:07:20, 3.23it/s] 9%|▉ | 32550/371472 [2:37:39<28:54:58, 3.26it/s] 9%|▉ | 32551/371472 [2:37:39<27:12:06, 3.46it/s] 9%|▉ | 32552/371472 [2:37:39<28:02:00, 3.36it/s] 9%|▉ | 32553/371472 [2:37:40<28:39:28, 3.29it/s] 9%|▉ | 32554/371472 [2:37:40<28:14:33, 3.33it/s] 9%|▉ | 32555/371472 [2:37:40<28:07:49, 3.35it/s] 9%|▉ | 32556/371472 [2:37:41<27:13:35, 3.46it/s] 9%|▉ | 32557/371472 [2:37:41<26:49:02, 3.51it/s] 9%|▉ | 32558/371472 [2:37:41<30:32:59, 3.08it/s] 9%|▉ | 32559/371472 [2:37:42<30:12:47, 3.12it/s] 9%|▉ | 32560/371472 [2:37:42<29:50:04, 3.16it/s] {'loss': 6.5738, 'learning_rate': 9.215561636751492e-07, 'epoch': 1.4} + 9%|▉ | 32560/371472 [2:37:42<29:50:04, 3.16it/s] 9%|▉ | 32561/371472 [2:37:42<29:06:21, 3.23it/s] 9%|▉ | 32562/371472 [2:37:42<27:53:15, 3.38it/s] 9%|▉ | 32563/371472 [2:37:43<26:55:14, 3.50it/s] 9%|▉ | 32564/371472 [2:37:43<26:58:21, 3.49it/s] 9%|▉ | 32565/371472 [2:37:43<26:43:33, 3.52it/s] 9%|▉ | 32566/371472 [2:37:44<29:45:24, 3.16it/s] 9%|▉ | 32567/371472 [2:37:44<29:06:20, 3.23it/s] 9%|▉ | 32568/371472 [2:37:44<31:18:22, 3.01it/s] 9%|▉ | 32569/371472 [2:37:45<29:18:25, 3.21it/s] 9%|▉ | 32570/371472 [2:37:45<29:32:50, 3.19it/s] 9%|▉ | 32571/371472 [2:37:45<28:37:14, 3.29it/s] 9%|▉ | 32572/371472 [2:37:46<28:39:19, 3.29it/s] 9%|▉ | 32573/371472 [2:37:46<29:42:10, 3.17it/s] 9%|▉ | 32574/371472 [2:37:46<29:13:34, 3.22it/s] 9%|▉ | 32575/371472 [2:37:46<28:06:05, 3.35it/s] 9%|▉ | 32576/371472 [2:37:47<27:31:20, 3.42it/s] 9%|▉ | 32577/371472 [2:37:47<26:42:40, 3.52it/s] 9%|▉ | 32578/371472 [2:37:47<28:18:20, 3.33it/s] 9%|▉ | 32579/371472 [2:37:48<28:05:58, 3.35it/s] 9%|▉ | 32580/371472 [2:37:48<27:09:08, 3.47it/s] {'loss': 6.5566, 'learning_rate': 9.215076816996703e-07, 'epoch': 1.4} + 9%|▉ | 32580/371472 [2:37:48<27:09:08, 3.47it/s] 9%|▉ | 32581/371472 [2:37:48<28:39:41, 3.28it/s] 9%|▉ | 32582/371472 [2:37:48<28:28:39, 3.31it/s] 9%|▉ | 32583/371472 [2:37:49<27:54:24, 3.37it/s] 9%|▉ | 32584/371472 [2:37:49<27:28:27, 3.43it/s] 9%|▉ | 32585/371472 [2:37:49<27:01:14, 3.48it/s] 9%|▉ | 32586/371472 [2:37:50<27:00:56, 3.48it/s] 9%|▉ | 32587/371472 [2:37:50<26:39:19, 3.53it/s] 9%|▉ | 32588/371472 [2:37:50<26:31:09, 3.55it/s] 9%|▉ | 32589/371472 [2:37:51<28:25:32, 3.31it/s] 9%|▉ | 32590/371472 [2:37:51<26:59:01, 3.49it/s] 9%|▉ | 32591/371472 [2:37:51<27:33:05, 3.42it/s] 9%|▉ | 32592/371472 [2:37:51<30:22:47, 3.10it/s] 9%|▉ | 32593/371472 [2:37:52<28:42:30, 3.28it/s] 9%|▉ | 32594/371472 [2:37:52<27:34:19, 3.41it/s] 9%|▉ | 32595/371472 [2:37:52<26:35:05, 3.54it/s] 9%|▉ | 32596/371472 [2:37:53<26:11:26, 3.59it/s] 9%|▉ | 32597/371472 [2:37:53<25:41:51, 3.66it/s] 9%|▉ | 32598/371472 [2:37:53<28:41:02, 3.28it/s] 9%|▉ | 32599/371472 [2:37:53<28:44:19, 3.28it/s] 9%|▉ | 32600/371472 [2:37:54<28:41:45, 3.28it/s] {'loss': 6.3732, 'learning_rate': 9.214591997241914e-07, 'epoch': 1.4} + 9%|▉ | 32600/371472 [2:37:54<28:41:45, 3.28it/s] 9%|▉ | 32601/371472 [2:37:54<28:13:24, 3.34it/s] 9%|▉ | 32602/371472 [2:37:54<27:43:18, 3.40it/s] 9%|▉ | 32603/371472 [2:37:55<27:32:47, 3.42it/s] 9%|▉ | 32604/371472 [2:37:55<26:48:50, 3.51it/s] 9%|▉ | 32605/371472 [2:37:55<27:15:25, 3.45it/s] 9%|▉ | 32606/371472 [2:37:55<26:16:21, 3.58it/s] 9%|▉ | 32607/371472 [2:37:56<25:55:25, 3.63it/s] 9%|▉ | 32608/371472 [2:37:56<26:57:57, 3.49it/s] 9%|▉ | 32609/371472 [2:37:56<26:59:23, 3.49it/s] 9%|▉ | 32610/371472 [2:37:57<27:42:57, 3.40it/s] 9%|▉ | 32611/371472 [2:37:57<28:19:57, 3.32it/s] 9%|▉ | 32612/371472 [2:37:57<26:54:15, 3.50it/s] 9%|▉ | 32613/371472 [2:37:57<25:56:59, 3.63it/s] 9%|▉ | 32614/371472 [2:37:58<26:19:51, 3.57it/s] 9%|▉ | 32615/371472 [2:37:58<25:44:51, 3.66it/s] 9%|▉ | 32616/371472 [2:37:58<26:22:54, 3.57it/s] 9%|▉ | 32617/371472 [2:37:59<25:46:56, 3.65it/s] 9%|▉ | 32618/371472 [2:37:59<25:41:38, 3.66it/s] 9%|▉ | 32619/371472 [2:37:59<26:15:59, 3.58it/s] 9%|▉ | 32620/371472 [2:37:59<25:46:44, 3.65it/s] {'loss': 6.18, 'learning_rate': 9.214107177487125e-07, 'epoch': 1.41} + 9%|▉ | 32620/371472 [2:37:59<25:46:44, 3.65it/s] 9%|▉ | 32621/371472 [2:38:00<25:29:06, 3.69it/s] 9%|▉ | 32622/371472 [2:38:00<27:20:55, 3.44it/s] 9%|▉ | 32623/371472 [2:38:00<28:20:46, 3.32it/s] 9%|▉ | 32624/371472 [2:38:01<27:54:32, 3.37it/s] 9%|▉ | 32625/371472 [2:38:01<27:25:39, 3.43it/s] 9%|▉ | 32626/371472 [2:38:01<26:33:21, 3.54it/s] 9%|▉ | 32627/371472 [2:38:01<28:04:58, 3.35it/s] 9%|▉ | 32628/371472 [2:38:02<28:14:00, 3.33it/s] 9%|▉ | 32629/371472 [2:38:02<27:38:11, 3.41it/s] 9%|▉ | 32630/371472 [2:38:02<27:46:19, 3.39it/s] 9%|▉ | 32631/371472 [2:38:03<26:33:46, 3.54it/s] 9%|▉ | 32632/371472 [2:38:03<26:32:51, 3.55it/s] 9%|▉ | 32633/371472 [2:38:03<26:30:52, 3.55it/s] 9%|▉ | 32634/371472 [2:38:04<27:56:46, 3.37it/s] 9%|▉ | 32635/371472 [2:38:04<27:03:00, 3.48it/s] 9%|▉ | 32636/371472 [2:38:04<27:58:47, 3.36it/s] 9%|▉ | 32637/371472 [2:38:04<27:05:31, 3.47it/s] 9%|▉ | 32638/371472 [2:38:05<28:47:48, 3.27it/s] 9%|▉ | 32639/371472 [2:38:05<27:39:18, 3.40it/s] 9%|▉ | 32640/371472 [2:38:05<28:04:06, 3.35it/s] {'loss': 6.2922, 'learning_rate': 9.213622357732336e-07, 'epoch': 1.41} + 9%|▉ | 32640/371472 [2:38:05<28:04:06, 3.35it/s] 9%|▉ | 32641/371472 [2:38:06<27:26:24, 3.43it/s] 9%|▉ | 32642/371472 [2:38:06<26:36:20, 3.54it/s] 9%|▉ | 32643/371472 [2:38:06<26:14:44, 3.59it/s] 9%|▉ | 32644/371472 [2:38:06<26:06:00, 3.61it/s] 9%|▉ | 32645/371472 [2:38:07<25:31:49, 3.69it/s] 9%|▉ | 32646/371472 [2:38:07<26:39:16, 3.53it/s] 9%|▉ | 32647/371472 [2:38:07<27:07:48, 3.47it/s] 9%|▉ | 32648/371472 [2:38:08<27:26:03, 3.43it/s] 9%|▉ | 32649/371472 [2:38:08<27:12:15, 3.46it/s] 9%|▉ | 32650/371472 [2:38:08<26:44:42, 3.52it/s] 9%|▉ | 32651/371472 [2:38:08<26:59:40, 3.49it/s] 9%|▉ | 32652/371472 [2:38:09<27:16:32, 3.45it/s] 9%|▉ | 32653/371472 [2:38:09<26:10:35, 3.60it/s] 9%|▉ | 32654/371472 [2:38:09<25:54:24, 3.63it/s] 9%|▉ | 32655/371472 [2:38:09<25:50:13, 3.64it/s] 9%|▉ | 32656/371472 [2:38:10<25:41:45, 3.66it/s] 9%|▉ | 32657/371472 [2:38:10<25:09:48, 3.74it/s] 9%|▉ | 32658/371472 [2:38:10<26:37:34, 3.53it/s] 9%|▉ | 32659/371472 [2:38:11<26:16:25, 3.58it/s] 9%|▉ | 32660/371472 [2:38:11<25:48:32, 3.65it/s] {'loss': 6.5907, 'learning_rate': 9.213137537977547e-07, 'epoch': 1.41} + 9%|▉ | 32660/371472 [2:38:11<25:48:32, 3.65it/s] 9%|▉ | 32661/371472 [2:38:11<27:07:37, 3.47it/s] 9%|▉ | 32662/371472 [2:38:11<27:28:43, 3.42it/s] 9%|▉ | 32663/371472 [2:38:12<26:47:17, 3.51it/s] 9%|▉ | 32664/371472 [2:38:12<26:37:16, 3.54it/s] 9%|▉ | 32665/371472 [2:38:12<26:34:32, 3.54it/s] 9%|▉ | 32666/371472 [2:38:13<27:47:42, 3.39it/s] 9%|▉ | 32667/371472 [2:38:13<26:52:37, 3.50it/s] 9%|▉ | 32668/371472 [2:38:13<26:45:48, 3.52it/s] 9%|▉ | 32669/371472 [2:38:13<27:25:25, 3.43it/s] 9%|▉ | 32670/371472 [2:38:14<26:28:25, 3.55it/s] 9%|▉ | 32671/371472 [2:38:14<26:35:27, 3.54it/s] 9%|▉ | 32672/371472 [2:38:14<28:08:34, 3.34it/s] 9%|▉ | 32673/371472 [2:38:15<27:20:40, 3.44it/s] 9%|▉ | 32674/371472 [2:38:15<26:36:38, 3.54it/s] 9%|▉ | 32675/371472 [2:38:15<26:01:58, 3.62it/s] 9%|▉ | 32676/371472 [2:38:15<27:09:07, 3.47it/s] 9%|▉ | 32677/371472 [2:38:16<27:00:14, 3.49it/s] 9%|▉ | 32678/371472 [2:38:16<26:46:08, 3.52it/s] 9%|▉ | 32679/371472 [2:38:16<27:38:30, 3.40it/s] 9%|▉ | 32680/371472 [2:38:17<27:35:36, 3.41it/s] {'loss': 6.4877, 'learning_rate': 9.212652718222759e-07, 'epoch': 1.41} + 9%|▉ | 32680/371472 [2:38:17<27:35:36, 3.41it/s] 9%|▉ | 32681/371472 [2:38:17<27:09:37, 3.46it/s] 9%|▉ | 32682/371472 [2:38:17<27:22:16, 3.44it/s] 9%|▉ | 32683/371472 [2:38:17<26:51:14, 3.50it/s] 9%|▉ | 32684/371472 [2:38:18<27:46:50, 3.39it/s] 9%|▉ | 32685/371472 [2:38:18<28:20:18, 3.32it/s] 9%|▉ | 32686/371472 [2:38:18<27:38:24, 3.40it/s] 9%|▉ | 32687/371472 [2:38:19<27:21:32, 3.44it/s] 9%|▉ | 32688/371472 [2:38:19<27:36:59, 3.41it/s] 9%|▉ | 32689/371472 [2:38:19<29:40:07, 3.17it/s] 9%|▉ | 32690/371472 [2:38:20<30:01:48, 3.13it/s] 9%|▉ | 32691/371472 [2:38:20<28:46:53, 3.27it/s] 9%|▉ | 32692/371472 [2:38:20<27:52:36, 3.38it/s] 9%|▉ | 32693/371472 [2:38:20<26:33:45, 3.54it/s] 9%|▉ | 32694/371472 [2:38:21<26:16:05, 3.58it/s] 9%|▉ | 32695/371472 [2:38:21<25:45:23, 3.65it/s] 9%|▉ | 32696/371472 [2:38:21<26:26:46, 3.56it/s] 9%|▉ | 32697/371472 [2:38:22<26:59:58, 3.49it/s] 9%|▉ | 32698/371472 [2:38:22<26:07:22, 3.60it/s] 9%|▉ | 32699/371472 [2:38:22<26:32:07, 3.55it/s] 9%|▉ | 32700/371472 [2:38:22<26:14:21, 3.59it/s] {'loss': 6.4984, 'learning_rate': 9.212167898467969e-07, 'epoch': 1.41} + 9%|▉ | 32700/371472 [2:38:22<26:14:21, 3.59it/s] 9%|▉ | 32701/371472 [2:38:23<26:20:36, 3.57it/s] 9%|▉ | 32702/371472 [2:38:23<29:13:13, 3.22it/s] 9%|▉ | 32703/371472 [2:38:23<28:47:10, 3.27it/s] 9%|▉ | 32704/371472 [2:38:24<27:29:54, 3.42it/s] 9%|▉ | 32705/371472 [2:38:24<29:10:52, 3.22it/s] 9%|▉ | 32706/371472 [2:38:24<27:39:16, 3.40it/s] 9%|▉ | 32707/371472 [2:38:24<26:32:24, 3.55it/s] 9%|▉ | 32708/371472 [2:38:25<26:07:22, 3.60it/s] 9%|▉ | 32709/371472 [2:38:25<26:16:48, 3.58it/s] 9%|▉ | 32710/371472 [2:38:25<26:44:02, 3.52it/s] 9%|▉ | 32711/371472 [2:38:26<28:40:35, 3.28it/s] 9%|▉ | 32712/371472 [2:38:26<27:24:22, 3.43it/s] 9%|▉ | 32713/371472 [2:38:26<29:50:03, 3.15it/s] 9%|▉ | 32714/371472 [2:38:27<28:03:44, 3.35it/s] 9%|▉ | 32715/371472 [2:38:27<38:32:43, 2.44it/s] 9%|▉ | 32716/371472 [2:38:28<36:43:39, 2.56it/s] 9%|▉ | 32717/371472 [2:38:28<34:53:13, 2.70it/s] 9%|▉ | 32718/371472 [2:38:28<31:55:55, 2.95it/s] 9%|▉ | 32719/371472 [2:38:28<30:04:55, 3.13it/s] 9%|▉ | 32720/371472 [2:38:29<28:42:31, 3.28it/s] {'loss': 6.1901, 'learning_rate': 9.21168307871318e-07, 'epoch': 1.41} + 9%|▉ | 32720/371472 [2:38:29<28:42:31, 3.28it/s] 9%|▉ | 32721/371472 [2:38:29<27:48:53, 3.38it/s] 9%|▉ | 32722/371472 [2:38:29<27:46:52, 3.39it/s] 9%|▉ | 32723/371472 [2:38:30<27:11:32, 3.46it/s] 9%|▉ | 32724/371472 [2:38:30<26:58:21, 3.49it/s] 9%|▉ | 32725/371472 [2:38:30<26:07:48, 3.60it/s] 9%|▉ | 32726/371472 [2:38:30<26:20:27, 3.57it/s] 9%|▉ | 32727/371472 [2:38:31<26:59:46, 3.49it/s] 9%|▉ | 32728/371472 [2:38:31<29:04:43, 3.24it/s] 9%|▉ | 32729/371472 [2:38:31<28:03:55, 3.35it/s] 9%|▉ | 32730/371472 [2:38:32<26:54:51, 3.50it/s] 9%|▉ | 32731/371472 [2:38:32<29:16:33, 3.21it/s] 9%|▉ | 32732/371472 [2:38:32<28:17:39, 3.33it/s] 9%|▉ | 32733/371472 [2:38:33<28:02:34, 3.36it/s] 9%|▉ | 32734/371472 [2:38:33<30:31:16, 3.08it/s] 9%|▉ | 32735/371472 [2:38:33<29:19:34, 3.21it/s] 9%|▉ | 32736/371472 [2:38:34<29:13:42, 3.22it/s] 9%|▉ | 32737/371472 [2:38:34<27:37:10, 3.41it/s] 9%|▉ | 32738/371472 [2:38:34<27:57:49, 3.36it/s] 9%|▉ | 32739/371472 [2:38:34<26:50:43, 3.50it/s] 9%|▉ | 32740/371472 [2:38:35<26:19:39, 3.57it/s] {'loss': 6.1892, 'learning_rate': 9.211198258958391e-07, 'epoch': 1.41} + 9%|▉ | 32740/371472 [2:38:35<26:19:39, 3.57it/s] 9%|▉ | 32741/371472 [2:38:35<25:44:39, 3.65it/s] 9%|▉ | 32742/371472 [2:38:35<25:22:44, 3.71it/s] 9%|▉ | 32743/371472 [2:38:35<25:47:41, 3.65it/s] 9%|▉ | 32744/371472 [2:38:36<28:41:52, 3.28it/s] 9%|▉ | 32745/371472 [2:38:36<28:41:25, 3.28it/s] 9%|▉ | 32746/371472 [2:38:36<27:47:00, 3.39it/s] 9%|▉ | 32747/371472 [2:38:37<28:12:36, 3.34it/s] 9%|▉ | 32748/371472 [2:38:37<26:30:38, 3.55it/s] 9%|▉ | 32749/371472 [2:38:37<27:03:50, 3.48it/s] 9%|▉ | 32750/371472 [2:38:38<27:20:09, 3.44it/s] 9%|▉ | 32751/371472 [2:38:38<26:02:02, 3.61it/s] 9%|▉ | 32752/371472 [2:38:38<29:39:32, 3.17it/s] 9%|▉ | 32753/371472 [2:38:38<27:59:50, 3.36it/s] 9%|▉ | 32754/371472 [2:38:39<28:09:11, 3.34it/s] 9%|▉ | 32755/371472 [2:38:39<27:21:02, 3.44it/s] 9%|▉ | 32756/371472 [2:38:39<27:05:25, 3.47it/s] 9%|▉ | 32757/371472 [2:38:40<26:06:36, 3.60it/s] 9%|▉ | 32758/371472 [2:38:40<25:04:15, 3.75it/s] 9%|▉ | 32759/371472 [2:38:40<24:51:44, 3.78it/s] 9%|▉ | 32760/371472 [2:38:40<25:30:12, 3.69it/s] {'loss': 6.4323, 'learning_rate': 9.210713439203602e-07, 'epoch': 1.41} + 9%|▉ | 32760/371472 [2:38:40<25:30:12, 3.69it/s] 9%|▉ | 32761/371472 [2:38:41<25:43:23, 3.66it/s] 9%|▉ | 32762/371472 [2:38:41<26:34:05, 3.54it/s] 9%|▉ | 32763/371472 [2:38:41<26:54:38, 3.50it/s] 9%|▉ | 32764/371472 [2:38:41<27:24:58, 3.43it/s] 9%|▉ | 32765/371472 [2:38:42<26:43:37, 3.52it/s] 9%|▉ | 32766/371472 [2:38:42<26:10:26, 3.59it/s] 9%|▉ | 32767/371472 [2:38:42<25:51:33, 3.64it/s] 9%|▉ | 32768/371472 [2:38:43<26:12:53, 3.59it/s] 9%|▉ | 32769/371472 [2:38:43<28:04:42, 3.35it/s] 9%|▉ | 32770/371472 [2:38:43<29:44:22, 3.16it/s] 9%|▉ | 32771/371472 [2:38:44<29:04:46, 3.24it/s] 9%|▉ | 32772/371472 [2:38:44<28:04:29, 3.35it/s] 9%|▉ | 32773/371472 [2:38:44<28:54:57, 3.25it/s] 9%|▉ | 32774/371472 [2:38:44<28:18:49, 3.32it/s] 9%|▉ | 32775/371472 [2:38:45<27:47:15, 3.39it/s] 9%|▉ | 32776/371472 [2:38:45<27:43:21, 3.39it/s] 9%|▉ | 32777/371472 [2:38:45<26:42:38, 3.52it/s] 9%|▉ | 32778/371472 [2:38:46<26:23:11, 3.57it/s] 9%|▉ | 32779/371472 [2:38:46<26:45:03, 3.52it/s] 9%|▉ | 32780/371472 [2:38:46<27:00:32, 3.48it/s] {'loss': 6.4229, 'learning_rate': 9.210228619448813e-07, 'epoch': 1.41} + 9%|▉ | 32780/371472 [2:38:46<27:00:32, 3.48it/s] 9%|▉ | 32781/371472 [2:38:46<26:18:00, 3.58it/s] 9%|▉ | 32782/371472 [2:38:47<25:31:09, 3.69it/s] 9%|▉ | 32783/371472 [2:38:47<25:02:19, 3.76it/s] 9%|▉ | 32784/371472 [2:38:47<25:53:07, 3.63it/s] 9%|▉ | 32785/371472 [2:38:47<25:02:15, 3.76it/s] 9%|▉ | 32786/371472 [2:38:48<25:56:21, 3.63it/s] 9%|▉ | 32787/371472 [2:38:48<26:09:59, 3.60it/s] 9%|▉ | 32788/371472 [2:38:48<25:20:55, 3.71it/s] 9%|▉ | 32789/371472 [2:38:49<26:07:36, 3.60it/s] 9%|▉ | 32790/371472 [2:38:49<25:26:37, 3.70it/s] 9%|▉ | 32791/371472 [2:38:49<27:14:26, 3.45it/s] 9%|▉ | 32792/371472 [2:38:49<27:33:31, 3.41it/s] 9%|▉ | 32793/371472 [2:38:50<27:53:11, 3.37it/s] 9%|▉ | 32794/371472 [2:38:50<27:44:13, 3.39it/s] 9%|▉ | 32795/371472 [2:38:50<27:19:10, 3.44it/s] 9%|▉ | 32796/371472 [2:38:51<27:14:31, 3.45it/s] 9%|▉ | 32797/371472 [2:38:51<27:53:58, 3.37it/s] 9%|▉ | 32798/371472 [2:38:51<26:53:42, 3.50it/s] 9%|▉ | 32799/371472 [2:38:51<26:52:53, 3.50it/s] 9%|▉ | 32800/371472 [2:38:52<26:48:41, 3.51it/s] {'loss': 6.315, 'learning_rate': 9.209743799694025e-07, 'epoch': 1.41} + 9%|▉ | 32800/371472 [2:38:52<26:48:41, 3.51it/s] 9%|▉ | 32801/371472 [2:38:52<25:46:47, 3.65it/s] 9%|▉ | 32802/371472 [2:38:52<25:22:04, 3.71it/s] 9%|▉ | 32803/371472 [2:38:53<26:08:27, 3.60it/s] 9%|▉ | 32804/371472 [2:38:53<26:08:37, 3.60it/s] 9%|▉ | 32805/371472 [2:38:53<25:54:20, 3.63it/s] 9%|▉ | 32806/371472 [2:38:53<26:34:10, 3.54it/s] 9%|▉ | 32807/371472 [2:38:54<25:34:41, 3.68it/s] 9%|▉ | 32808/371472 [2:38:54<26:08:40, 3.60it/s] 9%|▉ | 32809/371472 [2:38:54<26:05:03, 3.61it/s] 9%|▉ | 32810/371472 [2:38:55<26:06:10, 3.60it/s] 9%|▉ | 32811/371472 [2:38:55<26:01:14, 3.62it/s] 9%|▉ | 32812/371472 [2:38:55<25:04:44, 3.75it/s] 9%|▉ | 32813/371472 [2:38:55<25:02:56, 3.76it/s] 9%|▉ | 32814/371472 [2:38:56<26:33:45, 3.54it/s] 9%|▉ | 32815/371472 [2:38:56<29:58:01, 3.14it/s] 9%|▉ | 32816/371472 [2:38:56<30:07:24, 3.12it/s] 9%|▉ | 32817/371472 [2:38:57<29:12:46, 3.22it/s] 9%|▉ | 32818/371472 [2:38:57<28:03:15, 3.35it/s] 9%|▉ | 32819/371472 [2:38:57<29:47:30, 3.16it/s] 9%|▉ | 32820/371472 [2:38:58<28:59:11, 3.25it/s] {'loss': 6.5691, 'learning_rate': 9.209258979939236e-07, 'epoch': 1.41} + 9%|▉ | 32820/371472 [2:38:58<28:59:11, 3.25it/s] 9%|▉ | 32821/371472 [2:38:58<28:38:39, 3.28it/s] 9%|▉ | 32822/371472 [2:38:58<27:09:23, 3.46it/s] 9%|▉ | 32823/371472 [2:38:58<26:46:38, 3.51it/s] 9%|▉ | 32824/371472 [2:38:59<26:03:03, 3.61it/s] 9%|▉ | 32825/371472 [2:38:59<25:46:36, 3.65it/s] 9%|▉ | 32826/371472 [2:38:59<25:45:07, 3.65it/s] 9%|▉ | 32827/371472 [2:38:59<26:40:47, 3.53it/s] 9%|▉ | 32828/371472 [2:39:00<27:57:50, 3.36it/s] 9%|▉ | 32829/371472 [2:39:00<28:35:11, 3.29it/s] 9%|▉ | 32830/371472 [2:39:00<28:51:30, 3.26it/s] 9%|▉ | 32831/371472 [2:39:01<27:36:55, 3.41it/s] 9%|▉ | 32832/371472 [2:39:01<27:07:59, 3.47it/s] 9%|▉ | 32833/371472 [2:39:01<27:43:15, 3.39it/s] 9%|▉ | 32834/371472 [2:39:02<27:00:21, 3.48it/s] 9%|▉ | 32835/371472 [2:39:02<26:14:51, 3.58it/s] 9%|▉ | 32836/371472 [2:39:02<28:28:58, 3.30it/s] 9%|▉ | 32837/371472 [2:39:02<27:19:56, 3.44it/s] 9%|▉ | 32838/371472 [2:39:03<26:39:42, 3.53it/s] 9%|▉ | 32839/371472 [2:39:03<26:21:28, 3.57it/s] 9%|▉ | 32840/371472 [2:39:03<26:24:32, 3.56it/s] {'loss': 6.3881, 'learning_rate': 9.208774160184446e-07, 'epoch': 1.41} + 9%|▉ | 32840/371472 [2:39:03<26:24:32, 3.56it/s] 9%|▉ | 32841/371472 [2:39:04<25:51:52, 3.64it/s] 9%|▉ | 32842/371472 [2:39:04<25:27:04, 3.70it/s] 9%|▉ | 32843/371472 [2:39:04<25:12:22, 3.73it/s] 9%|▉ | 32844/371472 [2:39:04<24:54:56, 3.78it/s] 9%|▉ | 32845/371472 [2:39:05<25:12:30, 3.73it/s] 9%|▉ | 32846/371472 [2:39:05<26:54:44, 3.50it/s] 9%|▉ | 32847/371472 [2:39:05<26:41:10, 3.52it/s] 9%|▉ | 32848/371472 [2:39:06<28:17:32, 3.32it/s] 9%|▉ | 32849/371472 [2:39:06<27:19:32, 3.44it/s] 9%|▉ | 32850/371472 [2:39:06<26:48:20, 3.51it/s] 9%|▉ | 32851/371472 [2:39:06<26:09:16, 3.60it/s] 9%|▉ | 32852/371472 [2:39:07<25:31:41, 3.68it/s] 9%|▉ | 32853/371472 [2:39:07<25:47:51, 3.65it/s] 9%|▉ | 32854/371472 [2:39:07<25:00:03, 3.76it/s] 9%|▉ | 32855/371472 [2:39:07<25:57:03, 3.62it/s] 9%|▉ | 32856/371472 [2:39:08<25:51:32, 3.64it/s] 9%|▉ | 32857/371472 [2:39:08<25:18:16, 3.72it/s] 9%|▉ | 32858/371472 [2:39:08<27:06:20, 3.47it/s] 9%|▉ | 32859/371472 [2:39:09<26:49:39, 3.51it/s] 9%|▉ | 32860/371472 [2:39:09<27:34:58, 3.41it/s] {'loss': 6.201, 'learning_rate': 9.208289340429657e-07, 'epoch': 1.42} + 9%|▉ | 32860/371472 [2:39:09<27:34:58, 3.41it/s] 9%|▉ | 32861/371472 [2:39:09<27:23:49, 3.43it/s] 9%|▉ | 32862/371472 [2:39:09<27:11:29, 3.46it/s] 9%|▉ | 32863/371472 [2:39:10<27:44:49, 3.39it/s] 9%|▉ | 32864/371472 [2:39:10<28:15:59, 3.33it/s] 9%|▉ | 32865/371472 [2:39:10<28:09:26, 3.34it/s] 9%|▉ | 32866/371472 [2:39:11<29:38:27, 3.17it/s] 9%|▉ | 32867/371472 [2:39:11<28:06:30, 3.35it/s] 9%|▉ | 32868/371472 [2:39:11<27:02:44, 3.48it/s] 9%|▉ | 32869/371472 [2:39:11<26:14:43, 3.58it/s] 9%|▉ | 32870/371472 [2:39:12<26:41:24, 3.52it/s] 9%|▉ | 32871/371472 [2:39:12<26:52:08, 3.50it/s] 9%|▉ | 32872/371472 [2:39:12<27:02:39, 3.48it/s] 9%|▉ | 32873/371472 [2:39:13<26:32:56, 3.54it/s] 9%|▉ | 32874/371472 [2:39:13<25:34:40, 3.68it/s] 9%|▉ | 32875/371472 [2:39:13<26:08:40, 3.60it/s] 9%|▉ | 32876/371472 [2:39:13<26:06:07, 3.60it/s] 9%|▉ | 32877/371472 [2:39:14<26:26:11, 3.56it/s] 9%|▉ | 32878/371472 [2:39:14<26:05:16, 3.61it/s] 9%|▉ | 32879/371472 [2:39:14<28:01:03, 3.36it/s] 9%|▉ | 32880/371472 [2:39:15<27:06:19, 3.47it/s] {'loss': 6.5495, 'learning_rate': 9.207804520674869e-07, 'epoch': 1.42} + 9%|▉ | 32880/371472 [2:39:15<27:06:19, 3.47it/s] 9%|▉ | 32881/371472 [2:39:15<29:07:18, 3.23it/s] 9%|▉ | 32882/371472 [2:39:15<27:54:44, 3.37it/s] 9%|▉ | 32883/371472 [2:39:16<26:49:39, 3.51it/s] 9%|▉ | 32884/371472 [2:39:16<26:22:56, 3.56it/s] 9%|▉ | 32885/371472 [2:39:16<26:19:59, 3.57it/s] 9%|▉ | 32886/371472 [2:39:16<27:10:09, 3.46it/s] 9%|▉ | 32887/371472 [2:39:17<26:16:26, 3.58it/s] 9%|▉ | 32888/371472 [2:39:17<25:46:47, 3.65it/s] 9%|▉ | 32889/371472 [2:39:17<25:13:06, 3.73it/s] 9%|▉ | 32890/371472 [2:39:17<25:19:29, 3.71it/s] 9%|▉ | 32891/371472 [2:39:18<26:45:04, 3.52it/s] 9%|▉ | 32892/371472 [2:39:18<26:49:08, 3.51it/s] 9%|▉ | 32893/371472 [2:39:18<26:35:32, 3.54it/s] 9%|▉ | 32894/371472 [2:39:19<26:34:30, 3.54it/s] 9%|▉ | 32895/371472 [2:39:19<28:25:29, 3.31it/s] 9%|▉ | 32896/371472 [2:39:19<26:54:44, 3.49it/s] 9%|▉ | 32897/371472 [2:39:19<27:52:54, 3.37it/s] 9%|▉ | 32898/371472 [2:39:20<28:35:09, 3.29it/s] 9%|▉ | 32899/371472 [2:39:20<27:51:21, 3.38it/s] 9%|▉ | 32900/371472 [2:39:20<27:21:11, 3.44it/s] {'loss': 6.2721, 'learning_rate': 9.20731970092008e-07, 'epoch': 1.42} + 9%|▉ | 32900/371472 [2:39:20<27:21:11, 3.44it/s] 9%|▉ | 32901/371472 [2:39:21<26:54:17, 3.50it/s] 9%|▉ | 32902/371472 [2:39:21<28:53:31, 3.26it/s] 9%|▉ | 32903/371472 [2:39:21<30:34:04, 3.08it/s] 9%|▉ | 32904/371472 [2:39:22<29:01:14, 3.24it/s] 9%|▉ | 32905/371472 [2:39:22<28:21:09, 3.32it/s] 9%|▉ | 32906/371472 [2:39:22<27:33:26, 3.41it/s] 9%|▉ | 32907/371472 [2:39:22<26:39:04, 3.53it/s] 9%|▉ | 32908/371472 [2:39:23<26:13:49, 3.59it/s] 9%|▉ | 32909/371472 [2:39:23<28:08:04, 3.34it/s] 9%|▉ | 32910/371472 [2:39:23<27:33:50, 3.41it/s] 9%|▉ | 32911/371472 [2:39:24<26:41:43, 3.52it/s] 9%|▉ | 32912/371472 [2:39:24<27:11:00, 3.46it/s] 9%|▉ | 32913/371472 [2:39:24<27:42:48, 3.39it/s] 9%|▉ | 32914/371472 [2:39:24<27:13:24, 3.45it/s] 9%|▉ | 32915/371472 [2:39:25<27:53:26, 3.37it/s] 9%|▉ | 32916/371472 [2:39:25<27:34:54, 3.41it/s] 9%|▉ | 32917/371472 [2:39:25<27:38:43, 3.40it/s] 9%|▉ | 32918/371472 [2:39:26<26:45:43, 3.51it/s] 9%|▉ | 32919/371472 [2:39:26<26:28:30, 3.55it/s] 9%|▉ | 32920/371472 [2:39:26<25:55:26, 3.63it/s] {'loss': 6.8134, 'learning_rate': 9.206834881165291e-07, 'epoch': 1.42} + 9%|▉ | 32920/371472 [2:39:26<25:55:26, 3.63it/s] 9%|▉ | 32921/371472 [2:39:26<25:16:51, 3.72it/s] 9%|▉ | 32922/371472 [2:39:27<26:34:44, 3.54it/s] 9%|▉ | 32923/371472 [2:39:27<26:59:10, 3.48it/s] 9%|▉ | 32924/371472 [2:39:27<26:17:49, 3.58it/s] 9%|▉ | 32925/371472 [2:39:28<26:14:50, 3.58it/s] 9%|▉ | 32926/371472 [2:39:28<25:53:29, 3.63it/s] 9%|▉ | 32927/371472 [2:39:28<24:51:04, 3.78it/s] 9%|▉ | 32928/371472 [2:39:28<25:21:56, 3.71it/s] 9%|▉ | 32929/371472 [2:39:29<26:19:50, 3.57it/s] 9%|▉ | 32930/371472 [2:39:29<26:05:53, 3.60it/s] 9%|▉ | 32931/371472 [2:39:29<25:50:46, 3.64it/s] 9%|▉ | 32932/371472 [2:39:30<25:57:34, 3.62it/s] 9%|▉ | 32933/371472 [2:39:30<26:08:13, 3.60it/s] 9%|▉ | 32934/371472 [2:39:30<27:49:12, 3.38it/s] 9%|▉ | 32935/371472 [2:39:30<27:13:39, 3.45it/s] 9%|▉ | 32936/371472 [2:39:31<27:45:36, 3.39it/s] 9%|▉ | 32937/371472 [2:39:31<27:05:53, 3.47it/s] 9%|▉ | 32938/371472 [2:39:31<27:23:43, 3.43it/s] 9%|▉ | 32939/371472 [2:39:32<27:38:52, 3.40it/s] 9%|▉ | 32940/371472 [2:39:32<27:00:01, 3.48it/s] {'loss': 6.4974, 'learning_rate': 9.206350061410502e-07, 'epoch': 1.42} + 9%|▉ | 32940/371472 [2:39:32<27:00:01, 3.48it/s] 9%|▉ | 32941/371472 [2:39:32<27:07:27, 3.47it/s] 9%|▉ | 32942/371472 [2:39:32<26:41:32, 3.52it/s] 9%|▉ | 32943/371472 [2:39:33<25:58:44, 3.62it/s] 9%|▉ | 32944/371472 [2:39:33<25:31:54, 3.68it/s] 9%|▉ | 32945/371472 [2:39:33<26:13:25, 3.59it/s] 9%|▉ | 32946/371472 [2:39:34<28:15:30, 3.33it/s] 9%|▉ | 32947/371472 [2:39:34<28:20:16, 3.32it/s] 9%|▉ | 32948/371472 [2:39:34<28:36:48, 3.29it/s] 9%|▉ | 32949/371472 [2:39:34<27:39:36, 3.40it/s] 9%|▉ | 32950/371472 [2:39:35<26:53:21, 3.50it/s] 9%|▉ | 32951/371472 [2:39:35<28:59:06, 3.24it/s] 9%|▉ | 32952/371472 [2:39:35<27:28:43, 3.42it/s] 9%|▉ | 32953/371472 [2:39:36<28:22:28, 3.31it/s] 9%|▉ | 32954/371472 [2:39:36<26:51:20, 3.50it/s] 9%|▉ | 32955/371472 [2:39:36<28:25:22, 3.31it/s] 9%|▉ | 32956/371472 [2:39:37<27:09:51, 3.46it/s] 9%|▉ | 32957/371472 [2:39:37<26:39:30, 3.53it/s] 9%|▉ | 32958/371472 [2:39:37<29:28:30, 3.19it/s] 9%|▉ | 32959/371472 [2:39:37<28:39:40, 3.28it/s] 9%|▉ | 32960/371472 [2:39:38<28:14:30, 3.33it/s] {'loss': 6.2932, 'learning_rate': 9.205865241655713e-07, 'epoch': 1.42} + 9%|▉ | 32960/371472 [2:39:38<28:14:30, 3.33it/s] 9%|▉ | 32961/371472 [2:39:38<27:31:58, 3.42it/s] 9%|▉ | 32962/371472 [2:39:38<27:31:14, 3.42it/s] 9%|▉ | 32963/371472 [2:39:39<26:42:05, 3.52it/s] 9%|▉ | 32964/371472 [2:39:39<25:39:45, 3.66it/s] 9%|▉ | 32965/371472 [2:39:39<25:35:46, 3.67it/s] 9%|▉ | 32966/371472 [2:39:39<26:14:37, 3.58it/s] 9%|▉ | 32967/371472 [2:39:40<25:44:47, 3.65it/s] 9%|▉ | 32968/371472 [2:39:40<25:11:24, 3.73it/s] 9%|▉ | 32969/371472 [2:39:40<24:45:02, 3.80it/s] 9%|▉ | 32970/371472 [2:39:40<24:39:42, 3.81it/s] 9%|▉ | 32971/371472 [2:39:41<26:03:20, 3.61it/s] 9%|▉ | 32972/371472 [2:39:41<26:12:23, 3.59it/s] 9%|▉ | 32973/371472 [2:39:41<27:27:27, 3.42it/s] 9%|▉ | 32974/371472 [2:39:42<26:02:28, 3.61it/s] 9%|▉ | 32975/371472 [2:39:42<25:47:20, 3.65it/s] 9%|▉ | 32976/371472 [2:39:42<25:08:01, 3.74it/s] 9%|▉ | 32977/371472 [2:39:42<24:59:24, 3.76it/s] 9%|▉ | 32978/371472 [2:39:43<24:59:18, 3.76it/s] 9%|▉ | 32979/371472 [2:39:43<25:31:10, 3.68it/s] 9%|▉ | 32980/371472 [2:39:43<27:03:15, 3.48it/s] {'loss': 6.1613, 'learning_rate': 9.205380421900924e-07, 'epoch': 1.42} + 9%|▉ | 32980/371472 [2:39:43<27:03:15, 3.48it/s] 9%|▉ | 32981/371472 [2:39:44<31:08:08, 3.02it/s] 9%|▉ | 32982/371472 [2:39:44<29:41:18, 3.17it/s] 9%|▉ | 32983/371472 [2:39:44<28:19:48, 3.32it/s] 9%|▉ | 32984/371472 [2:39:44<27:13:13, 3.45it/s] 9%|▉ | 32985/371472 [2:39:45<27:28:40, 3.42it/s] 9%|▉ | 32986/371472 [2:39:45<26:31:34, 3.54it/s] 9%|▉ | 32987/371472 [2:39:45<27:18:10, 3.44it/s] 9%|▉ | 32988/371472 [2:39:46<30:54:29, 3.04it/s] 9%|▉ | 32989/371472 [2:39:46<29:26:42, 3.19it/s] 9%|▉ | 32990/371472 [2:39:46<29:58:54, 3.14it/s] 9%|▉ | 32991/371472 [2:39:47<29:08:32, 3.23it/s] 9%|▉ | 32992/371472 [2:39:47<28:02:34, 3.35it/s] 9%|▉ | 32993/371472 [2:39:47<28:00:39, 3.36it/s] 9%|▉ | 32994/371472 [2:39:47<27:04:48, 3.47it/s] 9%|▉ | 32995/371472 [2:39:48<26:49:02, 3.51it/s] 9%|▉ | 32996/371472 [2:39:48<28:02:11, 3.35it/s] 9%|▉ | 32997/371472 [2:39:48<27:37:47, 3.40it/s] 9%|▉ | 32998/371472 [2:39:49<26:45:39, 3.51it/s] 9%|▉ | 32999/371472 [2:39:49<26:41:49, 3.52it/s] 9%|▉ | 33000/371472 [2:39:49<26:14:53, 3.58it/s] {'loss': 6.4778, 'learning_rate': 9.204895602146135e-07, 'epoch': 1.42} + 9%|▉ | 33000/371472 [2:39:49<26:14:53, 3.58it/s] 9%|▉ | 33001/371472 [2:39:50<27:28:33, 3.42it/s] 9%|▉ | 33002/371472 [2:39:50<28:25:25, 3.31it/s] 9%|▉ | 33003/371472 [2:39:50<28:07:41, 3.34it/s] 9%|▉ | 33004/371472 [2:39:50<27:23:39, 3.43it/s] 9%|▉ | 33005/371472 [2:39:51<26:53:33, 3.50it/s] 9%|▉ | 33006/371472 [2:39:51<25:41:07, 3.66it/s] 9%|▉ | 33007/371472 [2:39:51<26:35:53, 3.53it/s] 9%|▉ | 33008/371472 [2:39:52<26:14:55, 3.58it/s] 9%|▉ | 33009/371472 [2:39:52<27:23:42, 3.43it/s] 9%|▉ | 33010/371472 [2:39:52<26:44:25, 3.52it/s] 9%|▉ | 33011/371472 [2:39:52<25:47:43, 3.64it/s] 9%|▉ | 33012/371472 [2:39:53<26:34:02, 3.54it/s] 9%|▉ | 33013/371472 [2:39:53<25:47:03, 3.65it/s] 9%|▉ | 33014/371472 [2:39:53<25:25:31, 3.70it/s] 9%|▉ | 33015/371472 [2:39:54<27:31:17, 3.42it/s] 9%|▉ | 33016/371472 [2:39:54<26:54:05, 3.49it/s] 9%|▉ | 33017/371472 [2:39:54<26:42:20, 3.52it/s] 9%|▉ | 33018/371472 [2:39:54<28:57:40, 3.25it/s] 9%|▉ | 33019/371472 [2:39:55<27:43:52, 3.39it/s] 9%|▉ | 33020/371472 [2:39:55<26:35:47, 3.53it/s] {'loss': 6.3327, 'learning_rate': 9.204410782391346e-07, 'epoch': 1.42} + 9%|▉ | 33020/371472 [2:39:55<26:35:47, 3.53it/s] 9%|▉ | 33021/371472 [2:39:55<26:03:34, 3.61it/s] 9%|▉ | 33022/371472 [2:39:55<25:24:14, 3.70it/s] 9%|▉ | 33023/371472 [2:39:56<25:10:11, 3.74it/s] 9%|▉ | 33024/371472 [2:39:56<27:40:57, 3.40it/s] 9%|▉ | 33025/371472 [2:39:56<28:36:00, 3.29it/s] 9%|▉ | 33026/371472 [2:39:57<28:01:05, 3.36it/s] 9%|▉ | 33027/371472 [2:39:57<28:05:06, 3.35it/s] 9%|▉ | 33028/371472 [2:39:57<27:33:04, 3.41it/s] 9%|▉ | 33029/371472 [2:39:58<26:33:07, 3.54it/s] 9%|▉ | 33030/371472 [2:39:58<25:25:56, 3.70it/s] 9%|▉ | 33031/371472 [2:39:58<31:53:31, 2.95it/s] 9%|▉ | 33032/371472 [2:39:59<31:13:07, 3.01it/s] 9%|▉ | 33033/371472 [2:39:59<29:22:55, 3.20it/s] 9%|▉ | 33034/371472 [2:39:59<28:36:49, 3.29it/s] 9%|▉ | 33035/371472 [2:39:59<28:13:17, 3.33it/s] 9%|▉ | 33036/371472 [2:40:00<27:38:43, 3.40it/s] 9%|▉ | 33037/371472 [2:40:00<28:04:32, 3.35it/s] 9%|▉ | 33038/371472 [2:40:00<27:18:43, 3.44it/s] 9%|▉ | 33039/371472 [2:40:01<29:16:49, 3.21it/s] 9%|▉ | 33040/371472 [2:40:01<28:34:01, 3.29it/s] {'loss': 6.2487, 'learning_rate': 9.203925962636556e-07, 'epoch': 1.42} + 9%|▉ | 33040/371472 [2:40:01<28:34:01, 3.29it/s] 9%|▉ | 33041/371472 [2:40:01<27:30:18, 3.42it/s] 9%|▉ | 33042/371472 [2:40:01<26:29:42, 3.55it/s] 9%|▉ | 33043/371472 [2:40:02<27:13:41, 3.45it/s] 9%|▉ | 33044/371472 [2:40:02<27:03:09, 3.47it/s] 9%|▉ | 33045/371472 [2:40:02<27:49:07, 3.38it/s] 9%|▉ | 33046/371472 [2:40:03<27:21:22, 3.44it/s] 9%|▉ | 33047/371472 [2:40:03<27:47:02, 3.38it/s] 9%|▉ | 33048/371472 [2:40:03<28:57:25, 3.25it/s] 9%|▉ | 33049/371472 [2:40:04<29:48:31, 3.15it/s] 9%|▉ | 33050/371472 [2:40:04<30:07:34, 3.12it/s] 9%|▉ | 33051/371472 [2:40:04<29:28:35, 3.19it/s] 9%|▉ | 33052/371472 [2:40:05<28:27:58, 3.30it/s] 9%|▉ | 33053/371472 [2:40:05<28:37:05, 3.28it/s] 9%|▉ | 33054/371472 [2:40:05<27:38:29, 3.40it/s] 9%|▉ | 33055/371472 [2:40:05<27:01:36, 3.48it/s] 9%|▉ | 33056/371472 [2:40:06<27:55:23, 3.37it/s] 9%|▉ | 33057/371472 [2:40:06<27:16:17, 3.45it/s] 9%|▉ | 33058/371472 [2:40:06<29:18:46, 3.21it/s] 9%|▉ | 33059/371472 [2:40:07<28:12:20, 3.33it/s] 9%|▉ | 33060/371472 [2:40:07<27:21:52, 3.44it/s] {'loss': 6.2188, 'learning_rate': 9.203441142881769e-07, 'epoch': 1.42} + 9%|▉ | 33060/371472 [2:40:07<27:21:52, 3.44it/s] 9%|▉ | 33061/371472 [2:40:07<26:30:43, 3.55it/s] 9%|▉ | 33062/371472 [2:40:07<26:13:20, 3.58it/s] 9%|▉ | 33063/371472 [2:40:08<26:31:21, 3.54it/s] 9%|▉ | 33064/371472 [2:40:08<25:58:42, 3.62it/s] 9%|▉ | 33065/371472 [2:40:08<25:36:04, 3.67it/s] 9%|▉ | 33066/371472 [2:40:09<28:11:57, 3.33it/s] 9%|▉ | 33067/371472 [2:40:09<32:11:26, 2.92it/s] 9%|▉ | 33068/371472 [2:40:09<30:23:19, 3.09it/s] 9%|▉ | 33069/371472 [2:40:10<30:37:32, 3.07it/s] 9%|▉ | 33070/371472 [2:40:10<29:19:23, 3.21it/s] 9%|▉ | 33071/371472 [2:40:10<28:43:15, 3.27it/s] 9%|▉ | 33072/371472 [2:40:11<30:59:04, 3.03it/s] 9%|▉ | 33073/371472 [2:40:11<32:33:07, 2.89it/s] 9%|▉ | 33074/371472 [2:40:11<29:49:23, 3.15it/s] 9%|▉ | 33075/371472 [2:40:12<28:29:33, 3.30it/s] 9%|▉ | 33076/371472 [2:40:12<27:34:47, 3.41it/s] 9%|▉ | 33077/371472 [2:40:12<27:45:23, 3.39it/s] 9%|▉ | 33078/371472 [2:40:12<28:36:11, 3.29it/s] 9%|▉ | 33079/371472 [2:40:13<27:10:01, 3.46it/s] 9%|▉ | 33080/371472 [2:40:13<26:43:38, 3.52it/s] {'loss': 6.5313, 'learning_rate': 9.20295632312698e-07, 'epoch': 1.42} + 9%|▉ | 33080/371472 [2:40:13<26:43:38, 3.52it/s] 9%|▉ | 33081/371472 [2:40:13<30:27:58, 3.09it/s] 9%|▉ | 33082/371472 [2:40:14<30:00:56, 3.13it/s] 9%|▉ | 33083/371472 [2:40:14<29:23:35, 3.20it/s] 9%|▉ | 33084/371472 [2:40:14<30:28:36, 3.08it/s] 9%|▉ | 33085/371472 [2:40:15<29:10:59, 3.22it/s] 9%|▉ | 33086/371472 [2:40:15<28:44:23, 3.27it/s] 9%|▉ | 33087/371472 [2:40:15<27:25:27, 3.43it/s] 9%|▉ | 33088/371472 [2:40:15<29:24:23, 3.20it/s] 9%|▉ | 33089/371472 [2:40:16<28:22:09, 3.31it/s] 9%|▉ | 33090/371472 [2:40:16<27:05:15, 3.47it/s] 9%|▉ | 33091/371472 [2:40:16<27:05:14, 3.47it/s] 9%|▉ | 33092/371472 [2:40:17<27:07:56, 3.46it/s] 9%|▉ | 33093/371472 [2:40:17<26:46:19, 3.51it/s] 9%|▉ | 33094/371472 [2:40:17<26:43:44, 3.52it/s] 9%|▉ | 33095/371472 [2:40:17<26:29:20, 3.55it/s] 9%|▉ | 33096/371472 [2:40:18<26:26:08, 3.56it/s] 9%|▉ | 33097/371472 [2:40:18<26:34:02, 3.54it/s] 9%|▉ | 33098/371472 [2:40:18<26:40:22, 3.52it/s] 9%|▉ | 33099/371472 [2:40:19<27:14:22, 3.45it/s] 9%|▉ | 33100/371472 [2:40:19<27:42:40, 3.39it/s] {'loss': 6.3588, 'learning_rate': 9.20247150337219e-07, 'epoch': 1.43} + 9%|▉ | 33100/371472 [2:40:19<27:42:40, 3.39it/s] 9%|▉ | 33101/371472 [2:40:19<26:41:19, 3.52it/s] 9%|▉ | 33102/371472 [2:40:19<26:34:56, 3.54it/s] 9%|▉ | 33103/371472 [2:40:20<26:16:51, 3.58it/s] 9%|▉ | 33104/371472 [2:40:20<26:15:42, 3.58it/s] 9%|▉ | 33105/371472 [2:40:20<25:47:10, 3.64it/s] 9%|▉ | 33106/371472 [2:40:21<26:57:39, 3.49it/s] 9%|▉ | 33107/371472 [2:40:21<26:51:54, 3.50it/s] 9%|▉ | 33108/371472 [2:40:21<27:18:43, 3.44it/s] 9%|▉ | 33109/371472 [2:40:21<27:19:24, 3.44it/s] 9%|▉ | 33110/371472 [2:40:22<27:09:52, 3.46it/s] 9%|▉ | 33111/371472 [2:40:22<28:20:23, 3.32it/s] 9%|▉ | 33112/371472 [2:40:22<27:33:28, 3.41it/s] 9%|▉ | 33113/371472 [2:40:23<26:41:17, 3.52it/s] 9%|▉ | 33114/371472 [2:40:23<27:26:05, 3.43it/s] 9%|▉ | 33115/371472 [2:40:23<27:07:48, 3.46it/s] 9%|▉ | 33116/371472 [2:40:23<27:25:01, 3.43it/s] 9%|▉ | 33117/371472 [2:40:24<28:01:12, 3.35it/s] 9%|▉ | 33118/371472 [2:40:24<28:20:40, 3.32it/s] 9%|▉ | 33119/371472 [2:40:24<27:53:29, 3.37it/s] 9%|▉ | 33120/371472 [2:40:25<27:11:42, 3.46it/s] {'loss': 6.0301, 'learning_rate': 9.201986683617401e-07, 'epoch': 1.43} + 9%|▉ | 33120/371472 [2:40:25<27:11:42, 3.46it/s] 9%|▉ | 33121/371472 [2:40:25<25:58:23, 3.62it/s] 9%|▉ | 33122/371472 [2:40:25<26:29:51, 3.55it/s] 9%|▉ | 33123/371472 [2:40:25<25:34:10, 3.68it/s] 9%|▉ | 33124/371472 [2:40:26<26:45:25, 3.51it/s] 9%|▉ | 33125/371472 [2:40:26<27:07:49, 3.46it/s] 9%|▉ | 33126/371472 [2:40:26<27:23:51, 3.43it/s] 9%|▉ | 33127/371472 [2:40:27<26:38:30, 3.53it/s] 9%|▉ | 33128/371472 [2:40:27<27:00:45, 3.48it/s] 9%|▉ | 33129/371472 [2:40:27<27:22:02, 3.43it/s] 9%|▉ | 33130/371472 [2:40:28<30:10:12, 3.12it/s] 9%|▉ | 33131/371472 [2:40:28<30:01:28, 3.13it/s] 9%|▉ | 33132/371472 [2:40:28<28:39:41, 3.28it/s] 9%|▉ | 33133/371472 [2:40:29<29:34:27, 3.18it/s] 9%|▉ | 33134/371472 [2:40:29<28:17:20, 3.32it/s] 9%|▉ | 33135/371472 [2:40:29<28:37:17, 3.28it/s] 9%|▉ | 33136/371472 [2:40:29<28:19:49, 3.32it/s] 9%|▉ | 33137/371472 [2:40:30<28:39:39, 3.28it/s] 9%|▉ | 33138/371472 [2:40:30<28:43:02, 3.27it/s] 9%|▉ | 33139/371472 [2:40:30<31:49:34, 2.95it/s] 9%|▉ | 33140/371472 [2:40:31<31:40:14, 2.97it/s] {'loss': 6.2546, 'learning_rate': 9.201501863862613e-07, 'epoch': 1.43} + 9%|▉ | 33140/371472 [2:40:31<31:40:14, 2.97it/s] 9%|▉ | 33141/371472 [2:40:31<30:03:19, 3.13it/s] 9%|▉ | 33142/371472 [2:40:31<28:41:43, 3.28it/s] 9%|▉ | 33143/371472 [2:40:32<27:45:17, 3.39it/s] 9%|▉ | 33144/371472 [2:40:32<29:43:48, 3.16it/s] 9%|▉ | 33145/371472 [2:40:32<28:48:53, 3.26it/s] 9%|▉ | 33146/371472 [2:40:33<27:43:09, 3.39it/s] 9%|▉ | 33147/371472 [2:40:33<26:34:44, 3.54it/s] 9%|▉ | 33148/371472 [2:40:33<27:19:33, 3.44it/s] 9%|▉ | 33149/371472 [2:40:33<27:14:44, 3.45it/s] 9%|▉ | 33150/371472 [2:40:34<27:16:06, 3.45it/s] 9%|▉ | 33151/371472 [2:40:34<26:59:32, 3.48it/s] 9%|▉ | 33152/371472 [2:40:34<30:36:22, 3.07it/s] 9%|▉ | 33153/371472 [2:40:35<33:00:03, 2.85it/s] 9%|▉ | 33154/371472 [2:40:35<31:03:36, 3.03it/s] 9%|▉ | 33155/371472 [2:40:35<29:18:11, 3.21it/s] 9%|▉ | 33156/371472 [2:40:36<29:46:24, 3.16it/s] 9%|▉ | 33157/371472 [2:40:36<29:41:34, 3.16it/s] 9%|▉ | 33158/371472 [2:40:36<27:54:12, 3.37it/s] 9%|▉ | 33159/371472 [2:40:37<27:40:56, 3.39it/s] 9%|▉ | 33160/371472 [2:40:37<27:29:38, 3.42it/s] {'loss': 6.5632, 'learning_rate': 9.201017044107823e-07, 'epoch': 1.43} + 9%|▉ | 33160/371472 [2:40:37<27:29:38, 3.42it/s] 9%|▉ | 33161/371472 [2:40:37<28:03:39, 3.35it/s] 9%|▉ | 33162/371472 [2:40:37<28:22:13, 3.31it/s] 9%|▉ | 33163/371472 [2:40:38<29:52:47, 3.15it/s] 9%|▉ | 33164/371472 [2:40:38<31:26:16, 2.99it/s] 9%|▉ | 33165/371472 [2:40:38<30:32:58, 3.08it/s] 9%|▉ | 33166/371472 [2:40:39<30:50:18, 3.05it/s] 9%|▉ | 33167/371472 [2:40:39<29:26:11, 3.19it/s] 9%|▉ | 33168/371472 [2:40:39<30:14:41, 3.11it/s] 9%|▉ | 33169/371472 [2:40:40<29:16:14, 3.21it/s] 9%|▉ | 33170/371472 [2:40:40<28:08:24, 3.34it/s] 9%|▉ | 33171/371472 [2:40:40<28:46:56, 3.26it/s] 9%|▉ | 33172/371472 [2:40:41<28:44:17, 3.27it/s] 9%|▉ | 33173/371472 [2:40:41<30:12:16, 3.11it/s] 9%|▉ | 33174/371472 [2:40:41<28:12:16, 3.33it/s] 9%|▉ | 33175/371472 [2:40:42<29:06:50, 3.23it/s] 9%|▉ | 33176/371472 [2:40:42<28:01:49, 3.35it/s] 9%|▉ | 33177/371472 [2:40:42<27:18:25, 3.44it/s] 9%|▉ | 33178/371472 [2:40:42<26:06:17, 3.60it/s] 9%|▉ | 33179/371472 [2:40:43<25:18:04, 3.71it/s] 9%|▉ | 33180/371472 [2:40:43<28:28:38, 3.30it/s] {'loss': 6.2113, 'learning_rate': 9.200532224353035e-07, 'epoch': 1.43} + 9%|▉ | 33180/371472 [2:40:43<28:28:38, 3.30it/s] 9%|▉ | 33181/371472 [2:40:43<27:42:24, 3.39it/s] 9%|▉ | 33182/371472 [2:40:44<28:09:03, 3.34it/s] 9%|▉ | 33183/371472 [2:40:44<28:49:34, 3.26it/s] 9%|▉ | 33184/371472 [2:40:44<29:17:25, 3.21it/s] 9%|▉ | 33185/371472 [2:40:44<28:14:10, 3.33it/s] 9%|▉ | 33186/371472 [2:40:45<32:03:51, 2.93it/s] 9%|▉ | 33187/371472 [2:40:45<31:49:26, 2.95it/s] 9%|▉ | 33188/371472 [2:40:46<30:16:35, 3.10it/s] 9%|▉ | 33189/371472 [2:40:46<28:05:38, 3.34it/s] 9%|▉ | 33190/371472 [2:40:46<27:53:19, 3.37it/s] 9%|▉ | 33191/371472 [2:40:46<27:14:04, 3.45it/s] 9%|▉ | 33192/371472 [2:40:47<26:10:13, 3.59it/s] 9%|▉ | 33193/371472 [2:40:47<26:32:05, 3.54it/s] 9%|▉ | 33194/371472 [2:40:47<25:45:37, 3.65it/s] 9%|▉ | 33195/371472 [2:40:47<26:11:46, 3.59it/s] 9%|▉ | 33196/371472 [2:40:48<25:09:33, 3.73it/s] 9%|▉ | 33197/371472 [2:40:48<24:46:21, 3.79it/s] 9%|▉ | 33198/371472 [2:40:48<27:22:38, 3.43it/s] 9%|▉ | 33199/371472 [2:40:49<26:51:44, 3.50it/s] 9%|▉ | 33200/371472 [2:40:49<27:53:09, 3.37it/s] {'loss': 6.4124, 'learning_rate': 9.200047404598246e-07, 'epoch': 1.43} + 9%|▉ | 33200/371472 [2:40:49<27:53:09, 3.37it/s] 9%|▉ | 33201/371472 [2:40:49<29:01:14, 3.24it/s] 9%|▉ | 33202/371472 [2:40:49<27:49:16, 3.38it/s] 9%|▉ | 33203/371472 [2:40:50<36:49:54, 2.55it/s] 9%|▉ | 33204/371472 [2:40:50<33:16:53, 2.82it/s] 9%|▉ | 33205/371472 [2:40:51<31:25:30, 2.99it/s] 9%|▉ | 33206/371472 [2:40:51<29:24:50, 3.19it/s] 9%|▉ | 33207/371472 [2:40:51<27:42:01, 3.39it/s] 9%|▉ | 33208/371472 [2:40:51<27:15:00, 3.45it/s] 9%|▉ | 33209/371472 [2:40:52<26:15:20, 3.58it/s] 9%|▉ | 33210/371472 [2:40:52<27:54:45, 3.37it/s] 9%|▉ | 33211/371472 [2:40:52<28:05:17, 3.35it/s] 9%|▉ | 33212/371472 [2:40:53<27:21:57, 3.43it/s] 9%|▉ | 33213/371472 [2:40:53<27:24:10, 3.43it/s] 9%|▉ | 33214/371472 [2:40:53<28:04:38, 3.35it/s] 9%|▉ | 33215/371472 [2:40:53<27:27:19, 3.42it/s] 9%|▉ | 33216/371472 [2:40:54<31:43:11, 2.96it/s] 9%|▉ | 33217/371472 [2:40:54<30:02:46, 3.13it/s] 9%|▉ | 33218/371472 [2:40:54<29:02:08, 3.24it/s] 9%|▉ | 33219/371472 [2:40:55<28:33:50, 3.29it/s] 9%|▉ | 33220/371472 [2:40:55<28:18:52, 3.32it/s] {'loss': 6.4584, 'learning_rate': 9.199562584843456e-07, 'epoch': 1.43} + 9%|▉ | 33220/371472 [2:40:55<28:18:52, 3.32it/s] 9%|▉ | 33221/371472 [2:40:55<29:18:13, 3.21it/s] 9%|▉ | 33222/371472 [2:40:56<28:19:04, 3.32it/s] 9%|▉ | 33223/371472 [2:40:56<28:46:30, 3.27it/s] 9%|▉ | 33224/371472 [2:40:56<27:31:31, 3.41it/s] 9%|▉ | 33225/371472 [2:40:57<27:01:34, 3.48it/s] 9%|▉ | 33226/371472 [2:40:57<27:18:35, 3.44it/s] 9%|▉ | 33227/371472 [2:40:57<27:18:39, 3.44it/s] 9%|▉ | 33228/371472 [2:40:57<27:49:13, 3.38it/s] 9%|▉ | 33229/371472 [2:40:58<29:56:23, 3.14it/s] 9%|▉ | 33230/371472 [2:40:58<28:25:58, 3.30it/s] 9%|▉ | 33231/371472 [2:40:58<27:27:06, 3.42it/s] 9%|▉ | 33232/371472 [2:40:59<26:55:50, 3.49it/s] 9%|▉ | 33233/371472 [2:40:59<26:55:22, 3.49it/s] 9%|▉ | 33234/371472 [2:40:59<26:15:34, 3.58it/s] 9%|▉ | 33235/371472 [2:40:59<25:52:26, 3.63it/s] 9%|▉ | 33236/371472 [2:41:00<25:19:20, 3.71it/s] 9%|▉ | 33237/371472 [2:41:00<25:04:26, 3.75it/s] 9%|▉ | 33238/371472 [2:41:00<26:21:19, 3.56it/s] 9%|▉ | 33239/371472 [2:41:01<25:49:26, 3.64it/s] 9%|▉ | 33240/371472 [2:41:01<25:13:02, 3.73it/s] {'loss': 6.3494, 'learning_rate': 9.199077765088667e-07, 'epoch': 1.43} + 9%|▉ | 33240/371472 [2:41:01<25:13:02, 3.73it/s] 9%|▉ | 33241/371472 [2:41:01<25:15:10, 3.72it/s] 9%|▉ | 33242/371472 [2:41:01<25:30:52, 3.68it/s] 9%|▉ | 33243/371472 [2:41:02<25:14:58, 3.72it/s] 9%|▉ | 33244/371472 [2:41:02<25:41:27, 3.66it/s] 9%|▉ | 33245/371472 [2:41:02<26:08:52, 3.59it/s] 9%|▉ | 33246/371472 [2:41:02<25:37:28, 3.67it/s] 9%|▉ | 33247/371472 [2:41:03<27:09:03, 3.46it/s] 9%|▉ | 33248/371472 [2:41:03<26:20:06, 3.57it/s] 9%|▉ | 33249/371472 [2:41:03<28:16:00, 3.32it/s] 9%|▉ | 33250/371472 [2:41:04<29:28:40, 3.19it/s] 9%|▉ | 33251/371472 [2:41:04<27:42:46, 3.39it/s] 9%|▉ | 33252/371472 [2:41:04<27:30:06, 3.42it/s] 9%|▉ | 33253/371472 [2:41:05<27:22:19, 3.43it/s] 9%|▉ | 33254/371472 [2:41:05<27:06:10, 3.47it/s] 9%|▉ | 33255/371472 [2:41:05<26:36:47, 3.53it/s] 9%|▉ | 33256/371472 [2:41:05<26:38:46, 3.53it/s] 9%|▉ | 33257/371472 [2:41:06<27:04:27, 3.47it/s] 9%|▉ | 33258/371472 [2:41:06<26:43:29, 3.52it/s] 9%|▉ | 33259/371472 [2:41:06<26:55:47, 3.49it/s] 9%|▉ | 33260/371472 [2:41:07<26:48:22, 3.50it/s] {'loss': 6.2183, 'learning_rate': 9.198592945333879e-07, 'epoch': 1.43} + 9%|▉ | 33260/371472 [2:41:07<26:48:22, 3.50it/s] 9%|▉ | 33261/371472 [2:41:07<25:54:16, 3.63it/s] 9%|▉ | 33262/371472 [2:41:07<26:53:55, 3.49it/s] 9%|▉ | 33263/371472 [2:41:07<26:05:27, 3.60it/s] 9%|▉ | 33264/371472 [2:41:08<27:46:00, 3.38it/s] 9%|▉ | 33265/371472 [2:41:08<26:32:23, 3.54it/s] 9%|▉ | 33266/371472 [2:41:08<25:45:23, 3.65it/s] 9%|▉ | 33267/371472 [2:41:08<26:44:07, 3.51it/s] 9%|▉ | 33268/371472 [2:41:09<26:32:14, 3.54it/s] 9%|▉ | 33269/371472 [2:41:09<27:21:10, 3.43it/s] 9%|▉ | 33270/371472 [2:41:09<27:11:04, 3.46it/s] 9%|▉ | 33271/371472 [2:41:10<27:42:40, 3.39it/s] 9%|▉ | 33272/371472 [2:41:10<26:20:42, 3.57it/s] 9%|▉ | 33273/371472 [2:41:10<27:17:38, 3.44it/s] 9%|▉ | 33274/371472 [2:41:11<27:01:32, 3.48it/s] 9%|▉ | 33275/371472 [2:41:11<28:13:23, 3.33it/s] 9%|▉ | 33276/371472 [2:41:11<27:36:16, 3.40it/s] 9%|▉ | 33277/371472 [2:41:11<27:07:10, 3.46it/s] 9%|▉ | 33278/371472 [2:41:12<27:23:25, 3.43it/s] 9%|▉ | 33279/371472 [2:41:12<26:08:04, 3.59it/s] 9%|▉ | 33280/371472 [2:41:12<26:39:32, 3.52it/s] {'loss': 6.5717, 'learning_rate': 9.19810812557909e-07, 'epoch': 1.43} + 9%|▉ | 33280/371472 [2:41:12<26:39:32, 3.52it/s] 9%|▉ | 33281/371472 [2:41:13<26:06:06, 3.60it/s] 9%|▉ | 33282/371472 [2:41:13<27:29:20, 3.42it/s] 9%|▉ | 33283/371472 [2:41:13<27:48:01, 3.38it/s] 9%|▉ | 33284/371472 [2:41:13<28:31:57, 3.29it/s] 9%|▉ | 33285/371472 [2:41:14<28:38:42, 3.28it/s] 9%|▉ | 33286/371472 [2:41:14<28:11:15, 3.33it/s] 9%|▉ | 33287/371472 [2:41:14<27:53:18, 3.37it/s] 9%|▉ | 33288/371472 [2:41:15<28:21:08, 3.31it/s] 9%|▉ | 33289/371472 [2:41:15<27:51:39, 3.37it/s] 9%|▉ | 33290/371472 [2:41:15<27:26:20, 3.42it/s] 9%|▉ | 33291/371472 [2:41:16<27:48:20, 3.38it/s] 9%|▉ | 33292/371472 [2:41:16<26:28:08, 3.55it/s] 9%|▉ | 33293/371472 [2:41:16<26:37:58, 3.53it/s] 9%|▉ | 33294/371472 [2:41:16<26:39:02, 3.52it/s] 9%|▉ | 33295/371472 [2:41:17<26:22:22, 3.56it/s] 9%|▉ | 33296/371472 [2:41:17<25:44:08, 3.65it/s] 9%|▉ | 33297/371472 [2:41:17<26:38:09, 3.53it/s] 9%|▉ | 33298/371472 [2:41:18<27:25:12, 3.43it/s] 9%|▉ | 33299/371472 [2:41:18<27:38:17, 3.40it/s] 9%|▉ | 33300/371472 [2:41:18<27:20:32, 3.44it/s] {'loss': 6.2162, 'learning_rate': 9.197623305824301e-07, 'epoch': 1.43} + 9%|▉ | 33300/371472 [2:41:18<27:20:32, 3.44it/s] 9%|▉ | 33301/371472 [2:41:18<26:55:47, 3.49it/s] 9%|▉ | 33302/371472 [2:41:19<26:33:41, 3.54it/s] 9%|▉ | 33303/371472 [2:41:19<26:05:00, 3.60it/s] 9%|▉ | 33304/371472 [2:41:19<25:18:41, 3.71it/s] 9%|▉ | 33305/371472 [2:41:19<25:15:52, 3.72it/s] 9%|▉ | 33306/371472 [2:41:20<26:33:02, 3.54it/s] 9%|▉ | 33307/371472 [2:41:20<26:33:41, 3.54it/s] 9%|▉ | 33308/371472 [2:41:20<26:16:21, 3.58it/s] 9%|▉ | 33309/371472 [2:41:21<27:47:05, 3.38it/s] 9%|▉ | 33310/371472 [2:41:21<27:15:17, 3.45it/s] 9%|▉ | 33311/371472 [2:41:21<28:52:14, 3.25it/s] 9%|▉ | 33312/371472 [2:41:22<27:49:06, 3.38it/s] 9%|▉ | 33313/371472 [2:41:22<26:57:34, 3.48it/s] 9%|▉ | 33314/371472 [2:41:22<26:41:30, 3.52it/s] 9%|▉ | 33315/371472 [2:41:22<25:45:51, 3.65it/s] 9%|▉ | 33316/371472 [2:41:23<26:10:38, 3.59it/s] 9%|▉ | 33317/371472 [2:41:23<26:09:14, 3.59it/s] 9%|▉ | 33318/371472 [2:41:23<25:29:31, 3.68it/s] 9%|▉ | 33319/371472 [2:41:23<25:56:14, 3.62it/s] 9%|▉ | 33320/371472 [2:41:24<26:07:59, 3.59it/s] {'loss': 6.4291, 'learning_rate': 9.197138486069512e-07, 'epoch': 1.44} + 9%|▉ | 33320/371472 [2:41:24<26:07:59, 3.59it/s] 9%|▉ | 33321/371472 [2:41:24<25:59:12, 3.61it/s] 9%|▉ | 33322/371472 [2:41:24<28:55:53, 3.25it/s] 9%|▉ | 33323/371472 [2:41:25<27:25:12, 3.43it/s] 9%|▉ | 33324/371472 [2:41:25<26:51:27, 3.50it/s] 9%|▉ | 33325/371472 [2:41:25<26:35:29, 3.53it/s] 9%|▉ | 33326/371472 [2:41:26<35:02:59, 2.68it/s] 9%|▉ | 33327/371472 [2:41:26<31:49:48, 2.95it/s] 9%|▉ | 33328/371472 [2:41:26<30:43:33, 3.06it/s] 9%|▉ | 33329/371472 [2:41:27<29:09:29, 3.22it/s] 9%|▉ | 33330/371472 [2:41:27<28:57:24, 3.24it/s] 9%|▉ | 33331/371472 [2:41:27<28:24:39, 3.31it/s] 9%|▉ | 33332/371472 [2:41:27<28:41:34, 3.27it/s] 9%|▉ | 33333/371472 [2:41:28<28:21:07, 3.31it/s] 9%|▉ | 33334/371472 [2:41:28<27:24:37, 3.43it/s] 9%|▉ | 33335/371472 [2:41:28<26:44:44, 3.51it/s] 9%|▉ | 33336/371472 [2:41:29<26:32:48, 3.54it/s] 9%|▉ | 33337/371472 [2:41:29<26:24:38, 3.56it/s] 9%|▉ | 33338/371472 [2:41:29<25:31:44, 3.68it/s] 9%|▉ | 33339/371472 [2:41:29<26:37:57, 3.53it/s] 9%|▉ | 33340/371472 [2:41:30<26:31:15, 3.54it/s] {'loss': 6.6195, 'learning_rate': 9.196653666314724e-07, 'epoch': 1.44} + 9%|▉ | 33340/371472 [2:41:30<26:31:15, 3.54it/s] 9%|▉ | 33341/371472 [2:41:30<26:38:12, 3.53it/s] 9%|▉ | 33342/371472 [2:41:30<26:40:48, 3.52it/s] 9%|▉ | 33343/371472 [2:41:31<26:52:30, 3.49it/s] 9%|▉ | 33344/371472 [2:41:31<27:29:48, 3.42it/s] 9%|▉ | 33345/371472 [2:41:31<26:14:34, 3.58it/s] 9%|▉ | 33346/371472 [2:41:32<29:15:10, 3.21it/s] 9%|▉ | 33347/371472 [2:41:32<27:53:13, 3.37it/s] 9%|▉ | 33348/371472 [2:41:32<28:19:09, 3.32it/s] 9%|▉ | 33349/371472 [2:41:32<26:42:20, 3.52it/s] 9%|▉ | 33350/371472 [2:41:33<26:31:02, 3.54it/s] 9%|▉ | 33351/371472 [2:41:33<27:48:57, 3.38it/s] 9%|▉ | 33352/371472 [2:41:33<27:12:29, 3.45it/s] 9%|▉ | 33353/371472 [2:41:33<26:33:48, 3.54it/s] 9%|▉ | 33354/371472 [2:41:34<26:49:46, 3.50it/s] 9%|▉ | 33355/371472 [2:41:34<25:46:27, 3.64it/s] 9%|▉ | 33356/371472 [2:41:34<25:58:24, 3.62it/s] 9%|▉ | 33357/371472 [2:41:35<27:59:35, 3.36it/s] 9%|▉ | 33358/371472 [2:41:35<27:11:42, 3.45it/s] 9%|▉ | 33359/371472 [2:41:35<26:34:00, 3.54it/s] 9%|▉ | 33360/371472 [2:41:36<27:36:35, 3.40it/s] {'loss': 6.5921, 'learning_rate': 9.196168846559933e-07, 'epoch': 1.44} + 9%|▉ | 33360/371472 [2:41:36<27:36:35, 3.40it/s] 9%|▉ | 33361/371472 [2:41:36<27:02:05, 3.47it/s] 9%|▉ | 33362/371472 [2:41:36<28:17:17, 3.32it/s] 9%|▉ | 33363/371472 [2:41:36<27:28:25, 3.42it/s] 9%|▉ | 33364/371472 [2:41:37<26:49:34, 3.50it/s] 9%|▉ | 33365/371472 [2:41:37<27:03:57, 3.47it/s] 9%|▉ | 33366/371472 [2:41:37<27:25:52, 3.42it/s] 9%|▉ | 33367/371472 [2:41:38<27:31:15, 3.41it/s] 9%|▉ | 33368/371472 [2:41:38<27:05:50, 3.47it/s] 9%|▉ | 33369/371472 [2:41:38<27:03:10, 3.47it/s] 9%|▉ | 33370/371472 [2:41:38<25:58:37, 3.62it/s] 9%|▉ | 33371/371472 [2:41:39<25:37:38, 3.66it/s] 9%|▉ | 33372/371472 [2:41:39<25:42:55, 3.65it/s] 9%|▉ | 33373/371472 [2:41:39<24:50:54, 3.78it/s] 9%|▉ | 33374/371472 [2:41:39<25:38:22, 3.66it/s] 9%|▉ | 33375/371472 [2:41:40<25:57:34, 3.62it/s] 9%|▉ | 33376/371472 [2:41:40<25:56:26, 3.62it/s] 9%|▉ | 33377/371472 [2:41:40<24:54:45, 3.77it/s] 9%|▉ | 33378/371472 [2:41:40<24:55:58, 3.77it/s] 9%|▉ | 33379/371472 [2:41:41<26:57:08, 3.48it/s] 9%|▉ | 33380/371472 [2:41:41<26:14:58, 3.58it/s] {'loss': 6.6409, 'learning_rate': 9.195684026805145e-07, 'epoch': 1.44} + 9%|▉ | 33380/371472 [2:41:41<26:14:58, 3.58it/s] 9%|▉ | 33381/371472 [2:41:42<30:30:35, 3.08it/s] 9%|▉ | 33382/371472 [2:41:42<28:44:59, 3.27it/s] 9%|▉ | 33383/371472 [2:41:42<30:16:30, 3.10it/s] 9%|▉ | 33384/371472 [2:41:42<28:52:17, 3.25it/s] 9%|▉ | 33385/371472 [2:41:43<27:59:13, 3.36it/s] 9%|▉ | 33386/371472 [2:41:43<28:37:45, 3.28it/s] 9%|▉ | 33387/371472 [2:41:43<29:44:21, 3.16it/s] 9%|▉ | 33388/371472 [2:41:44<28:53:57, 3.25it/s] 9%|▉ | 33389/371472 [2:41:44<27:16:58, 3.44it/s] 9%|▉ | 33390/371472 [2:41:44<28:36:40, 3.28it/s] 9%|▉ | 33391/371472 [2:41:45<28:21:25, 3.31it/s] 9%|▉ | 33392/371472 [2:41:45<29:02:45, 3.23it/s] 9%|▉ | 33393/371472 [2:41:45<28:54:04, 3.25it/s] 9%|▉ | 33394/371472 [2:41:45<27:46:59, 3.38it/s] 9%|▉ | 33395/371472 [2:41:46<26:57:15, 3.48it/s] 9%|▉ | 33396/371472 [2:41:46<26:44:16, 3.51it/s] 9%|▉ | 33397/371472 [2:41:46<27:50:38, 3.37it/s] 9%|▉ | 33398/371472 [2:41:47<28:05:48, 3.34it/s] 9%|▉ | 33399/371472 [2:41:47<28:04:34, 3.34it/s] 9%|▉ | 33400/371472 [2:41:47<29:21:51, 3.20it/s] {'loss': 5.9695, 'learning_rate': 9.195199207050356e-07, 'epoch': 1.44} + 9%|▉ | 33400/371472 [2:41:47<29:21:51, 3.20it/s] 9%|▉ | 33401/371472 [2:41:48<28:09:10, 3.34it/s] 9%|▉ | 33402/371472 [2:41:48<28:42:13, 3.27it/s] 9%|▉ | 33403/371472 [2:41:48<28:49:05, 3.26it/s] 9%|▉ | 33404/371472 [2:41:48<28:22:25, 3.31it/s] 9%|▉ | 33405/371472 [2:41:49<27:12:02, 3.45it/s] 9%|▉ | 33406/371472 [2:41:49<27:32:47, 3.41it/s] 9%|▉ | 33407/371472 [2:41:49<28:10:36, 3.33it/s] 9%|▉ | 33408/371472 [2:41:50<28:01:19, 3.35it/s] 9%|▉ | 33409/371472 [2:41:50<26:58:57, 3.48it/s] 9%|▉ | 33410/371472 [2:41:50<27:18:30, 3.44it/s] 9%|▉ | 33411/371472 [2:41:50<26:37:51, 3.53it/s] 9%|▉ | 33412/371472 [2:41:51<26:11:26, 3.59it/s] 9%|▉ | 33413/371472 [2:41:51<27:00:50, 3.48it/s] 9%|▉ | 33414/371472 [2:41:51<26:39:05, 3.52it/s] 9%|▉ | 33415/371472 [2:41:52<26:26:38, 3.55it/s] 9%|▉ | 33416/371472 [2:41:52<26:45:48, 3.51it/s] 9%|▉ | 33417/371472 [2:41:52<26:34:27, 3.53it/s] 9%|▉ | 33418/371472 [2:41:52<27:06:15, 3.46it/s] 9%|▉ | 33419/371472 [2:41:53<26:14:34, 3.58it/s] 9%|▉ | 33420/371472 [2:41:53<25:54:35, 3.62it/s] {'loss': 6.5148, 'learning_rate': 9.194714387295568e-07, 'epoch': 1.44} + 9%|▉ | 33420/371472 [2:41:53<25:54:35, 3.62it/s] 9%|▉ | 33421/371472 [2:41:53<28:11:53, 3.33it/s] 9%|▉ | 33422/371472 [2:41:54<27:27:10, 3.42it/s] 9%|▉ | 33423/371472 [2:41:54<27:26:38, 3.42it/s] 9%|▉ | 33424/371472 [2:41:54<27:06:16, 3.46it/s] 9%|▉ | 33425/371472 [2:41:54<26:51:53, 3.50it/s] 9%|▉ | 33426/371472 [2:41:55<26:41:53, 3.52it/s] 9%|▉ | 33427/371472 [2:41:55<26:28:35, 3.55it/s] 9%|▉ | 33428/371472 [2:41:55<27:37:08, 3.40it/s] 9%|▉ | 33429/371472 [2:41:56<27:02:58, 3.47it/s] 9%|▉ | 33430/371472 [2:41:56<26:46:36, 3.51it/s] 9%|▉ | 33431/371472 [2:41:56<26:09:29, 3.59it/s] 9%|▉ | 33432/371472 [2:41:56<25:53:51, 3.63it/s] 9%|▉ | 33433/371472 [2:41:57<27:42:35, 3.39it/s] 9%|▉ | 33434/371472 [2:41:57<27:17:46, 3.44it/s] 9%|▉ | 33435/371472 [2:41:57<27:02:55, 3.47it/s] 9%|▉ | 33436/371472 [2:41:58<26:09:33, 3.59it/s] 9%|▉ | 33437/371472 [2:41:58<26:03:16, 3.60it/s] 9%|▉ | 33438/371472 [2:41:58<29:14:49, 3.21it/s] 9%|▉ | 33439/371472 [2:41:59<28:13:10, 3.33it/s] 9%|▉ | 33440/371472 [2:41:59<27:29:49, 3.41it/s] {'loss': 6.447, 'learning_rate': 9.194229567540778e-07, 'epoch': 1.44} + 9%|▉ | 33440/371472 [2:41:59<27:29:49, 3.41it/s] 9%|▉ | 33441/371472 [2:41:59<27:14:17, 3.45it/s] 9%|▉ | 33442/371472 [2:41:59<26:37:16, 3.53it/s] 9%|▉ | 33443/371472 [2:42:00<26:20:21, 3.56it/s] 9%|▉ | 33444/371472 [2:42:00<26:04:57, 3.60it/s] 9%|▉ | 33445/371472 [2:42:00<26:33:11, 3.54it/s] 9%|▉ | 33446/371472 [2:42:00<26:11:32, 3.58it/s] 9%|▉ | 33447/371472 [2:42:01<25:47:18, 3.64it/s] 9%|▉ | 33448/371472 [2:42:01<25:41:10, 3.66it/s] 9%|▉ | 33449/371472 [2:42:01<25:05:13, 3.74it/s] 9%|▉ | 33450/371472 [2:42:02<26:11:24, 3.59it/s] 9%|▉ | 33451/371472 [2:42:02<25:18:11, 3.71it/s] 9%|▉ | 33452/371472 [2:42:02<26:29:58, 3.54it/s] 9%|▉ | 33453/371472 [2:42:02<26:47:57, 3.50it/s] 9%|▉ | 33454/371472 [2:42:03<26:21:26, 3.56it/s] 9%|▉ | 33455/371472 [2:42:03<25:53:37, 3.63it/s] 9%|▉ | 33456/371472 [2:42:03<29:29:38, 3.18it/s] 9%|▉ | 33457/371472 [2:42:04<28:01:10, 3.35it/s] 9%|▉ | 33458/371472 [2:42:04<27:24:49, 3.43it/s] 9%|▉ | 33459/371472 [2:42:04<29:30:52, 3.18it/s] 9%|▉ | 33460/371472 [2:42:04<27:35:52, 3.40it/s] {'loss': 6.2085, 'learning_rate': 9.19374474778599e-07, 'epoch': 1.44} + 9%|▉ | 33460/371472 [2:42:04<27:35:52, 3.40it/s] 9%|▉ | 33461/371472 [2:42:05<26:52:44, 3.49it/s] 9%|▉ | 33462/371472 [2:42:05<27:24:06, 3.43it/s] 9%|▉ | 33463/371472 [2:42:05<28:08:59, 3.34it/s] 9%|▉ | 33464/371472 [2:42:06<27:37:49, 3.40it/s] 9%|▉ | 33465/371472 [2:42:06<29:08:18, 3.22it/s] 9%|▉ | 33466/371472 [2:42:06<28:42:24, 3.27it/s] 9%|▉ | 33467/371472 [2:42:07<28:50:26, 3.26it/s] 9%|▉ | 33468/371472 [2:42:07<29:49:04, 3.15it/s] 9%|▉ | 33469/371472 [2:42:07<28:35:07, 3.28it/s] 9%|▉ | 33470/371472 [2:42:07<27:12:25, 3.45it/s] 9%|▉ | 33471/371472 [2:42:08<28:12:55, 3.33it/s] 9%|▉ | 33472/371472 [2:42:08<27:16:31, 3.44it/s] 9%|▉ | 33473/371472 [2:42:08<26:23:03, 3.56it/s] 9%|▉ | 33474/371472 [2:42:09<27:08:36, 3.46it/s] 9%|▉ | 33475/371472 [2:42:09<28:26:13, 3.30it/s] 9%|▉ | 33476/371472 [2:42:09<27:53:13, 3.37it/s] 9%|▉ | 33477/371472 [2:42:10<26:50:16, 3.50it/s] 9%|▉ | 33478/371472 [2:42:10<26:12:42, 3.58it/s] 9%|▉ | 33479/371472 [2:42:10<25:47:36, 3.64it/s] 9%|▉ | 33480/371472 [2:42:10<26:12:43, 3.58it/s] {'loss': 5.4657, 'learning_rate': 9.1932599280312e-07, 'epoch': 1.44} + 9%|▉ | 33480/371472 [2:42:10<26:12:43, 3.58it/s] 9%|▉ | 33481/371472 [2:42:11<26:59:22, 3.48it/s] 9%|▉ | 33482/371472 [2:42:11<28:48:48, 3.26it/s] 9%|▉ | 33483/371472 [2:42:11<27:45:50, 3.38it/s] 9%|▉ | 33484/371472 [2:42:12<27:48:19, 3.38it/s] 9%|▉ | 33485/371472 [2:42:12<26:38:26, 3.52it/s] 9%|▉ | 33486/371472 [2:42:12<27:02:42, 3.47it/s] 9%|▉ | 33487/371472 [2:42:12<27:01:04, 3.47it/s] 9%|▉ | 33488/371472 [2:42:13<27:51:20, 3.37it/s] 9%|▉ | 33489/371472 [2:42:13<27:08:42, 3.46it/s] 9%|▉ | 33490/371472 [2:42:13<27:24:51, 3.42it/s] 9%|▉ | 33491/371472 [2:42:14<26:15:38, 3.58it/s] 9%|▉ | 33492/371472 [2:42:14<26:11:17, 3.58it/s] 9%|▉ | 33493/371472 [2:42:14<27:10:16, 3.46it/s] 9%|▉ | 33494/371472 [2:42:14<25:59:21, 3.61it/s] 9%|▉ | 33495/371472 [2:42:15<25:33:36, 3.67it/s] 9%|▉ | 33496/371472 [2:42:15<25:18:54, 3.71it/s] 9%|▉ | 33497/371472 [2:42:15<25:20:31, 3.70it/s] 9%|▉ | 33498/371472 [2:42:15<26:17:53, 3.57it/s] 9%|▉ | 33499/371472 [2:42:16<25:14:50, 3.72it/s] 9%|▉ | 33500/371472 [2:42:16<25:41:59, 3.65it/s] {'loss': 4.7962, 'learning_rate': 9.192775108276412e-07, 'epoch': 1.44} + 9%|▉ | 33500/371472 [2:42:16<25:41:59, 3.65it/s] 9%|▉ | 33501/371472 [2:42:16<25:14:46, 3.72it/s] 9%|▉ | 33502/371472 [2:42:17<25:27:24, 3.69it/s] 9%|▉ | 33503/371472 [2:42:17<28:55:57, 3.24it/s] 9%|▉ | 33504/371472 [2:42:17<28:14:30, 3.32it/s] 9%|▉ | 33505/371472 [2:42:17<27:18:26, 3.44it/s] 9%|▉ | 33506/371472 [2:42:18<26:39:45, 3.52it/s] 9%|▉ | 33507/371472 [2:42:18<26:50:57, 3.50it/s] 9%|▉ | 33508/371472 [2:42:18<27:29:14, 3.42it/s] 9%|▉ | 33509/371472 [2:42:19<26:49:42, 3.50it/s] 9%|▉ | 33510/371472 [2:42:19<26:05:55, 3.60it/s] 9%|▉ | 33511/371472 [2:42:19<27:04:36, 3.47it/s] 9%|▉ | 33512/371472 [2:42:20<28:14:31, 3.32it/s] 9%|▉ | 33513/371472 [2:42:20<29:11:38, 3.22it/s] 9%|▉ | 33514/371472 [2:42:20<28:17:58, 3.32it/s] 9%|▉ | 33515/371472 [2:42:20<27:01:51, 3.47it/s] 9%|▉ | 33516/371472 [2:42:21<27:35:03, 3.40it/s] 9%|▉ | 33517/371472 [2:42:21<27:35:12, 3.40it/s] 9%|▉ | 33518/371472 [2:42:21<27:05:20, 3.47it/s] 9%|▉ | 33519/371472 [2:42:22<26:41:11, 3.52it/s] 9%|▉ | 33520/371472 [2:42:22<26:01:48, 3.61it/s] {'loss': 4.5683, 'learning_rate': 9.192290288521622e-07, 'epoch': 1.44} + 9%|▉ | 33520/371472 [2:42:22<26:01:48, 3.61it/s] 9%|▉ | 33521/371472 [2:42:22<27:01:14, 3.47it/s] 9%|▉ | 33522/371472 [2:42:22<27:03:22, 3.47it/s] 9%|▉ | 33523/371472 [2:42:23<27:08:00, 3.46it/s] 9%|▉ | 33524/371472 [2:42:23<26:34:05, 3.53it/s] 9%|▉ | 33525/371472 [2:42:23<27:10:28, 3.45it/s] 9%|▉ | 33526/371472 [2:42:24<27:19:12, 3.44it/s] 9%|▉ | 33527/371472 [2:42:24<28:39:02, 3.28it/s] 9%|▉ | 33528/371472 [2:42:24<28:10:27, 3.33it/s] 9%|▉ | 33529/371472 [2:42:24<27:12:06, 3.45it/s] 9%|▉ | 33530/371472 [2:42:25<26:16:26, 3.57it/s] 9%|▉ | 33531/371472 [2:42:25<26:20:24, 3.56it/s] 9%|▉ | 33532/371472 [2:42:25<27:03:25, 3.47it/s] 9%|▉ | 33533/371472 [2:42:26<28:38:22, 3.28it/s] 9%|▉ | 33534/371472 [2:42:26<28:28:32, 3.30it/s] 9%|▉ | 33535/371472 [2:42:26<27:10:09, 3.46it/s] 9%|▉ | 33536/371472 [2:42:26<26:44:20, 3.51it/s] 9%|▉ | 33537/371472 [2:42:27<28:28:50, 3.30it/s] 9%|▉ | 33538/371472 [2:42:27<28:52:49, 3.25it/s] 9%|▉ | 33539/371472 [2:42:27<27:35:23, 3.40it/s] 9%|▉ | 33540/371472 [2:42:28<27:11:53, 3.45it/s] {'loss': 4.612, 'learning_rate': 9.191805468766834e-07, 'epoch': 1.44} + 9%|▉ | 33540/371472 [2:42:28<27:11:53, 3.45it/s] 9%|▉ | 33541/371472 [2:42:28<28:31:46, 3.29it/s] 9%|▉ | 33542/371472 [2:42:28<27:33:53, 3.41it/s] 9%|▉ | 33543/371472 [2:42:29<26:43:21, 3.51it/s] 9%|▉ | 33544/371472 [2:42:29<26:19:28, 3.57it/s] 9%|▉ | 33545/371472 [2:42:29<28:39:10, 3.28it/s] 9%|▉ | 33546/371472 [2:42:29<27:06:07, 3.46it/s] 9%|▉ | 33547/371472 [2:42:30<27:30:07, 3.41it/s] 9%|▉ | 33548/371472 [2:42:30<29:49:40, 3.15it/s] 9%|▉ | 33549/371472 [2:42:30<30:38:48, 3.06it/s] 9%|▉ | 33550/371472 [2:42:31<28:35:40, 3.28it/s] 9%|▉ | 33551/371472 [2:42:31<27:14:08, 3.45it/s] 9%|▉ | 33552/371472 [2:42:31<26:58:44, 3.48it/s] 9%|▉ | 33553/371472 [2:42:32<27:05:37, 3.46it/s] 9%|▉ | 33554/371472 [2:42:32<26:06:39, 3.59it/s] 9%|▉ | 33555/371472 [2:42:32<25:57:44, 3.62it/s] 9%|▉ | 33556/371472 [2:42:32<27:41:39, 3.39it/s] 9%|▉ | 33557/371472 [2:42:33<27:40:27, 3.39it/s] 9%|▉ | 33558/371472 [2:42:33<27:33:49, 3.41it/s] 9%|▉ | 33559/371472 [2:42:33<27:26:09, 3.42it/s] 9%|▉ | 33560/371472 [2:42:34<26:29:22, 3.54it/s] {'loss': 4.7549, 'learning_rate': 9.191320649012044e-07, 'epoch': 1.45} + 9%|▉ | 33560/371472 [2:42:34<26:29:22, 3.54it/s] 9%|▉ | 33561/371472 [2:42:34<27:12:02, 3.45it/s] 9%|▉ | 33562/371472 [2:42:34<27:18:27, 3.44it/s] 9%|▉ | 33563/371472 [2:42:34<27:16:10, 3.44it/s] 9%|▉ | 33564/371472 [2:42:35<26:38:53, 3.52it/s] 9%|▉ | 33565/371472 [2:42:35<26:38:22, 3.52it/s] 9%|▉ | 33566/371472 [2:42:35<26:28:03, 3.55it/s] 9%|▉ | 33567/371472 [2:42:36<26:01:30, 3.61it/s] 9%|▉ | 33568/371472 [2:42:36<28:29:47, 3.29it/s] 9%|▉ | 33569/371472 [2:42:36<27:17:25, 3.44it/s] 9%|▉ | 33570/371472 [2:42:36<26:26:50, 3.55it/s] 9%|▉ | 33571/371472 [2:42:37<25:30:08, 3.68it/s] 9%|▉ | 33572/371472 [2:42:37<27:07:39, 3.46it/s] 9%|▉ | 33573/371472 [2:42:37<28:17:25, 3.32it/s] 9%|▉ | 33574/371472 [2:42:38<26:53:46, 3.49it/s] 9%|▉ | 33575/371472 [2:42:38<26:58:50, 3.48it/s] 9%|▉ | 33576/371472 [2:42:38<26:09:13, 3.59it/s] 9%|▉ | 33577/371472 [2:42:38<25:40:18, 3.66it/s] 9%|▉ | 33578/371472 [2:42:39<25:55:59, 3.62it/s] 9%|▉ | 33579/371472 [2:42:39<25:39:30, 3.66it/s] 9%|▉ | 33580/371472 [2:42:39<27:26:19, 3.42it/s] {'loss': 4.7052, 'learning_rate': 9.190835829257256e-07, 'epoch': 1.45} + 9%|▉ | 33580/371472 [2:42:39<27:26:19, 3.42it/s] 9%|▉ | 33581/371472 [2:42:40<26:41:55, 3.52it/s] 9%|▉ | 33582/371472 [2:42:40<26:26:44, 3.55it/s] 9%|▉ | 33583/371472 [2:42:40<26:58:41, 3.48it/s] 9%|▉ | 33584/371472 [2:42:40<27:12:17, 3.45it/s] 9%|▉ | 33585/371472 [2:42:41<28:10:21, 3.33it/s] 9%|▉ | 33586/371472 [2:42:41<28:50:21, 3.25it/s] 9%|▉ | 33587/371472 [2:42:41<27:39:24, 3.39it/s] 9%|▉ | 33588/371472 [2:42:42<27:09:54, 3.46it/s] 9%|▉ | 33589/371472 [2:42:42<31:43:15, 2.96it/s] 9%|▉ | 33590/371472 [2:42:42<30:48:42, 3.05it/s] 9%|▉ | 33591/371472 [2:42:43<31:37:49, 2.97it/s] 9%|▉ | 33592/371472 [2:42:43<31:13:48, 3.01it/s] 9%|▉ | 33593/371472 [2:42:43<29:57:36, 3.13it/s] 9%|▉ | 33594/371472 [2:42:44<28:41:22, 3.27it/s] 9%|▉ | 33595/371472 [2:42:44<29:23:03, 3.19it/s] 9%|▉ | 33596/371472 [2:42:44<27:46:17, 3.38it/s] 9%|▉ | 33597/371472 [2:42:44<26:13:51, 3.58it/s] 9%|▉ | 33598/371472 [2:42:45<27:01:28, 3.47it/s] 9%|▉ | 33599/371472 [2:42:45<26:30:51, 3.54it/s] 9%|▉ | 33600/371472 [2:42:45<26:22:22, 3.56it/s] {'loss': 4.5388, 'learning_rate': 9.190351009502466e-07, 'epoch': 1.45} + 9%|▉ | 33600/371472 [2:42:45<26:22:22, 3.56it/s] 9%|▉ | 33601/371472 [2:42:46<27:13:32, 3.45it/s] 9%|▉ | 33602/371472 [2:42:46<26:57:16, 3.48it/s] 9%|▉ | 33603/371472 [2:42:46<26:40:52, 3.52it/s] 9%|▉ | 33604/371472 [2:42:46<26:54:51, 3.49it/s] 9%|▉ | 33605/371472 [2:42:47<28:44:48, 3.26it/s] 9%|▉ | 33606/371472 [2:42:47<27:38:22, 3.40it/s] 9%|▉ | 33607/371472 [2:42:47<27:31:39, 3.41it/s] 9%|▉ | 33608/371472 [2:42:48<27:29:06, 3.41it/s] 9%|▉ | 33609/371472 [2:42:48<27:08:57, 3.46it/s] 9%|▉ | 33610/371472 [2:42:48<26:13:00, 3.58it/s] 9%|▉ | 33611/371472 [2:42:48<25:53:08, 3.63it/s] 9%|▉ | 33612/371472 [2:42:49<26:32:06, 3.54it/s] 9%|▉ | 33613/371472 [2:42:49<26:05:55, 3.60it/s] 9%|▉ | 33614/371472 [2:42:49<25:31:04, 3.68it/s] 9%|▉ | 33615/371472 [2:42:50<27:42:21, 3.39it/s] 9%|▉ | 33616/371472 [2:42:50<29:47:44, 3.15it/s] 9%|▉ | 33617/371472 [2:42:50<28:42:10, 3.27it/s] 9%|▉ | 33618/371472 [2:42:51<28:00:54, 3.35it/s] 9%|▉ | 33619/371472 [2:42:51<27:14:31, 3.44it/s] 9%|▉ | 33620/371472 [2:42:51<26:44:14, 3.51it/s] {'loss': 4.3169, 'learning_rate': 9.189866189747678e-07, 'epoch': 1.45} + 9%|▉ | 33620/371472 [2:42:51<26:44:14, 3.51it/s] 9%|▉ | 33621/371472 [2:42:51<27:23:43, 3.43it/s] 9%|▉ | 33622/371472 [2:42:52<26:40:46, 3.52it/s] 9%|▉ | 33623/371472 [2:42:52<26:01:09, 3.61it/s] 9%|▉ | 33624/371472 [2:42:52<25:34:53, 3.67it/s] 9%|▉ | 33625/371472 [2:42:52<25:59:26, 3.61it/s] 9%|▉ | 33626/371472 [2:42:53<26:07:40, 3.59it/s] 9%|▉ | 33627/371472 [2:42:53<26:18:54, 3.57it/s] 9%|▉ | 33628/371472 [2:42:53<27:13:01, 3.45it/s] 9%|▉ | 33629/371472 [2:42:54<27:21:23, 3.43it/s] 9%|▉ | 33630/371472 [2:42:54<27:15:58, 3.44it/s] 9%|▉ | 33631/371472 [2:42:54<27:10:28, 3.45it/s] 9%|▉ | 33632/371472 [2:42:55<29:58:33, 3.13it/s] 9%|▉ | 33633/371472 [2:42:55<28:43:17, 3.27it/s] 9%|▉ | 33634/371472 [2:42:55<29:02:37, 3.23it/s] 9%|▉ | 33635/371472 [2:42:56<28:52:21, 3.25it/s] 9%|▉ | 33636/371472 [2:42:56<28:52:47, 3.25it/s] 9%|▉ | 33637/371472 [2:42:56<27:38:59, 3.39it/s] 9%|▉ | 33638/371472 [2:42:56<27:14:13, 3.45it/s] 9%|▉ | 33639/371472 [2:42:57<27:01:56, 3.47it/s] 9%|▉ | 33640/371472 [2:42:57<26:17:56, 3.57it/s] {'loss': 4.8239, 'learning_rate': 9.189381369992889e-07, 'epoch': 1.45} + 9%|▉ | 33640/371472 [2:42:57<26:17:56, 3.57it/s] 9%|▉ | 33641/371472 [2:42:57<26:12:32, 3.58it/s] 9%|▉ | 33642/371472 [2:42:57<25:57:08, 3.62it/s] 9%|▉ | 33643/371472 [2:42:58<26:59:55, 3.48it/s] 9%|▉ | 33644/371472 [2:42:58<34:30:11, 2.72it/s] 9%|▉ | 33645/371472 [2:42:59<32:35:30, 2.88it/s] 9%|▉ | 33646/371472 [2:42:59<31:00:00, 3.03it/s] 9%|��� | 33647/371472 [2:42:59<28:51:14, 3.25it/s] 9%|▉ | 33648/371472 [2:42:59<27:44:37, 3.38it/s] 9%|▉ | 33649/371472 [2:43:00<27:26:08, 3.42it/s] 9%|▉ | 33650/371472 [2:43:00<26:19:10, 3.57it/s] 9%|▉ | 33651/371472 [2:43:00<25:38:29, 3.66it/s] 9%|▉ | 33652/371472 [2:43:01<26:35:58, 3.53it/s] 9%|▉ | 33653/371472 [2:43:01<25:36:45, 3.66it/s] 9%|▉ | 33654/371472 [2:43:01<26:09:21, 3.59it/s] 9%|▉ | 33655/371472 [2:43:01<25:41:18, 3.65it/s] 9%|▉ | 33656/371472 [2:43:02<25:18:20, 3.71it/s] 9%|▉ | 33657/371472 [2:43:02<27:30:38, 3.41it/s] 9%|▉ | 33658/371472 [2:43:02<28:16:15, 3.32it/s] 9%|▉ | 33659/371472 [2:43:03<27:25:58, 3.42it/s] 9%|▉ | 33660/371472 [2:43:03<26:45:12, 3.51it/s] {'loss': 4.8199, 'learning_rate': 9.1888965502381e-07, 'epoch': 1.45} + 9%|▉ | 33660/371472 [2:43:03<26:45:12, 3.51it/s] 9%|▉ | 33661/371472 [2:43:03<29:09:36, 3.22it/s] 9%|▉ | 33662/371472 [2:43:03<27:59:29, 3.35it/s] 9%|▉ | 33663/371472 [2:43:04<27:12:17, 3.45it/s] 9%|▉ | 33664/371472 [2:43:04<27:15:05, 3.44it/s] 9%|▉ | 33665/371472 [2:43:04<28:42:20, 3.27it/s] 9%|▉ | 33666/371472 [2:43:05<27:57:31, 3.36it/s] 9%|▉ | 33667/371472 [2:43:05<28:25:27, 3.30it/s] 9%|▉ | 33668/371472 [2:43:05<27:06:06, 3.46it/s] 9%|▉ | 33669/371472 [2:43:06<30:42:07, 3.06it/s] 9%|▉ | 33670/371472 [2:43:06<29:31:14, 3.18it/s] 9%|▉ | 33671/371472 [2:43:06<28:03:23, 3.34it/s] 9%|▉ | 33672/371472 [2:43:06<27:14:10, 3.45it/s] 9%|▉ | 33673/371472 [2:43:07<27:08:46, 3.46it/s] 9%|▉ | 33674/371472 [2:43:07<26:01:30, 3.61it/s] 9%|▉ | 33675/371472 [2:43:07<25:22:36, 3.70it/s] 9%|▉ | 33676/371472 [2:43:08<25:09:28, 3.73it/s] 9%|▉ | 33677/371472 [2:43:08<24:38:58, 3.81it/s] 9%|▉ | 33678/371472 [2:43:08<25:14:20, 3.72it/s] 9%|▉ | 33679/371472 [2:43:08<25:12:15, 3.72it/s] 9%|▉ | 33680/371472 [2:43:09<27:26:24, 3.42it/s] {'loss': 4.7415, 'learning_rate': 9.188411730483311e-07, 'epoch': 1.45} + 9%|▉ | 33680/371472 [2:43:09<27:26:24, 3.42it/s] 9%|▉ | 33681/371472 [2:43:09<27:53:36, 3.36it/s] 9%|▉ | 33682/371472 [2:43:09<28:52:33, 3.25it/s] 9%|▉ | 33683/371472 [2:43:10<27:28:29, 3.42it/s] 9%|▉ | 33684/371472 [2:43:10<28:38:24, 3.28it/s] 9%|▉ | 33685/371472 [2:43:10<29:07:51, 3.22it/s] 9%|▉ | 33686/371472 [2:43:10<27:39:41, 3.39it/s] 9%|▉ | 33687/371472 [2:43:11<27:40:20, 3.39it/s] 9%|▉ | 33688/371472 [2:43:11<27:59:41, 3.35it/s] 9%|▉ | 33689/371472 [2:43:11<30:45:20, 3.05it/s] 9%|▉ | 33690/371472 [2:43:12<29:46:15, 3.15it/s] 9%|▉ | 33691/371472 [2:43:12<28:17:10, 3.32it/s] 9%|▉ | 33692/371472 [2:43:12<27:17:19, 3.44it/s] 9%|▉ | 33693/371472 [2:43:13<27:36:55, 3.40it/s] 9%|▉ | 33694/371472 [2:43:13<28:15:22, 3.32it/s] 9%|▉ | 33695/371472 [2:43:13<29:14:01, 3.21it/s] 9%|▉ | 33696/371472 [2:43:14<32:16:23, 2.91it/s] 9%|▉ | 33697/371472 [2:43:14<29:37:21, 3.17it/s] 9%|▉ | 33698/371472 [2:43:14<30:51:58, 3.04it/s] 9%|▉ | 33699/371472 [2:43:15<28:46:47, 3.26it/s] 9%|▉ | 33700/371472 [2:43:15<29:05:24, 3.23it/s] {'loss': 4.4792, 'learning_rate': 9.187926910728522e-07, 'epoch': 1.45} + 9%|▉ | 33700/371472 [2:43:15<29:05:24, 3.23it/s] 9%|▉ | 33701/371472 [2:43:15<31:11:30, 3.01it/s] 9%|▉ | 33702/371472 [2:43:16<30:01:34, 3.12it/s] 9%|▉ | 33703/371472 [2:43:16<30:09:15, 3.11it/s] 9%|▉ | 33704/371472 [2:43:16<29:01:55, 3.23it/s] 9%|▉ | 33705/371472 [2:43:16<27:36:17, 3.40it/s] 9%|▉ | 33706/371472 [2:43:17<30:48:45, 3.04it/s] 9%|▉ | 33707/371472 [2:43:17<28:56:18, 3.24it/s] 9%|▉ | 33708/371472 [2:43:17<27:13:35, 3.45it/s] 9%|▉ | 33709/371472 [2:43:18<26:13:09, 3.58it/s] 9%|▉ | 33710/371472 [2:43:18<25:16:27, 3.71it/s] 9%|▉ | 33711/371472 [2:43:18<24:53:39, 3.77it/s] 9%|▉ | 33712/371472 [2:43:18<25:07:24, 3.73it/s] 9%|▉ | 33713/371472 [2:43:19<24:46:57, 3.79it/s] 9%|▉ | 33714/371472 [2:43:19<25:26:07, 3.69it/s] 9%|▉ | 33715/371472 [2:43:19<25:44:52, 3.64it/s] 9%|▉ | 33716/371472 [2:43:19<27:11:14, 3.45it/s] 9%|▉ | 33717/371472 [2:43:20<30:15:23, 3.10it/s] 9%|▉ | 33718/371472 [2:43:20<30:34:33, 3.07it/s] 9%|▉ | 33719/371472 [2:43:20<28:46:40, 3.26it/s] 9%|▉ | 33720/371472 [2:43:21<28:05:58, 3.34it/s] {'loss': 4.7136, 'learning_rate': 9.187442090973734e-07, 'epoch': 1.45} + 9%|▉ | 33720/371472 [2:43:21<28:05:58, 3.34it/s] 9%|▉ | 33721/371472 [2:43:21<27:35:17, 3.40it/s] 9%|▉ | 33722/371472 [2:43:21<27:47:11, 3.38it/s] 9%|▉ | 33723/371472 [2:43:22<27:33:51, 3.40it/s] 9%|▉ | 33724/371472 [2:43:22<27:55:43, 3.36it/s] 9%|▉ | 33725/371472 [2:43:22<26:45:14, 3.51it/s] 9%|▉ | 33726/371472 [2:43:22<27:18:42, 3.44it/s] 9%|▉ | 33727/371472 [2:43:23<27:36:40, 3.40it/s] 9%|▉ | 33728/371472 [2:43:23<27:02:06, 3.47it/s] 9%|▉ | 33729/371472 [2:43:23<28:40:19, 3.27it/s] 9%|▉ | 33730/371472 [2:43:24<27:10:37, 3.45it/s] 9%|▉ | 33731/371472 [2:43:24<28:47:54, 3.26it/s] 9%|▉ | 33732/371472 [2:43:24<27:16:46, 3.44it/s] 9%|▉ | 33733/371472 [2:43:25<27:47:08, 3.38it/s] 9%|▉ | 33734/371472 [2:43:25<29:56:50, 3.13it/s] 9%|▉ | 33735/371472 [2:43:25<29:43:46, 3.16it/s] 9%|▉ | 33736/371472 [2:43:26<28:09:46, 3.33it/s] 9%|▉ | 33737/371472 [2:43:26<28:18:15, 3.31it/s] 9%|▉ | 33738/371472 [2:43:26<27:05:50, 3.46it/s] 9%|▉ | 33739/371472 [2:43:26<27:05:49, 3.46it/s] 9%|▉ | 33740/371472 [2:43:27<25:55:30, 3.62it/s] {'loss': 4.6015, 'learning_rate': 9.186957271218943e-07, 'epoch': 1.45} + 9%|▉ | 33740/371472 [2:43:27<25:55:30, 3.62it/s] 9%|▉ | 33741/371472 [2:43:27<26:27:11, 3.55it/s] 9%|▉ | 33742/371472 [2:43:27<27:49:49, 3.37it/s] 9%|▉ | 33743/371472 [2:43:28<27:33:48, 3.40it/s] 9%|▉ | 33744/371472 [2:43:28<27:11:50, 3.45it/s] 9%|▉ | 33745/371472 [2:43:28<26:16:38, 3.57it/s] 9%|▉ | 33746/371472 [2:43:28<27:32:55, 3.41it/s] 9%|▉ | 33747/371472 [2:43:29<26:12:38, 3.58it/s] 9%|▉ | 33748/371472 [2:43:29<26:39:11, 3.52it/s] 9%|▉ | 33749/371472 [2:43:29<26:00:55, 3.61it/s] 9%|▉ | 33750/371472 [2:43:29<25:35:00, 3.67it/s] 9%|▉ | 33751/371472 [2:43:30<28:26:30, 3.30it/s] 9%|▉ | 33752/371472 [2:43:30<28:46:00, 3.26it/s] 9%|▉ | 33753/371472 [2:43:30<27:53:33, 3.36it/s] 9%|▉ | 33754/371472 [2:43:31<27:05:30, 3.46it/s] 9%|▉ | 33755/371472 [2:43:31<25:43:06, 3.65it/s] 9%|▉ | 33756/371472 [2:43:31<25:05:38, 3.74it/s] 9%|▉ | 33757/371472 [2:43:31<25:13:42, 3.72it/s] 9%|▉ | 33758/371472 [2:43:32<25:04:33, 3.74it/s] 9%|▉ | 33759/371472 [2:43:32<25:06:06, 3.74it/s] 9%|▉ | 33760/371472 [2:43:32<25:00:22, 3.75it/s] {'loss': 4.6091, 'learning_rate': 9.186472451464155e-07, 'epoch': 1.45} + 9%|▉ | 33760/371472 [2:43:32<25:00:22, 3.75it/s] 9%|▉ | 33761/371472 [2:43:33<24:44:52, 3.79it/s] 9%|▉ | 33762/371472 [2:43:33<24:53:53, 3.77it/s] 9%|▉ | 33763/371472 [2:43:33<25:10:47, 3.73it/s] 9%|▉ | 33764/371472 [2:43:33<27:31:03, 3.41it/s] 9%|▉ | 33765/371472 [2:43:34<26:29:07, 3.54it/s] 9%|▉ | 33766/371472 [2:43:34<26:09:23, 3.59it/s] 9%|▉ | 33767/371472 [2:43:34<26:28:37, 3.54it/s] 9%|▉ | 33768/371472 [2:43:35<26:42:42, 3.51it/s] 9%|▉ | 33769/371472 [2:43:35<26:15:11, 3.57it/s] 9%|▉ | 33770/371472 [2:43:35<25:44:00, 3.65it/s] 9%|▉ | 33771/371472 [2:43:35<25:16:30, 3.71it/s] 9%|▉ | 33772/371472 [2:43:36<27:45:32, 3.38it/s] 9%|▉ | 33773/371472 [2:43:36<28:50:38, 3.25it/s] 9%|▉ | 33774/371472 [2:43:36<28:00:57, 3.35it/s] 9%|▉ | 33775/371472 [2:43:37<26:54:01, 3.49it/s] 9%|▉ | 33776/371472 [2:43:37<29:04:06, 3.23it/s] 9%|▉ | 33777/371472 [2:43:37<29:10:02, 3.22it/s] 9%|▉ | 33778/371472 [2:43:37<27:38:56, 3.39it/s] 9%|▉ | 33779/371472 [2:43:38<27:51:10, 3.37it/s] 9%|▉ | 33780/371472 [2:43:38<28:50:50, 3.25it/s] {'loss': 4.4059, 'learning_rate': 9.185987631709366e-07, 'epoch': 1.45} + 9%|▉ | 33780/371472 [2:43:38<28:50:50, 3.25it/s] 9%|▉ | 33781/371472 [2:43:38<27:51:10, 3.37it/s] 9%|▉ | 33782/371472 [2:43:39<27:02:29, 3.47it/s] 9%|▉ | 33783/371472 [2:43:39<26:13:56, 3.58it/s] 9%|▉ | 33784/371472 [2:43:39<26:33:51, 3.53it/s] 9%|▉ | 33785/371472 [2:43:39<25:52:17, 3.63it/s] 9%|▉ | 33786/371472 [2:43:40<25:02:42, 3.75it/s] 9%|▉ | 33787/371472 [2:43:40<25:40:51, 3.65it/s] 9%|▉ | 33788/371472 [2:43:40<25:09:33, 3.73it/s] 9%|▉ | 33789/371472 [2:43:41<26:30:31, 3.54it/s] 9%|▉ | 33790/371472 [2:43:41<25:46:19, 3.64it/s] 9%|▉ | 33791/371472 [2:43:41<25:41:05, 3.65it/s] 9%|▉ | 33792/371472 [2:43:41<27:36:38, 3.40it/s] 9%|▉ | 33793/371472 [2:43:42<28:18:56, 3.31it/s] 9%|▉ | 33794/371472 [2:43:42<27:43:06, 3.38it/s] 9%|▉ | 33795/371472 [2:43:42<27:50:22, 3.37it/s] 9%|▉ | 33796/371472 [2:43:43<26:31:33, 3.54it/s] 9%|▉ | 33797/371472 [2:43:43<26:51:16, 3.49it/s] 9%|▉ | 33798/371472 [2:43:43<27:39:40, 3.39it/s] 9%|▉ | 33799/371472 [2:43:44<27:51:52, 3.37it/s] 9%|▉ | 33800/371472 [2:43:44<26:38:48, 3.52it/s] {'loss': 4.7998, 'learning_rate': 9.185502811954578e-07, 'epoch': 1.46} + 9%|▉ | 33800/371472 [2:43:44<26:38:48, 3.52it/s] 9%|▉ | 33801/371472 [2:43:44<27:07:20, 3.46it/s] 9%|▉ | 33802/371472 [2:43:44<26:36:09, 3.53it/s] 9%|▉ | 33803/371472 [2:43:45<25:37:47, 3.66it/s] 9%|▉ | 33804/371472 [2:43:45<26:05:36, 3.59it/s] 9%|▉ | 33805/371472 [2:43:45<26:16:35, 3.57it/s] 9%|▉ | 33806/371472 [2:43:45<25:59:27, 3.61it/s] 9%|▉ | 33807/371472 [2:43:46<26:15:52, 3.57it/s] 9%|▉ | 33808/371472 [2:43:46<25:30:25, 3.68it/s] 9%|▉ | 33809/371472 [2:43:46<26:27:44, 3.54it/s] 9%|▉ | 33810/371472 [2:43:47<25:34:18, 3.67it/s] 9%|▉ | 33811/371472 [2:43:47<25:32:23, 3.67it/s] 9%|▉ | 33812/371472 [2:43:47<25:22:24, 3.70it/s] 9%|▉ | 33813/371472 [2:43:47<25:30:48, 3.68it/s] 9%|▉ | 33814/371472 [2:43:48<24:54:06, 3.77it/s] 9%|▉ | 33815/371472 [2:43:48<24:26:35, 3.84it/s] 9%|▉ | 33816/371472 [2:43:48<23:51:33, 3.93it/s] 9%|▉ | 33817/371472 [2:43:48<23:42:42, 3.96it/s] 9%|▉ | 33818/371472 [2:43:49<24:52:55, 3.77it/s] 9%|▉ | 33819/371472 [2:43:49<27:47:56, 3.37it/s] 9%|▉ | 33820/371472 [2:43:49<27:17:54, 3.44it/s] {'loss': 4.6805, 'learning_rate': 9.185017992199788e-07, 'epoch': 1.46} + 9%|▉ | 33820/371472 [2:43:49<27:17:54, 3.44it/s] 9%|▉ | 33821/371472 [2:43:50<26:31:56, 3.54it/s] 9%|▉ | 33822/371472 [2:43:50<26:43:44, 3.51it/s] 9%|▉ | 33823/371472 [2:43:50<26:06:33, 3.59it/s] 9%|▉ | 33824/371472 [2:43:50<25:51:00, 3.63it/s] 9%|▉ | 33825/371472 [2:43:51<26:44:19, 3.51it/s] 9%|▉ | 33826/371472 [2:43:51<27:17:39, 3.44it/s] 9%|▉ | 33827/371472 [2:43:51<26:36:38, 3.52it/s] 9%|▉ | 33828/371472 [2:43:52<26:30:44, 3.54it/s] 9%|▉ | 33829/371472 [2:43:52<27:40:56, 3.39it/s] 9%|▉ | 33830/371472 [2:43:52<28:18:51, 3.31it/s] 9%|▉ | 33831/371472 [2:43:52<28:33:40, 3.28it/s] 9%|▉ | 33832/371472 [2:43:53<27:53:36, 3.36it/s] 9%|▉ | 33833/371472 [2:43:53<28:47:21, 3.26it/s] 9%|▉ | 33834/371472 [2:43:53<29:06:04, 3.22it/s] 9%|▉ | 33835/371472 [2:43:54<29:19:43, 3.20it/s] 9%|▉ | 33836/371472 [2:43:54<28:11:39, 3.33it/s] 9%|▉ | 33837/371472 [2:43:54<28:04:39, 3.34it/s] 9%|▉ | 33838/371472 [2:43:55<27:39:39, 3.39it/s] 9%|▉ | 33839/371472 [2:43:55<26:16:44, 3.57it/s] 9%|▉ | 33840/371472 [2:43:55<28:16:06, 3.32it/s] {'loss': 4.7347, 'learning_rate': 9.184533172445e-07, 'epoch': 1.46} + 9%|▉ | 33840/371472 [2:43:55<28:16:06, 3.32it/s] 9%|▉ | 33841/371472 [2:43:55<27:17:32, 3.44it/s] 9%|▉ | 33842/371472 [2:43:56<27:02:30, 3.47it/s] 9%|▉ | 33843/371472 [2:43:56<27:08:43, 3.45it/s] 9%|▉ | 33844/371472 [2:43:56<28:00:00, 3.35it/s] 9%|▉ | 33845/371472 [2:43:57<28:06:30, 3.34it/s] 9%|▉ | 33846/371472 [2:43:57<28:46:43, 3.26it/s] 9%|▉ | 33847/371472 [2:43:57<27:39:11, 3.39it/s] 9%|▉ | 33848/371472 [2:43:57<26:28:10, 3.54it/s] 9%|▉ | 33849/371472 [2:43:58<26:22:01, 3.56it/s] 9%|▉ | 33850/371472 [2:43:58<27:43:26, 3.38it/s] 9%|▉ | 33851/371472 [2:43:58<27:41:14, 3.39it/s] 9%|▉ | 33852/371472 [2:43:59<26:49:51, 3.50it/s] 9%|▉ | 33853/371472 [2:43:59<26:42:19, 3.51it/s] 9%|▉ | 33854/371472 [2:43:59<26:14:30, 3.57it/s] 9%|▉ | 33855/371472 [2:43:59<26:12:54, 3.58it/s] 9%|▉ | 33856/371472 [2:44:00<26:26:22, 3.55it/s] 9%|▉ | 33857/371472 [2:44:00<26:15:36, 3.57it/s] 9%|▉ | 33858/371472 [2:44:00<26:10:50, 3.58it/s] 9%|▉ | 33859/371472 [2:44:01<28:29:39, 3.29it/s] 9%|▉ | 33860/371472 [2:44:01<28:56:45, 3.24it/s] {'loss': 4.4513, 'learning_rate': 9.18404835269021e-07, 'epoch': 1.46} + 9%|▉ | 33860/371472 [2:44:01<28:56:45, 3.24it/s] 9%|▉ | 33861/371472 [2:44:01<28:19:43, 3.31it/s] 9%|▉ | 33862/371472 [2:44:02<28:06:05, 3.34it/s] 9%|▉ | 33863/371472 [2:44:02<29:56:30, 3.13it/s] 9%|▉ | 33864/371472 [2:44:02<28:16:31, 3.32it/s] 9%|▉ | 33865/371472 [2:44:02<27:33:36, 3.40it/s] 9%|▉ | 33866/371472 [2:44:03<27:29:52, 3.41it/s] 9%|▉ | 33867/371472 [2:44:03<26:52:06, 3.49it/s] 9%|▉ | 33868/371472 [2:44:03<28:36:08, 3.28it/s] 9%|▉ | 33869/371472 [2:44:04<28:21:17, 3.31it/s] 9%|▉ | 33870/371472 [2:44:04<27:27:54, 3.41it/s] 9%|▉ | 33871/371472 [2:44:04<27:41:55, 3.39it/s] 9%|▉ | 33872/371472 [2:44:05<27:11:29, 3.45it/s] 9%|▉ | 33873/371472 [2:44:05<28:17:05, 3.32it/s] 9%|▉ | 33874/371472 [2:44:05<27:44:34, 3.38it/s] 9%|▉ | 33875/371472 [2:44:05<28:09:01, 3.33it/s] 9%|▉ | 33876/371472 [2:44:06<26:35:43, 3.53it/s] 9%|▉ | 33877/371472 [2:44:06<25:55:39, 3.62it/s] 9%|▉ | 33878/371472 [2:44:06<26:12:37, 3.58it/s] 9%|▉ | 33879/371472 [2:44:06<25:34:27, 3.67it/s] 9%|▉ | 33880/371472 [2:44:07<25:01:47, 3.75it/s] {'loss': 4.8156, 'learning_rate': 9.183563532935422e-07, 'epoch': 1.46} + 9%|▉ | 33880/371472 [2:44:07<25:01:47, 3.75it/s] 9%|▉ | 33881/371472 [2:44:07<25:32:42, 3.67it/s] 9%|▉ | 33882/371472 [2:44:07<25:23:05, 3.69it/s] 9%|▉ | 33883/371472 [2:44:08<26:49:17, 3.50it/s] 9%|▉ | 33884/371472 [2:44:08<25:59:31, 3.61it/s] 9%|▉ | 33885/371472 [2:44:08<26:09:00, 3.59it/s] 9%|▉ | 33886/371472 [2:44:08<26:08:58, 3.59it/s] 9%|▉ | 33887/371472 [2:44:09<28:03:57, 3.34it/s] 9%|▉ | 33888/371472 [2:44:09<27:02:04, 3.47it/s] 9%|▉ | 33889/371472 [2:44:09<27:02:08, 3.47it/s] 9%|▉ | 33890/371472 [2:44:10<26:19:49, 3.56it/s] 9%|▉ | 33891/371472 [2:44:10<25:42:54, 3.65it/s] 9%|▉ | 33892/371472 [2:44:10<25:11:43, 3.72it/s] 9%|▉ | 33893/371472 [2:44:10<25:33:04, 3.67it/s] 9%|▉ | 33894/371472 [2:44:11<25:28:21, 3.68it/s] 9%|▉ | 33895/371472 [2:44:11<26:52:24, 3.49it/s] 9%|▉ | 33896/371472 [2:44:11<26:16:09, 3.57it/s] 9%|▉ | 33897/371472 [2:44:12<26:19:35, 3.56it/s] 9%|▉ | 33898/371472 [2:44:12<25:45:06, 3.64it/s] 9%|▉ | 33899/371472 [2:44:12<25:34:12, 3.67it/s] 9%|▉ | 33900/371472 [2:44:12<25:08:14, 3.73it/s] {'loss': 4.6449, 'learning_rate': 9.183078713180632e-07, 'epoch': 1.46} + 9%|▉ | 33900/371472 [2:44:12<25:08:14, 3.73it/s] 9%|▉ | 33901/371472 [2:44:13<25:44:07, 3.64it/s] 9%|▉ | 33902/371472 [2:44:13<25:29:37, 3.68it/s] 9%|▉ | 33903/371472 [2:44:13<25:19:13, 3.70it/s] 9%|▉ | 33904/371472 [2:44:13<26:28:39, 3.54it/s] 9%|▉ | 33905/371472 [2:44:14<26:06:07, 3.59it/s] 9%|▉ | 33906/371472 [2:44:14<25:23:18, 3.69it/s] 9%|▉ | 33907/371472 [2:44:14<25:23:57, 3.69it/s] 9%|▉ | 33908/371472 [2:44:15<26:19:56, 3.56it/s] 9%|▉ | 33909/371472 [2:44:15<27:05:47, 3.46it/s] 9%|▉ | 33910/371472 [2:44:15<26:38:29, 3.52it/s] 9%|▉ | 33911/371472 [2:44:15<28:24:37, 3.30it/s] 9%|▉ | 33912/371472 [2:44:16<27:28:20, 3.41it/s] 9%|▉ | 33913/371472 [2:44:16<26:29:13, 3.54it/s] 9%|▉ | 33914/371472 [2:44:16<26:14:41, 3.57it/s] 9%|▉ | 33915/371472 [2:44:17<25:23:37, 3.69it/s] 9%|▉ | 33916/371472 [2:44:17<24:57:08, 3.76it/s] 9%|▉ | 33917/371472 [2:44:17<25:03:35, 3.74it/s] 9%|▉ | 33918/371472 [2:44:17<25:17:15, 3.71it/s] 9%|▉ | 33919/371472 [2:44:18<26:41:46, 3.51it/s] 9%|▉ | 33920/371472 [2:44:18<27:00:34, 3.47it/s] {'loss': 4.5142, 'learning_rate': 9.182593893425844e-07, 'epoch': 1.46} + 9%|▉ | 33920/371472 [2:44:18<27:00:34, 3.47it/s] 9%|▉ | 33921/371472 [2:44:18<26:23:57, 3.55it/s] 9%|▉ | 33922/371472 [2:44:19<28:49:31, 3.25it/s] 9%|▉ | 33923/371472 [2:44:19<28:38:27, 3.27it/s] 9%|▉ | 33924/371472 [2:44:19<27:47:39, 3.37it/s] 9%|▉ | 33925/371472 [2:44:19<27:40:44, 3.39it/s] 9%|▉ | 33926/371472 [2:44:20<26:28:49, 3.54it/s] 9%|▉ | 33927/371472 [2:44:20<26:02:49, 3.60it/s] 9%|▉ | 33928/371472 [2:44:20<25:50:49, 3.63it/s] 9%|▉ | 33929/371472 [2:44:21<25:45:55, 3.64it/s] 9%|▉ | 33930/371472 [2:44:21<25:17:04, 3.71it/s] 9%|▉ | 33931/371472 [2:44:21<25:29:21, 3.68it/s] 9%|▉ | 33932/371472 [2:44:21<25:37:29, 3.66it/s] 9%|▉ | 33933/371472 [2:44:22<25:25:51, 3.69it/s] 9%|▉ | 33934/371472 [2:44:22<24:43:44, 3.79it/s] 9%|▉ | 33935/371472 [2:44:22<25:11:20, 3.72it/s] 9%|▉ | 33936/371472 [2:44:22<25:56:45, 3.61it/s] 9%|▉ | 33937/371472 [2:44:23<25:15:49, 3.71it/s] 9%|▉ | 33938/371472 [2:44:23<25:03:20, 3.74it/s] 9%|▉ | 33939/371472 [2:44:23<26:06:03, 3.59it/s] 9%|▉ | 33940/371472 [2:44:23<25:24:05, 3.69it/s] {'loss': 4.6287, 'learning_rate': 9.182109073671055e-07, 'epoch': 1.46} + 9%|▉ | 33940/371472 [2:44:23<25:24:05, 3.69it/s] 9%|▉ | 33941/371472 [2:44:24<25:10:08, 3.73it/s] 9%|▉ | 33942/371472 [2:44:24<26:43:02, 3.51it/s] 9%|▉ | 33943/371472 [2:44:24<26:53:26, 3.49it/s] 9%|▉ | 33944/371472 [2:44:25<27:10:41, 3.45it/s] 9%|▉ | 33945/371472 [2:44:25<26:15:56, 3.57it/s] 9%|▉ | 33946/371472 [2:44:25<26:23:56, 3.55it/s] 9%|▉ | 33947/371472 [2:44:25<26:20:43, 3.56it/s] 9%|▉ | 33948/371472 [2:44:26<27:35:13, 3.40it/s] 9%|▉ | 33949/371472 [2:44:26<27:24:48, 3.42it/s] 9%|▉ | 33950/371472 [2:44:26<27:17:03, 3.44it/s] 9%|▉ | 33951/371472 [2:44:27<26:55:43, 3.48it/s] 9%|▉ | 33952/371472 [2:44:27<29:08:47, 3.22it/s] 9%|▉ | 33953/371472 [2:44:27<28:15:37, 3.32it/s] 9%|▉ | 33954/371472 [2:44:28<29:49:05, 3.14it/s] 9%|▉ | 33955/371472 [2:44:28<28:00:15, 3.35it/s] 9%|▉ | 33956/371472 [2:44:28<27:07:09, 3.46it/s] 9%|▉ | 33957/371472 [2:44:28<26:02:09, 3.60it/s] 9%|▉ | 33958/371472 [2:44:29<25:04:07, 3.74it/s] 9%|▉ | 33959/371472 [2:44:29<25:42:31, 3.65it/s] 9%|▉ | 33960/371472 [2:44:29<25:58:11, 3.61it/s] {'loss': 4.5074, 'learning_rate': 9.181624253916267e-07, 'epoch': 1.46} + 9%|▉ | 33960/371472 [2:44:29<25:58:11, 3.61it/s] 9%|▉ | 33961/371472 [2:44:30<27:22:31, 3.42it/s] 9%|▉ | 33962/371472 [2:44:30<29:13:42, 3.21it/s] 9%|▉ | 33963/371472 [2:44:30<28:57:39, 3.24it/s] 9%|▉ | 33964/371472 [2:44:31<29:05:20, 3.22it/s] 9%|▉ | 33965/371472 [2:44:31<29:20:59, 3.19it/s] 9%|▉ | 33966/371472 [2:44:31<28:48:39, 3.25it/s] 9%|▉ | 33967/371472 [2:44:31<29:31:07, 3.18it/s] 9%|▉ | 33968/371472 [2:44:32<30:00:32, 3.12it/s] 9%|▉ | 33969/371472 [2:44:32<29:10:05, 3.21it/s] 9%|▉ | 33970/371472 [2:44:32<28:00:08, 3.35it/s] 9%|▉ | 33971/371472 [2:44:33<27:07:53, 3.46it/s] 9%|▉ | 33972/371472 [2:44:33<27:02:07, 3.47it/s] 9%|▉ | 33973/371472 [2:44:33<28:30:13, 3.29it/s] 9%|▉ | 33974/371472 [2:44:34<27:39:52, 3.39it/s] 9%|▉ | 33975/371472 [2:44:34<27:09:01, 3.45it/s] 9%|▉ | 33976/371472 [2:44:34<27:04:09, 3.46it/s] 9%|▉ | 33977/371472 [2:44:34<26:39:30, 3.52it/s] 9%|▉ | 33978/371472 [2:44:35<26:28:26, 3.54it/s] 9%|▉ | 33979/371472 [2:44:35<26:43:47, 3.51it/s] 9%|▉ | 33980/371472 [2:44:35<26:17:02, 3.57it/s] {'loss': 4.6228, 'learning_rate': 9.181139434161476e-07, 'epoch': 1.46} + 9%|▉ | 33980/371472 [2:44:35<26:17:02, 3.57it/s] 9%|▉ | 33981/371472 [2:44:36<26:30:30, 3.54it/s] 9%|▉ | 33982/371472 [2:44:36<25:32:21, 3.67it/s] 9%|▉ | 33983/371472 [2:44:36<25:07:25, 3.73it/s] 9%|▉ | 33984/371472 [2:44:36<25:15:06, 3.71it/s] 9%|▉ | 33985/371472 [2:44:37<25:32:08, 3.67it/s] 9%|▉ | 33986/371472 [2:44:37<26:49:29, 3.49it/s] 9%|▉ | 33987/371472 [2:44:37<26:12:28, 3.58it/s] 9%|▉ | 33988/371472 [2:44:37<26:12:40, 3.58it/s] 9%|▉ | 33989/371472 [2:44:38<25:24:28, 3.69it/s] 9%|▉ | 33990/371472 [2:44:38<24:55:41, 3.76it/s] 9%|▉ | 33991/371472 [2:44:38<26:46:34, 3.50it/s] 9%|▉ | 33992/371472 [2:44:39<26:59:54, 3.47it/s] 9%|▉ | 33993/371472 [2:44:39<26:17:25, 3.57it/s] 9%|▉ | 33994/371472 [2:44:39<25:53:57, 3.62it/s] 9%|▉ | 33995/371472 [2:44:39<26:09:37, 3.58it/s] 9%|▉ | 33996/371472 [2:44:40<27:43:35, 3.38it/s] 9%|▉ | 33997/371472 [2:44:40<27:41:04, 3.39it/s] 9%|▉ | 33998/371472 [2:44:40<29:08:27, 3.22it/s] 9%|▉ | 33999/371472 [2:44:41<27:47:10, 3.37it/s] 9%|▉ | 34000/371472 [2:44:41<26:56:04, 3.48it/s] {'loss': 4.5035, 'learning_rate': 9.180654614406688e-07, 'epoch': 1.46} + 9%|▉ | 34000/371472 [2:44:41<26:56:04, 3.48it/s] 9%|▉ | 34001/371472 [2:44:41<28:07:05, 3.33it/s] 9%|▉ | 34002/371472 [2:44:42<29:08:40, 3.22it/s] 9%|▉ | 34003/371472 [2:44:42<29:16:09, 3.20it/s] 9%|▉ | 34004/371472 [2:44:42<27:57:46, 3.35it/s] 9%|▉ | 34005/371472 [2:44:42<26:53:00, 3.49it/s] 9%|▉ | 34006/371472 [2:44:43<28:30:43, 3.29it/s] 9%|▉ | 34007/371472 [2:44:43<27:21:43, 3.43it/s] 9%|▉ | 34008/371472 [2:44:43<27:54:47, 3.36it/s] 9%|▉ | 34009/371472 [2:44:44<28:30:51, 3.29it/s] 9%|▉ | 34010/371472 [2:44:44<28:07:01, 3.33it/s] 9%|▉ | 34011/371472 [2:44:44<27:39:34, 3.39it/s] 9%|▉ | 34012/371472 [2:44:44<26:54:07, 3.48it/s] 9%|▉ | 34013/371472 [2:44:45<26:17:52, 3.56it/s] 9%|▉ | 34014/371472 [2:44:45<27:49:47, 3.37it/s] 9%|▉ | 34015/371472 [2:44:45<27:12:58, 3.44it/s] 9%|▉ | 34016/371472 [2:44:46<27:05:03, 3.46it/s] 9%|▉ | 34017/371472 [2:44:46<26:30:18, 3.54it/s] 9%|▉ | 34018/371472 [2:44:46<26:26:01, 3.55it/s] 9%|▉ | 34019/371472 [2:44:46<26:19:20, 3.56it/s] 9%|▉ | 34020/371472 [2:44:47<26:35:56, 3.52it/s] {'loss': 4.4645, 'learning_rate': 9.180169794651899e-07, 'epoch': 1.47} + 9%|▉ | 34020/371472 [2:44:47<26:35:56, 3.52it/s] 9%|▉ | 34021/371472 [2:44:47<26:40:09, 3.51it/s] 9%|▉ | 34022/371472 [2:44:47<26:13:32, 3.57it/s] 9%|▉ | 34023/371472 [2:44:48<26:24:52, 3.55it/s] 9%|▉ | 34024/371472 [2:44:48<26:17:10, 3.57it/s] 9%|▉ | 34025/371472 [2:44:48<26:19:58, 3.56it/s] 9%|▉ | 34026/371472 [2:44:48<26:36:01, 3.52it/s] 9%|▉ | 34027/371472 [2:44:49<26:19:38, 3.56it/s] 9%|▉ | 34028/371472 [2:44:49<26:01:58, 3.60it/s] 9%|▉ | 34029/371472 [2:44:49<25:34:21, 3.67it/s] 9%|▉ | 34030/371472 [2:44:49<25:02:20, 3.74it/s] 9%|▉ | 34031/371472 [2:44:50<24:56:29, 3.76it/s] 9%|▉ | 34032/371472 [2:44:50<24:30:02, 3.83it/s] 9%|▉ | 34033/371472 [2:44:50<24:05:16, 3.89it/s] 9%|▉ | 34034/371472 [2:44:51<24:45:00, 3.79it/s] 9%|▉ | 34035/371472 [2:44:51<25:12:15, 3.72it/s] 9%|▉ | 34036/371472 [2:44:51<24:48:04, 3.78it/s] 9%|▉ | 34037/371472 [2:44:51<25:24:47, 3.69it/s] 9%|▉ | 34038/371472 [2:44:52<28:30:00, 3.29it/s] 9%|▉ | 34039/371472 [2:44:52<27:52:17, 3.36it/s] 9%|▉ | 34040/371472 [2:44:52<26:54:18, 3.48it/s] {'loss': 4.8142, 'learning_rate': 9.17968497489711e-07, 'epoch': 1.47} + 9%|▉ | 34040/371472 [2:44:52<26:54:18, 3.48it/s] 9%|▉ | 34041/371472 [2:44:53<28:58:31, 3.23it/s] 9%|▉ | 34042/371472 [2:44:53<29:33:31, 3.17it/s] 9%|▉ | 34043/371472 [2:44:53<28:48:56, 3.25it/s] 9%|▉ | 34044/371472 [2:44:54<29:22:07, 3.19it/s] 9%|▉ | 34045/371472 [2:44:54<29:52:19, 3.14it/s] 9%|▉ | 34046/371472 [2:44:54<27:54:50, 3.36it/s] 9%|▉ | 34047/371472 [2:44:54<26:54:33, 3.48it/s] 9%|▉ | 34048/371472 [2:44:55<29:53:28, 3.14it/s] 9%|▉ | 34049/371472 [2:44:55<28:49:43, 3.25it/s] 9%|▉ | 34050/371472 [2:44:55<27:41:48, 3.38it/s] 9%|▉ | 34051/371472 [2:44:56<26:58:24, 3.47it/s] 9%|▉ | 34052/371472 [2:44:56<25:59:57, 3.60it/s] 9%|▉ | 34053/371472 [2:44:56<25:11:31, 3.72it/s] 9%|▉ | 34054/371472 [2:44:56<25:25:11, 3.69it/s] 9%|▉ | 34055/371472 [2:44:57<25:40:30, 3.65it/s] 9%|▉ | 34056/371472 [2:44:57<25:55:37, 3.62it/s] 9%|▉ | 34057/371472 [2:44:57<25:38:54, 3.65it/s] 9%|▉ | 34058/371472 [2:44:58<25:26:29, 3.68it/s] 9%|▉ | 34059/371472 [2:44:58<25:49:19, 3.63it/s] 9%|▉ | 34060/371472 [2:44:58<25:45:19, 3.64it/s] {'loss': 4.8715, 'learning_rate': 9.179200155142321e-07, 'epoch': 1.47} + 9%|▉ | 34060/371472 [2:44:58<25:45:19, 3.64it/s] 9%|▉ | 34061/371472 [2:44:58<27:04:18, 3.46it/s] 9%|▉ | 34062/371472 [2:44:59<26:48:34, 3.50it/s] 9%|▉ | 34063/371472 [2:44:59<26:15:49, 3.57it/s] 9%|▉ | 34064/371472 [2:44:59<27:06:59, 3.46it/s] 9%|▉ | 34065/371472 [2:45:00<26:13:27, 3.57it/s] 9%|▉ | 34066/371472 [2:45:00<26:19:27, 3.56it/s] 9%|▉ | 34067/371472 [2:45:00<26:06:32, 3.59it/s] 9%|▉ | 34068/371472 [2:45:00<26:01:24, 3.60it/s] 9%|▉ | 34069/371472 [2:45:01<25:52:30, 3.62it/s] 9%|▉ | 34070/371472 [2:45:01<27:20:56, 3.43it/s] 9%|▉ | 34071/371472 [2:45:01<27:54:24, 3.36it/s] 9%|▉ | 34072/371472 [2:45:02<26:47:21, 3.50it/s] 9%|▉ | 34073/371472 [2:45:02<26:52:38, 3.49it/s] 9%|▉ | 34074/371472 [2:45:02<26:33:14, 3.53it/s] 9%|▉ | 34075/371472 [2:45:02<26:39:02, 3.52it/s] 9%|▉ | 34076/371472 [2:45:03<27:09:10, 3.45it/s] 9%|▉ | 34077/371472 [2:45:03<26:22:20, 3.55it/s] 9%|▉ | 34078/371472 [2:45:03<26:35:55, 3.52it/s] 9%|▉ | 34079/371472 [2:45:04<26:28:52, 3.54it/s] 9%|▉ | 34080/371472 [2:45:04<27:07:49, 3.45it/s] {'loss': 4.5135, 'learning_rate': 9.178715335387534e-07, 'epoch': 1.47} + 9%|▉ | 34080/371472 [2:45:04<27:07:49, 3.45it/s] 9%|▉ | 34081/371472 [2:45:04<28:05:26, 3.34it/s] 9%|▉ | 34082/371472 [2:45:04<27:19:42, 3.43it/s] 9%|▉ | 34083/371472 [2:45:05<28:17:04, 3.31it/s] 9%|▉ | 34084/371472 [2:45:05<26:53:33, 3.48it/s] 9%|▉ | 34085/371472 [2:45:05<26:05:00, 3.59it/s] 9%|▉ | 34086/371472 [2:45:06<25:47:18, 3.63it/s] 9%|▉ | 34087/371472 [2:45:06<26:22:55, 3.55it/s] 9%|▉ | 34088/371472 [2:45:06<27:19:53, 3.43it/s] 9%|▉ | 34089/371472 [2:45:06<29:01:06, 3.23it/s] 9%|▉ | 34090/371472 [2:45:07<28:11:50, 3.32it/s] 9%|▉ | 34091/371472 [2:45:07<28:20:47, 3.31it/s] 9%|▉ | 34092/371472 [2:45:07<28:15:53, 3.32it/s] 9%|▉ | 34093/371472 [2:45:08<26:41:58, 3.51it/s] 9%|▉ | 34094/371472 [2:45:08<26:24:55, 3.55it/s] 9%|▉ | 34095/371472 [2:45:08<26:54:36, 3.48it/s] 9%|▉ | 34096/371472 [2:45:08<26:44:03, 3.51it/s] 9%|▉ | 34097/371472 [2:45:09<26:41:39, 3.51it/s] 9%|▉ | 34098/371472 [2:45:09<26:21:50, 3.55it/s] 9%|▉ | 34099/371472 [2:45:09<30:11:09, 3.10it/s] 9%|▉ | 34100/371472 [2:45:10<28:58:53, 3.23it/s] {'loss': 4.5732, 'learning_rate': 9.178230515632744e-07, 'epoch': 1.47} + 9%|▉ | 34100/371472 [2:45:10<28:58:53, 3.23it/s] 9%|▉ | 34101/371472 [2:45:10<28:15:25, 3.32it/s] 9%|▉ | 34102/371472 [2:45:10<27:45:57, 3.38it/s] 9%|▉ | 34103/371472 [2:45:11<29:04:53, 3.22it/s] 9%|▉ | 34104/371472 [2:45:11<27:33:31, 3.40it/s] 9%|▉ | 34105/371472 [2:45:11<27:16:54, 3.43it/s] 9%|▉ | 34106/371472 [2:45:11<26:09:23, 3.58it/s] 9%|▉ | 34107/371472 [2:45:12<27:24:22, 3.42it/s] 9%|▉ | 34108/371472 [2:45:12<27:41:25, 3.38it/s] 9%|▉ | 34109/371472 [2:45:12<27:37:56, 3.39it/s] 9%|▉ | 34110/371472 [2:45:13<26:15:54, 3.57it/s] 9%|▉ | 34111/371472 [2:45:13<25:32:12, 3.67it/s] 9%|▉ | 34112/371472 [2:45:13<25:08:58, 3.73it/s] 9%|▉ | 34113/371472 [2:45:13<26:17:25, 3.56it/s] 9%|▉ | 34114/371472 [2:45:14<26:03:50, 3.60it/s] 9%|▉ | 34115/371472 [2:45:14<26:44:24, 3.50it/s] 9%|▉ | 34116/371472 [2:45:14<27:14:37, 3.44it/s] 9%|▉ | 34117/371472 [2:45:15<27:30:13, 3.41it/s] 9%|▉ | 34118/371472 [2:45:15<27:09:27, 3.45it/s] 9%|▉ | 34119/371472 [2:45:15<27:05:47, 3.46it/s] 9%|▉ | 34120/371472 [2:45:15<27:01:04, 3.47it/s] {'loss': 4.7193, 'learning_rate': 9.177745695877953e-07, 'epoch': 1.47} + 9%|▉ | 34120/371472 [2:45:15<27:01:04, 3.47it/s] 9%|▉ | 34121/371472 [2:45:16<26:55:24, 3.48it/s] 9%|▉ | 34122/371472 [2:45:16<26:37:09, 3.52it/s] 9%|▉ | 34123/371472 [2:45:16<27:07:41, 3.45it/s] 9%|▉ | 34124/371472 [2:45:17<26:10:06, 3.58it/s] 9%|▉ | 34125/371472 [2:45:17<26:15:38, 3.57it/s] 9%|▉ | 34126/371472 [2:45:17<26:56:57, 3.48it/s] 9%|▉ | 34127/371472 [2:45:17<28:44:53, 3.26it/s] 9%|▉ | 34128/371472 [2:45:18<29:57:42, 3.13it/s] 9%|▉ | 34129/371472 [2:45:18<29:33:36, 3.17it/s] 9%|▉ | 34130/371472 [2:45:18<29:12:57, 3.21it/s] 9%|▉ | 34131/371472 [2:45:19<28:51:49, 3.25it/s] 9%|▉ | 34132/371472 [2:45:19<27:53:49, 3.36it/s] 9%|▉ | 34133/371472 [2:45:19<27:24:23, 3.42it/s] 9%|▉ | 34134/371472 [2:45:20<27:08:38, 3.45it/s] 9%|▉ | 34135/371472 [2:45:20<27:13:06, 3.44it/s] 9%|▉ | 34136/371472 [2:45:20<26:21:03, 3.56it/s] 9%|▉ | 34137/371472 [2:45:20<26:37:27, 3.52it/s] 9%|▉ | 34138/371472 [2:45:21<27:36:16, 3.39it/s] 9%|▉ | 34139/371472 [2:45:21<26:43:30, 3.51it/s] 9%|▉ | 34140/371472 [2:45:21<26:02:31, 3.60it/s] {'loss': 4.6281, 'learning_rate': 9.177260876123165e-07, 'epoch': 1.47} + 9%|▉ | 34140/371472 [2:45:21<26:02:31, 3.60it/s] 9%|▉ | 34141/371472 [2:45:22<26:24:20, 3.55it/s] 9%|▉ | 34142/371472 [2:45:22<25:52:36, 3.62it/s] 9%|▉ | 34143/371472 [2:45:22<26:05:21, 3.59it/s] 9%|▉ | 34144/371472 [2:45:22<25:34:29, 3.66it/s] 9%|▉ | 34145/371472 [2:45:23<25:57:15, 3.61it/s] 9%|▉ | 34146/371472 [2:45:23<26:07:23, 3.59it/s] 9%|▉ | 34147/371472 [2:45:23<26:32:54, 3.53it/s] 9%|▉ | 34148/371472 [2:45:24<26:04:29, 3.59it/s] 9%|▉ | 34149/371472 [2:45:24<26:39:42, 3.51it/s] 9%|▉ | 34150/371472 [2:45:24<27:53:41, 3.36it/s] 9%|▉ | 34151/371472 [2:45:24<27:20:35, 3.43it/s] 9%|▉ | 34152/371472 [2:45:25<29:23:58, 3.19it/s] 9%|▉ | 34153/371472 [2:45:25<29:33:30, 3.17it/s] 9%|▉ | 34154/371472 [2:45:25<30:55:46, 3.03it/s] 9%|▉ | 34155/371472 [2:45:26<29:24:26, 3.19it/s] 9%|▉ | 34156/371472 [2:45:26<29:56:33, 3.13it/s] 9%|▉ | 34157/371472 [2:45:26<29:52:00, 3.14it/s] 9%|▉ | 34158/371472 [2:45:27<27:38:14, 3.39it/s] 9%|▉ | 34159/371472 [2:45:27<27:29:25, 3.41it/s] 9%|▉ | 34160/371472 [2:45:27<26:11:40, 3.58it/s] {'loss': 4.5386, 'learning_rate': 9.176776056368377e-07, 'epoch': 1.47} + 9%|▉ | 34160/371472 [2:45:27<26:11:40, 3.58it/s] 9%|▉ | 34161/371472 [2:45:27<26:21:12, 3.56it/s] 9%|▉ | 34162/371472 [2:45:28<26:53:42, 3.48it/s] 9%|▉ | 34163/371472 [2:45:28<26:44:13, 3.50it/s] 9%|▉ | 34164/371472 [2:45:28<26:57:04, 3.48it/s] 9%|▉ | 34165/371472 [2:45:29<27:06:03, 3.46it/s] 9%|▉ | 34166/371472 [2:45:29<26:25:51, 3.54it/s] 9%|▉ | 34167/371472 [2:45:29<27:10:26, 3.45it/s] 9%|▉ | 34168/371472 [2:45:29<25:59:53, 3.60it/s] 9%|▉ | 34169/371472 [2:45:30<26:13:50, 3.57it/s] 9%|▉ | 34170/371472 [2:45:30<26:38:09, 3.52it/s] 9%|▉ | 34171/371472 [2:45:30<26:52:15, 3.49it/s] 9%|▉ | 34172/371472 [2:45:31<26:46:06, 3.50it/s] 9%|▉ | 34173/371472 [2:45:31<26:48:38, 3.49it/s] 9%|▉ | 34174/371472 [2:45:31<25:49:25, 3.63it/s] 9%|▉ | 34175/371472 [2:45:31<25:33:11, 3.67it/s] 9%|▉ | 34176/371472 [2:45:32<25:04:48, 3.74it/s] 9%|▉ | 34177/371472 [2:45:32<24:59:27, 3.75it/s] 9%|▉ | 34178/371472 [2:45:32<24:59:29, 3.75it/s] 9%|▉ | 34179/371472 [2:45:32<24:20:50, 3.85it/s] 9%|▉ | 34180/371472 [2:45:33<24:59:13, 3.75it/s] {'loss': 4.58, 'learning_rate': 9.176291236613588e-07, 'epoch': 1.47} + 9%|▉ | 34180/371472 [2:45:33<24:59:13, 3.75it/s] 9%|▉ | 34181/371472 [2:45:33<27:58:59, 3.35it/s] 9%|▉ | 34182/371472 [2:45:33<28:12:56, 3.32it/s] 9%|▉ | 34183/371472 [2:45:34<27:41:39, 3.38it/s] 9%|▉ | 34184/371472 [2:45:34<26:27:01, 3.54it/s] 9%|▉ | 34185/371472 [2:45:34<26:47:49, 3.50it/s] 9%|▉ | 34186/371472 [2:45:34<25:49:39, 3.63it/s] 9%|▉ | 34187/371472 [2:45:35<25:41:13, 3.65it/s] 9%|▉ | 34188/371472 [2:45:35<25:50:01, 3.63it/s] 9%|▉ | 34189/371472 [2:45:35<25:34:36, 3.66it/s] 9%|▉ | 34190/371472 [2:45:36<27:41:40, 3.38it/s] 9%|▉ | 34191/371472 [2:45:36<26:42:45, 3.51it/s] 9%|▉ | 34192/371472 [2:45:36<27:45:53, 3.37it/s] 9%|▉ | 34193/371472 [2:45:36<27:13:05, 3.44it/s] 9%|▉ | 34194/371472 [2:45:37<27:04:53, 3.46it/s] 9%|▉ | 34195/371472 [2:45:37<26:10:04, 3.58it/s] 9%|▉ | 34196/371472 [2:45:37<26:04:49, 3.59it/s] 9%|▉ | 34197/371472 [2:45:38<27:15:06, 3.44it/s] 9%|▉ | 34198/371472 [2:45:38<27:11:09, 3.45it/s] 9%|▉ | 34199/371472 [2:45:38<27:11:36, 3.45it/s] 9%|▉ | 34200/371472 [2:45:39<27:53:34, 3.36it/s] {'loss': 4.5368, 'learning_rate': 9.175806416858798e-07, 'epoch': 1.47} + 9%|▉ | 34200/371472 [2:45:39<27:53:34, 3.36it/s] 9%|▉ | 34201/371472 [2:45:39<27:14:38, 3.44it/s] 9%|▉ | 34202/371472 [2:45:39<28:09:30, 3.33it/s] 9%|▉ | 34203/371472 [2:45:39<28:09:29, 3.33it/s] 9%|▉ | 34204/371472 [2:45:40<28:11:26, 3.32it/s] 9%|▉ | 34205/371472 [2:45:40<28:49:09, 3.25it/s] 9%|▉ | 34206/371472 [2:45:40<28:45:51, 3.26it/s] 9%|▉ | 34207/371472 [2:45:41<28:12:35, 3.32it/s] 9%|▉ | 34208/371472 [2:45:41<27:22:43, 3.42it/s] 9%|▉ | 34209/371472 [2:45:41<28:34:59, 3.28it/s] 9%|▉ | 34210/371472 [2:45:42<29:13:51, 3.20it/s] 9%|▉ | 34211/371472 [2:45:42<30:11:52, 3.10it/s] 9%|▉ | 34212/371472 [2:45:42<28:13:19, 3.32it/s] 9%|▉ | 34213/371472 [2:45:42<27:21:20, 3.42it/s] 9%|▉ | 34214/371472 [2:45:43<27:51:38, 3.36it/s] 9%|▉ | 34215/371472 [2:45:43<27:22:35, 3.42it/s] 9%|▉ | 34216/371472 [2:45:43<27:13:08, 3.44it/s] 9%|▉ | 34217/371472 [2:45:44<26:47:02, 3.50it/s] 9%|▉ | 34218/371472 [2:45:44<27:11:50, 3.44it/s] 9%|▉ | 34219/371472 [2:45:44<26:04:32, 3.59it/s] 9%|▉ | 34220/371472 [2:45:44<26:56:43, 3.48it/s] {'loss': 4.3916, 'learning_rate': 9.17532159710401e-07, 'epoch': 1.47} + 9%|▉ | 34220/371472 [2:45:44<26:56:43, 3.48it/s] 9%|▉ | 34221/371472 [2:45:45<27:59:56, 3.35it/s] 9%|▉ | 34222/371472 [2:45:45<27:23:29, 3.42it/s] 9%|▉ | 34223/371472 [2:45:45<26:16:59, 3.56it/s] 9%|▉ | 34224/371472 [2:45:46<27:19:41, 3.43it/s] 9%|▉ | 34225/371472 [2:45:46<27:56:33, 3.35it/s] 9%|▉ | 34226/371472 [2:45:46<28:33:34, 3.28it/s] 9%|▉ | 34227/371472 [2:45:47<28:28:22, 3.29it/s] 9%|▉ | 34228/371472 [2:45:47<27:49:04, 3.37it/s] 9%|▉ | 34229/371472 [2:45:47<27:12:44, 3.44it/s] 9%|▉ | 34230/371472 [2:45:47<27:18:21, 3.43it/s] 9%|▉ | 34231/371472 [2:45:48<26:20:34, 3.56it/s] 9%|▉ | 34232/371472 [2:45:48<26:27:03, 3.54it/s] 9%|▉ | 34233/371472 [2:45:48<26:40:18, 3.51it/s] 9%|▉ | 34234/371472 [2:45:49<27:11:37, 3.44it/s] 9%|▉ | 34235/371472 [2:45:49<27:13:51, 3.44it/s] 9%|▉ | 34236/371472 [2:45:49<27:42:01, 3.38it/s] 9%|▉ | 34237/371472 [2:45:49<26:41:23, 3.51it/s] 9%|▉ | 34238/371472 [2:45:50<25:43:56, 3.64it/s] 9%|▉ | 34239/371472 [2:45:50<26:06:03, 3.59it/s] 9%|▉ | 34240/371472 [2:45:50<25:36:16, 3.66it/s] {'loss': 4.5975, 'learning_rate': 9.17483677734922e-07, 'epoch': 1.47} + 9%|▉ | 34240/371472 [2:45:50<25:36:16, 3.66it/s] 9%|▉ | 34241/371472 [2:45:50<25:52:21, 3.62it/s] 9%|▉ | 34242/371472 [2:45:51<27:26:42, 3.41it/s] 9%|▉ | 34243/371472 [2:45:51<31:15:47, 3.00it/s] 9%|▉ | 34244/371472 [2:45:52<30:48:16, 3.04it/s] 9%|▉ | 34245/371472 [2:45:52<29:50:13, 3.14it/s] 9%|▉ | 34246/371472 [2:45:52<29:59:02, 3.12it/s] 9%|▉ | 34247/371472 [2:45:52<28:25:40, 3.30it/s] 9%|▉ | 34248/371472 [2:45:53<27:58:19, 3.35it/s] 9%|▉ | 34249/371472 [2:45:53<27:34:30, 3.40it/s] 9%|▉ | 34250/371472 [2:45:53<28:08:10, 3.33it/s] 9%|▉ | 34251/371472 [2:45:54<27:35:04, 3.40it/s] 9%|▉ | 34252/371472 [2:45:54<28:27:28, 3.29it/s] 9%|▉ | 34253/371472 [2:45:54<27:37:08, 3.39it/s] 9%|▉ | 34254/371472 [2:45:55<27:55:09, 3.36it/s] 9%|▉ | 34255/371472 [2:45:55<27:25:45, 3.42it/s] 9%|▉ | 34256/371472 [2:45:55<28:50:58, 3.25it/s] 9%|▉ | 34257/371472 [2:45:55<27:39:30, 3.39it/s] 9%|▉ | 34258/371472 [2:45:56<26:36:02, 3.52it/s] 9%|▉ | 34259/371472 [2:45:56<26:46:34, 3.50it/s] 9%|▉ | 34260/371472 [2:45:56<26:06:19, 3.59it/s] {'loss': 4.563, 'learning_rate': 9.174351957594432e-07, 'epoch': 1.48} + 9%|▉ | 34260/371472 [2:45:56<26:06:19, 3.59it/s] 9%|▉ | 34261/371472 [2:45:57<27:37:07, 3.39it/s] 9%|▉ | 34262/371472 [2:45:57<27:06:13, 3.46it/s] 9%|▉ | 34263/371472 [2:45:57<27:01:29, 3.47it/s] 9%|▉ | 34264/371472 [2:45:57<26:32:55, 3.53it/s] 9%|▉ | 34265/371472 [2:45:58<26:15:41, 3.57it/s] 9%|▉ | 34266/371472 [2:45:58<26:35:48, 3.52it/s] 9%|▉ | 34267/371472 [2:45:58<26:04:43, 3.59it/s] 9%|▉ | 34268/371472 [2:45:58<25:55:53, 3.61it/s] 9%|▉ | 34269/371472 [2:45:59<26:20:43, 3.56it/s] 9%|▉ | 34270/371472 [2:45:59<26:59:34, 3.47it/s] 9%|▉ | 34271/371472 [2:45:59<26:34:37, 3.52it/s] 9%|▉ | 34272/371472 [2:46:00<29:06:39, 3.22it/s] 9%|▉ | 34273/371472 [2:46:00<29:08:25, 3.21it/s] 9%|▉ | 34274/371472 [2:46:00<28:16:04, 3.31it/s] 9%|▉ | 34275/371472 [2:46:01<27:36:12, 3.39it/s] 9%|▉ | 34276/371472 [2:46:01<26:19:50, 3.56it/s] 9%|▉ | 34277/371472 [2:46:01<25:17:28, 3.70it/s] 9%|▉ | 34278/371472 [2:46:01<26:50:11, 3.49it/s] 9%|▉ | 34279/371472 [2:46:02<28:27:55, 3.29it/s] 9%|▉ | 34280/371472 [2:46:02<27:45:26, 3.37it/s] {'loss': 4.615, 'learning_rate': 9.173867137839642e-07, 'epoch': 1.48} + 9%|▉ | 34280/371472 [2:46:02<27:45:26, 3.37it/s] 9%|▉ | 34281/371472 [2:46:02<27:52:35, 3.36it/s] 9%|▉ | 34282/371472 [2:46:03<27:13:54, 3.44it/s] 9%|▉ | 34283/371472 [2:46:03<26:40:15, 3.51it/s] 9%|▉ | 34284/371472 [2:46:03<26:30:19, 3.53it/s] 9%|▉ | 34285/371472 [2:46:04<28:31:04, 3.28it/s] 9%|▉ | 34286/371472 [2:46:04<27:38:51, 3.39it/s] 9%|▉ | 34287/371472 [2:46:04<27:50:36, 3.36it/s] 9%|▉ | 34288/371472 [2:46:04<28:05:35, 3.33it/s] 9%|▉ | 34289/371472 [2:46:05<27:28:13, 3.41it/s] 9%|▉ | 34290/371472 [2:46:05<28:09:13, 3.33it/s] 9%|▉ | 34291/371472 [2:46:05<28:06:00, 3.33it/s] 9%|▉ | 34292/371472 [2:46:06<28:47:41, 3.25it/s] 9%|▉ | 34293/371472 [2:46:06<27:44:16, 3.38it/s] 9%|▉ | 34294/371472 [2:46:06<27:13:45, 3.44it/s] 9%|▉ | 34295/371472 [2:46:06<26:20:09, 3.56it/s] 9%|▉ | 34296/371472 [2:46:07<25:49:15, 3.63it/s] 9%|▉ | 34297/371472 [2:46:07<25:44:52, 3.64it/s] 9%|▉ | 34298/371472 [2:46:07<27:12:07, 3.44it/s] 9%|▉ | 34299/371472 [2:46:08<26:35:32, 3.52it/s] 9%|▉ | 34300/371472 [2:46:08<28:21:24, 3.30it/s] {'loss': 4.3919, 'learning_rate': 9.173382318084854e-07, 'epoch': 1.48} + 9%|▉ | 34300/371472 [2:46:08<28:21:24, 3.30it/s] 9%|▉ | 34301/371472 [2:46:08<30:10:44, 3.10it/s] 9%|▉ | 34302/371472 [2:46:09<28:28:02, 3.29it/s] 9%|▉ | 34303/371472 [2:46:09<28:02:41, 3.34it/s] 9%|▉ | 34304/371472 [2:46:09<28:15:06, 3.32it/s] 9%|▉ | 34305/371472 [2:46:09<27:25:50, 3.41it/s] 9%|▉ | 34306/371472 [2:46:10<26:33:03, 3.53it/s] 9%|▉ | 34307/371472 [2:46:10<27:19:27, 3.43it/s] 9%|▉ | 34308/371472 [2:46:10<26:36:31, 3.52it/s] 9%|▉ | 34309/371472 [2:46:11<26:06:15, 3.59it/s] 9%|▉ | 34310/371472 [2:46:11<29:56:05, 3.13it/s] 9%|▉ | 34311/371472 [2:46:11<28:07:36, 3.33it/s] 9%|▉ | 34312/371472 [2:46:11<27:50:04, 3.36it/s] 9%|▉ | 34313/371472 [2:46:12<27:05:06, 3.46it/s] 9%|▉ | 34314/371472 [2:46:12<26:03:47, 3.59it/s] 9%|▉ | 34315/371472 [2:46:12<27:22:29, 3.42it/s] 9%|▉ | 34316/371472 [2:46:13<30:06:32, 3.11it/s] 9%|▉ | 34317/371472 [2:46:13<28:57:28, 3.23it/s] 9%|▉ | 34318/371472 [2:46:13<29:53:20, 3.13it/s] 9%|▉ | 34319/371472 [2:46:14<27:41:51, 3.38it/s] 9%|▉ | 34320/371472 [2:46:14<27:47:47, 3.37it/s] {'loss': 4.7193, 'learning_rate': 9.172897498330065e-07, 'epoch': 1.48} + 9%|▉ | 34320/371472 [2:46:14<27:47:47, 3.37it/s] 9%|▉ | 34321/371472 [2:46:14<27:33:28, 3.40it/s] 9%|▉ | 34322/371472 [2:46:14<27:41:44, 3.38it/s] 9%|▉ | 34323/371472 [2:46:15<27:12:16, 3.44it/s] 9%|▉ | 34324/371472 [2:46:15<26:54:53, 3.48it/s] 9%|▉ | 34325/371472 [2:46:15<26:32:15, 3.53it/s] 9%|▉ | 34326/371472 [2:46:16<26:01:19, 3.60it/s] 9%|▉ | 34327/371472 [2:46:16<26:09:47, 3.58it/s] 9%|▉ | 34328/371472 [2:46:16<25:23:42, 3.69it/s] 9%|▉ | 34329/371472 [2:46:16<25:31:23, 3.67it/s] 9%|▉ | 34330/371472 [2:46:17<26:05:18, 3.59it/s] 9%|▉ | 34331/371472 [2:46:17<26:02:15, 3.60it/s] 9%|▉ | 34332/371472 [2:46:17<26:57:13, 3.47it/s] 9%|▉ | 34333/371472 [2:46:18<29:29:43, 3.18it/s] 9%|▉ | 34334/371472 [2:46:18<29:15:37, 3.20it/s] 9%|▉ | 34335/371472 [2:46:18<27:47:56, 3.37it/s] 9%|▉ | 34336/371472 [2:46:19<28:18:24, 3.31it/s] 9%|▉ | 34337/371472 [2:46:19<27:36:14, 3.39it/s] 9%|▉ | 34338/371472 [2:46:19<28:34:13, 3.28it/s] 9%|▉ | 34339/371472 [2:46:20<31:14:41, 3.00it/s] 9%|▉ | 34340/371472 [2:46:20<30:21:04, 3.09it/s] {'loss': 4.4685, 'learning_rate': 9.172412678575276e-07, 'epoch': 1.48} + 9%|▉ | 34340/371472 [2:46:20<30:21:04, 3.09it/s] 9%|▉ | 34341/371472 [2:46:20<29:08:57, 3.21it/s] 9%|▉ | 34342/371472 [2:46:20<28:28:57, 3.29it/s] 9%|▉ | 34343/371472 [2:46:21<27:28:36, 3.41it/s] 9%|▉ | 34344/371472 [2:46:21<27:01:01, 3.47it/s] 9%|▉ | 34345/371472 [2:46:21<26:06:33, 3.59it/s] 9%|▉ | 34346/371472 [2:46:21<26:35:49, 3.52it/s] 9%|▉ | 34347/371472 [2:46:22<26:51:32, 3.49it/s] 9%|▉ | 34348/371472 [2:46:22<27:00:53, 3.47it/s] 9%|▉ | 34349/371472 [2:46:22<26:40:18, 3.51it/s] 9%|▉ | 34350/371472 [2:46:23<25:36:09, 3.66it/s] 9%|▉ | 34351/371472 [2:46:23<26:12:59, 3.57it/s] 9%|▉ | 34352/371472 [2:46:23<26:46:21, 3.50it/s] 9%|▉ | 34353/371472 [2:46:23<26:20:45, 3.55it/s] 9%|▉ | 34354/371472 [2:46:24<25:41:18, 3.65it/s] 9%|▉ | 34355/371472 [2:46:24<27:44:59, 3.37it/s] 9%|▉ | 34356/371472 [2:46:24<26:50:53, 3.49it/s] 9%|▉ | 34357/371472 [2:46:25<27:50:34, 3.36it/s] 9%|▉ | 34358/371472 [2:46:25<26:53:12, 3.48it/s] 9%|▉ | 34359/371472 [2:46:25<26:07:05, 3.59it/s] 9%|▉ | 34360/371472 [2:46:25<25:08:29, 3.72it/s] {'loss': 4.6064, 'learning_rate': 9.171927858820487e-07, 'epoch': 1.48} + 9%|▉ | 34360/371472 [2:46:25<25:08:29, 3.72it/s] 9%|▉ | 34361/371472 [2:46:26<25:44:00, 3.64it/s] 9%|▉ | 34362/371472 [2:46:26<25:34:54, 3.66it/s] 9%|▉ | 34363/371472 [2:46:26<27:53:55, 3.36it/s] 9%|▉ | 34364/371472 [2:46:27<27:13:52, 3.44it/s] 9%|▉ | 34365/371472 [2:46:27<26:39:03, 3.51it/s] 9%|▉ | 34366/371472 [2:46:27<26:24:57, 3.54it/s] 9%|▉ | 34367/371472 [2:46:28<28:56:25, 3.24it/s] 9%|▉ | 34368/371472 [2:46:28<27:49:27, 3.37it/s] 9%|▉ | 34369/371472 [2:46:28<28:13:03, 3.32it/s] 9%|▉ | 34370/371472 [2:46:28<27:56:27, 3.35it/s] 9%|▉ | 34371/371472 [2:46:29<33:09:22, 2.82it/s] 9%|▉ | 34372/371472 [2:46:29<32:09:54, 2.91it/s] 9%|▉ | 34373/371472 [2:46:30<30:46:04, 3.04it/s] 9%|▉ | 34374/371472 [2:46:30<29:44:50, 3.15it/s] 9%|▉ | 34375/371472 [2:46:30<29:51:25, 3.14it/s] 9%|▉ | 34376/371472 [2:46:30<28:36:32, 3.27it/s] 9%|▉ | 34377/371472 [2:46:31<27:53:56, 3.36it/s] 9%|▉ | 34378/371472 [2:46:31<28:21:44, 3.30it/s] 9%|▉ | 34379/371472 [2:46:31<28:57:37, 3.23it/s] 9%|▉ | 34380/371472 [2:46:32<28:04:41, 3.33it/s] {'loss': 4.5293, 'learning_rate': 9.171443039065698e-07, 'epoch': 1.48} + 9%|▉ | 34380/371472 [2:46:32<28:04:41, 3.33it/s] 9%|▉ | 34381/371472 [2:46:32<27:40:44, 3.38it/s] 9%|▉ | 34382/371472 [2:46:32<27:48:50, 3.37it/s] 9%|▉ | 34383/371472 [2:46:32<27:10:08, 3.45it/s] 9%|▉ | 34384/371472 [2:46:33<26:51:34, 3.49it/s] 9%|▉ | 34385/371472 [2:46:33<26:48:15, 3.49it/s] 9%|▉ | 34386/371472 [2:46:33<26:48:50, 3.49it/s] 9%|▉ | 34387/371472 [2:46:34<27:35:56, 3.39it/s] 9%|▉ | 34388/371472 [2:46:34<27:04:18, 3.46it/s] 9%|▉ | 34389/371472 [2:46:34<26:08:52, 3.58it/s] 9%|▉ | 34390/371472 [2:46:34<27:49:50, 3.36it/s] 9%|▉ | 34391/371472 [2:46:35<26:50:09, 3.49it/s] 9%|▉ | 34392/371472 [2:46:35<25:57:42, 3.61it/s] 9%|▉ | 34393/371472 [2:46:35<25:28:16, 3.68it/s] 9%|▉ | 34394/371472 [2:46:36<25:22:21, 3.69it/s] 9%|▉ | 34395/371472 [2:46:36<26:09:53, 3.58it/s] 9%|▉ | 34396/371472 [2:46:36<26:28:32, 3.54it/s] 9%|▉ | 34397/371472 [2:46:36<28:15:22, 3.31it/s] 9%|▉ | 34398/371472 [2:46:37<28:33:37, 3.28it/s] 9%|▉ | 34399/371472 [2:46:37<28:14:08, 3.32it/s] 9%|▉ | 34400/371472 [2:46:37<26:42:59, 3.50it/s] {'loss': 4.4513, 'learning_rate': 9.17095821931091e-07, 'epoch': 1.48} + 9%|▉ | 34400/371472 [2:46:37<26:42:59, 3.50it/s] 9%|▉ | 34401/371472 [2:46:38<26:39:09, 3.51it/s] 9%|▉ | 34402/371472 [2:46:38<26:32:47, 3.53it/s] 9%|▉ | 34403/371472 [2:46:38<25:24:59, 3.68it/s] 9%|▉ | 34404/371472 [2:46:38<24:42:40, 3.79it/s] 9%|▉ | 34405/371472 [2:46:39<27:27:38, 3.41it/s] 9%|▉ | 34406/371472 [2:46:39<26:23:24, 3.55it/s] 9%|▉ | 34407/371472 [2:46:39<26:22:29, 3.55it/s] 9%|▉ | 34408/371472 [2:46:40<27:50:57, 3.36it/s] 9%|▉ | 34409/371472 [2:46:40<26:56:32, 3.48it/s] 9%|▉ | 34410/371472 [2:46:40<26:26:18, 3.54it/s] 9%|▉ | 34411/371472 [2:46:40<27:02:38, 3.46it/s] 9%|▉ | 34412/371472 [2:46:41<28:11:30, 3.32it/s] 9%|▉ | 34413/371472 [2:46:41<27:59:39, 3.34it/s] 9%|▉ | 34414/371472 [2:46:41<27:30:20, 3.40it/s] 9%|▉ | 34415/371472 [2:46:42<26:59:00, 3.47it/s] 9%|▉ | 34416/371472 [2:46:42<26:34:37, 3.52it/s] 9%|▉ | 34417/371472 [2:46:42<25:49:09, 3.63it/s] 9%|▉ | 34418/371472 [2:46:42<26:09:14, 3.58it/s] 9%|▉ | 34419/371472 [2:46:43<26:11:28, 3.57it/s] 9%|▉ | 34420/371472 [2:46:43<25:54:28, 3.61it/s] {'loss': 4.7872, 'learning_rate': 9.17047339955612e-07, 'epoch': 1.48} + 9%|▉ | 34420/371472 [2:46:43<25:54:28, 3.61it/s] 9%|▉ | 34421/371472 [2:46:43<27:39:15, 3.39it/s] 9%|▉ | 34422/371472 [2:46:44<27:02:39, 3.46it/s] 9%|▉ | 34423/371472 [2:46:44<27:17:56, 3.43it/s] 9%|▉ | 34424/371472 [2:46:44<26:46:09, 3.50it/s] 9%|▉ | 34425/371472 [2:46:44<25:54:02, 3.61it/s] 9%|▉ | 34426/371472 [2:46:45<25:41:03, 3.65it/s] 9%|▉ | 34427/371472 [2:46:45<25:27:49, 3.68it/s] 9%|▉ | 34428/371472 [2:46:45<25:37:43, 3.65it/s] 9%|▉ | 34429/371472 [2:46:45<24:59:24, 3.75it/s] 9%|▉ | 34430/371472 [2:46:46<25:31:53, 3.67it/s] 9%|▉ | 34431/371472 [2:46:46<25:22:09, 3.69it/s] 9%|▉ | 34432/371472 [2:46:46<26:04:01, 3.59it/s] 9%|▉ | 34433/371472 [2:46:47<26:23:47, 3.55it/s] 9%|▉ | 34434/371472 [2:46:47<26:58:25, 3.47it/s] 9%|▉ | 34435/371472 [2:46:47<26:41:47, 3.51it/s] 9%|▉ | 34436/371472 [2:46:48<26:49:28, 3.49it/s] 9%|▉ | 34437/371472 [2:46:48<26:07:17, 3.58it/s] 9%|▉ | 34438/371472 [2:46:48<25:40:41, 3.65it/s] 9%|▉ | 34439/371472 [2:46:48<26:16:49, 3.56it/s] 9%|▉ | 34440/371472 [2:46:49<26:58:10, 3.47it/s] {'loss': 4.591, 'learning_rate': 9.169988579801331e-07, 'epoch': 1.48} + 9%|▉ | 34440/371472 [2:46:49<26:58:10, 3.47it/s] 9%|▉ | 34441/371472 [2:46:49<27:00:21, 3.47it/s] 9%|▉ | 34442/371472 [2:46:49<27:08:02, 3.45it/s] 9%|▉ | 34443/371472 [2:46:50<27:47:40, 3.37it/s] 9%|▉ | 34444/371472 [2:46:50<26:56:28, 3.47it/s] 9%|▉ | 34445/371472 [2:46:50<26:35:10, 3.52it/s] 9%|▉ | 34446/371472 [2:46:50<25:36:59, 3.65it/s] 9%|▉ | 34447/371472 [2:46:51<25:04:19, 3.73it/s] 9%|▉ | 34448/371472 [2:46:51<27:04:54, 3.46it/s] 9%|▉ | 34449/371472 [2:46:51<27:03:02, 3.46it/s] 9%|▉ | 34450/371472 [2:46:51<26:51:31, 3.49it/s] 9%|▉ | 34451/371472 [2:46:52<27:03:29, 3.46it/s] 9%|▉ | 34452/371472 [2:46:52<28:17:20, 3.31it/s] 9%|▉ | 34453/371472 [2:46:52<27:47:53, 3.37it/s] 9%|▉ | 34454/371472 [2:46:53<26:43:43, 3.50it/s] 9%|▉ | 34455/371472 [2:46:53<26:16:41, 3.56it/s] 9%|▉ | 34456/371472 [2:46:53<29:00:02, 3.23it/s] 9%|▉ | 34457/371472 [2:46:54<28:39:41, 3.27it/s] 9%|▉ | 34458/371472 [2:46:54<27:13:46, 3.44it/s] 9%|▉ | 34459/371472 [2:46:54<26:33:41, 3.52it/s] 9%|▉ | 34460/371472 [2:46:54<26:11:07, 3.58it/s] {'loss': 4.468, 'learning_rate': 9.169503760046543e-07, 'epoch': 1.48} + 9%|▉ | 34460/371472 [2:46:54<26:11:07, 3.58it/s] 9%|▉ | 34461/371472 [2:46:55<27:25:49, 3.41it/s] 9%|▉ | 34462/371472 [2:46:55<26:33:11, 3.53it/s] 9%|▉ | 34463/371472 [2:46:55<27:40:52, 3.38it/s] 9%|▉ | 34464/371472 [2:46:56<29:10:18, 3.21it/s] 9%|▉ | 34465/371472 [2:46:56<27:49:43, 3.36it/s] 9%|▉ | 34466/371472 [2:46:56<27:12:25, 3.44it/s] 9%|▉ | 34467/371472 [2:46:56<27:13:23, 3.44it/s] 9%|▉ | 34468/371472 [2:46:57<26:30:21, 3.53it/s] 9%|▉ | 34469/371472 [2:46:57<26:03:03, 3.59it/s] 9%|▉ | 34470/371472 [2:46:57<25:47:25, 3.63it/s] 9%|▉ | 34471/371472 [2:46:58<24:50:27, 3.77it/s] 9%|▉ | 34472/371472 [2:46:58<24:49:22, 3.77it/s] 9%|▉ | 34473/371472 [2:46:58<24:13:55, 3.86it/s] 9%|▉ | 34474/371472 [2:46:58<28:51:45, 3.24it/s] 9%|▉ | 34475/371472 [2:46:59<27:13:54, 3.44it/s] 9%|▉ | 34476/371472 [2:46:59<26:42:40, 3.50it/s] 9%|▉ | 34477/371472 [2:46:59<28:24:43, 3.29it/s] 9%|▉ | 34478/371472 [2:47:00<27:08:59, 3.45it/s] 9%|▉ | 34479/371472 [2:47:00<26:11:42, 3.57it/s] 9%|▉ | 34480/371472 [2:47:00<26:22:38, 3.55it/s] {'loss': 4.569, 'learning_rate': 9.169018940291754e-07, 'epoch': 1.49} + 9%|▉ | 34480/371472 [2:47:00<26:22:38, 3.55it/s] 9%|▉ | 34481/371472 [2:47:00<26:46:20, 3.50it/s] 9%|▉ | 34482/371472 [2:47:01<26:22:28, 3.55it/s] 9%|▉ | 34483/371472 [2:47:01<26:14:41, 3.57it/s] 9%|▉ | 34484/371472 [2:47:01<26:40:42, 3.51it/s] 9%|▉ | 34485/371472 [2:47:02<30:08:40, 3.11it/s] 9%|▉ | 34486/371472 [2:47:02<27:58:39, 3.35it/s] 9%|▉ | 34487/371472 [2:47:02<28:53:22, 3.24it/s] 9%|▉ | 34488/371472 [2:47:03<29:19:09, 3.19it/s] 9%|▉ | 34489/371472 [2:47:03<28:48:16, 3.25it/s] 9%|▉ | 34490/371472 [2:47:03<27:26:58, 3.41it/s] 9%|▉ | 34491/371472 [2:47:03<27:20:22, 3.42it/s] 9%|▉ | 34492/371472 [2:47:04<26:12:46, 3.57it/s] 9%|▉ | 34493/371472 [2:47:04<26:36:00, 3.52it/s] 9%|▉ | 34494/371472 [2:47:04<27:41:04, 3.38it/s] 9%|▉ | 34495/371472 [2:47:05<26:56:16, 3.47it/s] 9%|▉ | 34496/371472 [2:47:05<26:32:06, 3.53it/s] 9%|▉ | 34497/371472 [2:47:05<27:37:04, 3.39it/s] 9%|▉ | 34498/371472 [2:47:05<27:35:28, 3.39it/s] 9%|▉ | 34499/371472 [2:47:06<26:57:38, 3.47it/s] 9%|▉ | 34500/371472 [2:47:06<26:26:47, 3.54it/s] {'loss': 4.429, 'learning_rate': 9.168534120536964e-07, 'epoch': 1.49} + 9%|▉ | 34500/371472 [2:47:06<26:26:47, 3.54it/s] 9%|▉ | 34501/371472 [2:47:06<26:17:43, 3.56it/s] 9%|▉ | 34502/371472 [2:47:07<26:03:19, 3.59it/s] 9%|▉ | 34503/371472 [2:47:07<26:12:51, 3.57it/s] 9%|▉ | 34504/371472 [2:47:07<25:47:46, 3.63it/s] 9%|▉ | 34505/371472 [2:47:07<26:06:06, 3.59it/s] 9%|▉ | 34506/371472 [2:47:08<26:10:49, 3.58it/s] 9%|▉ | 34507/371472 [2:47:08<26:27:24, 3.54it/s] 9%|▉ | 34508/371472 [2:47:08<27:28:41, 3.41it/s] 9%|▉ | 34509/371472 [2:47:09<26:36:35, 3.52it/s] 9%|▉ | 34510/371472 [2:47:09<26:45:15, 3.50it/s] 9%|▉ | 34511/371472 [2:47:09<27:22:37, 3.42it/s] 9%|▉ | 34512/371472 [2:47:09<26:32:45, 3.53it/s] 9%|▉ | 34513/371472 [2:47:10<28:05:13, 3.33it/s] 9%|▉ | 34514/371472 [2:47:10<27:28:56, 3.41it/s] 9%|▉ | 34515/371472 [2:47:10<27:37:32, 3.39it/s] 9%|▉ | 34516/371472 [2:47:11<28:57:34, 3.23it/s] 9%|▉ | 34517/371472 [2:47:11<28:19:51, 3.30it/s] 9%|▉ | 34518/371472 [2:47:11<28:21:56, 3.30it/s] 9%|▉ | 34519/371472 [2:47:11<27:07:12, 3.45it/s] 9%|▉ | 34520/371472 [2:47:12<27:50:07, 3.36it/s] {'loss': 4.5698, 'learning_rate': 9.168049300782175e-07, 'epoch': 1.49} + 9%|▉ | 34520/371472 [2:47:12<27:50:07, 3.36it/s] 9%|▉ | 34521/371472 [2:47:12<28:42:58, 3.26it/s] 9%|▉ | 34522/371472 [2:47:12<28:31:49, 3.28it/s] 9%|▉ | 34523/371472 [2:47:13<27:06:50, 3.45it/s] 9%|▉ | 34524/371472 [2:47:13<27:50:15, 3.36it/s] 9%|▉ | 34525/371472 [2:47:13<28:03:04, 3.34it/s] 9%|▉ | 34526/371472 [2:47:14<27:38:06, 3.39it/s] 9%|▉ | 34527/371472 [2:47:14<26:26:15, 3.54it/s] 9%|▉ | 34528/371472 [2:47:14<26:06:13, 3.59it/s] 9%|▉ | 34529/371472 [2:47:14<25:58:03, 3.60it/s] 9%|▉ | 34530/371472 [2:47:15<25:55:04, 3.61it/s] 9%|▉ | 34531/371472 [2:47:15<25:40:46, 3.64it/s] 9%|▉ | 34532/371472 [2:47:15<25:34:59, 3.66it/s] 9%|▉ | 34533/371472 [2:47:16<26:16:20, 3.56it/s] 9%|▉ | 34534/371472 [2:47:16<25:30:30, 3.67it/s] 9%|▉ | 34535/371472 [2:47:16<25:15:13, 3.71it/s] 9%|▉ | 34536/371472 [2:47:16<27:01:32, 3.46it/s] 9%|▉ | 34537/371472 [2:47:17<29:53:29, 3.13it/s] 9%|▉ | 34538/371472 [2:47:17<29:25:22, 3.18it/s] 9%|▉ | 34539/371472 [2:47:17<28:25:43, 3.29it/s] 9%|▉ | 34540/371472 [2:47:18<28:54:52, 3.24it/s] {'loss': 4.5016, 'learning_rate': 9.167564481027387e-07, 'epoch': 1.49} + 9%|▉ | 34540/371472 [2:47:18<28:54:52, 3.24it/s] 9%|▉ | 34541/371472 [2:47:18<28:35:39, 3.27it/s] 9%|▉ | 34542/371472 [2:47:18<28:41:04, 3.26it/s] 9%|▉ | 34543/371472 [2:47:19<29:57:46, 3.12it/s] 9%|▉ | 34544/371472 [2:47:19<28:15:35, 3.31it/s] 9%|▉ | 34545/371472 [2:47:19<27:53:33, 3.36it/s] 9%|▉ | 34546/371472 [2:47:19<26:30:34, 3.53it/s] 9%|▉ | 34547/371472 [2:47:20<27:34:47, 3.39it/s] 9%|▉ | 34548/371472 [2:47:20<26:22:10, 3.55it/s] 9%|▉ | 34549/371472 [2:47:20<26:45:55, 3.50it/s] 9%|▉ | 34550/371472 [2:47:21<26:20:40, 3.55it/s] 9%|▉ | 34551/371472 [2:47:21<26:51:20, 3.48it/s] 9%|▉ | 34552/371472 [2:47:21<28:59:25, 3.23it/s] 9%|▉ | 34553/371472 [2:47:21<27:37:37, 3.39it/s] 9%|▉ | 34554/371472 [2:47:22<28:29:49, 3.28it/s] 9%|▉ | 34555/371472 [2:47:22<26:51:05, 3.49it/s] 9%|▉ | 34556/371472 [2:47:22<26:47:50, 3.49it/s] 9%|▉ | 34557/371472 [2:47:23<27:25:50, 3.41it/s] 9%|▉ | 34558/371472 [2:47:23<26:34:33, 3.52it/s] 9%|▉ | 34559/371472 [2:47:23<26:59:30, 3.47it/s] 9%|▉ | 34560/371472 [2:47:23<27:14:18, 3.44it/s] {'loss': 4.3315, 'learning_rate': 9.167079661272598e-07, 'epoch': 1.49} + 9%|▉ | 34560/371472 [2:47:23<27:14:18, 3.44it/s] 9%|▉ | 34561/371472 [2:47:24<26:13:23, 3.57it/s] 9%|▉ | 34562/371472 [2:47:24<26:19:59, 3.55it/s] 9%|▉ | 34563/371472 [2:47:24<27:15:22, 3.43it/s] 9%|▉ | 34564/371472 [2:47:25<26:22:39, 3.55it/s] 9%|▉ | 34565/371472 [2:47:25<26:23:26, 3.55it/s] 9%|▉ | 34566/371472 [2:47:25<27:17:12, 3.43it/s] 9%|▉ | 34567/371472 [2:47:25<26:43:32, 3.50it/s] 9%|▉ | 34568/371472 [2:47:26<26:54:08, 3.48it/s] 9%|▉ | 34569/371472 [2:47:26<26:58:52, 3.47it/s] 9%|▉ | 34570/371472 [2:47:26<26:58:26, 3.47it/s] 9%|▉ | 34571/371472 [2:47:27<26:34:55, 3.52it/s] 9%|▉ | 34572/371472 [2:47:27<26:17:25, 3.56it/s] 9%|▉ | 34573/371472 [2:47:27<27:35:00, 3.39it/s] 9%|▉ | 34574/371472 [2:47:27<26:27:51, 3.54it/s] 9%|▉ | 34575/371472 [2:47:28<26:37:00, 3.52it/s] 9%|▉ | 34576/371472 [2:47:28<26:37:31, 3.51it/s] 9%|▉ | 34577/371472 [2:47:28<26:16:27, 3.56it/s] 9%|▉ | 34578/371472 [2:47:29<25:33:37, 3.66it/s] 9%|▉ | 34579/371472 [2:47:29<25:14:09, 3.71it/s] 9%|▉ | 34580/371472 [2:47:29<24:36:42, 3.80it/s] {'loss': 4.2194, 'learning_rate': 9.166594841517809e-07, 'epoch': 1.49} + 9%|▉ | 34580/371472 [2:47:29<24:36:42, 3.80it/s] 9%|▉ | 34581/371472 [2:47:29<25:26:22, 3.68it/s] 9%|▉ | 34582/371472 [2:47:30<24:54:41, 3.76it/s] 9%|▉ | 34583/371472 [2:47:30<25:06:31, 3.73it/s] 9%|▉ | 34584/371472 [2:47:30<27:38:31, 3.39it/s] 9%|▉ | 34585/371472 [2:47:31<27:09:41, 3.45it/s] 9%|▉ | 34586/371472 [2:47:31<28:44:57, 3.26it/s] 9%|▉ | 34587/371472 [2:47:31<27:46:17, 3.37it/s] 9%|▉ | 34588/371472 [2:47:31<27:16:48, 3.43it/s] 9%|▉ | 34589/371472 [2:47:32<27:38:21, 3.39it/s] 9%|▉ | 34590/371472 [2:47:32<26:48:24, 3.49it/s] 9%|▉ | 34591/371472 [2:47:32<28:09:59, 3.32it/s] 9%|▉ | 34592/371472 [2:47:33<27:32:47, 3.40it/s] 9%|▉ | 34593/371472 [2:47:33<27:36:35, 3.39it/s] 9%|▉ | 34594/371472 [2:47:33<27:42:11, 3.38it/s] 9%|▉ | 34595/371472 [2:47:33<26:53:00, 3.48it/s] 9%|▉ | 34596/371472 [2:47:34<26:10:12, 3.58it/s] 9%|▉ | 34597/371472 [2:47:34<26:08:03, 3.58it/s] 9%|▉ | 34598/371472 [2:47:34<28:49:24, 3.25it/s] 9%|▉ | 34599/371472 [2:47:35<29:12:09, 3.20it/s] 9%|▉ | 34600/371472 [2:47:35<28:24:18, 3.29it/s] {'loss': 4.4544, 'learning_rate': 9.16611002176302e-07, 'epoch': 1.49} + 9%|▉ | 34600/371472 [2:47:35<28:24:18, 3.29it/s] 9%|▉ | 34601/371472 [2:47:35<27:49:13, 3.36it/s] 9%|▉ | 34602/371472 [2:47:36<27:37:46, 3.39it/s] 9%|▉ | 34603/371472 [2:47:36<27:07:38, 3.45it/s] 9%|▉ | 34604/371472 [2:47:36<27:00:53, 3.46it/s] 9%|▉ | 34605/371472 [2:47:36<27:03:13, 3.46it/s] 9%|▉ | 34606/371472 [2:47:37<27:16:41, 3.43it/s] 9%|▉ | 34607/371472 [2:47:37<27:27:50, 3.41it/s] 9%|▉ | 34608/371472 [2:47:37<26:16:51, 3.56it/s] 9%|▉ | 34609/371472 [2:47:38<26:13:33, 3.57it/s] 9%|▉ | 34610/371472 [2:47:38<25:36:22, 3.65it/s] 9%|▉ | 34611/371472 [2:47:38<25:25:19, 3.68it/s] 9%|▉ | 34612/371472 [2:47:38<25:38:33, 3.65it/s] 9%|▉ | 34613/371472 [2:47:39<29:40:15, 3.15it/s] 9%|▉ | 34614/371472 [2:47:39<28:11:40, 3.32it/s] 9%|▉ | 34615/371472 [2:47:39<27:29:31, 3.40it/s] 9%|▉ | 34616/371472 [2:47:40<26:50:19, 3.49it/s] 9%|▉ | 34617/371472 [2:47:40<27:03:52, 3.46it/s] 9%|▉ | 34618/371472 [2:47:40<27:08:26, 3.45it/s] 9%|▉ | 34619/371472 [2:47:40<26:23:08, 3.55it/s] 9%|▉ | 34620/371472 [2:47:41<26:02:18, 3.59it/s] {'loss': 4.239, 'learning_rate': 9.165625202008231e-07, 'epoch': 1.49} + 9%|▉ | 34620/371472 [2:47:41<26:02:18, 3.59it/s] 9%|▉ | 34621/371472 [2:47:41<26:19:04, 3.56it/s] 9%|▉ | 34622/371472 [2:47:41<26:30:38, 3.53it/s] 9%|▉ | 34623/371472 [2:47:42<26:57:30, 3.47it/s] 9%|▉ | 34624/371472 [2:47:42<30:24:47, 3.08it/s] 9%|▉ | 34625/371472 [2:47:42<28:31:56, 3.28it/s] 9%|▉ | 34626/371472 [2:47:43<28:22:06, 3.30it/s] 9%|▉ | 34627/371472 [2:47:43<28:00:29, 3.34it/s] 9%|▉ | 34628/371472 [2:47:43<30:01:44, 3.12it/s] 9%|▉ | 34629/371472 [2:47:44<29:37:18, 3.16it/s] 9%|▉ | 34630/371472 [2:47:44<30:28:14, 3.07it/s] 9%|▉ | 34631/371472 [2:47:44<28:12:47, 3.32it/s] 9%|▉ | 34632/371472 [2:47:44<27:23:15, 3.42it/s] 9%|▉ | 34633/371472 [2:47:45<30:06:53, 3.11it/s] 9%|▉ | 34634/371472 [2:47:45<30:09:28, 3.10it/s] 9%|▉ | 34635/371472 [2:47:45<29:31:56, 3.17it/s] 9%|▉ | 34636/371472 [2:47:46<28:48:35, 3.25it/s] 9%|▉ | 34637/371472 [2:47:46<29:05:18, 3.22it/s] 9%|▉ | 34638/371472 [2:47:46<29:13:06, 3.20it/s] 9%|▉ | 34639/371472 [2:47:47<27:46:07, 3.37it/s] 9%|▉ | 34640/371472 [2:47:47<26:53:35, 3.48it/s] {'loss': 4.4725, 'learning_rate': 9.165140382253441e-07, 'epoch': 1.49} + 9%|▉ | 34640/371472 [2:47:47<26:53:35, 3.48it/s] 9%|▉ | 34641/371472 [2:47:47<26:39:16, 3.51it/s] 9%|▉ | 34642/371472 [2:47:47<26:55:01, 3.48it/s] 9%|▉ | 34643/371472 [2:47:48<25:51:02, 3.62it/s] 9%|▉ | 34644/371472 [2:47:48<26:06:25, 3.58it/s] 9%|▉ | 34645/371472 [2:47:48<26:06:59, 3.58it/s] 9%|▉ | 34646/371472 [2:47:49<27:07:29, 3.45it/s] 9%|▉ | 34647/371472 [2:47:49<26:23:50, 3.54it/s] 9%|▉ | 34648/371472 [2:47:49<26:38:50, 3.51it/s] 9%|▉ | 34649/371472 [2:47:49<27:03:23, 3.46it/s] 9%|▉ | 34650/371472 [2:47:50<26:40:05, 3.51it/s] 9%|▉ | 34651/371472 [2:47:50<26:25:26, 3.54it/s] 9%|▉ | 34652/371472 [2:47:50<26:30:20, 3.53it/s] 9%|▉ | 34653/371472 [2:47:50<25:34:06, 3.66it/s] 9%|▉ | 34654/371472 [2:47:51<26:20:02, 3.55it/s] 9%|▉ | 34655/371472 [2:47:51<25:49:21, 3.62it/s] 9%|▉ | 34656/371472 [2:47:51<26:20:21, 3.55it/s] 9%|▉ | 34657/371472 [2:47:52<25:41:28, 3.64it/s] 9%|▉ | 34658/371472 [2:47:52<26:58:33, 3.47it/s] 9%|▉ | 34659/371472 [2:47:52<26:30:54, 3.53it/s] 9%|▉ | 34660/371472 [2:47:53<27:04:34, 3.46it/s] {'loss': 4.5558, 'learning_rate': 9.164655562498652e-07, 'epoch': 1.49} + 9%|▉ | 34660/371472 [2:47:53<27:04:34, 3.46it/s] 9%|▉ | 34661/371472 [2:47:53<26:03:45, 3.59it/s] 9%|▉ | 34662/371472 [2:47:53<26:32:38, 3.52it/s] 9%|▉ | 34663/371472 [2:47:53<27:33:40, 3.39it/s] 9%|▉ | 34664/371472 [2:47:54<26:27:11, 3.54it/s] 9%|▉ | 34665/371472 [2:47:54<26:58:44, 3.47it/s] 9%|▉ | 34666/371472 [2:47:54<26:33:52, 3.52it/s] 9%|▉ | 34667/371472 [2:47:54<26:30:16, 3.53it/s] 9%|▉ | 34668/371472 [2:47:55<27:06:33, 3.45it/s] 9%|▉ | 34669/371472 [2:47:55<27:07:36, 3.45it/s] 9%|▉ | 34670/371472 [2:47:55<27:15:46, 3.43it/s] 9%|▉ | 34671/371472 [2:47:56<30:14:12, 3.09it/s] 9%|▉ | 34672/371472 [2:47:56<28:58:48, 3.23it/s] 9%|▉ | 34673/371472 [2:47:56<30:21:03, 3.08it/s] 9%|▉ | 34674/371472 [2:47:57<28:30:08, 3.28it/s] 9%|▉ | 34675/371472 [2:47:57<28:59:46, 3.23it/s] 9%|▉ | 34676/371472 [2:47:57<27:04:45, 3.45it/s] 9%|▉ | 34677/371472 [2:47:58<26:48:41, 3.49it/s] 9%|▉ | 34678/371472 [2:47:58<27:15:09, 3.43it/s] 9%|▉ | 34679/371472 [2:47:58<26:34:50, 3.52it/s] 9%|▉ | 34680/371472 [2:47:58<26:02:23, 3.59it/s] {'loss': 4.3513, 'learning_rate': 9.164170742743864e-07, 'epoch': 1.49} + 9%|▉ | 34680/371472 [2:47:58<26:02:23, 3.59it/s] 9%|▉ | 34681/371472 [2:47:59<26:21:58, 3.55it/s] 9%|▉ | 34682/371472 [2:47:59<27:38:57, 3.38it/s] 9%|▉ | 34683/371472 [2:47:59<26:34:05, 3.52it/s] 9%|▉ | 34684/371472 [2:47:59<26:04:49, 3.59it/s] 9%|▉ | 34685/371472 [2:48:00<29:02:54, 3.22it/s] 9%|▉ | 34686/371472 [2:48:00<30:32:32, 3.06it/s] 9%|▉ | 34687/371472 [2:48:01<29:44:43, 3.15it/s] 9%|▉ | 34688/371472 [2:48:01<29:58:21, 3.12it/s] 9%|▉ | 34689/371472 [2:48:01<28:29:44, 3.28it/s] 9%|▉ | 34690/371472 [2:48:01<27:31:26, 3.40it/s] 9%|▉ | 34691/371472 [2:48:02<28:02:29, 3.34it/s] 9%|▉ | 34692/371472 [2:48:02<32:26:04, 2.88it/s] 9%|▉ | 34693/371472 [2:48:02<30:01:56, 3.11it/s] 9%|▉ | 34694/371472 [2:48:03<28:01:59, 3.34it/s] 9%|▉ | 34695/371472 [2:48:03<29:51:08, 3.13it/s] 9%|▉ | 34696/371472 [2:48:03<30:08:11, 3.10it/s] 9%|▉ | 34697/371472 [2:48:04<32:14:47, 2.90it/s] 9%|▉ | 34698/371472 [2:48:04<30:45:57, 3.04it/s] 9%|▉ | 34699/371472 [2:48:04<30:12:56, 3.10it/s] 9%|▉ | 34700/371472 [2:48:05<31:03:21, 3.01it/s] {'loss': 4.3847, 'learning_rate': 9.163685922989076e-07, 'epoch': 1.49} + 9%|▉ | 34700/371472 [2:48:05<31:03:21, 3.01it/s] 9%|▉ | 34701/371472 [2:48:05<30:03:05, 3.11it/s] 9%|▉ | 34702/371472 [2:48:05<28:13:40, 3.31it/s] 9%|▉ | 34703/371472 [2:48:06<27:19:50, 3.42it/s] 9%|▉ | 34704/371472 [2:48:06<26:31:08, 3.53it/s] 9%|▉ | 34705/371472 [2:48:06<26:06:28, 3.58it/s] 9%|▉ | 34706/371472 [2:48:06<26:09:50, 3.58it/s] 9%|▉ | 34707/371472 [2:48:07<27:12:05, 3.44it/s] 9%|▉ | 34708/371472 [2:48:07<26:48:32, 3.49it/s] 9%|▉ | 34709/371472 [2:48:07<25:53:07, 3.61it/s] 9%|▉ | 34710/371472 [2:48:07<26:08:26, 3.58it/s] 9%|▉ | 34711/371472 [2:48:08<28:43:56, 3.26it/s] 9%|▉ | 34712/371472 [2:48:08<29:31:12, 3.17it/s] 9%|▉ | 34713/371472 [2:48:08<29:02:58, 3.22it/s] 9%|▉ | 34714/371472 [2:48:09<28:18:25, 3.30it/s] 9%|▉ | 34715/371472 [2:48:09<27:23:30, 3.42it/s] 9%|▉ | 34716/371472 [2:48:09<27:22:33, 3.42it/s] 9%|▉ | 34717/371472 [2:48:10<26:43:29, 3.50it/s] 9%|▉ | 34718/371472 [2:48:10<26:19:44, 3.55it/s] 9%|▉ | 34719/371472 [2:48:10<27:23:59, 3.41it/s] 9%|▉ | 34720/371472 [2:48:11<27:33:39, 3.39it/s] {'loss': 4.7456, 'learning_rate': 9.163201103234286e-07, 'epoch': 1.5} + 9%|▉ | 34720/371472 [2:48:11<27:33:39, 3.39it/s] 9%|▉ | 34721/371472 [2:48:11<27:09:43, 3.44it/s] 9%|▉ | 34722/371472 [2:48:11<26:30:57, 3.53it/s] 9%|▉ | 34723/371472 [2:48:11<26:12:25, 3.57it/s] 9%|▉ | 34724/371472 [2:48:12<25:52:14, 3.62it/s] 9%|▉ | 34725/371472 [2:48:12<26:43:08, 3.50it/s] 9%|▉ | 34726/371472 [2:48:12<27:02:57, 3.46it/s] 9%|▉ | 34727/371472 [2:48:13<27:55:10, 3.35it/s] 9%|▉ | 34728/371472 [2:48:13<27:19:18, 3.42it/s] 9%|▉ | 34729/371472 [2:48:13<26:38:08, 3.51it/s] 9%|▉ | 34730/371472 [2:48:13<27:00:03, 3.46it/s] 9%|▉ | 34731/371472 [2:48:14<26:40:20, 3.51it/s] 9%|▉ | 34732/371472 [2:48:14<25:49:23, 3.62it/s] 9%|▉ | 34733/371472 [2:48:14<25:14:38, 3.71it/s] 9%|▉ | 34734/371472 [2:48:14<25:35:16, 3.66it/s] 9%|▉ | 34735/371472 [2:48:15<25:28:17, 3.67it/s] 9%|▉ | 34736/371472 [2:48:15<25:22:32, 3.69it/s] 9%|▉ | 34737/371472 [2:48:15<25:06:56, 3.72it/s] 9%|▉ | 34738/371472 [2:48:15<25:00:10, 3.74it/s] 9%|▉ | 34739/371472 [2:48:16<24:23:36, 3.83it/s] 9%|▉ | 34740/371472 [2:48:16<25:14:12, 3.71it/s] {'loss': 4.7135, 'learning_rate': 9.162716283479497e-07, 'epoch': 1.5} + 9%|▉ | 34740/371472 [2:48:16<25:14:12, 3.71it/s] 9%|▉ | 34741/371472 [2:48:16<25:50:21, 3.62it/s] 9%|▉ | 34742/371472 [2:48:17<26:07:35, 3.58it/s] 9%|▉ | 34743/371472 [2:48:17<27:27:24, 3.41it/s] 9%|▉ | 34744/371472 [2:48:17<27:26:58, 3.41it/s] 9%|▉ | 34745/371472 [2:48:17<26:28:00, 3.53it/s] 9%|▉ | 34746/371472 [2:48:18<26:11:49, 3.57it/s] 9%|▉ | 34747/371472 [2:48:18<26:02:39, 3.59it/s] 9%|▉ | 34748/371472 [2:48:18<25:02:17, 3.74it/s] 9%|▉ | 34749/371472 [2:48:19<25:06:07, 3.73it/s] 9%|▉ | 34750/371472 [2:48:19<24:20:53, 3.84it/s] 9%|▉ | 34751/371472 [2:48:19<24:24:02, 3.83it/s] 9%|▉ | 34752/371472 [2:48:19<23:42:05, 3.95it/s] 9%|▉ | 34753/371472 [2:48:20<24:21:22, 3.84it/s] 9%|▉ | 34754/371472 [2:48:20<26:26:01, 3.54it/s] 9%|▉ | 34755/371472 [2:48:20<26:25:33, 3.54it/s] 9%|▉ | 34756/371472 [2:48:21<29:18:15, 3.19it/s] 9%|▉ | 34757/371472 [2:48:21<27:46:41, 3.37it/s] 9%|▉ | 34758/371472 [2:48:21<26:33:46, 3.52it/s] 9%|▉ | 34759/371472 [2:48:21<26:18:59, 3.55it/s] 9%|▉ | 34760/371472 [2:48:22<25:40:25, 3.64it/s] {'loss': 4.7201, 'learning_rate': 9.162231463724708e-07, 'epoch': 1.5} + 9%|▉ | 34760/371472 [2:48:22<25:40:25, 3.64it/s] 9%|▉ | 34761/371472 [2:48:22<25:51:17, 3.62it/s] 9%|▉ | 34762/371472 [2:48:22<27:25:39, 3.41it/s] 9%|▉ | 34763/371472 [2:48:23<29:43:15, 3.15it/s] 9%|▉ | 34764/371472 [2:48:23<28:39:31, 3.26it/s] 9%|▉ | 34765/371472 [2:48:23<28:38:41, 3.27it/s] 9%|▉ | 34766/371472 [2:48:23<27:45:19, 3.37it/s] 9%|▉ | 34767/371472 [2:48:24<27:25:26, 3.41it/s] 9%|▉ | 34768/371472 [2:48:24<26:52:22, 3.48it/s] 9%|▉ | 34769/371472 [2:48:24<26:49:49, 3.49it/s] 9%|▉ | 34770/371472 [2:48:25<29:01:21, 3.22it/s] 9%|▉ | 34771/371472 [2:48:25<27:58:57, 3.34it/s] 9%|▉ | 34772/371472 [2:48:25<28:31:47, 3.28it/s] 9%|▉ | 34773/371472 [2:48:26<27:07:37, 3.45it/s] 9%|▉ | 34774/371472 [2:48:26<26:29:54, 3.53it/s] 9%|▉ | 34775/371472 [2:48:26<27:42:49, 3.37it/s] 9%|▉ | 34776/371472 [2:48:26<27:50:01, 3.36it/s] 9%|▉ | 34777/371472 [2:48:27<27:04:19, 3.45it/s] 9%|▉ | 34778/371472 [2:48:27<28:36:45, 3.27it/s] 9%|▉ | 34779/371472 [2:48:27<29:12:56, 3.20it/s] 9%|▉ | 34780/371472 [2:48:28<27:52:38, 3.35it/s] {'loss': 4.3865, 'learning_rate': 9.161746643969919e-07, 'epoch': 1.5} + 9%|▉ | 34780/371472 [2:48:28<27:52:38, 3.35it/s] 9%|▉ | 34781/371472 [2:48:28<27:10:55, 3.44it/s] 9%|▉ | 34782/371472 [2:48:28<27:14:03, 3.43it/s] 9%|▉ | 34783/371472 [2:48:28<26:54:51, 3.47it/s] 9%|▉ | 34784/371472 [2:48:29<26:30:40, 3.53it/s] 9%|▉ | 34785/371472 [2:48:29<25:43:33, 3.64it/s] 9%|▉ | 34786/371472 [2:48:29<25:13:56, 3.71it/s] 9%|▉ | 34787/371472 [2:48:30<25:30:52, 3.67it/s] 9%|▉ | 34788/371472 [2:48:30<26:23:18, 3.54it/s] 9%|▉ | 34789/371472 [2:48:30<27:03:08, 3.46it/s] 9%|▉ | 34790/371472 [2:48:30<26:50:24, 3.48it/s] 9%|▉ | 34791/371472 [2:48:31<27:39:35, 3.38it/s] 9%|▉ | 34792/371472 [2:48:31<28:08:56, 3.32it/s] 9%|▉ | 34793/371472 [2:48:31<28:06:23, 3.33it/s] 9%|▉ | 34794/371472 [2:48:32<26:32:34, 3.52it/s] 9%|▉ | 34795/371472 [2:48:32<26:21:18, 3.55it/s] 9%|▉ | 34796/371472 [2:48:32<25:59:58, 3.60it/s] 9%|▉ | 34797/371472 [2:48:32<25:46:59, 3.63it/s] 9%|▉ | 34798/371472 [2:48:33<25:30:41, 3.67it/s] 9%|▉ | 34799/371472 [2:48:33<25:39:38, 3.64it/s] 9%|▉ | 34800/371472 [2:48:33<25:47:44, 3.63it/s] {'loss': 4.7127, 'learning_rate': 9.16126182421513e-07, 'epoch': 1.5} + 9%|▉ | 34800/371472 [2:48:33<25:47:44, 3.63it/s] 9%|▉ | 34801/371472 [2:48:33<25:19:13, 3.69it/s] 9%|▉ | 34802/371472 [2:48:34<27:51:21, 3.36it/s] 9%|▉ | 34803/371472 [2:48:34<27:07:24, 3.45it/s] 9%|▉ | 34804/371472 [2:48:34<26:00:28, 3.60it/s] 9%|▉ | 34805/371472 [2:48:35<26:03:44, 3.59it/s] 9%|▉ | 34806/371472 [2:48:35<31:08:57, 3.00it/s] 9%|▉ | 34807/371472 [2:48:35<31:29:39, 2.97it/s] 9%|▉ | 34808/371472 [2:48:36<29:40:57, 3.15it/s] 9%|▉ | 34809/371472 [2:48:36<28:07:12, 3.33it/s] 9%|▉ | 34810/371472 [2:48:36<26:34:22, 3.52it/s] 9%|▉ | 34811/371472 [2:48:37<26:23:49, 3.54it/s] 9%|▉ | 34812/371472 [2:48:37<26:02:12, 3.59it/s] 9%|▉ | 34813/371472 [2:48:37<26:40:01, 3.51it/s] 9%|▉ | 34814/371472 [2:48:37<26:36:31, 3.51it/s] 9%|▉ | 34815/371472 [2:48:38<26:57:23, 3.47it/s] 9%|▉ | 34816/371472 [2:48:38<25:47:57, 3.62it/s] 9%|▉ | 34817/371472 [2:48:38<26:39:55, 3.51it/s] 9%|▉ | 34818/371472 [2:48:39<26:37:17, 3.51it/s] 9%|▉ | 34819/371472 [2:48:39<26:37:50, 3.51it/s] 9%|▉ | 34820/371472 [2:48:39<27:44:20, 3.37it/s] {'loss': 4.4754, 'learning_rate': 9.16077700446034e-07, 'epoch': 1.5} + 9%|▉ | 34820/371472 [2:48:39<27:44:20, 3.37it/s] 9%|▉ | 34821/371472 [2:48:39<29:05:05, 3.22it/s] 9%|▉ | 34822/371472 [2:48:40<28:38:03, 3.27it/s] 9%|▉ | 34823/371472 [2:48:40<28:48:13, 3.25it/s] 9%|▉ | 34824/371472 [2:48:40<27:41:57, 3.38it/s] 9%|▉ | 34825/371472 [2:48:41<26:50:26, 3.48it/s] 9%|▉ | 34826/371472 [2:48:41<28:20:07, 3.30it/s] 9%|▉ | 34827/371472 [2:48:41<27:35:47, 3.39it/s] 9%|▉ | 34828/371472 [2:48:41<26:52:49, 3.48it/s] 9%|▉ | 34829/371472 [2:48:42<26:12:59, 3.57it/s] 9%|▉ | 34830/371472 [2:48:42<25:57:56, 3.60it/s] 9%|▉ | 34831/371472 [2:48:42<27:06:05, 3.45it/s] 9%|▉ | 34832/371472 [2:48:43<26:05:01, 3.59it/s] 9%|▉ | 34833/371472 [2:48:43<27:00:46, 3.46it/s] 9%|▉ | 34834/371472 [2:48:43<26:53:26, 3.48it/s] 9%|▉ | 34835/371472 [2:48:43<27:22:37, 3.42it/s] 9%|▉ | 34836/371472 [2:48:44<27:24:53, 3.41it/s] 9%|▉ | 34837/371472 [2:48:44<26:33:50, 3.52it/s] 9%|▉ | 34838/371472 [2:48:44<29:13:37, 3.20it/s] 9%|▉ | 34839/371472 [2:48:45<29:40:05, 3.15it/s] 9%|▉ | 34840/371472 [2:48:45<28:15:02, 3.31it/s] {'loss': 4.6578, 'learning_rate': 9.160292184705553e-07, 'epoch': 1.5} + 9%|▉ | 34840/371472 [2:48:45<28:15:02, 3.31it/s] 9%|▉ | 34841/371472 [2:48:45<27:06:41, 3.45it/s] 9%|▉ | 34842/371472 [2:48:46<26:25:56, 3.54it/s] 9%|▉ | 34843/371472 [2:48:46<26:36:01, 3.52it/s] 9%|▉ | 34844/371472 [2:48:46<26:55:16, 3.47it/s] 9%|▉ | 34845/371472 [2:48:46<27:45:46, 3.37it/s] 9%|▉ | 34846/371472 [2:48:47<27:16:15, 3.43it/s] 9%|▉ | 34847/371472 [2:48:47<26:12:41, 3.57it/s] 9%|▉ | 34848/371472 [2:48:47<26:23:52, 3.54it/s] 9%|▉ | 34849/371472 [2:48:48<25:46:13, 3.63it/s] 9%|▉ | 34850/371472 [2:48:48<25:42:41, 3.64it/s] 9%|▉ | 34851/371472 [2:48:48<26:12:09, 3.57it/s] 9%|▉ | 34852/371472 [2:48:49<29:23:56, 3.18it/s] 9%|▉ | 34853/371472 [2:48:49<27:45:04, 3.37it/s] 9%|▉ | 34854/371472 [2:48:49<26:30:58, 3.53it/s] 9%|▉ | 34855/371472 [2:48:49<26:38:28, 3.51it/s] 9%|▉ | 34856/371472 [2:48:50<26:30:09, 3.53it/s] 9%|▉ | 34857/371472 [2:48:50<28:17:49, 3.30it/s] 9%|▉ | 34858/371472 [2:48:50<27:33:29, 3.39it/s] 9%|▉ | 34859/371472 [2:48:51<30:23:53, 3.08it/s] 9%|▉ | 34860/371472 [2:48:51<29:18:55, 3.19it/s] {'loss': 4.6887, 'learning_rate': 9.159807364950764e-07, 'epoch': 1.5} + 9%|▉ | 34860/371472 [2:48:51<29:18:55, 3.19it/s] 9%|▉ | 34861/371472 [2:48:51<30:17:43, 3.09it/s] 9%|▉ | 34862/371472 [2:48:52<28:43:54, 3.25it/s] 9%|▉ | 34863/371472 [2:48:52<30:23:51, 3.08it/s] 9%|▉ | 34864/371472 [2:48:52<28:34:58, 3.27it/s] 9%|▉ | 34865/371472 [2:48:52<28:23:45, 3.29it/s] 9%|▉ | 34866/371472 [2:48:53<28:09:25, 3.32it/s] 9%|▉ | 34867/371472 [2:48:53<26:56:24, 3.47it/s] 9%|▉ | 34868/371472 [2:48:53<27:05:38, 3.45it/s] 9%|▉ | 34869/371472 [2:48:54<27:02:16, 3.46it/s] 9%|▉ | 34870/371472 [2:48:54<26:59:39, 3.46it/s] 9%|▉ | 34871/371472 [2:48:54<26:30:35, 3.53it/s] 9%|▉ | 34872/371472 [2:48:54<26:26:42, 3.54it/s] 9%|▉ | 34873/371472 [2:48:55<27:00:57, 3.46it/s] 9%|▉ | 34874/371472 [2:48:55<26:53:18, 3.48it/s] 9%|▉ | 34875/371472 [2:48:55<27:32:08, 3.40it/s] 9%|▉ | 34876/371472 [2:48:56<27:31:19, 3.40it/s] 9%|▉ | 34877/371472 [2:48:56<26:59:27, 3.46it/s] 9%|▉ | 34878/371472 [2:48:56<26:44:59, 3.50it/s] 9%|▉ | 34879/371472 [2:48:56<26:10:02, 3.57it/s] 9%|▉ | 34880/371472 [2:48:57<27:01:56, 3.46it/s] {'loss': 4.4038, 'learning_rate': 9.159322545195974e-07, 'epoch': 1.5} + 9%|▉ | 34880/371472 [2:48:57<27:01:56, 3.46it/s] 9%|▉ | 34881/371472 [2:48:57<26:42:48, 3.50it/s] 9%|▉ | 34882/371472 [2:48:57<27:03:37, 3.46it/s] 9%|▉ | 34883/371472 [2:48:58<28:32:59, 3.27it/s] 9%|▉ | 34884/371472 [2:48:58<29:47:49, 3.14it/s] 9%|▉ | 34885/371472 [2:48:58<30:11:14, 3.10it/s] 9%|▉ | 34886/371472 [2:48:59<30:41:16, 3.05it/s] 9%|▉ | 34887/371472 [2:48:59<30:47:25, 3.04it/s] 9%|▉ | 34888/371472 [2:48:59<29:58:50, 3.12it/s] 9%|▉ | 34889/371472 [2:49:00<29:22:01, 3.18it/s] 9%|▉ | 34890/371472 [2:49:00<28:08:48, 3.32it/s] 9%|▉ | 34891/371472 [2:49:00<27:02:22, 3.46it/s] 9%|▉ | 34892/371472 [2:49:00<27:16:28, 3.43it/s] 9%|▉ | 34893/371472 [2:49:01<26:56:14, 3.47it/s] 9%|▉ | 34894/371472 [2:49:01<26:51:28, 3.48it/s] 9%|▉ | 34895/371472 [2:49:01<26:43:23, 3.50it/s] 9%|▉ | 34896/371472 [2:49:02<27:24:56, 3.41it/s] 9%|▉ | 34897/371472 [2:49:02<27:36:35, 3.39it/s] 9%|▉ | 34898/371472 [2:49:02<26:32:45, 3.52it/s] 9%|▉ | 34899/371472 [2:49:02<26:50:05, 3.48it/s] 9%|▉ | 34900/371472 [2:49:03<26:22:31, 3.54it/s] {'loss': 4.8459, 'learning_rate': 9.158837725441185e-07, 'epoch': 1.5} + 9%|▉ | 34900/371472 [2:49:03<26:22:31, 3.54it/s] 9%|▉ | 34901/371472 [2:49:03<25:59:08, 3.60it/s] 9%|▉ | 34902/371472 [2:49:03<25:37:25, 3.65it/s] 9%|▉ | 34903/371472 [2:49:04<26:36:20, 3.51it/s] 9%|▉ | 34904/371472 [2:49:04<26:14:41, 3.56it/s] 9%|▉ | 34905/371472 [2:49:04<27:16:29, 3.43it/s] 9%|▉ | 34906/371472 [2:49:04<26:52:58, 3.48it/s] 9%|▉ | 34907/371472 [2:49:05<27:29:15, 3.40it/s] 9%|▉ | 34908/371472 [2:49:05<26:53:08, 3.48it/s] 9%|▉ | 34909/371472 [2:49:05<25:36:37, 3.65it/s] 9%|▉ | 34910/371472 [2:49:06<26:24:28, 3.54it/s] 9%|▉ | 34911/371472 [2:49:06<26:14:38, 3.56it/s] 9%|▉ | 34912/371472 [2:49:06<25:44:57, 3.63it/s] 9%|▉ | 34913/371472 [2:49:06<25:04:21, 3.73it/s] 9%|▉ | 34914/371472 [2:49:07<24:43:41, 3.78it/s] 9%|▉ | 34915/371472 [2:49:07<26:27:12, 3.53it/s] 9%|▉ | 34916/371472 [2:49:07<26:18:34, 3.55it/s] 9%|▉ | 34917/371472 [2:49:08<29:28:59, 3.17it/s] 9%|▉ | 34918/371472 [2:49:08<30:07:38, 3.10it/s] 9%|▉ | 34919/371472 [2:49:08<28:54:28, 3.23it/s] 9%|▉ | 34920/371472 [2:49:08<28:06:42, 3.33it/s] {'loss': 4.4111, 'learning_rate': 9.158352905686398e-07, 'epoch': 1.5} + 9%|▉ | 34920/371472 [2:49:08<28:06:42, 3.33it/s] 9%|▉ | 34921/371472 [2:49:09<27:14:35, 3.43it/s] 9%|▉ | 34922/371472 [2:49:09<26:38:20, 3.51it/s] 9%|▉ | 34923/371472 [2:49:09<25:44:44, 3.63it/s] 9%|▉ | 34924/371472 [2:49:10<25:53:51, 3.61it/s] 9%|▉ | 34925/371472 [2:49:10<25:21:47, 3.69it/s] 9%|▉ | 34926/371472 [2:49:10<25:08:54, 3.72it/s] 9%|▉ | 34927/371472 [2:49:10<25:57:58, 3.60it/s] 9%|▉ | 34928/371472 [2:49:11<25:28:24, 3.67it/s] 9%|▉ | 34929/371472 [2:49:11<25:16:18, 3.70it/s] 9%|▉ | 34930/371472 [2:49:11<25:33:31, 3.66it/s] 9%|▉ | 34931/371472 [2:49:11<25:50:52, 3.62it/s] 9%|▉ | 34932/371472 [2:49:12<26:28:51, 3.53it/s] 9%|▉ | 34933/371472 [2:49:12<26:15:52, 3.56it/s] 9%|▉ | 34934/371472 [2:49:12<26:28:15, 3.53it/s] 9%|▉ | 34935/371472 [2:49:13<26:04:15, 3.59it/s] 9%|▉ | 34936/371472 [2:49:13<25:16:57, 3.70it/s] 9%|▉ | 34937/371472 [2:49:13<25:23:05, 3.68it/s] 9%|▉ | 34938/371472 [2:49:13<25:18:01, 3.69it/s] 9%|▉ | 34939/371472 [2:49:14<26:35:09, 3.52it/s] 9%|▉ | 34940/371472 [2:49:14<26:15:26, 3.56it/s] {'loss': 4.7101, 'learning_rate': 9.157868085931607e-07, 'epoch': 1.5} + 9%|▉ | 34940/371472 [2:49:14<26:15:26, 3.56it/s] 9%|▉ | 34941/371472 [2:49:14<26:05:11, 3.58it/s] 9%|▉ | 34942/371472 [2:49:15<26:54:23, 3.47it/s] 9%|▉ | 34943/371472 [2:49:15<26:32:57, 3.52it/s] 9%|▉ | 34944/371472 [2:49:15<26:29:03, 3.53it/s] 9%|▉ | 34945/371472 [2:49:15<26:19:44, 3.55it/s] 9%|▉ | 34946/371472 [2:49:16<25:50:42, 3.62it/s] 9%|▉ | 34947/371472 [2:49:16<25:49:58, 3.62it/s] 9%|▉ | 34948/371472 [2:49:16<27:43:02, 3.37it/s] 9%|▉ | 34949/371472 [2:49:17<28:09:22, 3.32it/s] 9%|▉ | 34950/371472 [2:49:17<27:44:38, 3.37it/s] 9%|▉ | 34951/371472 [2:49:17<27:45:27, 3.37it/s] 9%|▉ | 34952/371472 [2:49:17<27:47:37, 3.36it/s] 9%|▉ | 34953/371472 [2:49:18<26:59:58, 3.46it/s] 9%|▉ | 34954/371472 [2:49:18<27:24:19, 3.41it/s] 9%|▉ | 34955/371472 [2:49:18<27:02:14, 3.46it/s] 9%|▉ | 34956/371472 [2:49:19<26:23:20, 3.54it/s] 9%|▉ | 34957/371472 [2:49:19<25:35:26, 3.65it/s] 9%|▉ | 34958/371472 [2:49:19<26:22:16, 3.54it/s] 9%|▉ | 34959/371472 [2:49:19<26:21:25, 3.55it/s] 9%|▉ | 34960/371472 [2:49:20<26:53:44, 3.48it/s] {'loss': 4.6699, 'learning_rate': 9.157383266176819e-07, 'epoch': 1.51} + 9%|▉ | 34960/371472 [2:49:20<26:53:44, 3.48it/s] 9%|▉ | 34961/371472 [2:49:20<26:10:11, 3.57it/s] 9%|▉ | 34962/371472 [2:49:20<26:32:16, 3.52it/s] 9%|▉ | 34963/371472 [2:49:21<26:07:05, 3.58it/s] 9%|▉ | 34964/371472 [2:49:21<26:05:58, 3.58it/s] 9%|▉ | 34965/371472 [2:49:21<26:41:00, 3.50it/s] 9%|▉ | 34966/371472 [2:49:21<26:09:41, 3.57it/s] 9%|▉ | 34967/371472 [2:49:22<25:53:43, 3.61it/s] 9%|▉ | 34968/371472 [2:49:22<26:07:58, 3.58it/s] 9%|▉ | 34969/371472 [2:49:22<25:29:09, 3.67it/s] 9%|▉ | 34970/371472 [2:49:22<25:41:47, 3.64it/s] 9%|▉ | 34971/371472 [2:49:23<25:18:18, 3.69it/s] 9%|▉ | 34972/371472 [2:49:23<28:02:22, 3.33it/s] 9%|▉ | 34973/371472 [2:49:23<28:25:41, 3.29it/s] 9%|▉ | 34974/371472 [2:49:24<26:55:50, 3.47it/s] 9%|▉ | 34975/371472 [2:49:24<27:25:56, 3.41it/s] 9%|▉ | 34976/371472 [2:49:24<28:31:24, 3.28it/s] 9%|▉ | 34977/371472 [2:49:25<27:06:37, 3.45it/s] 9%|▉ | 34978/371472 [2:49:25<26:11:23, 3.57it/s] 9%|▉ | 34979/371472 [2:49:25<25:52:32, 3.61it/s] 9%|▉ | 34980/371472 [2:49:25<26:29:55, 3.53it/s] {'loss': 4.5889, 'learning_rate': 9.15689844642203e-07, 'epoch': 1.51} + 9%|▉ | 34980/371472 [2:49:25<26:29:55, 3.53it/s] 9%|▉ | 34981/371472 [2:49:26<27:09:53, 3.44it/s] 9%|▉ | 34982/371472 [2:49:26<26:26:18, 3.54it/s] 9%|▉ | 34983/371472 [2:49:26<26:22:04, 3.54it/s] 9%|▉ | 34984/371472 [2:49:27<26:43:56, 3.50it/s] 9%|▉ | 34985/371472 [2:49:27<25:45:49, 3.63it/s] 9%|▉ | 34986/371472 [2:49:27<25:25:13, 3.68it/s] 9%|▉ | 34987/371472 [2:49:27<26:07:40, 3.58it/s] 9%|▉ | 34988/371472 [2:49:28<25:46:31, 3.63it/s] 9%|▉ | 34989/371472 [2:49:28<27:47:04, 3.36it/s] 9%|▉ | 34990/371472 [2:49:28<28:17:59, 3.30it/s] 9%|▉ | 34991/371472 [2:49:29<26:59:58, 3.46it/s] 9%|▉ | 34992/371472 [2:49:29<29:38:22, 3.15it/s] 9%|▉ | 34993/371472 [2:49:29<28:05:29, 3.33it/s] 9%|▉ | 34994/371472 [2:49:29<27:31:24, 3.40it/s] 9%|▉ | 34995/371472 [2:49:30<26:54:43, 3.47it/s] 9%|▉ | 34996/371472 [2:49:30<26:28:47, 3.53it/s] 9%|▉ | 34997/371472 [2:49:30<26:31:23, 3.52it/s] 9%|▉ | 34998/371472 [2:49:31<25:55:13, 3.61it/s] 9%|▉ | 34999/371472 [2:49:31<25:12:38, 3.71it/s] 9%|▉ | 35000/371472 [2:49:31<24:56:50, 3.75it/s] {'loss': 4.6535, 'learning_rate': 9.156413626667241e-07, 'epoch': 1.51} + 9%|▉ | 35000/371472 [2:49:31<24:56:50, 3.75it/s] 9%|▉ | 35001/371472 [2:49:31<25:26:22, 3.67it/s] 9%|▉ | 35002/371472 [2:49:32<25:39:03, 3.64it/s] 9%|▉ | 35003/371472 [2:49:32<26:46:23, 3.49it/s] 9%|▉ | 35004/371472 [2:49:32<29:22:30, 3.18it/s] 9%|▉ | 35005/371472 [2:49:33<29:05:54, 3.21it/s] 9%|▉ | 35006/371472 [2:49:33<28:26:03, 3.29it/s] 9%|▉ | 35007/371472 [2:49:33<28:06:02, 3.33it/s] 9%|▉ | 35008/371472 [2:49:34<27:40:25, 3.38it/s] 9%|▉ | 35009/371472 [2:49:34<26:41:34, 3.50it/s] 9%|▉ | 35010/371472 [2:49:34<27:10:14, 3.44it/s] 9%|▉ | 35011/371472 [2:49:34<26:52:10, 3.48it/s] 9%|▉ | 35012/371472 [2:49:35<27:02:13, 3.46it/s] 9%|▉ | 35013/371472 [2:49:35<27:00:44, 3.46it/s] 9%|▉ | 35014/371472 [2:49:35<26:22:25, 3.54it/s] 9%|▉ | 35015/371472 [2:49:35<25:16:22, 3.70it/s] 9%|▉ | 35016/371472 [2:49:36<27:51:16, 3.36it/s] 9%|▉ | 35017/371472 [2:49:36<27:23:44, 3.41it/s] 9%|▉ | 35018/371472 [2:49:36<27:09:32, 3.44it/s] 9%|▉ | 35019/371472 [2:49:37<26:51:21, 3.48it/s] 9%|▉ | 35020/371472 [2:49:37<27:57:51, 3.34it/s] {'loss': 4.5575, 'learning_rate': 9.155928806912451e-07, 'epoch': 1.51} + 9%|▉ | 35020/371472 [2:49:37<27:57:51, 3.34it/s] 9%|▉ | 35021/371472 [2:49:37<33:34:34, 2.78it/s] 9%|▉ | 35022/371472 [2:49:38<31:56:38, 2.93it/s] 9%|▉ | 35023/371472 [2:49:38<29:26:27, 3.17it/s] 9%|▉ | 35024/371472 [2:49:38<29:18:49, 3.19it/s] 9%|▉ | 35025/371472 [2:49:39<30:18:16, 3.08it/s] 9%|▉ | 35026/371472 [2:49:39<28:21:48, 3.29it/s] 9%|▉ | 35027/371472 [2:49:39<27:42:46, 3.37it/s] 9%|▉ | 35028/371472 [2:49:40<27:20:48, 3.42it/s] 9%|▉ | 35029/371472 [2:49:40<27:09:25, 3.44it/s] 9%|▉ | 35030/371472 [2:49:40<26:16:47, 3.56it/s] 9%|▉ | 35031/371472 [2:49:40<26:08:09, 3.58it/s] 9%|▉ | 35032/371472 [2:49:41<26:03:39, 3.59it/s] 9%|▉ | 35033/371472 [2:49:41<28:24:49, 3.29it/s] 9%|▉ | 35034/371472 [2:49:41<27:02:17, 3.46it/s] 9%|▉ | 35035/371472 [2:49:41<26:17:46, 3.55it/s] 9%|▉ | 35036/371472 [2:49:42<25:42:44, 3.63it/s] 9%|▉ | 35037/371472 [2:49:42<26:41:37, 3.50it/s] 9%|▉ | 35038/371472 [2:49:42<25:32:25, 3.66it/s] 9%|▉ | 35039/371472 [2:49:43<25:29:27, 3.67it/s] 9%|▉ | 35040/371472 [2:49:43<27:39:37, 3.38it/s] {'loss': 4.5002, 'learning_rate': 9.155443987157663e-07, 'epoch': 1.51} + 9%|▉ | 35040/371472 [2:49:43<27:39:37, 3.38it/s] 9%|▉ | 35041/371472 [2:49:43<27:07:35, 3.45it/s] 9%|▉ | 35042/371472 [2:49:44<27:15:26, 3.43it/s] 9%|▉ | 35043/371472 [2:49:44<26:55:29, 3.47it/s] 9%|▉ | 35044/371472 [2:49:44<27:23:58, 3.41it/s] 9%|▉ | 35045/371472 [2:49:44<26:29:02, 3.53it/s] 9%|▉ | 35046/371472 [2:49:45<26:05:24, 3.58it/s] 9%|▉ | 35047/371472 [2:49:45<29:20:46, 3.18it/s] 9%|▉ | 35048/371472 [2:49:45<28:08:58, 3.32it/s] 9%|▉ | 35049/371472 [2:49:46<27:17:55, 3.42it/s] 9%|▉ | 35050/371472 [2:49:46<27:30:52, 3.40it/s] 9%|▉ | 35051/371472 [2:49:46<27:08:36, 3.44it/s] 9%|▉ | 35052/371472 [2:49:46<26:13:45, 3.56it/s] 9%|▉ | 35053/371472 [2:49:47<26:15:12, 3.56it/s] 9%|▉ | 35054/371472 [2:49:47<26:16:40, 3.56it/s] 9%|▉ | 35055/371472 [2:49:47<25:30:32, 3.66it/s] 9%|▉ | 35056/371472 [2:49:48<27:01:16, 3.46it/s] 9%|▉ | 35057/371472 [2:49:48<25:56:03, 3.60it/s] 9%|▉ | 35058/371472 [2:49:48<26:40:41, 3.50it/s] 9%|▉ | 35059/371472 [2:49:48<26:55:31, 3.47it/s] 9%|▉ | 35060/371472 [2:49:49<26:14:08, 3.56it/s] {'loss': 4.5789, 'learning_rate': 9.154959167402874e-07, 'epoch': 1.51} + 9%|▉ | 35060/371472 [2:49:49<26:14:08, 3.56it/s] 9%|▉ | 35061/371472 [2:49:49<26:32:00, 3.52it/s] 9%|▉ | 35062/371472 [2:49:49<27:30:20, 3.40it/s] 9%|▉ | 35063/371472 [2:49:50<27:18:15, 3.42it/s] 9%|▉ | 35064/371472 [2:49:50<27:26:31, 3.41it/s] 9%|▉ | 35065/371472 [2:49:50<26:07:34, 3.58it/s] 9%|▉ | 35066/371472 [2:49:50<28:12:41, 3.31it/s] 9%|▉ | 35067/371472 [2:49:51<27:43:32, 3.37it/s] 9%|▉ | 35068/371472 [2:49:51<26:28:04, 3.53it/s] 9%|▉ | 35069/371472 [2:49:51<26:32:18, 3.52it/s] 9%|▉ | 35070/371472 [2:49:52<27:59:54, 3.34it/s] 9%|▉ | 35071/371472 [2:49:52<27:05:43, 3.45it/s] 9%|▉ | 35072/371472 [2:49:52<27:27:50, 3.40it/s] 9%|▉ | 35073/371472 [2:49:53<30:06:50, 3.10it/s] 9%|▉ | 35074/371472 [2:49:53<28:19:25, 3.30it/s] 9%|▉ | 35075/371472 [2:49:53<28:08:17, 3.32it/s] 9%|▉ | 35076/371472 [2:49:53<27:07:01, 3.45it/s] 9%|▉ | 35077/371472 [2:49:54<26:17:43, 3.55it/s] 9%|▉ | 35078/371472 [2:49:54<27:53:07, 3.35it/s] 9%|▉ | 35079/371472 [2:49:54<27:13:05, 3.43it/s] 9%|▉ | 35080/371472 [2:49:55<26:13:57, 3.56it/s] {'loss': 4.8286, 'learning_rate': 9.154474347648086e-07, 'epoch': 1.51} + 9%|▉ | 35080/371472 [2:49:55<26:13:57, 3.56it/s] 9%|▉ | 35081/371472 [2:49:55<25:44:21, 3.63it/s] 9%|▉ | 35082/371472 [2:49:55<25:25:37, 3.67it/s] 9%|▉ | 35083/371472 [2:49:55<25:16:48, 3.70it/s] 9%|▉ | 35084/371472 [2:49:56<28:23:41, 3.29it/s] 9%|▉ | 35085/371472 [2:49:56<28:24:07, 3.29it/s] 9%|▉ | 35086/371472 [2:49:56<27:39:20, 3.38it/s] 9%|▉ | 35087/371472 [2:49:57<27:12:19, 3.43it/s] 9%|▉ | 35088/371472 [2:49:57<26:10:19, 3.57it/s] 9%|▉ | 35089/371472 [2:49:57<25:10:29, 3.71it/s] 9%|▉ | 35090/371472 [2:49:57<25:40:30, 3.64it/s] 9%|▉ | 35091/371472 [2:49:58<27:05:48, 3.45it/s] 9%|▉ | 35092/371472 [2:49:58<27:10:34, 3.44it/s] 9%|▉ | 35093/371472 [2:49:58<27:33:10, 3.39it/s] 9%|▉ | 35094/371472 [2:49:59<26:04:23, 3.58it/s] 9%|▉ | 35095/371472 [2:49:59<26:07:44, 3.58it/s] 9%|▉ | 35096/371472 [2:49:59<26:48:51, 3.48it/s] 9%|▉ | 35097/371472 [2:49:59<26:00:17, 3.59it/s] 9%|▉ | 35098/371472 [2:50:00<25:20:13, 3.69it/s] 9%|▉ | 35099/371472 [2:50:00<25:48:48, 3.62it/s] 9%|▉ | 35100/371472 [2:50:00<25:12:53, 3.71it/s] {'loss': 4.4564, 'learning_rate': 9.153989527893296e-07, 'epoch': 1.51} + 9%|▉ | 35100/371472 [2:50:00<25:12:53, 3.71it/s] 9%|▉ | 35101/371472 [2:50:00<25:07:46, 3.72it/s] 9%|▉ | 35102/371472 [2:50:01<24:58:22, 3.74it/s] 9%|▉ | 35103/371472 [2:50:01<27:47:01, 3.36it/s] 9%|▉ | 35104/371472 [2:50:01<28:47:52, 3.24it/s] 9%|▉ | 35105/371472 [2:50:02<28:21:05, 3.30it/s] 9%|▉ | 35106/371472 [2:50:02<27:37:55, 3.38it/s] 9%|▉ | 35107/371472 [2:50:02<26:56:18, 3.47it/s] 9%|▉ | 35108/371472 [2:50:02<26:33:34, 3.52it/s] 9%|▉ | 35109/371472 [2:50:03<25:59:16, 3.60it/s] 9%|▉ | 35110/371472 [2:50:03<25:45:07, 3.63it/s] 9%|▉ | 35111/371472 [2:50:03<26:18:42, 3.55it/s] 9%|▉ | 35112/371472 [2:50:04<27:27:35, 3.40it/s] 9%|▉ | 35113/371472 [2:50:04<27:33:33, 3.39it/s] 9%|▉ | 35114/371472 [2:50:04<26:16:13, 3.56it/s] 9%|▉ | 35115/371472 [2:50:04<26:30:35, 3.52it/s] 9%|▉ | 35116/371472 [2:50:05<26:28:27, 3.53it/s] 9%|▉ | 35117/371472 [2:50:05<27:56:53, 3.34it/s] 9%|▉ | 35118/371472 [2:50:05<27:29:21, 3.40it/s] 9%|▉ | 35119/371472 [2:50:06<26:33:42, 3.52it/s] 9%|▉ | 35120/371472 [2:50:06<27:48:33, 3.36it/s] {'loss': 4.2606, 'learning_rate': 9.153504708138508e-07, 'epoch': 1.51} + 9%|▉ | 35120/371472 [2:50:06<27:48:33, 3.36it/s] 9%|▉ | 35121/371472 [2:50:06<27:27:34, 3.40it/s] 9%|▉ | 35122/371472 [2:50:07<27:00:07, 3.46it/s] 9%|▉ | 35123/371472 [2:50:07<25:58:30, 3.60it/s] 9%|▉ | 35124/371472 [2:50:07<25:42:21, 3.63it/s] 9%|▉ | 35125/371472 [2:50:07<26:53:50, 3.47it/s] 9%|▉ | 35126/371472 [2:50:08<26:05:17, 3.58it/s] 9%|▉ | 35127/371472 [2:50:08<25:47:45, 3.62it/s] 9%|▉ | 35128/371472 [2:50:08<26:18:03, 3.55it/s] 9%|▉ | 35129/371472 [2:50:08<26:49:27, 3.48it/s] 9%|▉ | 35130/371472 [2:50:09<25:39:51, 3.64it/s] 9%|▉ | 35131/371472 [2:50:09<27:08:00, 3.44it/s] 9%|▉ | 35132/371472 [2:50:09<26:14:26, 3.56it/s] 9%|▉ | 35133/371472 [2:50:10<26:10:12, 3.57it/s] 9%|▉ | 35134/371472 [2:50:10<28:27:41, 3.28it/s] 9%|▉ | 35135/371472 [2:50:10<27:28:51, 3.40it/s] 9%|▉ | 35136/371472 [2:50:10<26:23:19, 3.54it/s] 9%|▉ | 35137/371472 [2:50:11<25:39:47, 3.64it/s] 9%|▉ | 35138/371472 [2:50:11<26:12:39, 3.56it/s] 9%|▉ | 35139/371472 [2:50:11<26:16:21, 3.56it/s] 9%|▉ | 35140/371472 [2:50:12<26:29:10, 3.53it/s] {'loss': 4.5779, 'learning_rate': 9.153019888383718e-07, 'epoch': 1.51} + 9%|▉ | 35140/371472 [2:50:12<26:29:10, 3.53it/s] 9%|▉ | 35141/371472 [2:50:12<25:54:59, 3.60it/s] 9%|▉ | 35142/371472 [2:50:12<26:48:54, 3.48it/s] 9%|▉ | 35143/371472 [2:50:13<29:41:25, 3.15it/s] 9%|▉ | 35144/371472 [2:50:13<28:25:53, 3.29it/s] 9%|▉ | 35145/371472 [2:50:13<27:24:36, 3.41it/s] 9%|▉ | 35146/371472 [2:50:13<26:54:22, 3.47it/s] 9%|▉ | 35147/371472 [2:50:14<26:53:28, 3.47it/s] 9%|▉ | 35148/371472 [2:50:14<26:23:22, 3.54it/s] 9%|▉ | 35149/371472 [2:50:14<25:16:44, 3.70it/s] 9%|▉ | 35150/371472 [2:50:14<25:48:50, 3.62it/s] 9%|▉ | 35151/371472 [2:50:15<26:06:40, 3.58it/s] 9%|▉ | 35152/371472 [2:50:15<25:53:29, 3.61it/s] 9%|▉ | 35153/371472 [2:50:15<26:24:57, 3.54it/s] 9%|▉ | 35154/371472 [2:50:16<26:36:44, 3.51it/s] 9%|▉ | 35155/371472 [2:50:16<26:02:09, 3.59it/s] 9%|▉ | 35156/371472 [2:50:16<26:19:44, 3.55it/s] 9%|▉ | 35157/371472 [2:50:16<26:00:41, 3.59it/s] 9%|▉ | 35158/371472 [2:50:17<26:37:10, 3.51it/s] 9%|▉ | 35159/371472 [2:50:17<25:43:17, 3.63it/s] 9%|▉ | 35160/371472 [2:50:17<26:33:33, 3.52it/s] {'loss': 4.296, 'learning_rate': 9.15253506862893e-07, 'epoch': 1.51} + 9%|▉ | 35160/371472 [2:50:17<26:33:33, 3.52it/s] 9%|▉ | 35161/371472 [2:50:18<26:08:21, 3.57it/s] 9%|▉ | 35162/371472 [2:50:18<25:53:23, 3.61it/s] 9%|▉ | 35163/371472 [2:50:18<26:13:23, 3.56it/s] 9%|▉ | 35164/371472 [2:50:18<26:35:52, 3.51it/s] 9%|▉ | 35165/371472 [2:50:19<26:39:30, 3.50it/s] 9%|▉ | 35166/371472 [2:50:19<25:55:48, 3.60it/s] 9%|▉ | 35167/371472 [2:50:19<25:12:18, 3.71it/s] 9%|▉ | 35168/371472 [2:50:20<26:37:52, 3.51it/s] 9%|▉ | 35169/371472 [2:50:20<26:49:21, 3.48it/s] 9%|▉ | 35170/371472 [2:50:20<25:49:49, 3.62it/s] 9%|▉ | 35171/371472 [2:50:20<24:58:53, 3.74it/s] 9%|▉ | 35172/371472 [2:50:21<24:59:33, 3.74it/s] 9%|▉ | 35173/371472 [2:50:21<24:21:31, 3.84it/s] 9%|▉ | 35174/371472 [2:50:21<26:28:34, 3.53it/s] 9%|▉ | 35175/371472 [2:50:21<26:39:45, 3.50it/s] 9%|▉ | 35176/371472 [2:50:22<26:08:14, 3.57it/s] 9%|▉ | 35177/371472 [2:50:22<26:55:11, 3.47it/s] 9%|▉ | 35178/371472 [2:50:22<27:51:04, 3.35it/s] 9%|▉ | 35179/371472 [2:50:23<28:46:46, 3.25it/s] 9%|▉ | 35180/371472 [2:50:23<27:31:16, 3.39it/s] {'loss': 4.5038, 'learning_rate': 9.15205024887414e-07, 'epoch': 1.52} + 9%|▉ | 35180/371472 [2:50:23<27:31:16, 3.39it/s] 9%|▉ | 35181/371472 [2:50:23<26:46:02, 3.49it/s] 9%|▉ | 35182/371472 [2:50:24<26:24:20, 3.54it/s] 9%|▉ | 35183/371472 [2:50:24<26:38:34, 3.51it/s] 9%|▉ | 35184/371472 [2:50:24<25:53:33, 3.61it/s] 9%|▉ | 35185/371472 [2:50:24<25:47:10, 3.62it/s] 9%|▉ | 35186/371472 [2:50:25<29:19:37, 3.19it/s] 9%|▉ | 35187/371472 [2:50:25<28:46:42, 3.25it/s] 9%|▉ | 35188/371472 [2:50:25<28:01:54, 3.33it/s] 9%|▉ | 35189/371472 [2:50:26<28:35:12, 3.27it/s] 9%|▉ | 35190/371472 [2:50:26<28:36:00, 3.27it/s] 9%|▉ | 35191/371472 [2:50:26<29:00:14, 3.22it/s] 9%|▉ | 35192/371472 [2:50:27<27:47:02, 3.36it/s] 9%|▉ | 35193/371472 [2:50:27<27:16:57, 3.42it/s] 9%|▉ | 35194/371472 [2:50:27<27:05:33, 3.45it/s] 9%|▉ | 35195/371472 [2:50:27<26:27:12, 3.53it/s] 9%|▉ | 35196/371472 [2:50:28<25:38:24, 3.64it/s] 9%|▉ | 35197/371472 [2:50:28<27:13:48, 3.43it/s] 9%|▉ | 35198/371472 [2:50:28<26:24:26, 3.54it/s] 9%|▉ | 35199/371472 [2:50:28<26:27:25, 3.53it/s] 9%|▉ | 35200/371472 [2:50:29<26:55:28, 3.47it/s] {'loss': 4.6752, 'learning_rate': 9.151565429119351e-07, 'epoch': 1.52} + 9%|▉ | 35200/371472 [2:50:29<26:55:28, 3.47it/s] 9%|▉ | 35201/371472 [2:50:29<27:05:39, 3.45it/s] 9%|▉ | 35202/371472 [2:50:29<26:24:21, 3.54it/s] 9%|▉ | 35203/371472 [2:50:30<27:04:19, 3.45it/s] 9%|▉ | 35204/371472 [2:50:30<27:31:49, 3.39it/s] 9%|▉ | 35205/371472 [2:50:30<27:09:44, 3.44it/s] 9%|▉ | 35206/371472 [2:50:30<26:12:09, 3.56it/s] 9%|▉ | 35207/371472 [2:50:31<26:08:34, 3.57it/s] 9%|▉ | 35208/371472 [2:50:31<26:13:31, 3.56it/s] 9%|▉ | 35209/371472 [2:50:31<30:14:16, 3.09it/s] 9%|▉ | 35210/371472 [2:50:32<29:15:18, 3.19it/s] 9%|▉ | 35211/371472 [2:50:32<27:51:00, 3.35it/s] 9%|▉ | 35212/371472 [2:50:32<28:36:05, 3.27it/s] 9%|▉ | 35213/371472 [2:50:33<27:51:49, 3.35it/s] 9%|▉ | 35214/371472 [2:50:33<27:28:40, 3.40it/s] 9%|▉ | 35215/371472 [2:50:33<27:19:24, 3.42it/s] 9%|▉ | 35216/371472 [2:50:34<28:13:03, 3.31it/s] 9%|▉ | 35217/371472 [2:50:34<26:51:58, 3.48it/s] 9%|▉ | 35218/371472 [2:50:34<26:16:09, 3.56it/s] 9%|▉ | 35219/371472 [2:50:34<28:45:01, 3.25it/s] 9%|▉ | 35220/371472 [2:50:35<26:52:30, 3.48it/s] {'loss': 4.3909, 'learning_rate': 9.151080609364563e-07, 'epoch': 1.52} + 9%|▉ | 35220/371472 [2:50:35<26:52:30, 3.48it/s] 9%|▉ | 35221/371472 [2:50:35<26:57:50, 3.46it/s] 9%|▉ | 35222/371472 [2:50:35<29:16:58, 3.19it/s] 9%|▉ | 35223/371472 [2:50:36<28:09:25, 3.32it/s] 9%|▉ | 35224/371472 [2:50:36<27:59:47, 3.34it/s] 9%|▉ | 35225/371472 [2:50:36<27:44:51, 3.37it/s] 9%|▉ | 35226/371472 [2:50:36<28:02:12, 3.33it/s] 9%|▉ | 35227/371472 [2:50:37<28:52:12, 3.24it/s] 9%|▉ | 35228/371472 [2:50:37<27:32:45, 3.39it/s] 9%|▉ | 35229/371472 [2:50:37<28:01:19, 3.33it/s] 9%|▉ | 35230/371472 [2:50:38<27:46:40, 3.36it/s] 9%|▉ | 35231/371472 [2:50:38<28:29:19, 3.28it/s] 9%|▉ | 35232/371472 [2:50:38<27:46:54, 3.36it/s] 9%|▉ | 35233/371472 [2:50:39<28:14:02, 3.31it/s] 9%|▉ | 35234/371472 [2:50:39<27:21:26, 3.41it/s] 9%|▉ | 35235/371472 [2:50:39<27:21:06, 3.41it/s] 9%|▉ | 35236/371472 [2:50:39<27:05:11, 3.45it/s] 9%|▉ | 35237/371472 [2:50:40<28:34:06, 3.27it/s] 9%|▉ | 35238/371472 [2:50:40<27:52:42, 3.35it/s] 9%|▉ | 35239/371472 [2:50:41<32:16:08, 2.89it/s] 9%|▉ | 35240/371472 [2:50:41<30:51:19, 3.03it/s] {'loss': 4.6122, 'learning_rate': 9.150595789609775e-07, 'epoch': 1.52} + 9%|▉ | 35240/371472 [2:50:41<30:51:19, 3.03it/s] 9%|▉ | 35241/371472 [2:50:41<29:36:41, 3.15it/s] 9%|▉ | 35242/371472 [2:50:41<28:06:56, 3.32it/s] 9%|▉ | 35243/371472 [2:50:42<28:03:27, 3.33it/s] 9%|▉ | 35244/371472 [2:50:42<27:00:40, 3.46it/s] 9%|▉ | 35245/371472 [2:50:42<26:11:19, 3.57it/s] 9%|▉ | 35246/371472 [2:50:42<25:10:05, 3.71it/s] 9%|▉ | 35247/371472 [2:50:43<26:35:54, 3.51it/s] 9%|▉ | 35248/371472 [2:50:43<25:37:30, 3.64it/s] 9%|▉ | 35249/371472 [2:50:43<27:07:52, 3.44it/s] 9%|▉ | 35250/371472 [2:50:44<28:34:49, 3.27it/s] 9%|▉ | 35251/371472 [2:50:44<27:24:41, 3.41it/s] 9%|▉ | 35252/371472 [2:50:44<26:25:41, 3.53it/s] 9%|▉ | 35253/371472 [2:50:45<26:38:56, 3.50it/s] 9%|▉ | 35254/371472 [2:50:45<26:16:25, 3.55it/s] 9%|▉ | 35255/371472 [2:50:45<25:36:59, 3.65it/s] 9%|▉ | 35256/371472 [2:50:45<27:00:29, 3.46it/s] 9%|▉ | 35257/371472 [2:50:46<26:11:34, 3.57it/s] 9%|▉ | 35258/371472 [2:50:46<26:04:46, 3.58it/s] 9%|▉ | 35259/371472 [2:50:46<29:36:23, 3.15it/s] 9%|▉ | 35260/371472 [2:50:47<28:52:26, 3.23it/s] {'loss': 4.4034, 'learning_rate': 9.150110969854984e-07, 'epoch': 1.52} + 9%|▉ | 35260/371472 [2:50:47<28:52:26, 3.23it/s] 9%|▉ | 35261/371472 [2:50:47<29:06:19, 3.21it/s] 9%|▉ | 35262/371472 [2:50:47<28:17:54, 3.30it/s] 9%|▉ | 35263/371472 [2:50:47<27:47:58, 3.36it/s] 9%|▉ | 35264/371472 [2:50:48<26:40:14, 3.50it/s] 9%|▉ | 35265/371472 [2:50:48<30:31:44, 3.06it/s] 9%|▉ | 35266/371472 [2:50:48<28:46:57, 3.24it/s] 9%|▉ | 35267/371472 [2:50:49<28:09:25, 3.32it/s] 9%|▉ | 35268/371472 [2:50:49<28:11:14, 3.31it/s] 9%|▉ | 35269/371472 [2:50:49<27:28:23, 3.40it/s] 9%|▉ | 35270/371472 [2:50:50<26:50:26, 3.48it/s] 9%|▉ | 35271/371472 [2:50:50<25:41:56, 3.63it/s] 9%|▉ | 35272/371472 [2:50:50<25:23:05, 3.68it/s] 9%|▉ | 35273/371472 [2:50:50<25:40:04, 3.64it/s] 9%|▉ | 35274/371472 [2:50:51<26:42:11, 3.50it/s] 9%|▉ | 35275/371472 [2:50:51<28:53:54, 3.23it/s] 9%|▉ | 35276/371472 [2:50:51<27:11:54, 3.43it/s] 9%|▉ | 35277/371472 [2:50:52<26:50:57, 3.48it/s] 9%|▉ | 35278/371472 [2:50:52<27:12:07, 3.43it/s] 9%|▉ | 35279/371472 [2:50:52<27:34:51, 3.39it/s] 9%|▉ | 35280/371472 [2:50:52<28:22:51, 3.29it/s] {'loss': 4.5078, 'learning_rate': 9.149626150100195e-07, 'epoch': 1.52} + 9%|▉ | 35280/371472 [2:50:52<28:22:51, 3.29it/s] 9%|▉ | 35281/371472 [2:50:53<29:04:21, 3.21it/s] 9%|▉ | 35282/371472 [2:50:53<27:56:03, 3.34it/s] 9%|▉ | 35283/371472 [2:50:53<27:09:55, 3.44it/s] 9%|▉ | 35284/371472 [2:50:54<26:04:52, 3.58it/s] 9%|▉ | 35285/371472 [2:50:54<25:27:40, 3.67it/s] 9%|▉ | 35286/371472 [2:50:54<25:20:13, 3.69it/s] 9%|▉ | 35287/371472 [2:50:54<25:34:07, 3.65it/s] 9%|▉ | 35288/371472 [2:50:55<24:53:39, 3.75it/s] 9%|▉ | 35289/371472 [2:50:55<25:54:20, 3.60it/s] 10%|▉ | 35290/371472 [2:50:55<25:40:38, 3.64it/s] 10%|▉ | 35291/371472 [2:50:55<24:55:00, 3.75it/s] 10%|▉ | 35292/371472 [2:50:56<24:52:47, 3.75it/s] 10%|▉ | 35293/371472 [2:50:56<25:22:48, 3.68it/s] 10%|▉ | 35294/371472 [2:50:56<26:03:11, 3.58it/s] 10%|▉ | 35295/371472 [2:50:57<25:30:15, 3.66it/s] 10%|▉ | 35296/371472 [2:50:57<26:04:10, 3.58it/s] 10%|▉ | 35297/371472 [2:50:57<26:26:27, 3.53it/s] 10%|▉ | 35298/371472 [2:50:57<27:00:32, 3.46it/s] 10%|▉ | 35299/371472 [2:50:58<26:25:01, 3.53it/s] 10%|▉ | 35300/371472 [2:50:58<26:28:26, 3.53it/s] {'loss': 4.6073, 'learning_rate': 9.149141330345407e-07, 'epoch': 1.52} + 10%|▉ | 35300/371472 [2:50:58<26:28:26, 3.53it/s] 10%|▉ | 35301/371472 [2:50:58<26:06:05, 3.58it/s] 10%|▉ | 35302/371472 [2:50:59<26:15:32, 3.56it/s] 10%|▉ | 35303/371472 [2:50:59<25:33:37, 3.65it/s] 10%|▉ | 35304/371472 [2:50:59<26:26:44, 3.53it/s] 10%|▉ | 35305/371472 [2:50:59<26:20:54, 3.54it/s] 10%|▉ | 35306/371472 [2:51:00<26:26:05, 3.53it/s] 10%|▉ | 35307/371472 [2:51:00<25:34:13, 3.65it/s] 10%|▉ | 35308/371472 [2:51:00<25:55:31, 3.60it/s] 10%|▉ | 35309/371472 [2:51:01<26:15:51, 3.56it/s] 10%|▉ | 35310/371472 [2:51:01<26:38:39, 3.50it/s] 10%|▉ | 35311/371472 [2:51:01<26:33:31, 3.52it/s] 10%|▉ | 35312/371472 [2:51:01<25:43:02, 3.63it/s] 10%|▉ | 35313/371472 [2:51:02<26:48:38, 3.48it/s] 10%|▉ | 35314/371472 [2:51:02<25:58:20, 3.60it/s] 10%|▉ | 35315/371472 [2:51:02<28:18:06, 3.30it/s] 10%|▉ | 35316/371472 [2:51:03<26:59:30, 3.46it/s] 10%|▉ | 35317/371472 [2:51:03<27:00:21, 3.46it/s] 10%|▉ | 35318/371472 [2:51:03<27:15:07, 3.43it/s] 10%|▉ | 35319/371472 [2:51:03<28:00:51, 3.33it/s] 10%|▉ | 35320/371472 [2:51:04<27:55:33, 3.34it/s] {'loss': 4.4251, 'learning_rate': 9.148656510590617e-07, 'epoch': 1.52} + 10%|▉ | 35320/371472 [2:51:04<27:55:33, 3.34it/s] 10%|▉ | 35321/371472 [2:51:04<27:45:21, 3.36it/s] 10%|▉ | 35322/371472 [2:51:04<27:23:41, 3.41it/s] 10%|▉ | 35323/371472 [2:51:05<27:56:08, 3.34it/s] 10%|▉ | 35324/371472 [2:51:05<27:17:25, 3.42it/s] 10%|▉ | 35325/371472 [2:51:05<27:05:38, 3.45it/s] 10%|▉ | 35326/371472 [2:51:05<26:21:58, 3.54it/s] 10%|▉ | 35327/371472 [2:51:06<27:08:20, 3.44it/s] 10%|▉ | 35328/371472 [2:51:06<26:16:41, 3.55it/s] 10%|▉ | 35329/371472 [2:51:06<26:53:20, 3.47it/s] 10%|▉ | 35330/371472 [2:51:07<25:47:16, 3.62it/s] 10%|▉ | 35331/371472 [2:51:07<25:02:40, 3.73it/s] 10%|▉ | 35332/371472 [2:51:07<25:19:07, 3.69it/s] 10%|▉ | 35333/371472 [2:51:07<25:11:01, 3.71it/s] 10%|▉ | 35334/371472 [2:51:08<25:59:18, 3.59it/s] 10%|▉ | 35335/371472 [2:51:08<26:28:29, 3.53it/s] 10%|▉ | 35336/371472 [2:51:08<26:16:42, 3.55it/s] 10%|▉ | 35337/371472 [2:51:09<28:02:28, 3.33it/s] 10%|▉ | 35338/371472 [2:51:09<27:11:39, 3.43it/s] 10%|▉ | 35339/371472 [2:51:09<28:04:30, 3.33it/s] 10%|▉ | 35340/371472 [2:51:09<27:48:53, 3.36it/s] {'loss': 4.6143, 'learning_rate': 9.148171690835828e-07, 'epoch': 1.52} + 10%|▉ | 35340/371472 [2:51:09<27:48:53, 3.36it/s] 10%|▉ | 35341/371472 [2:51:10<27:50:25, 3.35it/s] 10%|▉ | 35342/371472 [2:51:10<28:32:14, 3.27it/s] 10%|▉ | 35343/371472 [2:51:10<27:34:37, 3.39it/s] 10%|▉ | 35344/371472 [2:51:11<28:26:02, 3.28it/s] 10%|▉ | 35345/371472 [2:51:11<28:59:37, 3.22it/s] 10%|▉ | 35346/371472 [2:51:11<27:05:50, 3.45it/s] 10%|▉ | 35347/371472 [2:51:12<29:29:24, 3.17it/s] 10%|▉ | 35348/371472 [2:51:12<28:58:27, 3.22it/s] 10%|▉ | 35349/371472 [2:51:12<29:00:14, 3.22it/s] 10%|▉ | 35350/371472 [2:51:13<28:31:32, 3.27it/s] 10%|▉ | 35351/371472 [2:51:13<28:02:39, 3.33it/s] 10%|▉ | 35352/371472 [2:51:13<29:00:18, 3.22it/s] 10%|▉ | 35353/371472 [2:51:14<30:05:23, 3.10it/s] 10%|▉ | 35354/371472 [2:51:14<29:08:06, 3.20it/s] 10%|▉ | 35355/371472 [2:51:14<28:41:05, 3.25it/s] 10%|▉ | 35356/371472 [2:51:14<28:26:52, 3.28it/s] 10%|▉ | 35357/371472 [2:51:15<28:06:44, 3.32it/s] 10%|▉ | 35358/371472 [2:51:15<28:44:11, 3.25it/s] 10%|▉ | 35359/371472 [2:51:15<27:12:31, 3.43it/s] 10%|▉ | 35360/371472 [2:51:16<26:51:23, 3.48it/s] {'loss': 4.7287, 'learning_rate': 9.14768687108104e-07, 'epoch': 1.52} + 10%|▉ | 35360/371472 [2:51:16<26:51:23, 3.48it/s] 10%|▉ | 35361/371472 [2:51:16<26:51:27, 3.48it/s] 10%|▉ | 35362/371472 [2:51:16<28:01:27, 3.33it/s] 10%|▉ | 35363/371472 [2:51:16<27:40:24, 3.37it/s] 10%|▉ | 35364/371472 [2:51:17<26:59:38, 3.46it/s] 10%|▉ | 35365/371472 [2:51:17<26:24:26, 3.54it/s] 10%|▉ | 35366/371472 [2:51:17<27:56:48, 3.34it/s] 10%|▉ | 35367/371472 [2:51:18<27:04:12, 3.45it/s] 10%|▉ | 35368/371472 [2:51:18<27:52:23, 3.35it/s] 10%|▉ | 35369/371472 [2:51:18<26:57:26, 3.46it/s] 10%|▉ | 35370/371472 [2:51:18<26:10:32, 3.57it/s] 10%|▉ | 35371/371472 [2:51:19<27:10:24, 3.44it/s] 10%|▉ | 35372/371472 [2:51:19<26:41:28, 3.50it/s] 10%|▉ | 35373/371472 [2:51:19<27:11:20, 3.43it/s] 10%|▉ | 35374/371472 [2:51:20<28:15:29, 3.30it/s] 10%|▉ | 35375/371472 [2:51:20<30:41:26, 3.04it/s] 10%|▉ | 35376/371472 [2:51:20<28:39:50, 3.26it/s] 10%|▉ | 35377/371472 [2:51:21<27:35:17, 3.38it/s] 10%|▉ | 35378/371472 [2:51:21<26:49:57, 3.48it/s] 10%|▉ | 35379/371472 [2:51:21<26:02:23, 3.59it/s] 10%|▉ | 35380/371472 [2:51:21<27:55:57, 3.34it/s] {'loss': 4.7114, 'learning_rate': 9.147202051326251e-07, 'epoch': 1.52} + 10%|▉ | 35380/371472 [2:51:21<27:55:57, 3.34it/s] 10%|▉ | 35381/371472 [2:51:22<27:05:52, 3.45it/s] 10%|▉ | 35382/371472 [2:51:22<28:21:22, 3.29it/s] 10%|▉ | 35383/371472 [2:51:22<27:39:05, 3.38it/s] 10%|▉ | 35384/371472 [2:51:23<28:15:54, 3.30it/s] 10%|▉ | 35385/371472 [2:51:23<28:17:18, 3.30it/s] 10%|▉ | 35386/371472 [2:51:23<27:35:18, 3.38it/s] 10%|▉ | 35387/371472 [2:51:24<27:17:01, 3.42it/s] 10%|▉ | 35388/371472 [2:51:24<27:16:14, 3.42it/s] 10%|▉ | 35389/371472 [2:51:24<28:23:35, 3.29it/s] 10%|▉ | 35390/371472 [2:51:24<26:45:46, 3.49it/s] 10%|▉ | 35391/371472 [2:51:25<26:35:29, 3.51it/s] 10%|▉ | 35392/371472 [2:51:25<27:38:21, 3.38it/s] 10%|▉ | 35393/371472 [2:51:25<26:27:12, 3.53it/s] 10%|▉ | 35394/371472 [2:51:26<25:39:34, 3.64it/s] 10%|▉ | 35395/371472 [2:51:26<25:44:42, 3.63it/s] 10%|▉ | 35396/371472 [2:51:26<27:20:26, 3.41it/s] 10%|▉ | 35397/371472 [2:51:26<26:09:57, 3.57it/s] 10%|▉ | 35398/371472 [2:51:27<26:04:47, 3.58it/s] 10%|▉ | 35399/371472 [2:51:27<26:21:32, 3.54it/s] 10%|▉ | 35400/371472 [2:51:27<26:04:07, 3.58it/s] {'loss': 4.3877, 'learning_rate': 9.146717231571461e-07, 'epoch': 1.52} + 10%|▉ | 35400/371472 [2:51:27<26:04:07, 3.58it/s] 10%|▉ | 35401/371472 [2:51:28<26:43:10, 3.49it/s] 10%|▉ | 35402/371472 [2:51:28<25:47:02, 3.62it/s] 10%|▉ | 35403/371472 [2:51:28<25:30:20, 3.66it/s] 10%|▉ | 35404/371472 [2:51:28<25:31:32, 3.66it/s] 10%|▉ | 35405/371472 [2:51:29<25:58:59, 3.59it/s] 10%|▉ | 35406/371472 [2:51:29<26:10:51, 3.57it/s] 10%|▉ | 35407/371472 [2:51:29<25:39:53, 3.64it/s] 10%|▉ | 35408/371472 [2:51:29<26:18:51, 3.55it/s] 10%|▉ | 35409/371472 [2:51:30<27:18:52, 3.42it/s] 10%|▉ | 35410/371472 [2:51:30<27:53:28, 3.35it/s] 10%|▉ | 35411/371472 [2:51:30<28:09:20, 3.32it/s] 10%|▉ | 35412/371472 [2:51:31<28:24:11, 3.29it/s] 10%|▉ | 35413/371472 [2:51:31<27:09:23, 3.44it/s] 10%|▉ | 35414/371472 [2:51:31<27:02:06, 3.45it/s] 10%|▉ | 35415/371472 [2:51:32<30:44:25, 3.04it/s] 10%|▉ | 35416/371472 [2:51:32<28:48:03, 3.24it/s] 10%|▉ | 35417/371472 [2:51:32<29:57:19, 3.12it/s] 10%|▉ | 35418/371472 [2:51:33<28:20:22, 3.29it/s] 10%|▉ | 35419/371472 [2:51:33<28:33:40, 3.27it/s] 10%|▉ | 35420/371472 [2:51:33<27:48:14, 3.36it/s] {'loss': 4.466, 'learning_rate': 9.146232411816673e-07, 'epoch': 1.53} + 10%|▉ | 35420/371472 [2:51:33<27:48:14, 3.36it/s] 10%|▉ | 35421/371472 [2:51:33<28:11:44, 3.31it/s] 10%|▉ | 35422/371472 [2:51:34<28:32:38, 3.27it/s] 10%|▉ | 35423/371472 [2:51:34<27:52:48, 3.35it/s] 10%|▉ | 35424/371472 [2:51:34<30:53:50, 3.02it/s] 10%|▉ | 35425/371472 [2:51:35<28:32:55, 3.27it/s] 10%|▉ | 35426/371472 [2:51:35<27:38:38, 3.38it/s] 10%|▉ | 35427/371472 [2:51:35<26:54:05, 3.47it/s] 10%|▉ | 35428/371472 [2:51:36<26:39:11, 3.50it/s] 10%|▉ | 35429/371472 [2:51:36<27:22:49, 3.41it/s] 10%|▉ | 35430/371472 [2:51:36<26:39:34, 3.50it/s] 10%|▉ | 35431/371472 [2:51:36<28:03:36, 3.33it/s] 10%|▉ | 35432/371472 [2:51:37<29:23:08, 3.18it/s] 10%|▉ | 35433/371472 [2:51:37<28:37:05, 3.26it/s] 10%|▉ | 35434/371472 [2:51:37<27:13:32, 3.43it/s] 10%|▉ | 35435/371472 [2:51:38<27:12:38, 3.43it/s] 10%|▉ | 35436/371472 [2:51:38<26:32:26, 3.52it/s] 10%|▉ | 35437/371472 [2:51:38<26:01:14, 3.59it/s] 10%|▉ | 35438/371472 [2:51:38<26:30:10, 3.52it/s] 10%|▉ | 35439/371472 [2:51:39<26:41:58, 3.50it/s] 10%|▉ | 35440/371472 [2:51:39<26:52:54, 3.47it/s] {'loss': 4.5891, 'learning_rate': 9.145747592061884e-07, 'epoch': 1.53} + 10%|▉ | 35440/371472 [2:51:39<26:52:54, 3.47it/s] 10%|▉ | 35441/371472 [2:51:39<26:57:19, 3.46it/s] 10%|▉ | 35442/371472 [2:51:40<26:35:16, 3.51it/s] 10%|▉ | 35443/371472 [2:51:40<26:10:39, 3.57it/s] 10%|▉ | 35444/371472 [2:51:40<25:51:42, 3.61it/s] 10%|▉ | 35445/371472 [2:51:40<26:53:27, 3.47it/s] 10%|▉ | 35446/371472 [2:51:41<26:57:36, 3.46it/s] 10%|▉ | 35447/371472 [2:51:41<26:15:05, 3.56it/s] 10%|▉ | 35448/371472 [2:51:41<25:47:33, 3.62it/s] 10%|▉ | 35449/371472 [2:51:42<26:38:53, 3.50it/s] 10%|▉ | 35450/371472 [2:51:42<25:43:07, 3.63it/s] 10%|▉ | 35451/371472 [2:51:42<25:09:46, 3.71it/s] 10%|▉ | 35452/371472 [2:51:42<25:01:57, 3.73it/s] 10%|▉ | 35453/371472 [2:51:43<25:03:31, 3.72it/s] 10%|▉ | 35454/371472 [2:51:43<25:40:34, 3.64it/s] 10%|▉ | 35455/371472 [2:51:43<27:51:11, 3.35it/s] 10%|▉ | 35456/371472 [2:51:44<26:51:18, 3.48it/s] 10%|▉ | 35457/371472 [2:51:44<26:14:39, 3.56it/s] 10%|▉ | 35458/371472 [2:51:44<25:17:14, 3.69it/s] 10%|▉ | 35459/371472 [2:51:44<25:18:47, 3.69it/s] 10%|▉ | 35460/371472 [2:51:45<24:35:11, 3.80it/s] {'loss': 4.7254, 'learning_rate': 9.145262772307096e-07, 'epoch': 1.53} + 10%|▉ | 35460/371472 [2:51:45<24:35:11, 3.80it/s] 10%|▉ | 35461/371472 [2:51:45<24:21:50, 3.83it/s] 10%|▉ | 35462/371472 [2:51:45<25:32:29, 3.65it/s] 10%|▉ | 35463/371472 [2:51:45<26:03:53, 3.58it/s] 10%|▉ | 35464/371472 [2:51:46<26:28:44, 3.52it/s] 10%|▉ | 35465/371472 [2:51:46<29:13:01, 3.19it/s] 10%|▉ | 35466/371472 [2:51:46<28:20:55, 3.29it/s] 10%|▉ | 35467/371472 [2:51:47<28:58:05, 3.22it/s] 10%|▉ | 35468/371472 [2:51:47<28:48:44, 3.24it/s] 10%|▉ | 35469/371472 [2:51:47<27:44:14, 3.36it/s] 10%|▉ | 35470/371472 [2:51:48<26:31:30, 3.52it/s] 10%|▉ | 35471/371472 [2:51:48<25:52:13, 3.61it/s] 10%|▉ | 35472/371472 [2:51:48<25:12:38, 3.70it/s] 10%|▉ | 35473/371472 [2:51:48<25:49:19, 3.61it/s] 10%|▉ | 35474/371472 [2:51:49<26:45:07, 3.49it/s] 10%|▉ | 35475/371472 [2:51:49<26:41:06, 3.50it/s] 10%|▉ | 35476/371472 [2:51:49<26:11:25, 3.56it/s] 10%|▉ | 35477/371472 [2:51:49<25:58:20, 3.59it/s] 10%|▉ | 35478/371472 [2:51:50<25:30:32, 3.66it/s] 10%|▉ | 35479/371472 [2:51:50<24:55:34, 3.74it/s] 10%|▉ | 35480/371472 [2:51:50<24:49:54, 3.76it/s] {'loss': 4.6304, 'learning_rate': 9.144777952552306e-07, 'epoch': 1.53} + 10%|▉ | 35480/371472 [2:51:50<24:49:54, 3.76it/s] 10%|▉ | 35481/371472 [2:51:50<24:43:03, 3.78it/s] 10%|▉ | 35482/371472 [2:51:51<26:55:42, 3.47it/s] 10%|▉ | 35483/371472 [2:51:51<25:49:20, 3.61it/s] 10%|▉ | 35484/371472 [2:51:51<27:59:05, 3.34it/s] 10%|▉ | 35485/371472 [2:51:52<26:24:37, 3.53it/s] 10%|▉ | 35486/371472 [2:51:52<26:07:50, 3.57it/s] 10%|▉ | 35487/371472 [2:51:52<26:23:14, 3.54it/s] 10%|▉ | 35488/371472 [2:51:52<25:31:16, 3.66it/s] 10%|▉ | 35489/371472 [2:51:53<25:28:15, 3.66it/s] 10%|▉ | 35490/371472 [2:51:53<24:54:35, 3.75it/s] 10%|▉ | 35491/371472 [2:51:53<26:40:32, 3.50it/s] 10%|▉ | 35492/371472 [2:51:54<26:13:38, 3.56it/s] 10%|▉ | 35493/371472 [2:51:54<25:15:22, 3.70it/s] 10%|▉ | 35494/371472 [2:51:54<24:55:16, 3.74it/s] 10%|▉ | 35495/371472 [2:51:54<25:04:58, 3.72it/s] 10%|▉ | 35496/371472 [2:51:55<25:42:15, 3.63it/s] 10%|▉ | 35497/371472 [2:51:55<26:54:10, 3.47it/s] 10%|▉ | 35498/371472 [2:51:55<26:11:57, 3.56it/s] 10%|▉ | 35499/371472 [2:51:56<27:07:38, 3.44it/s] 10%|▉ | 35500/371472 [2:51:56<27:45:20, 3.36it/s] {'loss': 4.5517, 'learning_rate': 9.144293132797518e-07, 'epoch': 1.53} + 10%|▉ | 35500/371472 [2:51:56<27:45:20, 3.36it/s] 10%|▉ | 35501/371472 [2:51:56<27:22:01, 3.41it/s] 10%|▉ | 35502/371472 [2:51:56<26:15:23, 3.55it/s] 10%|▉ | 35503/371472 [2:51:57<26:11:39, 3.56it/s] 10%|▉ | 35504/371472 [2:51:57<25:25:22, 3.67it/s] 10%|▉ | 35505/371472 [2:51:57<25:14:03, 3.70it/s] 10%|▉ | 35506/371472 [2:51:58<26:40:12, 3.50it/s] 10%|▉ | 35507/371472 [2:51:58<26:10:31, 3.57it/s] 10%|▉ | 35508/371472 [2:51:58<27:30:30, 3.39it/s] 10%|▉ | 35509/371472 [2:51:58<26:51:38, 3.47it/s] 10%|▉ | 35510/371472 [2:51:59<26:16:50, 3.55it/s] 10%|▉ | 35511/371472 [2:51:59<26:18:39, 3.55it/s] 10%|▉ | 35512/371472 [2:51:59<25:44:01, 3.63it/s] 10%|▉ | 35513/371472 [2:52:00<25:50:18, 3.61it/s] 10%|▉ | 35514/371472 [2:52:00<28:25:11, 3.28it/s] 10%|▉ | 35515/371472 [2:52:00<27:49:44, 3.35it/s] 10%|▉ | 35516/371472 [2:52:01<29:02:04, 3.21it/s] 10%|▉ | 35517/371472 [2:52:01<28:11:52, 3.31it/s] 10%|▉ | 35518/371472 [2:52:01<26:37:31, 3.50it/s] 10%|▉ | 35519/371472 [2:52:01<26:30:05, 3.52it/s] 10%|▉ | 35520/371472 [2:52:02<25:19:51, 3.68it/s] {'loss': 4.5951, 'learning_rate': 9.143808313042728e-07, 'epoch': 1.53} + 10%|▉ | 35520/371472 [2:52:02<25:19:51, 3.68it/s] 10%|▉ | 35521/371472 [2:52:02<24:54:20, 3.75it/s] 10%|▉ | 35522/371472 [2:52:02<26:04:56, 3.58it/s] 10%|▉ | 35523/371472 [2:52:02<26:26:02, 3.53it/s] 10%|▉ | 35524/371472 [2:52:03<26:02:20, 3.58it/s] 10%|▉ | 35525/371472 [2:52:03<27:27:04, 3.40it/s] 10%|▉ | 35526/371472 [2:52:03<27:20:23, 3.41it/s] 10%|▉ | 35527/371472 [2:52:04<26:27:02, 3.53it/s] 10%|▉ | 35528/371472 [2:52:04<25:37:00, 3.64it/s] 10%|▉ | 35529/371472 [2:52:04<25:42:18, 3.63it/s] 10%|▉ | 35530/371472 [2:52:04<26:12:52, 3.56it/s] 10%|▉ | 35531/371472 [2:52:05<26:10:58, 3.56it/s] 10%|▉ | 35532/371472 [2:52:05<25:56:01, 3.60it/s] 10%|▉ | 35533/371472 [2:52:05<25:41:10, 3.63it/s] 10%|▉ | 35534/371472 [2:52:05<25:43:18, 3.63it/s] 10%|▉ | 35535/371472 [2:52:06<25:37:14, 3.64it/s] 10%|▉ | 35536/371472 [2:52:06<25:19:49, 3.68it/s] 10%|▉ | 35537/371472 [2:52:06<24:59:05, 3.73it/s] 10%|▉ | 35538/371472 [2:52:07<24:38:36, 3.79it/s] 10%|▉ | 35539/371472 [2:52:07<24:02:09, 3.88it/s] 10%|▉ | 35540/371472 [2:52:07<25:39:29, 3.64it/s] {'loss': 4.6614, 'learning_rate': 9.14332349328794e-07, 'epoch': 1.53} + 10%|▉ | 35540/371472 [2:52:07<25:39:29, 3.64it/s] 10%|▉ | 35541/371472 [2:52:07<26:08:47, 3.57it/s] 10%|▉ | 35542/371472 [2:52:08<27:07:40, 3.44it/s] 10%|▉ | 35543/371472 [2:52:08<27:54:06, 3.34it/s] 10%|▉ | 35544/371472 [2:52:08<27:29:46, 3.39it/s] 10%|▉ | 35545/371472 [2:52:09<27:05:09, 3.45it/s] 10%|▉ | 35546/371472 [2:52:09<26:37:12, 3.51it/s] 10%|▉ | 35547/371472 [2:52:09<29:03:20, 3.21it/s] 10%|▉ | 35548/371472 [2:52:09<27:40:58, 3.37it/s] 10%|▉ | 35549/371472 [2:52:10<27:04:08, 3.45it/s] 10%|▉ | 35550/371472 [2:52:10<26:22:23, 3.54it/s] 10%|▉ | 35551/371472 [2:52:10<26:31:03, 3.52it/s] 10%|▉ | 35552/371472 [2:52:11<25:50:14, 3.61it/s] 10%|▉ | 35553/371472 [2:52:11<28:01:30, 3.33it/s] 10%|▉ | 35554/371472 [2:52:11<26:41:00, 3.50it/s] 10%|▉ | 35555/371472 [2:52:11<26:05:31, 3.58it/s] 10%|▉ | 35556/371472 [2:52:12<32:31:38, 2.87it/s] 10%|▉ | 35557/371472 [2:52:12<31:06:39, 3.00it/s] 10%|▉ | 35558/371472 [2:52:13<29:15:29, 3.19it/s] 10%|▉ | 35559/371472 [2:52:13<27:18:41, 3.42it/s] 10%|▉ | 35560/371472 [2:52:13<27:03:46, 3.45it/s] {'loss': 4.352, 'learning_rate': 9.14283867353315e-07, 'epoch': 1.53} + 10%|▉ | 35560/371472 [2:52:13<27:03:46, 3.45it/s] 10%|▉ | 35561/371472 [2:52:13<28:59:56, 3.22it/s] 10%|▉ | 35562/371472 [2:52:14<28:47:41, 3.24it/s] 10%|▉ | 35563/371472 [2:52:14<28:12:05, 3.31it/s] 10%|▉ | 35564/371472 [2:52:14<27:23:43, 3.41it/s] 10%|▉ | 35565/371472 [2:52:15<31:02:18, 3.01it/s] 10%|▉ | 35566/371472 [2:52:15<29:28:40, 3.17it/s] 10%|▉ | 35567/371472 [2:52:15<29:20:28, 3.18it/s] 10%|▉ | 35568/371472 [2:52:16<28:11:08, 3.31it/s] 10%|▉ | 35569/371472 [2:52:16<31:41:24, 2.94it/s] 10%|▉ | 35570/371472 [2:52:16<31:10:01, 2.99it/s] 10%|▉ | 35571/371472 [2:52:17<28:58:21, 3.22it/s] 10%|▉ | 35572/371472 [2:52:17<28:41:29, 3.25it/s] 10%|▉ | 35573/371472 [2:52:17<27:52:02, 3.35it/s] 10%|▉ | 35574/371472 [2:52:17<26:17:33, 3.55it/s] 10%|▉ | 35575/371472 [2:52:18<25:22:14, 3.68it/s] 10%|▉ | 35576/371472 [2:52:18<25:00:02, 3.73it/s] 10%|▉ | 35577/371472 [2:52:18<26:11:21, 3.56it/s] 10%|▉ | 35578/371472 [2:52:19<26:46:30, 3.48it/s] 10%|▉ | 35579/371472 [2:52:19<25:58:49, 3.59it/s] 10%|▉ | 35580/371472 [2:52:19<26:13:28, 3.56it/s] {'loss': 4.2372, 'learning_rate': 9.142353853778362e-07, 'epoch': 1.53} + 10%|▉ | 35580/371472 [2:52:19<26:13:28, 3.56it/s] 10%|▉ | 35581/371472 [2:52:19<26:01:37, 3.58it/s] 10%|▉ | 35582/371472 [2:52:20<27:28:45, 3.40it/s] 10%|▉ | 35583/371472 [2:52:20<26:23:45, 3.53it/s] 10%|▉ | 35584/371472 [2:52:20<26:52:11, 3.47it/s] 10%|▉ | 35585/371472 [2:52:20<26:38:09, 3.50it/s] 10%|▉ | 35586/371472 [2:52:21<26:48:16, 3.48it/s] 10%|▉ | 35587/371472 [2:52:21<25:41:52, 3.63it/s] 10%|▉ | 35588/371472 [2:52:21<26:37:11, 3.50it/s] 10%|▉ | 35589/371472 [2:52:22<27:30:13, 3.39it/s] 10%|▉ | 35590/371472 [2:52:22<27:04:20, 3.45it/s] 10%|▉ | 35591/371472 [2:52:22<27:26:14, 3.40it/s] 10%|▉ | 35592/371472 [2:52:23<27:16:46, 3.42it/s] 10%|▉ | 35593/371472 [2:52:23<26:46:25, 3.48it/s] 10%|▉ | 35594/371472 [2:52:23<26:26:49, 3.53it/s] 10%|▉ | 35595/371472 [2:52:23<26:43:33, 3.49it/s] 10%|▉ | 35596/371472 [2:52:24<25:55:35, 3.60it/s] 10%|▉ | 35597/371472 [2:52:24<25:45:13, 3.62it/s] 10%|▉ | 35598/371472 [2:52:24<26:24:01, 3.53it/s] 10%|▉ | 35599/371472 [2:52:24<25:14:22, 3.70it/s] 10%|▉ | 35600/371472 [2:52:25<26:57:55, 3.46it/s] {'loss': 4.5929, 'learning_rate': 9.141869034023573e-07, 'epoch': 1.53} + 10%|▉ | 35600/371472 [2:52:25<26:57:55, 3.46it/s] 10%|▉ | 35601/371472 [2:52:25<30:23:14, 3.07it/s] 10%|▉ | 35602/371472 [2:52:25<28:29:28, 3.27it/s] 10%|▉ | 35603/371472 [2:52:26<28:28:10, 3.28it/s] 10%|▉ | 35604/371472 [2:52:26<27:07:21, 3.44it/s] 10%|▉ | 35605/371472 [2:52:26<28:35:33, 3.26it/s] 10%|▉ | 35606/371472 [2:52:27<28:02:48, 3.33it/s] 10%|▉ | 35607/371472 [2:52:27<29:49:33, 3.13it/s] 10%|▉ | 35608/371472 [2:52:27<28:03:21, 3.33it/s] 10%|▉ | 35609/371472 [2:52:28<26:52:46, 3.47it/s] 10%|▉ | 35610/371472 [2:52:28<26:17:27, 3.55it/s] 10%|▉ | 35611/371472 [2:52:28<25:38:37, 3.64it/s] 10%|▉ | 35612/371472 [2:52:28<25:15:14, 3.69it/s] 10%|▉ | 35613/371472 [2:52:29<24:41:03, 3.78it/s] 10%|▉ | 35614/371472 [2:52:29<25:14:09, 3.70it/s] 10%|▉ | 35615/371472 [2:52:29<25:33:59, 3.65it/s] 10%|▉ | 35616/371472 [2:52:29<25:10:01, 3.71it/s] 10%|▉ | 35617/371472 [2:52:30<26:29:44, 3.52it/s] 10%|▉ | 35618/371472 [2:52:30<25:51:13, 3.61it/s] 10%|▉ | 35619/371472 [2:52:30<25:35:19, 3.65it/s] 10%|▉ | 35620/371472 [2:52:31<28:17:44, 3.30it/s] {'loss': 4.3707, 'learning_rate': 9.141384214268784e-07, 'epoch': 1.53} + 10%|▉ | 35620/371472 [2:52:31<28:17:44, 3.30it/s] 10%|▉ | 35621/371472 [2:52:31<27:18:29, 3.42it/s] 10%|▉ | 35622/371472 [2:52:31<27:29:27, 3.39it/s] 10%|▉ | 35623/371472 [2:52:31<26:19:03, 3.54it/s] 10%|▉ | 35624/371472 [2:52:32<27:48:35, 3.35it/s] 10%|▉ | 35625/371472 [2:52:32<27:15:03, 3.42it/s] 10%|▉ | 35626/371472 [2:52:32<26:16:40, 3.55it/s] 10%|▉ | 35627/371472 [2:52:33<27:33:41, 3.38it/s] 10%|▉ | 35628/371472 [2:52:33<27:47:12, 3.36it/s] 10%|▉ | 35629/371472 [2:52:33<27:40:03, 3.37it/s] 10%|▉ | 35630/371472 [2:52:33<26:33:50, 3.51it/s] 10%|▉ | 35631/371472 [2:52:34<25:38:58, 3.64it/s] 10%|▉ | 35632/371472 [2:52:34<26:16:02, 3.55it/s] 10%|▉ | 35633/371472 [2:52:34<26:02:28, 3.58it/s] 10%|▉ | 35634/371472 [2:52:35<25:31:44, 3.65it/s] 10%|▉ | 35635/371472 [2:52:35<25:13:01, 3.70it/s] 10%|▉ | 35636/371472 [2:52:35<25:29:22, 3.66it/s] 10%|▉ | 35637/371472 [2:52:35<27:34:08, 3.38it/s] 10%|▉ | 35638/371472 [2:52:36<26:16:03, 3.55it/s] 10%|▉ | 35639/371472 [2:52:36<26:16:39, 3.55it/s] 10%|▉ | 35640/371472 [2:52:36<27:40:30, 3.37it/s] {'loss': 4.4415, 'learning_rate': 9.140899394513994e-07, 'epoch': 1.54} + 10%|▉ | 35640/371472 [2:52:36<27:40:30, 3.37it/s] 10%|▉ | 35641/371472 [2:52:37<26:16:10, 3.55it/s] 10%|▉ | 35642/371472 [2:52:37<27:39:47, 3.37it/s] 10%|▉ | 35643/371472 [2:52:37<26:47:16, 3.48it/s] 10%|▉ | 35644/371472 [2:52:37<26:05:06, 3.58it/s] 10%|▉ | 35645/371472 [2:52:38<25:24:49, 3.67it/s] 10%|▉ | 35646/371472 [2:52:38<26:42:20, 3.49it/s] 10%|▉ | 35647/371472 [2:52:38<27:19:12, 3.41it/s] 10%|▉ | 35648/371472 [2:52:39<28:24:08, 3.28it/s] 10%|▉ | 35649/371472 [2:52:39<27:52:14, 3.35it/s] 10%|▉ | 35650/371472 [2:52:39<27:06:06, 3.44it/s] 10%|▉ | 35651/371472 [2:52:39<26:23:18, 3.54it/s] 10%|▉ | 35652/371472 [2:52:40<26:27:56, 3.52it/s] 10%|▉ | 35653/371472 [2:52:40<26:09:54, 3.57it/s] 10%|▉ | 35654/371472 [2:52:40<25:14:09, 3.70it/s] 10%|▉ | 35655/371472 [2:52:41<26:06:00, 3.57it/s] 10%|▉ | 35656/371472 [2:52:41<26:03:48, 3.58it/s] 10%|▉ | 35657/371472 [2:52:41<26:15:30, 3.55it/s] 10%|▉ | 35658/371472 [2:52:41<26:13:28, 3.56it/s] 10%|▉ | 35659/371472 [2:52:42<26:18:55, 3.54it/s] 10%|▉ | 35660/371472 [2:52:42<25:32:13, 3.65it/s] {'loss': 4.385, 'learning_rate': 9.140414574759206e-07, 'epoch': 1.54} + 10%|▉ | 35660/371472 [2:52:42<25:32:13, 3.65it/s] 10%|▉ | 35661/371472 [2:52:42<24:57:56, 3.74it/s] 10%|▉ | 35662/371472 [2:52:42<24:45:20, 3.77it/s] 10%|▉ | 35663/371472 [2:52:43<25:50:58, 3.61it/s] 10%|▉ | 35664/371472 [2:52:43<25:20:00, 3.68it/s] 10%|▉ | 35665/371472 [2:52:43<25:20:23, 3.68it/s] 10%|▉ | 35666/371472 [2:52:44<25:11:19, 3.70it/s] 10%|▉ | 35667/371472 [2:52:44<24:54:56, 3.74it/s] 10%|▉ | 35668/371472 [2:52:44<27:26:29, 3.40it/s] 10%|▉ | 35669/371472 [2:52:44<26:24:59, 3.53it/s] 10%|▉ | 35670/371472 [2:52:45<27:23:47, 3.40it/s] 10%|▉ | 35671/371472 [2:52:45<27:25:33, 3.40it/s] 10%|▉ | 35672/371472 [2:52:45<27:00:00, 3.45it/s] 10%|▉ | 35673/371472 [2:52:46<26:08:05, 3.57it/s] 10%|▉ | 35674/371472 [2:52:46<28:09:33, 3.31it/s] 10%|▉ | 35675/371472 [2:52:46<27:45:51, 3.36it/s] 10%|▉ | 35676/371472 [2:52:46<26:55:37, 3.46it/s] 10%|▉ | 35677/371472 [2:52:47<26:31:15, 3.52it/s] 10%|▉ | 35678/371472 [2:52:47<26:34:53, 3.51it/s] 10%|▉ | 35679/371472 [2:52:47<26:14:26, 3.55it/s] 10%|▉ | 35680/371472 [2:52:48<25:17:49, 3.69it/s] {'loss': 4.3335, 'learning_rate': 9.139929755004417e-07, 'epoch': 1.54} + 10%|▉ | 35680/371472 [2:52:48<25:17:49, 3.69it/s] 10%|▉ | 35681/371472 [2:52:48<25:10:43, 3.70it/s] 10%|▉ | 35682/371472 [2:52:48<25:00:19, 3.73it/s] 10%|▉ | 35683/371472 [2:52:48<24:56:40, 3.74it/s] 10%|▉ | 35684/371472 [2:52:49<24:47:48, 3.76it/s] 10%|▉ | 35685/371472 [2:52:49<25:20:18, 3.68it/s] 10%|▉ | 35686/371472 [2:52:49<24:52:24, 3.75it/s] 10%|▉ | 35687/371472 [2:52:49<26:13:15, 3.56it/s] 10%|▉ | 35688/371472 [2:52:50<26:30:58, 3.52it/s] 10%|▉ | 35689/371472 [2:52:50<26:03:47, 3.58it/s] 10%|▉ | 35690/371472 [2:52:50<27:44:02, 3.36it/s] 10%|▉ | 35691/371472 [2:52:51<27:03:10, 3.45it/s] 10%|▉ | 35692/371472 [2:52:51<26:23:56, 3.53it/s] 10%|▉ | 35693/371472 [2:52:51<26:39:40, 3.50it/s] 10%|▉ | 35694/371472 [2:52:51<26:37:32, 3.50it/s] 10%|▉ | 35695/371472 [2:52:52<26:29:56, 3.52it/s] 10%|▉ | 35696/371472 [2:52:52<27:07:15, 3.44it/s] 10%|▉ | 35697/371472 [2:52:52<29:40:03, 3.14it/s] 10%|▉ | 35698/371472 [2:52:53<28:44:56, 3.24it/s] 10%|▉ | 35699/371472 [2:52:53<29:37:20, 3.15it/s] 10%|▉ | 35700/371472 [2:52:53<29:10:07, 3.20it/s] {'loss': 4.3402, 'learning_rate': 9.139444935249628e-07, 'epoch': 1.54} + 10%|▉ | 35700/371472 [2:52:53<29:10:07, 3.20it/s] 10%|▉ | 35701/371472 [2:52:54<28:20:14, 3.29it/s] 10%|▉ | 35702/371472 [2:52:54<27:10:57, 3.43it/s] 10%|▉ | 35703/371472 [2:52:54<26:17:35, 3.55it/s] 10%|▉ | 35704/371472 [2:52:54<25:54:51, 3.60it/s] 10%|▉ | 35705/371472 [2:52:55<25:50:24, 3.61it/s] 10%|▉ | 35706/371472 [2:52:55<26:04:56, 3.58it/s] 10%|▉ | 35707/371472 [2:52:55<25:56:10, 3.60it/s] 10%|▉ | 35708/371472 [2:52:56<25:27:59, 3.66it/s] 10%|▉ | 35709/371472 [2:52:56<31:28:52, 2.96it/s] 10%|▉ | 35710/371472 [2:52:56<32:30:27, 2.87it/s] 10%|▉ | 35711/371472 [2:52:57<29:36:39, 3.15it/s] 10%|▉ | 35712/371472 [2:52:57<29:16:06, 3.19it/s] 10%|▉ | 35713/371472 [2:52:57<27:59:06, 3.33it/s] 10%|▉ | 35714/371472 [2:52:58<27:41:07, 3.37it/s] 10%|▉ | 35715/371472 [2:52:58<27:41:54, 3.37it/s] 10%|▉ | 35716/371472 [2:52:58<27:59:37, 3.33it/s] 10%|▉ | 35717/371472 [2:52:58<29:25:10, 3.17it/s] 10%|▉ | 35718/371472 [2:52:59<29:17:18, 3.18it/s] 10%|▉ | 35719/371472 [2:52:59<28:52:39, 3.23it/s] 10%|▉ | 35720/371472 [2:52:59<27:39:48, 3.37it/s] {'loss': 4.6116, 'learning_rate': 9.138960115494839e-07, 'epoch': 1.54} + 10%|▉ | 35720/371472 [2:52:59<27:39:48, 3.37it/s] 10%|▉ | 35721/371472 [2:53:00<27:23:18, 3.41it/s] 10%|▉ | 35722/371472 [2:53:00<26:19:41, 3.54it/s] 10%|▉ | 35723/371472 [2:53:00<25:37:36, 3.64it/s] 10%|▉ | 35724/371472 [2:53:00<25:13:17, 3.70it/s] 10%|▉ | 35725/371472 [2:53:01<26:08:26, 3.57it/s] 10%|▉ | 35726/371472 [2:53:01<25:42:05, 3.63it/s] 10%|▉ | 35727/371472 [2:53:01<25:02:51, 3.72it/s] 10%|▉ | 35728/371472 [2:53:02<24:56:14, 3.74it/s] 10%|▉ | 35729/371472 [2:53:02<26:10:30, 3.56it/s] 10%|▉ | 35730/371472 [2:53:02<26:18:09, 3.55it/s] 10%|▉ | 35731/371472 [2:53:02<26:07:44, 3.57it/s] 10%|▉ | 35732/371472 [2:53:03<26:45:30, 3.49it/s] 10%|▉ | 35733/371472 [2:53:03<26:55:31, 3.46it/s] 10%|▉ | 35734/371472 [2:53:03<27:17:46, 3.42it/s] 10%|▉ | 35735/371472 [2:53:04<26:46:34, 3.48it/s] 10%|▉ | 35736/371472 [2:53:04<26:39:00, 3.50it/s] 10%|▉ | 35737/371472 [2:53:04<26:44:19, 3.49it/s] 10%|▉ | 35738/371472 [2:53:04<26:25:08, 3.53it/s] 10%|▉ | 35739/371472 [2:53:05<27:12:49, 3.43it/s] 10%|▉ | 35740/371472 [2:53:05<26:51:55, 3.47it/s] {'loss': 4.6895, 'learning_rate': 9.13847529574005e-07, 'epoch': 1.54} + 10%|▉ | 35740/371472 [2:53:05<26:51:55, 3.47it/s] 10%|▉ | 35741/371472 [2:53:05<28:20:40, 3.29it/s] 10%|▉ | 35742/371472 [2:53:06<28:17:10, 3.30it/s] 10%|▉ | 35743/371472 [2:53:06<27:52:30, 3.35it/s] 10%|▉ | 35744/371472 [2:53:06<27:23:43, 3.40it/s] 10%|▉ | 35745/371472 [2:53:06<26:37:04, 3.50it/s] 10%|▉ | 35746/371472 [2:53:07<26:17:31, 3.55it/s] 10%|▉ | 35747/371472 [2:53:07<26:32:48, 3.51it/s] 10%|▉ | 35748/371472 [2:53:07<27:29:31, 3.39it/s] 10%|▉ | 35749/371472 [2:53:08<27:29:50, 3.39it/s] 10%|▉ | 35750/371472 [2:53:08<27:44:38, 3.36it/s] 10%|▉ | 35751/371472 [2:53:08<27:12:18, 3.43it/s] 10%|▉ | 35752/371472 [2:53:09<27:33:44, 3.38it/s] 10%|▉ | 35753/371472 [2:53:09<27:03:49, 3.45it/s] 10%|▉ | 35754/371472 [2:53:09<26:35:31, 3.51it/s] 10%|▉ | 35755/371472 [2:53:09<26:26:49, 3.53it/s] 10%|▉ | 35756/371472 [2:53:10<28:24:15, 3.28it/s] 10%|▉ | 35757/371472 [2:53:10<28:12:33, 3.31it/s] 10%|▉ | 35758/371472 [2:53:10<27:31:11, 3.39it/s] 10%|▉ | 35759/371472 [2:53:11<28:38:09, 3.26it/s] 10%|▉ | 35760/371472 [2:53:11<27:23:47, 3.40it/s] {'loss': 4.5757, 'learning_rate': 9.137990475985261e-07, 'epoch': 1.54} + 10%|▉ | 35760/371472 [2:53:11<27:23:47, 3.40it/s] 10%|▉ | 35761/371472 [2:53:11<27:17:04, 3.42it/s] 10%|▉ | 35762/371472 [2:53:11<27:06:37, 3.44it/s] 10%|▉ | 35763/371472 [2:53:12<27:04:57, 3.44it/s] 10%|▉ | 35764/371472 [2:53:12<27:01:16, 3.45it/s] 10%|▉ | 35765/371472 [2:53:12<26:50:35, 3.47it/s] 10%|▉ | 35766/371472 [2:53:13<27:12:00, 3.43it/s] 10%|▉ | 35767/371472 [2:53:13<27:00:23, 3.45it/s] 10%|▉ | 35768/371472 [2:53:13<26:42:16, 3.49it/s] 10%|▉ | 35769/371472 [2:53:14<27:33:06, 3.38it/s] 10%|▉ | 35770/371472 [2:53:14<27:24:51, 3.40it/s] 10%|▉ | 35771/371472 [2:53:14<26:12:30, 3.56it/s] 10%|▉ | 35772/371472 [2:53:14<26:50:14, 3.47it/s] 10%|▉ | 35773/371472 [2:53:15<26:27:22, 3.52it/s] 10%|▉ | 35774/371472 [2:53:15<25:38:38, 3.64it/s] 10%|▉ | 35775/371472 [2:53:15<25:28:00, 3.66it/s] 10%|▉ | 35776/371472 [2:53:15<25:24:08, 3.67it/s] 10%|▉ | 35777/371472 [2:53:16<26:27:40, 3.52it/s] 10%|▉ | 35778/371472 [2:53:16<27:21:15, 3.41it/s] 10%|▉ | 35779/371472 [2:53:16<29:15:12, 3.19it/s] 10%|▉ | 35780/371472 [2:53:17<27:41:59, 3.37it/s] {'loss': 4.5924, 'learning_rate': 9.137505656230472e-07, 'epoch': 1.54} + 10%|▉ | 35780/371472 [2:53:17<27:41:59, 3.37it/s] 10%|▉ | 35781/371472 [2:53:17<27:12:51, 3.43it/s] 10%|▉ | 35782/371472 [2:53:17<26:38:35, 3.50it/s] 10%|▉ | 35783/371472 [2:53:18<26:55:58, 3.46it/s] 10%|▉ | 35784/371472 [2:53:18<26:13:11, 3.56it/s] 10%|▉ | 35785/371472 [2:53:18<27:56:13, 3.34it/s] 10%|▉ | 35786/371472 [2:53:18<26:44:00, 3.49it/s] 10%|▉ | 35787/371472 [2:53:19<28:34:10, 3.26it/s] 10%|▉ | 35788/371472 [2:53:19<26:50:51, 3.47it/s] 10%|▉ | 35789/371472 [2:53:19<26:24:16, 3.53it/s] 10%|▉ | 35790/371472 [2:53:20<26:34:26, 3.51it/s] 10%|▉ | 35791/371472 [2:53:20<26:35:17, 3.51it/s] 10%|▉ | 35792/371472 [2:53:20<27:56:42, 3.34it/s] 10%|▉ | 35793/371472 [2:53:20<26:34:14, 3.51it/s] 10%|▉ | 35794/371472 [2:53:21<28:00:48, 3.33it/s] 10%|▉ | 35795/371472 [2:53:21<27:11:32, 3.43it/s] 10%|▉ | 35796/371472 [2:53:21<28:09:24, 3.31it/s] 10%|▉ | 35797/371472 [2:53:22<27:05:15, 3.44it/s] 10%|▉ | 35798/371472 [2:53:22<28:11:25, 3.31it/s] 10%|▉ | 35799/371472 [2:53:22<28:46:07, 3.24it/s] 10%|▉ | 35800/371472 [2:53:23<27:13:44, 3.42it/s] {'loss': 4.5913, 'learning_rate': 9.137020836475683e-07, 'epoch': 1.54} + 10%|▉ | 35800/371472 [2:53:23<27:13:44, 3.42it/s] 10%|▉ | 35801/371472 [2:53:23<26:47:19, 3.48it/s] 10%|▉ | 35802/371472 [2:53:23<25:47:40, 3.61it/s] 10%|▉ | 35803/371472 [2:53:23<26:25:17, 3.53it/s] 10%|▉ | 35804/371472 [2:53:24<26:46:43, 3.48it/s] 10%|▉ | 35805/371472 [2:53:24<26:23:52, 3.53it/s] 10%|▉ | 35806/371472 [2:53:24<27:30:03, 3.39it/s] 10%|▉ | 35807/371472 [2:53:25<27:59:35, 3.33it/s] 10%|▉ | 35808/371472 [2:53:25<29:25:38, 3.17it/s] 10%|▉ | 35809/371472 [2:53:25<27:25:20, 3.40it/s] 10%|▉ | 35810/371472 [2:53:25<26:49:33, 3.48it/s] 10%|▉ | 35811/371472 [2:53:26<26:11:59, 3.56it/s] 10%|▉ | 35812/371472 [2:53:26<26:07:35, 3.57it/s] 10%|▉ | 35813/371472 [2:53:26<28:03:02, 3.32it/s] 10%|▉ | 35814/371472 [2:53:27<28:29:50, 3.27it/s] 10%|▉ | 35815/371472 [2:53:27<27:46:37, 3.36it/s] 10%|▉ | 35816/371472 [2:53:27<28:09:29, 3.31it/s] 10%|▉ | 35817/371472 [2:53:27<27:29:48, 3.39it/s] 10%|▉ | 35818/371472 [2:53:28<26:19:26, 3.54it/s] 10%|▉ | 35819/371472 [2:53:28<25:51:47, 3.60it/s] 10%|▉ | 35820/371472 [2:53:28<26:29:16, 3.52it/s] {'loss': 4.5572, 'learning_rate': 9.136536016720894e-07, 'epoch': 1.54} + 10%|▉ | 35820/371472 [2:53:28<26:29:16, 3.52it/s] 10%|▉ | 35821/371472 [2:53:29<26:17:37, 3.55it/s] 10%|▉ | 35822/371472 [2:53:29<25:34:29, 3.65it/s] 10%|▉ | 35823/371472 [2:53:29<26:25:52, 3.53it/s] 10%|▉ | 35824/371472 [2:53:29<26:00:52, 3.58it/s] 10%|▉ | 35825/371472 [2:53:30<25:18:34, 3.68it/s] 10%|▉ | 35826/371472 [2:53:30<25:28:05, 3.66it/s] 10%|▉ | 35827/371472 [2:53:30<25:05:19, 3.72it/s] 10%|▉ | 35828/371472 [2:53:30<25:39:10, 3.63it/s] 10%|▉ | 35829/371472 [2:53:31<26:39:46, 3.50it/s] 10%|▉ | 35830/371472 [2:53:31<26:48:47, 3.48it/s] 10%|▉ | 35831/371472 [2:53:31<27:10:33, 3.43it/s] 10%|▉ | 35832/371472 [2:53:32<26:50:24, 3.47it/s] 10%|▉ | 35833/371472 [2:53:32<27:50:14, 3.35it/s] 10%|▉ | 35834/371472 [2:53:32<26:45:12, 3.48it/s] 10%|▉ | 35835/371472 [2:53:33<26:37:47, 3.50it/s] 10%|▉ | 35836/371472 [2:53:33<28:16:04, 3.30it/s] 10%|▉ | 35837/371472 [2:53:33<28:19:33, 3.29it/s] 10%|▉ | 35838/371472 [2:53:33<28:19:58, 3.29it/s] 10%|▉ | 35839/371472 [2:53:34<27:15:21, 3.42it/s] 10%|▉ | 35840/371472 [2:53:34<26:06:24, 3.57it/s] {'loss': 4.6149, 'learning_rate': 9.136051196966106e-07, 'epoch': 1.54} + 10%|▉ | 35840/371472 [2:53:34<26:06:24, 3.57it/s] 10%|▉ | 35841/371472 [2:53:34<25:41:02, 3.63it/s] 10%|▉ | 35842/371472 [2:53:35<26:10:07, 3.56it/s] 10%|▉ | 35843/371472 [2:53:35<26:09:17, 3.56it/s] 10%|▉ | 35844/371472 [2:53:35<26:47:14, 3.48it/s] 10%|▉ | 35845/371472 [2:53:35<26:42:11, 3.49it/s] 10%|▉ | 35846/371472 [2:53:36<29:15:57, 3.19it/s] 10%|▉ | 35847/371472 [2:53:36<28:58:01, 3.22it/s] 10%|▉ | 35848/371472 [2:53:36<27:12:58, 3.43it/s] 10%|▉ | 35849/371472 [2:53:37<26:18:55, 3.54it/s] 10%|▉ | 35850/371472 [2:53:37<26:04:18, 3.58it/s] 10%|▉ | 35851/371472 [2:53:37<25:47:14, 3.62it/s] 10%|▉ | 35852/371472 [2:53:37<25:27:49, 3.66it/s] 10%|▉ | 35853/371472 [2:53:38<25:52:43, 3.60it/s] 10%|▉ | 35854/371472 [2:53:38<26:11:24, 3.56it/s] 10%|▉ | 35855/371472 [2:53:38<25:22:53, 3.67it/s] 10%|▉ | 35856/371472 [2:53:39<25:12:23, 3.70it/s] 10%|▉ | 35857/371472 [2:53:39<25:54:39, 3.60it/s] 10%|▉ | 35858/371472 [2:53:39<25:18:37, 3.68it/s] 10%|▉ | 35859/371472 [2:53:39<25:41:41, 3.63it/s] 10%|��� | 35860/371472 [2:53:40<25:02:13, 3.72it/s] {'loss': 4.5057, 'learning_rate': 9.135566377211316e-07, 'epoch': 1.54} + 10%|▉ | 35860/371472 [2:53:40<25:02:13, 3.72it/s] 10%|▉ | 35861/371472 [2:53:40<24:37:30, 3.79it/s] 10%|▉ | 35862/371472 [2:53:40<24:47:27, 3.76it/s] 10%|▉ | 35863/371472 [2:53:40<25:21:37, 3.68it/s] 10%|▉ | 35864/371472 [2:53:41<27:11:57, 3.43it/s] 10%|▉ | 35865/371472 [2:53:41<26:18:05, 3.54it/s] 10%|▉ | 35866/371472 [2:53:41<26:22:05, 3.54it/s] 10%|▉ | 35867/371472 [2:53:42<26:16:57, 3.55it/s] 10%|▉ | 35868/371472 [2:53:42<27:28:16, 3.39it/s] 10%|▉ | 35869/371472 [2:53:42<27:29:35, 3.39it/s] 10%|▉ | 35870/371472 [2:53:42<27:06:23, 3.44it/s] 10%|▉ | 35871/371472 [2:53:43<27:53:42, 3.34it/s] 10%|▉ | 35872/371472 [2:53:43<27:48:35, 3.35it/s] 10%|▉ | 35873/371472 [2:53:43<28:58:16, 3.22it/s] 10%|▉ | 35874/371472 [2:53:44<29:29:58, 3.16it/s] 10%|▉ | 35875/371472 [2:53:44<28:25:25, 3.28it/s] 10%|▉ | 35876/371472 [2:53:44<27:08:29, 3.43it/s] 10%|▉ | 35877/371472 [2:53:45<26:38:02, 3.50it/s] 10%|▉ | 35878/371472 [2:53:45<26:57:23, 3.46it/s] 10%|▉ | 35879/371472 [2:53:45<27:02:44, 3.45it/s] 10%|▉ | 35880/371472 [2:53:45<26:23:33, 3.53it/s] {'loss': 4.5371, 'learning_rate': 9.135081557456528e-07, 'epoch': 1.55} + 10%|▉ | 35880/371472 [2:53:45<26:23:33, 3.53it/s] 10%|▉ | 35881/371472 [2:53:46<25:36:26, 3.64it/s] 10%|▉ | 35882/371472 [2:53:46<25:12:51, 3.70it/s] 10%|▉ | 35883/371472 [2:53:46<25:36:42, 3.64it/s] 10%|▉ | 35884/371472 [2:53:47<25:52:41, 3.60it/s] 10%|▉ | 35885/371472 [2:53:47<27:12:00, 3.43it/s] 10%|▉ | 35886/371472 [2:53:47<26:10:12, 3.56it/s] 10%|▉ | 35887/371472 [2:53:47<26:17:02, 3.55it/s] 10%|▉ | 35888/371472 [2:53:48<27:49:31, 3.35it/s] 10%|▉ | 35889/371472 [2:53:48<28:31:04, 3.27it/s] 10%|▉ | 35890/371472 [2:53:48<27:07:07, 3.44it/s] 10%|▉ | 35891/371472 [2:53:49<27:30:56, 3.39it/s] 10%|▉ | 35892/371472 [2:53:49<27:31:58, 3.39it/s] 10%|▉ | 35893/371472 [2:53:49<26:57:37, 3.46it/s] 10%|▉ | 35894/371472 [2:53:50<28:58:07, 3.22it/s] 10%|▉ | 35895/371472 [2:53:50<28:14:10, 3.30it/s] 10%|▉ | 35896/371472 [2:53:50<26:49:31, 3.47it/s] 10%|▉ | 35897/371472 [2:53:50<26:11:26, 3.56it/s] 10%|▉ | 35898/371472 [2:53:51<26:19:34, 3.54it/s] 10%|▉ | 35899/371472 [2:53:51<26:20:07, 3.54it/s] 10%|▉ | 35900/371472 [2:53:51<25:54:22, 3.60it/s] {'loss': 4.4889, 'learning_rate': 9.134596737701738e-07, 'epoch': 1.55} + 10%|▉ | 35900/371472 [2:53:51<25:54:22, 3.60it/s] 10%|▉ | 35901/371472 [2:53:51<25:01:43, 3.72it/s] 10%|▉ | 35902/371472 [2:53:52<24:17:52, 3.84it/s] 10%|▉ | 35903/371472 [2:53:52<24:03:13, 3.88it/s] 10%|▉ | 35904/371472 [2:53:52<24:51:00, 3.75it/s] 10%|▉ | 35905/371472 [2:53:52<24:36:45, 3.79it/s] 10%|▉ | 35906/371472 [2:53:53<26:50:02, 3.47it/s] 10%|▉ | 35907/371472 [2:53:53<28:22:28, 3.29it/s] 10%|▉ | 35908/371472 [2:53:53<27:58:45, 3.33it/s] 10%|▉ | 35909/371472 [2:53:54<28:06:41, 3.32it/s] 10%|▉ | 35910/371472 [2:53:54<28:04:06, 3.32it/s] 10%|▉ | 35911/371472 [2:53:54<27:13:55, 3.42it/s] 10%|▉ | 35912/371472 [2:53:55<26:46:21, 3.48it/s] 10%|▉ | 35913/371472 [2:53:55<26:15:32, 3.55it/s] 10%|▉ | 35914/371472 [2:53:55<27:38:06, 3.37it/s] 10%|▉ | 35915/371472 [2:53:55<27:25:50, 3.40it/s] 10%|▉ | 35916/371472 [2:53:56<26:20:06, 3.54it/s] 10%|▉ | 35917/371472 [2:53:56<25:45:10, 3.62it/s] 10%|▉ | 35918/371472 [2:53:56<25:54:02, 3.60it/s] 10%|▉ | 35919/371472 [2:53:57<25:30:22, 3.65it/s] 10%|▉ | 35920/371472 [2:53:57<26:05:59, 3.57it/s] {'loss': 4.6459, 'learning_rate': 9.134111917946949e-07, 'epoch': 1.55} + 10%|▉ | 35920/371472 [2:53:57<26:05:59, 3.57it/s] 10%|▉ | 35921/371472 [2:53:57<26:19:03, 3.54it/s] 10%|▉ | 35922/371472 [2:53:57<27:21:24, 3.41it/s] 10%|▉ | 35923/371472 [2:53:58<26:38:57, 3.50it/s] 10%|▉ | 35924/371472 [2:53:58<25:46:47, 3.62it/s] 10%|▉ | 35925/371472 [2:53:58<27:15:06, 3.42it/s] 10%|▉ | 35926/371472 [2:53:59<27:18:46, 3.41it/s] 10%|▉ | 35927/371472 [2:53:59<27:28:35, 3.39it/s] 10%|▉ | 35928/371472 [2:53:59<28:31:36, 3.27it/s] 10%|▉ | 35929/371472 [2:54:00<27:33:17, 3.38it/s] 10%|▉ | 35930/371472 [2:54:00<26:54:18, 3.46it/s] 10%|▉ | 35931/371472 [2:54:00<27:03:23, 3.44it/s] 10%|▉ | 35932/371472 [2:54:00<26:56:09, 3.46it/s] 10%|▉ | 35933/371472 [2:54:01<28:13:21, 3.30it/s] 10%|▉ | 35934/371472 [2:54:01<30:40:35, 3.04it/s] 10%|▉ | 35935/371472 [2:54:01<30:16:56, 3.08it/s] 10%|▉ | 35936/371472 [2:54:02<30:01:28, 3.10it/s] 10%|▉ | 35937/371472 [2:54:02<28:26:31, 3.28it/s] 10%|▉ | 35938/371472 [2:54:02<27:02:36, 3.45it/s] 10%|▉ | 35939/371472 [2:54:03<26:48:49, 3.48it/s] 10%|▉ | 35940/371472 [2:54:03<25:46:50, 3.62it/s] {'loss': 4.2167, 'learning_rate': 9.13362709819216e-07, 'epoch': 1.55} + 10%|▉ | 35940/371472 [2:54:03<25:46:50, 3.62it/s] 10%|▉ | 35941/371472 [2:54:03<25:22:14, 3.67it/s] 10%|▉ | 35942/371472 [2:54:03<26:23:42, 3.53it/s] 10%|▉ | 35943/371472 [2:54:04<26:18:40, 3.54it/s] 10%|▉ | 35944/371472 [2:54:04<26:25:14, 3.53it/s] 10%|▉ | 35945/371472 [2:54:04<27:54:41, 3.34it/s] 10%|▉ | 35946/371472 [2:54:05<26:50:16, 3.47it/s] 10%|▉ | 35947/371472 [2:54:05<26:31:30, 3.51it/s] 10%|▉ | 35948/371472 [2:54:05<27:00:36, 3.45it/s] 10%|▉ | 35949/371472 [2:54:05<26:15:45, 3.55it/s] 10%|▉ | 35950/371472 [2:54:06<27:50:42, 3.35it/s] 10%|▉ | 35951/371472 [2:54:06<27:28:03, 3.39it/s] 10%|▉ | 35952/371472 [2:54:06<26:48:17, 3.48it/s] 10%|▉ | 35953/371472 [2:54:07<27:09:38, 3.43it/s] 10%|▉ | 35954/371472 [2:54:07<26:55:52, 3.46it/s] 10%|▉ | 35955/371472 [2:54:07<26:08:06, 3.57it/s] 10%|▉ | 35956/371472 [2:54:07<26:59:13, 3.45it/s] 10%|▉ | 35957/371472 [2:54:08<26:21:53, 3.53it/s] 10%|▉ | 35958/371472 [2:54:08<25:37:08, 3.64it/s] 10%|▉ | 35959/371472 [2:54:08<24:51:16, 3.75it/s] 10%|▉ | 35960/371472 [2:54:08<25:34:10, 3.64it/s] {'loss': 4.312, 'learning_rate': 9.133142278437372e-07, 'epoch': 1.55} + 10%|▉ | 35960/371472 [2:54:08<25:34:10, 3.64it/s] 10%|▉ | 35961/371472 [2:54:09<26:07:13, 3.57it/s] 10%|▉ | 35962/371472 [2:54:09<25:42:10, 3.63it/s] 10%|▉ | 35963/371472 [2:54:09<24:52:47, 3.75it/s] 10%|▉ | 35964/371472 [2:54:10<24:35:50, 3.79it/s] 10%|▉ | 35965/371472 [2:54:10<25:13:53, 3.69it/s] 10%|▉ | 35966/371472 [2:54:10<26:06:25, 3.57it/s] 10%|▉ | 35967/371472 [2:54:10<26:02:31, 3.58it/s] 10%|▉ | 35968/371472 [2:54:11<28:03:05, 3.32it/s] 10%|▉ | 35969/371472 [2:54:11<28:34:31, 3.26it/s] 10%|▉ | 35970/371472 [2:54:11<27:54:01, 3.34it/s] 10%|▉ | 35971/371472 [2:54:12<27:16:39, 3.42it/s] 10%|▉ | 35972/371472 [2:54:12<26:53:09, 3.47it/s] 10%|▉ | 35973/371472 [2:54:12<28:14:30, 3.30it/s] 10%|▉ | 35974/371472 [2:54:13<28:21:14, 3.29it/s] 10%|▉ | 35975/371472 [2:54:13<29:07:42, 3.20it/s] 10%|▉ | 35976/371472 [2:54:13<29:28:13, 3.16it/s] 10%|▉ | 35977/371472 [2:54:13<28:16:05, 3.30it/s] 10%|▉ | 35978/371472 [2:54:14<27:54:45, 3.34it/s] 10%|▉ | 35979/371472 [2:54:14<27:25:08, 3.40it/s] 10%|▉ | 35980/371472 [2:54:14<26:57:13, 3.46it/s] {'loss': 4.2124, 'learning_rate': 9.132657458682583e-07, 'epoch': 1.55} + 10%|▉ | 35980/371472 [2:54:14<26:57:13, 3.46it/s] 10%|▉ | 35981/371472 [2:54:15<26:22:36, 3.53it/s] 10%|▉ | 35982/371472 [2:54:15<25:31:31, 3.65it/s] 10%|▉ | 35983/371472 [2:54:15<26:09:39, 3.56it/s] 10%|▉ | 35984/371472 [2:54:16<28:36:57, 3.26it/s] 10%|▉ | 35985/371472 [2:54:16<28:13:12, 3.30it/s] 10%|▉ | 35986/371472 [2:54:16<27:20:02, 3.41it/s] 10%|▉ | 35987/371472 [2:54:16<27:24:06, 3.40it/s] 10%|▉ | 35988/371472 [2:54:17<26:28:58, 3.52it/s] 10%|▉ | 35989/371472 [2:54:17<25:30:47, 3.65it/s] 10%|▉ | 35990/371472 [2:54:17<25:44:34, 3.62it/s] 10%|▉ | 35991/371472 [2:54:17<25:53:01, 3.60it/s] 10%|▉ | 35992/371472 [2:54:18<25:12:32, 3.70it/s] 10%|▉ | 35993/371472 [2:54:18<26:47:48, 3.48it/s] 10%|▉ | 35994/371472 [2:54:18<26:19:44, 3.54it/s] 10%|▉ | 35995/371472 [2:54:19<28:34:43, 3.26it/s] 10%|▉ | 35996/371472 [2:54:19<27:53:24, 3.34it/s] 10%|▉ | 35997/371472 [2:54:19<29:12:56, 3.19it/s] 10%|▉ | 35998/371472 [2:54:20<28:03:15, 3.32it/s] 10%|▉ | 35999/371472 [2:54:20<28:34:23, 3.26it/s] 10%|▉ | 36000/371472 [2:54:20<29:24:07, 3.17it/s] {'loss': 4.6083, 'learning_rate': 9.132172638927794e-07, 'epoch': 1.55} + 10%|▉ | 36000/371472 [2:54:20<29:24:07, 3.17it/s] 10%|▉ | 36001/371472 [2:54:21<29:05:44, 3.20it/s] 10%|▉ | 36002/371472 [2:54:21<27:24:23, 3.40it/s] 10%|▉ | 36003/371472 [2:54:21<26:49:40, 3.47it/s] 10%|▉ | 36004/371472 [2:54:21<27:04:48, 3.44it/s] 10%|▉ | 36005/371472 [2:54:22<26:59:25, 3.45it/s] 10%|▉ | 36006/371472 [2:54:22<26:28:56, 3.52it/s] 10%|▉ | 36007/371472 [2:54:22<25:26:17, 3.66it/s] 10%|▉ | 36008/371472 [2:54:22<26:00:56, 3.58it/s] 10%|▉ | 36009/371472 [2:54:23<27:07:12, 3.44it/s] 10%|▉ | 36010/371472 [2:54:23<26:16:54, 3.55it/s] 10%|▉ | 36011/371472 [2:54:23<27:42:00, 3.36it/s] 10%|▉ | 36012/371472 [2:54:24<26:43:48, 3.49it/s] 10%|▉ | 36013/371472 [2:54:24<25:51:24, 3.60it/s] 10%|▉ | 36014/371472 [2:54:24<25:55:32, 3.59it/s] 10%|▉ | 36015/371472 [2:54:24<27:47:44, 3.35it/s] 10%|▉ | 36016/371472 [2:54:25<26:58:40, 3.45it/s] 10%|▉ | 36017/371472 [2:54:25<26:15:24, 3.55it/s] 10%|▉ | 36018/371472 [2:54:25<26:10:24, 3.56it/s] 10%|▉ | 36019/371472 [2:54:26<25:29:39, 3.65it/s] 10%|▉ | 36020/371472 [2:54:26<25:31:37, 3.65it/s] {'loss': 4.5389, 'learning_rate': 9.131687819173004e-07, 'epoch': 1.55} + 10%|▉ | 36020/371472 [2:54:26<25:31:37, 3.65it/s] 10%|▉ | 36021/371472 [2:54:26<25:58:01, 3.59it/s] 10%|▉ | 36022/371472 [2:54:26<26:14:21, 3.55it/s] 10%|▉ | 36023/371472 [2:54:27<25:48:45, 3.61it/s] 10%|▉ | 36024/371472 [2:54:27<27:01:20, 3.45it/s] 10%|▉ | 36025/371472 [2:54:27<28:36:33, 3.26it/s] 10%|▉ | 36026/371472 [2:54:28<29:14:01, 3.19it/s] 10%|▉ | 36027/371472 [2:54:28<27:57:04, 3.33it/s] 10%|▉ | 36028/371472 [2:54:28<27:52:02, 3.34it/s] 10%|▉ | 36029/371472 [2:54:28<26:34:29, 3.51it/s] 10%|▉ | 36030/371472 [2:54:29<28:52:28, 3.23it/s] 10%|▉ | 36031/371472 [2:54:29<27:01:43, 3.45it/s] 10%|▉ | 36032/371472 [2:54:29<28:06:21, 3.32it/s] 10%|▉ | 36033/371472 [2:54:30<26:40:07, 3.49it/s] 10%|▉ | 36034/371472 [2:54:30<26:40:27, 3.49it/s] 10%|▉ | 36035/371472 [2:54:30<25:45:04, 3.62it/s] 10%|▉ | 36036/371472 [2:54:31<26:40:00, 3.49it/s] 10%|▉ | 36037/371472 [2:54:31<26:20:24, 3.54it/s] 10%|▉ | 36038/371472 [2:54:31<25:55:44, 3.59it/s] 10%|▉ | 36039/371472 [2:54:31<26:03:58, 3.57it/s] 10%|▉ | 36040/371472 [2:54:32<28:15:21, 3.30it/s] {'loss': 4.2441, 'learning_rate': 9.131202999418216e-07, 'epoch': 1.55} + 10%|▉ | 36040/371472 [2:54:32<28:15:21, 3.30it/s] 10%|▉ | 36041/371472 [2:54:32<27:28:17, 3.39it/s] 10%|▉ | 36042/371472 [2:54:32<28:07:27, 3.31it/s] 10%|▉ | 36043/371472 [2:54:33<28:02:57, 3.32it/s] 10%|▉ | 36044/371472 [2:54:33<30:54:51, 3.01it/s] 10%|▉ | 36045/371472 [2:54:33<31:04:21, 3.00it/s] 10%|▉ | 36046/371472 [2:54:34<31:05:20, 3.00it/s] 10%|▉ | 36047/371472 [2:54:34<29:21:28, 3.17it/s] 10%|▉ | 36048/371472 [2:54:34<29:37:59, 3.14it/s] 10%|▉ | 36049/371472 [2:54:35<27:51:50, 3.34it/s] 10%|▉ | 36050/371472 [2:54:35<27:38:20, 3.37it/s] 10%|▉ | 36051/371472 [2:54:35<26:41:58, 3.49it/s] 10%|▉ | 36052/371472 [2:54:35<27:41:08, 3.37it/s] 10%|▉ | 36053/371472 [2:54:36<26:42:55, 3.49it/s] 10%|▉ | 36054/371472 [2:54:36<27:46:01, 3.36it/s] 10%|▉ | 36055/371472 [2:54:36<26:46:23, 3.48it/s] 10%|▉ | 36056/371472 [2:54:37<28:19:50, 3.29it/s] 10%|▉ | 36057/371472 [2:54:37<27:22:01, 3.40it/s] 10%|▉ | 36058/371472 [2:54:37<27:04:54, 3.44it/s] 10%|▉ | 36059/371472 [2:54:37<27:03:57, 3.44it/s] 10%|▉ | 36060/371472 [2:54:38<27:06:00, 3.44it/s] {'loss': 4.3556, 'learning_rate': 9.130718179663427e-07, 'epoch': 1.55} + 10%|▉ | 36060/371472 [2:54:38<27:06:00, 3.44it/s] 10%|▉ | 36061/371472 [2:54:38<27:20:08, 3.41it/s] 10%|▉ | 36062/371472 [2:54:38<27:14:25, 3.42it/s] 10%|▉ | 36063/371472 [2:54:39<26:42:46, 3.49it/s] 10%|▉ | 36064/371472 [2:54:39<27:42:37, 3.36it/s] 10%|▉ | 36065/371472 [2:54:39<27:09:33, 3.43it/s] 10%|▉ | 36066/371472 [2:54:39<27:01:56, 3.45it/s] 10%|▉ | 36067/371472 [2:54:40<26:49:45, 3.47it/s] 10%|▉ | 36068/371472 [2:54:40<28:25:21, 3.28it/s] 10%|▉ | 36069/371472 [2:54:40<28:12:41, 3.30it/s] 10%|▉ | 36070/371472 [2:54:41<27:27:16, 3.39it/s] 10%|▉ | 36071/371472 [2:54:41<26:47:41, 3.48it/s] 10%|▉ | 36072/371472 [2:54:41<26:17:58, 3.54it/s] 10%|��� | 36073/371472 [2:54:42<26:25:18, 3.53it/s] 10%|▉ | 36074/371472 [2:54:42<26:25:26, 3.53it/s] 10%|▉ | 36075/371472 [2:54:42<25:43:08, 3.62it/s] 10%|▉ | 36076/371472 [2:54:42<25:10:08, 3.70it/s] 10%|▉ | 36077/371472 [2:54:43<24:37:20, 3.78it/s] 10%|▉ | 36078/371472 [2:54:43<24:34:57, 3.79it/s] 10%|▉ | 36079/371472 [2:54:43<26:27:47, 3.52it/s] 10%|▉ | 36080/371472 [2:54:43<26:31:41, 3.51it/s] {'loss': 4.3249, 'learning_rate': 9.130233359908638e-07, 'epoch': 1.55} + 10%|▉ | 36080/371472 [2:54:43<26:31:41, 3.51it/s] 10%|▉ | 36081/371472 [2:54:44<27:25:39, 3.40it/s] 10%|▉ | 36082/371472 [2:54:44<27:21:55, 3.40it/s] 10%|▉ | 36083/371472 [2:54:44<27:05:24, 3.44it/s] 10%|▉ | 36084/371472 [2:54:45<26:27:02, 3.52it/s] 10%|▉ | 36085/371472 [2:54:45<27:10:03, 3.43it/s] 10%|▉ | 36086/371472 [2:54:45<26:40:42, 3.49it/s] 10%|▉ | 36087/371472 [2:54:45<25:59:01, 3.59it/s] 10%|▉ | 36088/371472 [2:54:46<25:52:38, 3.60it/s] 10%|▉ | 36089/371472 [2:54:46<26:38:51, 3.50it/s] 10%|▉ | 36090/371472 [2:54:46<26:03:10, 3.58it/s] 10%|▉ | 36091/371472 [2:54:47<25:45:40, 3.62it/s] 10%|▉ | 36092/371472 [2:54:47<24:51:54, 3.75it/s] 10%|▉ | 36093/371472 [2:54:47<24:44:55, 3.76it/s] 10%|▉ | 36094/371472 [2:54:47<25:10:27, 3.70it/s] 10%|▉ | 36095/371472 [2:54:48<25:24:19, 3.67it/s] 10%|▉ | 36096/371472 [2:54:48<26:05:41, 3.57it/s] 10%|▉ | 36097/371472 [2:54:48<26:22:01, 3.53it/s] 10%|▉ | 36098/371472 [2:54:48<25:47:27, 3.61it/s] 10%|▉ | 36099/371472 [2:54:49<26:09:52, 3.56it/s] 10%|▉ | 36100/371472 [2:54:49<27:17:36, 3.41it/s] {'loss': 4.3299, 'learning_rate': 9.129748540153849e-07, 'epoch': 1.55} + 10%|▉ | 36100/371472 [2:54:49<27:17:36, 3.41it/s] 10%|▉ | 36101/371472 [2:54:49<27:10:15, 3.43it/s] 10%|▉ | 36102/371472 [2:54:50<26:48:22, 3.48it/s] 10%|▉ | 36103/371472 [2:54:50<34:39:16, 2.69it/s] 10%|▉ | 36104/371472 [2:54:51<33:20:14, 2.79it/s] 10%|▉ | 36105/371472 [2:54:51<31:29:19, 2.96it/s] 10%|▉ | 36106/371472 [2:54:51<29:02:08, 3.21it/s] 10%|▉ | 36107/371472 [2:54:51<27:57:09, 3.33it/s] 10%|▉ | 36108/371472 [2:54:52<26:50:47, 3.47it/s] 10%|▉ | 36109/371472 [2:54:52<28:07:01, 3.31it/s] 10%|▉ | 36110/371472 [2:54:52<26:42:01, 3.49it/s] 10%|▉ | 36111/371472 [2:54:53<27:25:10, 3.40it/s] 10%|▉ | 36112/371472 [2:54:53<26:59:25, 3.45it/s] 10%|▉ | 36113/371472 [2:54:53<28:16:28, 3.29it/s] 10%|▉ | 36114/371472 [2:54:53<28:59:09, 3.21it/s] 10%|▉ | 36115/371472 [2:54:54<28:45:29, 3.24it/s] 10%|▉ | 36116/371472 [2:54:54<27:33:33, 3.38it/s] 10%|▉ | 36117/371472 [2:54:54<27:24:25, 3.40it/s] 10%|▉ | 36118/371472 [2:54:55<26:20:02, 3.54it/s] 10%|▉ | 36119/371472 [2:54:55<26:23:48, 3.53it/s] 10%|▉ | 36120/371472 [2:54:55<25:41:31, 3.63it/s] {'loss': 4.6517, 'learning_rate': 9.129263720399061e-07, 'epoch': 1.56} + 10%|▉ | 36120/371472 [2:54:55<25:41:31, 3.63it/s] 10%|▉ | 36121/371472 [2:54:55<25:51:05, 3.60it/s] 10%|▉ | 36122/371472 [2:54:56<26:39:44, 3.49it/s] 10%|▉ | 36123/371472 [2:54:56<26:03:07, 3.58it/s] 10%|▉ | 36124/371472 [2:54:56<25:48:55, 3.61it/s] 10%|▉ | 36125/371472 [2:54:57<26:26:41, 3.52it/s] 10%|▉ | 36126/371472 [2:54:57<25:46:53, 3.61it/s] 10%|▉ | 36127/371472 [2:54:57<26:54:20, 3.46it/s] 10%|▉ | 36128/371472 [2:54:57<28:29:02, 3.27it/s] 10%|▉ | 36129/371472 [2:54:58<27:02:24, 3.44it/s] 10%|▉ | 36130/371472 [2:54:58<26:24:52, 3.53it/s] 10%|▉ | 36131/371472 [2:54:58<26:57:35, 3.46it/s] 10%|▉ | 36132/371472 [2:54:59<25:52:45, 3.60it/s] 10%|▉ | 36133/371472 [2:54:59<26:20:07, 3.54it/s] 10%|▉ | 36134/371472 [2:54:59<25:42:52, 3.62it/s] 10%|▉ | 36135/371472 [2:54:59<26:02:42, 3.58it/s] 10%|▉ | 36136/371472 [2:55:00<26:05:31, 3.57it/s] 10%|▉ | 36137/371472 [2:55:00<26:10:13, 3.56it/s] 10%|▉ | 36138/371472 [2:55:00<25:56:28, 3.59it/s] 10%|▉ | 36139/371472 [2:55:00<25:24:47, 3.67it/s] 10%|▉ | 36140/371472 [2:55:01<26:07:20, 3.57it/s] {'loss': 4.3534, 'learning_rate': 9.128778900644272e-07, 'epoch': 1.56} + 10%|▉ | 36140/371472 [2:55:01<26:07:20, 3.57it/s] 10%|▉ | 36141/371472 [2:55:01<25:48:56, 3.61it/s] 10%|▉ | 36142/371472 [2:55:01<26:22:55, 3.53it/s] 10%|▉ | 36143/371472 [2:55:02<26:44:39, 3.48it/s] 10%|▉ | 36144/371472 [2:55:02<26:28:48, 3.52it/s] 10%|▉ | 36145/371472 [2:55:02<26:11:58, 3.56it/s] 10%|▉ | 36146/371472 [2:55:02<26:11:35, 3.56it/s] 10%|▉ | 36147/371472 [2:55:03<25:49:44, 3.61it/s] 10%|▉ | 36148/371472 [2:55:03<26:08:30, 3.56it/s] 10%|▉ | 36149/371472 [2:55:03<26:20:12, 3.54it/s] 10%|▉ | 36150/371472 [2:55:04<26:10:40, 3.56it/s] 10%|▉ | 36151/371472 [2:55:04<26:52:02, 3.47it/s] 10%|▉ | 36152/371472 [2:55:04<29:17:07, 3.18it/s] 10%|▉ | 36153/371472 [2:55:05<29:21:54, 3.17it/s] 10%|▉ | 36154/371472 [2:55:05<28:30:23, 3.27it/s] 10%|▉ | 36155/371472 [2:55:05<29:22:46, 3.17it/s] 10%|▉ | 36156/371472 [2:55:05<28:04:42, 3.32it/s] 10%|▉ | 36157/371472 [2:55:06<26:44:55, 3.48it/s] 10%|▉ | 36158/371472 [2:55:06<27:09:42, 3.43it/s] 10%|▉ | 36159/371472 [2:55:06<26:39:02, 3.49it/s] 10%|▉ | 36160/371472 [2:55:07<28:59:36, 3.21it/s] {'loss': 4.7409, 'learning_rate': 9.128294080889482e-07, 'epoch': 1.56} + 10%|▉ | 36160/371472 [2:55:07<28:59:36, 3.21it/s] 10%|▉ | 36161/371472 [2:55:07<28:25:50, 3.28it/s] 10%|▉ | 36162/371472 [2:55:07<29:22:14, 3.17it/s] 10%|▉ | 36163/371472 [2:55:08<27:26:35, 3.39it/s] 10%|▉ | 36164/371472 [2:55:08<26:39:41, 3.49it/s] 10%|▉ | 36165/371472 [2:55:08<26:38:43, 3.50it/s] 10%|▉ | 36166/371472 [2:55:08<26:11:27, 3.56it/s] 10%|▉ | 36167/371472 [2:55:09<29:40:24, 3.14it/s] 10%|▉ | 36168/371472 [2:55:09<28:47:55, 3.23it/s] 10%|▉ | 36169/371472 [2:55:09<27:49:21, 3.35it/s] 10%|▉ | 36170/371472 [2:55:10<26:56:33, 3.46it/s] 10%|▉ | 36171/371472 [2:55:10<25:55:33, 3.59it/s] 10%|▉ | 36172/371472 [2:55:10<26:03:42, 3.57it/s] 10%|▉ | 36173/371472 [2:55:10<25:45:13, 3.62it/s] 10%|▉ | 36174/371472 [2:55:11<27:27:17, 3.39it/s] 10%|▉ | 36175/371472 [2:55:11<27:00:15, 3.45it/s] 10%|▉ | 36176/371472 [2:55:11<27:41:26, 3.36it/s] 10%|▉ | 36177/371472 [2:55:12<28:05:00, 3.32it/s] 10%|▉ | 36178/371472 [2:55:12<28:56:26, 3.22it/s] 10%|▉ | 36179/371472 [2:55:12<28:11:57, 3.30it/s] 10%|▉ | 36180/371472 [2:55:13<26:52:27, 3.47it/s] {'loss': 4.516, 'learning_rate': 9.127809261134694e-07, 'epoch': 1.56} + 10%|▉ | 36180/371472 [2:55:13<26:52:27, 3.47it/s] 10%|▉ | 36181/371472 [2:55:13<28:54:52, 3.22it/s] 10%|▉ | 36182/371472 [2:55:13<29:27:25, 3.16it/s] 10%|▉ | 36183/371472 [2:55:13<27:56:19, 3.33it/s] 10%|▉ | 36184/371472 [2:55:14<27:33:26, 3.38it/s] 10%|▉ | 36185/371472 [2:55:14<27:17:02, 3.41it/s] 10%|▉ | 36186/371472 [2:55:14<26:19:17, 3.54it/s] 10%|▉ | 36187/371472 [2:55:15<26:34:33, 3.50it/s] 10%|▉ | 36188/371472 [2:55:15<26:52:10, 3.47it/s] 10%|▉ | 36189/371472 [2:55:15<26:41:47, 3.49it/s] 10%|▉ | 36190/371472 [2:55:16<29:25:28, 3.17it/s] 10%|▉ | 36191/371472 [2:55:16<30:20:27, 3.07it/s] 10%|▉ | 36192/371472 [2:55:16<30:33:49, 3.05it/s] 10%|▉ | 36193/371472 [2:55:17<31:02:31, 3.00it/s] 10%|▉ | 36194/371472 [2:55:17<28:48:47, 3.23it/s] 10%|▉ | 36195/371472 [2:55:17<27:51:40, 3.34it/s] 10%|▉ | 36196/371472 [2:55:17<26:46:17, 3.48it/s] 10%|▉ | 36197/371472 [2:55:18<26:44:17, 3.48it/s] 10%|▉ | 36198/371472 [2:55:18<26:33:28, 3.51it/s] 10%|▉ | 36199/371472 [2:55:18<27:14:42, 3.42it/s] 10%|▉ | 36200/371472 [2:55:19<27:08:37, 3.43it/s] {'loss': 4.1291, 'learning_rate': 9.127324441379905e-07, 'epoch': 1.56} + 10%|▉ | 36200/371472 [2:55:19<27:08:37, 3.43it/s] 10%|▉ | 36201/371472 [2:55:19<27:09:23, 3.43it/s] 10%|▉ | 36202/371472 [2:55:19<27:28:45, 3.39it/s] 10%|▉ | 36203/371472 [2:55:19<26:13:37, 3.55it/s] 10%|▉ | 36204/371472 [2:55:20<27:05:21, 3.44it/s] 10%|▉ | 36205/371472 [2:55:20<27:19:53, 3.41it/s] 10%|▉ | 36206/371472 [2:55:20<26:34:15, 3.50it/s] 10%|▉ | 36207/371472 [2:55:21<26:02:06, 3.58it/s] 10%|▉ | 36208/371472 [2:55:21<25:22:38, 3.67it/s] 10%|▉ | 36209/371472 [2:55:21<24:28:07, 3.81it/s] 10%|▉ | 36210/371472 [2:55:21<24:44:48, 3.76it/s] 10%|▉ | 36211/371472 [2:55:22<25:33:16, 3.64it/s] 10%|▉ | 36212/371472 [2:55:22<25:14:21, 3.69it/s] 10%|▉ | 36213/371472 [2:55:22<25:57:34, 3.59it/s] 10%|▉ | 36214/371472 [2:55:22<25:55:47, 3.59it/s] 10%|▉ | 36215/371472 [2:55:23<26:22:45, 3.53it/s] 10%|▉ | 36216/371472 [2:55:23<26:01:49, 3.58it/s] 10%|▉ | 36217/371472 [2:55:23<28:30:35, 3.27it/s] 10%|▉ | 36218/371472 [2:55:24<27:51:01, 3.34it/s] 10%|▉ | 36219/371472 [2:55:24<26:50:18, 3.47it/s] 10%|▉ | 36220/371472 [2:55:24<26:40:27, 3.49it/s] {'loss': 4.5695, 'learning_rate': 9.126839621625115e-07, 'epoch': 1.56} + 10%|▉ | 36220/371472 [2:55:24<26:40:27, 3.49it/s] 10%|▉ | 36221/371472 [2:55:24<25:59:30, 3.58it/s] 10%|▉ | 36222/371472 [2:55:25<25:25:52, 3.66it/s] 10%|▉ | 36223/371472 [2:55:25<25:05:19, 3.71it/s] 10%|▉ | 36224/371472 [2:55:25<26:19:12, 3.54it/s] 10%|▉ | 36225/371472 [2:55:26<25:46:43, 3.61it/s] 10%|▉ | 36226/371472 [2:55:26<25:39:36, 3.63it/s] 10%|▉ | 36227/371472 [2:55:26<26:15:50, 3.55it/s] 10%|▉ | 36228/371472 [2:55:26<25:56:34, 3.59it/s] 10%|▉ | 36229/371472 [2:55:27<25:42:58, 3.62it/s] 10%|▉ | 36230/371472 [2:55:27<26:00:30, 3.58it/s] 10%|▉ | 36231/371472 [2:55:27<25:47:40, 3.61it/s] 10%|▉ | 36232/371472 [2:55:28<25:53:36, 3.60it/s] 10%|▉ | 36233/371472 [2:55:28<25:47:33, 3.61it/s] 10%|▉ | 36234/371472 [2:55:28<25:11:30, 3.70it/s] 10%|▉ | 36235/371472 [2:55:28<27:53:45, 3.34it/s] 10%|▉ | 36236/371472 [2:55:29<30:37:47, 3.04it/s] 10%|▉ | 36237/371472 [2:55:29<28:59:17, 3.21it/s] 10%|▉ | 36238/371472 [2:55:29<27:51:51, 3.34it/s] 10%|▉ | 36239/371472 [2:55:30<28:39:04, 3.25it/s] 10%|▉ | 36240/371472 [2:55:30<28:04:10, 3.32it/s] {'loss': 4.2779, 'learning_rate': 9.126354801870327e-07, 'epoch': 1.56} + 10%|▉ | 36240/371472 [2:55:30<28:04:10, 3.32it/s] 10%|▉ | 36241/371472 [2:55:30<30:05:43, 3.09it/s] 10%|▉ | 36242/371472 [2:55:31<28:46:32, 3.24it/s] 10%|▉ | 36243/371472 [2:55:31<28:12:05, 3.30it/s] 10%|▉ | 36244/371472 [2:55:31<26:57:25, 3.45it/s] 10%|▉ | 36245/371472 [2:55:31<26:19:48, 3.54it/s] 10%|▉ | 36246/371472 [2:55:32<26:07:48, 3.56it/s] 10%|▉ | 36247/371472 [2:55:32<25:42:49, 3.62it/s] 10%|▉ | 36248/371472 [2:55:32<26:19:39, 3.54it/s] 10%|▉ | 36249/371472 [2:55:33<25:45:59, 3.61it/s] 10%|▉ | 36250/371472 [2:55:33<25:28:52, 3.65it/s] 10%|▉ | 36251/371472 [2:55:33<26:22:02, 3.53it/s] 10%|▉ | 36252/371472 [2:55:33<27:16:04, 3.41it/s] 10%|▉ | 36253/371472 [2:55:34<28:49:30, 3.23it/s] 10%|▉ | 36254/371472 [2:55:34<27:54:01, 3.34it/s] 10%|▉ | 36255/371472 [2:55:34<30:37:36, 3.04it/s] 10%|▉ | 36256/371472 [2:55:35<29:23:50, 3.17it/s] 10%|▉ | 36257/371472 [2:55:35<28:49:00, 3.23it/s] 10%|▉ | 36258/371472 [2:55:35<28:07:46, 3.31it/s] 10%|▉ | 36259/371472 [2:55:36<28:41:55, 3.24it/s] 10%|▉ | 36260/371472 [2:55:36<27:22:23, 3.40it/s] {'loss': 4.4595, 'learning_rate': 9.125869982115538e-07, 'epoch': 1.56} + 10%|▉ | 36260/371472 [2:55:36<27:22:23, 3.40it/s] 10%|▉ | 36261/371472 [2:55:36<26:07:48, 3.56it/s] 10%|▉ | 36262/371472 [2:55:37<28:13:14, 3.30it/s] 10%|▉ | 36263/371472 [2:55:37<26:52:06, 3.47it/s] 10%|▉ | 36264/371472 [2:55:37<28:54:12, 3.22it/s] 10%|▉ | 36265/371472 [2:55:37<28:22:04, 3.28it/s] 10%|▉ | 36266/371472 [2:55:38<27:45:14, 3.35it/s] 10%|▉ | 36267/371472 [2:55:38<28:07:28, 3.31it/s] 10%|▉ | 36268/371472 [2:55:38<29:18:20, 3.18it/s] 10%|▉ | 36269/371472 [2:55:39<29:07:25, 3.20it/s] 10%|▉ | 36270/371472 [2:55:39<29:05:41, 3.20it/s] 10%|▉ | 36271/371472 [2:55:39<27:47:34, 3.35it/s] 10%|▉ | 36272/371472 [2:55:40<27:53:36, 3.34it/s] 10%|▉ | 36273/371472 [2:55:40<28:47:15, 3.23it/s] 10%|▉ | 36274/371472 [2:55:40<27:24:36, 3.40it/s] 10%|▉ | 36275/371472 [2:55:40<26:36:59, 3.50it/s] 10%|▉ | 36276/371472 [2:55:41<27:23:42, 3.40it/s] 10%|▉ | 36277/371472 [2:55:41<27:10:50, 3.43it/s] 10%|▉ | 36278/371472 [2:55:41<29:01:00, 3.21it/s] 10%|▉ | 36279/371472 [2:55:42<28:08:13, 3.31it/s] 10%|▉ | 36280/371472 [2:55:42<26:44:45, 3.48it/s] {'loss': 4.5646, 'learning_rate': 9.125385162360748e-07, 'epoch': 1.56} + 10%|▉ | 36280/371472 [2:55:42<26:44:45, 3.48it/s] 10%|▉ | 36281/371472 [2:55:42<27:35:04, 3.38it/s] 10%|▉ | 36282/371472 [2:55:42<26:39:03, 3.49it/s] 10%|▉ | 36283/371472 [2:55:43<25:42:24, 3.62it/s] 10%|▉ | 36284/371472 [2:55:43<25:39:53, 3.63it/s] 10%|▉ | 36285/371472 [2:55:43<28:47:13, 3.23it/s] 10%|▉ | 36286/371472 [2:55:44<27:09:59, 3.43it/s] 10%|▉ | 36287/371472 [2:55:44<28:08:48, 3.31it/s] 10%|▉ | 36288/371472 [2:55:44<27:37:37, 3.37it/s] 10%|▉ | 36289/371472 [2:55:44<26:31:21, 3.51it/s] 10%|▉ | 36290/371472 [2:55:45<27:01:57, 3.44it/s] 10%|▉ | 36291/371472 [2:55:45<26:11:44, 3.55it/s] 10%|▉ | 36292/371472 [2:55:45<26:15:13, 3.55it/s] 10%|▉ | 36293/371472 [2:55:46<26:32:04, 3.51it/s] 10%|▉ | 36294/371472 [2:55:46<25:26:16, 3.66it/s] 10%|▉ | 36295/371472 [2:55:46<25:17:18, 3.68it/s] 10%|▉ | 36296/371472 [2:55:47<27:58:09, 3.33it/s] 10%|▉ | 36297/371472 [2:55:47<26:47:46, 3.47it/s] 10%|▉ | 36298/371472 [2:55:47<26:00:09, 3.58it/s] 10%|▉ | 36299/371472 [2:55:47<28:22:09, 3.28it/s] 10%|▉ | 36300/371472 [2:55:48<29:02:18, 3.21it/s] {'loss': 4.3038, 'learning_rate': 9.124900342605959e-07, 'epoch': 1.56} + 10%|▉ | 36300/371472 [2:55:48<29:02:18, 3.21it/s] 10%|▉ | 36301/371472 [2:55:48<30:38:35, 3.04it/s] 10%|▉ | 36302/371472 [2:55:48<31:29:55, 2.96it/s] 10%|▉ | 36303/371472 [2:55:49<29:53:30, 3.11it/s] 10%|▉ | 36304/371472 [2:55:49<30:57:54, 3.01it/s] 10%|▉ | 36305/371472 [2:55:49<30:34:55, 3.04it/s] 10%|▉ | 36306/371472 [2:55:50<29:15:17, 3.18it/s] 10%|▉ | 36307/371472 [2:55:50<29:23:57, 3.17it/s] 10%|▉ | 36308/371472 [2:55:50<28:44:47, 3.24it/s] 10%|▉ | 36309/371472 [2:55:51<27:20:49, 3.40it/s] 10%|▉ | 36310/371472 [2:55:51<26:54:45, 3.46it/s] 10%|▉ | 36311/371472 [2:55:51<26:31:33, 3.51it/s] 10%|▉ | 36312/371472 [2:55:51<26:20:57, 3.53it/s] 10%|▉ | 36313/371472 [2:55:52<26:50:54, 3.47it/s] 10%|▉ | 36314/371472 [2:55:52<26:44:29, 3.48it/s] 10%|▉ | 36315/371472 [2:55:52<26:49:05, 3.47it/s] 10%|▉ | 36316/371472 [2:55:53<26:16:21, 3.54it/s] 10%|▉ | 36317/371472 [2:55:53<26:00:00, 3.58it/s] 10%|▉ | 36318/371472 [2:55:53<27:56:58, 3.33it/s] 10%|▉ | 36319/371472 [2:55:53<27:32:20, 3.38it/s] 10%|▉ | 36320/371472 [2:55:54<26:39:15, 3.49it/s] {'loss': 4.6117, 'learning_rate': 9.124415522851171e-07, 'epoch': 1.56} + 10%|▉ | 36320/371472 [2:55:54<26:39:15, 3.49it/s] 10%|▉ | 36321/371472 [2:55:54<26:45:44, 3.48it/s] 10%|▉ | 36322/371472 [2:55:54<28:25:37, 3.27it/s] 10%|▉ | 36323/371472 [2:55:55<27:52:47, 3.34it/s] 10%|▉ | 36324/371472 [2:55:55<29:26:20, 3.16it/s] 10%|▉ | 36325/371472 [2:55:55<27:51:18, 3.34it/s] 10%|▉ | 36326/371472 [2:55:56<27:59:56, 3.32it/s] 10%|▉ | 36327/371472 [2:55:56<27:44:16, 3.36it/s] 10%|▉ | 36328/371472 [2:55:56<27:17:31, 3.41it/s] 10%|▉ | 36329/371472 [2:55:56<26:36:19, 3.50it/s] 10%|▉ | 36330/371472 [2:55:57<27:33:00, 3.38it/s] 10%|▉ | 36331/371472 [2:55:57<27:41:52, 3.36it/s] 10%|▉ | 36332/371472 [2:55:57<29:31:08, 3.15it/s] 10%|▉ | 36333/371472 [2:55:58<28:36:20, 3.25it/s] 10%|▉ | 36334/371472 [2:55:58<29:56:56, 3.11it/s] 10%|▉ | 36335/371472 [2:55:58<27:55:31, 3.33it/s] 10%|▉ | 36336/371472 [2:55:59<27:26:47, 3.39it/s] 10%|▉ | 36337/371472 [2:55:59<29:16:38, 3.18it/s] 10%|▉ | 36338/371472 [2:55:59<28:51:09, 3.23it/s] 10%|▉ | 36339/371472 [2:56:00<28:26:31, 3.27it/s] 10%|▉ | 36340/371472 [2:56:00<28:09:59, 3.31it/s] {'loss': 4.528, 'learning_rate': 9.123930703096382e-07, 'epoch': 1.57} + 10%|▉ | 36340/371472 [2:56:00<28:09:59, 3.31it/s] 10%|▉ | 36341/371472 [2:56:00<27:18:01, 3.41it/s] 10%|▉ | 36342/371472 [2:56:00<26:05:04, 3.57it/s] 10%|▉ | 36343/371472 [2:56:01<26:28:19, 3.52it/s] 10%|▉ | 36344/371472 [2:56:01<30:01:07, 3.10it/s] 10%|▉ | 36345/371472 [2:56:01<29:24:23, 3.17it/s] 10%|▉ | 36346/371472 [2:56:02<28:17:40, 3.29it/s] 10%|▉ | 36347/371472 [2:56:02<28:37:59, 3.25it/s] 10%|▉ | 36348/371472 [2:56:02<28:21:04, 3.28it/s] 10%|▉ | 36349/371472 [2:56:02<26:55:23, 3.46it/s] 10%|▉ | 36350/371472 [2:56:03<26:03:06, 3.57it/s] 10%|▉ | 36351/371472 [2:56:03<26:14:45, 3.55it/s] 10%|▉ | 36352/371472 [2:56:03<27:23:55, 3.40it/s] 10%|▉ | 36353/371472 [2:56:04<26:35:39, 3.50it/s] 10%|▉ | 36354/371472 [2:56:04<26:10:55, 3.56it/s] 10%|▉ | 36355/371472 [2:56:04<27:50:58, 3.34it/s] 10%|▉ | 36356/371472 [2:56:05<27:21:07, 3.40it/s] 10%|▉ | 36357/371472 [2:56:05<26:09:47, 3.56it/s] 10%|▉ | 36358/371472 [2:56:05<25:33:02, 3.64it/s] 10%|▉ | 36359/371472 [2:56:05<25:04:18, 3.71it/s] 10%|▉ | 36360/371472 [2:56:06<25:36:46, 3.63it/s] {'loss': 4.4912, 'learning_rate': 9.123445883341593e-07, 'epoch': 1.57} + 10%|▉ | 36360/371472 [2:56:06<25:36:46, 3.63it/s] 10%|▉ | 36361/371472 [2:56:06<27:11:55, 3.42it/s] 10%|▉ | 36362/371472 [2:56:06<29:29:20, 3.16it/s] 10%|▉ | 36363/371472 [2:56:07<27:33:44, 3.38it/s] 10%|▉ | 36364/371472 [2:56:07<26:10:58, 3.56it/s] 10%|▉ | 36365/371472 [2:56:07<25:21:26, 3.67it/s] 10%|▉ | 36366/371472 [2:56:07<25:12:33, 3.69it/s] 10%|▉ | 36367/371472 [2:56:08<26:19:50, 3.54it/s] 10%|▉ | 36368/371472 [2:56:08<29:08:45, 3.19it/s] 10%|▉ | 36369/371472 [2:56:08<27:47:08, 3.35it/s] 10%|▉ | 36370/371472 [2:56:08<26:01:18, 3.58it/s] 10%|▉ | 36371/371472 [2:56:09<27:19:24, 3.41it/s] 10%|▉ | 36372/371472 [2:56:09<26:39:27, 3.49it/s] 10%|▉ | 36373/371472 [2:56:09<25:48:13, 3.61it/s] 10%|▉ | 36374/371472 [2:56:10<25:42:36, 3.62it/s] 10%|▉ | 36375/371472 [2:56:10<25:15:37, 3.68it/s] 10%|▉ | 36376/371472 [2:56:10<27:41:09, 3.36it/s] 10%|▉ | 36377/371472 [2:56:11<27:32:06, 3.38it/s] 10%|▉ | 36378/371472 [2:56:11<28:06:21, 3.31it/s] 10%|▉ | 36379/371472 [2:56:11<27:18:54, 3.41it/s] 10%|▉ | 36380/371472 [2:56:11<27:44:52, 3.35it/s] {'loss': 4.65, 'learning_rate': 9.122961063586804e-07, 'epoch': 1.57} + 10%|▉ | 36380/371472 [2:56:11<27:44:52, 3.35it/s] 10%|▉ | 36381/371472 [2:56:12<28:46:19, 3.24it/s] 10%|▉ | 36382/371472 [2:56:12<27:23:19, 3.40it/s] 10%|▉ | 36383/371472 [2:56:12<29:24:15, 3.17it/s] 10%|▉ | 36384/371472 [2:56:13<29:39:52, 3.14it/s] 10%|▉ | 36385/371472 [2:56:13<29:01:24, 3.21it/s] 10%|▉ | 36386/371472 [2:56:13<28:11:26, 3.30it/s] 10%|▉ | 36387/371472 [2:56:14<27:11:20, 3.42it/s] 10%|▉ | 36388/371472 [2:56:14<27:46:52, 3.35it/s] 10%|▉ | 36389/371472 [2:56:14<28:28:33, 3.27it/s] 10%|▉ | 36390/371472 [2:56:14<28:02:18, 3.32it/s] 10%|▉ | 36391/371472 [2:56:15<27:42:33, 3.36it/s] 10%|▉ | 36392/371472 [2:56:15<26:48:08, 3.47it/s] 10%|▉ | 36393/371472 [2:56:15<26:08:12, 3.56it/s] 10%|▉ | 36394/371472 [2:56:16<27:14:15, 3.42it/s] 10%|▉ | 36395/371472 [2:56:16<26:34:12, 3.50it/s] 10%|▉ | 36396/371472 [2:56:16<26:25:09, 3.52it/s] 10%|▉ | 36397/371472 [2:56:16<26:11:10, 3.55it/s] 10%|▉ | 36398/371472 [2:56:17<25:55:42, 3.59it/s] 10%|▉ | 36399/371472 [2:56:17<29:49:52, 3.12it/s] 10%|▉ | 36400/371472 [2:56:17<28:40:35, 3.25it/s] {'loss': 4.3715, 'learning_rate': 9.122476243832014e-07, 'epoch': 1.57} + 10%|▉ | 36400/371472 [2:56:17<28:40:35, 3.25it/s] 10%|▉ | 36401/371472 [2:56:18<27:57:03, 3.33it/s] 10%|▉ | 36402/371472 [2:56:18<28:24:17, 3.28it/s] 10%|▉ | 36403/371472 [2:56:18<27:12:51, 3.42it/s] 10%|▉ | 36404/371472 [2:56:19<26:33:09, 3.51it/s] 10%|▉ | 36405/371472 [2:56:19<28:51:48, 3.22it/s] 10%|▉ | 36406/371472 [2:56:19<29:14:34, 3.18it/s] 10%|▉ | 36407/371472 [2:56:20<28:53:12, 3.22it/s] 10%|▉ | 36408/371472 [2:56:20<27:13:19, 3.42it/s] 10%|▉ | 36409/371472 [2:56:20<26:37:09, 3.50it/s] 10%|▉ | 36410/371472 [2:56:20<25:56:36, 3.59it/s] 10%|▉ | 36411/371472 [2:56:21<26:08:58, 3.56it/s] 10%|▉ | 36412/371472 [2:56:21<26:53:14, 3.46it/s] 10%|▉ | 36413/371472 [2:56:21<26:03:37, 3.57it/s] 10%|▉ | 36414/371472 [2:56:21<26:08:03, 3.56it/s] 10%|▉ | 36415/371472 [2:56:22<28:04:26, 3.32it/s] 10%|▉ | 36416/371472 [2:56:22<26:48:26, 3.47it/s] 10%|▉ | 36417/371472 [2:56:22<26:47:08, 3.47it/s] 10%|▉ | 36418/371472 [2:56:23<26:27:40, 3.52it/s] 10%|▉ | 36419/371472 [2:56:23<26:28:02, 3.52it/s] 10%|▉ | 36420/371472 [2:56:23<27:31:25, 3.38it/s] {'loss': 4.4528, 'learning_rate': 9.121991424077226e-07, 'epoch': 1.57} + 10%|▉ | 36420/371472 [2:56:23<27:31:25, 3.38it/s] 10%|▉ | 36421/371472 [2:56:23<26:44:42, 3.48it/s] 10%|▉ | 36422/371472 [2:56:24<26:22:23, 3.53it/s] 10%|▉ | 36423/371472 [2:56:24<27:41:30, 3.36it/s] 10%|▉ | 36424/371472 [2:56:24<27:03:29, 3.44it/s] 10%|▉ | 36425/371472 [2:56:25<26:37:15, 3.50it/s] 10%|▉ | 36426/371472 [2:56:25<25:50:40, 3.60it/s] 10%|▉ | 36427/371472 [2:56:25<26:05:47, 3.57it/s] 10%|▉ | 36428/371472 [2:56:26<28:38:49, 3.25it/s] 10%|▉ | 36429/371472 [2:56:26<27:37:39, 3.37it/s] 10%|▉ | 36430/371472 [2:56:26<26:16:31, 3.54it/s] 10%|▉ | 36431/371472 [2:56:26<26:40:02, 3.49it/s] 10%|▉ | 36432/371472 [2:56:27<27:59:43, 3.32it/s] 10%|▉ | 36433/371472 [2:56:27<26:39:07, 3.49it/s] 10%|▉ | 36434/371472 [2:56:27<25:45:26, 3.61it/s] 10%|▉ | 36435/371472 [2:56:27<24:53:51, 3.74it/s] 10%|▉ | 36436/371472 [2:56:28<26:06:09, 3.57it/s] 10%|▉ | 36437/371472 [2:56:28<26:37:13, 3.50it/s] 10%|▉ | 36438/371472 [2:56:28<25:53:33, 3.59it/s] 10%|▉ | 36439/371472 [2:56:29<26:42:30, 3.48it/s] 10%|▉ | 36440/371472 [2:56:29<26:53:16, 3.46it/s] {'loss': 4.4725, 'learning_rate': 9.121506604322436e-07, 'epoch': 1.57} + 10%|▉ | 36440/371472 [2:56:29<26:53:16, 3.46it/s] 10%|▉ | 36441/371472 [2:56:29<26:53:33, 3.46it/s] 10%|▉ | 36442/371472 [2:56:29<26:02:03, 3.57it/s] 10%|▉ | 36443/371472 [2:56:30<25:59:40, 3.58it/s] 10%|▉ | 36444/371472 [2:56:30<27:38:19, 3.37it/s] 10%|▉ | 36445/371472 [2:56:30<27:03:18, 3.44it/s] 10%|▉ | 36446/371472 [2:56:31<28:05:16, 3.31it/s] 10%|▉ | 36447/371472 [2:56:31<27:46:25, 3.35it/s] 10%|▉ | 36448/371472 [2:56:31<26:54:58, 3.46it/s] 10%|▉ | 36449/371472 [2:56:32<26:03:20, 3.57it/s] 10%|▉ | 36450/371472 [2:56:32<26:16:09, 3.54it/s] 10%|▉ | 36451/371472 [2:56:32<27:29:22, 3.39it/s] 10%|▉ | 36452/371472 [2:56:32<27:02:04, 3.44it/s] 10%|▉ | 36453/371472 [2:56:33<26:23:19, 3.53it/s] 10%|▉ | 36454/371472 [2:56:33<25:59:34, 3.58it/s] 10%|▉ | 36455/371472 [2:56:33<26:28:21, 3.52it/s] 10%|▉ | 36456/371472 [2:56:33<25:43:02, 3.62it/s] 10%|▉ | 36457/371472 [2:56:34<26:45:22, 3.48it/s] 10%|▉ | 36458/371472 [2:56:34<27:35:25, 3.37it/s] 10%|▉ | 36459/371472 [2:56:34<27:02:16, 3.44it/s] 10%|▉ | 36460/371472 [2:56:35<27:16:47, 3.41it/s] {'loss': 4.5761, 'learning_rate': 9.121021784567648e-07, 'epoch': 1.57} + 10%|▉ | 36460/371472 [2:56:35<27:16:47, 3.41it/s] 10%|▉ | 36461/371472 [2:56:35<27:18:37, 3.41it/s] 10%|▉ | 36462/371472 [2:56:35<26:30:38, 3.51it/s] 10%|▉ | 36463/371472 [2:56:36<26:46:45, 3.48it/s] 10%|▉ | 36464/371472 [2:56:36<25:43:44, 3.62it/s] 10%|▉ | 36465/371472 [2:56:36<25:58:58, 3.58it/s] 10%|▉ | 36466/371472 [2:56:36<25:03:54, 3.71it/s] 10%|▉ | 36467/371472 [2:56:37<24:49:40, 3.75it/s] 10%|▉ | 36468/371472 [2:56:37<24:52:58, 3.74it/s] 10%|▉ | 36469/371472 [2:56:37<24:42:42, 3.77it/s] 10%|▉ | 36470/371472 [2:56:37<25:12:39, 3.69it/s] 10%|▉ | 36471/371472 [2:56:38<25:17:07, 3.68it/s] 10%|▉ | 36472/371472 [2:56:38<25:25:43, 3.66it/s] 10%|▉ | 36473/371472 [2:56:38<27:49:42, 3.34it/s] 10%|▉ | 36474/371472 [2:56:39<27:17:30, 3.41it/s] 10%|▉ | 36475/371472 [2:56:39<26:19:21, 3.54it/s] 10%|▉ | 36476/371472 [2:56:39<27:06:14, 3.43it/s] 10%|▉ | 36477/371472 [2:56:39<26:53:05, 3.46it/s] 10%|▉ | 36478/371472 [2:56:40<25:47:28, 3.61it/s] 10%|▉ | 36479/371472 [2:56:40<25:33:14, 3.64it/s] 10%|▉ | 36480/371472 [2:56:40<27:20:28, 3.40it/s] {'loss': 4.6145, 'learning_rate': 9.120536964812859e-07, 'epoch': 1.57} + 10%|▉ | 36480/371472 [2:56:40<27:20:28, 3.40it/s] 10%|▉ | 36481/371472 [2:56:41<27:20:20, 3.40it/s] 10%|▉ | 36482/371472 [2:56:41<26:31:15, 3.51it/s] 10%|▉ | 36483/371472 [2:56:41<30:46:50, 3.02it/s] 10%|▉ | 36484/371472 [2:56:42<30:29:47, 3.05it/s] 10%|▉ | 36485/371472 [2:56:42<28:53:43, 3.22it/s] 10%|▉ | 36486/371472 [2:56:42<29:24:01, 3.16it/s] 10%|▉ | 36487/371472 [2:56:43<30:20:40, 3.07it/s] 10%|▉ | 36488/371472 [2:56:43<28:55:49, 3.22it/s] 10%|▉ | 36489/371472 [2:56:43<28:05:51, 3.31it/s] 10%|▉ | 36490/371472 [2:56:43<28:05:01, 3.31it/s] 10%|▉ | 36491/371472 [2:56:44<28:46:02, 3.23it/s] 10%|▉ | 36492/371472 [2:56:44<28:11:27, 3.30it/s] 10%|▉ | 36493/371472 [2:56:44<27:10:22, 3.42it/s] 10%|▉ | 36494/371472 [2:56:45<26:55:57, 3.45it/s] 10%|▉ | 36495/371472 [2:56:45<27:50:45, 3.34it/s] 10%|▉ | 36496/371472 [2:56:45<26:30:59, 3.51it/s] 10%|▉ | 36497/371472 [2:56:45<26:07:55, 3.56it/s] 10%|▉ | 36498/371472 [2:56:46<25:27:32, 3.65it/s] 10%|▉ | 36499/371472 [2:56:46<27:18:44, 3.41it/s] 10%|▉ | 36500/371472 [2:56:46<28:26:36, 3.27it/s] {'loss': 4.4612, 'learning_rate': 9.120052145058071e-07, 'epoch': 1.57} + 10%|▉ | 36500/371472 [2:56:46<28:26:36, 3.27it/s] 10%|▉ | 36501/371472 [2:56:47<27:44:23, 3.35it/s] 10%|▉ | 36502/371472 [2:56:47<26:51:34, 3.46it/s] 10%|▉ | 36503/371472 [2:56:47<27:08:15, 3.43it/s] 10%|▉ | 36504/371472 [2:56:48<27:10:45, 3.42it/s] 10%|▉ | 36505/371472 [2:56:48<26:19:27, 3.53it/s] 10%|▉ | 36506/371472 [2:56:48<26:12:54, 3.55it/s] 10%|▉ | 36507/371472 [2:56:48<25:41:12, 3.62it/s] 10%|▉ | 36508/371472 [2:56:49<25:55:31, 3.59it/s] 10%|▉ | 36509/371472 [2:56:49<25:46:09, 3.61it/s] 10%|▉ | 36510/371472 [2:56:49<25:37:21, 3.63it/s] 10%|▉ | 36511/371472 [2:56:49<25:25:18, 3.66it/s] 10%|▉ | 36512/371472 [2:56:50<25:27:43, 3.65it/s] 10%|▉ | 36513/371472 [2:56:50<25:38:31, 3.63it/s] 10%|▉ | 36514/371472 [2:56:50<26:21:57, 3.53it/s] 10%|▉ | 36515/371472 [2:56:51<26:39:51, 3.49it/s] 10%|▉ | 36516/371472 [2:56:51<26:27:32, 3.52it/s] 10%|▉ | 36517/371472 [2:56:51<25:57:37, 3.58it/s] 10%|▉ | 36518/371472 [2:56:51<25:32:26, 3.64it/s] 10%|▉ | 36519/371472 [2:56:52<27:35:24, 3.37it/s] 10%|▉ | 36520/371472 [2:56:52<26:37:49, 3.49it/s] {'loss': 4.4888, 'learning_rate': 9.119567325303281e-07, 'epoch': 1.57} + 10%|▉ | 36520/371472 [2:56:52<26:37:49, 3.49it/s] 10%|▉ | 36521/371472 [2:56:52<26:50:25, 3.47it/s] 10%|▉ | 36522/371472 [2:56:53<26:57:07, 3.45it/s] 10%|▉ | 36523/371472 [2:56:53<26:31:39, 3.51it/s] 10%|▉ | 36524/371472 [2:56:53<26:03:10, 3.57it/s] 10%|▉ | 36525/371472 [2:56:53<26:09:07, 3.56it/s] 10%|▉ | 36526/371472 [2:56:54<25:57:07, 3.59it/s] 10%|▉ | 36527/371472 [2:56:54<25:21:13, 3.67it/s] 10%|▉ | 36528/371472 [2:56:54<25:09:12, 3.70it/s] 10%|▉ | 36529/371472 [2:56:54<24:49:21, 3.75it/s] 10%|▉ | 36530/371472 [2:56:55<25:25:54, 3.66it/s] 10%|▉ | 36531/371472 [2:56:55<25:36:39, 3.63it/s] 10%|▉ | 36532/371472 [2:56:55<26:03:56, 3.57it/s] 10%|▉ | 36533/371472 [2:56:56<26:25:19, 3.52it/s] 10%|▉ | 36534/371472 [2:56:56<26:00:17, 3.58it/s] 10%|▉ | 36535/371472 [2:56:56<26:55:33, 3.46it/s] 10%|▉ | 36536/371472 [2:56:56<26:45:30, 3.48it/s] 10%|▉ | 36537/371472 [2:56:57<26:20:09, 3.53it/s] 10%|▉ | 36538/371472 [2:56:57<26:36:25, 3.50it/s] 10%|▉ | 36539/371472 [2:56:57<25:36:15, 3.63it/s] 10%|▉ | 36540/371472 [2:56:58<25:01:30, 3.72it/s] {'loss': 4.4778, 'learning_rate': 9.119082505548492e-07, 'epoch': 1.57} + 10%|▉ | 36540/371472 [2:56:58<25:01:30, 3.72it/s] 10%|▉ | 36541/371472 [2:56:58<25:22:32, 3.67it/s] 10%|▉ | 36542/371472 [2:56:58<25:11:45, 3.69it/s] 10%|▉ | 36543/371472 [2:56:58<24:51:42, 3.74it/s] 10%|▉ | 36544/371472 [2:56:59<25:23:34, 3.66it/s] 10%|▉ | 36545/371472 [2:56:59<25:03:54, 3.71it/s] 10%|▉ | 36546/371472 [2:56:59<25:08:37, 3.70it/s] 10%|▉ | 36547/371472 [2:56:59<25:15:17, 3.68it/s] 10%|▉ | 36548/371472 [2:57:00<26:00:38, 3.58it/s] 10%|▉ | 36549/371472 [2:57:00<26:48:36, 3.47it/s] 10%|▉ | 36550/371472 [2:57:00<25:50:58, 3.60it/s] 10%|▉ | 36551/371472 [2:57:01<26:45:05, 3.48it/s] 10%|▉ | 36552/371472 [2:57:01<26:44:55, 3.48it/s] 10%|▉ | 36553/371472 [2:57:01<25:49:27, 3.60it/s] 10%|▉ | 36554/371472 [2:57:01<25:26:46, 3.66it/s] 10%|▉ | 36555/371472 [2:57:02<25:09:43, 3.70it/s] 10%|▉ | 36556/371472 [2:57:02<25:21:31, 3.67it/s] 10%|▉ | 36557/371472 [2:57:02<25:49:19, 3.60it/s] 10%|▉ | 36558/371472 [2:57:03<26:01:42, 3.57it/s] 10%|▉ | 36559/371472 [2:57:03<25:15:02, 3.68it/s] 10%|▉ | 36560/371472 [2:57:03<24:54:16, 3.74it/s] {'loss': 4.4639, 'learning_rate': 9.118597685793703e-07, 'epoch': 1.57} + 10%|▉ | 36560/371472 [2:57:03<24:54:16, 3.74it/s] 10%|▉ | 36561/371472 [2:57:03<26:02:25, 3.57it/s] 10%|▉ | 36562/371472 [2:57:04<25:51:10, 3.60it/s] 10%|▉ | 36563/371472 [2:57:04<25:46:22, 3.61it/s] 10%|▉ | 36564/371472 [2:57:04<27:07:10, 3.43it/s] 10%|▉ | 36565/371472 [2:57:05<27:04:27, 3.44it/s] 10%|▉ | 36566/371472 [2:57:05<27:32:18, 3.38it/s] 10%|▉ | 36567/371472 [2:57:05<27:39:26, 3.36it/s] 10%|▉ | 36568/371472 [2:57:05<29:11:52, 3.19it/s] 10%|▉ | 36569/371472 [2:57:06<28:54:29, 3.22it/s] 10%|▉ | 36570/371472 [2:57:06<28:03:53, 3.31it/s] 10%|▉ | 36571/371472 [2:57:06<27:35:58, 3.37it/s] 10%|▉ | 36572/371472 [2:57:07<26:21:45, 3.53it/s] 10%|▉ | 36573/371472 [2:57:07<25:41:28, 3.62it/s] 10%|▉ | 36574/371472 [2:57:07<25:41:26, 3.62it/s] 10%|▉ | 36575/371472 [2:57:07<26:11:20, 3.55it/s] 10%|▉ | 36576/371472 [2:57:08<27:22:40, 3.40it/s] 10%|▉ | 36577/371472 [2:57:08<27:32:09, 3.38it/s] 10%|▉ | 36578/371472 [2:57:08<26:34:45, 3.50it/s] 10%|▉ | 36579/371472 [2:57:09<27:18:47, 3.41it/s] 10%|▉ | 36580/371472 [2:57:09<26:10:30, 3.55it/s] {'loss': 4.4546, 'learning_rate': 9.118112866038915e-07, 'epoch': 1.58} + 10%|▉ | 36580/371472 [2:57:09<26:10:30, 3.55it/s] 10%|▉ | 36581/371472 [2:57:09<26:10:44, 3.55it/s] 10%|▉ | 36582/371472 [2:57:09<25:04:24, 3.71it/s] 10%|▉ | 36583/371472 [2:57:10<26:44:08, 3.48it/s] 10%|▉ | 36584/371472 [2:57:10<26:35:51, 3.50it/s] 10%|▉ | 36585/371472 [2:57:10<28:42:59, 3.24it/s] 10%|▉ | 36586/371472 [2:57:11<27:27:38, 3.39it/s] 10%|▉ | 36587/371472 [2:57:11<26:28:57, 3.51it/s] 10%|▉ | 36588/371472 [2:57:11<25:55:11, 3.59it/s] 10%|▉ | 36589/371472 [2:57:11<25:52:49, 3.59it/s] 10%|▉ | 36590/371472 [2:57:12<25:15:24, 3.68it/s] 10%|▉ | 36591/371472 [2:57:12<24:41:34, 3.77it/s] 10%|▉ | 36592/371472 [2:57:12<24:13:38, 3.84it/s] 10%|▉ | 36593/371472 [2:57:12<25:13:33, 3.69it/s] 10%|▉ | 36594/371472 [2:57:13<25:05:49, 3.71it/s] 10%|▉ | 36595/371472 [2:57:13<24:42:45, 3.76it/s] 10%|▉ | 36596/371472 [2:57:13<24:48:26, 3.75it/s] 10%|▉ | 36597/371472 [2:57:14<25:05:29, 3.71it/s] 10%|▉ | 36598/371472 [2:57:14<24:13:17, 3.84it/s] 10%|▉ | 36599/371472 [2:57:14<24:35:11, 3.78it/s] 10%|▉ | 36600/371472 [2:57:14<24:40:12, 3.77it/s] {'loss': 4.6701, 'learning_rate': 9.117628046284124e-07, 'epoch': 1.58} + 10%|▉ | 36600/371472 [2:57:14<24:40:12, 3.77it/s] 10%|▉ | 36601/371472 [2:57:15<26:24:26, 3.52it/s] 10%|▉ | 36602/371472 [2:57:15<26:11:17, 3.55it/s] 10%|▉ | 36603/371472 [2:57:15<30:45:58, 3.02it/s] 10%|▉ | 36604/371472 [2:57:16<31:50:44, 2.92it/s] 10%|▉ | 36605/371472 [2:57:16<29:37:12, 3.14it/s] 10%|▉ | 36606/371472 [2:57:16<28:34:33, 3.26it/s] 10%|▉ | 36607/371472 [2:57:17<28:36:45, 3.25it/s] 10%|▉ | 36608/371472 [2:57:17<29:23:08, 3.17it/s] 10%|▉ | 36609/371472 [2:57:17<29:01:00, 3.21it/s] 10%|▉ | 36610/371472 [2:57:18<28:02:00, 3.32it/s] 10%|▉ | 36611/371472 [2:57:18<27:40:41, 3.36it/s] 10%|▉ | 36612/371472 [2:57:18<27:30:47, 3.38it/s] 10%|▉ | 36613/371472 [2:57:18<27:04:59, 3.43it/s] 10%|▉ | 36614/371472 [2:57:19<26:23:00, 3.53it/s] 10%|▉ | 36615/371472 [2:57:19<28:55:44, 3.22it/s] 10%|▉ | 36616/371472 [2:57:19<27:58:50, 3.32it/s] 10%|▉ | 36617/371472 [2:57:20<27:37:47, 3.37it/s] 10%|▉ | 36618/371472 [2:57:20<27:18:51, 3.41it/s] 10%|▉ | 36619/371472 [2:57:20<26:30:05, 3.51it/s] 10%|▉ | 36620/371472 [2:57:20<25:34:40, 3.64it/s] {'loss': 4.3391, 'learning_rate': 9.117143226529337e-07, 'epoch': 1.58} + 10%|▉ | 36620/371472 [2:57:20<25:34:40, 3.64it/s] 10%|▉ | 36621/371472 [2:57:21<25:56:36, 3.59it/s] 10%|▉ | 36622/371472 [2:57:21<26:58:00, 3.45it/s] 10%|▉ | 36623/371472 [2:57:21<25:47:14, 3.61it/s] 10%|▉ | 36624/371472 [2:57:22<26:34:54, 3.50it/s] 10%|▉ | 36625/371472 [2:57:22<28:23:26, 3.28it/s] 10%|▉ | 36626/371472 [2:57:22<27:58:01, 3.33it/s] 10%|▉ | 36627/371472 [2:57:23<28:37:14, 3.25it/s] 10%|▉ | 36628/371472 [2:57:23<28:43:08, 3.24it/s] 10%|▉ | 36629/371472 [2:57:23<28:19:37, 3.28it/s] 10%|▉ | 36630/371472 [2:57:23<28:57:23, 3.21it/s] 10%|▉ | 36631/371472 [2:57:24<29:05:34, 3.20it/s] 10%|▉ | 36632/371472 [2:57:24<28:22:22, 3.28it/s] 10%|▉ | 36633/371472 [2:57:24<27:20:19, 3.40it/s] 10%|▉ | 36634/371472 [2:57:25<30:17:06, 3.07it/s] 10%|▉ | 36635/371472 [2:57:25<27:59:37, 3.32it/s] 10%|▉ | 36636/371472 [2:57:25<27:08:12, 3.43it/s] 10%|▉ | 36637/371472 [2:57:26<27:29:29, 3.38it/s] 10%|▉ | 36638/371472 [2:57:26<27:58:20, 3.33it/s] 10%|▉ | 36639/371472 [2:57:26<28:12:39, 3.30it/s] 10%|▉ | 36640/371472 [2:57:26<27:23:46, 3.39it/s] {'loss': 4.3337, 'learning_rate': 9.116658406774548e-07, 'epoch': 1.58} + 10%|▉ | 36640/371472 [2:57:26<27:23:46, 3.39it/s] 10%|▉ | 36641/371472 [2:57:27<27:32:53, 3.38it/s] 10%|▉ | 36642/371472 [2:57:27<27:36:32, 3.37it/s] 10%|▉ | 36643/371472 [2:57:27<26:22:44, 3.53it/s] 10%|▉ | 36644/371472 [2:57:28<25:41:13, 3.62it/s] 10%|▉ | 36645/371472 [2:57:28<27:31:24, 3.38it/s] 10%|▉ | 36646/371472 [2:57:28<27:51:05, 3.34it/s] 10%|▉ | 36647/371472 [2:57:28<26:52:07, 3.46it/s] 10%|▉ | 36648/371472 [2:57:29<26:17:29, 3.54it/s] 10%|▉ | 36649/371472 [2:57:29<25:34:26, 3.64it/s] 10%|▉ | 36650/371472 [2:57:29<26:03:15, 3.57it/s] 10%|▉ | 36651/371472 [2:57:30<25:41:31, 3.62it/s] 10%|▉ | 36652/371472 [2:57:30<26:12:41, 3.55it/s] 10%|▉ | 36653/371472 [2:57:30<25:36:17, 3.63it/s] 10%|▉ | 36654/371472 [2:57:30<26:24:16, 3.52it/s] 10%|▉ | 36655/371472 [2:57:31<27:18:03, 3.41it/s] 10%|▉ | 36656/371472 [2:57:31<27:57:17, 3.33it/s] 10%|▉ | 36657/371472 [2:57:31<28:00:16, 3.32it/s] 10%|▉ | 36658/371472 [2:57:32<27:43:58, 3.35it/s] 10%|▉ | 36659/371472 [2:57:32<26:42:55, 3.48it/s] 10%|▉ | 36660/371472 [2:57:32<26:35:06, 3.50it/s] {'loss': 4.3595, 'learning_rate': 9.116173587019759e-07, 'epoch': 1.58} + 10%|▉ | 36660/371472 [2:57:32<26:35:06, 3.50it/s] 10%|▉ | 36661/371472 [2:57:32<25:29:15, 3.65it/s] 10%|▉ | 36662/371472 [2:57:33<25:22:59, 3.66it/s] 10%|▉ | 36663/371472 [2:57:33<29:21:06, 3.17it/s] 10%|▉ | 36664/371472 [2:57:33<30:47:41, 3.02it/s] 10%|▉ | 36665/371472 [2:57:34<28:43:05, 3.24it/s] 10%|▉ | 36666/371472 [2:57:34<27:30:10, 3.38it/s] 10%|▉ | 36667/371472 [2:57:34<26:22:00, 3.53it/s] 10%|▉ | 36668/371472 [2:57:35<27:38:37, 3.36it/s] 10%|▉ | 36669/371472 [2:57:35<27:24:12, 3.39it/s] 10%|▉ | 36670/371472 [2:57:35<27:35:30, 3.37it/s] 10%|▉ | 36671/371472 [2:57:35<28:02:23, 3.32it/s] 10%|▉ | 36672/371472 [2:57:36<26:37:44, 3.49it/s] 10%|▉ | 36673/371472 [2:57:36<27:28:36, 3.38it/s] 10%|▉ | 36674/371472 [2:57:36<25:58:29, 3.58it/s] 10%|▉ | 36675/371472 [2:57:37<25:34:46, 3.64it/s] 10%|▉ | 36676/371472 [2:57:37<25:25:45, 3.66it/s] 10%|▉ | 36677/371472 [2:57:37<25:47:37, 3.61it/s] 10%|▉ | 36678/371472 [2:57:37<25:45:11, 3.61it/s] 10%|▉ | 36679/371472 [2:57:38<26:08:10, 3.56it/s] 10%|▉ | 36680/371472 [2:57:38<26:06:49, 3.56it/s] {'loss': 4.29, 'learning_rate': 9.115688767264969e-07, 'epoch': 1.58} + 10%|▉ | 36680/371472 [2:57:38<26:06:49, 3.56it/s] 10%|▉ | 36681/371472 [2:57:38<25:53:21, 3.59it/s] 10%|▉ | 36682/371472 [2:57:39<26:34:08, 3.50it/s] 10%|▉ | 36683/371472 [2:57:39<25:55:03, 3.59it/s] 10%|▉ | 36684/371472 [2:57:39<25:36:38, 3.63it/s] 10%|▉ | 36685/371472 [2:57:39<26:09:18, 3.56it/s] 10%|▉ | 36686/371472 [2:57:40<25:46:32, 3.61it/s] 10%|▉ | 36687/371472 [2:57:40<26:05:38, 3.56it/s] 10%|▉ | 36688/371472 [2:57:40<27:25:58, 3.39it/s] 10%|▉ | 36689/371472 [2:57:41<27:20:50, 3.40it/s] 10%|▉ | 36690/371472 [2:57:41<26:37:19, 3.49it/s] 10%|▉ | 36691/371472 [2:57:41<27:09:42, 3.42it/s] 10%|▉ | 36692/371472 [2:57:41<26:59:32, 3.45it/s] 10%|▉ | 36693/371472 [2:57:42<25:59:13, 3.58it/s] 10%|▉ | 36694/371472 [2:57:42<26:05:27, 3.56it/s] 10%|▉ | 36695/371472 [2:57:42<28:11:11, 3.30it/s] 10%|▉ | 36696/371472 [2:57:43<27:36:12, 3.37it/s] 10%|▉ | 36697/371472 [2:57:43<27:12:24, 3.42it/s] 10%|▉ | 36698/371472 [2:57:43<26:44:36, 3.48it/s] 10%|▉ | 36699/371472 [2:57:43<26:52:36, 3.46it/s] 10%|▉ | 36700/371472 [2:57:44<28:14:12, 3.29it/s] {'loss': 4.4379, 'learning_rate': 9.115203947510182e-07, 'epoch': 1.58} + 10%|▉ | 36700/371472 [2:57:44<28:14:12, 3.29it/s] 10%|▉ | 36701/371472 [2:57:44<26:41:20, 3.48it/s] 10%|▉ | 36702/371472 [2:57:44<26:09:58, 3.55it/s] 10%|▉ | 36703/371472 [2:57:45<26:05:44, 3.56it/s] 10%|▉ | 36704/371472 [2:57:45<26:08:11, 3.56it/s] 10%|▉ | 36705/371472 [2:57:45<26:21:32, 3.53it/s] 10%|▉ | 36706/371472 [2:57:45<26:05:54, 3.56it/s] 10%|▉ | 36707/371472 [2:57:46<27:22:56, 3.40it/s] 10%|▉ | 36708/371472 [2:57:46<26:45:04, 3.48it/s] 10%|▉ | 36709/371472 [2:57:46<26:16:52, 3.54it/s] 10%|▉ | 36710/371472 [2:57:47<27:27:36, 3.39it/s] 10%|▉ | 36711/371472 [2:57:47<26:34:31, 3.50it/s] 10%|▉ | 36712/371472 [2:57:47<26:36:23, 3.49it/s] 10%|▉ | 36713/371472 [2:57:47<26:04:06, 3.57it/s] 10%|▉ | 36714/371472 [2:57:48<25:42:01, 3.62it/s] 10%|▉ | 36715/371472 [2:57:48<28:01:26, 3.32it/s] 10%|▉ | 36716/371472 [2:57:48<28:04:45, 3.31it/s] 10%|▉ | 36717/371472 [2:57:49<26:56:42, 3.45it/s] 10%|▉ | 36718/371472 [2:57:49<26:40:59, 3.48it/s] 10%|▉ | 36719/371472 [2:57:49<26:32:11, 3.50it/s] 10%|▉ | 36720/371472 [2:57:50<29:22:02, 3.17it/s] {'loss': 4.5454, 'learning_rate': 9.114719127755392e-07, 'epoch': 1.58} + 10%|▉ | 36720/371472 [2:57:50<29:22:02, 3.17it/s] 10%|▉ | 36721/371472 [2:57:50<29:01:10, 3.20it/s] 10%|▉ | 36722/371472 [2:57:50<28:31:32, 3.26it/s] 10%|▉ | 36723/371472 [2:57:50<27:10:11, 3.42it/s] 10%|▉ | 36724/371472 [2:57:51<26:42:00, 3.48it/s] 10%|▉ | 36725/371472 [2:57:51<25:31:55, 3.64it/s] 10%|▉ | 36726/371472 [2:57:51<25:54:26, 3.59it/s] 10%|▉ | 36727/371472 [2:57:51<25:20:02, 3.67it/s] 10%|▉ | 36728/371472 [2:57:52<25:08:07, 3.70it/s] 10%|▉ | 36729/371472 [2:57:52<25:09:34, 3.70it/s] 10%|▉ | 36730/371472 [2:57:52<25:32:36, 3.64it/s] 10%|▉ | 36731/371472 [2:57:53<25:21:58, 3.67it/s] 10%|▉ | 36732/371472 [2:57:53<26:25:55, 3.52it/s] 10%|▉ | 36733/371472 [2:57:53<28:29:07, 3.26it/s] 10%|▉ | 36734/371472 [2:57:54<29:20:32, 3.17it/s] 10%|▉ | 36735/371472 [2:57:54<30:44:30, 3.02it/s] 10%|▉ | 36736/371472 [2:57:54<31:01:06, 3.00it/s] 10%|▉ | 36737/371472 [2:57:55<30:20:34, 3.06it/s] 10%|▉ | 36738/371472 [2:57:55<28:53:46, 3.22it/s] 10%|▉ | 36739/371472 [2:57:55<30:45:52, 3.02it/s] 10%|▉ | 36740/371472 [2:57:56<28:53:36, 3.22it/s] {'loss': 4.5105, 'learning_rate': 9.114234308000604e-07, 'epoch': 1.58} + 10%|▉ | 36740/371472 [2:57:56<28:53:36, 3.22it/s] 10%|▉ | 36741/371472 [2:57:56<28:50:48, 3.22it/s] 10%|▉ | 36742/371472 [2:57:56<28:02:45, 3.32it/s] 10%|▉ | 36743/371472 [2:57:56<26:44:09, 3.48it/s] 10%|▉ | 36744/371472 [2:57:57<26:39:06, 3.49it/s] 10%|▉ | 36745/371472 [2:57:57<26:51:18, 3.46it/s] 10%|▉ | 36746/371472 [2:57:57<26:01:57, 3.57it/s] 10%|▉ | 36747/371472 [2:57:57<25:29:29, 3.65it/s] 10%|▉ | 36748/371472 [2:57:58<24:53:50, 3.73it/s] 10%|▉ | 36749/371472 [2:57:58<27:01:31, 3.44it/s] 10%|▉ | 36750/371472 [2:57:58<25:36:52, 3.63it/s] 10%|▉ | 36751/371472 [2:57:59<26:28:03, 3.51it/s] 10%|▉ | 36752/371472 [2:57:59<26:55:25, 3.45it/s] 10%|▉ | 36753/371472 [2:57:59<27:10:34, 3.42it/s] 10%|▉ | 36754/371472 [2:57:59<26:31:55, 3.50it/s] 10%|▉ | 36755/371472 [2:58:00<32:59:18, 2.82it/s] 10%|▉ | 36756/371472 [2:58:00<30:13:01, 3.08it/s] 10%|▉ | 36757/371472 [2:58:01<29:11:26, 3.19it/s] 10%|▉ | 36758/371472 [2:58:01<27:54:15, 3.33it/s] 10%|▉ | 36759/371472 [2:58:01<28:27:38, 3.27it/s] 10%|▉ | 36760/371472 [2:58:01<28:39:22, 3.24it/s] {'loss': 4.5245, 'learning_rate': 9.113749488245814e-07, 'epoch': 1.58} + 10%|▉ | 36760/371472 [2:58:01<28:39:22, 3.24it/s] 10%|▉ | 36761/371472 [2:58:02<28:04:41, 3.31it/s] 10%|▉ | 36762/371472 [2:58:02<28:42:17, 3.24it/s] 10%|▉ | 36763/371472 [2:58:02<28:13:42, 3.29it/s] 10%|▉ | 36764/371472 [2:58:03<28:44:29, 3.23it/s] 10%|▉ | 36765/371472 [2:58:03<28:05:51, 3.31it/s] 10%|▉ | 36766/371472 [2:58:03<27:43:36, 3.35it/s] 10%|▉ | 36767/371472 [2:58:04<29:00:36, 3.20it/s] 10%|▉ | 36768/371472 [2:58:04<27:48:36, 3.34it/s] 10%|▉ | 36769/371472 [2:58:04<26:47:03, 3.47it/s] 10%|▉ | 36770/371472 [2:58:04<26:15:47, 3.54it/s] 10%|▉ | 36771/371472 [2:58:05<29:07:43, 3.19it/s] 10%|▉ | 36772/371472 [2:58:05<28:35:34, 3.25it/s] 10%|▉ | 36773/371472 [2:58:05<29:47:23, 3.12it/s] 10%|▉ | 36774/371472 [2:58:06<28:35:52, 3.25it/s] 10%|▉ | 36775/371472 [2:58:06<27:26:30, 3.39it/s] 10%|▉ | 36776/371472 [2:58:06<26:35:50, 3.50it/s] 10%|▉ | 36777/371472 [2:58:06<25:50:36, 3.60it/s] 10%|▉ | 36778/371472 [2:58:07<25:47:10, 3.61it/s] 10%|▉ | 36779/371472 [2:58:07<25:02:07, 3.71it/s] 10%|▉ | 36780/371472 [2:58:07<26:19:22, 3.53it/s] {'loss': 4.3384, 'learning_rate': 9.113264668491025e-07, 'epoch': 1.58} + 10%|▉ | 36780/371472 [2:58:07<26:19:22, 3.53it/s] 10%|▉ | 36781/371472 [2:58:08<26:43:16, 3.48it/s] 10%|▉ | 36782/371472 [2:58:08<26:32:29, 3.50it/s] 10%|▉ | 36783/371472 [2:58:08<26:44:40, 3.48it/s] 10%|▉ | 36784/371472 [2:58:08<26:53:04, 3.46it/s] 10%|▉ | 36785/371472 [2:58:09<26:30:42, 3.51it/s] 10%|▉ | 36786/371472 [2:58:09<26:20:03, 3.53it/s] 10%|▉ | 36787/371472 [2:58:09<27:30:54, 3.38it/s] 10%|▉ | 36788/371472 [2:58:10<26:53:22, 3.46it/s] 10%|▉ | 36789/371472 [2:58:10<25:47:28, 3.60it/s] 10%|▉ | 36790/371472 [2:58:10<26:27:12, 3.51it/s] 10%|▉ | 36791/371472 [2:58:10<27:25:58, 3.39it/s] 10%|▉ | 36792/371472 [2:58:11<26:01:35, 3.57it/s] 10%|▉ | 36793/371472 [2:58:11<27:01:44, 3.44it/s] 10%|▉ | 36794/371472 [2:58:11<26:45:58, 3.47it/s] 10%|▉ | 36795/371472 [2:58:12<26:38:10, 3.49it/s] 10%|▉ | 36796/371472 [2:58:12<26:34:42, 3.50it/s] 10%|▉ | 36797/371472 [2:58:12<26:55:09, 3.45it/s] 10%|▉ | 36798/371472 [2:58:12<25:48:38, 3.60it/s] 10%|▉ | 36799/371472 [2:58:13<25:18:19, 3.67it/s] 10%|▉ | 36800/371472 [2:58:13<25:00:11, 3.72it/s] {'loss': 4.6087, 'learning_rate': 9.112779848736236e-07, 'epoch': 1.59} + 10%|▉ | 36800/371472 [2:58:13<25:00:11, 3.72it/s] 10%|▉ | 36801/371472 [2:58:13<26:30:10, 3.51it/s] 10%|▉ | 36802/371472 [2:58:14<25:28:18, 3.65it/s] 10%|▉ | 36803/371472 [2:58:14<25:29:56, 3.65it/s] 10%|▉ | 36804/371472 [2:58:14<26:02:20, 3.57it/s] 10%|▉ | 36805/371472 [2:58:14<27:09:13, 3.42it/s] 10%|▉ | 36806/371472 [2:58:15<26:32:45, 3.50it/s] 10%|▉ | 36807/371472 [2:58:15<28:33:29, 3.26it/s] 10%|▉ | 36808/371472 [2:58:15<28:06:58, 3.31it/s] 10%|▉ | 36809/371472 [2:58:16<30:58:41, 3.00it/s] 10%|▉ | 36810/371472 [2:58:16<30:11:38, 3.08it/s] 10%|▉ | 36811/371472 [2:58:16<29:47:05, 3.12it/s] 10%|▉ | 36812/371472 [2:58:17<27:59:31, 3.32it/s] 10%|▉ | 36813/371472 [2:58:17<26:27:08, 3.51it/s] 10%|▉ | 36814/371472 [2:58:17<25:26:42, 3.65it/s] 10%|▉ | 36815/371472 [2:58:17<25:07:40, 3.70it/s] 10%|▉ | 36816/371472 [2:58:18<25:15:26, 3.68it/s] 10%|▉ | 36817/371472 [2:58:18<25:24:49, 3.66it/s] 10%|▉ | 36818/371472 [2:58:18<25:34:34, 3.63it/s] 10%|▉ | 36819/371472 [2:58:18<25:17:04, 3.68it/s] 10%|▉ | 36820/371472 [2:58:19<25:07:33, 3.70it/s] {'loss': 4.4014, 'learning_rate': 9.112295028981446e-07, 'epoch': 1.59} + 10%|▉ | 36820/371472 [2:58:19<25:07:33, 3.70it/s] 10%|▉ | 36821/371472 [2:58:19<25:29:37, 3.65it/s] 10%|▉ | 36822/371472 [2:58:19<24:45:27, 3.75it/s] 10%|▉ | 36823/371472 [2:58:20<24:35:52, 3.78it/s] 10%|▉ | 36824/371472 [2:58:20<24:30:34, 3.79it/s] 10%|▉ | 36825/371472 [2:58:20<27:05:15, 3.43it/s] 10%|▉ | 36826/371472 [2:58:20<26:43:20, 3.48it/s] 10%|▉ | 36827/371472 [2:58:21<26:05:48, 3.56it/s] 10%|▉ | 36828/371472 [2:58:21<26:51:04, 3.46it/s] 10%|▉ | 36829/371472 [2:58:21<26:30:17, 3.51it/s] 10%|▉ | 36830/371472 [2:58:22<27:08:52, 3.42it/s] 10%|▉ | 36831/371472 [2:58:22<30:10:12, 3.08it/s] 10%|▉ | 36832/371472 [2:58:22<28:08:36, 3.30it/s] 10%|▉ | 36833/371472 [2:58:23<27:36:01, 3.37it/s] 10%|▉ | 36834/371472 [2:58:23<28:51:34, 3.22it/s] 10%|▉ | 36835/371472 [2:58:23<28:40:28, 3.24it/s] 10%|▉ | 36836/371472 [2:58:23<28:51:00, 3.22it/s] 10%|▉ | 36837/371472 [2:58:24<27:57:45, 3.32it/s] 10%|▉ | 36838/371472 [2:58:24<27:45:03, 3.35it/s] 10%|▉ | 36839/371472 [2:58:24<26:44:15, 3.48it/s] 10%|▉ | 36840/371472 [2:58:25<27:51:17, 3.34it/s] {'loss': 4.4017, 'learning_rate': 9.111810209226658e-07, 'epoch': 1.59} + 10%|▉ | 36840/371472 [2:58:25<27:51:17, 3.34it/s] 10%|▉ | 36841/371472 [2:58:25<26:57:05, 3.45it/s] 10%|▉ | 36842/371472 [2:58:25<27:36:16, 3.37it/s] 10%|▉ | 36843/371472 [2:58:26<28:30:01, 3.26it/s] 10%|▉ | 36844/371472 [2:58:26<28:22:14, 3.28it/s] 10%|▉ | 36845/371472 [2:58:26<27:24:21, 3.39it/s] 10%|▉ | 36846/371472 [2:58:26<27:28:51, 3.38it/s] 10%|▉ | 36847/371472 [2:58:27<26:38:01, 3.49it/s] 10%|▉ | 36848/371472 [2:58:27<26:48:17, 3.47it/s] 10%|▉ | 36849/371472 [2:58:27<26:23:58, 3.52it/s] 10%|▉ | 36850/371472 [2:58:28<25:53:38, 3.59it/s] 10%|▉ | 36851/371472 [2:58:28<25:12:45, 3.69it/s] 10%|▉ | 36852/371472 [2:58:28<27:10:24, 3.42it/s] 10%|▉ | 36853/371472 [2:58:28<28:42:21, 3.24it/s] 10%|▉ | 36854/371472 [2:58:29<29:07:41, 3.19it/s] 10%|▉ | 36855/371472 [2:58:29<27:57:22, 3.32it/s] 10%|▉ | 36856/371472 [2:58:29<27:17:43, 3.41it/s] 10%|▉ | 36857/371472 [2:58:30<28:15:25, 3.29it/s] 10%|▉ | 36858/371472 [2:58:30<27:32:19, 3.38it/s] 10%|▉ | 36859/371472 [2:58:30<26:32:59, 3.50it/s] 10%|▉ | 36860/371472 [2:58:30<26:14:08, 3.54it/s] {'loss': 4.4921, 'learning_rate': 9.11132538947187e-07, 'epoch': 1.59} + 10%|▉ | 36860/371472 [2:58:30<26:14:08, 3.54it/s] 10%|▉ | 36861/371472 [2:58:31<25:35:17, 3.63it/s] 10%|▉ | 36862/371472 [2:58:31<25:11:21, 3.69it/s] 10%|▉ | 36863/371472 [2:58:31<25:47:13, 3.60it/s] 10%|▉ | 36864/371472 [2:58:32<28:07:03, 3.31it/s] 10%|▉ | 36865/371472 [2:58:32<27:27:32, 3.38it/s] 10%|▉ | 36866/371472 [2:58:32<26:19:28, 3.53it/s] 10%|▉ | 36867/371472 [2:58:32<25:56:46, 3.58it/s] 10%|▉ | 36868/371472 [2:58:33<25:02:57, 3.71it/s] 10%|▉ | 36869/371472 [2:58:33<25:01:50, 3.71it/s] 10%|▉ | 36870/371472 [2:58:33<25:16:25, 3.68it/s] 10%|▉ | 36871/371472 [2:58:34<25:07:27, 3.70it/s] 10%|▉ | 36872/371472 [2:58:34<25:32:20, 3.64it/s] 10%|▉ | 36873/371472 [2:58:34<27:54:31, 3.33it/s] 10%|▉ | 36874/371472 [2:58:34<26:41:11, 3.48it/s] 10%|▉ | 36875/371472 [2:58:35<26:54:04, 3.45it/s] 10%|▉ | 36876/371472 [2:58:35<28:21:46, 3.28it/s] 10%|▉ | 36877/371472 [2:58:35<27:58:57, 3.32it/s] 10%|▉ | 36878/371472 [2:58:36<26:47:13, 3.47it/s] 10%|▉ | 36879/371472 [2:58:36<27:16:07, 3.41it/s] 10%|▉ | 36880/371472 [2:58:36<27:08:22, 3.42it/s] {'loss': 4.3694, 'learning_rate': 9.110840569717081e-07, 'epoch': 1.59} + 10%|▉ | 36880/371472 [2:58:36<27:08:22, 3.42it/s] 10%|▉ | 36881/371472 [2:58:37<27:21:29, 3.40it/s] 10%|▉ | 36882/371472 [2:58:37<27:44:20, 3.35it/s] 10%|▉ | 36883/371472 [2:58:37<27:24:52, 3.39it/s] 10%|▉ | 36884/371472 [2:58:37<26:53:38, 3.46it/s] 10%|▉ | 36885/371472 [2:58:38<27:01:28, 3.44it/s] 10%|▉ | 36886/371472 [2:58:38<26:19:29, 3.53it/s] 10%|▉ | 36887/371472 [2:58:38<27:06:53, 3.43it/s] 10%|▉ | 36888/371472 [2:58:39<27:14:11, 3.41it/s] 10%|▉ | 36889/371472 [2:58:39<26:52:51, 3.46it/s] 10%|▉ | 36890/371472 [2:58:39<29:41:37, 3.13it/s] 10%|▉ | 36891/371472 [2:58:40<29:12:58, 3.18it/s] 10%|▉ | 36892/371472 [2:58:40<27:54:50, 3.33it/s] 10%|▉ | 36893/371472 [2:58:40<27:30:24, 3.38it/s] 10%|▉ | 36894/371472 [2:58:40<28:48:02, 3.23it/s] 10%|▉ | 36895/371472 [2:58:41<28:06:26, 3.31it/s] 10%|▉ | 36896/371472 [2:58:41<27:35:24, 3.37it/s] 10%|▉ | 36897/371472 [2:58:41<27:57:18, 3.32it/s] 10%|▉ | 36898/371472 [2:58:42<27:21:17, 3.40it/s] 10%|▉ | 36899/371472 [2:58:42<26:12:33, 3.55it/s] 10%|▉ | 36900/371472 [2:58:42<29:07:12, 3.19it/s] {'loss': 4.6063, 'learning_rate': 9.110355749962291e-07, 'epoch': 1.59} + 10%|▉ | 36900/371472 [2:58:42<29:07:12, 3.19it/s] 10%|▉ | 36901/371472 [2:58:42<27:47:02, 3.34it/s] 10%|▉ | 36902/371472 [2:58:43<26:49:54, 3.46it/s] 10%|▉ | 36903/371472 [2:58:43<26:27:22, 3.51it/s] 10%|▉ | 36904/371472 [2:58:43<27:17:13, 3.41it/s] 10%|▉ | 36905/371472 [2:58:44<26:08:26, 3.56it/s] 10%|▉ | 36906/371472 [2:58:44<25:52:07, 3.59it/s] 10%|▉ | 36907/371472 [2:58:44<25:47:29, 3.60it/s] 10%|▉ | 36908/371472 [2:58:44<26:22:41, 3.52it/s] 10%|▉ | 36909/371472 [2:58:45<25:59:07, 3.58it/s] 10%|▉ | 36910/371472 [2:58:45<26:42:39, 3.48it/s] 10%|▉ | 36911/371472 [2:58:45<26:07:15, 3.56it/s] 10%|▉ | 36912/371472 [2:58:46<25:46:33, 3.61it/s] 10%|▉ | 36913/371472 [2:58:46<27:01:22, 3.44it/s] 10%|▉ | 36914/371472 [2:58:46<27:11:20, 3.42it/s] 10%|▉ | 36915/371472 [2:58:46<26:32:01, 3.50it/s] 10%|▉ | 36916/371472 [2:58:47<26:17:35, 3.53it/s] 10%|▉ | 36917/371472 [2:58:47<26:34:11, 3.50it/s] 10%|▉ | 36918/371472 [2:58:47<27:38:59, 3.36it/s] 10%|▉ | 36919/371472 [2:58:48<28:03:04, 3.31it/s] 10%|▉ | 36920/371472 [2:58:48<26:28:35, 3.51it/s] {'loss': 4.4014, 'learning_rate': 9.109870930207502e-07, 'epoch': 1.59} + 10%|▉ | 36920/371472 [2:58:48<26:28:35, 3.51it/s] 10%|▉ | 36921/371472 [2:58:48<26:23:14, 3.52it/s] 10%|▉ | 36922/371472 [2:58:49<27:55:36, 3.33it/s] 10%|▉ | 36923/371472 [2:58:49<26:24:37, 3.52it/s] 10%|▉ | 36924/371472 [2:58:49<26:02:41, 3.57it/s] 10%|▉ | 36925/371472 [2:58:49<25:37:36, 3.63it/s] 10%|▉ | 36926/371472 [2:58:50<25:30:32, 3.64it/s] 10%|▉ | 36927/371472 [2:58:50<25:56:35, 3.58it/s] 10%|▉ | 36928/371472 [2:58:50<26:15:34, 3.54it/s] 10%|▉ | 36929/371472 [2:58:50<25:54:44, 3.59it/s] 10%|▉ | 36930/371472 [2:58:51<27:40:21, 3.36it/s] 10%|▉ | 36931/371472 [2:58:51<27:13:51, 3.41it/s] 10%|▉ | 36932/371472 [2:58:51<26:24:27, 3.52it/s] 10%|▉ | 36933/371472 [2:58:52<26:20:39, 3.53it/s] 10%|▉ | 36934/371472 [2:58:52<27:48:36, 3.34it/s] 10%|▉ | 36935/371472 [2:58:52<27:31:56, 3.38it/s] 10%|▉ | 36936/371472 [2:58:52<26:29:37, 3.51it/s] 10%|▉ | 36937/371472 [2:58:53<26:31:20, 3.50it/s] 10%|▉ | 36938/371472 [2:58:53<27:58:26, 3.32it/s] 10%|▉ | 36939/371472 [2:58:53<27:36:28, 3.37it/s] 10%|▉ | 36940/371472 [2:58:54<26:55:48, 3.45it/s] {'loss': 4.6915, 'learning_rate': 9.109386110452713e-07, 'epoch': 1.59} + 10%|▉ | 36940/371472 [2:58:54<26:55:48, 3.45it/s] 10%|▉ | 36941/371472 [2:58:54<26:27:40, 3.51it/s] 10%|▉ | 36942/371472 [2:58:54<25:42:28, 3.61it/s] 10%|▉ | 36943/371472 [2:58:54<25:22:37, 3.66it/s] 10%|▉ | 36944/371472 [2:58:55<26:36:56, 3.49it/s] 10%|▉ | 36945/371472 [2:58:55<26:00:42, 3.57it/s] 10%|▉ | 36946/371472 [2:58:55<27:35:44, 3.37it/s] 10%|▉ | 36947/371472 [2:58:56<27:08:07, 3.42it/s] 10%|▉ | 36948/371472 [2:58:56<26:25:06, 3.52it/s] 10%|▉ | 36949/371472 [2:58:56<25:55:41, 3.58it/s] 10%|▉ | 36950/371472 [2:58:56<26:51:03, 3.46it/s] 10%|▉ | 36951/371472 [2:58:57<27:21:24, 3.40it/s] 10%|▉ | 36952/371472 [2:58:57<26:42:50, 3.48it/s] 10%|▉ | 36953/371472 [2:58:57<27:28:49, 3.38it/s] 10%|▉ | 36954/371472 [2:58:58<28:01:45, 3.32it/s] 10%|▉ | 36955/371472 [2:58:58<26:59:35, 3.44it/s] 10%|▉ | 36956/371472 [2:58:58<25:54:12, 3.59it/s] 10%|▉ | 36957/371472 [2:58:58<25:39:02, 3.62it/s] 10%|▉ | 36958/371472 [2:58:59<24:41:34, 3.76it/s] 10%|▉ | 36959/371472 [2:58:59<24:23:34, 3.81it/s] 10%|▉ | 36960/371472 [2:58:59<24:21:13, 3.82it/s] {'loss': 4.4906, 'learning_rate': 9.108901290697925e-07, 'epoch': 1.59} + 10%|▉ | 36960/371472 [2:58:59<24:21:13, 3.82it/s] 10%|▉ | 36961/371472 [2:58:59<24:10:19, 3.84it/s] 10%|▉ | 36962/371472 [2:59:00<25:44:20, 3.61it/s] 10%|▉ | 36963/371472 [2:59:00<25:45:28, 3.61it/s] 10%|▉ | 36964/371472 [2:59:00<26:23:41, 3.52it/s] 10%|▉ | 36965/371472 [2:59:01<26:21:44, 3.52it/s] 10%|▉ | 36966/371472 [2:59:01<26:03:25, 3.57it/s] 10%|▉ | 36967/371472 [2:59:01<25:19:48, 3.67it/s] 10%|▉ | 36968/371472 [2:59:02<26:54:50, 3.45it/s] 10%|▉ | 36969/371472 [2:59:02<29:14:27, 3.18it/s] 10%|▉ | 36970/371472 [2:59:02<28:20:39, 3.28it/s] 10%|▉ | 36971/371472 [2:59:02<27:39:17, 3.36it/s] 10%|▉ | 36972/371472 [2:59:03<29:01:36, 3.20it/s] 10%|▉ | 36973/371472 [2:59:03<28:40:30, 3.24it/s] 10%|▉ | 36974/371472 [2:59:03<29:55:18, 3.11it/s] 10%|▉ | 36975/371472 [2:59:04<28:30:44, 3.26it/s] 10%|▉ | 36976/371472 [2:59:04<28:01:24, 3.32it/s] 10%|▉ | 36977/371472 [2:59:04<28:20:53, 3.28it/s] 10%|▉ | 36978/371472 [2:59:05<26:59:01, 3.44it/s] 10%|▉ | 36979/371472 [2:59:05<26:43:25, 3.48it/s] 10%|▉ | 36980/371472 [2:59:05<26:21:28, 3.53it/s] {'loss': 4.5105, 'learning_rate': 9.108416470943135e-07, 'epoch': 1.59} + 10%|▉ | 36980/371472 [2:59:05<26:21:28, 3.53it/s] 10%|▉ | 36981/371472 [2:59:05<27:22:12, 3.39it/s] 10%|▉ | 36982/371472 [2:59:06<26:43:34, 3.48it/s] 10%|▉ | 36983/371472 [2:59:06<26:24:35, 3.52it/s] 10%|▉ | 36984/371472 [2:59:06<26:03:55, 3.56it/s] 10%|▉ | 36985/371472 [2:59:07<26:37:56, 3.49it/s] 10%|▉ | 36986/371472 [2:59:07<26:46:51, 3.47it/s] 10%|▉ | 36987/371472 [2:59:07<31:42:18, 2.93it/s] 10%|▉ | 36988/371472 [2:59:08<30:03:29, 3.09it/s] 10%|▉ | 36989/371472 [2:59:08<28:12:05, 3.29it/s] 10%|▉ | 36990/371472 [2:59:08<26:42:49, 3.48it/s] 10%|▉ | 36991/371472 [2:59:08<26:07:09, 3.56it/s] 10%|▉ | 36992/371472 [2:59:09<26:56:14, 3.45it/s] 10%|▉ | 36993/371472 [2:59:09<26:10:22, 3.55it/s] 10%|▉ | 36994/371472 [2:59:09<26:12:07, 3.55it/s] 10%|▉ | 36995/371472 [2:59:10<27:15:44, 3.41it/s] 10%|▉ | 36996/371472 [2:59:10<26:28:44, 3.51it/s] 10%|▉ | 36997/371472 [2:59:10<26:45:07, 3.47it/s] 10%|▉ | 36998/371472 [2:59:10<26:22:50, 3.52it/s] 10%|▉ | 36999/371472 [2:59:11<25:27:54, 3.65it/s] 10%|▉ | 37000/371472 [2:59:11<26:59:12, 3.44it/s] {'loss': 4.4614, 'learning_rate': 9.107931651188347e-07, 'epoch': 1.59} + 10%|▉ | 37000/371472 [2:59:11<26:59:12, 3.44it/s] 10%|▉ | 37001/371472 [2:59:11<27:03:24, 3.43it/s] 10%|▉ | 37002/371472 [2:59:12<28:14:47, 3.29it/s] 10%|▉ | 37003/371472 [2:59:12<27:56:48, 3.32it/s] 10%|▉ | 37004/371472 [2:59:12<27:05:39, 3.43it/s] 10%|▉ | 37005/371472 [2:59:12<26:54:06, 3.45it/s] 10%|▉ | 37006/371472 [2:59:13<27:02:06, 3.44it/s] 10%|▉ | 37007/371472 [2:59:13<26:38:39, 3.49it/s] 10%|▉ | 37008/371472 [2:59:13<26:25:43, 3.52it/s] 10%|▉ | 37009/371472 [2:59:14<25:32:42, 3.64it/s] 10%|▉ | 37010/371472 [2:59:14<25:39:41, 3.62it/s] 10%|▉ | 37011/371472 [2:59:14<25:08:31, 3.70it/s] 10%|▉ | 37012/371472 [2:59:14<25:53:50, 3.59it/s] 10%|▉ | 37013/371472 [2:59:15<25:05:13, 3.70it/s] 10%|▉ | 37014/371472 [2:59:15<25:59:38, 3.57it/s] 10%|▉ | 37015/371472 [2:59:15<26:17:18, 3.53it/s] 10%|▉ | 37016/371472 [2:59:16<26:13:43, 3.54it/s] 10%|▉ | 37017/371472 [2:59:16<26:15:53, 3.54it/s] 10%|▉ | 37018/371472 [2:59:16<27:17:29, 3.40it/s] 10%|▉ | 37019/371472 [2:59:16<27:45:19, 3.35it/s] 10%|▉ | 37020/371472 [2:59:17<27:53:40, 3.33it/s] {'loss': 4.298, 'learning_rate': 9.107446831433558e-07, 'epoch': 1.59} + 10%|▉ | 37020/371472 [2:59:17<27:53:40, 3.33it/s] 10%|▉ | 37021/371472 [2:59:17<27:40:15, 3.36it/s] 10%|▉ | 37022/371472 [2:59:17<26:22:06, 3.52it/s] 10%|▉ | 37023/371472 [2:59:18<25:58:10, 3.58it/s] 10%|▉ | 37024/371472 [2:59:18<28:16:42, 3.29it/s] 10%|▉ | 37025/371472 [2:59:18<27:01:42, 3.44it/s] 10%|▉ | 37026/371472 [2:59:18<26:19:52, 3.53it/s] 10%|▉ | 37027/371472 [2:59:19<26:16:35, 3.54it/s] 10%|▉ | 37028/371472 [2:59:19<25:38:23, 3.62it/s] 10%|▉ | 37029/371472 [2:59:19<26:01:40, 3.57it/s] 10%|▉ | 37030/371472 [2:59:20<27:01:56, 3.44it/s] 10%|▉ | 37031/371472 [2:59:20<26:39:50, 3.48it/s] 10%|▉ | 37032/371472 [2:59:20<25:56:04, 3.58it/s] 10%|▉ | 37033/371472 [2:59:20<25:44:52, 3.61it/s] 10%|▉ | 37034/371472 [2:59:21<25:26:49, 3.65it/s] 10%|▉ | 37035/371472 [2:59:21<25:06:51, 3.70it/s] 10%|▉ | 37036/371472 [2:59:21<25:13:28, 3.68it/s] 10%|▉ | 37037/371472 [2:59:21<25:42:56, 3.61it/s] 10%|▉ | 37038/371472 [2:59:22<25:32:30, 3.64it/s] 10%|▉ | 37039/371472 [2:59:22<25:17:23, 3.67it/s] 10%|▉ | 37040/371472 [2:59:22<25:56:49, 3.58it/s] {'loss': 4.363, 'learning_rate': 9.106962011678769e-07, 'epoch': 1.6} + 10%|▉ | 37040/371472 [2:59:22<25:56:49, 3.58it/s] 10%|▉ | 37041/371472 [2:59:23<25:40:53, 3.62it/s] 10%|▉ | 37042/371472 [2:59:23<25:25:41, 3.65it/s] 10%|▉ | 37043/371472 [2:59:23<26:08:49, 3.55it/s] 10%|▉ | 37044/371472 [2:59:23<26:32:21, 3.50it/s] 10%|▉ | 37045/371472 [2:59:24<25:56:10, 3.58it/s] 10%|▉ | 37046/371472 [2:59:24<27:22:29, 3.39it/s] 10%|▉ | 37047/371472 [2:59:24<26:18:39, 3.53it/s] 10%|▉ | 37048/371472 [2:59:25<26:10:28, 3.55it/s] 10%|▉ | 37049/371472 [2:59:25<25:49:35, 3.60it/s] 10%|▉ | 37050/371472 [2:59:25<26:18:30, 3.53it/s] 10%|▉ | 37051/371472 [2:59:25<26:35:42, 3.49it/s] 10%|▉ | 37052/371472 [2:59:26<27:19:55, 3.40it/s] 10%|▉ | 37053/371472 [2:59:26<27:15:07, 3.41it/s] 10%|▉ | 37054/371472 [2:59:26<27:22:23, 3.39it/s] 10%|▉ | 37055/371472 [2:59:27<26:13:28, 3.54it/s] 10%|▉ | 37056/371472 [2:59:27<26:50:23, 3.46it/s] 10%|▉ | 37057/371472 [2:59:27<25:55:27, 3.58it/s] 10%|▉ | 37058/371472 [2:59:27<26:14:21, 3.54it/s] 10%|▉ | 37059/371472 [2:59:28<25:17:49, 3.67it/s] 10%|▉ | 37060/371472 [2:59:28<27:53:15, 3.33it/s] {'loss': 4.2414, 'learning_rate': 9.106477191923979e-07, 'epoch': 1.6} + 10%|▉ | 37060/371472 [2:59:28<27:53:15, 3.33it/s] 10%|▉ | 37061/371472 [2:59:28<27:32:06, 3.37it/s] 10%|▉ | 37062/371472 [2:59:29<27:24:14, 3.39it/s] 10%|▉ | 37063/371472 [2:59:29<26:26:35, 3.51it/s] 10%|▉ | 37064/371472 [2:59:29<25:12:59, 3.68it/s] 10%|▉ | 37065/371472 [2:59:30<27:27:51, 3.38it/s] 10%|▉ | 37066/371472 [2:59:30<26:33:49, 3.50it/s] 10%|▉ | 37067/371472 [2:59:30<26:11:00, 3.55it/s] 10%|▉ | 37068/371472 [2:59:30<25:55:57, 3.58it/s] 10%|▉ | 37069/371472 [2:59:31<25:27:49, 3.65it/s] 10%|▉ | 37070/371472 [2:59:31<25:18:42, 3.67it/s] 10%|▉ | 37071/371472 [2:59:31<25:28:06, 3.65it/s] 10%|▉ | 37072/371472 [2:59:31<24:36:50, 3.77it/s] 10%|▉ | 37073/371472 [2:59:32<24:45:03, 3.75it/s] 10%|▉ | 37074/371472 [2:59:32<25:18:47, 3.67it/s] 10%|▉ | 37075/371472 [2:59:32<24:30:54, 3.79it/s] 10%|▉ | 37076/371472 [2:59:32<25:21:46, 3.66it/s] 10%|▉ | 37077/371472 [2:59:33<25:13:31, 3.68it/s] 10%|▉ | 37078/371472 [2:59:33<24:22:58, 3.81it/s] 10%|▉ | 37079/371472 [2:59:33<26:01:23, 3.57it/s] 10%|▉ | 37080/371472 [2:59:34<25:25:52, 3.65it/s] {'loss': 4.5554, 'learning_rate': 9.105992372169191e-07, 'epoch': 1.6} + 10%|▉ | 37080/371472 [2:59:34<25:25:52, 3.65it/s] 10%|▉ | 37081/371472 [2:59:34<25:26:00, 3.65it/s] 10%|▉ | 37082/371472 [2:59:34<27:32:19, 3.37it/s] 10%|▉ | 37083/371472 [2:59:35<29:38:15, 3.13it/s] 10%|▉ | 37084/371472 [2:59:35<29:08:29, 3.19it/s] 10%|▉ | 37085/371472 [2:59:35<27:42:31, 3.35it/s] 10%|▉ | 37086/371472 [2:59:35<26:46:37, 3.47it/s] 10%|▉ | 37087/371472 [2:59:36<27:01:32, 3.44it/s] 10%|▉ | 37088/371472 [2:59:36<28:11:50, 3.29it/s] 10%|▉ | 37089/371472 [2:59:36<27:56:37, 3.32it/s] 10%|▉ | 37090/371472 [2:59:37<27:49:18, 3.34it/s] 10%|▉ | 37091/371472 [2:59:37<27:30:43, 3.38it/s] 10%|▉ | 37092/371472 [2:59:37<27:16:29, 3.41it/s] 10%|▉ | 37093/371472 [2:59:37<26:59:48, 3.44it/s] 10%|▉ | 37094/371472 [2:59:38<27:21:08, 3.40it/s] 10%|▉ | 37095/371472 [2:59:38<26:21:21, 3.52it/s] 10%|▉ | 37096/371472 [2:59:38<26:40:06, 3.48it/s] 10%|▉ | 37097/371472 [2:59:39<27:24:46, 3.39it/s] 10%|▉ | 37098/371472 [2:59:39<26:36:24, 3.49it/s] 10%|▉ | 37099/371472 [2:59:39<27:07:23, 3.42it/s] 10%|▉ | 37100/371472 [2:59:39<26:23:57, 3.52it/s] {'loss': 4.3394, 'learning_rate': 9.105507552414402e-07, 'epoch': 1.6} + 10%|▉ | 37100/371472 [2:59:39<26:23:57, 3.52it/s] 10%|▉ | 37101/371472 [2:59:40<26:55:47, 3.45it/s] 10%|▉ | 37102/371472 [2:59:40<25:44:12, 3.61it/s] 10%|▉ | 37103/371472 [2:59:40<26:21:11, 3.52it/s] 10%|▉ | 37104/371472 [2:59:41<26:37:48, 3.49it/s] 10%|▉ | 37105/371472 [2:59:41<26:06:33, 3.56it/s] 10%|▉ | 37106/371472 [2:59:41<25:00:34, 3.71it/s] 10%|▉ | 37107/371472 [2:59:41<24:32:32, 3.78it/s] 10%|▉ | 37108/371472 [2:59:42<26:19:34, 3.53it/s] 10%|▉ | 37109/371472 [2:59:42<26:49:18, 3.46it/s] 10%|▉ | 37110/371472 [2:59:42<27:17:35, 3.40it/s] 10%|▉ | 37111/371472 [2:59:43<26:22:17, 3.52it/s] 10%|▉ | 37112/371472 [2:59:43<26:17:10, 3.53it/s] 10%|▉ | 37113/371472 [2:59:43<25:57:27, 3.58it/s] 10%|▉ | 37114/371472 [2:59:43<26:17:54, 3.53it/s] 10%|▉ | 37115/371472 [2:59:44<25:48:53, 3.60it/s] 10%|▉ | 37116/371472 [2:59:44<25:07:00, 3.70it/s] 10%|▉ | 37117/371472 [2:59:44<25:36:45, 3.63it/s] 10%|▉ | 37118/371472 [2:59:45<26:01:21, 3.57it/s] 10%|▉ | 37119/371472 [2:59:45<25:02:31, 3.71it/s] 10%|▉ | 37120/371472 [2:59:45<25:56:21, 3.58it/s] {'loss': 4.4551, 'learning_rate': 9.105022732659612e-07, 'epoch': 1.6} + 10%|▉ | 37120/371472 [2:59:45<25:56:21, 3.58it/s] 10%|▉ | 37121/371472 [2:59:45<25:58:43, 3.58it/s] 10%|▉ | 37122/371472 [2:59:46<27:14:30, 3.41it/s] 10%|▉ | 37123/371472 [2:59:46<26:34:19, 3.50it/s] 10%|▉ | 37124/371472 [2:59:46<26:11:48, 3.55it/s] 10%|▉ | 37125/371472 [2:59:46<25:41:15, 3.62it/s] 10%|▉ | 37126/371472 [2:59:47<26:55:57, 3.45it/s] 10%|▉ | 37127/371472 [2:59:47<27:20:18, 3.40it/s] 10%|▉ | 37128/371472 [2:59:47<26:26:51, 3.51it/s] 10%|▉ | 37129/371472 [2:59:48<25:30:28, 3.64it/s] 10%|▉ | 37130/371472 [2:59:48<25:08:34, 3.69it/s] 10%|▉ | 37131/371472 [2:59:48<25:55:27, 3.58it/s] 10%|▉ | 37132/371472 [2:59:48<25:54:55, 3.58it/s] 10%|▉ | 37133/371472 [2:59:49<26:12:39, 3.54it/s] 10%|▉ | 37134/371472 [2:59:49<26:36:45, 3.49it/s] 10%|▉ | 37135/371472 [2:59:49<26:11:19, 3.55it/s] 10%|▉ | 37136/371472 [2:59:50<27:09:12, 3.42it/s] 10%|▉ | 37137/371472 [2:59:50<26:22:19, 3.52it/s] 10%|▉ | 37138/371472 [2:59:50<26:21:31, 3.52it/s] 10%|▉ | 37139/371472 [2:59:50<25:50:07, 3.59it/s] 10%|▉ | 37140/371472 [2:59:51<25:02:58, 3.71it/s] {'loss': 4.643, 'learning_rate': 9.104537912904824e-07, 'epoch': 1.6} + 10%|▉ | 37140/371472 [2:59:51<25:02:58, 3.71it/s] 10%|▉ | 37141/371472 [2:59:51<24:59:25, 3.72it/s] 10%|▉ | 37142/371472 [2:59:51<25:13:01, 3.68it/s] 10%|▉ | 37143/371472 [2:59:51<25:10:15, 3.69it/s] 10%|▉ | 37144/371472 [2:59:52<25:35:31, 3.63it/s] 10%|▉ | 37145/371472 [2:59:52<25:31:09, 3.64it/s] 10%|▉ | 37146/371472 [2:59:52<25:11:24, 3.69it/s] 10%|▉ | 37147/371472 [2:59:53<25:15:03, 3.68it/s] 10%|█ | 37148/371472 [2:59:53<25:05:34, 3.70it/s] 10%|█ | 37149/371472 [2:59:53<26:33:10, 3.50it/s] 10%|█ | 37150/371472 [2:59:53<26:34:06, 3.50it/s] 10%|█ | 37151/371472 [2:59:54<26:05:18, 3.56it/s] 10%|█ | 37152/371472 [2:59:54<26:03:09, 3.56it/s] 10%|█ | 37153/371472 [2:59:54<25:42:25, 3.61it/s] 10%|█ | 37154/371472 [2:59:55<28:57:40, 3.21it/s] 10%|█ | 37155/371472 [2:59:55<27:30:06, 3.38it/s] 10%|█ | 37156/371472 [2:59:55<27:33:21, 3.37it/s] 10%|█ | 37157/371472 [2:59:56<27:06:15, 3.43it/s] 10%|█ | 37158/371472 [2:59:56<26:33:30, 3.50it/s] 10%|█ | 37159/371472 [2:59:56<26:07:07, 3.56it/s] 10%|█ | 37160/371472 [2:59:56<25:16:33, 3.67it/s] {'loss': 4.2845, 'learning_rate': 9.104053093150035e-07, 'epoch': 1.6} + 10%|█ | 37160/371472 [2:59:56<25:16:33, 3.67it/s] 10%|█ | 37161/371472 [2:59:57<25:14:56, 3.68it/s] 10%|█ | 37162/371472 [2:59:57<24:57:48, 3.72it/s] 10%|█ | 37163/371472 [2:59:57<24:28:55, 3.79it/s] 10%|█ | 37164/371472 [2:59:57<25:50:43, 3.59it/s] 10%|█ | 37165/371472 [2:59:58<26:44:01, 3.47it/s] 10%|█ | 37166/371472 [2:59:58<26:00:43, 3.57it/s] 10%|█ | 37167/371472 [2:59:58<26:27:53, 3.51it/s] 10%|█ | 37168/371472 [2:59:59<26:49:05, 3.46it/s] 10%|█ | 37169/371472 [2:59:59<26:28:13, 3.51it/s] 10%|█ | 37170/371472 [2:59:59<25:58:13, 3.58it/s] 10%|█ | 37171/371472 [2:59:59<26:16:25, 3.53it/s] 10%|█ | 37172/371472 [3:00:00<27:00:15, 3.44it/s] 10%|█ | 37173/371472 [3:00:00<27:34:55, 3.37it/s] 10%|█ | 37174/371472 [3:00:00<28:54:37, 3.21it/s] 10%|█ | 37175/371472 [3:00:01<28:53:54, 3.21it/s] 10%|█ | 37176/371472 [3:00:01<28:56:56, 3.21it/s] 10%|█ | 37177/371472 [3:00:01<27:39:30, 3.36it/s] 10%|█ | 37178/371472 [3:00:02<26:41:57, 3.48it/s] 10%|█ | 37179/371472 [3:00:02<27:29:24, 3.38it/s] 10%|█ | 37180/371472 [3:00:02<27:31:48, 3.37it/s] {'loss': 4.526, 'learning_rate': 9.103568273395246e-07, 'epoch': 1.6} + 10%|█ | 37180/371472 [3:00:02<27:31:48, 3.37it/s] 10%|█ | 37181/371472 [3:00:02<26:41:01, 3.48it/s] 10%|█ | 37182/371472 [3:00:03<26:41:51, 3.48it/s] 10%|█ | 37183/371472 [3:00:03<26:16:14, 3.53it/s] 10%|█ | 37184/371472 [3:00:03<26:01:50, 3.57it/s] 10%|█ | 37185/371472 [3:00:03<25:13:03, 3.68it/s] 10%|█ | 37186/371472 [3:00:04<26:12:18, 3.54it/s] 10%|█ | 37187/371472 [3:00:04<27:19:17, 3.40it/s] 10%|█ | 37188/371472 [3:00:04<29:00:22, 3.20it/s] 10%|█ | 37189/371472 [3:00:05<29:36:15, 3.14it/s] 10%|█ | 37190/371472 [3:00:05<27:29:02, 3.38it/s] 10%|█ | 37191/371472 [3:00:05<26:49:12, 3.46it/s] 10%|█ | 37192/371472 [3:00:06<25:48:03, 3.60it/s] 10%|█ | 37193/371472 [3:00:06<25:32:11, 3.64it/s] 10%|█ | 37194/371472 [3:00:06<26:06:27, 3.56it/s] 10%|█ | 37195/371472 [3:00:06<25:56:03, 3.58it/s] 10%|█ | 37196/371472 [3:00:07<25:28:54, 3.64it/s] 10%|█ | 37197/371472 [3:00:07<26:20:11, 3.53it/s] 10%|█ | 37198/371472 [3:00:07<25:54:29, 3.58it/s] 10%|█ | 37199/371472 [3:00:08<26:47:29, 3.47it/s] 10%|█ | 37200/371472 [3:00:08<26:21:53, 3.52it/s] {'loss': 4.5398, 'learning_rate': 9.103083453640457e-07, 'epoch': 1.6} + 10%|█ | 37200/371472 [3:00:08<26:21:53, 3.52it/s] 10%|█ | 37201/371472 [3:00:08<25:56:11, 3.58it/s] 10%|█ | 37202/371472 [3:00:08<26:01:00, 3.57it/s] 10%|█ | 37203/371472 [3:00:09<27:15:35, 3.41it/s] 10%|█ | 37204/371472 [3:00:09<26:56:21, 3.45it/s] 10%|█ | 37205/371472 [3:00:09<26:44:26, 3.47it/s] 10%|█ | 37206/371472 [3:00:10<26:43:05, 3.48it/s] 10%|█ | 37207/371472 [3:00:10<26:44:24, 3.47it/s] 10%|█ | 37208/371472 [3:00:10<25:55:10, 3.58it/s] 10%|█ | 37209/371472 [3:00:10<26:21:22, 3.52it/s] 10%|█ | 37210/371472 [3:00:11<26:16:10, 3.53it/s] 10%|█ | 37211/371472 [3:00:11<27:48:12, 3.34it/s] 10%|█ | 37212/371472 [3:00:11<28:10:02, 3.30it/s] 10%|█ | 37213/371472 [3:00:12<27:05:35, 3.43it/s] 10%|█ | 37214/371472 [3:00:12<32:44:17, 2.84it/s] 10%|█ | 37215/371472 [3:00:12<30:09:51, 3.08it/s] 10%|█ | 37216/371472 [3:00:13<30:21:37, 3.06it/s] 10%|█ | 37217/371472 [3:00:13<29:27:59, 3.15it/s] 10%|█ | 37218/371472 [3:00:13<27:31:32, 3.37it/s] 10%|█ | 37219/371472 [3:00:14<28:38:33, 3.24it/s] 10%|█ | 37220/371472 [3:00:14<27:51:17, 3.33it/s] {'loss': 4.3236, 'learning_rate': 9.102598633885668e-07, 'epoch': 1.6} + 10%|█ | 37220/371472 [3:00:14<27:51:17, 3.33it/s] 10%|█ | 37221/371472 [3:00:14<28:43:59, 3.23it/s] 10%|█ | 37222/371472 [3:00:14<28:54:00, 3.21it/s] 10%|█ | 37223/371472 [3:00:15<28:25:43, 3.27it/s] 10%|█ | 37224/371472 [3:00:15<27:47:28, 3.34it/s] 10%|█ | 37225/371472 [3:00:15<29:10:56, 3.18it/s] 10%|█ | 37226/371472 [3:00:16<28:16:55, 3.28it/s] 10%|█ | 37227/371472 [3:00:16<29:17:29, 3.17it/s] 10%|█ | 37228/371472 [3:00:16<28:07:23, 3.30it/s] 10%|█ | 37229/371472 [3:00:17<27:25:22, 3.39it/s] 10%|█ | 37230/371472 [3:00:17<25:54:36, 3.58it/s] 10%|█ | 37231/371472 [3:00:17<25:59:41, 3.57it/s] 10%|█ | 37232/371472 [3:00:17<29:00:17, 3.20it/s] 10%|█ | 37233/371472 [3:00:18<28:13:43, 3.29it/s] 10%|█ | 37234/371472 [3:00:18<27:19:44, 3.40it/s] 10%|█ | 37235/371472 [3:00:18<26:25:20, 3.51it/s] 10%|█ | 37236/371472 [3:00:19<25:48:22, 3.60it/s] 10%|█ | 37237/371472 [3:00:19<26:21:23, 3.52it/s] 10%|█ | 37238/371472 [3:00:19<26:08:32, 3.55it/s] 10%|█ | 37239/371472 [3:00:19<25:10:54, 3.69it/s] 10%|█ | 37240/371472 [3:00:20<25:29:57, 3.64it/s] {'loss': 4.4828, 'learning_rate': 9.10211381413088e-07, 'epoch': 1.6} + 10%|█ | 37240/371472 [3:00:20<25:29:57, 3.64it/s] 10%|█ | 37241/371472 [3:00:20<25:20:42, 3.66it/s] 10%|█ | 37242/371472 [3:00:20<25:38:31, 3.62it/s] 10%|█ | 37243/371472 [3:00:21<25:39:23, 3.62it/s] 10%|█ | 37244/371472 [3:00:21<25:52:45, 3.59it/s] 10%|█ | 37245/371472 [3:00:21<27:59:24, 3.32it/s] 10%|█ | 37246/371472 [3:00:21<28:36:37, 3.25it/s] 10%|█ | 37247/371472 [3:00:22<27:50:15, 3.34it/s] 10%|█ | 37248/371472 [3:00:22<27:20:29, 3.40it/s] 10%|█ | 37249/371472 [3:00:22<27:11:08, 3.42it/s] 10%|█ | 37250/371472 [3:00:23<26:57:16, 3.44it/s] 10%|█ | 37251/371472 [3:00:23<27:05:03, 3.43it/s] 10%|█ | 37252/371472 [3:00:23<27:16:00, 3.40it/s] 10%|█ | 37253/371472 [3:00:23<26:24:04, 3.52it/s] 10%|█ | 37254/371472 [3:00:24<25:43:50, 3.61it/s] 10%|█ | 37255/371472 [3:00:24<28:53:01, 3.21it/s] 10%|█ | 37256/371472 [3:00:24<27:34:29, 3.37it/s] 10%|█ | 37257/371472 [3:00:25<27:26:20, 3.38it/s] 10%|█ | 37258/371472 [3:00:25<25:58:57, 3.57it/s] 10%|█ | 37259/371472 [3:00:25<26:15:35, 3.54it/s] 10%|█ | 37260/371472 [3:00:25<25:30:19, 3.64it/s] {'loss': 4.5626, 'learning_rate': 9.101628994376091e-07, 'epoch': 1.6} + 10%|█ | 37260/371472 [3:00:25<25:30:19, 3.64it/s] 10%|█ | 37261/371472 [3:00:26<24:49:54, 3.74it/s] 10%|█ | 37262/371472 [3:00:26<24:26:53, 3.80it/s] 10%|█ | 37263/371472 [3:00:26<26:01:48, 3.57it/s] 10%|█ | 37264/371472 [3:00:27<25:32:39, 3.63it/s] 10%|█ | 37265/371472 [3:00:27<25:14:14, 3.68it/s] 10%|█ | 37266/371472 [3:00:27<24:33:02, 3.78it/s] 10%|█ | 37267/371472 [3:00:27<26:39:21, 3.48it/s] 10%|█ | 37268/371472 [3:00:28<26:49:31, 3.46it/s] 10%|█ | 37269/371472 [3:00:28<28:07:20, 3.30it/s] 10%|█ | 37270/371472 [3:00:28<27:55:36, 3.32it/s] 10%|█ | 37271/371472 [3:00:29<27:05:43, 3.43it/s] 10%|█ | 37272/371472 [3:00:29<27:11:09, 3.41it/s] 10%|█ | 37273/371472 [3:00:29<27:34:20, 3.37it/s] 10%|█ | 37274/371472 [3:00:30<27:51:06, 3.33it/s] 10%|█ | 37275/371472 [3:00:30<27:21:44, 3.39it/s] 10%|█ | 37276/371472 [3:00:30<26:43:38, 3.47it/s] 10%|█ | 37277/371472 [3:00:30<26:11:51, 3.54it/s] 10%|█ | 37278/371472 [3:00:31<26:42:00, 3.48it/s] 10%|█ | 37279/371472 [3:00:31<27:35:11, 3.37it/s] 10%|█ | 37280/371472 [3:00:31<29:14:25, 3.17it/s] {'loss': 4.5117, 'learning_rate': 9.101144174621302e-07, 'epoch': 1.61} + 10%|█ | 37280/371472 [3:00:31<29:14:25, 3.17it/s] 10%|█ | 37281/371472 [3:00:32<29:03:51, 3.19it/s] 10%|█ | 37282/371472 [3:00:32<27:57:38, 3.32it/s] 10%|█ | 37283/371472 [3:00:32<27:06:01, 3.43it/s] 10%|█ | 37284/371472 [3:00:32<27:52:22, 3.33it/s] 10%|█ | 37285/371472 [3:00:33<27:27:45, 3.38it/s] 10%|█ | 37286/371472 [3:00:33<27:40:17, 3.35it/s] 10%|█ | 37287/371472 [3:00:33<27:04:01, 3.43it/s] 10%|█ | 37288/371472 [3:00:34<26:32:25, 3.50it/s] 10%|█ | 37289/371472 [3:00:34<26:33:20, 3.50it/s] 10%|█ | 37290/371472 [3:00:34<26:16:36, 3.53it/s] 10%|█ | 37291/371472 [3:00:34<26:21:53, 3.52it/s] 10%|█ | 37292/371472 [3:00:35<26:37:57, 3.49it/s] 10%|█ | 37293/371472 [3:00:35<25:51:51, 3.59it/s] 10%|█ | 37294/371472 [3:00:35<27:24:37, 3.39it/s] 10%|█ | 37295/371472 [3:00:36<27:04:55, 3.43it/s] 10%|█ | 37296/371472 [3:00:36<27:06:22, 3.42it/s] 10%|█ | 37297/371472 [3:00:36<27:12:52, 3.41it/s] 10%|█ | 37298/371472 [3:00:36<26:30:21, 3.50it/s] 10%|█ | 37299/371472 [3:00:37<26:17:38, 3.53it/s] 10%|█ | 37300/371472 [3:00:37<25:59:20, 3.57it/s] {'loss': 4.4454, 'learning_rate': 9.100659354866512e-07, 'epoch': 1.61} + 10%|█ | 37300/371472 [3:00:37<25:59:20, 3.57it/s] 10%|█ | 37301/371472 [3:00:37<25:42:53, 3.61it/s] 10%|█ | 37302/371472 [3:00:38<25:19:29, 3.67it/s] 10%|█ | 37303/371472 [3:00:38<25:19:44, 3.66it/s] 10%|█ | 37304/371472 [3:00:38<27:23:10, 3.39it/s] 10%|█ | 37305/371472 [3:00:39<27:57:12, 3.32it/s] 10%|█ | 37306/371472 [3:00:39<27:27:09, 3.38it/s] 10%|█ | 37307/371472 [3:00:39<26:43:49, 3.47it/s] 10%|█ | 37308/371472 [3:00:39<28:00:00, 3.32it/s] 10%|█ | 37309/371472 [3:00:40<28:24:26, 3.27it/s] 10%|█ | 37310/371472 [3:00:40<26:44:25, 3.47it/s] 10%|█ | 37311/371472 [3:00:40<26:23:53, 3.52it/s] 10%|█ | 37312/371472 [3:00:40<25:39:45, 3.62it/s] 10%|█ | 37313/371472 [3:00:41<26:16:14, 3.53it/s] 10%|█ | 37314/371472 [3:00:41<27:55:25, 3.32it/s] 10%|█ | 37315/371472 [3:00:41<26:50:46, 3.46it/s] 10%|█ | 37316/371472 [3:00:42<27:38:25, 3.36it/s] 10%|█ | 37317/371472 [3:00:42<26:40:38, 3.48it/s] 10%|█ | 37318/371472 [3:00:42<25:45:30, 3.60it/s] 10%|█ | 37319/371472 [3:00:43<25:42:52, 3.61it/s] 10%|█ | 37320/371472 [3:00:43<26:52:45, 3.45it/s] {'loss': 4.3637, 'learning_rate': 9.100174535111724e-07, 'epoch': 1.61} + 10%|█ | 37320/371472 [3:00:43<26:52:45, 3.45it/s] 10%|█ | 37321/371472 [3:00:43<28:44:41, 3.23it/s] 10%|█ | 37322/371472 [3:00:43<28:36:29, 3.24it/s] 10%|█ | 37323/371472 [3:00:44<28:14:34, 3.29it/s] 10%|█ | 37324/371472 [3:00:44<27:49:13, 3.34it/s] 10%|█ | 37325/371472 [3:00:44<27:14:17, 3.41it/s] 10%|█ | 37326/371472 [3:00:45<28:38:43, 3.24it/s] 10%|█ | 37327/371472 [3:00:45<27:26:30, 3.38it/s] 10%|█ | 37328/371472 [3:00:45<27:04:45, 3.43it/s] 10%|█ | 37329/371472 [3:00:46<26:41:59, 3.48it/s] 10%|█ | 37330/371472 [3:00:46<26:05:58, 3.56it/s] 10%|█ | 37331/371472 [3:00:46<25:40:42, 3.61it/s] 10%|█ | 37332/371472 [3:00:46<25:49:12, 3.59it/s] 10%|█ | 37333/371472 [3:00:47<25:18:29, 3.67it/s] 10%|█ | 37334/371472 [3:00:47<26:12:44, 3.54it/s] 10%|█ | 37335/371472 [3:00:47<26:23:40, 3.52it/s] 10%|█ | 37336/371472 [3:00:47<26:01:32, 3.57it/s] 10%|█ | 37337/371472 [3:00:48<25:58:47, 3.57it/s] 10%|█ | 37338/371472 [3:00:48<25:45:53, 3.60it/s] 10%|█ | 37339/371472 [3:00:48<27:21:12, 3.39it/s] 10%|█ | 37340/371472 [3:00:49<28:44:46, 3.23it/s] {'loss': 4.437, 'learning_rate': 9.099689715356935e-07, 'epoch': 1.61} + 10%|█ | 37340/371472 [3:00:49<28:44:46, 3.23it/s] 10%|█ | 37341/371472 [3:00:49<27:35:56, 3.36it/s] 10%|█ | 37342/371472 [3:00:49<26:49:14, 3.46it/s] 10%|█ | 37343/371472 [3:00:49<26:05:04, 3.56it/s] 10%|█ | 37344/371472 [3:00:50<25:30:44, 3.64it/s] 10%|█ | 37345/371472 [3:00:50<28:09:27, 3.30it/s] 10%|█ | 37346/371472 [3:00:50<29:51:05, 3.11it/s] 10%|█ | 37347/371472 [3:00:51<28:17:37, 3.28it/s] 10%|█ | 37348/371472 [3:00:51<26:56:56, 3.44it/s] 10%|█ | 37349/371472 [3:00:51<28:22:44, 3.27it/s] 10%|█ | 37350/371472 [3:00:52<28:09:55, 3.30it/s] 10%|█ | 37351/371472 [3:00:52<26:56:53, 3.44it/s] 10%|█ | 37352/371472 [3:00:52<26:15:58, 3.53it/s] 10%|█ | 37353/371472 [3:00:53<27:34:04, 3.37it/s] 10%|█ | 37354/371472 [3:00:53<26:57:11, 3.44it/s] 10%|█ | 37355/371472 [3:00:53<26:57:32, 3.44it/s] 10%|█ | 37356/371472 [3:00:53<26:52:59, 3.45it/s] 10%|█ | 37357/371472 [3:00:54<26:37:54, 3.48it/s] 10%|█ | 37358/371472 [3:00:54<26:07:43, 3.55it/s] 10%|█ | 37359/371472 [3:00:54<25:09:37, 3.69it/s] 10%|█ | 37360/371472 [3:00:54<26:36:09, 3.49it/s] {'loss': 4.1728, 'learning_rate': 9.099204895602145e-07, 'epoch': 1.61} + 10%|█ | 37360/371472 [3:00:54<26:36:09, 3.49it/s] 10%|█ | 37361/371472 [3:00:55<26:36:35, 3.49it/s] 10%|█ | 37362/371472 [3:00:55<25:48:25, 3.60it/s] 10%|█ | 37363/371472 [3:00:55<26:16:06, 3.53it/s] 10%|█ | 37364/371472 [3:00:56<26:29:25, 3.50it/s] 10%|█ | 37365/371472 [3:00:56<26:03:16, 3.56it/s] 10%|█ | 37366/371472 [3:00:56<25:41:33, 3.61it/s] 10%|█ | 37367/371472 [3:00:56<27:18:47, 3.40it/s] 10%|█ | 37368/371472 [3:00:57<26:40:26, 3.48it/s] 10%|█ | 37369/371472 [3:00:57<26:15:32, 3.53it/s] 10%|█ | 37370/371472 [3:00:57<26:06:27, 3.55it/s] 10%|█ | 37371/371472 [3:00:58<25:26:25, 3.65it/s] 10%|█ | 37372/371472 [3:00:58<24:44:10, 3.75it/s] 10%|█ | 37373/371472 [3:00:58<24:25:11, 3.80it/s] 10%|█ | 37374/371472 [3:00:58<24:42:33, 3.76it/s] 10%|█ | 37375/371472 [3:00:59<24:48:09, 3.74it/s] 10%|█ | 37376/371472 [3:00:59<26:00:54, 3.57it/s] 10%|█ | 37377/371472 [3:00:59<29:27:05, 3.15it/s] 10%|█ | 37378/371472 [3:01:00<28:30:46, 3.25it/s] 10%|█ | 37379/371472 [3:01:00<27:40:12, 3.35it/s] 10%|█ | 37380/371472 [3:01:00<26:54:22, 3.45it/s] {'loss': 4.491, 'learning_rate': 9.098720075847357e-07, 'epoch': 1.61} + 10%|█ | 37380/371472 [3:01:00<26:54:22, 3.45it/s] 10%|█ | 37381/371472 [3:01:00<26:19:49, 3.52it/s] 10%|█ | 37382/371472 [3:01:01<25:43:06, 3.61it/s] 10%|█ | 37383/371472 [3:01:01<25:08:55, 3.69it/s] 10%|█ | 37384/371472 [3:01:01<24:42:59, 3.75it/s] 10%|█ | 37385/371472 [3:01:01<25:24:44, 3.65it/s] 10%|█ | 37386/371472 [3:01:02<25:09:22, 3.69it/s] 10%|█ | 37387/371472 [3:01:02<25:33:12, 3.63it/s] 10%|█ | 37388/371472 [3:01:02<26:04:23, 3.56it/s] 10%|█ | 37389/371472 [3:01:03<26:00:20, 3.57it/s] 10%|█ | 37390/371472 [3:01:03<27:17:09, 3.40it/s] 10%|█ | 37391/371472 [3:01:03<27:40:55, 3.35it/s] 10%|█ | 37392/371472 [3:01:04<27:57:55, 3.32it/s] 10%|█ | 37393/371472 [3:01:04<27:44:40, 3.34it/s] 10%|█ | 37394/371472 [3:01:04<26:44:47, 3.47it/s] 10%|█ | 37395/371472 [3:01:04<26:08:39, 3.55it/s] 10%|█ | 37396/371472 [3:01:05<26:21:26, 3.52it/s] 10%|█ | 37397/371472 [3:01:05<25:08:52, 3.69it/s] 10%|█ | 37398/371472 [3:01:05<25:04:14, 3.70it/s] 10%|█ | 37399/371472 [3:01:06<26:54:14, 3.45it/s] 10%|█ | 37400/371472 [3:01:06<26:41:04, 3.48it/s] {'loss': 4.2619, 'learning_rate': 9.098235256092569e-07, 'epoch': 1.61} + 10%|█ | 37400/371472 [3:01:06<26:41:04, 3.48it/s] 10%|█ | 37401/371472 [3:01:06<29:21:18, 3.16it/s] 10%|█ | 37402/371472 [3:01:06<27:41:35, 3.35it/s] 10%|█ | 37403/371472 [3:01:07<32:14:14, 2.88it/s] 10%|█ | 37404/371472 [3:01:07<31:52:23, 2.91it/s] 10%|█ | 37405/371472 [3:01:07<29:57:00, 3.10it/s] 10%|█ | 37406/371472 [3:01:08<31:47:25, 2.92it/s] 10%|█ | 37407/371472 [3:01:08<29:21:05, 3.16it/s] 10%|█ | 37408/371472 [3:01:08<28:31:42, 3.25it/s] 10%|█ | 37409/371472 [3:01:09<29:12:13, 3.18it/s] 10%|█ | 37410/371472 [3:01:09<28:50:30, 3.22it/s] 10%|█ | 37411/371472 [3:01:09<27:30:16, 3.37it/s] 10%|█ | 37412/371472 [3:01:10<28:41:38, 3.23it/s] 10%|█ | 37413/371472 [3:01:10<27:27:52, 3.38it/s] 10%|█ | 37414/371472 [3:01:10<26:29:29, 3.50it/s] 10%|█ | 37415/371472 [3:01:10<25:49:22, 3.59it/s] 10%|█ | 37416/371472 [3:01:11<26:04:42, 3.56it/s] 10%|█ | 37417/371472 [3:01:11<26:20:41, 3.52it/s] 10%|█ | 37418/371472 [3:01:11<25:24:54, 3.65it/s] 10%|█ | 37419/371472 [3:01:12<25:42:08, 3.61it/s] 10%|█ | 37420/371472 [3:01:12<26:00:19, 3.57it/s] {'loss': 4.4321, 'learning_rate': 9.097750436337778e-07, 'epoch': 1.61} + 10%|█ | 37420/371472 [3:01:12<26:00:19, 3.57it/s] 10%|█ | 37421/371472 [3:01:12<25:33:42, 3.63it/s] 10%|█ | 37422/371472 [3:01:12<25:56:26, 3.58it/s] 10%|█ | 37423/371472 [3:01:13<26:04:02, 3.56it/s] 10%|█ | 37424/371472 [3:01:13<25:35:15, 3.63it/s] 10%|█ | 37425/371472 [3:01:13<26:05:39, 3.56it/s] 10%|█ | 37426/371472 [3:01:14<26:43:16, 3.47it/s] 10%|█ | 37427/371472 [3:01:14<27:04:52, 3.43it/s] 10%|█ | 37428/371472 [3:01:14<28:16:40, 3.28it/s] 10%|█ | 37429/371472 [3:01:14<26:57:08, 3.44it/s] 10%|█ | 37430/371472 [3:01:15<27:42:09, 3.35it/s] 10%|█ | 37431/371472 [3:01:15<27:45:51, 3.34it/s] 10%|█ | 37432/371472 [3:01:15<26:58:13, 3.44it/s] 10%|█ | 37433/371472 [3:01:16<29:21:34, 3.16it/s] 10%|█ | 37434/371472 [3:01:16<28:47:50, 3.22it/s] 10%|█ | 37435/371472 [3:01:16<28:36:39, 3.24it/s] 10%|█ | 37436/371472 [3:01:17<28:00:22, 3.31it/s] 10%|█ | 37437/371472 [3:01:17<26:53:36, 3.45it/s] 10%|█ | 37438/371472 [3:01:17<26:13:33, 3.54it/s] 10%|█ | 37439/371472 [3:01:17<27:33:18, 3.37it/s] 10%|█ | 37440/371472 [3:01:18<30:41:50, 3.02it/s] {'loss': 4.3316, 'learning_rate': 9.097265616582988e-07, 'epoch': 1.61} + 10%|█ | 37440/371472 [3:01:18<30:41:50, 3.02it/s] 10%|█ | 37441/371472 [3:01:18<30:00:24, 3.09it/s] 10%|█ | 37442/371472 [3:01:18<28:10:11, 3.29it/s] 10%|█ | 37443/371472 [3:01:19<27:49:41, 3.33it/s] 10%|█ | 37444/371472 [3:01:19<27:04:49, 3.43it/s] 10%|█ | 37445/371472 [3:01:19<26:33:48, 3.49it/s] 10%|█ | 37446/371472 [3:01:20<25:46:42, 3.60it/s] 10%|█ | 37447/371472 [3:01:20<25:32:46, 3.63it/s] 10%|█ | 37448/371472 [3:01:20<26:46:02, 3.47it/s] 10%|█ | 37449/371472 [3:01:20<25:43:50, 3.61it/s] 10%|█ | 37450/371472 [3:01:21<27:45:35, 3.34it/s] 10%|█ | 37451/371472 [3:01:21<26:14:47, 3.54it/s] 10%|█ | 37452/371472 [3:01:21<28:29:19, 3.26it/s] 10%|█ | 37453/371472 [3:01:22<28:50:59, 3.22it/s] 10%|█ | 37454/371472 [3:01:22<27:22:39, 3.39it/s] 10%|█ | 37455/371472 [3:01:22<27:15:06, 3.40it/s] 10%|█ | 37456/371472 [3:01:22<26:38:31, 3.48it/s] 10%|█ | 37457/371472 [3:01:23<26:33:45, 3.49it/s] 10%|█ | 37458/371472 [3:01:23<27:43:05, 3.35it/s] 10%|█ | 37459/371472 [3:01:23<28:05:19, 3.30it/s] 10%|█ | 37460/371472 [3:01:24<27:25:19, 3.38it/s] {'loss': 4.3961, 'learning_rate': 9.096780796828201e-07, 'epoch': 1.61} + 10%|█ | 37460/371472 [3:01:24<27:25:19, 3.38it/s] 10%|█ | 37461/371472 [3:01:24<28:03:12, 3.31it/s] 10%|█ | 37462/371472 [3:01:24<27:12:02, 3.41it/s] 10%|█ | 37463/371472 [3:01:25<27:08:48, 3.42it/s] 10%|█ | 37464/371472 [3:01:25<26:39:48, 3.48it/s] 10%|█ | 37465/371472 [3:01:25<26:56:24, 3.44it/s] 10%|█ | 37466/371472 [3:01:25<26:54:55, 3.45it/s] 10%|█ | 37467/371472 [3:01:26<27:21:54, 3.39it/s] 10%|█ | 37468/371472 [3:01:26<30:42:04, 3.02it/s] 10%|█ | 37469/371472 [3:01:26<29:04:33, 3.19it/s] 10%|█ | 37470/371472 [3:01:27<27:55:29, 3.32it/s] 10%|█ | 37471/371472 [3:01:27<27:04:00, 3.43it/s] 10%|█ | 37472/371472 [3:01:27<32:03:24, 2.89it/s] 10%|█ | 37473/371472 [3:01:28<30:25:57, 3.05it/s] 10%|█ | 37474/371472 [3:01:28<29:10:50, 3.18it/s] 10%|█ | 37475/371472 [3:01:28<28:01:28, 3.31it/s] 10%|█ | 37476/371472 [3:01:29<26:48:27, 3.46it/s] 10%|█ | 37477/371472 [3:01:29<26:35:04, 3.49it/s] 10%|█ | 37478/371472 [3:01:29<25:36:40, 3.62it/s] 10%|█ | 37479/371472 [3:01:29<26:01:06, 3.57it/s] 10%|█ | 37480/371472 [3:01:30<27:39:18, 3.35it/s] {'loss': 4.345, 'learning_rate': 9.096295977073412e-07, 'epoch': 1.61} + 10%|█ | 37480/371472 [3:01:30<27:39:18, 3.35it/s] 10%|█ | 37481/371472 [3:01:30<26:57:45, 3.44it/s] 10%|█ | 37482/371472 [3:01:30<26:35:35, 3.49it/s] 10%|█ | 37483/371472 [3:01:31<26:37:38, 3.48it/s] 10%|█ | 37484/371472 [3:01:31<26:19:43, 3.52it/s] 10%|█ | 37485/371472 [3:01:31<26:12:11, 3.54it/s] 10%|█ | 37486/371472 [3:01:31<25:22:12, 3.66it/s] 10%|█ | 37487/371472 [3:01:32<25:33:14, 3.63it/s] 10%|█ | 37488/371472 [3:01:32<25:50:43, 3.59it/s] 10%|█ | 37489/371472 [3:01:32<25:44:00, 3.61it/s] 10%|█ | 37490/371472 [3:01:32<24:53:04, 3.73it/s] 10%|█ | 37491/371472 [3:01:33<24:31:29, 3.78it/s] 10%|█ | 37492/371472 [3:01:33<26:14:08, 3.54it/s] 10%|█ | 37493/371472 [3:01:33<26:10:39, 3.54it/s] 10%|█ | 37494/371472 [3:01:34<27:01:46, 3.43it/s] 10%|█ | 37495/371472 [3:01:34<26:31:28, 3.50it/s] 10%|█ | 37496/371472 [3:01:34<27:17:59, 3.40it/s] 10%|█ | 37497/371472 [3:01:34<26:29:32, 3.50it/s] 10%|█ | 37498/371472 [3:01:35<25:59:35, 3.57it/s] 10%|█ | 37499/371472 [3:01:35<26:32:08, 3.50it/s] 10%|█ | 37500/371472 [3:01:35<26:34:34, 3.49it/s] {'loss': 4.168, 'learning_rate': 9.095811157318623e-07, 'epoch': 1.62} + 10%|█ | 37500/371472 [3:01:35<26:34:34, 3.49it/s] 10%|█ | 37501/371472 [3:01:36<26:37:58, 3.48it/s] 10%|█ | 37502/371472 [3:01:36<27:32:35, 3.37it/s] 10%|█ | 37503/371472 [3:01:36<26:42:42, 3.47it/s] 10%|█ | 37504/371472 [3:01:36<26:23:58, 3.51it/s] 10%|█ | 37505/371472 [3:01:37<27:04:45, 3.43it/s] 10%|█ | 37506/371472 [3:01:37<27:28:20, 3.38it/s] 10%|█ | 37507/371472 [3:01:37<29:51:06, 3.11it/s] 10%|█ | 37508/371472 [3:01:38<28:51:14, 3.22it/s] 10%|█ | 37509/371472 [3:01:38<29:10:20, 3.18it/s] 10%|█ | 37510/371472 [3:01:38<27:24:17, 3.39it/s] 10%|█ | 37511/371472 [3:01:39<29:09:24, 3.18it/s] 10%|█ | 37512/371472 [3:01:39<27:54:08, 3.32it/s] 10%|█ | 37513/371472 [3:01:39<26:23:13, 3.52it/s] 10%|█ | 37514/371472 [3:01:40<27:34:06, 3.36it/s] 10%|█ | 37515/371472 [3:01:40<26:44:41, 3.47it/s] 10%|█ | 37516/371472 [3:01:40<25:30:09, 3.64it/s] 10%|█ | 37517/371472 [3:01:40<25:45:33, 3.60it/s] 10%|█ | 37518/371472 [3:01:41<26:22:59, 3.52it/s] 10%|█ | 37519/371472 [3:01:41<26:37:11, 3.48it/s] 10%|█ | 37520/371472 [3:01:41<26:29:47, 3.50it/s] {'loss': 4.4326, 'learning_rate': 9.095326337563834e-07, 'epoch': 1.62} + 10%|█ | 37520/371472 [3:01:41<26:29:47, 3.50it/s] 10%|█ | 37521/371472 [3:01:41<26:21:59, 3.52it/s] 10%|█ | 37522/371472 [3:01:42<26:03:47, 3.56it/s] 10%|█ | 37523/371472 [3:01:42<28:14:50, 3.28it/s] 10%|█ | 37524/371472 [3:01:42<27:41:51, 3.35it/s] 10%|█ | 37525/371472 [3:01:43<30:14:04, 3.07it/s] 10%|█ | 37526/371472 [3:01:43<28:25:28, 3.26it/s] 10%|█ | 37527/371472 [3:01:43<28:08:11, 3.30it/s] 10%|█ | 37528/371472 [3:01:44<27:10:03, 3.41it/s] 10%|█ | 37529/371472 [3:01:44<26:42:25, 3.47it/s] 10%|█ | 37530/371472 [3:01:44<26:12:29, 3.54it/s] 10%|█ | 37531/371472 [3:01:44<25:38:10, 3.62it/s] 10%|█ | 37532/371472 [3:01:45<25:19:38, 3.66it/s] 10%|█ | 37533/371472 [3:01:45<26:47:19, 3.46it/s] 10%|█ | 37534/371472 [3:01:45<27:07:56, 3.42it/s] 10%|█ | 37535/371472 [3:01:46<26:41:54, 3.47it/s] 10%|█ | 37536/371472 [3:01:46<26:18:55, 3.52it/s] 10%|█ | 37537/371472 [3:01:46<25:46:05, 3.60it/s] 10%|█ | 37538/371472 [3:01:46<26:01:30, 3.56it/s] 10%|█ | 37539/371472 [3:01:47<27:25:52, 3.38it/s] 10%|█ | 37540/371472 [3:01:47<28:01:34, 3.31it/s] {'loss': 4.2533, 'learning_rate': 9.094841517809045e-07, 'epoch': 1.62} + 10%|█ | 37540/371472 [3:01:47<28:01:34, 3.31it/s] 10%|█ | 37541/371472 [3:01:47<28:48:33, 3.22it/s] 10%|█ | 37542/371472 [3:01:48<27:03:44, 3.43it/s] 10%|█ | 37543/371472 [3:01:48<26:36:15, 3.49it/s] 10%|█ | 37544/371472 [3:01:48<26:03:34, 3.56it/s] 10%|█ | 37545/371472 [3:01:48<26:15:59, 3.53it/s] 10%|█ | 37546/371472 [3:01:49<26:11:43, 3.54it/s] 10%|█ | 37547/371472 [3:01:49<26:16:18, 3.53it/s] 10%|█ | 37548/371472 [3:01:49<27:19:36, 3.39it/s] 10%|█ | 37549/371472 [3:01:50<26:26:43, 3.51it/s] 10%|█ | 37550/371472 [3:01:50<27:00:40, 3.43it/s] 10%|█ | 37551/371472 [3:01:50<25:53:11, 3.58it/s] 10%|█ | 37552/371472 [3:01:50<27:03:41, 3.43it/s] 10%|█ | 37553/371472 [3:01:51<28:48:47, 3.22it/s] 10%|█ | 37554/371472 [3:01:51<28:16:54, 3.28it/s] 10%|█ | 37555/371472 [3:01:51<27:28:41, 3.38it/s] 10%|█ | 37556/371472 [3:01:52<28:36:20, 3.24it/s] 10%|█ | 37557/371472 [3:01:52<28:29:56, 3.25it/s] 10%|█ | 37558/371472 [3:01:52<27:33:21, 3.37it/s] 10%|█ | 37559/371472 [3:01:53<26:03:32, 3.56it/s] 10%|█ | 37560/371472 [3:01:53<26:49:29, 3.46it/s] {'loss': 4.3453, 'learning_rate': 9.094356698054256e-07, 'epoch': 1.62} + 10%|█ | 37560/371472 [3:01:53<26:49:29, 3.46it/s] 10%|█ | 37561/371472 [3:01:53<25:46:22, 3.60it/s] 10%|█ | 37562/371472 [3:01:53<27:30:49, 3.37it/s] 10%|█ | 37563/371472 [3:01:54<26:37:48, 3.48it/s] 10%|█ | 37564/371472 [3:01:54<27:28:50, 3.38it/s] 10%|█ | 37565/371472 [3:01:54<27:18:08, 3.40it/s] 10%|█ | 37566/371472 [3:01:55<26:22:18, 3.52it/s] 10%|█ | 37567/371472 [3:01:55<25:55:46, 3.58it/s] 10%|█ | 37568/371472 [3:01:55<25:05:11, 3.70it/s] 10%|█ | 37569/371472 [3:01:55<24:58:45, 3.71it/s] 10%|█ | 37570/371472 [3:01:56<24:42:26, 3.75it/s] 10%|█ | 37571/371472 [3:01:56<25:08:39, 3.69it/s] 10%|█ | 37572/371472 [3:01:56<25:01:04, 3.71it/s] 10%|█ | 37573/371472 [3:01:56<24:51:08, 3.73it/s] 10%|█ | 37574/371472 [3:01:57<25:54:58, 3.58it/s] 10%|█ | 37575/371472 [3:01:57<25:53:47, 3.58it/s] 10%|█ | 37576/371472 [3:01:57<26:36:13, 3.49it/s] 10%|█ | 37577/371472 [3:01:58<25:51:35, 3.59it/s] 10%|█ | 37578/371472 [3:01:58<25:40:32, 3.61it/s] 10%|█ | 37579/371472 [3:01:58<25:39:48, 3.61it/s] 10%|█ | 37580/371472 [3:01:58<27:35:14, 3.36it/s] {'loss': 4.2739, 'learning_rate': 9.093871878299467e-07, 'epoch': 1.62} + 10%|█ | 37580/371472 [3:01:58<27:35:14, 3.36it/s] 10%|█ | 37581/371472 [3:01:59<26:53:59, 3.45it/s] 10%|█ | 37582/371472 [3:01:59<28:10:10, 3.29it/s] 10%|█ | 37583/371472 [3:01:59<29:39:49, 3.13it/s] 10%|█ | 37584/371472 [3:02:00<32:06:14, 2.89it/s] 10%|█ | 37585/371472 [3:02:00<29:51:36, 3.11it/s] 10%|█ | 37586/371472 [3:02:00<28:28:46, 3.26it/s] 10%|█ | 37587/371472 [3:02:01<27:04:20, 3.43it/s] 10%|█ | 37588/371472 [3:02:01<28:13:09, 3.29it/s] 10%|█ | 37589/371472 [3:02:01<27:57:30, 3.32it/s] 10%|█ | 37590/371472 [3:02:02<27:51:42, 3.33it/s] 10%|█ | 37591/371472 [3:02:02<26:14:51, 3.53it/s] 10%|█ | 37592/371472 [3:02:02<27:09:02, 3.42it/s] 10%|█ | 37593/371472 [3:02:02<28:28:19, 3.26it/s] 10%|█ | 37594/371472 [3:02:03<27:45:27, 3.34it/s] 10%|█ | 37595/371472 [3:02:03<27:06:51, 3.42it/s] 10%|█ | 37596/371472 [3:02:03<26:39:38, 3.48it/s] 10%|█ | 37597/371472 [3:02:04<25:44:45, 3.60it/s] 10%|█ | 37598/371472 [3:02:04<25:43:51, 3.60it/s] 10%|█ | 37599/371472 [3:02:04<26:42:36, 3.47it/s] 10%|█ | 37600/371472 [3:02:04<26:21:19, 3.52it/s] {'loss': 4.3841, 'learning_rate': 9.093387058544678e-07, 'epoch': 1.62} + 10%|█ | 37600/371472 [3:02:04<26:21:19, 3.52it/s] 10%|█ | 37601/371472 [3:02:05<25:31:25, 3.63it/s] 10%|█ | 37602/371472 [3:02:05<26:57:20, 3.44it/s] 10%|█ | 37603/371472 [3:02:05<26:23:59, 3.51it/s] 10%|█ | 37604/371472 [3:02:06<26:16:33, 3.53it/s] 10%|█ | 37605/371472 [3:02:06<26:28:45, 3.50it/s] 10%|█ | 37606/371472 [3:02:06<26:26:27, 3.51it/s] 10%|█ | 37607/371472 [3:02:06<25:58:32, 3.57it/s] 10%|█ | 37608/371472 [3:02:07<26:17:39, 3.53it/s] 10%|█ | 37609/371472 [3:02:07<27:04:05, 3.43it/s] 10%|█ | 37610/371472 [3:02:07<26:43:05, 3.47it/s] 10%|█ | 37611/371472 [3:02:08<27:19:41, 3.39it/s] 10%|█ | 37612/371472 [3:02:08<26:59:20, 3.44it/s] 10%|█ | 37613/371472 [3:02:08<28:13:31, 3.29it/s] 10%|█ | 37614/371472 [3:02:09<27:54:27, 3.32it/s] 10%|█ | 37615/371472 [3:02:09<26:56:49, 3.44it/s] 10%|█ | 37616/371472 [3:02:09<28:01:55, 3.31it/s] 10%|█ | 37617/371472 [3:02:09<27:02:19, 3.43it/s] 10%|█ | 37618/371472 [3:02:10<27:05:36, 3.42it/s] 10%|█ | 37619/371472 [3:02:10<25:50:06, 3.59it/s] 10%|█ | 37620/371472 [3:02:10<29:01:31, 3.20it/s] {'loss': 4.3808, 'learning_rate': 9.09290223878989e-07, 'epoch': 1.62} + 10%|█ | 37620/371472 [3:02:10<29:01:31, 3.20it/s] 10%|█ | 37621/371472 [3:02:11<28:52:43, 3.21it/s] 10%|█ | 37622/371472 [3:02:11<28:08:59, 3.29it/s] 10%|█ | 37623/371472 [3:02:11<30:13:01, 3.07it/s] 10%|█ | 37624/371472 [3:02:12<31:33:56, 2.94it/s] 10%|█ | 37625/371472 [3:02:12<29:43:25, 3.12it/s] 10%|█ | 37626/371472 [3:02:12<30:36:23, 3.03it/s] 10%|█ | 37627/371472 [3:02:13<28:50:58, 3.21it/s] 10%|█ | 37628/371472 [3:02:13<32:28:32, 2.86it/s] 10%|█ | 37629/371472 [3:02:13<31:35:57, 2.93it/s] 10%|█ | 37630/371472 [3:02:14<31:05:20, 2.98it/s] 10%|█ | 37631/371472 [3:02:14<31:06:08, 2.98it/s] 10%|█ | 37632/371472 [3:02:14<29:12:04, 3.18it/s] 10%|█ | 37633/371472 [3:02:15<29:43:43, 3.12it/s] 10%|█ | 37634/371472 [3:02:15<28:31:54, 3.25it/s] 10%|█ | 37635/371472 [3:02:15<27:47:05, 3.34it/s] 10%|█ | 37636/371472 [3:02:15<29:28:57, 3.15it/s] 10%|█ | 37637/371472 [3:02:16<28:28:37, 3.26it/s] 10%|█ | 37638/371472 [3:02:16<27:26:23, 3.38it/s] 10%|█ | 37639/371472 [3:02:16<27:48:31, 3.33it/s] 10%|█ | 37640/371472 [3:02:17<27:17:27, 3.40it/s] {'loss': 4.366, 'learning_rate': 9.092417419035101e-07, 'epoch': 1.62} + 10%|█ | 37640/371472 [3:02:17<27:17:27, 3.40it/s] 10%|█ | 37641/371472 [3:02:17<27:35:07, 3.36it/s] 10%|█ | 37642/371472 [3:02:17<26:06:45, 3.55it/s] 10%|█ | 37643/371472 [3:02:17<25:56:10, 3.58it/s] 10%|█ | 37644/371472 [3:02:18<26:45:30, 3.47it/s] 10%|█ | 37645/371472 [3:02:18<25:53:16, 3.58it/s] 10%|█ | 37646/371472 [3:02:18<25:44:16, 3.60it/s] 10%|█ | 37647/371472 [3:02:19<25:43:55, 3.60it/s] 10%|█ | 37648/371472 [3:02:19<25:06:01, 3.69it/s] 10%|█ | 37649/371472 [3:02:19<26:44:40, 3.47it/s] 10%|█ | 37650/371472 [3:02:19<27:27:04, 3.38it/s] 10%|█ | 37651/371472 [3:02:20<28:08:40, 3.29it/s] 10%|█ | 37652/371472 [3:02:20<30:01:40, 3.09it/s] 10%|█ | 37653/371472 [3:02:20<30:17:15, 3.06it/s] 10%|█ | 37654/371472 [3:02:21<28:48:35, 3.22it/s] 10%|█ | 37655/371472 [3:02:21<28:22:32, 3.27it/s] 10%|█ | 37656/371472 [3:02:21<27:18:08, 3.40it/s] 10%|█ | 37657/371472 [3:02:22<26:05:35, 3.55it/s] 10%|█ | 37658/371472 [3:02:22<26:38:57, 3.48it/s] 10%|█ | 37659/371472 [3:02:22<26:39:32, 3.48it/s] 10%|█ | 37660/371472 [3:02:23<28:27:50, 3.26it/s] {'loss': 4.3987, 'learning_rate': 9.091932599280312e-07, 'epoch': 1.62} + 10%|█ | 37660/371472 [3:02:23<28:27:50, 3.26it/s] 10%|█ | 37661/371472 [3:02:23<27:34:52, 3.36it/s] 10%|█ | 37662/371472 [3:02:23<26:34:47, 3.49it/s] 10%|█ | 37663/371472 [3:02:23<27:15:49, 3.40it/s] 10%|█ | 37664/371472 [3:02:24<26:46:00, 3.46it/s] 10%|█ | 37665/371472 [3:02:24<25:33:03, 3.63it/s] 10%|█ | 37666/371472 [3:02:24<26:01:11, 3.56it/s] 10%|█ | 37667/371472 [3:02:24<26:25:05, 3.51it/s] 10%|█ | 37668/371472 [3:02:25<26:14:35, 3.53it/s] 10%|█ | 37669/371472 [3:02:25<25:59:56, 3.57it/s] 10%|█ | 37670/371472 [3:02:25<25:43:34, 3.60it/s] 10%|█ | 37671/371472 [3:02:26<25:17:47, 3.67it/s] 10%|█ | 37672/371472 [3:02:26<26:55:11, 3.44it/s] 10%|█ | 37673/371472 [3:02:26<26:50:13, 3.45it/s] 10%|█ | 37674/371472 [3:02:26<26:02:30, 3.56it/s] 10%|█ | 37675/371472 [3:02:27<25:31:47, 3.63it/s] 10%|█ | 37676/371472 [3:02:27<25:25:54, 3.65it/s] 10%|█ | 37677/371472 [3:02:27<26:16:15, 3.53it/s] 10%|█ | 37678/371472 [3:02:28<26:03:45, 3.56it/s] 10%|█ | 37679/371472 [3:02:28<25:31:17, 3.63it/s] 10%|█ | 37680/371472 [3:02:28<26:25:19, 3.51it/s] {'loss': 4.6227, 'learning_rate': 9.091447779525522e-07, 'epoch': 1.62} + 10%|█ | 37680/371472 [3:02:28<26:25:19, 3.51it/s] 10%|█ | 37681/371472 [3:02:28<27:56:33, 3.32it/s] 10%|█ | 37682/371472 [3:02:29<26:48:53, 3.46it/s] 10%|█ | 37683/371472 [3:02:29<27:59:46, 3.31it/s] 10%|█ | 37684/371472 [3:02:29<26:35:14, 3.49it/s] 10%|█ | 37685/371472 [3:02:30<25:52:27, 3.58it/s] 10%|█ | 37686/371472 [3:02:30<26:28:54, 3.50it/s] 10%|█ | 37687/371472 [3:02:30<27:20:35, 3.39it/s] 10%|█ | 37688/371472 [3:02:30<26:34:44, 3.49it/s] 10%|█ | 37689/371472 [3:02:31<25:58:04, 3.57it/s] 10%|█ | 37690/371472 [3:02:31<25:23:39, 3.65it/s] 10%|█ | 37691/371472 [3:02:31<25:12:24, 3.68it/s] 10%|█ | 37692/371472 [3:02:32<26:08:00, 3.55it/s] 10%|█ | 37693/371472 [3:02:32<27:31:29, 3.37it/s] 10%|█ | 37694/371472 [3:02:32<26:39:46, 3.48it/s] 10%|█ | 37695/371472 [3:02:32<26:33:12, 3.49it/s] 10%|█ | 37696/371472 [3:02:33<25:57:41, 3.57it/s] 10%|█ | 37697/371472 [3:02:33<25:30:12, 3.64it/s] 10%|█ | 37698/371472 [3:02:33<26:20:31, 3.52it/s] 10%|█ | 37699/371472 [3:02:34<26:11:27, 3.54it/s] 10%|█ | 37700/371472 [3:02:34<27:12:48, 3.41it/s] {'loss': 4.1742, 'learning_rate': 9.090962959770734e-07, 'epoch': 1.62} + 10%|█ | 37700/371472 [3:02:34<27:12:48, 3.41it/s] 10%|█ | 37701/371472 [3:02:34<26:53:55, 3.45it/s] 10%|█ | 37702/371472 [3:02:34<26:13:49, 3.53it/s] 10%|█ | 37703/371472 [3:02:35<27:20:09, 3.39it/s] 10%|█ | 37704/371472 [3:02:35<30:35:53, 3.03it/s] 10%|█ | 37705/371472 [3:02:35<30:12:18, 3.07it/s] 10%|█ | 37706/371472 [3:02:36<28:52:31, 3.21it/s] 10%|█ | 37707/371472 [3:02:36<27:53:20, 3.32it/s] 10%|█ | 37708/371472 [3:02:36<27:11:48, 3.41it/s] 10%|█ | 37709/371472 [3:02:37<27:41:28, 3.35it/s] 10%|█ | 37710/371472 [3:02:37<27:27:04, 3.38it/s] 10%|█ | 37711/371472 [3:02:37<27:27:56, 3.38it/s] 10%|█ | 37712/371472 [3:02:38<29:00:21, 3.20it/s] 10%|█ | 37713/371472 [3:02:38<28:17:41, 3.28it/s] 10%|█ | 37714/371472 [3:02:38<27:42:32, 3.35it/s] 10%|█ | 37715/371472 [3:02:38<27:52:17, 3.33it/s] 10%|█ | 37716/371472 [3:02:39<26:30:44, 3.50it/s] 10%|█ | 37717/371472 [3:02:39<25:43:23, 3.60it/s] 10%|█ | 37718/371472 [3:02:39<26:06:14, 3.55it/s] 10%|█ | 37719/371472 [3:02:39<25:12:47, 3.68it/s] 10%|█ | 37720/371472 [3:02:40<26:01:23, 3.56it/s] {'loss': 4.361, 'learning_rate': 9.090478140015944e-07, 'epoch': 1.62} + 10%|█ | 37720/371472 [3:02:40<26:01:23, 3.56it/s] 10%|█ | 37721/371472 [3:02:40<25:56:01, 3.57it/s] 10%|█ | 37722/371472 [3:02:40<25:40:43, 3.61it/s] 10%|█ | 37723/371472 [3:02:41<25:05:48, 3.69it/s] 10%|█ | 37724/371472 [3:02:41<24:39:11, 3.76it/s] 10%|█ | 37725/371472 [3:02:41<24:26:01, 3.79it/s] 10%|█ | 37726/371472 [3:02:41<25:25:57, 3.65it/s] 10%|█ | 37727/371472 [3:02:42<27:58:24, 3.31it/s] 10%|█ | 37728/371472 [3:02:42<27:38:20, 3.35it/s] 10%|█ | 37729/371472 [3:02:42<27:00:52, 3.43it/s] 10%|█ | 37730/371472 [3:02:43<26:03:23, 3.56it/s] 10%|█ | 37731/371472 [3:02:43<26:18:49, 3.52it/s] 10%|█ | 37732/371472 [3:02:43<26:12:21, 3.54it/s] 10%|█ | 37733/371472 [3:02:43<27:15:17, 3.40it/s] 10%|█ | 37734/371472 [3:02:44<26:07:36, 3.55it/s] 10%|█ | 37735/371472 [3:02:44<25:23:38, 3.65it/s] 10%|█ | 37736/371472 [3:02:44<25:08:53, 3.69it/s] 10%|█ | 37737/371472 [3:02:45<26:24:58, 3.51it/s] 10%|█ | 37738/371472 [3:02:45<26:39:55, 3.48it/s] 10%|█ | 37739/371472 [3:02:45<25:17:23, 3.67it/s] 10%|█ | 37740/371472 [3:02:45<28:14:56, 3.28it/s] {'loss': 4.3502, 'learning_rate': 9.089993320261156e-07, 'epoch': 1.63} + 10%|█ | 37740/371472 [3:02:45<28:14:56, 3.28it/s] 10%|█ | 37741/371472 [3:02:46<28:40:08, 3.23it/s] 10%|█ | 37742/371472 [3:02:46<28:08:50, 3.29it/s] 10%|█ | 37743/371472 [3:02:46<28:43:29, 3.23it/s] 10%|█ | 37744/371472 [3:02:47<27:39:47, 3.35it/s] 10%|█ | 37745/371472 [3:02:47<27:10:05, 3.41it/s] 10%|█ | 37746/371472 [3:02:47<27:29:52, 3.37it/s] 10%|█ | 37747/371472 [3:02:48<26:52:44, 3.45it/s] 10%|█ | 37748/371472 [3:02:48<26:18:36, 3.52it/s] 10%|█ | 37749/371472 [3:02:48<26:01:28, 3.56it/s] 10%|█ | 37750/371472 [3:02:48<25:55:36, 3.58it/s] 10%|█ | 37751/371472 [3:02:49<25:12:07, 3.68it/s] 10%|█ | 37752/371472 [3:02:49<25:53:20, 3.58it/s] 10%|█ | 37753/371472 [3:02:49<25:21:29, 3.66it/s] 10%|█ | 37754/371472 [3:02:49<25:19:47, 3.66it/s] 10%|█ | 37755/371472 [3:02:50<25:40:27, 3.61it/s] 10%|█ | 37756/371472 [3:02:50<25:37:39, 3.62it/s] 10%|█ | 37757/371472 [3:02:50<25:30:51, 3.63it/s] 10%|█ | 37758/371472 [3:02:51<24:44:14, 3.75it/s] 10%|█ | 37759/371472 [3:02:51<25:43:30, 3.60it/s] 10%|█ | 37760/371472 [3:02:51<26:09:02, 3.54it/s] {'loss': 4.4801, 'learning_rate': 9.089508500506367e-07, 'epoch': 1.63} + 10%|█ | 37760/371472 [3:02:51<26:09:02, 3.54it/s] 10%|█ | 37761/371472 [3:02:51<25:42:58, 3.60it/s] 10%|█ | 37762/371472 [3:02:52<25:29:25, 3.64it/s] 10%|█ | 37763/371472 [3:02:52<25:16:39, 3.67it/s] 10%|█ | 37764/371472 [3:02:52<26:05:27, 3.55it/s] 10%|█ | 37765/371472 [3:02:53<26:28:34, 3.50it/s] 10%|█ | 37766/371472 [3:02:53<26:51:08, 3.45it/s] 10%|█ | 37767/371472 [3:02:53<25:36:43, 3.62it/s] 10%|█ | 37768/371472 [3:02:53<28:33:42, 3.25it/s] 10%|█ | 37769/371472 [3:02:54<27:26:37, 3.38it/s] 10%|█ | 37770/371472 [3:02:54<26:35:53, 3.48it/s] 10%|█ | 37771/371472 [3:02:54<25:36:10, 3.62it/s] 10%|█ | 37772/371472 [3:02:55<25:33:00, 3.63it/s] 10%|█ | 37773/371472 [3:02:55<25:25:26, 3.65it/s] 10%|█ | 37774/371472 [3:02:55<24:45:38, 3.74it/s] 10%|█ | 37775/371472 [3:02:55<25:24:59, 3.65it/s] 10%|█ | 37776/371472 [3:02:56<25:34:19, 3.62it/s] 10%|█ | 37777/371472 [3:02:56<27:18:32, 3.39it/s] 10%|█ | 37778/371472 [3:02:56<26:08:30, 3.55it/s] 10%|█ | 37779/371472 [3:02:56<26:05:53, 3.55it/s] 10%|█ | 37780/371472 [3:02:57<26:01:15, 3.56it/s] {'loss': 4.412, 'learning_rate': 9.089023680751579e-07, 'epoch': 1.63} + 10%|█ | 37780/371472 [3:02:57<26:01:15, 3.56it/s] 10%|█ | 37781/371472 [3:02:57<26:06:50, 3.55it/s] 10%|█ | 37782/371472 [3:02:57<26:20:38, 3.52it/s] 10%|█ | 37783/371472 [3:02:58<25:40:18, 3.61it/s] 10%|█ | 37784/371472 [3:02:58<26:52:18, 3.45it/s] 10%|█ | 37785/371472 [3:02:58<26:20:48, 3.52it/s] 10%|█ | 37786/371472 [3:02:58<25:40:04, 3.61it/s] 10%|█ | 37787/371472 [3:02:59<27:18:55, 3.39it/s] 10%|█ | 37788/371472 [3:02:59<28:25:19, 3.26it/s] 10%|█ | 37789/371472 [3:02:59<27:24:15, 3.38it/s] 10%|█ | 37790/371472 [3:03:00<27:28:01, 3.37it/s] 10%|█ | 37791/371472 [3:03:00<26:52:24, 3.45it/s] 10%|█ | 37792/371472 [3:03:00<26:54:54, 3.44it/s] 10%|█ | 37793/371472 [3:03:01<26:47:35, 3.46it/s] 10%|█ | 37794/371472 [3:03:01<26:32:07, 3.49it/s] 10%|█ | 37795/371472 [3:03:01<25:57:15, 3.57it/s] 10%|█ | 37796/371472 [3:03:01<26:14:14, 3.53it/s] 10%|█ | 37797/371472 [3:03:02<25:37:27, 3.62it/s] 10%|█ | 37798/371472 [3:03:02<25:09:44, 3.68it/s] 10%|█ | 37799/371472 [3:03:02<26:56:34, 3.44it/s] 10%|█ | 37800/371472 [3:03:02<26:00:29, 3.56it/s] {'loss': 4.3157, 'learning_rate': 9.088538860996788e-07, 'epoch': 1.63} + 10%|█ | 37800/371472 [3:03:02<26:00:29, 3.56it/s] 10%|█ | 37801/371472 [3:03:03<26:04:06, 3.56it/s] 10%|█ | 37802/371472 [3:03:03<26:16:25, 3.53it/s] 10%|█ | 37803/371472 [3:03:03<26:12:42, 3.54it/s] 10%|█ | 37804/371472 [3:03:04<25:37:55, 3.62it/s] 10%|█ | 37805/371472 [3:03:04<25:09:01, 3.69it/s] 10%|█ | 37806/371472 [3:03:04<27:19:23, 3.39it/s] 10%|█ | 37807/371472 [3:03:04<27:11:52, 3.41it/s] 10%|█ | 37808/371472 [3:03:05<27:09:03, 3.41it/s] 10%|█ | 37809/371472 [3:03:05<27:51:21, 3.33it/s] 10%|█ | 37810/371472 [3:03:05<26:47:44, 3.46it/s] 10%|█ | 37811/371472 [3:03:06<25:40:41, 3.61it/s] 10%|█ | 37812/371472 [3:03:06<25:44:35, 3.60it/s] 10%|█ | 37813/371472 [3:03:06<26:11:43, 3.54it/s] 10%|█ | 37814/371472 [3:03:06<26:16:21, 3.53it/s] 10%|█ | 37815/371472 [3:03:07<25:41:18, 3.61it/s] 10%|█ | 37816/371472 [3:03:07<25:37:11, 3.62it/s] 10%|█ | 37817/371472 [3:03:07<25:26:54, 3.64it/s] 10%|█ | 37818/371472 [3:03:08<24:48:17, 3.74it/s] 10%|█ | 37819/371472 [3:03:08<24:46:15, 3.74it/s] 10%|█ | 37820/371472 [3:03:08<24:52:29, 3.73it/s] {'loss': 4.2927, 'learning_rate': 9.088054041242e-07, 'epoch': 1.63} + 10%|█ | 37820/371472 [3:03:08<24:52:29, 3.73it/s] 10%|█ | 37821/371472 [3:03:08<26:26:19, 3.51it/s] 10%|█ | 37822/371472 [3:03:09<26:53:54, 3.45it/s] 10%|█ | 37823/371472 [3:03:09<26:55:54, 3.44it/s] 10%|█ | 37824/371472 [3:03:09<26:09:57, 3.54it/s] 10%|█ | 37825/371472 [3:03:10<25:51:19, 3.58it/s] 10%|█ | 37826/371472 [3:03:10<26:32:15, 3.49it/s] 10%|█ | 37827/371472 [3:03:10<25:33:19, 3.63it/s] 10%|█ | 37828/371472 [3:03:10<27:25:10, 3.38it/s] 10%|█ | 37829/371472 [3:03:11<26:03:59, 3.56it/s] 10%|█ | 37830/371472 [3:03:11<25:45:17, 3.60it/s] 10%|█ | 37831/371472 [3:03:11<26:46:04, 3.46it/s] 10%|█ | 37832/371472 [3:03:12<28:23:43, 3.26it/s] 10%|█ | 37833/371472 [3:03:12<28:25:26, 3.26it/s] 10%|█ | 37834/371472 [3:03:12<27:11:15, 3.41it/s] 10%|█ | 37835/371472 [3:03:12<27:07:35, 3.42it/s] 10%|█ | 37836/371472 [3:03:13<26:38:13, 3.48it/s] 10%|█ | 37837/371472 [3:03:13<25:34:55, 3.62it/s] 10%|█ | 37838/371472 [3:03:13<25:27:29, 3.64it/s] 10%|█ | 37839/371472 [3:03:14<27:23:33, 3.38it/s] 10%|█ | 37840/371472 [3:03:14<28:05:11, 3.30it/s] {'loss': 4.4417, 'learning_rate': 9.087569221487211e-07, 'epoch': 1.63} + 10%|█ | 37840/371472 [3:03:14<28:05:11, 3.30it/s] 10%|█ | 37841/371472 [3:03:14<27:57:54, 3.31it/s] 10%|█ | 37842/371472 [3:03:14<26:52:13, 3.45it/s] 10%|█ | 37843/371472 [3:03:15<27:03:45, 3.42it/s] 10%|█ | 37844/371472 [3:03:15<25:59:43, 3.57it/s] 10%|█ | 37845/371472 [3:03:15<25:31:14, 3.63it/s] 10%|█ | 37846/371472 [3:03:16<25:47:38, 3.59it/s] 10%|█ | 37847/371472 [3:03:16<25:18:45, 3.66it/s] 10%|█ | 37848/371472 [3:03:16<25:23:08, 3.65it/s] 10%|█ | 37849/371472 [3:03:16<26:12:35, 3.54it/s] 10%|█ | 37850/371472 [3:03:17<25:19:15, 3.66it/s] 10%|█ | 37851/371472 [3:03:17<26:20:02, 3.52it/s] 10%|█ | 37852/371472 [3:03:17<25:54:37, 3.58it/s] 10%|█ | 37853/371472 [3:03:18<28:05:03, 3.30it/s] 10%|█ | 37854/371472 [3:03:18<27:18:28, 3.39it/s] 10%|█ | 37855/371472 [3:03:18<29:13:09, 3.17it/s] 10%|█ | 37856/371472 [3:03:19<27:53:46, 3.32it/s] 10%|█ | 37857/371472 [3:03:19<26:41:14, 3.47it/s] 10%|█ | 37858/371472 [3:03:19<26:14:49, 3.53it/s] 10%|█ | 37859/371472 [3:03:19<25:48:35, 3.59it/s] 10%|█ | 37860/371472 [3:03:20<25:49:53, 3.59it/s] {'loss': 4.4633, 'learning_rate': 9.087084401732423e-07, 'epoch': 1.63} + 10%|█ | 37860/371472 [3:03:20<25:49:53, 3.59it/s] 10%|█ | 37861/371472 [3:03:20<26:22:56, 3.51it/s] 10%|█ | 37862/371472 [3:03:20<25:53:31, 3.58it/s] 10%|█ | 37863/371472 [3:03:20<25:34:53, 3.62it/s] 10%|█ | 37864/371472 [3:03:21<26:19:29, 3.52it/s] 10%|█ | 37865/371472 [3:03:21<25:28:04, 3.64it/s] 10%|█ | 37866/371472 [3:03:21<24:55:02, 3.72it/s] 10%|█ | 37867/371472 [3:03:21<24:53:22, 3.72it/s] 10%|█ | 37868/371472 [3:03:22<24:44:41, 3.74it/s] 10%|█ | 37869/371472 [3:03:22<24:48:28, 3.74it/s] 10%|█ | 37870/371472 [3:03:22<25:02:12, 3.70it/s] 10%|█ | 37871/371472 [3:03:23<25:05:21, 3.69it/s] 10%|█ | 37872/371472 [3:03:23<26:33:16, 3.49it/s] 10%|█ | 37873/371472 [3:03:23<26:51:30, 3.45it/s] 10%|█ | 37874/371472 [3:03:23<26:29:05, 3.50it/s] 10%|█ | 37875/371472 [3:03:24<25:16:09, 3.67it/s] 10%|█ | 37876/371472 [3:03:24<24:46:46, 3.74it/s] 10%|█ | 37877/371472 [3:03:24<25:35:04, 3.62it/s] 10%|█ | 37878/371472 [3:03:25<26:25:29, 3.51it/s] 10%|█ | 37879/371472 [3:03:25<26:01:32, 3.56it/s] 10%|█ | 37880/371472 [3:03:25<25:45:28, 3.60it/s] {'loss': 4.619, 'learning_rate': 9.086599581977633e-07, 'epoch': 1.63} + 10%|█ | 37880/371472 [3:03:25<25:45:28, 3.60it/s] 10%|█ | 37881/371472 [3:03:25<26:18:53, 3.52it/s] 10%|█ | 37882/371472 [3:03:26<26:27:27, 3.50it/s] 10%|█ | 37883/371472 [3:03:26<26:01:21, 3.56it/s] 10%|█ | 37884/371472 [3:03:26<26:09:00, 3.54it/s] 10%|█ | 37885/371472 [3:03:27<27:41:56, 3.35it/s] 10%|█ | 37886/371472 [3:03:27<26:40:39, 3.47it/s] 10%|█ | 37887/371472 [3:03:27<26:14:36, 3.53it/s] 10%|█ | 37888/371472 [3:03:28<29:24:22, 3.15it/s] 10%|█ | 37889/371472 [3:03:28<27:35:24, 3.36it/s] 10%|█ | 37890/371472 [3:03:28<26:39:58, 3.47it/s] 10%|█ | 37891/371472 [3:03:28<25:27:38, 3.64it/s] 10%|█ | 37892/371472 [3:03:29<25:25:21, 3.64it/s] 10%|█ | 37893/371472 [3:03:29<25:40:53, 3.61it/s] 10%|█ | 37894/371472 [3:03:29<27:41:05, 3.35it/s] 10%|█ | 37895/371472 [3:03:29<27:30:41, 3.37it/s] 10%|█ | 37896/371472 [3:03:30<29:23:44, 3.15it/s] 10%|█ | 37897/371472 [3:03:30<30:05:05, 3.08it/s] 10%|█ | 37898/371472 [3:03:31<29:39:27, 3.12it/s] 10%|█ | 37899/371472 [3:03:31<28:55:10, 3.20it/s] 10%|█ | 37900/371472 [3:03:31<27:55:02, 3.32it/s] {'loss': 4.429, 'learning_rate': 9.086114762222844e-07, 'epoch': 1.63} + 10%|█ | 37900/371472 [3:03:31<27:55:02, 3.32it/s] 10%|█ | 37901/371472 [3:03:31<26:45:11, 3.46it/s] 10%|█ | 37902/371472 [3:03:32<26:40:41, 3.47it/s] 10%|█ | 37903/371472 [3:03:32<25:49:57, 3.59it/s] 10%|█ | 37904/371472 [3:03:32<26:16:33, 3.53it/s] 10%|█ | 37905/371472 [3:03:32<26:10:30, 3.54it/s] 10%|█ | 37906/371472 [3:03:33<27:40:23, 3.35it/s] 10%|█ | 37907/371472 [3:03:33<27:12:01, 3.41it/s] 10%|█ | 37908/371472 [3:03:33<28:08:44, 3.29it/s] 10%|█ | 37909/371472 [3:03:34<27:15:29, 3.40it/s] 10%|█ | 37910/371472 [3:03:34<27:26:16, 3.38it/s] 10%|█ | 37911/371472 [3:03:34<28:23:44, 3.26it/s] 10%|█ | 37912/371472 [3:03:35<27:52:53, 3.32it/s] 10%|█ | 37913/371472 [3:03:35<27:01:49, 3.43it/s] 10%|█ | 37914/371472 [3:03:35<26:10:17, 3.54it/s] 10%|█ | 37915/371472 [3:03:35<26:42:47, 3.47it/s] 10%|█ | 37916/371472 [3:03:36<25:59:04, 3.57it/s] 10%|█ | 37917/371472 [3:03:36<25:19:22, 3.66it/s] 10%|█ | 37918/371472 [3:03:36<24:43:15, 3.75it/s] 10%|█ | 37919/371472 [3:03:36<25:36:19, 3.62it/s] 10%|█ | 37920/371472 [3:03:37<26:38:50, 3.48it/s] {'loss': 4.3935, 'learning_rate': 9.085629942468056e-07, 'epoch': 1.63} + 10%|█ | 37920/371472 [3:03:37<26:38:50, 3.48it/s] 10%|█ | 37921/371472 [3:03:37<26:22:12, 3.51it/s] 10%|█ | 37922/371472 [3:03:37<26:06:44, 3.55it/s] 10%|█ | 37923/371472 [3:03:38<25:54:58, 3.58it/s] 10%|█ | 37924/371472 [3:03:38<25:13:18, 3.67it/s] 10%|█ | 37925/371472 [3:03:38<26:08:44, 3.54it/s] 10%|█ | 37926/371472 [3:03:38<25:43:39, 3.60it/s] 10%|█ | 37927/371472 [3:03:39<25:37:54, 3.61it/s] 10%|█ | 37928/371472 [3:03:39<25:23:02, 3.65it/s] 10%|█ | 37929/371472 [3:03:39<26:11:49, 3.54it/s] 10%|█ | 37930/371472 [3:03:40<29:36:46, 3.13it/s] 10%|█ | 37931/371472 [3:03:40<28:33:15, 3.24it/s] 10%|█ | 37932/371472 [3:03:40<27:20:08, 3.39it/s] 10%|█ | 37933/371472 [3:03:41<26:17:44, 3.52it/s] 10%|█ | 37934/371472 [3:03:41<25:30:16, 3.63it/s] 10%|█ | 37935/371472 [3:03:41<26:19:56, 3.52it/s] 10%|█ | 37936/371472 [3:03:41<25:47:24, 3.59it/s] 10%|█ | 37937/371472 [3:03:42<26:28:05, 3.50it/s] 10%|█ | 37938/371472 [3:03:42<25:56:11, 3.57it/s] 10%|█ | 37939/371472 [3:03:42<25:57:03, 3.57it/s] 10%|█ | 37940/371472 [3:03:42<24:50:17, 3.73it/s] {'loss': 4.5075, 'learning_rate': 9.085145122713267e-07, 'epoch': 1.63} + 10%|█ | 37940/371472 [3:03:42<24:50:17, 3.73it/s] 10%|█ | 37941/371472 [3:03:43<25:58:43, 3.57it/s] 10%|█ | 37942/371472 [3:03:43<26:55:27, 3.44it/s] 10%|█ | 37943/371472 [3:03:43<26:52:45, 3.45it/s] 10%|█ | 37944/371472 [3:03:44<25:44:16, 3.60it/s] 10%|█ | 37945/371472 [3:03:44<25:07:05, 3.69it/s] 10%|█ | 37946/371472 [3:03:44<25:10:55, 3.68it/s] 10%|█ | 37947/371472 [3:03:44<26:45:35, 3.46it/s] 10%|█ | 37948/371472 [3:03:45<26:22:42, 3.51it/s] 10%|█ | 37949/371472 [3:03:45<26:06:54, 3.55it/s] 10%|█ | 37950/371472 [3:03:45<25:59:19, 3.56it/s] 10%|█ | 37951/371472 [3:03:46<26:07:48, 3.55it/s] 10%|█ | 37952/371472 [3:03:46<26:06:45, 3.55it/s] 10%|█ | 37953/371472 [3:03:46<25:19:42, 3.66it/s] 10%|█ | 37954/371472 [3:03:46<25:31:12, 3.63it/s] 10%|█ | 37955/371472 [3:03:47<24:35:25, 3.77it/s] 10%|█ | 37956/371472 [3:03:47<25:21:58, 3.65it/s] 10%|█ | 37957/371472 [3:03:47<25:09:42, 3.68it/s] 10%|█ | 37958/371472 [3:03:47<24:48:38, 3.73it/s] 10%|█ | 37959/371472 [3:03:48<27:23:25, 3.38it/s] 10%|█ | 37960/371472 [3:03:48<26:49:14, 3.45it/s] {'loss': 4.2795, 'learning_rate': 9.084660302958478e-07, 'epoch': 1.64} + 10%|█ | 37960/371472 [3:03:48<26:49:14, 3.45it/s] 10%|█ | 37961/371472 [3:03:48<28:28:09, 3.25it/s] 10%|█ | 37962/371472 [3:03:49<27:06:31, 3.42it/s] 10%|█ | 37963/371472 [3:03:49<25:45:53, 3.60it/s] 10%|█ | 37964/371472 [3:03:49<25:55:18, 3.57it/s] 10%|█ | 37965/371472 [3:03:49<25:34:44, 3.62it/s] 10%|█ | 37966/371472 [3:03:50<24:58:30, 3.71it/s] 10%|█ | 37967/371472 [3:03:50<24:53:45, 3.72it/s] 10%|█ | 37968/371472 [3:03:50<26:44:59, 3.46it/s] 10%|█ | 37969/371472 [3:03:51<26:53:44, 3.44it/s] 10%|█ | 37970/371472 [3:03:51<25:57:03, 3.57it/s] 10%|█ | 37971/371472 [3:03:51<25:52:08, 3.58it/s] 10%|█ | 37972/371472 [3:03:51<25:16:45, 3.66it/s] 10%|█ | 37973/371472 [3:03:52<25:10:06, 3.68it/s] 10%|█ | 37974/371472 [3:03:52<24:40:36, 3.75it/s] 10%|█ | 37975/371472 [3:03:52<25:07:09, 3.69it/s] 10%|█ | 37976/371472 [3:03:52<24:57:35, 3.71it/s] 10%|█ | 37977/371472 [3:03:53<25:04:23, 3.69it/s] 10%|█ | 37978/371472 [3:03:53<24:46:02, 3.74it/s] 10%|█ | 37979/371472 [3:03:53<25:53:29, 3.58it/s] 10%|█ | 37980/371472 [3:03:54<25:13:07, 3.67it/s] {'loss': 4.4882, 'learning_rate': 9.084175483203688e-07, 'epoch': 1.64} + 10%|█ | 37980/371472 [3:03:54<25:13:07, 3.67it/s] 10%|█ | 37981/371472 [3:03:54<25:40:20, 3.61it/s] 10%|█ | 37982/371472 [3:03:54<25:33:40, 3.62it/s] 10%|█ | 37983/371472 [3:03:55<28:35:20, 3.24it/s] 10%|█ | 37984/371472 [3:03:55<27:28:50, 3.37it/s] 10%|█ | 37985/371472 [3:03:55<27:13:25, 3.40it/s] 10%|█ | 37986/371472 [3:03:55<26:36:14, 3.48it/s] 10%|█ | 37987/371472 [3:03:56<26:18:41, 3.52it/s] 10%|█ | 37988/371472 [3:03:56<25:46:44, 3.59it/s] 10%|█ | 37989/371472 [3:03:56<27:43:48, 3.34it/s] 10%|█ | 37990/371472 [3:03:57<27:19:47, 3.39it/s] 10%|█ | 37991/371472 [3:03:57<27:13:15, 3.40it/s] 10%|█ | 37992/371472 [3:03:57<26:51:59, 3.45it/s] 10%|█ | 37993/371472 [3:03:57<26:12:17, 3.53it/s] 10%|█ | 37994/371472 [3:03:58<25:21:30, 3.65it/s] 10%|█ | 37995/371472 [3:03:58<27:04:35, 3.42it/s] 10%|█ | 37996/371472 [3:03:58<27:03:41, 3.42it/s] 10%|█ | 37997/371472 [3:03:59<27:14:07, 3.40it/s] 10%|█ | 37998/371472 [3:03:59<26:20:13, 3.52it/s] 10%|█ | 37999/371472 [3:03:59<25:40:12, 3.61it/s] 10%|█ | 38000/371472 [3:03:59<25:32:20, 3.63it/s] {'loss': 4.3903, 'learning_rate': 9.0836906634489e-07, 'epoch': 1.64} + 10%|█ | 38000/371472 [3:03:59<25:32:20, 3.63it/s] 10%|█ | 38001/371472 [3:04:00<25:26:03, 3.64it/s] 10%|█ | 38002/371472 [3:04:00<25:03:15, 3.70it/s] 10%|█ | 38003/371472 [3:04:00<25:00:30, 3.70it/s] 10%|█ | 38004/371472 [3:04:00<25:03:44, 3.70it/s] 10%|█ | 38005/371472 [3:04:01<25:04:45, 3.69it/s] 10%|█ | 38006/371472 [3:04:01<26:34:58, 3.48it/s] 10%|█ | 38007/371472 [3:04:01<25:25:37, 3.64it/s] 10%|█ | 38008/371472 [3:04:02<24:42:11, 3.75it/s] 10%|█ | 38009/371472 [3:04:02<26:47:58, 3.46it/s] 10%|█ | 38010/371472 [3:04:02<26:33:49, 3.49it/s] 10%|█ | 38011/371472 [3:04:02<26:24:11, 3.51it/s] 10%|█ | 38012/371472 [3:04:03<25:58:45, 3.57it/s] 10%|█ | 38013/371472 [3:04:03<26:38:38, 3.48it/s] 10%|█ | 38014/371472 [3:04:03<27:07:50, 3.41it/s] 10%|█ | 38015/371472 [3:04:04<26:24:42, 3.51it/s] 10%|█ | 38016/371472 [3:04:04<25:57:05, 3.57it/s] 10%|█ | 38017/371472 [3:04:04<27:02:18, 3.43it/s] 10%|█ | 38018/371472 [3:04:04<27:37:30, 3.35it/s] 10%|█ | 38019/371472 [3:04:05<26:52:56, 3.45it/s] 10%|█ | 38020/371472 [3:04:05<25:59:38, 3.56it/s] {'loss': 4.4282, 'learning_rate': 9.08320584369411e-07, 'epoch': 1.64} + 10%|█ | 38020/371472 [3:04:05<25:59:38, 3.56it/s] 10%|█ | 38021/371472 [3:04:05<26:07:05, 3.55it/s] 10%|█ | 38022/371472 [3:04:06<25:46:47, 3.59it/s] 10%|█ | 38023/371472 [3:04:06<26:26:58, 3.50it/s] 10%|█ | 38024/371472 [3:04:06<25:50:18, 3.58it/s] 10%|█ | 38025/371472 [3:04:06<26:05:08, 3.55it/s] 10%|█ | 38026/371472 [3:04:07<25:36:18, 3.62it/s] 10%|█ | 38027/371472 [3:04:07<25:15:57, 3.67it/s] 10%|█ | 38028/371472 [3:04:07<26:41:31, 3.47it/s] 10%|█ | 38029/371472 [3:04:08<26:17:57, 3.52it/s] 10%|█ | 38030/371472 [3:04:08<27:26:49, 3.37it/s] 10%|█ | 38031/371472 [3:04:08<28:52:55, 3.21it/s] 10%|█ | 38032/371472 [3:04:09<29:55:34, 3.10it/s] 10%|█ | 38033/371472 [3:04:09<28:54:47, 3.20it/s] 10%|█ | 38034/371472 [3:04:09<27:32:50, 3.36it/s] 10%|█ | 38035/371472 [3:04:09<26:24:08, 3.51it/s] 10%|█ | 38036/371472 [3:04:10<28:12:00, 3.28it/s] 10%|█ | 38037/371472 [3:04:10<26:55:57, 3.44it/s] 10%|█ | 38038/371472 [3:04:10<26:08:03, 3.54it/s] 10%|█ | 38039/371472 [3:04:10<25:28:46, 3.64it/s] 10%|█ | 38040/371472 [3:04:11<25:44:17, 3.60it/s] {'loss': 4.4172, 'learning_rate': 9.082721023939322e-07, 'epoch': 1.64} + 10%|█ | 38040/371472 [3:04:11<25:44:17, 3.60it/s] 10%|█ | 38041/371472 [3:04:11<25:52:00, 3.58it/s] 10%|█ | 38042/371472 [3:04:11<25:58:42, 3.57it/s] 10%|█ | 38043/371472 [3:04:12<25:59:28, 3.56it/s] 10%|█ | 38044/371472 [3:04:12<26:28:45, 3.50it/s] 10%|█ | 38045/371472 [3:04:12<28:18:39, 3.27it/s] 10%|█ | 38046/371472 [3:04:13<27:45:28, 3.34it/s] 10%|█ | 38047/371472 [3:04:13<26:27:26, 3.50it/s] 10%|█ | 38048/371472 [3:04:13<26:04:29, 3.55it/s] 10%|█ | 38049/371472 [3:04:13<26:29:42, 3.50it/s] 10%|█ | 38050/371472 [3:04:14<26:46:59, 3.46it/s] 10%|█ | 38051/371472 [3:04:14<30:01:13, 3.09it/s] 10%|█ | 38052/371472 [3:04:14<28:33:17, 3.24it/s] 10%|█ | 38053/371472 [3:04:15<28:05:47, 3.30it/s] 10%|█ | 38054/371472 [3:04:15<28:12:13, 3.28it/s] 10%|█ | 38055/371472 [3:04:15<26:56:13, 3.44it/s] 10%|█ | 38056/371472 [3:04:15<26:47:55, 3.46it/s] 10%|█ | 38057/371472 [3:04:16<26:34:39, 3.48it/s] 10%|█ | 38058/371472 [3:04:16<25:26:56, 3.64it/s] 10%|█ | 38059/371472 [3:04:16<25:20:44, 3.65it/s] 10%|█ | 38060/371472 [3:04:17<24:32:17, 3.77it/s] {'loss': 4.405, 'learning_rate': 9.082236204184532e-07, 'epoch': 1.64} + 10%|█ | 38060/371472 [3:04:17<24:32:17, 3.77it/s] 10%|█ | 38061/371472 [3:04:17<24:36:11, 3.76it/s] 10%|█ | 38062/371472 [3:04:17<25:20:38, 3.65it/s] 10%|█ | 38063/371472 [3:04:17<26:23:23, 3.51it/s] 10%|█ | 38064/371472 [3:04:18<27:16:55, 3.39it/s] 10%|█ | 38065/371472 [3:04:18<27:34:48, 3.36it/s] 10%|█ | 38066/371472 [3:04:18<27:34:33, 3.36it/s] 10%|█ | 38067/371472 [3:04:19<27:05:47, 3.42it/s] 10%|█ | 38068/371472 [3:04:19<26:58:55, 3.43it/s] 10%|█ | 38069/371472 [3:04:19<26:34:34, 3.48it/s] 10%|█ | 38070/371472 [3:04:19<25:52:26, 3.58it/s] 10%|█ | 38071/371472 [3:04:20<25:51:31, 3.58it/s] 10%|█ | 38072/371472 [3:04:20<25:39:10, 3.61it/s] 10%|█ | 38073/371472 [3:04:20<26:08:18, 3.54it/s] 10%|█ | 38074/371472 [3:04:21<26:17:42, 3.52it/s] 10%|█ | 38075/371472 [3:04:21<25:26:31, 3.64it/s] 10%|█ | 38076/371472 [3:04:21<25:58:19, 3.57it/s] 10%|█ | 38077/371472 [3:04:21<25:49:27, 3.59it/s] 10%|█ | 38078/371472 [3:04:22<25:23:02, 3.65it/s] 10%|█ | 38079/371472 [3:04:22<26:19:54, 3.52it/s] 10%|█ | 38080/371472 [3:04:22<25:52:40, 3.58it/s] {'loss': 4.3862, 'learning_rate': 9.081751384429744e-07, 'epoch': 1.64} + 10%|█ | 38080/371472 [3:04:22<25:52:40, 3.58it/s] 10%|█ | 38081/371472 [3:04:23<25:59:57, 3.56it/s] 10%|█ | 38082/371472 [3:04:23<25:01:43, 3.70it/s] 10%|█ | 38083/371472 [3:04:23<24:35:23, 3.77it/s] 10%|█ | 38084/371472 [3:04:23<25:00:05, 3.70it/s] 10%|█ | 38085/371472 [3:04:24<24:41:16, 3.75it/s] 10%|█ | 38086/371472 [3:04:24<26:25:57, 3.50it/s] 10%|█ | 38087/371472 [3:04:24<25:52:21, 3.58it/s] 10%|█ | 38088/371472 [3:04:24<25:39:27, 3.61it/s] 10%|█ | 38089/371472 [3:04:25<25:34:59, 3.62it/s] 10%|█ | 38090/371472 [3:04:25<24:51:51, 3.72it/s] 10%|█ | 38091/371472 [3:04:25<27:45:38, 3.34it/s] 10%|█ | 38092/371472 [3:04:26<26:27:43, 3.50it/s] 10%|█ | 38093/371472 [3:04:26<25:15:19, 3.67it/s] 10%|█ | 38094/371472 [3:04:26<27:49:10, 3.33it/s] 10%|█ | 38095/371472 [3:04:26<27:47:15, 3.33it/s] 10%|█ | 38096/371472 [3:04:27<27:21:31, 3.38it/s] 10%|█ | 38097/371472 [3:04:27<26:56:10, 3.44it/s] 10%|█ | 38098/371472 [3:04:27<27:08:48, 3.41it/s] 10%|█ | 38099/371472 [3:04:28<26:33:19, 3.49it/s] 10%|█ | 38100/371472 [3:04:28<25:47:17, 3.59it/s] {'loss': 4.2601, 'learning_rate': 9.081266564674954e-07, 'epoch': 1.64} + 10%|█ | 38100/371472 [3:04:28<25:47:17, 3.59it/s] 10%|█ | 38101/371472 [3:04:28<25:43:35, 3.60it/s] 10%|█ | 38102/371472 [3:04:28<27:17:12, 3.39it/s] 10%|█ | 38103/371472 [3:04:29<26:37:26, 3.48it/s] 10%|█ | 38104/371472 [3:04:29<25:52:00, 3.58it/s] 10%|█ | 38105/371472 [3:04:29<25:53:43, 3.58it/s] 10%|█ | 38106/371472 [3:04:30<25:40:44, 3.61it/s] 10%|█ | 38107/371472 [3:04:30<27:06:03, 3.42it/s] 10%|█ | 38108/371472 [3:04:30<26:40:17, 3.47it/s] 10%|█ | 38109/371472 [3:04:30<26:46:50, 3.46it/s] 10%|█ | 38110/371472 [3:04:31<26:19:36, 3.52it/s] 10%|█ | 38111/371472 [3:04:31<26:22:23, 3.51it/s] 10%|█ | 38112/371472 [3:04:31<25:34:03, 3.62it/s] 10%|█ | 38113/371472 [3:04:32<26:17:39, 3.52it/s] 10%|█ | 38114/371472 [3:04:32<27:52:24, 3.32it/s] 10%|█ | 38115/371472 [3:04:32<27:49:39, 3.33it/s] 10%|█ | 38116/371472 [3:04:32<27:17:11, 3.39it/s] 10%|█ | 38117/371472 [3:04:33<26:55:06, 3.44it/s] 10%|█ | 38118/371472 [3:04:33<27:00:39, 3.43it/s] 10%|█ | 38119/371472 [3:04:33<28:33:25, 3.24it/s] 10%|█ | 38120/371472 [3:04:34<29:08:25, 3.18it/s] {'loss': 4.3856, 'learning_rate': 9.080781744920166e-07, 'epoch': 1.64} + 10%|█ | 38120/371472 [3:04:34<29:08:25, 3.18it/s] 10%|█ | 38121/371472 [3:04:34<27:45:10, 3.34it/s] 10%|█ | 38122/371472 [3:04:34<26:38:06, 3.48it/s] 10%|█ | 38123/371472 [3:04:35<26:32:54, 3.49it/s] 10%|█ | 38124/371472 [3:04:35<26:23:31, 3.51it/s] 10%|█ | 38125/371472 [3:04:35<25:39:41, 3.61it/s] 10%|█ | 38126/371472 [3:04:35<26:00:55, 3.56it/s] 10%|█ | 38127/371472 [3:04:36<26:42:20, 3.47it/s] 10%|█ | 38128/371472 [3:04:36<26:01:25, 3.56it/s] 10%|█ | 38129/371472 [3:04:36<25:53:06, 3.58it/s] 10%|█ | 38130/371472 [3:04:37<26:04:13, 3.55it/s] 10%|█ | 38131/371472 [3:04:37<26:51:45, 3.45it/s] 10%|█ | 38132/371472 [3:04:37<26:47:26, 3.46it/s] 10%|█ | 38133/371472 [3:04:37<26:42:33, 3.47it/s] 10%|█ | 38134/371472 [3:04:38<28:06:17, 3.29it/s] 10%|█ | 38135/371472 [3:04:38<27:14:43, 3.40it/s] 10%|█ | 38136/371472 [3:04:38<27:44:22, 3.34it/s] 10%|█ | 38137/371472 [3:04:39<27:35:55, 3.35it/s] 10%|█ | 38138/371472 [3:04:39<27:29:36, 3.37it/s] 10%|█ | 38139/371472 [3:04:39<27:27:11, 3.37it/s] 10%|█ | 38140/371472 [3:04:39<26:34:00, 3.49it/s] {'loss': 4.4117, 'learning_rate': 9.080296925165377e-07, 'epoch': 1.64} + 10%|█ | 38140/371472 [3:04:39<26:34:00, 3.49it/s] 10%|█ | 38141/371472 [3:04:40<26:06:11, 3.55it/s] 10%|█ | 38142/371472 [3:04:40<25:40:30, 3.61it/s] 10%|█ | 38143/371472 [3:04:40<25:46:35, 3.59it/s] 10%|█ | 38144/371472 [3:04:41<26:18:06, 3.52it/s] 10%|█ | 38145/371472 [3:04:41<25:43:18, 3.60it/s] 10%|█ | 38146/371472 [3:04:41<26:28:24, 3.50it/s] 10%|█ | 38147/371472 [3:04:41<27:07:03, 3.41it/s] 10%|█ | 38148/371472 [3:04:42<26:05:20, 3.55it/s] 10%|█ | 38149/371472 [3:04:42<26:20:39, 3.51it/s] 10%|█ | 38150/371472 [3:04:42<27:01:18, 3.43it/s] 10%|█ | 38151/371472 [3:04:43<26:24:33, 3.51it/s] 10%|█ | 38152/371472 [3:04:43<26:07:44, 3.54it/s] 10%|█ | 38153/371472 [3:04:43<25:23:29, 3.65it/s] 10%|█ | 38154/371472 [3:04:43<26:55:37, 3.44it/s] 10%|█ | 38155/371472 [3:04:44<27:12:07, 3.40it/s] 10%|█ | 38156/371472 [3:04:44<27:44:59, 3.34it/s] 10%|█ | 38157/371472 [3:04:44<28:01:10, 3.30it/s] 10%|█ | 38158/371472 [3:04:45<28:17:08, 3.27it/s] 10%|█ | 38159/371472 [3:04:45<31:03:21, 2.98it/s] 10%|█ | 38160/371472 [3:04:45<29:12:28, 3.17it/s] {'loss': 4.3225, 'learning_rate': 9.079812105410589e-07, 'epoch': 1.64} + 10%|█ | 38160/371472 [3:04:45<29:12:28, 3.17it/s] 10%|█ | 38161/371472 [3:04:46<27:57:46, 3.31it/s] 10%|█ | 38162/371472 [3:04:46<26:39:34, 3.47it/s] 10%|█ | 38163/371472 [3:04:46<26:47:41, 3.46it/s] 10%|█ | 38164/371472 [3:04:46<27:04:02, 3.42it/s] 10%|█ | 38165/371472 [3:04:47<26:41:44, 3.47it/s] 10%|█ | 38166/371472 [3:04:47<28:17:06, 3.27it/s] 10%|█ | 38167/371472 [3:04:47<28:54:46, 3.20it/s] 10%|█ | 38168/371472 [3:04:48<28:37:25, 3.23it/s] 10%|█ | 38169/371472 [3:04:48<27:13:53, 3.40it/s] 10%|█ | 38170/371472 [3:04:48<26:38:01, 3.48it/s] 10%|█ | 38171/371472 [3:04:49<25:31:05, 3.63it/s] 10%|█ | 38172/371472 [3:04:49<25:00:52, 3.70it/s] 10%|█ | 38173/371472 [3:04:49<25:01:36, 3.70it/s] 10%|█ | 38174/371472 [3:04:49<24:48:59, 3.73it/s] 10%|█ | 38175/371472 [3:04:50<25:45:23, 3.59it/s] 10%|█ | 38176/371472 [3:04:50<25:55:34, 3.57it/s] 10%|█ | 38177/371472 [3:04:50<25:59:36, 3.56it/s] 10%|█ | 38178/371472 [3:04:50<25:57:32, 3.57it/s] 10%|█ | 38179/371472 [3:04:51<25:32:44, 3.62it/s] 10%|█ | 38180/371472 [3:04:51<24:59:29, 3.70it/s] {'loss': 4.5712, 'learning_rate': 9.079327285655798e-07, 'epoch': 1.64} + 10%|█ | 38180/371472 [3:04:51<24:59:29, 3.70it/s] 10%|█ | 38181/371472 [3:04:51<25:06:23, 3.69it/s] 10%|█ | 38182/371472 [3:04:52<25:40:15, 3.61it/s] 10%|█ | 38183/371472 [3:04:52<25:19:35, 3.66it/s] 10%|█ | 38184/371472 [3:04:52<25:26:38, 3.64it/s] 10%|█ | 38185/371472 [3:04:52<25:10:54, 3.68it/s] 10%|█ | 38186/371472 [3:04:53<24:53:19, 3.72it/s] 10%|█ | 38187/371472 [3:04:53<25:16:49, 3.66it/s] 10%|█ | 38188/371472 [3:04:53<26:06:04, 3.55it/s] 10%|█ | 38189/371472 [3:04:53<25:56:03, 3.57it/s] 10%|█ | 38190/371472 [3:04:54<25:29:47, 3.63it/s] 10%|█ | 38191/371472 [3:04:54<26:42:35, 3.47it/s] 10%|█ | 38192/371472 [3:04:54<25:59:20, 3.56it/s] 10%|█ | 38193/371472 [3:04:55<25:22:08, 3.65it/s] 10%|█ | 38194/371472 [3:04:55<25:29:06, 3.63it/s] 10%|█ | 38195/371472 [3:04:55<26:07:16, 3.54it/s] 10%|█ | 38196/371472 [3:04:55<25:41:52, 3.60it/s] 10%|█ | 38197/371472 [3:04:56<25:20:28, 3.65it/s] 10%|█ | 38198/371472 [3:04:56<25:43:19, 3.60it/s] 10%|█ | 38199/371472 [3:04:56<25:18:51, 3.66it/s] 10%|█ | 38200/371472 [3:04:57<26:15:56, 3.52it/s] {'loss': 4.7472, 'learning_rate': 9.07884246590101e-07, 'epoch': 1.65} + 10%|█ | 38200/371472 [3:04:57<26:15:56, 3.52it/s] 10%|█ | 38201/371472 [3:04:57<25:36:20, 3.62it/s] 10%|█ | 38202/371472 [3:04:57<26:34:01, 3.48it/s] 10%|█ | 38203/371472 [3:04:57<28:49:30, 3.21it/s] 10%|█ | 38204/371472 [3:04:58<28:26:23, 3.26it/s] 10%|█ | 38205/371472 [3:04:58<29:46:25, 3.11it/s] 10%|█ | 38206/371472 [3:04:58<28:58:10, 3.20it/s] 10%|█ | 38207/371472 [3:04:59<27:23:26, 3.38it/s] 10%|█ | 38208/371472 [3:04:59<26:53:17, 3.44it/s] 10%|█ | 38209/371472 [3:04:59<27:55:56, 3.31it/s] 10%|█ | 38210/371472 [3:05:00<27:22:07, 3.38it/s] 10%|█ | 38211/371472 [3:05:00<26:50:03, 3.45it/s] 10%|█ | 38212/371472 [3:05:00<28:03:03, 3.30it/s] 10%|█ | 38213/371472 [3:05:00<27:38:40, 3.35it/s] 10%|█ | 38214/371472 [3:05:01<27:06:41, 3.41it/s] 10%|█ | 38215/371472 [3:05:01<28:03:52, 3.30it/s] 10%|█ | 38216/371472 [3:05:01<27:53:42, 3.32it/s] 10%|█ | 38217/371472 [3:05:02<27:58:39, 3.31it/s] 10%|█ | 38218/371472 [3:05:02<26:46:40, 3.46it/s] 10%|█ | 38219/371472 [3:05:02<25:36:29, 3.61it/s] 10%|█ | 38220/371472 [3:05:02<25:19:59, 3.65it/s] {'loss': 4.463, 'learning_rate': 9.078357646146221e-07, 'epoch': 1.65} + 10%|█ | 38220/371472 [3:05:02<25:19:59, 3.65it/s] 10%|█ | 38221/371472 [3:05:03<25:19:42, 3.65it/s] 10%|█ | 38222/371472 [3:05:03<25:25:28, 3.64it/s] 10%|█ | 38223/371472 [3:05:03<26:22:16, 3.51it/s] 10%|█ | 38224/371472 [3:05:04<25:33:30, 3.62it/s] 10%|█ | 38225/371472 [3:05:04<25:32:32, 3.62it/s] 10%|█ | 38226/371472 [3:05:04<25:17:29, 3.66it/s] 10%|█ | 38227/371472 [3:05:04<25:13:38, 3.67it/s] 10%|█ | 38228/371472 [3:05:05<26:49:57, 3.45it/s] 10%|█ | 38229/371472 [3:05:05<27:25:45, 3.37it/s] 10%|█ | 38230/371472 [3:05:05<26:26:31, 3.50it/s] 10%|█ | 38231/371472 [3:05:06<26:08:42, 3.54it/s] 10%|█ | 38232/371472 [3:05:06<25:53:49, 3.57it/s] 10%|█ | 38233/371472 [3:05:06<25:53:48, 3.57it/s] 10%|█ | 38234/371472 [3:05:06<27:08:03, 3.41it/s] 10%|█ | 38235/371472 [3:05:07<27:03:55, 3.42it/s] 10%|█ | 38236/371472 [3:05:07<27:18:05, 3.39it/s] 10%|█ | 38237/371472 [3:05:07<27:29:03, 3.37it/s] 10%|█ | 38238/371472 [3:05:08<28:10:02, 3.29it/s] 10%|█ | 38239/371472 [3:05:08<27:41:02, 3.34it/s] 10%|█ | 38240/371472 [3:05:08<26:51:42, 3.45it/s] {'loss': 4.3645, 'learning_rate': 9.077872826391433e-07, 'epoch': 1.65} + 10%|█ | 38240/371472 [3:05:08<26:51:42, 3.45it/s] 10%|█ | 38241/371472 [3:05:08<26:08:52, 3.54it/s] 10%|█ | 38242/371472 [3:05:09<25:37:35, 3.61it/s] 10%|█ | 38243/371472 [3:05:09<25:56:49, 3.57it/s] 10%|█ | 38244/371472 [3:05:09<26:08:36, 3.54it/s] 10%|█ | 38245/371472 [3:05:10<25:43:47, 3.60it/s] 10%|█ | 38246/371472 [3:05:10<25:45:29, 3.59it/s] 10%|█ | 38247/371472 [3:05:10<28:51:52, 3.21it/s] 10%|█ | 38248/371472 [3:05:11<27:24:20, 3.38it/s] 10%|█ | 38249/371472 [3:05:11<28:14:11, 3.28it/s] 10%|█ | 38250/371472 [3:05:11<27:28:54, 3.37it/s] 10%|█ | 38251/371472 [3:05:11<28:00:31, 3.30it/s] 10%|█ | 38252/371472 [3:05:12<29:12:52, 3.17it/s] 10%|█ | 38253/371472 [3:05:12<29:43:26, 3.11it/s] 10%|█ | 38254/371472 [3:05:12<28:45:30, 3.22it/s] 10%|█ | 38255/371472 [3:05:13<29:13:56, 3.17it/s] 10%|█ | 38256/371472 [3:05:13<27:49:31, 3.33it/s] 10%|█ | 38257/371472 [3:05:13<28:27:37, 3.25it/s] 10%|█ | 38258/371472 [3:05:14<27:57:12, 3.31it/s] 10%|█ | 38259/371472 [3:05:14<26:50:46, 3.45it/s] 10%|█ | 38260/371472 [3:05:14<27:00:15, 3.43it/s] {'loss': 4.1832, 'learning_rate': 9.077388006636643e-07, 'epoch': 1.65} + 10%|█ | 38260/371472 [3:05:14<27:00:15, 3.43it/s] 10%|█ | 38261/371472 [3:05:14<28:02:45, 3.30it/s] 10%|█ | 38262/371472 [3:05:15<26:38:04, 3.48it/s] 10%|█ | 38263/371472 [3:05:15<26:31:03, 3.49it/s] 10%|█ | 38264/371472 [3:05:15<27:22:04, 3.38it/s] 10%|█ | 38265/371472 [3:05:16<29:11:31, 3.17it/s] 10%|█ | 38266/371472 [3:05:16<29:49:33, 3.10it/s] 10%|█ | 38267/371472 [3:05:16<28:24:57, 3.26it/s] 10%|█ | 38268/371472 [3:05:17<28:00:55, 3.30it/s] 10%|█ | 38269/371472 [3:05:17<26:32:37, 3.49it/s] 10%|█ | 38270/371472 [3:05:17<26:21:31, 3.51it/s] 10%|█ | 38271/371472 [3:05:17<26:34:36, 3.48it/s] 10%|█ | 38272/371472 [3:05:18<28:29:47, 3.25it/s] 10%|█ | 38273/371472 [3:05:18<27:16:03, 3.39it/s] 10%|█ | 38274/371472 [3:05:18<26:16:02, 3.52it/s] 10%|█ | 38275/371472 [3:05:19<26:18:44, 3.52it/s] 10%|█ | 38276/371472 [3:05:19<26:40:39, 3.47it/s] 10%|█ | 38277/371472 [3:05:19<26:12:25, 3.53it/s] 10%|█ | 38278/371472 [3:05:20<28:05:58, 3.29it/s] 10%|█ | 38279/371472 [3:05:20<27:10:43, 3.41it/s] 10%|█ | 38280/371472 [3:05:20<26:28:00, 3.50it/s] {'loss': 4.2881, 'learning_rate': 9.076903186881855e-07, 'epoch': 1.65} + 10%|█ | 38280/371472 [3:05:20<26:28:00, 3.50it/s] 10%|█ | 38281/371472 [3:05:20<25:54:56, 3.57it/s] 10%|█ | 38282/371472 [3:05:21<27:16:29, 3.39it/s] 10%|█ | 38283/371472 [3:05:21<26:01:11, 3.56it/s] 10%|█ | 38284/371472 [3:05:21<25:20:44, 3.65it/s] 10%|█ | 38285/371472 [3:05:21<25:46:16, 3.59it/s] 10%|█ | 38286/371472 [3:05:22<26:59:30, 3.43it/s] 10%|█ | 38287/371472 [3:05:22<26:33:38, 3.48it/s] 10%|█ | 38288/371472 [3:05:22<27:24:25, 3.38it/s] 10%|█ | 38289/371472 [3:05:23<28:24:47, 3.26it/s] 10%|█ | 38290/371472 [3:05:23<29:28:51, 3.14it/s] 10%|█ | 38291/371472 [3:05:23<30:10:31, 3.07it/s] 10%|█ | 38292/371472 [3:05:24<28:42:36, 3.22it/s] 10%|█ | 38293/371472 [3:05:24<28:03:24, 3.30it/s] 10%|█ | 38294/371472 [3:05:24<27:38:26, 3.35it/s] 10%|█ | 38295/371472 [3:05:25<27:38:58, 3.35it/s] 10%|█ | 38296/371472 [3:05:25<27:10:51, 3.40it/s] 10%|█ | 38297/371472 [3:05:25<26:36:57, 3.48it/s] 10%|█ | 38298/371472 [3:05:25<27:49:59, 3.33it/s] 10%|█ | 38299/371472 [3:05:26<26:56:43, 3.43it/s] 10%|█ | 38300/371472 [3:05:26<26:06:36, 3.54it/s] {'loss': 4.5145, 'learning_rate': 9.076418367127066e-07, 'epoch': 1.65} + 10%|█ | 38300/371472 [3:05:26<26:06:36, 3.54it/s] 10%|█ | 38301/371472 [3:05:26<26:57:33, 3.43it/s] 10%|█ | 38302/371472 [3:05:27<26:24:13, 3.51it/s] 10%|█ | 38303/371472 [3:05:27<26:16:17, 3.52it/s] 10%|█ | 38304/371472 [3:05:27<25:34:35, 3.62it/s] 10%|█ | 38305/371472 [3:05:27<25:29:14, 3.63it/s] 10%|█ | 38306/371472 [3:05:28<25:08:06, 3.68it/s] 10%|█ | 38307/371472 [3:05:28<25:19:06, 3.66it/s] 10%|█ | 38308/371472 [3:05:28<25:51:32, 3.58it/s] 10%|█ | 38309/371472 [3:05:28<26:53:28, 3.44it/s] 10%|█ | 38310/371472 [3:05:29<26:35:51, 3.48it/s] 10%|█ | 38311/371472 [3:05:29<25:51:48, 3.58it/s] 10%|█ | 38312/371472 [3:05:29<26:24:54, 3.50it/s] 10%|█ | 38313/371472 [3:05:30<25:41:47, 3.60it/s] 10%|█ | 38314/371472 [3:05:30<27:18:11, 3.39it/s] 10%|█ | 38315/371472 [3:05:30<26:08:43, 3.54it/s] 10%|█ | 38316/371472 [3:05:30<26:21:37, 3.51it/s] 10%|█ | 38317/371472 [3:05:31<25:27:29, 3.64it/s] 10%|█ | 38318/371472 [3:05:31<25:38:59, 3.61it/s] 10%|█ | 38319/371472 [3:05:31<26:09:30, 3.54it/s] 10%|█ | 38320/371472 [3:05:32<25:53:57, 3.57it/s] {'loss': 4.46, 'learning_rate': 9.075933547372277e-07, 'epoch': 1.65} + 10%|█ | 38320/371472 [3:05:32<25:53:57, 3.57it/s] 10%|█ | 38321/371472 [3:05:32<25:29:57, 3.63it/s] 10%|█ | 38322/371472 [3:05:32<24:42:30, 3.75it/s] 10%|█ | 38323/371472 [3:05:32<25:05:39, 3.69it/s] 10%|█ | 38324/371472 [3:05:33<26:30:29, 3.49it/s] 10%|█ | 38325/371472 [3:05:33<27:48:16, 3.33it/s] 10%|█ | 38326/371472 [3:05:33<29:02:07, 3.19it/s] 10%|█ | 38327/371472 [3:05:34<28:49:12, 3.21it/s] 10%|█ | 38328/371472 [3:05:34<28:04:16, 3.30it/s] 10%|█ | 38329/371472 [3:05:34<28:19:56, 3.27it/s] 10%|█ | 38330/371472 [3:05:35<29:14:41, 3.16it/s] 10%|█ | 38331/371472 [3:05:35<28:18:16, 3.27it/s] 10%|█ | 38332/371472 [3:05:35<27:28:33, 3.37it/s] 10%|█ | 38333/371472 [3:05:35<27:25:40, 3.37it/s] 10%|█ | 38334/371472 [3:05:36<26:26:32, 3.50it/s] 10%|█ | 38335/371472 [3:05:36<25:37:56, 3.61it/s] 10%|█ | 38336/371472 [3:05:36<25:10:20, 3.68it/s] 10%|█ | 38337/371472 [3:05:36<24:33:05, 3.77it/s] 10%|█ | 38338/371472 [3:05:37<24:34:05, 3.77it/s] 10%|█ | 38339/371472 [3:05:37<25:10:20, 3.68it/s] 10%|█ | 38340/371472 [3:05:37<26:07:52, 3.54it/s] {'loss': 4.4185, 'learning_rate': 9.075448727617487e-07, 'epoch': 1.65} + 10%|█ | 38340/371472 [3:05:37<26:07:52, 3.54it/s] 10%|█ | 38341/371472 [3:05:38<26:57:02, 3.43it/s] 10%|█ | 38342/371472 [3:05:38<26:02:11, 3.55it/s] 10%|█ | 38343/371472 [3:05:38<27:12:46, 3.40it/s] 10%|█ | 38344/371472 [3:05:39<27:25:23, 3.37it/s] 10%|█ | 38345/371472 [3:05:39<28:01:40, 3.30it/s] 10%|█ | 38346/371472 [3:05:39<27:18:55, 3.39it/s] 10%|█ | 38347/371472 [3:05:39<28:08:43, 3.29it/s] 10%|█ | 38348/371472 [3:05:40<27:06:16, 3.41it/s] 10%|█ | 38349/371472 [3:05:40<26:08:40, 3.54it/s] 10%|█ | 38350/371472 [3:05:40<25:44:15, 3.60it/s] 10%|█ | 38351/371472 [3:05:41<25:29:18, 3.63it/s] 10%|█ | 38352/371472 [3:05:41<27:04:39, 3.42it/s] 10%|█ | 38353/371472 [3:05:41<26:41:49, 3.47it/s] 10%|█ | 38354/371472 [3:05:41<26:05:13, 3.55it/s] 10%|█ | 38355/371472 [3:05:42<25:40:26, 3.60it/s] 10%|█ | 38356/371472 [3:05:42<25:45:11, 3.59it/s] 10%|█ | 38357/371472 [3:05:42<24:49:41, 3.73it/s] 10%|█ | 38358/371472 [3:05:42<24:41:10, 3.75it/s] 10%|█ | 38359/371472 [3:05:43<25:11:40, 3.67it/s] 10%|█ | 38360/371472 [3:05:43<24:30:46, 3.77it/s] {'loss': 4.4524, 'learning_rate': 9.074963907862699e-07, 'epoch': 1.65} + 10%|█ | 38360/371472 [3:05:43<24:30:46, 3.77it/s] 10%|█ | 38361/371472 [3:05:43<24:47:06, 3.73it/s] 10%|█ | 38362/371472 [3:05:44<24:28:32, 3.78it/s] 10%|█ | 38363/371472 [3:05:44<29:32:56, 3.13it/s] 10%|█ | 38364/371472 [3:05:44<27:55:10, 3.31it/s] 10%|█ | 38365/371472 [3:05:44<26:20:23, 3.51it/s] 10%|█ | 38366/371472 [3:05:45<26:21:07, 3.51it/s] 10%|█ | 38367/371472 [3:05:45<27:32:10, 3.36it/s] 10%|█ | 38368/371472 [3:05:45<26:58:12, 3.43it/s] 10%|█ | 38369/371472 [3:05:46<28:27:06, 3.25it/s] 10%|█ | 38370/371472 [3:05:46<28:51:50, 3.21it/s] 10%|█ | 38371/371472 [3:05:46<27:23:06, 3.38it/s] 10%|█ | 38372/371472 [3:05:47<27:42:35, 3.34it/s] 10%|█ | 38373/371472 [3:05:47<27:14:10, 3.40it/s] 10%|█ | 38374/371472 [3:05:47<27:38:37, 3.35it/s] 10%|█ | 38375/371472 [3:05:48<28:52:33, 3.20it/s] 10%|█ | 38376/371472 [3:05:48<28:15:55, 3.27it/s] 10%|█ | 38377/371472 [3:05:48<28:49:47, 3.21it/s] 10%|█ | 38378/371472 [3:05:48<28:37:45, 3.23it/s] 10%|█ | 38379/371472 [3:05:49<28:17:14, 3.27it/s] 10%|█ | 38380/371472 [3:05:49<27:29:38, 3.37it/s] {'loss': 4.54, 'learning_rate': 9.074479088107909e-07, 'epoch': 1.65} + 10%|█ | 38380/371472 [3:05:49<27:29:38, 3.37it/s] 10%|█ | 38381/371472 [3:05:49<28:05:42, 3.29it/s] 10%|█ | 38382/371472 [3:05:50<28:22:36, 3.26it/s] 10%|█ | 38383/371472 [3:05:50<28:39:26, 3.23it/s] 10%|█ | 38384/371472 [3:05:50<31:23:12, 2.95it/s] 10%|█ | 38385/371472 [3:05:51<29:27:03, 3.14it/s] 10%|█ | 38386/371472 [3:05:51<29:40:44, 3.12it/s] 10%|█ | 38387/371472 [3:05:51<28:18:09, 3.27it/s] 10%|█ | 38388/371472 [3:05:52<28:54:44, 3.20it/s] 10%|█ | 38389/371472 [3:05:52<28:12:08, 3.28it/s] 10%|█ | 38390/371472 [3:05:52<28:02:22, 3.30it/s] 10%|█ | 38391/371472 [3:05:53<32:07:38, 2.88it/s] 10%|█ | 38392/371472 [3:05:53<30:41:49, 3.01it/s] 10%|█ | 38393/371472 [3:05:53<29:43:46, 3.11it/s] 10%|█ | 38394/371472 [3:05:54<30:18:52, 3.05it/s] 10%|█ | 38395/371472 [3:05:54<28:40:19, 3.23it/s] 10%|█ | 38396/371472 [3:05:54<27:20:18, 3.38it/s] 10%|█ | 38397/371472 [3:05:54<26:32:59, 3.48it/s] 10%|█ | 38398/371472 [3:05:55<26:06:26, 3.54it/s] 10%|█ | 38399/371472 [3:05:55<25:33:05, 3.62it/s] 10%|█ | 38400/371472 [3:05:55<25:04:36, 3.69it/s] {'loss': 4.2419, 'learning_rate': 9.073994268353122e-07, 'epoch': 1.65} + 10%|█ | 38400/371472 [3:05:55<25:04:36, 3.69it/s] 10%|█ | 38401/371472 [3:05:55<25:49:46, 3.58it/s] 10%|█ | 38402/371472 [3:05:56<25:09:04, 3.68it/s] 10%|█ | 38403/371472 [3:05:56<25:08:54, 3.68it/s] 10%|█ | 38404/371472 [3:05:56<25:03:30, 3.69it/s] 10%|█ | 38405/371472 [3:05:57<36:50:37, 2.51it/s] 10%|█ | 38406/371472 [3:05:57<33:43:29, 2.74it/s] 10%|█ | 38407/371472 [3:05:57<31:27:13, 2.94it/s] 10%|█ | 38408/371472 [3:05:58<29:00:21, 3.19it/s] 10%|█ | 38409/371472 [3:05:58<27:09:41, 3.41it/s] 10%|█ | 38410/371472 [3:05:58<27:43:03, 3.34it/s] 10%|█ | 38411/371472 [3:05:59<27:27:28, 3.37it/s] 10%|█ | 38412/371472 [3:05:59<27:13:18, 3.40it/s] 10%|█ | 38413/371472 [3:05:59<26:26:47, 3.50it/s] 10%|█ | 38414/371472 [3:05:59<25:57:50, 3.56it/s] 10%|█ | 38415/371472 [3:06:00<27:13:51, 3.40it/s] 10%|█ | 38416/371472 [3:06:00<28:40:58, 3.23it/s] 10%|█ | 38417/371472 [3:06:00<28:53:32, 3.20it/s] 10%|█ | 38418/371472 [3:06:01<28:22:40, 3.26it/s] 10%|█ | 38419/371472 [3:06:01<27:42:57, 3.34it/s] 10%|█ | 38420/371472 [3:06:01<29:04:01, 3.18it/s] {'loss': 4.4214, 'learning_rate': 9.073509448598332e-07, 'epoch': 1.65} + 10%|█ | 38420/371472 [3:06:01<29:04:01, 3.18it/s] 10%|█ | 38421/371472 [3:06:02<29:09:36, 3.17it/s] 10%|█ | 38422/371472 [3:06:02<27:50:23, 3.32it/s] 10%|█ | 38423/371472 [3:06:02<26:59:15, 3.43it/s] 10%|█ | 38424/371472 [3:06:02<26:21:35, 3.51it/s] 10%|█ | 38425/371472 [3:06:03<26:28:54, 3.49it/s] 10%|█ | 38426/371472 [3:06:03<25:56:48, 3.57it/s] 10%|█ | 38427/371472 [3:06:03<27:01:41, 3.42it/s] 10%|█ | 38428/371472 [3:06:04<27:39:23, 3.35it/s] 10%|█ | 38429/371472 [3:06:04<27:46:50, 3.33it/s] 10%|█ | 38430/371472 [3:06:04<26:30:36, 3.49it/s] 10%|█ | 38431/371472 [3:06:05<27:33:21, 3.36it/s] 10%|█ | 38432/371472 [3:06:05<26:02:48, 3.55it/s] 10%|█ | 38433/371472 [3:06:05<26:31:54, 3.49it/s] 10%|█ | 38434/371472 [3:06:05<26:15:37, 3.52it/s] 10%|█ | 38435/371472 [3:06:06<26:35:36, 3.48it/s] 10%|█ | 38436/371472 [3:06:06<27:13:01, 3.40it/s] 10%|█ | 38437/371472 [3:06:06<26:39:56, 3.47it/s] 10%|█ | 38438/371472 [3:06:07<26:40:44, 3.47it/s] 10%|█ | 38439/371472 [3:06:07<26:35:45, 3.48it/s] 10%|█ | 38440/371472 [3:06:07<25:49:56, 3.58it/s] {'loss': 4.5933, 'learning_rate': 9.073024628843542e-07, 'epoch': 1.66} + 10%|█ | 38440/371472 [3:06:07<25:49:56, 3.58it/s] 10%|█ | 38441/371472 [3:06:07<27:19:25, 3.39it/s] 10%|█ | 38442/371472 [3:06:08<27:10:18, 3.40it/s] 10%|█ | 38443/371472 [3:06:08<27:08:02, 3.41it/s] 10%|█ | 38444/371472 [3:06:08<26:46:44, 3.45it/s] 10%|█ | 38445/371472 [3:06:09<26:20:38, 3.51it/s] 10%|█ | 38446/371472 [3:06:09<26:13:00, 3.53it/s] 10%|█ | 38447/371472 [3:06:09<26:31:29, 3.49it/s] 10%|█ | 38448/371472 [3:06:09<26:31:28, 3.49it/s] 10%|█ | 38449/371472 [3:06:10<25:36:29, 3.61it/s] 10%|█ | 38450/371472 [3:06:10<25:34:11, 3.62it/s] 10%|█ | 38451/371472 [3:06:10<25:30:36, 3.63it/s] 10%|█ | 38452/371472 [3:06:11<26:26:26, 3.50it/s] 10%|█ | 38453/371472 [3:06:11<28:14:30, 3.28it/s] 10%|█ | 38454/371472 [3:06:11<27:14:42, 3.40it/s] 10%|█ | 38455/371472 [3:06:11<27:04:42, 3.42it/s] 10%|█ | 38456/371472 [3:06:12<26:43:59, 3.46it/s] 10%|█ | 38457/371472 [3:06:12<26:40:47, 3.47it/s] 10%|█ | 38458/371472 [3:06:12<26:05:01, 3.55it/s] 10%|█ | 38459/371472 [3:06:13<27:08:01, 3.41it/s] 10%|█ | 38460/371472 [3:06:13<26:33:15, 3.48it/s] {'loss': 4.4869, 'learning_rate': 9.072539809088754e-07, 'epoch': 1.66} + 10%|█ | 38460/371472 [3:06:13<26:33:15, 3.48it/s] 10%|█ | 38461/371472 [3:06:13<28:14:11, 3.28it/s] 10%|█ | 38462/371472 [3:06:13<27:58:56, 3.31it/s] 10%|█ | 38463/371472 [3:06:14<28:59:59, 3.19it/s] 10%|█ | 38464/371472 [3:06:14<28:00:15, 3.30it/s] 10%|█ | 38465/371472 [3:06:14<27:24:04, 3.38it/s] 10%|█ | 38466/371472 [3:06:15<29:04:51, 3.18it/s] 10%|█ | 38467/371472 [3:06:15<27:43:45, 3.34it/s] 10%|█ | 38468/371472 [3:06:15<27:29:48, 3.36it/s] 10%|█ | 38469/371472 [3:06:16<26:27:06, 3.50it/s] 10%|█ | 38470/371472 [3:06:16<25:44:35, 3.59it/s] 10%|█ | 38471/371472 [3:06:16<25:31:13, 3.62it/s] 10%|█ | 38472/371472 [3:06:16<25:01:41, 3.70it/s] 10%|█ | 38473/371472 [3:06:17<25:07:31, 3.68it/s] 10%|█ | 38474/371472 [3:06:17<26:09:54, 3.54it/s] 10%|█ | 38475/371472 [3:06:17<25:50:19, 3.58it/s] 10%|█ | 38476/371472 [3:06:17<26:08:19, 3.54it/s] 10%|█ | 38477/371472 [3:06:18<26:51:07, 3.44it/s] 10%|█ | 38478/371472 [3:06:18<28:37:39, 3.23it/s] 10%|█ | 38479/371472 [3:06:19<29:48:29, 3.10it/s] 10%|█ | 38480/371472 [3:06:19<29:28:40, 3.14it/s] {'loss': 4.3976, 'learning_rate': 9.072054989333966e-07, 'epoch': 1.66} + 10%|█ | 38480/371472 [3:06:19<29:28:40, 3.14it/s] 10%|█ | 38481/371472 [3:06:19<31:25:12, 2.94it/s] 10%|█ | 38482/371472 [3:06:19<29:23:06, 3.15it/s] 10%|█ | 38483/371472 [3:06:20<28:16:28, 3.27it/s] 10%|█ | 38484/371472 [3:06:20<28:27:41, 3.25it/s] 10%|█ | 38485/371472 [3:06:20<26:54:10, 3.44it/s] 10%|█ | 38486/371472 [3:06:21<26:07:31, 3.54it/s] 10%|█ | 38487/371472 [3:06:21<25:44:39, 3.59it/s] 10%|█ | 38488/371472 [3:06:21<25:59:17, 3.56it/s] 10%|█ | 38489/371472 [3:06:21<25:21:48, 3.65it/s] 10%|█ | 38490/371472 [3:06:22<25:12:50, 3.67it/s] 10%|█ | 38491/371472 [3:06:22<25:41:48, 3.60it/s] 10%|█ | 38492/371472 [3:06:22<25:24:37, 3.64it/s] 10%|█ | 38493/371472 [3:06:23<26:30:52, 3.49it/s] 10%|█ | 38494/371472 [3:06:23<25:33:34, 3.62it/s] 10%|█ | 38495/371472 [3:06:23<27:14:28, 3.40it/s] 10%|█ | 38496/371472 [3:06:23<27:08:42, 3.41it/s] 10%|█ | 38497/371472 [3:06:24<26:21:27, 3.51it/s] 10%|█ | 38498/371472 [3:06:24<27:02:03, 3.42it/s] 10%|█ | 38499/371472 [3:06:24<27:04:02, 3.42it/s] 10%|█ | 38500/371472 [3:06:25<27:56:44, 3.31it/s] {'loss': 4.3852, 'learning_rate': 9.071570169579176e-07, 'epoch': 1.66} + 10%|█ | 38500/371472 [3:06:25<27:56:44, 3.31it/s] 10%|█ | 38501/371472 [3:06:25<26:57:29, 3.43it/s] 10%|█ | 38502/371472 [3:06:25<26:05:50, 3.54it/s] 10%|█ | 38503/371472 [3:06:25<26:06:45, 3.54it/s] 10%|█ | 38504/371472 [3:06:26<26:11:57, 3.53it/s] 10%|█ | 38505/371472 [3:06:26<26:15:32, 3.52it/s] 10%|█ | 38506/371472 [3:06:26<26:39:38, 3.47it/s] 10%|█ | 38507/371472 [3:06:27<27:50:37, 3.32it/s] 10%|█ | 38508/371472 [3:06:27<26:54:32, 3.44it/s] 10%|█ | 38509/371472 [3:06:27<26:33:45, 3.48it/s] 10%|█ | 38510/371472 [3:06:27<26:21:14, 3.51it/s] 10%|█ | 38511/371472 [3:06:28<26:12:41, 3.53it/s] 10%|█ | 38512/371472 [3:06:28<25:49:38, 3.58it/s] 10%|█ | 38513/371472 [3:06:28<26:44:22, 3.46it/s] 10%|█ | 38514/371472 [3:06:29<27:42:12, 3.34it/s] 10%|█ | 38515/371472 [3:06:29<27:06:07, 3.41it/s] 10%|█ | 38516/371472 [3:06:29<25:52:46, 3.57it/s] 10%|█ | 38517/371472 [3:06:29<26:25:07, 3.50it/s] 10%|█ | 38518/371472 [3:06:30<25:15:09, 3.66it/s] 10%|█ | 38519/371472 [3:06:30<25:06:24, 3.68it/s] 10%|█ | 38520/371472 [3:06:30<25:06:29, 3.68it/s] {'loss': 4.641, 'learning_rate': 9.071085349824387e-07, 'epoch': 1.66} + 10%|█ | 38520/371472 [3:06:30<25:06:29, 3.68it/s] 10%|█ | 38521/371472 [3:06:31<25:48:00, 3.58it/s] 10%|█ | 38522/371472 [3:06:31<25:53:49, 3.57it/s] 10%|█ | 38523/371472 [3:06:31<25:37:51, 3.61it/s] 10%|█ | 38524/371472 [3:06:31<25:03:25, 3.69it/s] 10%|█ | 38525/371472 [3:06:32<25:12:23, 3.67it/s] 10%|█ | 38526/371472 [3:06:32<25:54:53, 3.57it/s] 10%|█ | 38527/371472 [3:06:32<25:47:10, 3.59it/s] 10%|█ | 38528/371472 [3:06:32<26:22:17, 3.51it/s] 10%|█ | 38529/371472 [3:06:33<28:26:29, 3.25it/s] 10%|█ | 38530/371472 [3:06:33<27:42:27, 3.34it/s] 10%|█ | 38531/371472 [3:06:34<33:09:25, 2.79it/s] 10%|█ | 38532/371472 [3:06:34<30:38:23, 3.02it/s] 10%|█ | 38533/371472 [3:06:34<29:36:59, 3.12it/s] 10%|█ | 38534/371472 [3:06:34<29:13:08, 3.17it/s] 10%|█ | 38535/371472 [3:06:35<28:53:52, 3.20it/s] 10%|█ | 38536/371472 [3:06:35<27:03:07, 3.42it/s] 10%|█ | 38537/371472 [3:06:35<29:10:12, 3.17it/s] 10%|█ | 38538/371472 [3:06:36<27:19:55, 3.38it/s] 10%|█ | 38539/371472 [3:06:36<27:31:50, 3.36it/s] 10%|█ | 38540/371472 [3:06:36<27:12:18, 3.40it/s] {'loss': 4.4362, 'learning_rate': 9.070600530069599e-07, 'epoch': 1.66} + 10%|█ | 38540/371472 [3:06:36<27:12:18, 3.40it/s] 10%|█ | 38541/371472 [3:06:37<26:53:21, 3.44it/s] 10%|█ | 38542/371472 [3:06:37<26:46:41, 3.45it/s] 10%|█ | 38543/371472 [3:06:37<26:46:11, 3.45it/s] 10%|█ | 38544/371472 [3:06:37<25:49:37, 3.58it/s] 10%|█ | 38545/371472 [3:06:38<26:19:01, 3.51it/s] 10%|█ | 38546/371472 [3:06:38<25:19:00, 3.65it/s] 10%|█ | 38547/371472 [3:06:38<24:45:19, 3.74it/s] 10%|█ | 38548/371472 [3:06:38<24:23:01, 3.79it/s] 10%|█ | 38549/371472 [3:06:39<24:16:37, 3.81it/s] 10%|█ | 38550/371472 [3:06:39<24:37:57, 3.75it/s] 10%|█ | 38551/371472 [3:06:39<24:39:20, 3.75it/s] 10%|█ | 38552/371472 [3:06:40<25:21:54, 3.65it/s] 10%|█ | 38553/371472 [3:06:40<26:47:56, 3.45it/s] 10%|█ | 38554/371472 [3:06:40<25:57:42, 3.56it/s] 10%|█ | 38555/371472 [3:06:40<27:25:24, 3.37it/s] 10%|█ | 38556/371472 [3:06:41<25:57:35, 3.56it/s] 10%|█ | 38557/371472 [3:06:41<27:25:49, 3.37it/s] 10%|█ | 38558/371472 [3:06:41<26:15:12, 3.52it/s] 10%|█ | 38559/371472 [3:06:42<27:34:38, 3.35it/s] 10%|█ | 38560/371472 [3:06:42<27:12:54, 3.40it/s] {'loss': 4.6085, 'learning_rate': 9.070115710314809e-07, 'epoch': 1.66} + 10%|█ | 38560/371472 [3:06:42<27:12:54, 3.40it/s] 10%|█ | 38561/371472 [3:06:42<28:22:14, 3.26it/s] 10%|█ | 38562/371472 [3:06:42<27:20:28, 3.38it/s] 10%|█ | 38563/371472 [3:06:43<27:41:43, 3.34it/s] 10%|█ | 38564/371472 [3:06:43<28:12:51, 3.28it/s] 10%|█ | 38565/371472 [3:06:43<27:35:04, 3.35it/s] 10%|█ | 38566/371472 [3:06:44<26:26:40, 3.50it/s] 10%|█ | 38567/371472 [3:06:44<25:38:03, 3.61it/s] 10%|█ | 38568/371472 [3:06:44<26:15:58, 3.52it/s] 10%|█ | 38569/371472 [3:06:45<28:55:58, 3.20it/s] 10%|█ | 38570/371472 [3:06:45<27:25:48, 3.37it/s] 10%|█ | 38571/371472 [3:06:45<28:41:25, 3.22it/s] 10%|█ | 38572/371472 [3:06:45<28:08:14, 3.29it/s] 10%|█ | 38573/371472 [3:06:46<28:58:21, 3.19it/s] 10%|█ | 38574/371472 [3:06:46<28:46:12, 3.21it/s] 10%|█ | 38575/371472 [3:06:46<28:17:49, 3.27it/s] 10%|█ | 38576/371472 [3:06:47<26:55:50, 3.43it/s] 10%|█ | 38577/371472 [3:06:47<26:17:40, 3.52it/s] 10%|█ | 38578/371472 [3:06:47<25:18:01, 3.65it/s] 10%|█ | 38579/371472 [3:06:48<26:50:43, 3.44it/s] 10%|█ | 38580/371472 [3:06:48<26:04:09, 3.55it/s] {'loss': 4.3794, 'learning_rate': 9.06963089056002e-07, 'epoch': 1.66} + 10%|█ | 38580/371472 [3:06:48<26:04:09, 3.55it/s] 10%|█ | 38581/371472 [3:06:48<25:33:12, 3.62it/s] 10%|█ | 38582/371472 [3:06:48<25:34:44, 3.62it/s] 10%|█ | 38583/371472 [3:06:49<25:01:24, 3.70it/s] 10%|█ | 38584/371472 [3:06:49<24:49:15, 3.73it/s] 10%|█ | 38585/371472 [3:06:49<29:12:00, 3.17it/s] 10%|█ | 38586/371472 [3:06:50<29:42:16, 3.11it/s] 10%|█ | 38587/371472 [3:06:50<28:23:28, 3.26it/s] 10%|█ | 38588/371472 [3:06:50<28:27:09, 3.25it/s] 10%|█ | 38589/371472 [3:06:50<27:43:40, 3.33it/s] 10%|█ | 38590/371472 [3:06:51<28:00:37, 3.30it/s] 10%|█ | 38591/371472 [3:06:51<29:17:06, 3.16it/s] 10%|█ | 38592/371472 [3:06:51<29:09:52, 3.17it/s] 10%|█ | 38593/371472 [3:06:52<27:31:29, 3.36it/s] 10%|█ | 38594/371472 [3:06:52<27:26:47, 3.37it/s] 10%|█ | 38595/371472 [3:06:52<28:11:05, 3.28it/s] 10%|█ | 38596/371472 [3:06:53<26:59:58, 3.42it/s] 10%|█ | 38597/371472 [3:06:53<27:50:53, 3.32it/s] 10%|█ | 38598/371472 [3:06:53<29:53:31, 3.09it/s] 10%|█ | 38599/371472 [3:06:54<28:42:47, 3.22it/s] 10%|█ | 38600/371472 [3:06:54<28:14:34, 3.27it/s] {'loss': 4.4347, 'learning_rate': 9.069146070805231e-07, 'epoch': 1.66} + 10%|█ | 38600/371472 [3:06:54<28:14:34, 3.27it/s] 10%|█ | 38601/371472 [3:06:54<27:39:09, 3.34it/s] 10%|█ | 38602/371472 [3:06:54<28:47:50, 3.21it/s] 10%|█ | 38603/371472 [3:06:55<28:11:13, 3.28it/s] 10%|█ | 38604/371472 [3:06:55<27:24:51, 3.37it/s] 10%|█ | 38605/371472 [3:06:55<27:18:35, 3.39it/s] 10%|█ | 38606/371472 [3:06:56<28:33:58, 3.24it/s] 10%|█ | 38607/371472 [3:06:56<29:05:04, 3.18it/s] 10%|█ | 38608/371472 [3:06:56<27:45:28, 3.33it/s] 10%|█ | 38609/371472 [3:06:57<28:25:53, 3.25it/s] 10%|█ | 38610/371472 [3:06:57<26:55:37, 3.43it/s] 10%|█ | 38611/371472 [3:06:57<26:15:52, 3.52it/s] 10%|█ | 38612/371472 [3:06:57<26:08:10, 3.54it/s] 10%|█ | 38613/371472 [3:06:58<25:11:12, 3.67it/s] 10%|█ | 38614/371472 [3:06:58<24:50:06, 3.72it/s] 10%|█ | 38615/371472 [3:06:58<25:14:46, 3.66it/s] 10%|█ | 38616/371472 [3:06:58<25:29:51, 3.63it/s] 10%|█ | 38617/371472 [3:06:59<25:43:26, 3.59it/s] 10%|█ | 38618/371472 [3:06:59<26:05:34, 3.54it/s] 10%|█ | 38619/371472 [3:06:59<26:29:19, 3.49it/s] 10%|█ | 38620/371472 [3:07:00<27:37:45, 3.35it/s] {'loss': 4.5453, 'learning_rate': 9.068661251050443e-07, 'epoch': 1.66} + 10%|█ | 38620/371472 [3:07:00<27:37:45, 3.35it/s] 10%|█ | 38621/371472 [3:07:00<26:48:34, 3.45it/s] 10%|█ | 38622/371472 [3:07:00<27:08:40, 3.41it/s] 10%|█ | 38623/371472 [3:07:01<27:40:48, 3.34it/s] 10%|█ | 38624/371472 [3:07:01<27:03:39, 3.42it/s] 10%|█ | 38625/371472 [3:07:01<26:32:40, 3.48it/s] 10%|█ | 38626/371472 [3:07:01<28:07:51, 3.29it/s] 10%|█ | 38627/371472 [3:07:02<28:19:30, 3.26it/s] 10%|█ | 38628/371472 [3:07:02<27:51:28, 3.32it/s] 10%|█ | 38629/371472 [3:07:02<27:02:28, 3.42it/s] 10%|█ | 38630/371472 [3:07:03<26:45:19, 3.46it/s] 10%|█ | 38631/371472 [3:07:03<26:13:17, 3.53it/s] 10%|█ | 38632/371472 [3:07:03<27:00:44, 3.42it/s] 10%|█ | 38633/371472 [3:07:03<26:47:44, 3.45it/s] 10%|█ | 38634/371472 [3:07:04<26:28:38, 3.49it/s] 10%|█ | 38635/371472 [3:07:04<26:48:49, 3.45it/s] 10%|█ | 38636/371472 [3:07:04<28:38:47, 3.23it/s] 10%|█ | 38637/371472 [3:07:05<28:27:25, 3.25it/s] 10%|█ | 38638/371472 [3:07:05<27:31:53, 3.36it/s] 10%|█ | 38639/371472 [3:07:05<26:38:48, 3.47it/s] 10%|█ | 38640/371472 [3:07:06<26:15:15, 3.52it/s] {'loss': 4.2614, 'learning_rate': 9.068176431295653e-07, 'epoch': 1.66} + 10%|█ | 38640/371472 [3:07:06<26:15:15, 3.52it/s] 10%|█ | 38641/371472 [3:07:06<26:52:57, 3.44it/s] 10%|█ | 38642/371472 [3:07:06<27:44:17, 3.33it/s] 10%|█ | 38643/371472 [3:07:06<26:46:05, 3.45it/s] 10%|█ | 38644/371472 [3:07:07<26:03:36, 3.55it/s] 10%|█ | 38645/371472 [3:07:07<25:57:11, 3.56it/s] 10%|█ | 38646/371472 [3:07:07<27:39:42, 3.34it/s] 10%|█ | 38647/371472 [3:07:08<26:56:29, 3.43it/s] 10%|█ | 38648/371472 [3:07:08<26:01:50, 3.55it/s] 10%|█ | 38649/371472 [3:07:08<28:02:37, 3.30it/s] 10%|█ | 38650/371472 [3:07:08<27:49:57, 3.32it/s] 10%|█ | 38651/371472 [3:07:09<26:01:17, 3.55it/s] 10%|█ | 38652/371472 [3:07:09<25:27:57, 3.63it/s] 10%|█ | 38653/371472 [3:07:09<26:23:34, 3.50it/s] 10%|█ | 38654/371472 [3:07:10<26:52:20, 3.44it/s] 10%|█ | 38655/371472 [3:07:10<26:13:30, 3.53it/s] 10%|█ | 38656/371472 [3:07:10<26:03:46, 3.55it/s] 10%|█ | 38657/371472 [3:07:10<26:56:52, 3.43it/s] 10%|█ | 38658/371472 [3:07:11<27:34:58, 3.35it/s] 10%|█ | 38659/371472 [3:07:11<27:04:46, 3.41it/s] 10%|█ | 38660/371472 [3:07:11<26:27:24, 3.49it/s] {'loss': 4.4148, 'learning_rate': 9.067691611540865e-07, 'epoch': 1.67} + 10%|█ | 38660/371472 [3:07:11<26:27:24, 3.49it/s] 10%|█ | 38661/371472 [3:07:12<28:13:27, 3.28it/s] 10%|█ | 38662/371472 [3:07:12<27:36:07, 3.35it/s] 10%|█ | 38663/371472 [3:07:12<26:28:46, 3.49it/s] 10%|█ | 38664/371472 [3:07:13<26:57:59, 3.43it/s] 10%|█ | 38665/371472 [3:07:13<26:02:33, 3.55it/s] 10%|█ | 38666/371472 [3:07:13<28:25:52, 3.25it/s] 10%|█ | 38667/371472 [3:07:13<28:00:33, 3.30it/s] 10%|█ | 38668/371472 [3:07:14<27:25:14, 3.37it/s] 10%|█ | 38669/371472 [3:07:14<26:28:47, 3.49it/s] 10%|█ | 38670/371472 [3:07:14<25:23:51, 3.64it/s] 10%|█ | 38671/371472 [3:07:15<26:18:14, 3.51it/s] 10%|█ | 38672/371472 [3:07:15<27:08:13, 3.41it/s] 10%|█ | 38673/371472 [3:07:15<26:09:32, 3.53it/s] 10%|█ | 38674/371472 [3:07:15<26:45:41, 3.45it/s] 10%|█ | 38675/371472 [3:07:16<26:04:21, 3.55it/s] 10%|█ | 38676/371472 [3:07:16<29:32:21, 3.13it/s] 10%|█ | 38677/371472 [3:07:16<28:43:48, 3.22it/s] 10%|█ | 38678/371472 [3:07:17<27:09:13, 3.40it/s] 10%|█ | 38679/371472 [3:07:17<28:29:17, 3.24it/s] 10%|█ | 38680/371472 [3:07:17<27:24:51, 3.37it/s] {'loss': 4.2945, 'learning_rate': 9.067206791786076e-07, 'epoch': 1.67} + 10%|█ | 38680/371472 [3:07:17<27:24:51, 3.37it/s] 10%|█ | 38681/371472 [3:07:18<27:09:05, 3.40it/s] 10%|█ | 38682/371472 [3:07:18<26:31:25, 3.49it/s] 10%|█ | 38683/371472 [3:07:18<26:01:25, 3.55it/s] 10%|█ | 38684/371472 [3:07:18<25:30:47, 3.62it/s] 10%|█ | 38685/371472 [3:07:19<25:11:39, 3.67it/s] 10%|█ | 38686/371472 [3:07:19<26:53:05, 3.44it/s] 10%|█ | 38687/371472 [3:07:19<25:47:16, 3.58it/s] 10%|█ | 38688/371472 [3:07:19<25:31:14, 3.62it/s] 10%|█ | 38689/371472 [3:07:20<25:03:06, 3.69it/s] 10%|█ | 38690/371472 [3:07:20<25:08:03, 3.68it/s] 10%|█ | 38691/371472 [3:07:20<26:57:41, 3.43it/s] 10%|█ | 38692/371472 [3:07:21<26:45:57, 3.45it/s] 10%|█ | 38693/371472 [3:07:21<27:07:55, 3.41it/s] 10%|█ | 38694/371472 [3:07:21<27:37:44, 3.35it/s] 10%|█ | 38695/371472 [3:07:22<27:48:20, 3.32it/s] 10%|█ | 38696/371472 [3:07:22<26:48:11, 3.45it/s] 10%|█ | 38697/371472 [3:07:22<26:46:18, 3.45it/s] 10%|█ | 38698/371472 [3:07:22<25:56:53, 3.56it/s] 10%|█ | 38699/371472 [3:07:23<26:23:44, 3.50it/s] 10%|█ | 38700/371472 [3:07:23<28:55:56, 3.19it/s] {'loss': 4.3848, 'learning_rate': 9.066721972031286e-07, 'epoch': 1.67} + 10%|█ | 38700/371472 [3:07:23<28:55:56, 3.19it/s] 10%|█ | 38701/371472 [3:07:23<27:33:07, 3.35it/s] 10%|█ | 38702/371472 [3:07:24<26:36:02, 3.47it/s] 10%|█ | 38703/371472 [3:07:24<29:12:10, 3.17it/s] 10%|█ | 38704/371472 [3:07:24<28:06:21, 3.29it/s] 10%|█ | 38705/371472 [3:07:24<27:01:40, 3.42it/s] 10%|█ | 38706/371472 [3:07:25<27:07:45, 3.41it/s] 10%|█ | 38707/371472 [3:07:25<27:17:48, 3.39it/s] 10%|█ | 38708/371472 [3:07:25<26:38:01, 3.47it/s] 10%|█ | 38709/371472 [3:07:26<26:39:43, 3.47it/s] 10%|█ | 38710/371472 [3:07:26<26:13:49, 3.52it/s] 10%|█ | 38711/371472 [3:07:26<26:05:17, 3.54it/s] 10%|█ | 38712/371472 [3:07:27<29:23:36, 3.14it/s] 10%|█ | 38713/371472 [3:07:27<28:36:07, 3.23it/s] 10%|█ | 38714/371472 [3:07:27<27:49:40, 3.32it/s] 10%|█ | 38715/371472 [3:07:27<28:23:57, 3.25it/s] 10%|█ | 38716/371472 [3:07:28<31:43:32, 2.91it/s] 10%|█ | 38717/371472 [3:07:28<33:05:59, 2.79it/s] 10%|█ | 38718/371472 [3:07:29<31:10:48, 2.96it/s] 10%|█ | 38719/371472 [3:07:29<30:04:53, 3.07it/s] 10%|█ | 38720/371472 [3:07:29<28:19:46, 3.26it/s] {'loss': 4.2856, 'learning_rate': 9.066237152276497e-07, 'epoch': 1.67} + 10%|█ | 38720/371472 [3:07:29<28:19:46, 3.26it/s] 10%|█ | 38721/371472 [3:07:29<27:35:47, 3.35it/s] 10%|█ | 38722/371472 [3:07:30<26:54:34, 3.43it/s] 10%|█ | 38723/371472 [3:07:30<27:50:31, 3.32it/s] 10%|█ | 38724/371472 [3:07:30<27:25:25, 3.37it/s] 10%|█ | 38725/371472 [3:07:31<27:59:54, 3.30it/s] 10%|█ | 38726/371472 [3:07:31<27:13:26, 3.40it/s] 10%|█ | 38727/371472 [3:07:31<25:58:15, 3.56it/s] 10%|█ | 38728/371472 [3:07:31<25:52:25, 3.57it/s] 10%|█ | 38729/371472 [3:07:32<26:06:43, 3.54it/s] 10%|█ | 38730/371472 [3:07:32<25:39:25, 3.60it/s] 10%|█ | 38731/371472 [3:07:32<25:49:18, 3.58it/s] 10%|█ | 38732/371472 [3:07:33<27:33:37, 3.35it/s] 10%|█ | 38733/371472 [3:07:33<27:08:23, 3.41it/s] 10%|█ | 38734/371472 [3:07:33<27:12:43, 3.40it/s] 10%|█ | 38735/371472 [3:07:33<27:23:17, 3.37it/s] 10%|█ | 38736/371472 [3:07:34<26:35:38, 3.48it/s] 10%|█ | 38737/371472 [3:07:34<27:57:12, 3.31it/s] 10%|█ | 38738/371472 [3:07:34<26:38:38, 3.47it/s] 10%|█ | 38739/371472 [3:07:35<26:38:23, 3.47it/s] 10%|█ | 38740/371472 [3:07:35<26:27:16, 3.49it/s] {'loss': 4.4474, 'learning_rate': 9.065752332521709e-07, 'epoch': 1.67} + 10%|█ | 38740/371472 [3:07:35<26:27:16, 3.49it/s] 10%|█ | 38741/371472 [3:07:35<27:42:40, 3.34it/s] 10%|█ | 38742/371472 [3:07:36<29:46:10, 3.10it/s] 10%|█ | 38743/371472 [3:07:36<29:59:18, 3.08it/s] 10%|█ | 38744/371472 [3:07:36<28:45:17, 3.21it/s] 10%|█ | 38745/371472 [3:07:36<27:00:54, 3.42it/s] 10%|█ | 38746/371472 [3:07:37<26:33:18, 3.48it/s] 10%|█ | 38747/371472 [3:07:37<26:45:23, 3.45it/s] 10%|█ | 38748/371472 [3:07:37<26:36:08, 3.47it/s] 10%|█ | 38749/371472 [3:07:38<27:20:30, 3.38it/s] 10%|█ | 38750/371472 [3:07:38<26:55:40, 3.43it/s] 10%|█ | 38751/371472 [3:07:38<28:17:06, 3.27it/s] 10%|█ | 38752/371472 [3:07:39<28:19:18, 3.26it/s] 10%|█ | 38753/371472 [3:07:39<27:35:25, 3.35it/s] 10%|█ | 38754/371472 [3:07:39<26:37:43, 3.47it/s] 10%|█ | 38755/371472 [3:07:39<26:07:22, 3.54it/s] 10%|█ | 38756/371472 [3:07:40<25:52:31, 3.57it/s] 10%|█ | 38757/371472 [3:07:40<25:42:51, 3.59it/s] 10%|█ | 38758/371472 [3:07:40<25:51:35, 3.57it/s] 10%|█ | 38759/371472 [3:07:40<26:24:50, 3.50it/s] 10%|█ | 38760/371472 [3:07:41<27:28:32, 3.36it/s] {'loss': 4.3869, 'learning_rate': 9.06526751276692e-07, 'epoch': 1.67} + 10%|█ | 38760/371472 [3:07:41<27:28:32, 3.36it/s] 10%|█ | 38761/371472 [3:07:41<26:21:29, 3.51it/s] 10%|█ | 38762/371472 [3:07:41<28:49:57, 3.21it/s] 10%|█ | 38763/371472 [3:07:42<28:06:20, 3.29it/s] 10%|█ | 38764/371472 [3:07:42<30:50:49, 3.00it/s] 10%|█ | 38765/371472 [3:07:42<29:55:19, 3.09it/s] 10%|█ | 38766/371472 [3:07:43<30:53:58, 2.99it/s] 10%|█ | 38767/371472 [3:07:43<29:42:16, 3.11it/s] 10%|█ | 38768/371472 [3:07:43<28:32:05, 3.24it/s] 10%|█ | 38769/371472 [3:07:44<27:24:24, 3.37it/s] 10%|█ | 38770/371472 [3:07:44<26:26:21, 3.50it/s] 10%|█ | 38771/371472 [3:07:44<27:36:05, 3.35it/s] 10%|█ | 38772/371472 [3:07:44<26:24:32, 3.50it/s] 10%|█ | 38773/371472 [3:07:45<28:45:36, 3.21it/s] 10%|█ | 38774/371472 [3:07:45<27:45:07, 3.33it/s] 10%|█ | 38775/371472 [3:07:45<27:21:04, 3.38it/s] 10%|█ | 38776/371472 [3:07:46<26:27:46, 3.49it/s] 10%|█ | 38777/371472 [3:07:46<28:25:53, 3.25it/s] 10%|█ | 38778/371472 [3:07:46<27:23:44, 3.37it/s] 10%|█ | 38779/371472 [3:07:47<27:20:05, 3.38it/s] 10%|█ | 38780/371472 [3:07:47<27:11:04, 3.40it/s] {'loss': 4.5912, 'learning_rate': 9.064782693012131e-07, 'epoch': 1.67} + 10%|█ | 38780/371472 [3:07:47<27:11:04, 3.40it/s] 10%|█ | 38781/371472 [3:07:47<26:14:07, 3.52it/s] 10%|█ | 38782/371472 [3:07:47<25:55:28, 3.56it/s] 10%|█ | 38783/371472 [3:07:48<25:56:05, 3.56it/s] 10%|█ | 38784/371472 [3:07:48<26:14:32, 3.52it/s] 10%|█ | 38785/371472 [3:07:48<26:24:43, 3.50it/s] 10%|█ | 38786/371472 [3:07:49<26:13:02, 3.52it/s] 10%|█ | 38787/371472 [3:07:49<25:47:21, 3.58it/s] 10%|█ | 38788/371472 [3:07:49<25:10:45, 3.67it/s] 10%|█ | 38789/371472 [3:07:49<25:00:05, 3.70it/s] 10%|█ | 38790/371472 [3:07:50<26:36:13, 3.47it/s] 10%|█ | 38791/371472 [3:07:50<28:28:32, 3.25it/s] 10%|█ | 38792/371472 [3:07:50<26:54:55, 3.43it/s] 10%|█ | 38793/371472 [3:07:51<25:34:15, 3.61it/s] 10%|█ | 38794/371472 [3:07:51<24:51:21, 3.72it/s] 10%|█ | 38795/371472 [3:07:51<25:05:47, 3.68it/s] 10%|█ | 38796/371472 [3:07:51<26:23:51, 3.50it/s] 10%|█ | 38797/371472 [3:07:52<26:19:02, 3.51it/s] 10%|█ | 38798/371472 [3:07:52<26:15:34, 3.52it/s] 10%|█ | 38799/371472 [3:07:52<26:05:39, 3.54it/s] 10%|█ | 38800/371472 [3:07:52<25:35:37, 3.61it/s] {'loss': 4.7353, 'learning_rate': 9.064297873257342e-07, 'epoch': 1.67} + 10%|█ | 38800/371472 [3:07:52<25:35:37, 3.61it/s] 10%|█ | 38801/371472 [3:07:53<27:23:11, 3.37it/s] 10%|█ | 38802/371472 [3:07:53<26:55:49, 3.43it/s] 10%|█ | 38803/371472 [3:07:53<27:27:52, 3.36it/s] 10%|█ | 38804/371472 [3:07:54<26:21:58, 3.50it/s] 10%|█ | 38805/371472 [3:07:54<27:27:02, 3.37it/s] 10%|█ | 38806/371472 [3:07:54<27:08:32, 3.40it/s] 10%|█ | 38807/371472 [3:07:55<27:04:43, 3.41it/s] 10%|█ | 38808/371472 [3:07:55<27:21:18, 3.38it/s] 10%|█ | 38809/371472 [3:07:55<26:33:59, 3.48it/s] 10%|█ | 38810/371472 [3:07:55<26:11:38, 3.53it/s] 10%|█ | 38811/371472 [3:07:56<28:48:23, 3.21it/s] 10%|█ | 38812/371472 [3:07:56<28:59:43, 3.19it/s] 10%|█ | 38813/371472 [3:07:56<28:39:41, 3.22it/s] 10%|█ | 38814/371472 [3:07:57<28:12:09, 3.28it/s] 10%|█ | 38815/371472 [3:07:57<28:49:32, 3.21it/s] 10%|█ | 38816/371472 [3:07:57<28:38:07, 3.23it/s] 10%|█ | 38817/371472 [3:07:58<28:06:12, 3.29it/s] 10%|█ | 38818/371472 [3:07:58<27:10:34, 3.40it/s] 10%|█ | 38819/371472 [3:07:58<27:07:45, 3.41it/s] 10%|█ | 38820/371472 [3:07:59<28:31:15, 3.24it/s] {'loss': 4.3667, 'learning_rate': 9.063813053502553e-07, 'epoch': 1.67} + 10%|█ | 38820/371472 [3:07:59<28:31:15, 3.24it/s] 10%|█ | 38821/371472 [3:07:59<30:35:24, 3.02it/s] 10%|█ | 38822/371472 [3:07:59<28:40:55, 3.22it/s] 10%|█ | 38823/371472 [3:07:59<27:32:39, 3.35it/s] 10%|█ | 38824/371472 [3:08:00<26:27:06, 3.49it/s] 10%|█ | 38825/371472 [3:08:00<28:10:10, 3.28it/s] 10%|█ | 38826/371472 [3:08:00<28:08:46, 3.28it/s] 10%|█ | 38827/371472 [3:08:01<27:06:02, 3.41it/s] 10%|█ | 38828/371472 [3:08:01<26:42:08, 3.46it/s] 10%|█ | 38829/371472 [3:08:01<26:22:07, 3.50it/s] 10%|█ | 38830/371472 [3:08:01<26:25:48, 3.50it/s] 10%|█ | 38831/371472 [3:08:02<26:48:02, 3.45it/s] 10%|█ | 38832/371472 [3:08:02<26:02:20, 3.55it/s] 10%|█ | 38833/371472 [3:08:02<25:58:33, 3.56it/s] 10%|█ | 38834/371472 [3:08:03<26:11:47, 3.53it/s] 10%|█ | 38835/371472 [3:08:03<27:20:18, 3.38it/s] 10%|█ | 38836/371472 [3:08:03<29:26:11, 3.14it/s] 10%|█ | 38837/371472 [3:08:04<27:28:01, 3.36it/s] 10%|█ | 38838/371472 [3:08:04<26:43:19, 3.46it/s] 10%|█ | 38839/371472 [3:08:04<26:41:14, 3.46it/s] 10%|█ | 38840/371472 [3:08:04<25:54:11, 3.57it/s] {'loss': 4.4119, 'learning_rate': 9.063328233747764e-07, 'epoch': 1.67} + 10%|█ | 38840/371472 [3:08:04<25:54:11, 3.57it/s] 10%|█ | 38841/371472 [3:08:05<25:28:32, 3.63it/s] 10%|█ | 38842/371472 [3:08:05<26:10:51, 3.53it/s] 10%|█ | 38843/371472 [3:08:05<25:41:23, 3.60it/s] 10%|█ | 38844/371472 [3:08:06<27:35:30, 3.35it/s] 10%|█ | 38845/371472 [3:08:06<26:21:32, 3.51it/s] 10%|█ | 38846/371472 [3:08:06<26:50:48, 3.44it/s] 10%|█ | 38847/371472 [3:08:06<26:18:04, 3.51it/s] 10%|█ | 38848/371472 [3:08:07<25:40:07, 3.60it/s] 10%|█ | 38849/371472 [3:08:07<26:12:45, 3.52it/s] 10%|█ | 38850/371472 [3:08:07<27:24:43, 3.37it/s] 10%|█ | 38851/371472 [3:08:08<27:08:10, 3.40it/s] 10%|█ | 38852/371472 [3:08:08<28:28:04, 3.25it/s] 10%|█ | 38853/371472 [3:08:08<27:04:37, 3.41it/s] 10%|█ | 38854/371472 [3:08:08<26:25:43, 3.50it/s] 10%|█ | 38855/371472 [3:08:09<26:48:38, 3.45it/s] 10%|█ | 38856/371472 [3:08:09<25:54:58, 3.57it/s] 10%|█ | 38857/371472 [3:08:09<25:53:44, 3.57it/s] 10%|█ | 38858/371472 [3:08:10<26:34:05, 3.48it/s] 10%|█ | 38859/371472 [3:08:10<26:44:50, 3.45it/s] 10%|█ | 38860/371472 [3:08:10<25:45:21, 3.59it/s] {'loss': 4.4805, 'learning_rate': 9.062843413992975e-07, 'epoch': 1.67} + 10%|█ | 38860/371472 [3:08:10<25:45:21, 3.59it/s] 10%|█ | 38861/371472 [3:08:10<26:01:44, 3.55it/s] 10%|█ | 38862/371472 [3:08:11<25:45:45, 3.59it/s] 10%|█ | 38863/371472 [3:08:11<28:49:16, 3.21it/s] 10%|█ | 38864/371472 [3:08:11<27:43:29, 3.33it/s] 10%|█ | 38865/371472 [3:08:12<27:22:31, 3.37it/s] 10%|█ | 38866/371472 [3:08:12<26:19:21, 3.51it/s] 10%|█ | 38867/371472 [3:08:12<25:54:41, 3.57it/s] 10%|█ | 38868/371472 [3:08:12<25:46:40, 3.58it/s] 10%|█ | 38869/371472 [3:08:13<25:46:23, 3.58it/s] 10%|█ | 38870/371472 [3:08:13<25:25:52, 3.63it/s] 10%|█ | 38871/371472 [3:08:13<24:43:01, 3.74it/s] 10%|█ | 38872/371472 [3:08:14<25:19:15, 3.65it/s] 10%|█ | 38873/371472 [3:08:14<29:17:22, 3.15it/s] 10%|█ | 38874/371472 [3:08:14<27:40:50, 3.34it/s] 10%|█ | 38875/371472 [3:08:14<26:28:03, 3.49it/s] 10%|█ | 38876/371472 [3:08:15<27:06:45, 3.41it/s] 10%|█ | 38877/371472 [3:08:15<27:07:13, 3.41it/s] 10%|█ | 38878/371472 [3:08:15<26:04:59, 3.54it/s] 10%|█ | 38879/371472 [3:08:16<25:48:51, 3.58it/s] 10%|█ | 38880/371472 [3:08:16<26:09:16, 3.53it/s] {'loss': 4.3972, 'learning_rate': 9.062358594238186e-07, 'epoch': 1.67} + 10%|█ | 38880/371472 [3:08:16<26:09:16, 3.53it/s] 10%|█ | 38881/371472 [3:08:16<26:17:02, 3.51it/s] 10%|█ | 38882/371472 [3:08:16<26:20:33, 3.51it/s] 10%|█ | 38883/371472 [3:08:17<25:38:14, 3.60it/s] 10%|█ | 38884/371472 [3:08:17<26:13:32, 3.52it/s] 10%|█ | 38885/371472 [3:08:17<27:31:43, 3.36it/s] 10%|█ | 38886/371472 [3:08:18<27:36:22, 3.35it/s] 10%|█ | 38887/371472 [3:08:18<27:21:49, 3.38it/s] 10%|█ | 38888/371472 [3:08:18<27:13:30, 3.39it/s] 10%|█ | 38889/371472 [3:08:18<26:21:16, 3.51it/s] 10%|█ | 38890/371472 [3:08:19<26:15:53, 3.52it/s] 10%|█ | 38891/371472 [3:08:19<26:50:14, 3.44it/s] 10%|█ | 38892/371472 [3:08:19<26:05:19, 3.54it/s] 10%|█ | 38893/371472 [3:08:20<25:06:11, 3.68it/s] 10%|█ | 38894/371472 [3:08:20<25:21:53, 3.64it/s] 10%|█ | 38895/371472 [3:08:20<24:49:42, 3.72it/s] 10%|█ | 38896/371472 [3:08:20<26:11:55, 3.53it/s] 10%|█ | 38897/371472 [3:08:21<25:35:49, 3.61it/s] 10%|█ | 38898/371472 [3:08:21<24:36:30, 3.75it/s] 10%|█ | 38899/371472 [3:08:21<24:14:55, 3.81it/s] 10%|█ | 38900/371472 [3:08:21<24:39:26, 3.75it/s] {'loss': 4.5487, 'learning_rate': 9.061873774483397e-07, 'epoch': 1.68} + 10%|█ | 38900/371472 [3:08:21<24:39:26, 3.75it/s] 10%|█ | 38901/371472 [3:08:22<24:36:16, 3.75it/s] 10%|█ | 38902/371472 [3:08:22<25:35:54, 3.61it/s] 10%|█ | 38903/371472 [3:08:22<25:25:54, 3.63it/s] 10%|█ | 38904/371472 [3:08:23<25:11:41, 3.67it/s] 10%|█ | 38905/371472 [3:08:23<25:12:50, 3.66it/s] 10%|█ | 38906/371472 [3:08:23<25:11:49, 3.67it/s] 10%|█ | 38907/371472 [3:08:23<26:54:23, 3.43it/s] 10%|█ | 38908/371472 [3:08:24<27:36:48, 3.35it/s] 10%|█ | 38909/371472 [3:08:24<27:52:31, 3.31it/s] 10%|█ | 38910/371472 [3:08:24<28:27:44, 3.25it/s] 10%|█ | 38911/371472 [3:08:25<27:06:17, 3.41it/s] 10%|█ | 38912/371472 [3:08:25<26:29:17, 3.49it/s] 10%|█ | 38913/371472 [3:08:25<27:44:24, 3.33it/s] 10%|█ | 38914/371472 [3:08:26<27:59:24, 3.30it/s] 10%|█ | 38915/371472 [3:08:26<29:03:58, 3.18it/s] 10%|█ | 38916/371472 [3:08:26<27:35:42, 3.35it/s] 10%|█ | 38917/371472 [3:08:26<26:05:05, 3.54it/s] 10%|█ | 38918/371472 [3:08:27<26:57:15, 3.43it/s] 10%|█ | 38919/371472 [3:08:27<26:24:01, 3.50it/s] 10%|█ | 38920/371472 [3:08:27<25:51:51, 3.57it/s] {'loss': 4.4102, 'learning_rate': 9.061388954728609e-07, 'epoch': 1.68} + 10%|█ | 38920/371472 [3:08:27<25:51:51, 3.57it/s] 10%|█ | 38921/371472 [3:08:28<30:00:21, 3.08it/s] 10%|█ | 38922/371472 [3:08:28<28:04:57, 3.29it/s] 10%|█ | 38923/371472 [3:08:28<30:32:58, 3.02it/s] 10%|█ | 38924/371472 [3:08:29<29:27:16, 3.14it/s] 10%|█ | 38925/371472 [3:08:29<28:16:44, 3.27it/s] 10%|█ | 38926/371472 [3:08:29<27:18:44, 3.38it/s] 10%|█ | 38927/371472 [3:08:30<27:57:26, 3.30it/s] 10%|█ | 38928/371472 [3:08:30<27:01:36, 3.42it/s] 10%|█ | 38929/371472 [3:08:30<26:33:27, 3.48it/s] 10%|█ | 38930/371472 [3:08:30<26:55:31, 3.43it/s] 10%|█ | 38931/371472 [3:08:31<27:19:44, 3.38it/s] 10%|█ | 38932/371472 [3:08:31<26:22:15, 3.50it/s] 10%|█ | 38933/371472 [3:08:31<27:45:24, 3.33it/s] 10%|█ | 38934/371472 [3:08:32<26:37:20, 3.47it/s] 10%|█ | 38935/371472 [3:08:32<26:42:32, 3.46it/s] 10%|█ | 38936/371472 [3:08:32<26:00:38, 3.55it/s] 10%|█ | 38937/371472 [3:08:32<25:35:09, 3.61it/s] 10%|█ | 38938/371472 [3:08:33<24:59:19, 3.70it/s] 10%|█ | 38939/371472 [3:08:33<25:41:48, 3.59it/s] 10%|█ | 38940/371472 [3:08:33<26:52:13, 3.44it/s] {'loss': 4.1832, 'learning_rate': 9.060904134973819e-07, 'epoch': 1.68} + 10%|█ | 38940/371472 [3:08:33<26:52:13, 3.44it/s] 10%|█ | 38941/371472 [3:08:34<27:29:01, 3.36it/s] 10%|█ | 38942/371472 [3:08:34<26:55:55, 3.43it/s] 10%|█ | 38943/371472 [3:08:34<26:54:10, 3.43it/s] 10%|█ | 38944/371472 [3:08:34<26:59:53, 3.42it/s] 10%|█ | 38945/371472 [3:08:35<28:11:16, 3.28it/s] 10%|█ | 38946/371472 [3:08:35<26:49:18, 3.44it/s] 10%|█ | 38947/371472 [3:08:35<28:23:21, 3.25it/s] 10%|█ | 38948/371472 [3:08:36<26:54:10, 3.43it/s] 10%|█ | 38949/371472 [3:08:36<26:15:04, 3.52it/s] 10%|█ | 38950/371472 [3:08:36<27:40:33, 3.34it/s] 10%|█ | 38951/371472 [3:08:36<26:45:24, 3.45it/s] 10%|█ | 38952/371472 [3:08:37<27:09:15, 3.40it/s] 10%|█ | 38953/371472 [3:08:37<27:03:12, 3.41it/s] 10%|█ | 38954/371472 [3:08:37<28:20:35, 3.26it/s] 10%|█ | 38955/371472 [3:08:38<26:56:39, 3.43it/s] 10%|█ | 38956/371472 [3:08:38<26:19:58, 3.51it/s] 10%|█ | 38957/371472 [3:08:38<27:05:50, 3.41it/s] 10%|█ | 38958/371472 [3:08:38<26:12:24, 3.52it/s] 10%|█ | 38959/371472 [3:08:39<27:11:26, 3.40it/s] 10%|█ | 38960/371472 [3:08:39<29:16:18, 3.16it/s] {'loss': 4.5223, 'learning_rate': 9.06041931521903e-07, 'epoch': 1.68} + 10%|█ | 38960/371472 [3:08:39<29:16:18, 3.16it/s] 10%|█ | 38961/371472 [3:08:39<28:14:43, 3.27it/s] 10%|█ | 38962/371472 [3:08:40<28:57:01, 3.19it/s] 10%|█ | 38963/371472 [3:08:40<27:40:11, 3.34it/s] 10%|█ | 38964/371472 [3:08:40<26:31:02, 3.48it/s] 10%|█ | 38965/371472 [3:08:41<26:23:10, 3.50it/s] 10%|█ | 38966/371472 [3:08:41<26:51:55, 3.44it/s] 10%|█ | 38967/371472 [3:08:41<26:19:43, 3.51it/s] 10%|█ | 38968/371472 [3:08:41<25:42:10, 3.59it/s] 10%|█ | 38969/371472 [3:08:42<26:10:57, 3.53it/s] 10%|█ | 38970/371472 [3:08:42<26:04:26, 3.54it/s] 10%|█ | 38971/371472 [3:08:42<25:46:45, 3.58it/s] 10%|█ | 38972/371472 [3:08:43<27:10:44, 3.40it/s] 10%|█ | 38973/371472 [3:08:43<26:38:38, 3.47it/s] 10%|█ | 38974/371472 [3:08:43<26:34:02, 3.48it/s] 10%|█ | 38975/371472 [3:08:43<27:47:43, 3.32it/s] 10%|█ | 38976/371472 [3:08:44<26:18:31, 3.51it/s] 10%|█ | 38977/371472 [3:08:44<26:57:50, 3.43it/s] 10%|█ | 38978/371472 [3:08:44<25:54:37, 3.56it/s] 10%|█ | 38979/371472 [3:08:45<26:38:01, 3.47it/s] 10%|█ | 38980/371472 [3:08:45<25:30:45, 3.62it/s] {'loss': 4.8365, 'learning_rate': 9.059934495464241e-07, 'epoch': 1.68} + 10%|█ | 38980/371472 [3:08:45<25:30:45, 3.62it/s] 10%|█ | 38981/371472 [3:08:45<28:22:14, 3.26it/s] 10%|█ | 38982/371472 [3:08:46<27:44:15, 3.33it/s] 10%|█ | 38983/371472 [3:08:46<26:30:03, 3.49it/s] 10%|█ | 38984/371472 [3:08:46<26:48:57, 3.44it/s] 10%|█ | 38985/371472 [3:08:46<26:16:07, 3.52it/s] 10%|█ | 38986/371472 [3:08:47<25:37:05, 3.61it/s] 10%|█ | 38987/371472 [3:08:47<26:16:47, 3.51it/s] 10%|█ | 38988/371472 [3:08:47<26:16:47, 3.51it/s] 10%|█ | 38989/371472 [3:08:47<25:24:25, 3.64it/s] 10%|█ | 38990/371472 [3:08:48<25:22:37, 3.64it/s] 10%|█ | 38991/371472 [3:08:48<25:40:32, 3.60it/s] 10%|█ | 38992/371472 [3:08:48<26:50:55, 3.44it/s] 10%|█ | 38993/371472 [3:08:49<26:31:22, 3.48it/s] 10%|█ | 38994/371472 [3:08:49<25:36:20, 3.61it/s] 10%|█ | 38995/371472 [3:08:49<25:00:29, 3.69it/s] 10%|█ | 38996/371472 [3:08:49<24:42:13, 3.74it/s] 10%|█ | 38997/371472 [3:08:50<24:41:43, 3.74it/s] 10%|█ | 38998/371472 [3:08:50<24:29:32, 3.77it/s] 10%|█ | 38999/371472 [3:08:50<27:03:18, 3.41it/s] 10%|█ | 39000/371472 [3:08:51<26:17:04, 3.51it/s] {'loss': 4.2166, 'learning_rate': 9.059449675709452e-07, 'epoch': 1.68} + 10%|█ | 39000/371472 [3:08:51<26:17:04, 3.51it/s] 10%|█ | 39001/371472 [3:08:51<27:14:14, 3.39it/s] 10%|█ | 39002/371472 [3:08:51<27:29:21, 3.36it/s] 10%|█ | 39003/371472 [3:08:51<26:41:31, 3.46it/s] 10%|█ | 39004/371472 [3:08:52<28:11:06, 3.28it/s] 11%|█ | 39005/371472 [3:08:52<27:21:45, 3.38it/s] 11%|█ | 39006/371472 [3:08:52<29:11:23, 3.16it/s] 11%|█ | 39007/371472 [3:08:53<29:57:25, 3.08it/s] 11%|█ | 39008/371472 [3:08:53<28:34:28, 3.23it/s] 11%|█ | 39009/371472 [3:08:53<28:35:10, 3.23it/s] 11%|█ | 39010/371472 [3:08:54<28:42:13, 3.22it/s] 11%|█ | 39011/371472 [3:08:54<30:12:39, 3.06it/s] 11%|█ | 39012/371472 [3:08:54<28:33:07, 3.23it/s] 11%|█ | 39013/371472 [3:08:55<27:34:06, 3.35it/s] 11%|█ | 39014/371472 [3:08:55<27:29:00, 3.36it/s] 11%|█ | 39015/371472 [3:08:55<27:43:58, 3.33it/s] 11%|█ | 39016/371472 [3:08:55<28:47:01, 3.21it/s] 11%|█ | 39017/371472 [3:08:56<28:02:41, 3.29it/s] 11%|█ | 39018/371472 [3:08:56<28:07:35, 3.28it/s] 11%|█ | 39019/371472 [3:08:56<27:13:01, 3.39it/s] 11%|█ | 39020/371472 [3:08:57<26:11:48, 3.53it/s] {'loss': 4.1617, 'learning_rate': 9.058964855954664e-07, 'epoch': 1.68} + 11%|█ | 39020/371472 [3:08:57<26:11:48, 3.53it/s] 11%|█ | 39021/371472 [3:08:57<25:44:28, 3.59it/s] 11%|█ | 39022/371472 [3:08:57<25:48:52, 3.58it/s] 11%|█ | 39023/371472 [3:08:57<26:49:56, 3.44it/s] 11%|█ | 39024/371472 [3:08:58<26:14:20, 3.52it/s] 11%|█ | 39025/371472 [3:08:58<26:55:58, 3.43it/s] 11%|█ | 39026/371472 [3:08:58<27:27:35, 3.36it/s] 11%|█ | 39027/371472 [3:08:59<26:08:34, 3.53it/s] 11%|█ | 39028/371472 [3:08:59<27:29:42, 3.36it/s] 11%|█ | 39029/371472 [3:08:59<26:25:00, 3.50it/s] 11%|█ | 39030/371472 [3:09:00<27:15:23, 3.39it/s] 11%|█ | 39031/371472 [3:09:00<27:53:59, 3.31it/s] 11%|█ | 39032/371472 [3:09:00<26:43:52, 3.45it/s] 11%|█ | 39033/371472 [3:09:00<26:25:19, 3.49it/s] 11%|█ | 39034/371472 [3:09:01<25:46:37, 3.58it/s] 11%|█ | 39035/371472 [3:09:01<25:19:30, 3.65it/s] 11%|█ | 39036/371472 [3:09:01<26:17:33, 3.51it/s] 11%|█ | 39037/371472 [3:09:01<25:56:43, 3.56it/s] 11%|█ | 39038/371472 [3:09:02<28:59:23, 3.19it/s] 11%|█ | 39039/371472 [3:09:02<28:27:35, 3.24it/s] 11%|█ | 39040/371472 [3:09:02<28:06:38, 3.28it/s] {'loss': 4.5641, 'learning_rate': 9.058480036199875e-07, 'epoch': 1.68} + 11%|█ | 39040/371472 [3:09:02<28:06:38, 3.28it/s] 11%|█ | 39041/371472 [3:09:03<27:10:16, 3.40it/s] 11%|█ | 39042/371472 [3:09:03<26:47:33, 3.45it/s] 11%|█ | 39043/371472 [3:09:03<26:40:20, 3.46it/s] 11%|█ | 39044/371472 [3:09:04<25:43:48, 3.59it/s] 11%|█ | 39045/371472 [3:09:04<24:50:44, 3.72it/s] 11%|█ | 39046/371472 [3:09:04<25:20:17, 3.64it/s] 11%|█ | 39047/371472 [3:09:04<26:05:00, 3.54it/s] 11%|█ | 39048/371472 [3:09:05<25:58:49, 3.55it/s] 11%|█ | 39049/371472 [3:09:05<25:51:47, 3.57it/s] 11%|█ | 39050/371472 [3:09:05<26:39:58, 3.46it/s] 11%|█ | 39051/371472 [3:09:06<28:13:03, 3.27it/s] 11%|█ | 39052/371472 [3:09:06<27:34:42, 3.35it/s] 11%|█ | 39053/371472 [3:09:06<26:37:12, 3.47it/s] 11%|█ | 39054/371472 [3:09:06<25:29:22, 3.62it/s] 11%|█ | 39055/371472 [3:09:07<25:28:55, 3.62it/s] 11%|█ | 39056/371472 [3:09:07<25:44:53, 3.59it/s] 11%|█ | 39057/371472 [3:09:07<26:23:00, 3.50it/s] 11%|█ | 39058/371472 [3:09:08<26:15:17, 3.52it/s] 11%|█ | 39059/371472 [3:09:08<26:26:37, 3.49it/s] 11%|█ | 39060/371472 [3:09:08<27:55:36, 3.31it/s] {'loss': 4.455, 'learning_rate': 9.057995216445086e-07, 'epoch': 1.68} + 11%|█ | 39060/371472 [3:09:08<27:55:36, 3.31it/s] 11%|█ | 39061/371472 [3:09:08<26:44:54, 3.45it/s] 11%|█ | 39062/371472 [3:09:09<26:33:18, 3.48it/s] 11%|█ | 39063/371472 [3:09:09<26:17:55, 3.51it/s] 11%|█ | 39064/371472 [3:09:09<26:26:29, 3.49it/s] 11%|█ | 39065/371472 [3:09:10<29:09:58, 3.17it/s] 11%|█ | 39066/371472 [3:09:10<28:14:39, 3.27it/s] 11%|█ | 39067/371472 [3:09:10<27:00:35, 3.42it/s] 11%|█ | 39068/371472 [3:09:10<26:43:57, 3.45it/s] 11%|█ | 39069/371472 [3:09:11<28:29:29, 3.24it/s] 11%|█ | 39070/371472 [3:09:11<27:27:11, 3.36it/s] 11%|█ | 39071/371472 [3:09:11<27:21:02, 3.38it/s] 11%|█ | 39072/371472 [3:09:12<26:30:03, 3.48it/s] 11%|█ | 39073/371472 [3:09:12<27:06:14, 3.41it/s] 11%|█ | 39074/371472 [3:09:12<25:57:50, 3.56it/s] 11%|█ | 39075/371472 [3:09:12<24:51:28, 3.71it/s] 11%|█ | 39076/371472 [3:09:13<27:09:15, 3.40it/s] 11%|█ | 39077/371472 [3:09:13<27:20:39, 3.38it/s] 11%|█ | 39078/371472 [3:09:13<27:54:09, 3.31it/s] 11%|█ | 39079/371472 [3:09:14<30:01:03, 3.08it/s] 11%|█ | 39080/371472 [3:09:14<29:04:31, 3.18it/s] {'loss': 4.2939, 'learning_rate': 9.057510396690296e-07, 'epoch': 1.68} + 11%|█ | 39080/371472 [3:09:14<29:04:31, 3.18it/s] 11%|█ | 39081/371472 [3:09:14<29:02:36, 3.18it/s] 11%|█ | 39082/371472 [3:09:15<27:55:31, 3.31it/s] 11%|█ | 39083/371472 [3:09:15<27:45:14, 3.33it/s] 11%|█ | 39084/371472 [3:09:15<28:41:05, 3.22it/s] 11%|█ | 39085/371472 [3:09:16<27:10:37, 3.40it/s] 11%|█ | 39086/371472 [3:09:16<29:19:36, 3.15it/s] 11%|█ | 39087/371472 [3:09:16<27:24:34, 3.37it/s] 11%|█ | 39088/371472 [3:09:16<25:57:49, 3.56it/s] 11%|█ | 39089/371472 [3:09:17<26:32:28, 3.48it/s] 11%|█ | 39090/371472 [3:09:17<27:31:32, 3.35it/s] 11%|█ | 39091/371472 [3:09:17<26:42:48, 3.46it/s] 11%|█ | 39092/371472 [3:09:18<26:34:42, 3.47it/s] 11%|█ | 39093/371472 [3:09:18<26:09:17, 3.53it/s] 11%|█ | 39094/371472 [3:09:18<26:48:09, 3.44it/s] 11%|█ | 39095/371472 [3:09:18<26:10:21, 3.53it/s] 11%|█ | 39096/371472 [3:09:19<26:36:25, 3.47it/s] 11%|█ | 39097/371472 [3:09:19<26:47:04, 3.45it/s] 11%|█ | 39098/371472 [3:09:19<26:08:10, 3.53it/s] 11%|█ | 39099/371472 [3:09:20<25:47:52, 3.58it/s] 11%|█ | 39100/371472 [3:09:20<27:54:23, 3.31it/s] {'loss': 4.3436, 'learning_rate': 9.057025576935507e-07, 'epoch': 1.68} + 11%|█ | 39100/371472 [3:09:20<27:54:23, 3.31it/s] 11%|█ | 39101/371472 [3:09:20<27:03:35, 3.41it/s] 11%|█ | 39102/371472 [3:09:21<28:03:14, 3.29it/s] 11%|█ | 39103/371472 [3:09:21<27:02:04, 3.42it/s] 11%|█ | 39104/371472 [3:09:21<26:31:17, 3.48it/s] 11%|█ | 39105/371472 [3:09:21<25:35:24, 3.61it/s] 11%|█ | 39106/371472 [3:09:22<25:14:55, 3.66it/s] 11%|█ | 39107/371472 [3:09:22<25:12:05, 3.66it/s] 11%|█ | 39108/371472 [3:09:22<25:11:52, 3.66it/s] 11%|█ | 39109/371472 [3:09:22<26:21:58, 3.50it/s] 11%|█ | 39110/371472 [3:09:23<26:36:52, 3.47it/s] 11%|█ | 39111/371472 [3:09:23<27:31:43, 3.35it/s] 11%|█ | 39112/371472 [3:09:23<26:40:53, 3.46it/s] 11%|█ | 39113/371472 [3:09:24<28:34:57, 3.23it/s] 11%|█ | 39114/371472 [3:09:24<28:11:44, 3.27it/s] 11%|█ | 39115/371472 [3:09:24<27:05:47, 3.41it/s] 11%|█ | 39116/371472 [3:09:25<27:50:11, 3.32it/s] 11%|█ | 39117/371472 [3:09:25<27:27:05, 3.36it/s] 11%|█ | 39118/371472 [3:09:25<27:14:41, 3.39it/s] 11%|█ | 39119/371472 [3:09:25<26:48:45, 3.44it/s] 11%|█ | 39120/371472 [3:09:26<27:33:41, 3.35it/s] {'loss': 4.2504, 'learning_rate': 9.056540757180719e-07, 'epoch': 1.68} + 11%|█ | 39120/371472 [3:09:26<27:33:41, 3.35it/s] 11%|█ | 39121/371472 [3:09:26<27:42:50, 3.33it/s] 11%|█ | 39122/371472 [3:09:26<27:45:41, 3.33it/s] 11%|█ | 39123/371472 [3:09:27<27:26:42, 3.36it/s] 11%|█ | 39124/371472 [3:09:27<27:01:59, 3.42it/s] 11%|█ | 39125/371472 [3:09:27<28:06:57, 3.28it/s] 11%|█ | 39126/371472 [3:09:28<27:36:18, 3.34it/s] 11%|█ | 39127/371472 [3:09:28<26:39:41, 3.46it/s] 11%|█ | 39128/371472 [3:09:28<26:08:30, 3.53it/s] 11%|█ | 39129/371472 [3:09:28<27:01:18, 3.42it/s] 11%|█ | 39130/371472 [3:09:29<27:33:24, 3.35it/s] 11%|█ | 39131/371472 [3:09:29<27:20:45, 3.38it/s] 11%|█ | 39132/371472 [3:09:29<27:55:59, 3.30it/s] 11%|█ | 39133/371472 [3:09:30<28:05:39, 3.29it/s] 11%|█ | 39134/371472 [3:09:30<27:34:58, 3.35it/s] 11%|█ | 39135/371472 [3:09:30<28:31:44, 3.24it/s] 11%|█ | 39136/371472 [3:09:31<28:49:15, 3.20it/s] 11%|█ | 39137/371472 [3:09:31<28:08:33, 3.28it/s] 11%|█ | 39138/371472 [3:09:31<26:50:31, 3.44it/s] 11%|█ | 39139/371472 [3:09:31<25:41:03, 3.59it/s] 11%|█ | 39140/371472 [3:09:32<27:25:20, 3.37it/s] {'loss': 4.3646, 'learning_rate': 9.05605593742593e-07, 'epoch': 1.69} + 11%|█ | 39140/371472 [3:09:32<27:25:20, 3.37it/s] 11%|█ | 39141/371472 [3:09:32<27:52:04, 3.31it/s] 11%|█ | 39142/371472 [3:09:32<26:50:27, 3.44it/s] 11%|█ | 39143/371472 [3:09:33<26:04:17, 3.54it/s] 11%|█ | 39144/371472 [3:09:33<26:10:59, 3.53it/s] 11%|█ | 39145/371472 [3:09:33<25:53:43, 3.56it/s] 11%|█ | 39146/371472 [3:09:33<26:13:45, 3.52it/s] 11%|█ | 39147/371472 [3:09:34<25:27:52, 3.63it/s] 11%|█ | 39148/371472 [3:09:34<24:32:04, 3.76it/s] 11%|█ | 39149/371472 [3:09:34<25:22:54, 3.64it/s] 11%|█ | 39150/371472 [3:09:34<25:07:39, 3.67it/s] 11%|█ | 39151/371472 [3:09:35<25:50:52, 3.57it/s] 11%|█ | 39152/371472 [3:09:35<26:48:51, 3.44it/s] 11%|█ | 39153/371472 [3:09:35<27:39:57, 3.34it/s] 11%|█ | 39154/371472 [3:09:36<27:14:11, 3.39it/s] 11%|█ | 39155/371472 [3:09:36<28:27:14, 3.24it/s] 11%|█ | 39156/371472 [3:09:36<27:33:44, 3.35it/s] 11%|█ | 39157/371472 [3:09:37<27:25:37, 3.37it/s] 11%|█ | 39158/371472 [3:09:37<29:02:31, 3.18it/s] 11%|█ | 39159/371472 [3:09:37<29:33:46, 3.12it/s] 11%|█ | 39160/371472 [3:09:38<28:22:23, 3.25it/s] {'loss': 4.4256, 'learning_rate': 9.055571117671141e-07, 'epoch': 1.69} + 11%|█ | 39160/371472 [3:09:38<28:22:23, 3.25it/s] 11%|█ | 39161/371472 [3:09:38<28:44:46, 3.21it/s] 11%|█ | 39162/371472 [3:09:38<27:47:42, 3.32it/s] 11%|█ | 39163/371472 [3:09:38<27:22:23, 3.37it/s] 11%|█ | 39164/371472 [3:09:39<27:05:14, 3.41it/s] 11%|█ | 39165/371472 [3:09:39<25:57:19, 3.56it/s] 11%|█ | 39166/371472 [3:09:39<25:13:44, 3.66it/s] 11%|█ | 39167/371472 [3:09:40<26:37:35, 3.47it/s] 11%|█ | 39168/371472 [3:09:40<27:02:59, 3.41it/s] 11%|█ | 39169/371472 [3:09:40<27:13:14, 3.39it/s] 11%|█ | 39170/371472 [3:09:40<26:37:59, 3.47it/s] 11%|█ | 39171/371472 [3:09:41<25:48:42, 3.58it/s] 11%|█ | 39172/371472 [3:09:41<25:54:48, 3.56it/s] 11%|█ | 39173/371472 [3:09:41<26:55:04, 3.43it/s] 11%|█ | 39174/371472 [3:09:42<26:36:49, 3.47it/s] 11%|█ | 39175/371472 [3:09:42<27:15:53, 3.39it/s] 11%|█ | 39176/371472 [3:09:42<28:28:05, 3.24it/s] 11%|█ | 39177/371472 [3:09:43<27:02:36, 3.41it/s] 11%|█ | 39178/371472 [3:09:43<26:53:33, 3.43it/s] 11%|█ | 39179/371472 [3:09:43<28:21:59, 3.25it/s] 11%|█ | 39180/371472 [3:09:43<27:14:24, 3.39it/s] {'loss': 4.3223, 'learning_rate': 9.055086297916352e-07, 'epoch': 1.69} + 11%|█ | 39180/371472 [3:09:43<27:14:24, 3.39it/s] 11%|█ | 39181/371472 [3:09:44<27:26:11, 3.36it/s] 11%|█ | 39182/371472 [3:09:44<28:19:24, 3.26it/s] 11%|█ | 39183/371472 [3:09:44<26:56:34, 3.43it/s] 11%|█ | 39184/371472 [3:09:45<26:19:28, 3.51it/s] 11%|█ | 39185/371472 [3:09:45<26:46:41, 3.45it/s] 11%|█ | 39186/371472 [3:09:45<26:59:35, 3.42it/s] 11%|█ | 39187/371472 [3:09:46<28:27:40, 3.24it/s] 11%|█ | 39188/371472 [3:09:46<28:19:44, 3.26it/s] 11%|█ | 39189/371472 [3:09:46<27:27:10, 3.36it/s] 11%|█ | 39190/371472 [3:09:46<26:36:04, 3.47it/s] 11%|█ | 39191/371472 [3:09:47<28:24:56, 3.25it/s] 11%|█ | 39192/371472 [3:09:47<29:00:31, 3.18it/s] 11%|█ | 39193/371472 [3:09:47<27:08:12, 3.40it/s] 11%|█ | 39194/371472 [3:09:48<26:07:42, 3.53it/s] 11%|█ | 39195/371472 [3:09:48<26:07:39, 3.53it/s] 11%|█ | 39196/371472 [3:09:48<26:39:50, 3.46it/s] 11%|█ | 39197/371472 [3:09:48<25:53:34, 3.56it/s] 11%|█ | 39198/371472 [3:09:49<27:08:42, 3.40it/s] 11%|█ | 39199/371472 [3:09:49<28:08:52, 3.28it/s] 11%|█ | 39200/371472 [3:09:49<28:16:15, 3.26it/s] {'loss': 4.3405, 'learning_rate': 9.054601478161563e-07, 'epoch': 1.69} + 11%|█ | 39200/371472 [3:09:49<28:16:15, 3.26it/s] 11%|█ | 39201/371472 [3:09:50<26:54:58, 3.43it/s] 11%|█ | 39202/371472 [3:09:50<28:23:26, 3.25it/s] 11%|█ | 39203/371472 [3:09:50<28:29:04, 3.24it/s] 11%|█ | 39204/371472 [3:09:51<27:16:32, 3.38it/s] 11%|█ | 39205/371472 [3:09:51<27:20:27, 3.38it/s] 11%|█ | 39206/371472 [3:09:51<28:02:33, 3.29it/s] 11%|█ | 39207/371472 [3:09:51<27:01:08, 3.42it/s] 11%|█ | 39208/371472 [3:09:52<26:28:42, 3.49it/s] 11%|█ | 39209/371472 [3:09:52<28:04:57, 3.29it/s] 11%|█ | 39210/371472 [3:09:52<26:34:19, 3.47it/s] 11%|█ | 39211/371472 [3:09:53<26:05:33, 3.54it/s] 11%|█ | 39212/371472 [3:09:53<27:07:47, 3.40it/s] 11%|█ | 39213/371472 [3:09:53<26:14:02, 3.52it/s] 11%|█ | 39214/371472 [3:09:53<26:41:23, 3.46it/s] 11%|█ | 39215/371472 [3:09:54<25:57:39, 3.56it/s] 11%|█ | 39216/371472 [3:09:54<25:57:59, 3.55it/s] 11%|█ | 39217/371472 [3:09:54<25:32:12, 3.61it/s] 11%|█ | 39218/371472 [3:09:54<24:48:38, 3.72it/s] 11%|█ | 39219/371472 [3:09:55<24:56:30, 3.70it/s] 11%|█ | 39220/371472 [3:09:55<24:46:56, 3.72it/s] {'loss': 4.5136, 'learning_rate': 9.054116658406772e-07, 'epoch': 1.69} + 11%|█ | 39220/371472 [3:09:55<24:46:56, 3.72it/s] 11%|█ | 39221/371472 [3:09:55<25:09:37, 3.67it/s] 11%|█ | 39222/371472 [3:09:56<26:09:33, 3.53it/s] 11%|█ | 39223/371472 [3:09:56<25:35:12, 3.61it/s] 11%|█ | 39224/371472 [3:09:56<26:01:41, 3.55it/s] 11%|█ | 39225/371472 [3:09:56<25:32:26, 3.61it/s] 11%|█ | 39226/371472 [3:09:57<26:43:55, 3.45it/s] 11%|█ | 39227/371472 [3:09:57<29:03:27, 3.18it/s] 11%|█ | 39228/371472 [3:09:57<27:42:21, 3.33it/s] 11%|█ | 39229/371472 [3:09:58<27:09:54, 3.40it/s] 11%|█ | 39230/371472 [3:09:58<26:07:13, 3.53it/s] 11%|█ | 39231/371472 [3:09:58<26:44:04, 3.45it/s] 11%|█ | 39232/371472 [3:09:59<27:04:34, 3.41it/s] 11%|█ | 39233/371472 [3:09:59<26:28:23, 3.49it/s] 11%|█ | 39234/371472 [3:09:59<26:11:25, 3.52it/s] 11%|█ | 39235/371472 [3:09:59<26:07:27, 3.53it/s] 11%|█ | 39236/371472 [3:10:00<25:30:32, 3.62it/s] 11%|█ | 39237/371472 [3:10:00<25:33:40, 3.61it/s] 11%|█ | 39238/371472 [3:10:00<26:35:19, 3.47it/s] 11%|█ | 39239/371472 [3:10:01<27:06:09, 3.41it/s] 11%|█ | 39240/371472 [3:10:01<26:11:47, 3.52it/s] {'loss': 4.3134, 'learning_rate': 9.053631838651985e-07, 'epoch': 1.69} + 11%|█ | 39240/371472 [3:10:01<26:11:47, 3.52it/s] 11%|█ | 39241/371472 [3:10:01<27:05:27, 3.41it/s] 11%|█ | 39242/371472 [3:10:01<27:00:59, 3.42it/s] 11%|█ | 39243/371472 [3:10:02<26:01:33, 3.55it/s] 11%|█ | 39244/371472 [3:10:02<26:59:14, 3.42it/s] 11%|█ | 39245/371472 [3:10:02<28:07:29, 3.28it/s] 11%|█ | 39246/371472 [3:10:03<29:00:11, 3.18it/s] 11%|█ | 39247/371472 [3:10:03<28:32:36, 3.23it/s] 11%|█ | 39248/371472 [3:10:03<28:02:06, 3.29it/s] 11%|█ | 39249/371472 [3:10:04<26:56:59, 3.42it/s] 11%|█ | 39250/371472 [3:10:04<31:37:25, 2.92it/s] 11%|█ | 39251/371472 [3:10:04<29:09:21, 3.17it/s] 11%|█ | 39252/371472 [3:10:05<29:14:27, 3.16it/s] 11%|█ | 39253/371472 [3:10:05<28:28:20, 3.24it/s] 11%|█ | 39254/371472 [3:10:05<31:01:29, 2.97it/s] 11%|█ | 39255/371472 [3:10:06<31:31:56, 2.93it/s] 11%|█ | 39256/371472 [3:10:06<28:58:10, 3.19it/s] 11%|█ | 39257/371472 [3:10:06<27:48:39, 3.32it/s] 11%|█ | 39258/371472 [3:10:06<26:52:13, 3.43it/s] 11%|█ | 39259/371472 [3:10:07<26:45:04, 3.45it/s] 11%|█ | 39260/371472 [3:10:07<27:04:25, 3.41it/s] {'loss': 4.4582, 'learning_rate': 9.053147018897196e-07, 'epoch': 1.69} + 11%|█ | 39260/371472 [3:10:07<27:04:25, 3.41it/s] 11%|█ | 39261/371472 [3:10:07<27:19:15, 3.38it/s] 11%|█ | 39262/371472 [3:10:08<27:10:11, 3.40it/s] 11%|█ | 39263/371472 [3:10:08<26:17:37, 3.51it/s] 11%|█ | 39264/371472 [3:10:08<26:41:11, 3.46it/s] 11%|█ | 39265/371472 [3:10:08<25:55:07, 3.56it/s] 11%|█ | 39266/371472 [3:10:09<25:35:51, 3.60it/s] 11%|█ | 39267/371472 [3:10:09<26:02:52, 3.54it/s] 11%|█ | 39268/371472 [3:10:09<26:22:40, 3.50it/s] 11%|█ | 39269/371472 [3:10:10<26:31:54, 3.48it/s] 11%|█ | 39270/371472 [3:10:10<28:47:53, 3.20it/s] 11%|█ | 39271/371472 [3:10:10<28:22:38, 3.25it/s] 11%|█ | 39272/371472 [3:10:10<27:01:35, 3.41it/s] 11%|█ | 39273/371472 [3:10:11<26:13:21, 3.52it/s] 11%|█ | 39274/371472 [3:10:11<25:37:08, 3.60it/s] 11%|█ | 39275/371472 [3:10:11<25:13:35, 3.66it/s] 11%|█ | 39276/371472 [3:10:12<27:22:37, 3.37it/s] 11%|█ | 39277/371472 [3:10:12<27:25:40, 3.36it/s] 11%|█ | 39278/371472 [3:10:12<26:34:01, 3.47it/s] 11%|█ | 39279/371472 [3:10:12<27:33:11, 3.35it/s] 11%|█ | 39280/371472 [3:10:13<27:34:39, 3.35it/s] {'loss': 4.5434, 'learning_rate': 9.052662199142408e-07, 'epoch': 1.69} + 11%|█ | 39280/371472 [3:10:13<27:34:39, 3.35it/s] 11%|�� | 39281/371472 [3:10:13<26:34:42, 3.47it/s] 11%|█ | 39282/371472 [3:10:13<27:35:45, 3.34it/s] 11%|█ | 39283/371472 [3:10:14<26:52:38, 3.43it/s] 11%|█ | 39284/371472 [3:10:14<26:43:26, 3.45it/s] 11%|█ | 39285/371472 [3:10:14<27:41:39, 3.33it/s] 11%|█ | 39286/371472 [3:10:15<27:30:45, 3.35it/s] 11%|█ | 39287/371472 [3:10:15<26:53:59, 3.43it/s] 11%|█ | 39288/371472 [3:10:15<26:16:39, 3.51it/s] 11%|█ | 39289/371472 [3:10:15<26:08:24, 3.53it/s] 11%|█ | 39290/371472 [3:10:16<26:09:25, 3.53it/s] 11%|█ | 39291/371472 [3:10:16<25:25:47, 3.63it/s] 11%|█ | 39292/371472 [3:10:16<25:16:15, 3.65it/s] 11%|█ | 39293/371472 [3:10:16<25:20:46, 3.64it/s] 11%|█ | 39294/371472 [3:10:17<24:33:28, 3.76it/s] 11%|█ | 39295/371472 [3:10:17<25:46:20, 3.58it/s] 11%|█ | 39296/371472 [3:10:17<27:05:09, 3.41it/s] 11%|█ | 39297/371472 [3:10:18<26:33:42, 3.47it/s] 11%|█ | 39298/371472 [3:10:18<26:53:52, 3.43it/s] 11%|█ | 39299/371472 [3:10:18<26:22:41, 3.50it/s] 11%|█ | 39300/371472 [3:10:18<25:13:54, 3.66it/s] {'loss': 4.4312, 'learning_rate': 9.052177379387618e-07, 'epoch': 1.69} + 11%|█ | 39300/371472 [3:10:18<25:13:54, 3.66it/s] 11%|█ | 39301/371472 [3:10:19<26:56:30, 3.42it/s] 11%|█ | 39302/371472 [3:10:19<26:36:17, 3.47it/s] 11%|█ | 39303/371472 [3:10:19<25:51:06, 3.57it/s] 11%|█ | 39304/371472 [3:10:20<26:59:34, 3.42it/s] 11%|█ | 39305/371472 [3:10:20<27:23:14, 3.37it/s] 11%|█ | 39306/371472 [3:10:20<28:23:46, 3.25it/s] 11%|█ | 39307/371472 [3:10:21<27:16:02, 3.38it/s] 11%|█ | 39308/371472 [3:10:21<27:55:38, 3.30it/s] 11%|█ | 39309/371472 [3:10:21<27:04:07, 3.41it/s] 11%|█ | 39310/371472 [3:10:21<26:04:32, 3.54it/s] 11%|█ | 39311/371472 [3:10:22<26:24:30, 3.49it/s] 11%|█ | 39312/371472 [3:10:22<27:12:19, 3.39it/s] 11%|█ | 39313/371472 [3:10:22<29:58:15, 3.08it/s] 11%|█ | 39314/371472 [3:10:23<28:24:00, 3.25it/s] 11%|█ | 39315/371472 [3:10:23<27:17:48, 3.38it/s] 11%|█ | 39316/371472 [3:10:23<27:07:08, 3.40it/s] 11%|█ | 39317/371472 [3:10:24<27:43:27, 3.33it/s] 11%|█ | 39318/371472 [3:10:24<27:39:40, 3.34it/s] 11%|█ | 39319/371472 [3:10:24<26:26:40, 3.49it/s] 11%|█ | 39320/371472 [3:10:24<25:33:59, 3.61it/s] {'loss': 4.6348, 'learning_rate': 9.051692559632829e-07, 'epoch': 1.69} + 11%|█ | 39320/371472 [3:10:24<25:33:59, 3.61it/s] 11%|█ | 39321/371472 [3:10:25<26:36:37, 3.47it/s] 11%|█ | 39322/371472 [3:10:25<26:25:21, 3.49it/s] 11%|█ | 39323/371472 [3:10:25<26:11:24, 3.52it/s] 11%|█ | 39324/371472 [3:10:25<25:14:00, 3.66it/s] 11%|█ | 39325/371472 [3:10:26<25:47:55, 3.58it/s] 11%|█ | 39326/371472 [3:10:26<25:30:12, 3.62it/s] 11%|█ | 39327/371472 [3:10:26<25:06:39, 3.67it/s] 11%|█ | 39328/371472 [3:10:27<24:39:04, 3.74it/s] 11%|█ | 39329/371472 [3:10:27<24:40:37, 3.74it/s] 11%|█ | 39330/371472 [3:10:27<25:04:03, 3.68it/s] 11%|█ | 39331/371472 [3:10:27<25:12:40, 3.66it/s] 11%|█ | 39332/371472 [3:10:28<25:48:28, 3.57it/s] 11%|█ | 39333/371472 [3:10:28<25:38:45, 3.60it/s] 11%|█ | 39334/371472 [3:10:28<26:09:58, 3.53it/s] 11%|█ | 39335/371472 [3:10:29<25:52:37, 3.57it/s] 11%|█ | 39336/371472 [3:10:29<25:36:25, 3.60it/s] 11%|█ | 39337/371472 [3:10:29<26:28:10, 3.49it/s] 11%|█ | 39338/371472 [3:10:29<27:44:48, 3.33it/s] 11%|█ | 39339/371472 [3:10:30<27:20:17, 3.37it/s] 11%|█ | 39340/371472 [3:10:30<26:17:13, 3.51it/s] {'loss': 4.4803, 'learning_rate': 9.05120773987804e-07, 'epoch': 1.69} + 11%|█ | 39340/371472 [3:10:30<26:17:13, 3.51it/s] 11%|█ | 39341/371472 [3:10:30<26:27:39, 3.49it/s] 11%|█ | 39342/371472 [3:10:31<26:02:54, 3.54it/s] 11%|█ | 39343/371472 [3:10:31<25:23:17, 3.63it/s] 11%|█ | 39344/371472 [3:10:31<25:02:45, 3.68it/s] 11%|█ | 39345/371472 [3:10:31<24:49:28, 3.72it/s] 11%|█ | 39346/371472 [3:10:32<27:03:24, 3.41it/s] 11%|█ | 39347/371472 [3:10:32<26:12:36, 3.52it/s] 11%|█ | 39348/371472 [3:10:32<27:14:10, 3.39it/s] 11%|█ | 39349/371472 [3:10:33<28:27:52, 3.24it/s] 11%|█ | 39350/371472 [3:10:33<27:56:13, 3.30it/s] 11%|█ | 39351/371472 [3:10:33<27:37:24, 3.34it/s] 11%|█ | 39352/371472 [3:10:33<26:44:01, 3.45it/s] 11%|█ | 39353/371472 [3:10:34<26:30:44, 3.48it/s] 11%|█ | 39354/371472 [3:10:34<28:57:43, 3.19it/s] 11%|█ | 39355/371472 [3:10:34<30:04:48, 3.07it/s] 11%|█ | 39356/371472 [3:10:35<28:52:58, 3.19it/s] 11%|█ | 39357/371472 [3:10:35<29:10:41, 3.16it/s] 11%|█ | 39358/371472 [3:10:35<27:39:11, 3.34it/s] 11%|█ | 39359/371472 [3:10:36<25:58:06, 3.55it/s] 11%|█ | 39360/371472 [3:10:36<26:11:54, 3.52it/s] {'loss': 4.2973, 'learning_rate': 9.050722920123252e-07, 'epoch': 1.7} + 11%|█ | 39360/371472 [3:10:36<26:11:54, 3.52it/s] 11%|█ | 39361/371472 [3:10:36<25:55:07, 3.56it/s] 11%|█ | 39362/371472 [3:10:36<25:21:25, 3.64it/s] 11%|█ | 39363/371472 [3:10:37<25:54:53, 3.56it/s] 11%|█ | 39364/371472 [3:10:37<25:58:16, 3.55it/s] 11%|█ | 39365/371472 [3:10:37<25:21:35, 3.64it/s] 11%|█ | 39366/371472 [3:10:38<28:08:22, 3.28it/s] 11%|█ | 39367/371472 [3:10:38<28:39:02, 3.22it/s] 11%|█ | 39368/371472 [3:10:38<28:08:14, 3.28it/s] 11%|█ | 39369/371472 [3:10:38<26:56:16, 3.42it/s] 11%|█ | 39370/371472 [3:10:39<26:19:45, 3.50it/s] 11%|█ | 39371/371472 [3:10:39<26:26:32, 3.49it/s] 11%|█ | 39372/371472 [3:10:39<27:10:00, 3.40it/s] 11%|█ | 39373/371472 [3:10:40<26:19:01, 3.51it/s] 11%|█ | 39374/371472 [3:10:40<25:32:54, 3.61it/s] 11%|█ | 39375/371472 [3:10:40<25:58:14, 3.55it/s] 11%|█ | 39376/371472 [3:10:40<27:28:15, 3.36it/s] 11%|█ | 39377/371472 [3:10:41<26:32:21, 3.48it/s] 11%|█ | 39378/371472 [3:10:41<25:58:38, 3.55it/s] 11%|█ | 39379/371472 [3:10:41<26:14:44, 3.51it/s] 11%|█ | 39380/371472 [3:10:42<25:42:48, 3.59it/s] {'loss': 4.3818, 'learning_rate': 9.050238100368462e-07, 'epoch': 1.7} + 11%|█ | 39380/371472 [3:10:42<25:42:48, 3.59it/s] 11%|█ | 39381/371472 [3:10:42<25:13:48, 3.66it/s] 11%|█ | 39382/371472 [3:10:42<24:44:24, 3.73it/s] 11%|█ | 39383/371472 [3:10:42<25:56:44, 3.56it/s] 11%|█ | 39384/371472 [3:10:43<27:49:01, 3.32it/s] 11%|█ | 39385/371472 [3:10:43<27:01:00, 3.41it/s] 11%|█ | 39386/371472 [3:10:43<31:40:56, 2.91it/s] 11%|█ | 39387/371472 [3:10:44<31:09:11, 2.96it/s] 11%|█ | 39388/371472 [3:10:44<30:26:47, 3.03it/s] 11%|█ | 39389/371472 [3:10:44<30:24:05, 3.03it/s] 11%|█ | 39390/371472 [3:10:45<29:25:32, 3.13it/s] 11%|█ | 39391/371472 [3:10:45<27:46:53, 3.32it/s] 11%|█ | 39392/371472 [3:10:45<26:54:31, 3.43it/s] 11%|█ | 39393/371472 [3:10:46<26:13:55, 3.52it/s] 11%|█ | 39394/371472 [3:10:46<27:04:35, 3.41it/s] 11%|█ | 39395/371472 [3:10:46<27:21:23, 3.37it/s] 11%|█ | 39396/371472 [3:10:46<26:16:29, 3.51it/s] 11%|█ | 39397/371472 [3:10:47<26:30:48, 3.48it/s] 11%|█ | 39398/371472 [3:10:47<25:38:31, 3.60it/s] 11%|█ | 39399/371472 [3:10:47<26:24:51, 3.49it/s] 11%|█ | 39400/371472 [3:10:48<26:02:42, 3.54it/s] {'loss': 4.5338, 'learning_rate': 9.049753280613674e-07, 'epoch': 1.7} + 11%|█ | 39400/371472 [3:10:48<26:02:42, 3.54it/s] 11%|█ | 39401/371472 [3:10:48<29:17:43, 3.15it/s] 11%|█ | 39402/371472 [3:10:48<28:45:13, 3.21it/s] 11%|█ | 39403/371472 [3:10:49<27:12:53, 3.39it/s] 11%|█ | 39404/371472 [3:10:49<27:31:42, 3.35it/s] 11%|█ | 39405/371472 [3:10:49<28:18:20, 3.26it/s] 11%|█ | 39406/371472 [3:10:49<27:13:41, 3.39it/s] 11%|█ | 39407/371472 [3:10:50<26:51:39, 3.43it/s] 11%|█ | 39408/371472 [3:10:50<26:57:18, 3.42it/s] 11%|█ | 39409/371472 [3:10:50<27:37:39, 3.34it/s] 11%|█ | 39410/371472 [3:10:51<26:20:52, 3.50it/s] 11%|█ | 39411/371472 [3:10:51<25:39:55, 3.59it/s] 11%|█ | 39412/371472 [3:10:51<25:07:14, 3.67it/s] 11%|█ | 39413/371472 [3:10:51<26:28:11, 3.48it/s] 11%|█ | 39414/371472 [3:10:52<26:56:07, 3.42it/s] 11%|█ | 39415/371472 [3:10:52<25:52:40, 3.56it/s] 11%|█ | 39416/371472 [3:10:52<26:32:04, 3.48it/s] 11%|█ | 39417/371472 [3:10:53<25:28:43, 3.62it/s] 11%|█ | 39418/371472 [3:10:53<27:30:56, 3.35it/s] 11%|█ | 39419/371472 [3:10:53<26:41:47, 3.46it/s] 11%|█ | 39420/371472 [3:10:53<26:40:51, 3.46it/s] {'loss': 4.2399, 'learning_rate': 9.049268460858885e-07, 'epoch': 1.7} + 11%|█ | 39420/371472 [3:10:53<26:40:51, 3.46it/s] 11%|█ | 39421/371472 [3:10:54<25:43:36, 3.59it/s] 11%|█ | 39422/371472 [3:10:54<25:24:51, 3.63it/s] 11%|█ | 39423/371472 [3:10:54<25:00:28, 3.69it/s] 11%|█ | 39424/371472 [3:10:54<25:17:25, 3.65it/s] 11%|█ | 39425/371472 [3:10:55<25:58:19, 3.55it/s] 11%|█ | 39426/371472 [3:10:55<27:32:11, 3.35it/s] 11%|█ | 39427/371472 [3:10:55<28:40:11, 3.22it/s] 11%|█ | 39428/371472 [3:10:56<28:54:58, 3.19it/s] 11%|█ | 39429/371472 [3:10:56<27:12:06, 3.39it/s] 11%|█ | 39430/371472 [3:10:56<28:19:25, 3.26it/s] 11%|█ | 39431/371472 [3:10:57<26:36:19, 3.47it/s] 11%|█ | 39432/371472 [3:10:57<30:18:15, 3.04it/s] 11%|█ | 39433/371472 [3:10:57<29:34:26, 3.12it/s] 11%|█ | 39434/371472 [3:10:58<27:50:58, 3.31it/s] 11%|█ | 39435/371472 [3:10:58<29:23:08, 3.14it/s] 11%|█ | 39436/371472 [3:10:58<28:14:10, 3.27it/s] 11%|█ | 39437/371472 [3:10:58<27:06:53, 3.40it/s] 11%|█ | 39438/371472 [3:10:59<26:33:21, 3.47it/s] 11%|█ | 39439/371472 [3:10:59<25:20:52, 3.64it/s] 11%|█ | 39440/371472 [3:10:59<25:38:46, 3.60it/s] {'loss': 4.3713, 'learning_rate': 9.048783641104097e-07, 'epoch': 1.7} + 11%|█ | 39440/371472 [3:10:59<25:38:46, 3.60it/s] 11%|█ | 39441/371472 [3:11:00<25:00:28, 3.69it/s] 11%|█ | 39442/371472 [3:11:00<24:35:41, 3.75it/s] 11%|█ | 39443/371472 [3:11:00<24:37:36, 3.75it/s] 11%|█ | 39444/371472 [3:11:00<27:35:52, 3.34it/s] 11%|█ | 39445/371472 [3:11:01<27:19:43, 3.37it/s] 11%|█ | 39446/371472 [3:11:01<27:19:54, 3.37it/s] 11%|█ | 39447/371472 [3:11:01<27:07:06, 3.40it/s] 11%|█ | 39448/371472 [3:11:02<26:44:05, 3.45it/s] 11%|█ | 39449/371472 [3:11:02<26:54:25, 3.43it/s] 11%|█ | 39450/371472 [3:11:02<26:46:46, 3.44it/s] 11%|█ | 39451/371472 [3:11:02<25:36:58, 3.60it/s] 11%|█ | 39452/371472 [3:11:03<24:52:39, 3.71it/s] 11%|█ | 39453/371472 [3:11:03<26:54:34, 3.43it/s] 11%|█ | 39454/371472 [3:11:03<26:46:46, 3.44it/s] 11%|█ | 39455/371472 [3:11:04<26:14:25, 3.51it/s] 11%|█ | 39456/371472 [3:11:04<25:55:34, 3.56it/s] 11%|█ | 39457/371472 [3:11:04<25:15:51, 3.65it/s] 11%|█ | 39458/371472 [3:11:04<25:29:27, 3.62it/s] 11%|█ | 39459/371472 [3:11:05<25:26:40, 3.62it/s] 11%|█ | 39460/371472 [3:11:05<26:21:19, 3.50it/s] {'loss': 4.2661, 'learning_rate': 9.048298821349306e-07, 'epoch': 1.7} + 11%|█ | 39460/371472 [3:11:05<26:21:19, 3.50it/s] 11%|█ | 39461/371472 [3:11:05<30:18:24, 3.04it/s] 11%|█ | 39462/371472 [3:11:06<30:06:05, 3.06it/s] 11%|█ | 39463/371472 [3:11:06<28:28:13, 3.24it/s] 11%|█ | 39464/371472 [3:11:06<26:52:31, 3.43it/s] 11%|█ | 39465/371472 [3:11:07<26:23:59, 3.49it/s] 11%|█ | 39466/371472 [3:11:07<27:35:37, 3.34it/s] 11%|█ | 39467/371472 [3:11:07<26:32:12, 3.48it/s] 11%|█ | 39468/371472 [3:11:07<26:08:22, 3.53it/s] 11%|█ | 39469/371472 [3:11:08<26:34:13, 3.47it/s] 11%|█ | 39470/371472 [3:11:08<26:19:14, 3.50it/s] 11%|█ | 39471/371472 [3:11:08<25:48:32, 3.57it/s] 11%|█ | 39472/371472 [3:11:08<25:30:04, 3.62it/s] 11%|█ | 39473/371472 [3:11:09<25:44:22, 3.58it/s] 11%|█ | 39474/371472 [3:11:09<26:10:47, 3.52it/s] 11%|█ | 39475/371472 [3:11:09<25:41:45, 3.59it/s] 11%|█ | 39476/371472 [3:11:10<24:48:55, 3.72it/s] 11%|█ | 39477/371472 [3:11:10<25:09:51, 3.66it/s] 11%|█ | 39478/371472 [3:11:10<25:09:47, 3.66it/s] 11%|█ | 39479/371472 [3:11:10<27:08:47, 3.40it/s] 11%|█ | 39480/371472 [3:11:11<27:22:33, 3.37it/s] {'loss': 4.1642, 'learning_rate': 9.047814001594518e-07, 'epoch': 1.7} + 11%|█ | 39480/371472 [3:11:11<27:22:33, 3.37it/s] 11%|█ | 39481/371472 [3:11:11<26:44:14, 3.45it/s] 11%|█ | 39482/371472 [3:11:11<25:39:31, 3.59it/s] 11%|█ | 39483/371472 [3:11:12<26:36:30, 3.47it/s] 11%|█ | 39484/371472 [3:11:12<26:29:20, 3.48it/s] 11%|█ | 39485/371472 [3:11:12<26:43:39, 3.45it/s] 11%|█ | 39486/371472 [3:11:12<26:11:36, 3.52it/s] 11%|█ | 39487/371472 [3:11:13<27:36:00, 3.34it/s] 11%|█ | 39488/371472 [3:11:13<27:47:13, 3.32it/s] 11%|█ | 39489/371472 [3:11:13<27:29:54, 3.35it/s] 11%|█ | 39490/371472 [3:11:14<27:12:28, 3.39it/s] 11%|█ | 39491/371472 [3:11:14<29:20:30, 3.14it/s] 11%|█ | 39492/371472 [3:11:14<28:48:51, 3.20it/s] 11%|█ | 39493/371472 [3:11:15<30:05:45, 3.06it/s] 11%|█ | 39494/371472 [3:11:15<28:12:13, 3.27it/s] 11%|█ | 39495/371472 [3:11:15<27:29:30, 3.35it/s] 11%|█ | 39496/371472 [3:11:16<26:13:42, 3.52it/s] 11%|█ | 39497/371472 [3:11:16<28:21:49, 3.25it/s] 11%|█ | 39498/371472 [3:11:16<27:30:15, 3.35it/s] 11%|█ | 39499/371472 [3:11:16<27:05:17, 3.40it/s] 11%|█ | 39500/371472 [3:11:17<26:14:47, 3.51it/s] {'loss': 4.239, 'learning_rate': 9.047329181839729e-07, 'epoch': 1.7} + 11%|█ | 39500/371472 [3:11:17<26:14:47, 3.51it/s] 11%|█ | 39501/371472 [3:11:17<28:24:52, 3.25it/s] 11%|█ | 39502/371472 [3:11:17<27:17:33, 3.38it/s] 11%|█ | 39503/371472 [3:11:18<28:51:41, 3.20it/s] 11%|█ | 39504/371472 [3:11:18<27:50:24, 3.31it/s] 11%|█ | 39505/371472 [3:11:18<26:46:32, 3.44it/s] 11%|█ | 39506/371472 [3:11:18<25:52:30, 3.56it/s] 11%|█ | 39507/371472 [3:11:19<25:02:20, 3.68it/s] 11%|█ | 39508/371472 [3:11:19<24:52:36, 3.71it/s] 11%|█ | 39509/371472 [3:11:19<25:53:57, 3.56it/s] 11%|█ | 39510/371472 [3:11:20<26:38:26, 3.46it/s] 11%|█ | 39511/371472 [3:11:20<26:12:39, 3.52it/s] 11%|█ | 39512/371472 [3:11:20<26:06:34, 3.53it/s] 11%|█ | 39513/371472 [3:11:20<26:01:16, 3.54it/s] 11%|█ | 39514/371472 [3:11:21<25:39:42, 3.59it/s] 11%|█ | 39515/371472 [3:11:21<24:59:52, 3.69it/s] 11%|█ | 39516/371472 [3:11:21<27:48:30, 3.32it/s] 11%|█ | 39517/371472 [3:11:22<28:06:18, 3.28it/s] 11%|█ | 39518/371472 [3:11:22<26:58:55, 3.42it/s] 11%|█ | 39519/371472 [3:11:22<25:59:16, 3.55it/s] 11%|█ | 39520/371472 [3:11:22<26:57:11, 3.42it/s] {'loss': 4.3927, 'learning_rate': 9.04684436208494e-07, 'epoch': 1.7} + 11%|█ | 39520/371472 [3:11:22<26:57:11, 3.42it/s] 11%|█ | 39521/371472 [3:11:23<28:32:40, 3.23it/s] 11%|█ | 39522/371472 [3:11:23<27:16:12, 3.38it/s] 11%|█ | 39523/371472 [3:11:23<28:02:10, 3.29it/s] 11%|█ | 39524/371472 [3:11:24<27:22:59, 3.37it/s] 11%|█ | 39525/371472 [3:11:24<27:10:10, 3.39it/s] 11%|█ | 39526/371472 [3:11:24<26:24:47, 3.49it/s] 11%|█ | 39527/371472 [3:11:25<25:58:55, 3.55it/s] 11%|█ | 39528/371472 [3:11:25<26:40:16, 3.46it/s] 11%|█ | 39529/371472 [3:11:25<27:29:40, 3.35it/s] 11%|█ | 39530/371472 [3:11:26<28:39:33, 3.22it/s] 11%|█ | 39531/371472 [3:11:26<29:00:30, 3.18it/s] 11%|█ | 39532/371472 [3:11:26<29:41:27, 3.11it/s] 11%|█ | 39533/371472 [3:11:26<27:57:45, 3.30it/s] 11%|█ | 39534/371472 [3:11:27<26:34:44, 3.47it/s] 11%|█ | 39535/371472 [3:11:27<28:47:47, 3.20it/s] 11%|█ | 39536/371472 [3:11:27<28:21:14, 3.25it/s] 11%|█ | 39537/371472 [3:11:28<27:21:51, 3.37it/s] 11%|█ | 39538/371472 [3:11:28<26:22:18, 3.50it/s] 11%|█ | 39539/371472 [3:11:28<25:33:18, 3.61it/s] 11%|█ | 39540/371472 [3:11:28<25:06:34, 3.67it/s] {'loss': 4.4087, 'learning_rate': 9.046359542330151e-07, 'epoch': 1.7} + 11%|█ | 39540/371472 [3:11:28<25:06:34, 3.67it/s] 11%|█ | 39541/371472 [3:11:29<25:24:47, 3.63it/s] 11%|█ | 39542/371472 [3:11:29<25:48:53, 3.57it/s] 11%|█ | 39543/371472 [3:11:29<25:34:55, 3.60it/s] 11%|█ | 39544/371472 [3:11:30<25:45:14, 3.58it/s] 11%|█ | 39545/371472 [3:11:30<26:06:23, 3.53it/s] 11%|█ | 39546/371472 [3:11:30<28:02:29, 3.29it/s] 11%|█ | 39547/371472 [3:11:30<26:59:41, 3.42it/s] 11%|█ | 39548/371472 [3:11:31<26:18:40, 3.50it/s] 11%|█ | 39549/371472 [3:11:31<25:41:10, 3.59it/s] 11%|█ | 39550/371472 [3:11:31<26:03:25, 3.54it/s] 11%|█ | 39551/371472 [3:11:32<26:13:32, 3.52it/s] 11%|█ | 39552/371472 [3:11:32<25:48:37, 3.57it/s] 11%|█ | 39553/371472 [3:11:32<26:41:24, 3.45it/s] 11%|█ | 39554/371472 [3:11:32<26:50:59, 3.43it/s] 11%|█ | 39555/371472 [3:11:33<26:44:15, 3.45it/s] 11%|█ | 39556/371472 [3:11:33<29:50:27, 3.09it/s] 11%|█ | 39557/371472 [3:11:33<28:35:24, 3.22it/s] 11%|█ | 39558/371472 [3:11:34<27:20:06, 3.37it/s] 11%|█ | 39559/371472 [3:11:34<26:49:27, 3.44it/s] 11%|█ | 39560/371472 [3:11:34<28:28:25, 3.24it/s] {'loss': 4.5352, 'learning_rate': 9.045874722575363e-07, 'epoch': 1.7} + 11%|█ | 39560/371472 [3:11:34<28:28:25, 3.24it/s] 11%|█ | 39561/371472 [3:11:35<27:09:45, 3.39it/s] 11%|█ | 39562/371472 [3:11:35<26:14:49, 3.51it/s] 11%|█ | 39563/371472 [3:11:35<28:25:40, 3.24it/s] 11%|█ | 39564/371472 [3:11:35<27:42:06, 3.33it/s] 11%|█ | 39565/371472 [3:11:36<26:54:01, 3.43it/s] 11%|█ | 39566/371472 [3:11:36<27:06:10, 3.40it/s] 11%|█ | 39567/371472 [3:11:36<26:53:09, 3.43it/s] 11%|█ | 39568/371472 [3:11:37<26:58:50, 3.42it/s] 11%|█ | 39569/371472 [3:11:37<27:12:23, 3.39it/s] 11%|█ | 39570/371472 [3:11:37<26:42:06, 3.45it/s] 11%|█ | 39571/371472 [3:11:37<26:10:21, 3.52it/s] 11%|█ | 39572/371472 [3:11:38<25:31:20, 3.61it/s] 11%|█ | 39573/371472 [3:11:38<24:55:44, 3.70it/s] 11%|█ | 39574/371472 [3:11:38<24:48:03, 3.72it/s] 11%|█ | 39575/371472 [3:11:39<25:13:15, 3.66it/s] 11%|█ | 39576/371472 [3:11:39<25:08:43, 3.67it/s] 11%|█ | 39577/371472 [3:11:39<25:48:34, 3.57it/s] 11%|█ | 39578/371472 [3:11:39<25:37:10, 3.60it/s] 11%|█ | 39579/371472 [3:11:40<25:27:33, 3.62it/s] 11%|█ | 39580/371472 [3:11:40<28:06:07, 3.28it/s] {'loss': 4.5012, 'learning_rate': 9.045389902820573e-07, 'epoch': 1.7} + 11%|█ | 39580/371472 [3:11:40<28:06:07, 3.28it/s] 11%|█ | 39581/371472 [3:11:40<27:28:30, 3.36it/s] 11%|█ | 39582/371472 [3:11:41<26:27:33, 3.48it/s] 11%|█ | 39583/371472 [3:11:41<27:05:20, 3.40it/s] 11%|█ | 39584/371472 [3:11:41<27:03:47, 3.41it/s] 11%|█ | 39585/371472 [3:11:41<26:07:40, 3.53it/s] 11%|█ | 39586/371472 [3:11:42<26:47:28, 3.44it/s] 11%|█ | 39587/371472 [3:11:42<25:54:50, 3.56it/s] 11%|█ | 39588/371472 [3:11:42<25:46:38, 3.58it/s] 11%|█ | 39589/371472 [3:11:43<25:51:45, 3.56it/s] 11%|█ | 39590/371472 [3:11:43<26:30:41, 3.48it/s] 11%|█ | 39591/371472 [3:11:43<26:39:56, 3.46it/s] 11%|█ | 39592/371472 [3:11:43<28:10:03, 3.27it/s] 11%|█ | 39593/371472 [3:11:44<27:02:14, 3.41it/s] 11%|█ | 39594/371472 [3:11:44<26:10:49, 3.52it/s] 11%|█ | 39595/371472 [3:11:44<25:26:06, 3.62it/s] 11%|█ | 39596/371472 [3:11:45<25:27:35, 3.62it/s] 11%|█ | 39597/371472 [3:11:45<28:28:37, 3.24it/s] 11%|█ | 39598/371472 [3:11:45<28:04:00, 3.28it/s] 11%|█ | 39599/371472 [3:11:45<26:54:46, 3.43it/s] 11%|█ | 39600/371472 [3:11:46<26:27:52, 3.48it/s] {'loss': 4.5037, 'learning_rate': 9.044905083065783e-07, 'epoch': 1.71} + 11%|█ | 39600/371472 [3:11:46<26:27:52, 3.48it/s] 11%|█ | 39601/371472 [3:11:46<25:44:41, 3.58it/s] 11%|█ | 39602/371472 [3:11:46<25:22:59, 3.63it/s] 11%|█ | 39603/371472 [3:11:47<25:36:32, 3.60it/s] 11%|█ | 39604/371472 [3:11:47<25:41:22, 3.59it/s] 11%|█ | 39605/371472 [3:11:47<25:12:50, 3.66it/s] 11%|█ | 39606/371472 [3:11:47<25:08:46, 3.67it/s] 11%|█ | 39607/371472 [3:11:48<27:02:26, 3.41it/s] 11%|█ | 39608/371472 [3:11:48<27:23:45, 3.36it/s] 11%|█ | 39609/371472 [3:11:48<27:32:54, 3.35it/s] 11%|█ | 39610/371472 [3:11:49<26:16:41, 3.51it/s] 11%|█ | 39611/371472 [3:11:49<25:30:44, 3.61it/s] 11%|█ | 39612/371472 [3:11:49<27:19:43, 3.37it/s] 11%|█ | 39613/371472 [3:11:49<27:03:03, 3.41it/s] 11%|█ | 39614/371472 [3:11:50<26:35:32, 3.47it/s] 11%|█ | 39615/371472 [3:11:50<28:07:42, 3.28it/s] 11%|█ | 39616/371472 [3:11:50<28:56:19, 3.19it/s] 11%|█ | 39617/371472 [3:11:51<27:34:26, 3.34it/s] 11%|█ | 39618/371472 [3:11:51<26:22:43, 3.49it/s] 11%|█ | 39619/371472 [3:11:51<26:08:32, 3.53it/s] 11%|█ | 39620/371472 [3:11:52<26:54:01, 3.43it/s] {'loss': 4.3658, 'learning_rate': 9.044420263310995e-07, 'epoch': 1.71} + 11%|█ | 39620/371472 [3:11:52<26:54:01, 3.43it/s] 11%|█ | 39621/371472 [3:11:52<26:07:48, 3.53it/s] 11%|█ | 39622/371472 [3:11:52<25:32:19, 3.61it/s] 11%|█ | 39623/371472 [3:11:52<26:18:13, 3.50it/s] 11%|█ | 39624/371472 [3:11:53<25:39:25, 3.59it/s] 11%|█ | 39625/371472 [3:11:53<25:23:06, 3.63it/s] 11%|█ | 39626/371472 [3:11:53<26:25:40, 3.49it/s] 11%|█ | 39627/371472 [3:11:53<25:57:34, 3.55it/s] 11%|█ | 39628/371472 [3:11:54<25:25:16, 3.63it/s] 11%|█ | 39629/371472 [3:11:54<26:22:38, 3.49it/s] 11%|█ | 39630/371472 [3:11:54<25:34:41, 3.60it/s] 11%|█ | 39631/371472 [3:11:55<25:35:01, 3.60it/s] 11%|█ | 39632/371472 [3:11:55<25:00:38, 3.69it/s] 11%|█ | 39633/371472 [3:11:55<24:52:45, 3.70it/s] 11%|█ | 39634/371472 [3:11:55<24:48:13, 3.72it/s] 11%|█ | 39635/371472 [3:11:56<25:09:51, 3.66it/s] 11%|█ | 39636/371472 [3:11:56<26:13:23, 3.52it/s] 11%|█ | 39637/371472 [3:11:56<25:49:02, 3.57it/s] 11%|█ | 39638/371472 [3:11:57<27:08:23, 3.40it/s] 11%|█ | 39639/371472 [3:11:57<26:13:30, 3.51it/s] 11%|█ | 39640/371472 [3:11:57<26:44:31, 3.45it/s] {'loss': 4.3553, 'learning_rate': 9.043935443556205e-07, 'epoch': 1.71} + 11%|█ | 39640/371472 [3:11:57<26:44:31, 3.45it/s] 11%|█ | 39641/371472 [3:11:57<26:04:16, 3.54it/s] 11%|█ | 39642/371472 [3:11:58<26:46:40, 3.44it/s] 11%|█ | 39643/371472 [3:11:58<26:38:25, 3.46it/s] 11%|█ | 39644/371472 [3:11:58<26:13:53, 3.51it/s] 11%|█ | 39645/371472 [3:11:59<25:49:09, 3.57it/s] 11%|█ | 39646/371472 [3:11:59<27:21:46, 3.37it/s] 11%|█ | 39647/371472 [3:11:59<26:22:31, 3.49it/s] 11%|█ | 39648/371472 [3:11:59<25:20:51, 3.64it/s] 11%|█ | 39649/371472 [3:12:00<25:47:19, 3.57it/s] 11%|█ | 39650/371472 [3:12:00<25:09:10, 3.66it/s] 11%|█ | 39651/371472 [3:12:00<24:42:15, 3.73it/s] 11%|█ | 39652/371472 [3:12:00<24:14:50, 3.80it/s] 11%|█ | 39653/371472 [3:12:01<26:06:21, 3.53it/s] 11%|█ | 39654/371472 [3:12:01<25:59:20, 3.55it/s] 11%|█ | 39655/371472 [3:12:01<26:04:09, 3.54it/s] 11%|█ | 39656/371472 [3:12:02<26:23:09, 3.49it/s] 11%|█ | 39657/371472 [3:12:02<25:56:37, 3.55it/s] 11%|█ | 39658/371472 [3:12:02<26:42:47, 3.45it/s] 11%|█ | 39659/371472 [3:12:02<26:22:10, 3.50it/s] 11%|█ | 39660/371472 [3:12:03<25:42:57, 3.58it/s] {'loss': 4.344, 'learning_rate': 9.043450623801418e-07, 'epoch': 1.71} + 11%|█ | 39660/371472 [3:12:03<25:42:57, 3.58it/s] 11%|█ | 39661/371472 [3:12:03<25:05:43, 3.67it/s] 11%|█ | 39662/371472 [3:12:03<25:37:53, 3.60it/s] 11%|█ | 39663/371472 [3:12:04<25:23:39, 3.63it/s] 11%|█ | 39664/371472 [3:12:04<25:19:29, 3.64it/s] 11%|█ | 39665/371472 [3:12:04<25:06:54, 3.67it/s] 11%|█ | 39666/371472 [3:12:04<25:54:48, 3.56it/s] 11%|█ | 39667/371472 [3:12:05<25:27:19, 3.62it/s] 11%|█ | 39668/371472 [3:12:05<25:26:41, 3.62it/s] 11%|█ | 39669/371472 [3:12:05<25:39:29, 3.59it/s] 11%|█ | 39670/371472 [3:12:06<25:35:30, 3.60it/s] 11%|█ | 39671/371472 [3:12:06<26:30:20, 3.48it/s] 11%|█ | 39672/371472 [3:12:06<25:36:23, 3.60it/s] 11%|█ | 39673/371472 [3:12:06<24:44:40, 3.72it/s] 11%|█ | 39674/371472 [3:12:07<24:47:52, 3.72it/s] 11%|█ | 39675/371472 [3:12:07<25:24:18, 3.63it/s] 11%|█ | 39676/371472 [3:12:07<26:57:46, 3.42it/s] 11%|█ | 39677/371472 [3:12:07<26:50:36, 3.43it/s] 11%|█ | 39678/371472 [3:12:08<27:19:15, 3.37it/s] 11%|█ | 39679/371472 [3:12:08<26:39:03, 3.46it/s] 11%|█ | 39680/371472 [3:12:08<28:37:17, 3.22it/s] {'loss': 4.2964, 'learning_rate': 9.042965804046628e-07, 'epoch': 1.71} + 11%|█ | 39680/371472 [3:12:08<28:37:17, 3.22it/s] 11%|█ | 39681/371472 [3:12:09<27:23:02, 3.37it/s] 11%|█ | 39682/371472 [3:12:09<26:28:46, 3.48it/s] 11%|█ | 39683/371472 [3:12:09<26:04:23, 3.53it/s] 11%|█ | 39684/371472 [3:12:10<26:09:40, 3.52it/s] 11%|█ | 39685/371472 [3:12:10<26:25:09, 3.49it/s] 11%|█ | 39686/371472 [3:12:10<25:56:37, 3.55it/s] 11%|█ | 39687/371472 [3:12:10<26:41:29, 3.45it/s] 11%|█ | 39688/371472 [3:12:11<26:42:21, 3.45it/s] 11%|█ | 39689/371472 [3:12:11<25:59:18, 3.55it/s] 11%|█ | 39690/371472 [3:12:11<27:25:51, 3.36it/s] 11%|█ | 39691/371472 [3:12:12<28:30:02, 3.23it/s] 11%|█ | 39692/371472 [3:12:12<26:59:24, 3.41it/s] 11%|█ | 39693/371472 [3:12:12<25:51:49, 3.56it/s] 11%|█ | 39694/371472 [3:12:12<25:26:11, 3.62it/s] 11%|█ | 39695/371472 [3:12:13<24:36:18, 3.75it/s] 11%|█ | 39696/371472 [3:12:13<25:29:45, 3.61it/s] 11%|█ | 39697/371472 [3:12:13<25:59:32, 3.55it/s] 11%|█ | 39698/371472 [3:12:13<25:25:10, 3.63it/s] 11%|█ | 39699/371472 [3:12:14<25:18:24, 3.64it/s] 11%|█ | 39700/371472 [3:12:14<25:29:43, 3.61it/s] {'loss': 4.0981, 'learning_rate': 9.04248098429184e-07, 'epoch': 1.71} + 11%|█ | 39700/371472 [3:12:14<25:29:43, 3.61it/s] 11%|█ | 39701/371472 [3:12:14<27:19:56, 3.37it/s] 11%|█ | 39702/371472 [3:12:15<29:22:32, 3.14it/s] 11%|█ | 39703/371472 [3:12:15<28:03:37, 3.28it/s] 11%|█ | 39704/371472 [3:12:15<26:54:10, 3.43it/s] 11%|█ | 39705/371472 [3:12:16<28:25:43, 3.24it/s] 11%|█ | 39706/371472 [3:12:16<27:07:48, 3.40it/s] 11%|█ | 39707/371472 [3:12:16<27:19:17, 3.37it/s] 11%|█ | 39708/371472 [3:12:17<30:58:56, 2.97it/s] 11%|█ | 39709/371472 [3:12:17<29:08:49, 3.16it/s] 11%|█ | 39710/371472 [3:12:17<28:05:15, 3.28it/s] 11%|█ | 39711/371472 [3:12:17<26:53:47, 3.43it/s] 11%|█ | 39712/371472 [3:12:18<26:22:44, 3.49it/s] 11%|█ | 39713/371472 [3:12:18<27:53:25, 3.30it/s] 11%|█ | 39714/371472 [3:12:18<29:51:21, 3.09it/s] 11%|█ | 39715/371472 [3:12:19<28:53:37, 3.19it/s] 11%|█ | 39716/371472 [3:12:19<27:43:24, 3.32it/s] 11%|█ | 39717/371472 [3:12:19<27:33:12, 3.34it/s] 11%|█ | 39718/371472 [3:12:20<27:07:53, 3.40it/s] 11%|█ | 39719/371472 [3:12:20<26:06:17, 3.53it/s] 11%|█ | 39720/371472 [3:12:20<25:35:45, 3.60it/s] {'loss': 4.2457, 'learning_rate': 9.04199616453705e-07, 'epoch': 1.71} + 11%|█ | 39720/371472 [3:12:20<25:35:45, 3.60it/s] 11%|█ | 39721/371472 [3:12:20<26:56:03, 3.42it/s] 11%|█ | 39722/371472 [3:12:21<27:07:54, 3.40it/s] 11%|█ | 39723/371472 [3:12:21<26:24:06, 3.49it/s] 11%|█ | 39724/371472 [3:12:21<26:08:22, 3.53it/s] 11%|█ | 39725/371472 [3:12:22<26:18:39, 3.50it/s] 11%|█ | 39726/371472 [3:12:22<25:29:48, 3.61it/s] 11%|█ | 39727/371472 [3:12:22<25:20:27, 3.64it/s] 11%|█ | 39728/371472 [3:12:22<26:50:42, 3.43it/s] 11%|█ | 39729/371472 [3:12:23<27:19:00, 3.37it/s] 11%|█ | 39730/371472 [3:12:23<27:30:22, 3.35it/s] 11%|█ | 39731/371472 [3:12:23<27:35:08, 3.34it/s] 11%|█ | 39732/371472 [3:12:24<25:52:29, 3.56it/s] 11%|█ | 39733/371472 [3:12:24<25:50:10, 3.57it/s] 11%|█ | 39734/371472 [3:12:24<25:34:05, 3.60it/s] 11%|█ | 39735/371472 [3:12:24<25:32:26, 3.61it/s] 11%|█ | 39736/371472 [3:12:25<25:25:52, 3.62it/s] 11%|█ | 39737/371472 [3:12:25<25:05:39, 3.67it/s] 11%|█ | 39738/371472 [3:12:25<24:55:45, 3.70it/s] 11%|█ | 39739/371472 [3:12:26<27:12:36, 3.39it/s] 11%|█ | 39740/371472 [3:12:26<26:29:17, 3.48it/s] {'loss': 4.0537, 'learning_rate': 9.041511344782263e-07, 'epoch': 1.71} + 11%|█ | 39740/371472 [3:12:26<26:29:17, 3.48it/s] 11%|█ | 39741/371472 [3:12:26<26:06:20, 3.53it/s] 11%|█ | 39742/371472 [3:12:26<25:49:32, 3.57it/s] 11%|█ | 39743/371472 [3:12:27<25:45:49, 3.58it/s] 11%|█ | 39744/371472 [3:12:27<24:49:19, 3.71it/s] 11%|█ | 39745/371472 [3:12:27<25:27:04, 3.62it/s] 11%|█ | 39746/371472 [3:12:27<25:43:03, 3.58it/s] 11%|█ | 39747/371472 [3:12:28<25:10:01, 3.66it/s] 11%|█ | 39748/371472 [3:12:28<25:25:00, 3.63it/s] 11%|█ | 39749/371472 [3:12:28<25:07:32, 3.67it/s] 11%|█ | 39750/371472 [3:12:29<25:36:42, 3.60it/s] 11%|█ | 39751/371472 [3:12:29<26:54:28, 3.42it/s] 11%|█ | 39752/371472 [3:12:29<26:27:55, 3.48it/s] 11%|█ | 39753/371472 [3:12:29<27:05:59, 3.40it/s] 11%|█ | 39754/371472 [3:12:30<26:21:05, 3.50it/s] 11%|█ | 39755/371472 [3:12:30<27:57:16, 3.30it/s] 11%|█ | 39756/371472 [3:12:30<26:52:07, 3.43it/s] 11%|█ | 39757/371472 [3:12:31<26:19:24, 3.50it/s] 11%|█ | 39758/371472 [3:12:31<25:53:10, 3.56it/s] 11%|█ | 39759/371472 [3:12:31<25:52:50, 3.56it/s] 11%|█ | 39760/371472 [3:12:31<26:01:28, 3.54it/s] {'loss': 4.5998, 'learning_rate': 9.041026525027472e-07, 'epoch': 1.71} + 11%|█ | 39760/371472 [3:12:31<26:01:28, 3.54it/s] 11%|█ | 39761/371472 [3:12:32<25:59:44, 3.54it/s] 11%|█ | 39762/371472 [3:12:32<25:17:11, 3.64it/s] 11%|█ | 39763/371472 [3:12:32<27:24:35, 3.36it/s] 11%|█ | 39764/371472 [3:12:33<27:59:03, 3.29it/s] 11%|█ | 39765/371472 [3:12:33<27:39:57, 3.33it/s] 11%|█ | 39766/371472 [3:12:33<27:40:20, 3.33it/s] 11%|█ | 39767/371472 [3:12:34<26:18:22, 3.50it/s] 11%|█ | 39768/371472 [3:12:34<27:44:24, 3.32it/s] 11%|█ | 39769/371472 [3:12:34<27:14:16, 3.38it/s] 11%|█ | 39770/371472 [3:12:34<26:24:48, 3.49it/s] 11%|█ | 39771/371472 [3:12:35<29:43:47, 3.10it/s] 11%|█ | 39772/371472 [3:12:35<30:43:54, 3.00it/s] 11%|█ | 39773/371472 [3:12:35<30:25:02, 3.03it/s] 11%|█ | 39774/371472 [3:12:36<30:21:43, 3.03it/s] 11%|█ | 39775/371472 [3:12:36<29:20:31, 3.14it/s] 11%|█ | 39776/371472 [3:12:37<31:38:29, 2.91it/s] 11%|█ | 39777/371472 [3:12:37<31:04:03, 2.97it/s] 11%|█ | 39778/371472 [3:12:37<28:50:54, 3.19it/s] 11%|█ | 39779/371472 [3:12:37<28:00:55, 3.29it/s] 11%|█ | 39780/371472 [3:12:38<28:14:30, 3.26it/s] {'loss': 4.5172, 'learning_rate': 9.040541705272684e-07, 'epoch': 1.71} + 11%|█ | 39780/371472 [3:12:38<28:14:30, 3.26it/s] 11%|█ | 39781/371472 [3:12:38<28:57:17, 3.18it/s] 11%|█ | 39782/371472 [3:12:38<28:30:20, 3.23it/s] 11%|█ | 39783/371472 [3:12:39<27:45:52, 3.32it/s] 11%|█ | 39784/371472 [3:12:39<27:33:48, 3.34it/s] 11%|█ | 39785/371472 [3:12:39<27:24:13, 3.36it/s] 11%|█ | 39786/371472 [3:12:39<27:26:14, 3.36it/s] 11%|█ | 39787/371472 [3:12:40<28:26:20, 3.24it/s] 11%|█ | 39788/371472 [3:12:40<26:48:59, 3.44it/s] 11%|█ | 39789/371472 [3:12:40<26:14:20, 3.51it/s] 11%|█ | 39790/371472 [3:12:41<25:55:46, 3.55it/s] 11%|█ | 39791/371472 [3:12:41<25:40:09, 3.59it/s] 11%|█ | 39792/371472 [3:12:41<27:35:54, 3.34it/s] 11%|█ | 39793/371472 [3:12:42<26:59:32, 3.41it/s] 11%|█ | 39794/371472 [3:12:42<26:53:56, 3.43it/s] 11%|█ | 39795/371472 [3:12:42<26:03:09, 3.54it/s] 11%|█ | 39796/371472 [3:12:42<25:26:46, 3.62it/s] 11%|█ | 39797/371472 [3:12:43<25:16:29, 3.65it/s] 11%|█ | 39798/371472 [3:12:43<26:27:55, 3.48it/s] 11%|█ | 39799/371472 [3:12:43<25:28:36, 3.62it/s] 11%|█ | 39800/371472 [3:12:43<25:35:06, 3.60it/s] {'loss': 4.3175, 'learning_rate': 9.040056885517895e-07, 'epoch': 1.71} + 11%|█ | 39800/371472 [3:12:43<25:35:06, 3.60it/s] 11%|█ | 39801/371472 [3:12:44<25:05:34, 3.67it/s] 11%|█ | 39802/371472 [3:12:44<26:07:21, 3.53it/s] 11%|█ | 39803/371472 [3:12:44<25:40:44, 3.59it/s] 11%|█ | 39804/371472 [3:12:45<26:53:42, 3.43it/s] 11%|█ | 39805/371472 [3:12:45<26:39:00, 3.46it/s] 11%|█ | 39806/371472 [3:12:45<25:59:52, 3.54it/s] 11%|█ | 39807/371472 [3:12:45<27:30:40, 3.35it/s] 11%|█ | 39808/371472 [3:12:46<27:21:27, 3.37it/s] 11%|█ | 39809/371472 [3:12:46<27:12:13, 3.39it/s] 11%|█ | 39810/371472 [3:12:46<26:14:38, 3.51it/s] 11%|█ | 39811/371472 [3:12:47<26:37:23, 3.46it/s] 11%|█ | 39812/371472 [3:12:47<27:44:24, 3.32it/s] 11%|█ | 39813/371472 [3:12:47<26:35:22, 3.46it/s] 11%|█ | 39814/371472 [3:12:48<27:02:13, 3.41it/s] 11%|█ | 39815/371472 [3:12:48<28:44:45, 3.20it/s] 11%|█ | 39816/371472 [3:12:48<27:25:13, 3.36it/s] 11%|█ | 39817/371472 [3:12:48<26:41:59, 3.45it/s] 11%|█ | 39818/371472 [3:12:49<26:02:46, 3.54it/s] 11%|█ | 39819/371472 [3:12:49<26:21:50, 3.49it/s] 11%|█ | 39820/371472 [3:12:49<25:42:08, 3.58it/s] {'loss': 4.5334, 'learning_rate': 9.039572065763107e-07, 'epoch': 1.72} + 11%|█ | 39820/371472 [3:12:49<25:42:08, 3.58it/s] 11%|█ | 39821/371472 [3:12:50<26:11:59, 3.52it/s] 11%|█ | 39822/371472 [3:12:50<26:10:28, 3.52it/s] 11%|█ | 39823/371472 [3:12:50<26:12:20, 3.52it/s] 11%|█ | 39824/371472 [3:12:50<25:41:48, 3.59it/s] 11%|█ | 39825/371472 [3:12:51<26:00:42, 3.54it/s] 11%|█ | 39826/371472 [3:12:51<26:38:51, 3.46it/s] 11%|█ | 39827/371472 [3:12:51<25:46:15, 3.57it/s] 11%|█ | 39828/371472 [3:12:51<25:23:12, 3.63it/s] 11%|█ | 39829/371472 [3:12:52<26:09:38, 3.52it/s] 11%|█ | 39830/371472 [3:12:52<25:51:48, 3.56it/s] 11%|█ | 39831/371472 [3:12:52<27:28:57, 3.35it/s] 11%|█ | 39832/371472 [3:12:53<27:15:35, 3.38it/s] 11%|█ | 39833/371472 [3:12:53<26:25:56, 3.49it/s] 11%|█ | 39834/371472 [3:12:53<27:07:27, 3.40it/s] 11%|█ | 39835/371472 [3:12:54<26:19:47, 3.50it/s] 11%|█ | 39836/371472 [3:12:54<25:32:24, 3.61it/s] 11%|█ | 39837/371472 [3:12:54<24:35:48, 3.75it/s] 11%|█ | 39838/371472 [3:12:54<24:17:09, 3.79it/s] 11%|█ | 39839/371472 [3:12:55<25:21:38, 3.63it/s] 11%|█ | 39840/371472 [3:12:55<25:55:15, 3.55it/s] {'loss': 4.4264, 'learning_rate': 9.039087246008316e-07, 'epoch': 1.72} + 11%|█ | 39840/371472 [3:12:55<25:55:15, 3.55it/s] 11%|█ | 39841/371472 [3:12:55<26:16:50, 3.51it/s] 11%|█ | 39842/371472 [3:12:55<26:50:05, 3.43it/s] 11%|█ | 39843/371472 [3:12:56<26:06:49, 3.53it/s] 11%|█ | 39844/371472 [3:12:56<26:24:41, 3.49it/s] 11%|█ | 39845/371472 [3:12:56<27:24:25, 3.36it/s] 11%|█ | 39846/371472 [3:12:57<27:24:19, 3.36it/s] 11%|█ | 39847/371472 [3:12:57<26:45:25, 3.44it/s] 11%|█ | 39848/371472 [3:12:57<27:15:28, 3.38it/s] 11%|█ | 39849/371472 [3:12:58<26:57:02, 3.42it/s] 11%|█ | 39850/371472 [3:12:58<25:43:57, 3.58it/s] 11%|█ | 39851/371472 [3:12:58<25:03:50, 3.68it/s] 11%|█ | 39852/371472 [3:12:58<24:58:05, 3.69it/s] 11%|█ | 39853/371472 [3:12:59<25:28:16, 3.62it/s] 11%|█ | 39854/371472 [3:12:59<29:45:46, 3.10it/s] 11%|█ | 39855/371472 [3:12:59<27:58:46, 3.29it/s] 11%|█ | 39856/371472 [3:13:00<29:06:05, 3.17it/s] 11%|█ | 39857/371472 [3:13:00<29:16:18, 3.15it/s] 11%|█ | 39858/371472 [3:13:00<27:44:41, 3.32it/s] 11%|█ | 39859/371472 [3:13:00<26:47:30, 3.44it/s] 11%|█ | 39860/371472 [3:13:01<26:18:52, 3.50it/s] {'loss': 4.4285, 'learning_rate': 9.038602426253528e-07, 'epoch': 1.72} + 11%|█ | 39860/371472 [3:13:01<26:18:52, 3.50it/s] 11%|█ | 39861/371472 [3:13:01<25:34:28, 3.60it/s] 11%|█ | 39862/371472 [3:13:01<25:34:02, 3.60it/s] 11%|█ | 39863/371472 [3:13:02<24:54:39, 3.70it/s] 11%|█ | 39864/371472 [3:13:02<24:36:42, 3.74it/s] 11%|█ | 39865/371472 [3:13:02<25:25:21, 3.62it/s] 11%|█ | 39866/371472 [3:13:02<24:59:35, 3.69it/s] 11%|█ | 39867/371472 [3:13:03<24:59:34, 3.69it/s] 11%|█ | 39868/371472 [3:13:03<27:35:44, 3.34it/s] 11%|█ | 39869/371472 [3:13:03<27:23:07, 3.36it/s] 11%|█ | 39870/371472 [3:13:04<27:55:49, 3.30it/s] 11%|█ | 39871/371472 [3:13:04<27:56:02, 3.30it/s] 11%|█ | 39872/371472 [3:13:04<27:31:51, 3.35it/s] 11%|█ | 39873/371472 [3:13:04<26:29:08, 3.48it/s] 11%|█ | 39874/371472 [3:13:05<26:33:42, 3.47it/s] 11%|█ | 39875/371472 [3:13:05<26:28:04, 3.48it/s] 11%|█ | 39876/371472 [3:13:05<26:11:08, 3.52it/s] 11%|█ | 39877/371472 [3:13:06<25:18:56, 3.64it/s] 11%|█ | 39878/371472 [3:13:06<25:36:36, 3.60it/s] 11%|█ | 39879/371472 [3:13:06<31:40:29, 2.91it/s] 11%|█ | 39880/371472 [3:13:07<30:53:02, 2.98it/s] {'loss': 4.2603, 'learning_rate': 9.038117606498739e-07, 'epoch': 1.72} + 11%|█ | 39880/371472 [3:13:07<30:53:02, 2.98it/s] 11%|█ | 39881/371472 [3:13:07<29:53:48, 3.08it/s] 11%|█ | 39882/371472 [3:13:07<29:31:22, 3.12it/s] 11%|█ | 39883/371472 [3:13:08<28:32:00, 3.23it/s] 11%|█ | 39884/371472 [3:13:08<27:44:30, 3.32it/s] 11%|█ | 39885/371472 [3:13:08<27:04:35, 3.40it/s] 11%|█ | 39886/371472 [3:13:08<27:37:32, 3.33it/s] 11%|█ | 39887/371472 [3:13:09<27:41:35, 3.33it/s] 11%|█ | 39888/371472 [3:13:09<27:30:27, 3.35it/s] 11%|█ | 39889/371472 [3:13:09<26:48:58, 3.43it/s] 11%|█ | 39890/371472 [3:13:10<26:45:50, 3.44it/s] 11%|█ | 39891/371472 [3:13:10<27:23:52, 3.36it/s] 11%|█ | 39892/371472 [3:13:10<27:06:46, 3.40it/s] 11%|█ | 39893/371472 [3:13:10<26:19:49, 3.50it/s] 11%|█ | 39894/371472 [3:13:11<26:03:41, 3.53it/s] 11%|█ | 39895/371472 [3:13:11<26:58:48, 3.41it/s] 11%|█ | 39896/371472 [3:13:11<27:09:55, 3.39it/s] 11%|█ | 39897/371472 [3:13:12<26:03:03, 3.54it/s] 11%|█ | 39898/371472 [3:13:12<27:17:22, 3.38it/s] 11%|█ | 39899/371472 [3:13:12<26:15:43, 3.51it/s] 11%|█ | 39900/371472 [3:13:12<25:29:33, 3.61it/s] {'loss': 4.4473, 'learning_rate': 9.037632786743951e-07, 'epoch': 1.72} + 11%|█ | 39900/371472 [3:13:12<25:29:33, 3.61it/s] 11%|█ | 39901/371472 [3:13:13<26:36:28, 3.46it/s] 11%|█ | 39902/371472 [3:13:13<26:15:51, 3.51it/s] 11%|█ | 39903/371472 [3:13:13<27:05:00, 3.40it/s] 11%|█ | 39904/371472 [3:13:14<26:10:07, 3.52it/s] 11%|█ | 39905/371472 [3:13:14<26:17:36, 3.50it/s] 11%|█ | 39906/371472 [3:13:14<26:52:50, 3.43it/s] 11%|█ | 39907/371472 [3:13:15<29:19:00, 3.14it/s] 11%|█ | 39908/371472 [3:13:15<27:52:32, 3.30it/s] 11%|█ | 39909/371472 [3:13:15<29:01:54, 3.17it/s] 11%|█ | 39910/371472 [3:13:15<28:36:57, 3.22it/s] 11%|█ | 39911/371472 [3:13:16<27:13:12, 3.38it/s] 11%|█ | 39912/371472 [3:13:16<26:14:29, 3.51it/s] 11%|█ | 39913/371472 [3:13:16<26:07:38, 3.53it/s] 11%|█ | 39914/371472 [3:13:17<28:17:34, 3.26it/s] 11%|█ | 39915/371472 [3:13:17<30:08:34, 3.06it/s] 11%|█ | 39916/371472 [3:13:17<29:47:11, 3.09it/s] 11%|█ | 39917/371472 [3:13:18<28:35:03, 3.22it/s] 11%|█ | 39918/371472 [3:13:18<28:10:49, 3.27it/s] 11%|█ | 39919/371472 [3:13:18<27:43:12, 3.32it/s] 11%|█ | 39920/371472 [3:13:18<26:32:04, 3.47it/s] {'loss': 4.3832, 'learning_rate': 9.037147966989161e-07, 'epoch': 1.72} + 11%|█ | 39920/371472 [3:13:18<26:32:04, 3.47it/s] 11%|█ | 39921/371472 [3:13:19<25:38:09, 3.59it/s] 11%|█ | 39922/371472 [3:13:19<24:50:51, 3.71it/s] 11%|█ | 39923/371472 [3:13:19<24:32:24, 3.75it/s] 11%|█ | 39924/371472 [3:13:20<24:46:37, 3.72it/s] 11%|█ | 39925/371472 [3:13:20<24:10:14, 3.81it/s] 11%|█ | 39926/371472 [3:13:20<24:00:34, 3.84it/s] 11%|█ | 39927/371472 [3:13:20<24:28:01, 3.76it/s] 11%|█ | 39928/371472 [3:13:21<25:27:39, 3.62it/s] 11%|█ | 39929/371472 [3:13:21<25:46:02, 3.57it/s] 11%|█ | 39930/371472 [3:13:21<25:41:18, 3.59it/s] 11%|█ | 39931/371472 [3:13:21<26:17:49, 3.50it/s] 11%|█ | 39932/371472 [3:13:22<26:07:19, 3.53it/s] 11%|█ | 39933/371472 [3:13:22<25:32:34, 3.61it/s] 11%|█ | 39934/371472 [3:13:22<25:40:39, 3.59it/s] 11%|█ | 39935/371472 [3:13:23<25:53:45, 3.56it/s] 11%|█ | 39936/371472 [3:13:23<26:53:49, 3.42it/s] 11%|█ | 39937/371472 [3:13:23<26:49:02, 3.43it/s] 11%|█ | 39938/371472 [3:13:23<25:42:51, 3.58it/s] 11%|█ | 39939/371472 [3:13:24<26:16:06, 3.51it/s] 11%|█ | 39940/371472 [3:13:24<26:04:26, 3.53it/s] {'loss': 4.2055, 'learning_rate': 9.036663147234373e-07, 'epoch': 1.72} + 11%|█ | 39940/371472 [3:13:24<26:04:26, 3.53it/s] 11%|█ | 39941/371472 [3:13:24<26:58:55, 3.41it/s] 11%|█ | 39942/371472 [3:13:25<26:52:59, 3.43it/s] 11%|█ | 39943/371472 [3:13:25<26:43:22, 3.45it/s] 11%|█ | 39944/371472 [3:13:25<26:51:34, 3.43it/s] 11%|█ | 39945/371472 [3:13:25<27:09:48, 3.39it/s] 11%|█ | 39946/371472 [3:13:26<27:11:16, 3.39it/s] 11%|█ | 39947/371472 [3:13:26<26:05:37, 3.53it/s] 11%|█ | 39948/371472 [3:13:26<26:43:44, 3.45it/s] 11%|█ | 39949/371472 [3:13:27<26:31:23, 3.47it/s] 11%|█ | 39950/371472 [3:13:27<28:41:15, 3.21it/s] 11%|█ | 39951/371472 [3:13:27<29:10:40, 3.16it/s] 11%|█ | 39952/371472 [3:13:28<27:25:21, 3.36it/s] 11%|█ | 39953/371472 [3:13:28<26:52:31, 3.43it/s] 11%|█ | 39954/371472 [3:13:28<27:34:15, 3.34it/s] 11%|█ | 39955/371472 [3:13:28<27:14:59, 3.38it/s] 11%|█ | 39956/371472 [3:13:29<26:19:42, 3.50it/s] 11%|█ | 39957/371472 [3:13:29<27:04:41, 3.40it/s] 11%|█ | 39958/371472 [3:13:29<26:05:11, 3.53it/s] 11%|█ | 39959/371472 [3:13:30<26:28:50, 3.48it/s] 11%|█ | 39960/371472 [3:13:30<25:28:51, 3.61it/s] {'loss': 4.4615, 'learning_rate': 9.036178327479583e-07, 'epoch': 1.72} + 11%|█ | 39960/371472 [3:13:30<25:28:51, 3.61it/s] 11%|█ | 39961/371472 [3:13:30<26:19:36, 3.50it/s] 11%|█ | 39962/371472 [3:13:30<27:08:33, 3.39it/s] 11%|█ | 39963/371472 [3:13:31<26:59:02, 3.41it/s] 11%|█ | 39964/371472 [3:13:31<27:02:41, 3.40it/s] 11%|█ | 39965/371472 [3:13:31<26:23:43, 3.49it/s] 11%|█ | 39966/371472 [3:13:32<26:00:28, 3.54it/s] 11%|█ | 39967/371472 [3:13:32<25:55:12, 3.55it/s] 11%|█ | 39968/371472 [3:13:32<27:12:18, 3.38it/s] 11%|█ | 39969/371472 [3:13:32<25:37:58, 3.59it/s] 11%|█ | 39970/371472 [3:13:33<26:15:55, 3.51it/s] 11%|█ | 39971/371472 [3:13:33<25:41:55, 3.58it/s] 11%|█ | 39972/371472 [3:13:33<26:38:20, 3.46it/s] 11%|█ | 39973/371472 [3:13:34<27:19:30, 3.37it/s] 11%|█ | 39974/371472 [3:13:34<26:34:57, 3.46it/s] 11%|█ | 39975/371472 [3:13:34<28:04:45, 3.28it/s] 11%|█ | 39976/371472 [3:13:35<27:11:28, 3.39it/s] 11%|█ | 39977/371472 [3:13:35<26:51:49, 3.43it/s] 11%|█ | 39978/371472 [3:13:35<26:51:26, 3.43it/s] 11%|█ | 39979/371472 [3:13:35<26:08:07, 3.52it/s] 11%|█ | 39980/371472 [3:13:36<25:27:31, 3.62it/s] {'loss': 4.5087, 'learning_rate': 9.035693507724794e-07, 'epoch': 1.72} + 11%|█ | 39980/371472 [3:13:36<25:27:31, 3.62it/s] 11%|█ | 39981/371472 [3:13:36<26:50:32, 3.43it/s] 11%|█ | 39982/371472 [3:13:36<28:19:12, 3.25it/s] 11%|█ | 39983/371472 [3:13:37<28:21:51, 3.25it/s] 11%|█ | 39984/371472 [3:13:37<27:57:23, 3.29it/s] 11%|█ | 39985/371472 [3:13:37<27:18:56, 3.37it/s] 11%|█ | 39986/371472 [3:13:37<26:13:47, 3.51it/s] 11%|█ | 39987/371472 [3:13:38<25:48:06, 3.57it/s] 11%|█ | 39988/371472 [3:13:38<25:27:14, 3.62it/s] 11%|█ | 39989/371472 [3:13:38<27:58:07, 3.29it/s] 11%|█ | 39990/371472 [3:13:39<28:15:15, 3.26it/s] 11%|█ | 39991/371472 [3:13:39<28:31:03, 3.23it/s] 11%|█ | 39992/371472 [3:13:39<27:32:48, 3.34it/s] 11%|█ | 39993/371472 [3:13:40<27:19:54, 3.37it/s] 11%|█ | 39994/371472 [3:13:40<26:44:12, 3.44it/s] 11%|█ | 39995/371472 [3:13:40<29:18:53, 3.14it/s] 11%|█ | 39996/371472 [3:13:40<28:22:13, 3.25it/s] 11%|█ | 39997/371472 [3:13:41<27:04:57, 3.40it/s] 11%|█ | 39998/371472 [3:13:41<26:17:33, 3.50it/s] 11%|█ | 39999/371472 [3:13:41<25:31:13, 3.61it/s] 11%|█ | 40000/371472 [3:13:42<25:34:35, 3.60it/s] {'loss': 4.2625, 'learning_rate': 9.035208687970005e-07, 'epoch': 1.72} + 11%|█ | 40000/371472 [3:13:42<25:34:35, 3.60it/s] 11%|█ | 40001/371472 [3:13:42<27:19:16, 3.37it/s] 11%|█ | 40002/371472 [3:13:42<27:02:36, 3.40it/s] 11%|█ | 40003/371472 [3:13:42<26:50:38, 3.43it/s] 11%|█ | 40004/371472 [3:13:43<27:31:37, 3.34it/s] 11%|█ | 40005/371472 [3:13:43<26:28:48, 3.48it/s] 11%|█ | 40006/371472 [3:13:43<26:26:21, 3.48it/s] 11%|█ | 40007/371472 [3:13:44<26:06:02, 3.53it/s] 11%|█ | 40008/371472 [3:13:44<25:51:10, 3.56it/s] 11%|█ | 40009/371472 [3:13:44<26:16:59, 3.50it/s] 11%|█ | 40010/371472 [3:13:44<27:04:48, 3.40it/s] 11%|█ | 40011/371472 [3:13:45<27:13:45, 3.38it/s] 11%|█ | 40012/371472 [3:13:45<26:11:01, 3.52it/s] 11%|█ | 40013/371472 [3:13:45<26:25:41, 3.48it/s] 11%|█ | 40014/371472 [3:13:46<25:44:03, 3.58it/s] 11%|█ | 40015/371472 [3:13:46<25:47:21, 3.57it/s] 11%|█ | 40016/371472 [3:13:46<26:11:43, 3.51it/s] 11%|█ | 40017/371472 [3:13:46<26:13:55, 3.51it/s] 11%|█ | 40018/371472 [3:13:47<26:15:10, 3.51it/s] 11%|█ | 40019/371472 [3:13:47<26:06:43, 3.53it/s] 11%|█ | 40020/371472 [3:13:47<25:32:11, 3.61it/s] {'loss': 4.1945, 'learning_rate': 9.034723868215217e-07, 'epoch': 1.72} + 11%|█ | 40020/371472 [3:13:47<25:32:11, 3.61it/s] 11%|█ | 40021/371472 [3:13:48<25:15:22, 3.65it/s] 11%|█ | 40022/371472 [3:13:48<25:46:50, 3.57it/s] 11%|█ | 40023/371472 [3:13:48<25:13:04, 3.65it/s] 11%|█ | 40024/371472 [3:13:48<27:52:57, 3.30it/s] 11%|█ | 40025/371472 [3:13:49<27:11:48, 3.39it/s] 11%|█ | 40026/371472 [3:13:49<28:10:36, 3.27it/s] 11%|█ | 40027/371472 [3:13:49<26:53:02, 3.42it/s] 11%|█ | 40028/371472 [3:13:50<26:33:43, 3.47it/s] 11%|█ | 40029/371472 [3:13:50<25:43:28, 3.58it/s] 11%|█ | 40030/371472 [3:13:50<25:02:33, 3.68it/s] 11%|█ | 40031/371472 [3:13:50<26:13:23, 3.51it/s] 11%|█ | 40032/371472 [3:13:51<27:21:22, 3.37it/s] 11%|█ | 40033/371472 [3:13:51<27:03:41, 3.40it/s] 11%|█ | 40034/371472 [3:13:51<27:08:25, 3.39it/s] 11%|█ | 40035/371472 [3:13:52<26:23:41, 3.49it/s] 11%|█ | 40036/371472 [3:13:52<25:41:46, 3.58it/s] 11%|█ | 40037/371472 [3:13:52<25:10:23, 3.66it/s] 11%|█ | 40038/371472 [3:13:52<24:37:57, 3.74it/s] 11%|█ | 40039/371472 [3:13:53<26:37:36, 3.46it/s] 11%|█ | 40040/371472 [3:13:53<26:22:22, 3.49it/s] {'loss': 4.6889, 'learning_rate': 9.034239048460428e-07, 'epoch': 1.72} + 11%|█ | 40040/371472 [3:13:53<26:22:22, 3.49it/s] 11%|█ | 40041/371472 [3:13:53<27:11:26, 3.39it/s] 11%|█ | 40042/371472 [3:13:54<26:33:59, 3.47it/s] 11%|█ | 40043/371472 [3:13:54<27:57:21, 3.29it/s] 11%|█ | 40044/371472 [3:13:54<28:52:54, 3.19it/s] 11%|█ | 40045/371472 [3:13:55<27:09:39, 3.39it/s] 11%|█ | 40046/371472 [3:13:55<26:08:11, 3.52it/s] 11%|█ | 40047/371472 [3:13:55<27:41:16, 3.33it/s] 11%|█ | 40048/371472 [3:13:55<28:10:56, 3.27it/s] 11%|█ | 40049/371472 [3:13:56<26:52:26, 3.43it/s] 11%|█ | 40050/371472 [3:13:56<26:58:13, 3.41it/s] 11%|█ | 40051/371472 [3:13:56<28:00:41, 3.29it/s] 11%|█ | 40052/371472 [3:13:57<27:05:52, 3.40it/s] 11%|█ | 40053/371472 [3:13:57<26:14:08, 3.51it/s] 11%|█ | 40054/371472 [3:13:57<26:46:20, 3.44it/s] 11%|█ | 40055/371472 [3:13:57<26:21:57, 3.49it/s] 11%|█ | 40056/371472 [3:13:58<26:37:39, 3.46it/s] 11%|█ | 40057/371472 [3:13:58<26:04:59, 3.53it/s] 11%|█ | 40058/371472 [3:13:58<27:36:59, 3.33it/s] 11%|█ | 40059/371472 [3:13:59<27:09:26, 3.39it/s] 11%|█ | 40060/371472 [3:13:59<26:28:49, 3.48it/s] {'loss': 4.3781, 'learning_rate': 9.033754228705638e-07, 'epoch': 1.73} + 11%|█ | 40060/371472 [3:13:59<26:28:49, 3.48it/s] 11%|█ | 40061/371472 [3:13:59<26:29:15, 3.48it/s] 11%|█ | 40062/371472 [3:13:59<26:29:47, 3.47it/s] 11%|█ | 40063/371472 [3:14:00<28:04:59, 3.28it/s] 11%|█ | 40064/371472 [3:14:00<27:24:27, 3.36it/s] 11%|█ | 40065/371472 [3:14:00<28:22:50, 3.24it/s] 11%|█ | 40066/371472 [3:14:01<26:37:03, 3.46it/s] 11%|█ | 40067/371472 [3:14:01<27:13:57, 3.38it/s] 11%|█ | 40068/371472 [3:14:01<28:09:33, 3.27it/s] 11%|█ | 40069/371472 [3:14:02<28:07:02, 3.27it/s] 11%|█ | 40070/371472 [3:14:02<28:03:58, 3.28it/s] 11%|█ | 40071/371472 [3:14:02<27:32:24, 3.34it/s] 11%|█ | 40072/371472 [3:14:03<26:59:10, 3.41it/s] 11%|█ | 40073/371472 [3:14:03<28:20:02, 3.25it/s] 11%|█ | 40074/371472 [3:14:03<28:04:28, 3.28it/s] 11%|█ | 40075/371472 [3:14:03<27:48:26, 3.31it/s] 11%|█ | 40076/371472 [3:14:04<26:54:30, 3.42it/s] 11%|█ | 40077/371472 [3:14:04<26:01:27, 3.54it/s] 11%|█ | 40078/371472 [3:14:04<27:05:56, 3.40it/s] 11%|█ | 40079/371472 [3:14:05<26:50:24, 3.43it/s] 11%|█ | 40080/371472 [3:14:05<27:05:40, 3.40it/s] {'loss': 4.4345, 'learning_rate': 9.03326940895085e-07, 'epoch': 1.73} + 11%|█ | 40080/371472 [3:14:05<27:05:40, 3.40it/s] 11%|█ | 40081/371472 [3:14:05<26:16:10, 3.50it/s] 11%|█ | 40082/371472 [3:14:05<25:47:56, 3.57it/s] 11%|█ | 40083/371472 [3:14:06<26:22:29, 3.49it/s] 11%|█ | 40084/371472 [3:14:06<25:32:44, 3.60it/s] 11%|█ | 40085/371472 [3:14:06<26:18:39, 3.50it/s] 11%|█ | 40086/371472 [3:14:07<26:33:50, 3.47it/s] 11%|█ | 40087/371472 [3:14:07<27:43:02, 3.32it/s] 11%|█ | 40088/371472 [3:14:07<26:34:51, 3.46it/s] 11%|█ | 40089/371472 [3:14:07<25:35:56, 3.60it/s] 11%|█ | 40090/371472 [3:14:08<27:28:34, 3.35it/s] 11%|█ | 40091/371472 [3:14:08<27:32:49, 3.34it/s] 11%|█ | 40092/371472 [3:14:08<26:10:24, 3.52it/s] 11%|█ | 40093/371472 [3:14:09<25:58:08, 3.54it/s] 11%|█ | 40094/371472 [3:14:09<25:18:43, 3.64it/s] 11%|█ | 40095/371472 [3:14:09<25:20:41, 3.63it/s] 11%|█ | 40096/371472 [3:14:09<24:45:30, 3.72it/s] 11%|█ | 40097/371472 [3:14:10<25:04:43, 3.67it/s] 11%|█ | 40098/371472 [3:14:10<25:54:33, 3.55it/s] 11%|█ | 40099/371472 [3:14:10<29:36:13, 3.11it/s] 11%|█ | 40100/371472 [3:14:11<28:39:31, 3.21it/s] {'loss': 4.315, 'learning_rate': 9.032784589196061e-07, 'epoch': 1.73} + 11%|█ | 40100/371472 [3:14:11<28:39:31, 3.21it/s] 11%|█ | 40101/371472 [3:14:11<27:42:20, 3.32it/s] 11%|█ | 40102/371472 [3:14:11<26:56:15, 3.42it/s] 11%|█ | 40103/371472 [3:14:12<27:43:07, 3.32it/s] 11%|█ | 40104/371472 [3:14:12<26:18:07, 3.50it/s] 11%|█ | 40105/371472 [3:14:12<25:23:55, 3.62it/s] 11%|█ | 40106/371472 [3:14:12<25:19:56, 3.63it/s] 11%|█ | 40107/371472 [3:14:13<25:49:27, 3.56it/s] 11%|█ | 40108/371472 [3:14:13<25:32:46, 3.60it/s] 11%|█ | 40109/371472 [3:14:13<26:16:16, 3.50it/s] 11%|█ | 40110/371472 [3:14:14<27:26:46, 3.35it/s] 11%|█ | 40111/371472 [3:14:14<27:32:49, 3.34it/s] 11%|█ | 40112/371472 [3:14:14<27:27:47, 3.35it/s] 11%|█ | 40113/371472 [3:14:14<27:47:42, 3.31it/s] 11%|█ | 40114/371472 [3:14:15<27:07:37, 3.39it/s] 11%|█ | 40115/371472 [3:14:15<25:37:18, 3.59it/s] 11%|█ | 40116/371472 [3:14:15<26:00:53, 3.54it/s] 11%|█ | 40117/371472 [3:14:16<26:04:37, 3.53it/s] 11%|█ | 40118/371472 [3:14:16<26:57:41, 3.41it/s] 11%|█ | 40119/371472 [3:14:16<26:47:06, 3.44it/s] 11%|█ | 40120/371472 [3:14:16<26:41:32, 3.45it/s] {'loss': 4.5759, 'learning_rate': 9.032299769441272e-07, 'epoch': 1.73} + 11%|█ | 40120/371472 [3:14:16<26:41:32, 3.45it/s] 11%|█ | 40121/371472 [3:14:17<28:09:45, 3.27it/s] 11%|█ | 40122/371472 [3:14:17<26:28:14, 3.48it/s] 11%|█ | 40123/371472 [3:14:17<29:19:13, 3.14it/s] 11%|█ | 40124/371472 [3:14:18<27:44:30, 3.32it/s] 11%|█ | 40125/371472 [3:14:18<27:59:34, 3.29it/s] 11%|█ | 40126/371472 [3:14:18<26:43:38, 3.44it/s] 11%|█ | 40127/371472 [3:14:18<25:53:58, 3.55it/s] 11%|█ | 40128/371472 [3:14:19<25:16:16, 3.64it/s] 11%|█ | 40129/371472 [3:14:19<25:08:41, 3.66it/s] 11%|█ | 40130/371472 [3:14:19<26:09:55, 3.52it/s] 11%|█ | 40131/371472 [3:14:20<25:34:28, 3.60it/s] 11%|█ | 40132/371472 [3:14:20<25:46:32, 3.57it/s] 11%|█ | 40133/371472 [3:14:20<25:21:21, 3.63it/s] 11%|█ | 40134/371472 [3:14:20<25:36:11, 3.59it/s] 11%|█ | 40135/371472 [3:14:21<25:50:29, 3.56it/s] 11%|█ | 40136/371472 [3:14:21<26:00:43, 3.54it/s] 11%|█ | 40137/371472 [3:14:21<26:50:03, 3.43it/s] 11%|█ | 40138/371472 [3:14:22<26:29:26, 3.47it/s] 11%|█ | 40139/371472 [3:14:22<25:37:56, 3.59it/s] 11%|█ | 40140/371472 [3:14:22<26:50:17, 3.43it/s] {'loss': 4.4615, 'learning_rate': 9.031814949686482e-07, 'epoch': 1.73} + 11%|█ | 40140/371472 [3:14:22<26:50:17, 3.43it/s] 11%|█ | 40141/371472 [3:14:22<27:33:17, 3.34it/s] 11%|█ | 40142/371472 [3:14:23<27:07:54, 3.39it/s] 11%|█ | 40143/371472 [3:14:23<26:06:44, 3.52it/s] 11%|█ | 40144/371472 [3:14:23<27:17:53, 3.37it/s] 11%|█ | 40145/371472 [3:14:24<29:38:59, 3.10it/s] 11%|█ | 40146/371472 [3:14:24<28:41:33, 3.21it/s] 11%|█ | 40147/371472 [3:14:24<28:24:45, 3.24it/s] 11%|█ | 40148/371472 [3:14:25<27:49:35, 3.31it/s] 11%|█ | 40149/371472 [3:14:25<28:35:07, 3.22it/s] 11%|█ | 40150/371472 [3:14:25<27:30:54, 3.34it/s] 11%|█ | 40151/371472 [3:14:26<31:35:23, 2.91it/s] 11%|█ | 40152/371472 [3:14:26<30:07:38, 3.05it/s] 11%|█ | 40153/371472 [3:14:26<28:53:13, 3.19it/s] 11%|█ | 40154/371472 [3:14:26<27:41:16, 3.32it/s] 11%|█ | 40155/371472 [3:14:27<27:08:51, 3.39it/s] 11%|█ | 40156/371472 [3:14:27<26:48:01, 3.43it/s] 11%|█ | 40157/371472 [3:14:27<27:21:38, 3.36it/s] 11%|█ | 40158/371472 [3:14:28<27:32:06, 3.34it/s] 11%|█ | 40159/371472 [3:14:28<27:02:05, 3.40it/s] 11%|█ | 40160/371472 [3:14:28<26:02:39, 3.53it/s] {'loss': 4.6207, 'learning_rate': 9.031330129931693e-07, 'epoch': 1.73} + 11%|█ | 40160/371472 [3:14:28<26:02:39, 3.53it/s] 11%|█ | 40161/371472 [3:14:29<26:37:53, 3.46it/s] 11%|█ | 40162/371472 [3:14:29<26:13:10, 3.51it/s] 11%|█ | 40163/371472 [3:14:29<25:58:29, 3.54it/s] 11%|█ | 40164/371472 [3:14:29<26:17:09, 3.50it/s] 11%|█ | 40165/371472 [3:14:30<26:39:32, 3.45it/s] 11%|█ | 40166/371472 [3:14:30<28:20:43, 3.25it/s] 11%|█ | 40167/371472 [3:14:30<26:37:48, 3.46it/s] 11%|█ | 40168/371472 [3:14:31<26:25:06, 3.48it/s] 11%|█ | 40169/371472 [3:14:31<26:32:46, 3.47it/s] 11%|█ | 40170/371472 [3:14:31<27:35:52, 3.33it/s] 11%|█ | 40171/371472 [3:14:31<26:04:02, 3.53it/s] 11%|█ | 40172/371472 [3:14:32<25:36:08, 3.59it/s] 11%|█ | 40173/371472 [3:14:32<25:23:02, 3.63it/s] 11%|█ | 40174/371472 [3:14:32<25:13:48, 3.65it/s] 11%|█ | 40175/371472 [3:14:32<24:58:14, 3.69it/s] 11%|█ | 40176/371472 [3:14:33<26:35:25, 3.46it/s] 11%|█ | 40177/371472 [3:14:33<30:51:40, 2.98it/s] 11%|█ | 40178/371472 [3:14:33<28:35:14, 3.22it/s] 11%|█ | 40179/371472 [3:14:34<27:35:47, 3.33it/s] 11%|█ | 40180/371472 [3:14:34<28:26:03, 3.24it/s] {'loss': 4.4212, 'learning_rate': 9.030845310176905e-07, 'epoch': 1.73} + 11%|█ | 40180/371472 [3:14:34<28:26:03, 3.24it/s] 11%|█ | 40181/371472 [3:14:34<28:31:27, 3.23it/s] 11%|█ | 40182/371472 [3:14:35<27:27:04, 3.35it/s] 11%|█ | 40183/371472 [3:14:35<27:06:12, 3.40it/s] 11%|█ | 40184/371472 [3:14:35<26:19:06, 3.50it/s] 11%|█ | 40185/371472 [3:14:36<26:04:28, 3.53it/s] 11%|█ | 40186/371472 [3:14:36<25:09:45, 3.66it/s] 11%|█ | 40187/371472 [3:14:36<25:15:15, 3.64it/s] 11%|█ | 40188/371472 [3:14:36<26:03:33, 3.53it/s] 11%|█ | 40189/371472 [3:14:37<25:41:15, 3.58it/s] 11%|█ | 40190/371472 [3:14:37<25:14:05, 3.65it/s] 11%|█ | 40191/371472 [3:14:37<25:16:24, 3.64it/s] 11%|█ | 40192/371472 [3:14:37<25:10:22, 3.66it/s] 11%|█ | 40193/371472 [3:14:38<25:29:40, 3.61it/s] 11%|█ | 40194/371472 [3:14:38<25:04:29, 3.67it/s] 11%|█ | 40195/371472 [3:14:38<26:10:22, 3.52it/s] 11%|█ | 40196/371472 [3:14:39<24:59:00, 3.68it/s] 11%|█ | 40197/371472 [3:14:39<24:21:48, 3.78it/s] 11%|█ | 40198/371472 [3:14:39<24:39:25, 3.73it/s] 11%|█ | 40199/371472 [3:14:39<27:16:45, 3.37it/s] 11%|█ | 40200/371472 [3:14:40<25:45:28, 3.57it/s] {'loss': 4.3989, 'learning_rate': 9.030360490422117e-07, 'epoch': 1.73} + 11%|█ | 40200/371472 [3:14:40<25:45:28, 3.57it/s] 11%|█ | 40201/371472 [3:14:40<29:24:59, 3.13it/s] 11%|█ | 40202/371472 [3:14:40<28:07:21, 3.27it/s] 11%|█ | 40203/371472 [3:14:41<27:09:24, 3.39it/s] 11%|█ | 40204/371472 [3:14:41<26:52:30, 3.42it/s] 11%|█ | 40205/371472 [3:14:41<26:52:03, 3.42it/s] 11%|█ | 40206/371472 [3:14:41<26:43:19, 3.44it/s] 11%|█ | 40207/371472 [3:14:42<26:50:50, 3.43it/s] 11%|█ | 40208/371472 [3:14:42<26:54:43, 3.42it/s] 11%|█ | 40209/371472 [3:14:42<27:16:39, 3.37it/s] 11%|█ | 40210/371472 [3:14:43<26:42:22, 3.45it/s] 11%|█ | 40211/371472 [3:14:43<27:25:27, 3.36it/s] 11%|█ | 40212/371472 [3:14:43<26:26:32, 3.48it/s] 11%|█ | 40213/371472 [3:14:43<25:37:16, 3.59it/s] 11%|█ | 40214/371472 [3:14:44<27:47:06, 3.31it/s] 11%|█ | 40215/371472 [3:14:44<26:58:02, 3.41it/s] 11%|█ | 40216/371472 [3:14:45<30:10:34, 3.05it/s] 11%|█ | 40217/371472 [3:14:45<28:32:05, 3.22it/s] 11%|█ | 40218/371472 [3:14:45<27:09:32, 3.39it/s] 11%|█ | 40219/371472 [3:14:45<26:26:28, 3.48it/s] 11%|█ | 40220/371472 [3:14:46<25:54:32, 3.55it/s] {'loss': 4.4788, 'learning_rate': 9.029875670667326e-07, 'epoch': 1.73} + 11%|█ | 40220/371472 [3:14:46<25:54:32, 3.55it/s] 11%|█ | 40221/371472 [3:14:46<26:21:36, 3.49it/s] 11%|█ | 40222/371472 [3:14:46<26:20:57, 3.49it/s] 11%|█ | 40223/371472 [3:14:46<26:57:52, 3.41it/s] 11%|█ | 40224/371472 [3:14:47<27:09:08, 3.39it/s] 11%|█ | 40225/371472 [3:14:47<27:46:55, 3.31it/s] 11%|█ | 40226/371472 [3:14:47<26:27:43, 3.48it/s] 11%|█ | 40227/371472 [3:14:48<25:59:22, 3.54it/s] 11%|█ | 40228/371472 [3:14:48<25:43:25, 3.58it/s] 11%|█ | 40229/371472 [3:14:48<26:47:10, 3.44it/s] 11%|█ | 40230/371472 [3:14:48<26:51:49, 3.43it/s] 11%|█ | 40231/371472 [3:14:49<26:33:36, 3.46it/s] 11%|█ | 40232/371472 [3:14:49<27:00:07, 3.41it/s] 11%|█ | 40233/371472 [3:14:49<28:32:19, 3.22it/s] 11%|█ | 40234/371472 [3:14:50<26:54:47, 3.42it/s] 11%|█ | 40235/371472 [3:14:50<27:27:32, 3.35it/s] 11%|█ | 40236/371472 [3:14:50<26:45:41, 3.44it/s] 11%|█ | 40237/371472 [3:14:51<26:13:12, 3.51it/s] 11%|█ | 40238/371472 [3:14:51<27:21:30, 3.36it/s] 11%|█ | 40239/371472 [3:14:51<26:36:18, 3.46it/s] 11%|█ | 40240/371472 [3:14:51<26:16:00, 3.50it/s] {'loss': 4.4364, 'learning_rate': 9.029390850912538e-07, 'epoch': 1.73} + 11%|█ | 40240/371472 [3:14:51<26:16:00, 3.50it/s] 11%|█ | 40241/371472 [3:14:52<25:38:09, 3.59it/s] 11%|█ | 40242/371472 [3:14:52<26:22:54, 3.49it/s] 11%|█ | 40243/371472 [3:14:52<26:59:35, 3.41it/s] 11%|█ | 40244/371472 [3:14:53<27:41:44, 3.32it/s] 11%|█ | 40245/371472 [3:14:53<27:37:50, 3.33it/s] 11%|█ | 40246/371472 [3:14:53<27:48:12, 3.31it/s] 11%|█ | 40247/371472 [3:14:53<26:34:19, 3.46it/s] 11%|█ | 40248/371472 [3:14:54<26:15:09, 3.50it/s] 11%|█ | 40249/371472 [3:14:54<26:49:11, 3.43it/s] 11%|█ | 40250/371472 [3:14:54<26:17:26, 3.50it/s] 11%|█ | 40251/371472 [3:14:55<25:37:32, 3.59it/s] 11%|█ | 40252/371472 [3:14:55<25:03:35, 3.67it/s] 11%|█ | 40253/371472 [3:14:55<26:17:55, 3.50it/s] 11%|█ | 40254/371472 [3:14:55<27:03:24, 3.40it/s] 11%|█ | 40255/371472 [3:14:56<27:23:13, 3.36it/s] 11%|█ | 40256/371472 [3:14:56<29:32:59, 3.11it/s] 11%|█ | 40257/371472 [3:14:56<28:08:07, 3.27it/s] 11%|█ | 40258/371472 [3:14:57<27:35:50, 3.33it/s] 11%|█ | 40259/371472 [3:14:57<26:48:14, 3.43it/s] 11%|█ | 40260/371472 [3:14:57<25:48:11, 3.57it/s] {'loss': 4.4474, 'learning_rate': 9.028906031157749e-07, 'epoch': 1.73} + 11%|█ | 40260/371472 [3:14:57<25:48:11, 3.57it/s] 11%|█ | 40261/371472 [3:14:58<25:25:22, 3.62it/s] 11%|█ | 40262/371472 [3:14:58<26:34:03, 3.46it/s] 11%|█ | 40263/371472 [3:14:58<26:55:11, 3.42it/s] 11%|█ | 40264/371472 [3:14:58<28:33:17, 3.22it/s] 11%|█ | 40265/371472 [3:14:59<26:57:57, 3.41it/s] 11%|█ | 40266/371472 [3:14:59<27:35:30, 3.33it/s] 11%|█ | 40267/371472 [3:14:59<26:21:53, 3.49it/s] 11%|█ | 40268/371472 [3:15:00<27:32:02, 3.34it/s] 11%|█ | 40269/371472 [3:15:00<26:03:02, 3.53it/s] 11%|█ | 40270/371472 [3:15:00<25:22:22, 3.63it/s] 11%|█ | 40271/371472 [3:15:00<25:57:21, 3.54it/s] 11%|█ | 40272/371472 [3:15:01<27:27:44, 3.35it/s] 11%|█ | 40273/371472 [3:15:01<26:25:58, 3.48it/s] 11%|█ | 40274/371472 [3:15:01<25:56:17, 3.55it/s] 11%|█ | 40275/371472 [3:15:02<24:59:19, 3.68it/s] 11%|█ | 40276/371472 [3:15:02<25:28:28, 3.61it/s] 11%|█ | 40277/371472 [3:15:02<25:39:59, 3.58it/s] 11%|█ | 40278/371472 [3:15:02<25:01:52, 3.68it/s] 11%|█ | 40279/371472 [3:15:03<25:47:35, 3.57it/s] 11%|█ | 40280/371472 [3:15:03<26:47:57, 3.43it/s] {'loss': 4.4528, 'learning_rate': 9.02842121140296e-07, 'epoch': 1.73} + 11%|█ | 40280/371472 [3:15:03<26:47:57, 3.43it/s] 11%|█ | 40281/371472 [3:15:03<26:40:33, 3.45it/s] 11%|█ | 40282/371472 [3:15:04<26:14:52, 3.50it/s] 11%|█ | 40283/371472 [3:15:04<25:33:29, 3.60it/s] 11%|█ | 40284/371472 [3:15:04<26:39:15, 3.45it/s] 11%|█ | 40285/371472 [3:15:04<25:45:30, 3.57it/s] 11%|█ | 40286/371472 [3:15:05<25:31:17, 3.60it/s] 11%|█ | 40287/371472 [3:15:05<25:13:52, 3.65it/s] 11%|█ | 40288/371472 [3:15:05<25:03:27, 3.67it/s] 11%|█ | 40289/371472 [3:15:05<25:35:25, 3.59it/s] 11%|█ | 40290/371472 [3:15:06<25:18:55, 3.63it/s] 11%|█ | 40291/371472 [3:15:06<26:12:56, 3.51it/s] 11%|█ | 40292/371472 [3:15:06<28:25:41, 3.24it/s] 11%|█ | 40293/371472 [3:15:07<26:39:56, 3.45it/s] 11%|█ | 40294/371472 [3:15:07<27:04:51, 3.40it/s] 11%|█ | 40295/371472 [3:15:07<27:14:01, 3.38it/s] 11%|█ | 40296/371472 [3:15:08<29:24:05, 3.13it/s] 11%|█ | 40297/371472 [3:15:08<28:38:47, 3.21it/s] 11%|█ | 40298/371472 [3:15:08<27:18:14, 3.37it/s] 11%|█ | 40299/371472 [3:15:08<26:20:47, 3.49it/s] 11%|█ | 40300/371472 [3:15:09<25:46:03, 3.57it/s] {'loss': 4.3944, 'learning_rate': 9.027936391648171e-07, 'epoch': 1.74} + 11%|█ | 40300/371472 [3:15:09<25:46:03, 3.57it/s] 11%|█ | 40301/371472 [3:15:09<24:54:19, 3.69it/s] 11%|█ | 40302/371472 [3:15:09<25:02:28, 3.67it/s] 11%|█ | 40303/371472 [3:15:10<25:03:35, 3.67it/s] 11%|█ | 40304/371472 [3:15:10<24:50:40, 3.70it/s] 11%|█ | 40305/371472 [3:15:10<24:37:16, 3.74it/s] 11%|█ | 40306/371472 [3:15:10<24:45:17, 3.72it/s] 11%|█ | 40307/371472 [3:15:11<24:30:33, 3.75it/s] 11%|█ | 40308/371472 [3:15:11<25:05:24, 3.67it/s] 11%|█ | 40309/371472 [3:15:11<25:18:06, 3.64it/s] 11%|█ | 40310/371472 [3:15:11<24:59:12, 3.68it/s] 11%|█ | 40311/371472 [3:15:12<24:34:04, 3.74it/s] 11%|█ | 40312/371472 [3:15:12<27:53:18, 3.30it/s] 11%|█ | 40313/371472 [3:15:12<26:33:47, 3.46it/s] 11%|█ | 40314/371472 [3:15:13<26:31:54, 3.47it/s] 11%|█ | 40315/371472 [3:15:13<26:12:29, 3.51it/s] 11%|█ | 40316/371472 [3:15:13<26:09:14, 3.52it/s] 11%|█ | 40317/371472 [3:15:13<27:06:56, 3.39it/s] 11%|█ | 40318/371472 [3:15:14<26:51:48, 3.42it/s] 11%|█ | 40319/371472 [3:15:14<26:45:37, 3.44it/s] 11%|█ | 40320/371472 [3:15:14<27:44:50, 3.32it/s] {'loss': 4.3709, 'learning_rate': 9.027451571893383e-07, 'epoch': 1.74} + 11%|█ | 40320/371472 [3:15:14<27:44:50, 3.32it/s] 11%|█ | 40321/371472 [3:15:15<26:58:47, 3.41it/s] 11%|█ | 40322/371472 [3:15:15<25:47:04, 3.57it/s] 11%|█ | 40323/371472 [3:15:15<25:19:49, 3.63it/s] 11%|█ | 40324/371472 [3:15:15<26:09:41, 3.52it/s] 11%|█ | 40325/371472 [3:15:16<25:07:56, 3.66it/s] 11%|█ | 40326/371472 [3:15:16<25:48:20, 3.56it/s] 11%|█ | 40327/371472 [3:15:16<26:18:01, 3.50it/s] 11%|█ | 40328/371472 [3:15:17<27:38:14, 3.33it/s] 11%|█ | 40329/371472 [3:15:17<27:05:36, 3.40it/s] 11%|█ | 40330/371472 [3:15:17<26:01:48, 3.53it/s] 11%|█ | 40331/371472 [3:15:18<26:51:41, 3.42it/s] 11%|█ | 40332/371472 [3:15:18<25:38:08, 3.59it/s] 11%|█ | 40333/371472 [3:15:18<25:10:18, 3.65it/s] 11%|█ | 40334/371472 [3:15:18<24:38:57, 3.73it/s] 11%|█ | 40335/371472 [3:15:19<25:06:07, 3.66it/s] 11%|█ | 40336/371472 [3:15:19<24:40:58, 3.73it/s] 11%|█ | 40337/371472 [3:15:19<24:35:45, 3.74it/s] 11%|█ | 40338/371472 [3:15:19<24:28:14, 3.76it/s] 11%|█ | 40339/371472 [3:15:20<24:04:48, 3.82it/s] 11%|█ | 40340/371472 [3:15:20<24:28:26, 3.76it/s] {'loss': 4.5746, 'learning_rate': 9.026966752138594e-07, 'epoch': 1.74} + 11%|█ | 40340/371472 [3:15:20<24:28:26, 3.76it/s] 11%|█ | 40341/371472 [3:15:20<25:41:28, 3.58it/s] 11%|█ | 40342/371472 [3:15:20<24:44:58, 3.72it/s] 11%|█ | 40343/371472 [3:15:21<25:02:12, 3.67it/s] 11%|█ | 40344/371472 [3:15:21<26:15:56, 3.50it/s] 11%|█ | 40345/371472 [3:15:21<27:09:03, 3.39it/s] 11%|█ | 40346/371472 [3:15:22<25:50:45, 3.56it/s] 11%|█ | 40347/371472 [3:15:22<25:43:33, 3.58it/s] 11%|█ | 40348/371472 [3:15:22<24:47:25, 3.71it/s] 11%|█ | 40349/371472 [3:15:22<25:02:31, 3.67it/s] 11%|█ | 40350/371472 [3:15:23<26:21:15, 3.49it/s] 11%|█ | 40351/371472 [3:15:23<26:00:49, 3.54it/s] 11%|█ | 40352/371472 [3:15:23<25:48:36, 3.56it/s] 11%|█ | 40353/371472 [3:15:24<27:06:54, 3.39it/s] 11%|█ | 40354/371472 [3:15:24<27:52:18, 3.30it/s] 11%|█ | 40355/371472 [3:15:24<26:41:33, 3.45it/s] 11%|█ | 40356/371472 [3:15:25<28:07:02, 3.27it/s] 11%|█ | 40357/371472 [3:15:25<27:13:00, 3.38it/s] 11%|█ | 40358/371472 [3:15:25<26:03:56, 3.53it/s] 11%|█ | 40359/371472 [3:15:25<26:15:08, 3.50it/s] 11%|█ | 40360/371472 [3:15:26<25:46:44, 3.57it/s] {'loss': 4.3533, 'learning_rate': 9.026481932383804e-07, 'epoch': 1.74} + 11%|█ | 40360/371472 [3:15:26<25:46:44, 3.57it/s] 11%|█ | 40361/371472 [3:15:26<25:19:19, 3.63it/s] 11%|█ | 40362/371472 [3:15:26<25:50:01, 3.56it/s] 11%|█ | 40363/371472 [3:15:26<25:46:40, 3.57it/s] 11%|█ | 40364/371472 [3:15:27<26:17:50, 3.50it/s] 11%|█ | 40365/371472 [3:15:27<25:49:25, 3.56it/s] 11%|█ | 40366/371472 [3:15:27<26:48:57, 3.43it/s] 11%|█ | 40367/371472 [3:15:28<26:40:45, 3.45it/s] 11%|█ | 40368/371472 [3:15:28<26:12:21, 3.51it/s] 11%|█ | 40369/371472 [3:15:28<25:46:59, 3.57it/s] 11%|█ | 40370/371472 [3:15:28<26:00:18, 3.54it/s] 11%|█ | 40371/371472 [3:15:29<28:29:31, 3.23it/s] 11%|█ | 40372/371472 [3:15:29<27:31:44, 3.34it/s] 11%|█ | 40373/371472 [3:15:29<27:13:48, 3.38it/s] 11%|█ | 40374/371472 [3:15:30<26:26:51, 3.48it/s] 11%|█ | 40375/371472 [3:15:30<26:11:52, 3.51it/s] 11%|█ | 40376/371472 [3:15:30<25:38:25, 3.59it/s] 11%|█ | 40377/371472 [3:15:30<25:06:35, 3.66it/s] 11%|█ | 40378/371472 [3:15:31<24:19:15, 3.78it/s] 11%|█ | 40379/371472 [3:15:31<25:16:53, 3.64it/s] 11%|█ | 40380/371472 [3:15:31<24:30:08, 3.75it/s] {'loss': 4.4538, 'learning_rate': 9.025997112629015e-07, 'epoch': 1.74} + 11%|█ | 40380/371472 [3:15:31<24:30:08, 3.75it/s] 11%|█ | 40381/371472 [3:15:32<25:37:23, 3.59it/s] 11%|█ | 40382/371472 [3:15:32<25:26:44, 3.61it/s] 11%|█ | 40383/371472 [3:15:32<25:35:04, 3.59it/s] 11%|█ | 40384/371472 [3:15:32<26:25:17, 3.48it/s] 11%|█ | 40385/371472 [3:15:33<26:00:30, 3.54it/s] 11%|█ | 40386/371472 [3:15:33<26:28:21, 3.47it/s] 11%|█ | 40387/371472 [3:15:33<26:27:03, 3.48it/s] 11%|█ | 40388/371472 [3:15:34<25:12:03, 3.65it/s] 11%|█ | 40389/371472 [3:15:34<25:00:36, 3.68it/s] 11%|█ | 40390/371472 [3:15:34<25:04:28, 3.67it/s] 11%|█ | 40391/371472 [3:15:34<25:01:42, 3.67it/s] 11%|█ | 40392/371472 [3:15:35<25:05:53, 3.66it/s] 11%|█ | 40393/371472 [3:15:35<24:44:53, 3.72it/s] 11%|█ | 40394/371472 [3:15:35<24:42:16, 3.72it/s] 11%|█ | 40395/371472 [3:15:35<25:03:00, 3.67it/s] 11%|█ | 40396/371472 [3:15:36<26:03:21, 3.53it/s] 11%|█ | 40397/371472 [3:15:36<25:56:14, 3.55it/s] 11%|█ | 40398/371472 [3:15:36<27:06:06, 3.39it/s] 11%|█ | 40399/371472 [3:15:37<28:32:47, 3.22it/s] 11%|█ | 40400/371472 [3:15:37<29:25:54, 3.12it/s] {'loss': 4.2207, 'learning_rate': 9.025512292874227e-07, 'epoch': 1.74} + 11%|█ | 40400/371472 [3:15:37<29:25:54, 3.12it/s] 11%|█ | 40401/371472 [3:15:37<27:54:47, 3.29it/s] 11%|█ | 40402/371472 [3:15:38<26:54:44, 3.42it/s] 11%|█ | 40403/371472 [3:15:38<25:35:09, 3.59it/s] 11%|█ | 40404/371472 [3:15:38<25:40:08, 3.58it/s] 11%|█ | 40405/371472 [3:15:38<25:27:54, 3.61it/s] 11%|█ | 40406/371472 [3:15:39<25:51:41, 3.56it/s] 11%|█ | 40407/371472 [3:15:39<27:11:00, 3.38it/s] 11%|█ | 40408/371472 [3:15:39<26:42:31, 3.44it/s] 11%|█ | 40409/371472 [3:15:40<26:34:45, 3.46it/s] 11%|█ | 40410/371472 [3:15:40<25:42:50, 3.58it/s] 11%|█ | 40411/371472 [3:15:40<25:29:09, 3.61it/s] 11%|█ | 40412/371472 [3:15:40<25:34:42, 3.60it/s] 11%|█ | 40413/371472 [3:15:41<25:42:55, 3.58it/s] 11%|█ | 40414/371472 [3:15:41<25:53:08, 3.55it/s] 11%|█ | 40415/371472 [3:15:41<25:56:19, 3.55it/s] 11%|█ | 40416/371472 [3:15:41<25:27:17, 3.61it/s] 11%|█ | 40417/371472 [3:15:42<25:27:29, 3.61it/s] 11%|█ | 40418/371472 [3:15:42<27:24:56, 3.35it/s] 11%|█ | 40419/371472 [3:15:42<27:41:49, 3.32it/s] 11%|█ | 40420/371472 [3:15:43<26:20:54, 3.49it/s] {'loss': 4.4206, 'learning_rate': 9.025027473119438e-07, 'epoch': 1.74} + 11%|█ | 40420/371472 [3:15:43<26:20:54, 3.49it/s] 11%|█ | 40421/371472 [3:15:43<26:31:58, 3.47it/s] 11%|█ | 40422/371472 [3:15:43<26:23:13, 3.48it/s] 11%|█ | 40423/371472 [3:15:43<25:26:41, 3.61it/s] 11%|█ | 40424/371472 [3:15:44<27:29:51, 3.34it/s] 11%|█ | 40425/371472 [3:15:44<27:54:54, 3.29it/s] 11%|█ | 40426/371472 [3:15:44<27:20:50, 3.36it/s] 11%|█ | 40427/371472 [3:15:45<26:55:51, 3.41it/s] 11%|█ | 40428/371472 [3:15:45<28:33:45, 3.22it/s] 11%|█ | 40429/371472 [3:15:45<28:02:05, 3.28it/s] 11%|█ | 40430/371472 [3:15:46<26:22:50, 3.49it/s] 11%|█ | 40431/371472 [3:15:46<25:32:12, 3.60it/s] 11%|█ | 40432/371472 [3:15:46<24:59:34, 3.68it/s] 11%|█ | 40433/371472 [3:15:46<27:12:46, 3.38it/s] 11%|█ | 40434/371472 [3:15:47<27:59:27, 3.29it/s] 11%|█ | 40435/371472 [3:15:47<29:01:37, 3.17it/s] 11%|█ | 40436/371472 [3:15:47<28:36:31, 3.21it/s] 11%|█ | 40437/371472 [3:15:48<28:07:22, 3.27it/s] 11%|█ | 40438/371472 [3:15:48<27:55:46, 3.29it/s] 11%|█ | 40439/371472 [3:15:48<26:39:44, 3.45it/s] 11%|█ | 40440/371472 [3:15:49<27:48:51, 3.31it/s] {'loss': 4.4268, 'learning_rate': 9.024542653364649e-07, 'epoch': 1.74} + 11%|█ | 40440/371472 [3:15:49<27:48:51, 3.31it/s] 11%|█ | 40441/371472 [3:15:49<26:53:48, 3.42it/s] 11%|█ | 40442/371472 [3:15:49<25:44:25, 3.57it/s] 11%|█ | 40443/371472 [3:15:49<24:58:26, 3.68it/s] 11%|█ | 40444/371472 [3:15:50<26:22:15, 3.49it/s] 11%|█ | 40445/371472 [3:15:50<26:39:23, 3.45it/s] 11%|█ | 40446/371472 [3:15:50<26:04:18, 3.53it/s] 11%|█ | 40447/371472 [3:15:51<26:24:26, 3.48it/s] 11%|█ | 40448/371472 [3:15:51<26:17:28, 3.50it/s] 11%|█ | 40449/371472 [3:15:51<25:47:31, 3.57it/s] 11%|█ | 40450/371472 [3:15:51<28:43:15, 3.20it/s] 11%|█ | 40451/371472 [3:15:52<28:13:11, 3.26it/s] 11%|█ | 40452/371472 [3:15:52<27:32:02, 3.34it/s] 11%|█ | 40453/371472 [3:15:52<27:24:13, 3.36it/s] 11%|█ | 40454/371472 [3:15:53<27:13:31, 3.38it/s] 11%|█ | 40455/371472 [3:15:53<27:04:20, 3.40it/s] 11%|█ | 40456/371472 [3:15:53<27:32:50, 3.34it/s] 11%|█ | 40457/371472 [3:15:54<29:40:17, 3.10it/s] 11%|█ | 40458/371472 [3:15:54<28:00:46, 3.28it/s] 11%|█ | 40459/371472 [3:15:54<27:02:11, 3.40it/s] 11%|█ | 40460/371472 [3:15:54<26:11:40, 3.51it/s] {'loss': 4.4761, 'learning_rate': 9.02405783360986e-07, 'epoch': 1.74} + 11%|█ | 40460/371472 [3:15:54<26:11:40, 3.51it/s] 11%|█ | 40461/371472 [3:15:55<25:36:28, 3.59it/s] 11%|█ | 40462/371472 [3:15:55<25:34:22, 3.60it/s] 11%|█ | 40463/371472 [3:15:55<25:43:34, 3.57it/s] 11%|█ | 40464/371472 [3:15:56<25:50:32, 3.56it/s] 11%|█ | 40465/371472 [3:15:56<25:21:43, 3.63it/s] 11%|█ | 40466/371472 [3:15:56<25:00:07, 3.68it/s] 11%|█ | 40467/371472 [3:15:56<24:57:55, 3.68it/s] 11%|█ | 40468/371472 [3:15:57<24:45:29, 3.71it/s] 11%|█ | 40469/371472 [3:15:57<30:47:38, 2.99it/s] 11%|█ | 40470/371472 [3:15:57<28:33:14, 3.22it/s] 11%|█ | 40471/371472 [3:15:58<27:17:59, 3.37it/s] 11%|█ | 40472/371472 [3:15:58<27:00:50, 3.40it/s] 11%|█ | 40473/371472 [3:15:58<29:00:54, 3.17it/s] 11%|█ | 40474/371472 [3:15:59<28:47:46, 3.19it/s] 11%|█ | 40475/371472 [3:15:59<27:16:52, 3.37it/s] 11%|█ | 40476/371472 [3:15:59<26:10:40, 3.51it/s] 11%|█ | 40477/371472 [3:15:59<25:50:53, 3.56it/s] 11%|█ | 40478/371472 [3:16:00<26:12:17, 3.51it/s] 11%|█ | 40479/371472 [3:16:00<27:22:28, 3.36it/s] 11%|█ | 40480/371472 [3:16:00<26:03:45, 3.53it/s] {'loss': 4.5912, 'learning_rate': 9.023573013855071e-07, 'epoch': 1.74} + 11%|█ | 40480/371472 [3:16:00<26:03:45, 3.53it/s] 11%|█ | 40481/371472 [3:16:01<29:08:24, 3.16it/s] 11%|█ | 40482/371472 [3:16:01<28:10:08, 3.26it/s] 11%|█ | 40483/371472 [3:16:01<26:59:27, 3.41it/s] 11%|█ | 40484/371472 [3:16:01<25:52:49, 3.55it/s] 11%|█ | 40485/371472 [3:16:02<27:08:00, 3.39it/s] 11%|█ | 40486/371472 [3:16:02<27:42:10, 3.32it/s] 11%|█ | 40487/371472 [3:16:02<28:21:23, 3.24it/s] 11%|█ | 40488/371472 [3:16:03<27:34:09, 3.33it/s] 11%|█ | 40489/371472 [3:16:03<28:16:58, 3.25it/s] 11%|█ | 40490/371472 [3:16:03<27:48:37, 3.31it/s] 11%|█ | 40491/371472 [3:16:04<27:54:40, 3.29it/s] 11%|█ | 40492/371472 [3:16:04<27:26:04, 3.35it/s] 11%|█ | 40493/371472 [3:16:04<26:49:37, 3.43it/s] 11%|█ | 40494/371472 [3:16:04<26:08:01, 3.52it/s] 11%|█ | 40495/371472 [3:16:05<27:58:43, 3.29it/s] 11%|█ | 40496/371472 [3:16:05<26:16:39, 3.50it/s] 11%|█ | 40497/371472 [3:16:05<25:35:18, 3.59it/s] 11%|█ | 40498/371472 [3:16:06<25:19:51, 3.63it/s] 11%|█ | 40499/371472 [3:16:06<25:54:54, 3.55it/s] 11%|█ | 40500/371472 [3:16:06<26:18:16, 3.50it/s] {'loss': 4.1854, 'learning_rate': 9.023088194100282e-07, 'epoch': 1.74} + 11%|█ | 40500/371472 [3:16:06<26:18:16, 3.50it/s] 11%|█ | 40501/371472 [3:16:06<25:27:14, 3.61it/s] 11%|█ | 40502/371472 [3:16:07<25:17:33, 3.63it/s] 11%|█ | 40503/371472 [3:16:07<25:38:13, 3.59it/s] 11%|█ | 40504/371472 [3:16:07<25:19:21, 3.63it/s] 11%|█ | 40505/371472 [3:16:07<25:20:00, 3.63it/s] 11%|█ | 40506/371472 [3:16:08<25:22:50, 3.62it/s] 11%|█ | 40507/371472 [3:16:08<25:48:13, 3.56it/s] 11%|█ | 40508/371472 [3:16:08<24:58:46, 3.68it/s] 11%|█ | 40509/371472 [3:16:09<26:48:19, 3.43it/s] 11%|█ | 40510/371472 [3:16:09<26:13:56, 3.50it/s] 11%|█ | 40511/371472 [3:16:09<26:34:49, 3.46it/s] 11%|█ | 40512/371472 [3:16:10<26:50:29, 3.43it/s] 11%|█ | 40513/371472 [3:16:10<26:29:09, 3.47it/s] 11%|█ | 40514/371472 [3:16:10<26:14:18, 3.50it/s] 11%|█ | 40515/371472 [3:16:10<26:18:16, 3.49it/s] 11%|█ | 40516/371472 [3:16:11<25:55:23, 3.55it/s] 11%|█ | 40517/371472 [3:16:11<25:38:38, 3.58it/s] 11%|█ | 40518/371472 [3:16:11<26:05:30, 3.52it/s] 11%|█ | 40519/371472 [3:16:12<27:05:12, 3.39it/s] 11%|█ | 40520/371472 [3:16:12<29:10:48, 3.15it/s] {'loss': 4.4955, 'learning_rate': 9.022603374345493e-07, 'epoch': 1.75} + 11%|█ | 40520/371472 [3:16:12<29:10:48, 3.15it/s] 11%|█ | 40521/371472 [3:16:12<27:57:40, 3.29it/s] 11%|█ | 40522/371472 [3:16:12<26:55:24, 3.41it/s] 11%|█ | 40523/371472 [3:16:13<26:32:29, 3.46it/s] 11%|█ | 40524/371472 [3:16:13<26:04:32, 3.53it/s] 11%|█ | 40525/371472 [3:16:13<26:27:55, 3.47it/s] 11%|█ | 40526/371472 [3:16:14<27:28:15, 3.35it/s] 11%|█ | 40527/371472 [3:16:14<27:09:18, 3.39it/s] 11%|█ | 40528/371472 [3:16:14<26:23:21, 3.48it/s] 11%|█ | 40529/371472 [3:16:14<26:37:42, 3.45it/s] 11%|█ | 40530/371472 [3:16:15<25:30:31, 3.60it/s] 11%|█ | 40531/371472 [3:16:15<26:11:20, 3.51it/s] 11%|█ | 40532/371472 [3:16:15<25:39:17, 3.58it/s] 11%|█ | 40533/371472 [3:16:16<25:28:00, 3.61it/s] 11%|█ | 40534/371472 [3:16:16<24:55:38, 3.69it/s] 11%|█ | 40535/371472 [3:16:16<25:44:39, 3.57it/s] 11%|█ | 40536/371472 [3:16:16<25:57:47, 3.54it/s] 11%|█ | 40537/371472 [3:16:17<25:53:56, 3.55it/s] 11%|█ | 40538/371472 [3:16:17<25:56:27, 3.54it/s] 11%|█ | 40539/371472 [3:16:17<25:38:15, 3.59it/s] 11%|█ | 40540/371472 [3:16:17<25:27:16, 3.61it/s] {'loss': 4.2738, 'learning_rate': 9.022118554590704e-07, 'epoch': 1.75} + 11%|█ | 40540/371472 [3:16:17<25:27:16, 3.61it/s] 11%|█ | 40541/371472 [3:16:18<26:06:30, 3.52it/s] 11%|█ | 40542/371472 [3:16:18<25:33:43, 3.60it/s] 11%|█ | 40543/371472 [3:16:18<25:33:45, 3.60it/s] 11%|█ | 40544/371472 [3:16:19<25:53:42, 3.55it/s] 11%|█ | 40545/371472 [3:16:19<27:11:01, 3.38it/s] 11%|█ | 40546/371472 [3:16:19<26:19:25, 3.49it/s] 11%|█ | 40547/371472 [3:16:19<25:49:34, 3.56it/s] 11%|█ | 40548/371472 [3:16:20<26:21:18, 3.49it/s] 11%|█ | 40549/371472 [3:16:20<25:47:57, 3.56it/s] 11%|█ | 40550/371472 [3:16:20<25:34:10, 3.60it/s] 11%|█ | 40551/371472 [3:16:21<25:02:19, 3.67it/s] 11%|█ | 40552/371472 [3:16:21<25:39:44, 3.58it/s] 11%|█ | 40553/371472 [3:16:21<26:13:00, 3.51it/s] 11%|█ | 40554/371472 [3:16:21<26:38:44, 3.45it/s] 11%|█ | 40555/371472 [3:16:22<26:09:32, 3.51it/s] 11%|█ | 40556/371472 [3:16:22<25:37:03, 3.59it/s] 11%|█ | 40557/371472 [3:16:22<25:34:36, 3.59it/s] 11%|█ | 40558/371472 [3:16:23<26:02:08, 3.53it/s] 11%|█ | 40559/371472 [3:16:23<25:14:37, 3.64it/s] 11%|█ | 40560/371472 [3:16:23<26:04:58, 3.52it/s] {'loss': 4.4197, 'learning_rate': 9.021633734835916e-07, 'epoch': 1.75} + 11%|█ | 40560/371472 [3:16:23<26:04:58, 3.52it/s] 11%|█ | 40561/371472 [3:16:23<27:21:11, 3.36it/s] 11%|█ | 40562/371472 [3:16:24<26:10:02, 3.51it/s] 11%|█ | 40563/371472 [3:16:24<26:12:02, 3.51it/s] 11%|█ | 40564/371472 [3:16:24<26:53:03, 3.42it/s] 11%|█ | 40565/371472 [3:16:25<27:05:10, 3.39it/s] 11%|█ | 40566/371472 [3:16:25<26:30:02, 3.47it/s] 11%|█ | 40567/371472 [3:16:25<26:20:18, 3.49it/s] 11%|█ | 40568/371472 [3:16:25<25:41:51, 3.58it/s] 11%|█ | 40569/371472 [3:16:26<25:41:56, 3.58it/s] 11%|█ | 40570/371472 [3:16:26<25:00:41, 3.67it/s] 11%|█ | 40571/371472 [3:16:26<25:27:53, 3.61it/s] 11%|█ | 40572/371472 [3:16:27<25:48:53, 3.56it/s] 11%|█ | 40573/371472 [3:16:27<25:42:57, 3.57it/s] 11%|█ | 40574/371472 [3:16:27<26:32:54, 3.46it/s] 11%|█ | 40575/371472 [3:16:27<26:00:33, 3.53it/s] 11%|█ | 40576/371472 [3:16:28<26:08:03, 3.52it/s] 11%|█ | 40577/371472 [3:16:28<26:17:33, 3.50it/s] 11%|█ | 40578/371472 [3:16:28<25:26:18, 3.61it/s] 11%|█ | 40579/371472 [3:16:29<25:34:45, 3.59it/s] 11%|█ | 40580/371472 [3:16:29<25:26:31, 3.61it/s] {'loss': 4.4153, 'learning_rate': 9.021148915081126e-07, 'epoch': 1.75} + 11%|█ | 40580/371472 [3:16:29<25:26:31, 3.61it/s] 11%|█ | 40581/371472 [3:16:29<24:57:07, 3.68it/s] 11%|█ | 40582/371472 [3:16:29<24:38:25, 3.73it/s] 11%|█ | 40583/371472 [3:16:30<24:09:04, 3.81it/s] 11%|█ | 40584/371472 [3:16:30<24:12:31, 3.80it/s] 11%|█ | 40585/371472 [3:16:30<24:49:54, 3.70it/s] 11%|█ | 40586/371472 [3:16:30<25:25:49, 3.61it/s] 11%|█ | 40587/371472 [3:16:31<25:03:51, 3.67it/s] 11%|█ | 40588/371472 [3:16:31<25:33:08, 3.60it/s] 11%|█ | 40589/371472 [3:16:31<26:15:10, 3.50it/s] 11%|█ | 40590/371472 [3:16:32<25:36:42, 3.59it/s] 11%|█ | 40591/371472 [3:16:32<25:57:35, 3.54it/s] 11%|█ | 40592/371472 [3:16:32<27:15:20, 3.37it/s] 11%|█ | 40593/371472 [3:16:32<27:30:13, 3.34it/s] 11%|█ | 40594/371472 [3:16:33<26:51:22, 3.42it/s] 11%|█ | 40595/371472 [3:16:33<26:56:02, 3.41it/s] 11%|█ | 40596/371472 [3:16:33<26:56:07, 3.41it/s] 11%|█ | 40597/371472 [3:16:34<25:58:58, 3.54it/s] 11%|█ | 40598/371472 [3:16:34<25:24:37, 3.62it/s] 11%|█ | 40599/371472 [3:16:34<25:24:20, 3.62it/s] 11%|█ | 40600/371472 [3:16:34<24:44:04, 3.72it/s] {'loss': 4.3795, 'learning_rate': 9.020664095326336e-07, 'epoch': 1.75} + 11%|█ | 40600/371472 [3:16:34<24:44:04, 3.72it/s] 11%|█ | 40601/371472 [3:16:35<25:11:41, 3.65it/s] 11%|█ | 40602/371472 [3:16:35<25:00:02, 3.68it/s] 11%|█ | 40603/371472 [3:16:35<26:03:59, 3.53it/s] 11%|█ | 40604/371472 [3:16:36<25:30:10, 3.60it/s] 11%|█ | 40605/371472 [3:16:36<25:21:29, 3.62it/s] 11%|█ | 40606/371472 [3:16:36<25:26:36, 3.61it/s] 11%|█ | 40607/371472 [3:16:36<25:25:02, 3.62it/s] 11%|█ | 40608/371472 [3:16:37<25:56:47, 3.54it/s] 11%|█ | 40609/371472 [3:16:37<25:30:31, 3.60it/s] 11%|█ | 40610/371472 [3:16:37<27:03:30, 3.40it/s] 11%|█ | 40611/371472 [3:16:38<27:07:38, 3.39it/s] 11%|█ | 40612/371472 [3:16:38<26:23:05, 3.48it/s] 11%|█ | 40613/371472 [3:16:38<26:08:50, 3.51it/s] 11%|█ | 40614/371472 [3:16:38<26:08:58, 3.51it/s] 11%|█ | 40615/371472 [3:16:39<27:10:16, 3.38it/s] 11%|█ | 40616/371472 [3:16:39<28:39:40, 3.21it/s] 11%|█ | 40617/371472 [3:16:39<27:09:29, 3.38it/s] 11%|█ | 40618/371472 [3:16:40<28:34:08, 3.22it/s] 11%|█ | 40619/371472 [3:16:40<27:47:28, 3.31it/s] 11%|█ | 40620/371472 [3:16:40<26:21:56, 3.49it/s] {'loss': 4.3038, 'learning_rate': 9.020179275571548e-07, 'epoch': 1.75} + 11%|█ | 40620/371472 [3:16:40<26:21:56, 3.49it/s] 11%|█ | 40621/371472 [3:16:40<27:37:56, 3.33it/s] 11%|█ | 40622/371472 [3:16:41<28:25:35, 3.23it/s] 11%|█ | 40623/371472 [3:16:41<26:52:20, 3.42it/s] 11%|█ | 40624/371472 [3:16:41<26:25:34, 3.48it/s] 11%|█ | 40625/371472 [3:16:42<25:39:16, 3.58it/s] 11%|█ | 40626/371472 [3:16:42<25:07:04, 3.66it/s] 11%|█ | 40627/371472 [3:16:42<25:12:09, 3.65it/s] 11%|█ | 40628/371472 [3:16:43<32:41:21, 2.81it/s] 11%|█ | 40629/371472 [3:16:43<30:14:14, 3.04it/s] 11%|█ | 40630/371472 [3:16:43<28:14:12, 3.25it/s] 11%|█ | 40631/371472 [3:16:44<28:24:24, 3.24it/s] 11%|█ | 40632/371472 [3:16:44<27:03:53, 3.40it/s] 11%|█ | 40633/371472 [3:16:44<26:26:44, 3.48it/s] 11%|█ | 40634/371472 [3:16:44<26:22:57, 3.48it/s] 11%|█ | 40635/371472 [3:16:45<26:44:08, 3.44it/s] 11%|█ | 40636/371472 [3:16:45<25:51:59, 3.55it/s] 11%|█ | 40637/371472 [3:16:45<25:27:31, 3.61it/s] 11%|█ | 40638/371472 [3:16:45<26:19:51, 3.49it/s] 11%|█ | 40639/371472 [3:16:46<25:32:02, 3.60it/s] 11%|█ | 40640/371472 [3:16:46<26:24:04, 3.48it/s] {'loss': 4.4357, 'learning_rate': 9.01969445581676e-07, 'epoch': 1.75} + 11%|█ | 40640/371472 [3:16:46<26:24:04, 3.48it/s] 11%|█ | 40641/371472 [3:16:46<25:47:05, 3.56it/s] 11%|█ | 40642/371472 [3:16:47<25:57:19, 3.54it/s] 11%|█ | 40643/371472 [3:16:47<27:12:32, 3.38it/s] 11%|█ | 40644/371472 [3:16:47<29:57:12, 3.07it/s] 11%|█ | 40645/371472 [3:16:48<29:56:11, 3.07it/s] 11%|█ | 40646/371472 [3:16:48<28:49:42, 3.19it/s] 11%|█ | 40647/371472 [3:16:48<27:54:53, 3.29it/s] 11%|█ | 40648/371472 [3:16:49<27:37:56, 3.33it/s] 11%|█ | 40649/371472 [3:16:49<27:17:53, 3.37it/s] 11%|█ | 40650/371472 [3:16:49<28:19:04, 3.25it/s] 11%|█ | 40651/371472 [3:16:49<27:29:20, 3.34it/s] 11%|█ | 40652/371472 [3:16:50<26:41:30, 3.44it/s] 11%|█ | 40653/371472 [3:16:50<26:36:11, 3.45it/s] 11%|█ | 40654/371472 [3:16:50<26:10:20, 3.51it/s] 11%|█ | 40655/371472 [3:16:51<25:44:28, 3.57it/s] 11%|█ | 40656/371472 [3:16:51<25:54:08, 3.55it/s] 11%|█ | 40657/371472 [3:16:51<25:02:15, 3.67it/s] 11%|█ | 40658/371472 [3:16:51<25:38:04, 3.58it/s] 11%|█ | 40659/371472 [3:16:52<26:01:56, 3.53it/s] 11%|█ | 40660/371472 [3:16:52<25:43:57, 3.57it/s] {'loss': 4.5771, 'learning_rate': 9.01920963606197e-07, 'epoch': 1.75} + 11%|█ | 40660/371472 [3:16:52<25:43:57, 3.57it/s] 11%|█ | 40661/371472 [3:16:52<25:05:06, 3.66it/s] 11%|█ | 40662/371472 [3:16:53<27:42:36, 3.32it/s] 11%|█ | 40663/371472 [3:16:53<26:22:02, 3.49it/s] 11%|█ | 40664/371472 [3:16:53<25:57:05, 3.54it/s] 11%|█ | 40665/371472 [3:16:53<25:55:31, 3.54it/s] 11%|█ | 40666/371472 [3:16:54<26:40:49, 3.44it/s] 11%|█ | 40667/371472 [3:16:54<27:14:49, 3.37it/s] 11%|█ | 40668/371472 [3:16:54<27:31:23, 3.34it/s] 11%|█ | 40669/371472 [3:16:55<26:38:21, 3.45it/s] 11%|█ | 40670/371472 [3:16:55<26:07:25, 3.52it/s] 11%|█ | 40671/371472 [3:16:55<25:26:09, 3.61it/s] 11%|█ | 40672/371472 [3:16:55<25:18:11, 3.63it/s] 11%|█ | 40673/371472 [3:16:56<24:42:44, 3.72it/s] 11%|█ | 40674/371472 [3:16:56<26:43:30, 3.44it/s] 11%|█ | 40675/371472 [3:16:56<27:22:51, 3.36it/s] 11%|█ | 40676/371472 [3:16:57<26:14:51, 3.50it/s] 11%|█ | 40677/371472 [3:16:57<26:04:27, 3.52it/s] 11%|█ | 40678/371472 [3:16:57<25:56:39, 3.54it/s] 11%|█ | 40679/371472 [3:16:57<25:15:06, 3.64it/s] 11%|█ | 40680/371472 [3:16:58<27:08:26, 3.39it/s] {'loss': 4.1847, 'learning_rate': 9.018724816307181e-07, 'epoch': 1.75} + 11%|█ | 40680/371472 [3:16:58<27:08:26, 3.39it/s] 11%|█ | 40681/371472 [3:16:58<26:18:55, 3.49it/s] 11%|█ | 40682/371472 [3:16:58<26:24:23, 3.48it/s] 11%|█ | 40683/371472 [3:16:59<26:39:41, 3.45it/s] 11%|█ | 40684/371472 [3:16:59<26:49:29, 3.43it/s] 11%|█ | 40685/371472 [3:16:59<26:39:38, 3.45it/s] 11%|█ | 40686/371472 [3:16:59<27:02:56, 3.40it/s] 11%|█ | 40687/371472 [3:17:00<27:25:45, 3.35it/s] 11%|█ | 40688/371472 [3:17:00<27:10:10, 3.38it/s] 11%|█ | 40689/371472 [3:17:00<26:52:36, 3.42it/s] 11%|█ | 40690/371472 [3:17:01<26:30:45, 3.47it/s] 11%|█ | 40691/371472 [3:17:01<25:56:30, 3.54it/s] 11%|█ | 40692/371472 [3:17:01<26:37:34, 3.45it/s] 11%|█ | 40693/371472 [3:17:01<26:36:00, 3.45it/s] 11%|█ | 40694/371472 [3:17:02<26:53:57, 3.42it/s] 11%|█ | 40695/371472 [3:17:02<27:11:43, 3.38it/s] 11%|█ | 40696/371472 [3:17:02<27:52:56, 3.30it/s] 11%|█ | 40697/371472 [3:17:03<27:32:20, 3.34it/s] 11%|█ | 40698/371472 [3:17:03<26:52:40, 3.42it/s] 11%|█ | 40699/371472 [3:17:03<26:50:35, 3.42it/s] 11%|█ | 40700/371472 [3:17:03<26:08:28, 3.51it/s] {'loss': 4.3277, 'learning_rate': 9.018239996552393e-07, 'epoch': 1.75} + 11%|█ | 40700/371472 [3:17:03<26:08:28, 3.51it/s] 11%|█ | 40701/371472 [3:17:04<25:30:52, 3.60it/s] 11%|█ | 40702/371472 [3:17:04<25:37:24, 3.59it/s] 11%|█ | 40703/371472 [3:17:04<25:16:33, 3.64it/s] 11%|█ | 40704/371472 [3:17:05<25:35:40, 3.59it/s] 11%|█ | 40705/371472 [3:17:05<24:40:00, 3.72it/s] 11%|█ | 40706/371472 [3:17:05<24:07:09, 3.81it/s] 11%|█ | 40707/371472 [3:17:05<24:51:32, 3.70it/s] 11%|█ | 40708/371472 [3:17:06<25:06:55, 3.66it/s] 11%|█ | 40709/371472 [3:17:06<26:18:24, 3.49it/s] 11%|█ | 40710/371472 [3:17:06<26:05:04, 3.52it/s] 11%|█ | 40711/371472 [3:17:06<25:10:39, 3.65it/s] 11%|█ | 40712/371472 [3:17:07<26:11:10, 3.51it/s] 11%|█ | 40713/371472 [3:17:07<26:18:00, 3.49it/s] 11%|█ | 40714/371472 [3:17:07<25:39:27, 3.58it/s] 11%|█ | 40715/371472 [3:17:08<25:45:04, 3.57it/s] 11%|█ | 40716/371472 [3:17:08<25:26:57, 3.61it/s] 11%|█ | 40717/371472 [3:17:08<25:57:01, 3.54it/s] 11%|█ | 40718/371472 [3:17:08<25:09:19, 3.65it/s] 11%|█ | 40719/371472 [3:17:09<27:17:45, 3.37it/s] 11%|█ | 40720/371472 [3:17:09<26:43:59, 3.44it/s] {'loss': 4.2053, 'learning_rate': 9.017755176797604e-07, 'epoch': 1.75} + 11%|█ | 40720/371472 [3:17:09<26:43:59, 3.44it/s] 11%|█ | 40721/371472 [3:17:09<26:12:21, 3.51it/s] 11%|█ | 40722/371472 [3:17:10<26:44:36, 3.44it/s] 11%|█ | 40723/371472 [3:17:10<26:06:03, 3.52it/s] 11%|█ | 40724/371472 [3:17:10<25:25:58, 3.61it/s] 11%|█ | 40725/371472 [3:17:11<30:01:12, 3.06it/s] 11%|█ | 40726/371472 [3:17:11<28:00:45, 3.28it/s] 11%|█ | 40727/371472 [3:17:11<27:12:24, 3.38it/s] 11%|█ | 40728/371472 [3:17:11<25:56:01, 3.54it/s] 11%|█ | 40729/371472 [3:17:12<25:45:22, 3.57it/s] 11%|█ | 40730/371472 [3:17:12<25:41:23, 3.58it/s] 11%|█ | 40731/371472 [3:17:12<26:18:58, 3.49it/s] 11%|█ | 40732/371472 [3:17:13<25:39:58, 3.58it/s] 11%|█ | 40733/371472 [3:17:13<25:26:39, 3.61it/s] 11%|█ | 40734/371472 [3:17:13<26:49:42, 3.42it/s] 11%|█ | 40735/371472 [3:17:13<27:59:37, 3.28it/s] 11%|█ | 40736/371472 [3:17:14<28:03:16, 3.27it/s] 11%|█ | 40737/371472 [3:17:14<28:12:52, 3.26it/s] 11%|█ | 40738/371472 [3:17:14<26:59:49, 3.40it/s] 11%|█ | 40739/371472 [3:17:15<26:15:53, 3.50it/s] 11%|█ | 40740/371472 [3:17:15<26:22:17, 3.48it/s] {'loss': 4.4161, 'learning_rate': 9.017270357042814e-07, 'epoch': 1.75} + 11%|█ | 40740/371472 [3:17:15<26:22:17, 3.48it/s] 11%|█ | 40741/371472 [3:17:15<25:48:59, 3.56it/s] 11%|█ | 40742/371472 [3:17:15<26:31:48, 3.46it/s] 11%|█ | 40743/371472 [3:17:16<26:15:16, 3.50it/s] 11%|█ | 40744/371472 [3:17:16<25:54:21, 3.55it/s] 11%|█ | 40745/371472 [3:17:16<25:46:36, 3.56it/s] 11%|█ | 40746/371472 [3:17:17<25:41:49, 3.58it/s] 11%|█ | 40747/371472 [3:17:17<25:16:05, 3.64it/s] 11%|█ | 40748/371472 [3:17:17<25:14:20, 3.64it/s] 11%|█ | 40749/371472 [3:17:17<24:49:59, 3.70it/s] 11%|█ | 40750/371472 [3:17:18<25:58:03, 3.54it/s] 11%|█ | 40751/371472 [3:17:18<26:11:40, 3.51it/s] 11%|█ | 40752/371472 [3:17:18<26:38:35, 3.45it/s] 11%|█ | 40753/371472 [3:17:19<26:14:02, 3.50it/s] 11%|█ | 40754/371472 [3:17:19<26:06:05, 3.52it/s] 11%|█ | 40755/371472 [3:17:19<25:44:33, 3.57it/s] 11%|█ | 40756/371472 [3:17:19<26:42:26, 3.44it/s] 11%|█ | 40757/371472 [3:17:20<26:22:57, 3.48it/s] 11%|█ | 40758/371472 [3:17:20<25:52:02, 3.55it/s] 11%|█ | 40759/371472 [3:17:20<25:31:35, 3.60it/s] 11%|█ | 40760/371472 [3:17:21<26:20:49, 3.49it/s] {'loss': 4.4419, 'learning_rate': 9.016785537288025e-07, 'epoch': 1.76} + 11%|█ | 40760/371472 [3:17:21<26:20:49, 3.49it/s] 11%|█ | 40761/371472 [3:17:21<27:48:31, 3.30it/s] 11%|█ | 40762/371472 [3:17:21<26:13:08, 3.50it/s] 11%|█ | 40763/371472 [3:17:21<26:08:18, 3.51it/s] 11%|█ | 40764/371472 [3:17:22<27:28:54, 3.34it/s] 11%|█ | 40765/371472 [3:17:22<26:16:35, 3.50it/s] 11%|█ | 40766/371472 [3:17:22<26:00:11, 3.53it/s] 11%|█ | 40767/371472 [3:17:23<25:57:27, 3.54it/s] 11%|█ | 40768/371472 [3:17:23<27:31:59, 3.34it/s] 11%|█ | 40769/371472 [3:17:23<26:22:39, 3.48it/s] 11%|█ | 40770/371472 [3:17:23<26:20:12, 3.49it/s] 11%|█ | 40771/371472 [3:17:24<27:05:05, 3.39it/s] 11%|█ | 40772/371472 [3:17:24<26:30:44, 3.46it/s] 11%|█ | 40773/371472 [3:17:24<26:23:53, 3.48it/s] 11%|█ | 40774/371472 [3:17:25<29:56:29, 3.07it/s] 11%|█ | 40775/371472 [3:17:25<28:45:59, 3.19it/s] 11%|█ | 40776/371472 [3:17:25<27:14:05, 3.37it/s] 11%|█ | 40777/371472 [3:17:26<26:06:18, 3.52it/s] 11%|█ | 40778/371472 [3:17:26<25:49:27, 3.56it/s] 11%|█ | 40779/371472 [3:17:26<25:21:20, 3.62it/s] 11%|█ | 40780/371472 [3:17:26<25:02:25, 3.67it/s] {'loss': 4.3885, 'learning_rate': 9.016300717533237e-07, 'epoch': 1.76} + 11%|█ | 40780/371472 [3:17:26<25:02:25, 3.67it/s] 11%|█ | 40781/371472 [3:17:27<25:15:53, 3.64it/s] 11%|█ | 40782/371472 [3:17:27<25:55:55, 3.54it/s] 11%|█ | 40783/371472 [3:17:27<25:23:10, 3.62it/s] 11%|█ | 40784/371472 [3:17:28<27:12:48, 3.38it/s] 11%|█ | 40785/371472 [3:17:28<26:03:24, 3.53it/s] 11%|█ | 40786/371472 [3:17:28<27:34:47, 3.33it/s] 11%|█ | 40787/371472 [3:17:28<27:10:28, 3.38it/s] 11%|█ | 40788/371472 [3:17:29<26:32:45, 3.46it/s] 11%|█ | 40789/371472 [3:17:29<27:26:21, 3.35it/s] 11%|█ | 40790/371472 [3:17:29<27:25:20, 3.35it/s] 11%|█ | 40791/371472 [3:17:30<26:09:40, 3.51it/s] 11%|█ | 40792/371472 [3:17:30<25:52:03, 3.55it/s] 11%|█ | 40793/371472 [3:17:30<25:21:41, 3.62it/s] 11%|█ | 40794/371472 [3:17:30<24:50:52, 3.70it/s] 11%|█ | 40795/371472 [3:17:31<26:59:40, 3.40it/s] 11%|█ | 40796/371472 [3:17:31<25:39:23, 3.58it/s] 11%|█ | 40797/371472 [3:17:31<25:51:41, 3.55it/s] 11%|█ | 40798/371472 [3:17:32<27:35:30, 3.33it/s] 11%|█ | 40799/371472 [3:17:32<26:24:54, 3.48it/s] 11%|█ | 40800/371472 [3:17:32<25:38:23, 3.58it/s] {'loss': 4.3935, 'learning_rate': 9.015815897778448e-07, 'epoch': 1.76} + 11%|█ | 40800/371472 [3:17:32<25:38:23, 3.58it/s] 11%|█ | 40801/371472 [3:17:32<25:52:05, 3.55it/s] 11%|█ | 40802/371472 [3:17:33<26:25:46, 3.48it/s] 11%|█ | 40803/371472 [3:17:33<26:14:50, 3.50it/s] 11%|█ | 40804/371472 [3:17:33<26:21:35, 3.48it/s] 11%|█ | 40805/371472 [3:17:34<25:54:12, 3.55it/s] 11%|█ | 40806/371472 [3:17:34<27:49:43, 3.30it/s] 11%|█ | 40807/371472 [3:17:34<27:24:41, 3.35it/s] 11%|█ | 40808/371472 [3:17:34<28:07:15, 3.27it/s] 11%|█ | 40809/371472 [3:17:35<27:11:44, 3.38it/s] 11%|█ | 40810/371472 [3:17:35<29:37:06, 3.10it/s] 11%|█ | 40811/371472 [3:17:35<28:16:08, 3.25it/s] 11%|█ | 40812/371472 [3:17:36<27:12:22, 3.38it/s] 11%|█ | 40813/371472 [3:17:36<27:56:23, 3.29it/s] 11%|█ | 40814/371472 [3:17:36<27:56:39, 3.29it/s] 11%|█ | 40815/371472 [3:17:37<28:49:25, 3.19it/s] 11%|█ | 40816/371472 [3:17:37<27:29:12, 3.34it/s] 11%|█ | 40817/371472 [3:17:37<27:56:16, 3.29it/s] 11%|█ | 40818/371472 [3:17:38<31:27:30, 2.92it/s] 11%|█ | 40819/371472 [3:17:38<29:18:43, 3.13it/s] 11%|█ | 40820/371472 [3:17:38<28:17:14, 3.25it/s] {'loss': 4.4418, 'learning_rate': 9.015331078023659e-07, 'epoch': 1.76} + 11%|█ | 40820/371472 [3:17:38<28:17:14, 3.25it/s] 11%|█ | 40821/371472 [3:17:38<28:16:04, 3.25it/s] 11%|█ | 40822/371472 [3:17:39<27:00:15, 3.40it/s] 11%|█ | 40823/371472 [3:17:39<26:53:16, 3.42it/s] 11%|█ | 40824/371472 [3:17:39<26:21:33, 3.48it/s] 11%|█ | 40825/371472 [3:17:40<27:10:40, 3.38it/s] 11%|█ | 40826/371472 [3:17:40<26:22:30, 3.48it/s] 11%|█ | 40827/371472 [3:17:40<25:20:58, 3.62it/s] 11%|█ | 40828/371472 [3:17:40<25:39:56, 3.58it/s] 11%|█ | 40829/371472 [3:17:41<25:17:55, 3.63it/s] 11%|█ | 40830/371472 [3:17:41<24:49:14, 3.70it/s] 11%|█ | 40831/371472 [3:17:41<25:25:56, 3.61it/s] 11%|█ | 40832/371472 [3:17:42<25:17:04, 3.63it/s] 11%|█ | 40833/371472 [3:17:42<24:57:23, 3.68it/s] 11%|█ | 40834/371472 [3:17:42<27:25:32, 3.35it/s] 11%|█ | 40835/371472 [3:17:42<28:00:12, 3.28it/s] 11%|█ | 40836/371472 [3:17:43<26:26:00, 3.47it/s] 11%|█ | 40837/371472 [3:17:43<25:54:14, 3.55it/s] 11%|█ | 40838/371472 [3:17:43<26:03:30, 3.52it/s] 11%|█ | 40839/371472 [3:17:44<25:57:21, 3.54it/s] 11%|█ | 40840/371472 [3:17:44<25:14:03, 3.64it/s] {'loss': 4.517, 'learning_rate': 9.01484625826887e-07, 'epoch': 1.76} + 11%|█ | 40840/371472 [3:17:44<25:14:03, 3.64it/s] 11%|█ | 40841/371472 [3:17:44<24:45:03, 3.71it/s] 11%|█ | 40842/371472 [3:17:44<24:42:23, 3.72it/s] 11%|█ | 40843/371472 [3:17:45<26:34:56, 3.45it/s] 11%|█ | 40844/371472 [3:17:45<25:40:42, 3.58it/s] 11%|█ | 40845/371472 [3:17:45<25:44:39, 3.57it/s] 11%|█ | 40846/371472 [3:17:45<25:34:32, 3.59it/s] 11%|█ | 40847/371472 [3:17:46<25:32:09, 3.60it/s] 11%|█ | 40848/371472 [3:17:46<25:00:04, 3.67it/s] 11%|█ | 40849/371472 [3:17:46<24:51:35, 3.69it/s] 11%|█ | 40850/371472 [3:17:47<24:38:35, 3.73it/s] 11%|█ | 40851/371472 [3:17:47<24:04:35, 3.81it/s] 11%|█ | 40852/371472 [3:17:47<25:40:52, 3.58it/s] 11%|█ | 40853/371472 [3:17:47<25:36:46, 3.59it/s] 11%|█ | 40854/371472 [3:17:48<25:39:28, 3.58it/s] 11%|█ | 40855/371472 [3:17:48<25:15:47, 3.64it/s] 11%|█ | 40856/371472 [3:17:48<25:06:04, 3.66it/s] 11%|█ | 40857/371472 [3:17:48<25:08:32, 3.65it/s] 11%|█ | 40858/371472 [3:17:49<25:29:48, 3.60it/s] 11%|█ | 40859/371472 [3:17:49<24:53:47, 3.69it/s] 11%|█ | 40860/371472 [3:17:49<25:56:57, 3.54it/s] {'loss': 4.2625, 'learning_rate': 9.014361438514081e-07, 'epoch': 1.76} + 11%|█ | 40860/371472 [3:17:49<25:56:57, 3.54it/s] 11%|█ | 40861/371472 [3:17:50<25:25:21, 3.61it/s] 11%|█ | 40862/371472 [3:17:50<25:32:22, 3.60it/s] 11%|█ | 40863/371472 [3:17:50<25:24:22, 3.61it/s] 11%|█ | 40864/371472 [3:17:50<25:48:24, 3.56it/s] 11%|█ | 40865/371472 [3:17:51<26:01:45, 3.53it/s] 11%|█ | 40866/371472 [3:17:51<25:50:18, 3.55it/s] 11%|█ | 40867/371472 [3:17:51<25:16:18, 3.63it/s] 11%|█ | 40868/371472 [3:17:52<25:02:06, 3.67it/s] 11%|█ | 40869/371472 [3:17:52<25:17:39, 3.63it/s] 11%|█ | 40870/371472 [3:17:52<25:27:48, 3.61it/s] 11%|█ | 40871/371472 [3:17:52<28:02:22, 3.28it/s] 11%|█ | 40872/371472 [3:17:53<30:02:08, 3.06it/s] 11%|█ | 40873/371472 [3:17:53<28:56:26, 3.17it/s] 11%|█ | 40874/371472 [3:17:53<29:55:08, 3.07it/s] 11%|█ | 40875/371472 [3:17:54<28:25:16, 3.23it/s] 11%|█ | 40876/371472 [3:17:54<31:39:27, 2.90it/s] 11%|█ | 40877/371472 [3:17:54<30:26:37, 3.02it/s] 11%|█ | 40878/371472 [3:17:55<29:55:16, 3.07it/s] 11%|█ | 40879/371472 [3:17:55<27:44:31, 3.31it/s] 11%|█ | 40880/371472 [3:17:55<27:09:27, 3.38it/s] {'loss': 4.5596, 'learning_rate': 9.013876618759291e-07, 'epoch': 1.76} + 11%|█ | 40880/371472 [3:17:55<27:09:27, 3.38it/s] 11%|█ | 40881/371472 [3:17:56<28:29:20, 3.22it/s] 11%|█ | 40882/371472 [3:17:56<28:58:16, 3.17it/s] 11%|█ | 40883/371472 [3:17:56<28:43:45, 3.20it/s] 11%|█ | 40884/371472 [3:17:57<27:54:34, 3.29it/s] 11%|█ | 40885/371472 [3:17:57<26:34:25, 3.46it/s] 11%|█ | 40886/371472 [3:17:57<26:13:14, 3.50it/s] 11%|█ | 40887/371472 [3:17:57<27:49:16, 3.30it/s] 11%|█ | 40888/371472 [3:17:58<27:44:45, 3.31it/s] 11%|█ | 40889/371472 [3:17:58<27:48:58, 3.30it/s] 11%|█ | 40890/371472 [3:17:58<27:31:15, 3.34it/s] 11%|█ | 40891/371472 [3:17:59<26:17:32, 3.49it/s] 11%|█ | 40892/371472 [3:17:59<25:42:34, 3.57it/s] 11%|█ | 40893/371472 [3:17:59<25:56:43, 3.54it/s] 11%|█ | 40894/371472 [3:17:59<25:29:07, 3.60it/s] 11%|█ | 40895/371472 [3:18:00<26:08:57, 3.51it/s] 11%|█ | 40896/371472 [3:18:00<25:43:11, 3.57it/s] 11%|█ | 40897/371472 [3:18:00<25:04:04, 3.66it/s] 11%|█ | 40898/371472 [3:18:01<24:29:52, 3.75it/s] 11%|█ | 40899/371472 [3:18:01<24:36:30, 3.73it/s] 11%|█ | 40900/371472 [3:18:01<24:47:08, 3.70it/s] {'loss': 4.3313, 'learning_rate': 9.013391799004503e-07, 'epoch': 1.76} + 11%|█ | 40900/371472 [3:18:01<24:47:08, 3.70it/s] 11%|█ | 40901/371472 [3:18:01<25:22:05, 3.62it/s] 11%|█ | 40902/371472 [3:18:02<25:04:46, 3.66it/s] 11%|█ | 40903/371472 [3:18:02<25:04:42, 3.66it/s] 11%|█ | 40904/371472 [3:18:02<24:45:10, 3.71it/s] 11%|█ | 40905/371472 [3:18:02<26:13:01, 3.50it/s] 11%|█ | 40906/371472 [3:18:03<25:26:58, 3.61it/s] 11%|█ | 40907/371472 [3:18:03<25:18:27, 3.63it/s] 11%|█ | 40908/371472 [3:18:03<26:07:39, 3.51it/s] 11%|█ | 40909/371472 [3:18:04<26:53:03, 3.42it/s] 11%|█ | 40910/371472 [3:18:04<26:48:34, 3.42it/s] 11%|█ | 40911/371472 [3:18:04<26:18:05, 3.49it/s] 11%|█ | 40912/371472 [3:18:04<25:19:42, 3.63it/s] 11%|█ | 40913/371472 [3:18:05<25:17:50, 3.63it/s] 11%|█ | 40914/371472 [3:18:05<25:17:11, 3.63it/s] 11%|█ | 40915/371472 [3:18:05<26:17:39, 3.49it/s] 11%|█ | 40916/371472 [3:18:06<25:46:23, 3.56it/s] 11%|█ | 40917/371472 [3:18:06<26:41:57, 3.44it/s] 11%|█ | 40918/371472 [3:18:06<26:38:05, 3.45it/s] 11%|█ | 40919/371472 [3:18:06<25:47:34, 3.56it/s] 11%|█ | 40920/371472 [3:18:07<24:51:37, 3.69it/s] {'loss': 4.4232, 'learning_rate': 9.012906979249714e-07, 'epoch': 1.76} + 11%|█ | 40920/371472 [3:18:07<24:51:37, 3.69it/s] 11%|█ | 40921/371472 [3:18:07<25:17:32, 3.63it/s] 11%|█ | 40922/371472 [3:18:07<26:08:49, 3.51it/s] 11%|█ | 40923/371472 [3:18:08<26:29:58, 3.46it/s] 11%|█ | 40924/371472 [3:18:08<27:12:07, 3.38it/s] 11%|█ | 40925/371472 [3:18:08<26:39:43, 3.44it/s] 11%|█ | 40926/371472 [3:18:08<27:13:11, 3.37it/s] 11%|█ | 40927/371472 [3:18:09<27:13:07, 3.37it/s] 11%|█ | 40928/371472 [3:18:09<27:19:50, 3.36it/s] 11%|█ | 40929/371472 [3:18:09<28:09:37, 3.26it/s] 11%|█ | 40930/371472 [3:18:10<28:25:13, 3.23it/s] 11%|█ | 40931/371472 [3:18:10<27:59:47, 3.28it/s] 11%|█ | 40932/371472 [3:18:10<26:57:20, 3.41it/s] 11%|█ | 40933/371472 [3:18:11<26:09:55, 3.51it/s] 11%|█ | 40934/371472 [3:18:11<27:01:31, 3.40it/s] 11%|█ | 40935/371472 [3:18:11<29:47:24, 3.08it/s] 11%|█ | 40936/371472 [3:18:11<27:30:22, 3.34it/s] 11%|█ | 40937/371472 [3:18:12<26:41:53, 3.44it/s] 11%|█ | 40938/371472 [3:18:12<26:41:02, 3.44it/s] 11%|█ | 40939/371472 [3:18:12<27:58:39, 3.28it/s] 11%|█ | 40940/371472 [3:18:13<31:15:05, 2.94it/s] {'loss': 4.2839, 'learning_rate': 9.012422159494926e-07, 'epoch': 1.76} + 11%|█ | 40940/371472 [3:18:13<31:15:05, 2.94it/s] 11%|█ | 40941/371472 [3:18:13<29:43:22, 3.09it/s] 11%|█ | 40942/371472 [3:18:13<29:46:05, 3.08it/s] 11%|█ | 40943/371472 [3:18:14<28:07:25, 3.26it/s] 11%|█ | 40944/371472 [3:18:14<26:51:21, 3.42it/s] 11%|█ | 40945/371472 [3:18:14<26:28:31, 3.47it/s] 11%|█ | 40946/371472 [3:18:15<26:14:35, 3.50it/s] 11%|█ | 40947/371472 [3:18:15<25:42:44, 3.57it/s] 11%|█ | 40948/371472 [3:18:15<26:05:05, 3.52it/s] 11%|█ | 40949/371472 [3:18:15<25:27:12, 3.61it/s] 11%|█ | 40950/371472 [3:18:16<25:19:49, 3.62it/s] 11%|█ | 40951/371472 [3:18:16<24:38:17, 3.73it/s] 11%|█ | 40952/371472 [3:18:16<25:13:09, 3.64it/s] 11%|█ | 40953/371472 [3:18:16<24:50:11, 3.70it/s] 11%|█ | 40954/371472 [3:18:17<24:57:46, 3.68it/s] 11%|█ | 40955/371472 [3:18:17<25:40:06, 3.58it/s] 11%|█ | 40956/371472 [3:18:17<26:19:25, 3.49it/s] 11%|█ | 40957/371472 [3:18:18<26:11:34, 3.51it/s] 11%|█ | 40958/371472 [3:18:18<26:33:23, 3.46it/s] 11%|█ | 40959/371472 [3:18:18<25:46:20, 3.56it/s] 11%|█ | 40960/371472 [3:18:18<26:57:02, 3.41it/s] {'loss': 4.5854, 'learning_rate': 9.011937339740136e-07, 'epoch': 1.76} + 11%|█ | 40960/371472 [3:18:18<26:57:02, 3.41it/s] 11%|█ | 40961/371472 [3:18:19<25:47:44, 3.56it/s] 11%|█ | 40962/371472 [3:18:19<26:02:26, 3.53it/s] 11%|█ | 40963/371472 [3:18:19<25:28:22, 3.60it/s] 11%|█ | 40964/371472 [3:18:20<25:46:50, 3.56it/s] 11%|█ | 40965/371472 [3:18:20<25:45:16, 3.56it/s] 11%|█ | 40966/371472 [3:18:20<25:57:07, 3.54it/s] 11%|█ | 40967/371472 [3:18:20<26:33:24, 3.46it/s] 11%|█ | 40968/371472 [3:18:21<26:10:57, 3.51it/s] 11%|█ | 40969/371472 [3:18:21<25:53:26, 3.55it/s] 11%|█ | 40970/371472 [3:18:21<25:46:01, 3.56it/s] 11%|█ | 40971/371472 [3:18:22<25:59:31, 3.53it/s] 11%|█ | 40972/371472 [3:18:22<25:01:51, 3.67it/s] 11%|█ | 40973/371472 [3:18:22<24:36:51, 3.73it/s] 11%|█ | 40974/371472 [3:18:22<26:30:30, 3.46it/s] 11%|█ | 40975/371472 [3:18:23<26:36:26, 3.45it/s] 11%|█ | 40976/371472 [3:18:23<27:03:49, 3.39it/s] 11%|█ | 40977/371472 [3:18:23<26:36:04, 3.45it/s] 11%|█ | 40978/371472 [3:18:24<26:13:34, 3.50it/s] 11%|█ | 40979/371472 [3:18:24<28:12:09, 3.26it/s] 11%|█ | 40980/371472 [3:18:24<27:05:49, 3.39it/s] {'loss': 4.561, 'learning_rate': 9.011452519985347e-07, 'epoch': 1.77} + 11%|█ | 40980/371472 [3:18:24<27:05:49, 3.39it/s] 11%|█ | 40981/371472 [3:18:24<26:02:46, 3.52it/s] 11%|█ | 40982/371472 [3:18:25<25:46:01, 3.56it/s] 11%|█ | 40983/371472 [3:18:25<24:54:35, 3.69it/s] 11%|█ | 40984/371472 [3:18:25<27:01:24, 3.40it/s] 11%|█ | 40985/371472 [3:18:26<27:21:54, 3.35it/s] 11%|█ | 40986/371472 [3:18:26<26:09:55, 3.51it/s] 11%|█ | 40987/371472 [3:18:26<25:49:05, 3.56it/s] 11%|█ | 40988/371472 [3:18:26<26:42:25, 3.44it/s] 11%|█ | 40989/371472 [3:18:27<28:28:32, 3.22it/s] 11%|█ | 40990/371472 [3:18:27<27:17:41, 3.36it/s] 11%|█ | 40991/371472 [3:18:27<26:05:39, 3.52it/s] 11%|█ | 40992/371472 [3:18:28<27:25:25, 3.35it/s] 11%|█ | 40993/371472 [3:18:28<26:49:04, 3.42it/s] 11%|█ | 40994/371472 [3:18:28<26:03:38, 3.52it/s] 11%|█ | 40995/371472 [3:18:29<29:21:39, 3.13it/s] 11%|█ | 40996/371472 [3:18:29<28:09:10, 3.26it/s] 11%|█ | 40997/371472 [3:18:29<27:18:03, 3.36it/s] 11%|█ | 40998/371472 [3:18:29<27:38:37, 3.32it/s] 11%|█ | 40999/371472 [3:18:30<28:38:04, 3.21it/s] 11%|█ | 41000/371472 [3:18:30<28:11:03, 3.26it/s] {'loss': 4.4233, 'learning_rate': 9.010967700230557e-07, 'epoch': 1.77} + 11%|█ | 41000/371472 [3:18:30<28:11:03, 3.26it/s] 11%|█ | 41001/371472 [3:18:30<28:25:59, 3.23it/s] 11%|█ | 41002/371472 [3:18:31<27:57:49, 3.28it/s] 11%|█ | 41003/371472 [3:18:31<31:38:28, 2.90it/s] 11%|█ | 41004/371472 [3:18:31<29:21:06, 3.13it/s] 11%|█ | 41005/371472 [3:18:32<29:49:29, 3.08it/s] 11%|█ | 41006/371472 [3:18:32<28:28:53, 3.22it/s] 11%|█ | 41007/371472 [3:18:32<27:24:19, 3.35it/s] 11%|█ | 41008/371472 [3:18:33<27:17:44, 3.36it/s] 11%|█ | 41009/371472 [3:18:33<26:46:21, 3.43it/s] 11%|█ | 41010/371472 [3:18:33<27:04:22, 3.39it/s] 11%|█ | 41011/371472 [3:18:33<27:37:07, 3.32it/s] 11%|█ | 41012/371472 [3:18:34<26:36:34, 3.45it/s] 11%|█ | 41013/371472 [3:18:34<25:44:00, 3.57it/s] 11%|█ | 41014/371472 [3:18:34<25:13:33, 3.64it/s] 11%|█ | 41015/371472 [3:18:35<26:00:37, 3.53it/s] 11%|█ | 41016/371472 [3:18:35<26:43:09, 3.44it/s] 11%|█ | 41017/371472 [3:18:35<27:55:43, 3.29it/s] 11%|█ | 41018/371472 [3:18:35<27:08:52, 3.38it/s] 11%|█ | 41019/371472 [3:18:36<26:51:31, 3.42it/s] 11%|█ | 41020/371472 [3:18:36<25:30:12, 3.60it/s] {'loss': 4.2088, 'learning_rate': 9.01048288047577e-07, 'epoch': 1.77} + 11%|█ | 41020/371472 [3:18:36<25:30:12, 3.60it/s] 11%|█ | 41021/371472 [3:18:36<26:27:44, 3.47it/s] 11%|█ | 41022/371472 [3:18:37<27:33:37, 3.33it/s] 11%|█ | 41023/371472 [3:18:37<26:30:47, 3.46it/s] 11%|█ | 41024/371472 [3:18:37<26:46:05, 3.43it/s] 11%|█ | 41025/371472 [3:18:37<26:51:49, 3.42it/s] 11%|█ | 41026/371472 [3:18:38<26:02:56, 3.52it/s] 11%|█ | 41027/371472 [3:18:38<25:22:14, 3.62it/s] 11%|█ | 41028/371472 [3:18:38<25:06:34, 3.66it/s] 11%|█ | 41029/371472 [3:18:39<25:19:47, 3.62it/s] 11%|█ | 41030/371472 [3:18:39<24:44:33, 3.71it/s] 11%|█ | 41031/371472 [3:18:39<25:19:47, 3.62it/s] 11%|█ | 41032/371472 [3:18:39<25:43:22, 3.57it/s] 11%|█ | 41033/371472 [3:18:40<26:00:57, 3.53it/s] 11%|█ | 41034/371472 [3:18:40<25:03:34, 3.66it/s] 11%|█ | 41035/371472 [3:18:40<25:21:53, 3.62it/s] 11%|█ | 41036/371472 [3:18:41<25:41:23, 3.57it/s] 11%|█ | 41037/371472 [3:18:41<25:11:48, 3.64it/s] 11%|█ | 41038/371472 [3:18:41<25:17:32, 3.63it/s] 11%|█ | 41039/371472 [3:18:41<25:07:07, 3.65it/s] 11%|█ | 41040/371472 [3:18:42<26:05:04, 3.52it/s] {'loss': 4.473, 'learning_rate': 9.00999806072098e-07, 'epoch': 1.77} + 11%|█ | 41040/371472 [3:18:42<26:05:04, 3.52it/s] 11%|█ | 41041/371472 [3:18:42<25:49:31, 3.55it/s] 11%|█ | 41042/371472 [3:18:42<25:19:38, 3.62it/s] 11%|█ | 41043/371472 [3:18:42<26:06:16, 3.52it/s] 11%|█ | 41044/371472 [3:18:43<26:16:20, 3.49it/s] 11%|█ | 41045/371472 [3:18:43<26:35:44, 3.45it/s] 11%|█ | 41046/371472 [3:18:43<27:03:21, 3.39it/s] 11%|█ | 41047/371472 [3:18:44<26:24:12, 3.48it/s] 11%|█ | 41048/371472 [3:18:44<26:09:02, 3.51it/s] 11%|█ | 41049/371472 [3:18:44<25:53:18, 3.55it/s] 11%|█ | 41050/371472 [3:18:44<26:13:20, 3.50it/s] 11%|█ | 41051/371472 [3:18:45<26:27:23, 3.47it/s] 11%|█ | 41052/371472 [3:18:45<25:53:52, 3.54it/s] 11%|█ | 41053/371472 [3:18:45<26:46:14, 3.43it/s] 11%|█ | 41054/371472 [3:18:46<26:05:53, 3.52it/s] 11%|█ | 41055/371472 [3:18:46<26:53:52, 3.41it/s] 11%|█ | 41056/371472 [3:18:46<25:59:37, 3.53it/s] 11%|█ | 41057/371472 [3:18:46<25:34:19, 3.59it/s] 11%|█ | 41058/371472 [3:18:47<25:22:39, 3.62it/s] 11%|█ | 41059/371472 [3:18:47<25:28:30, 3.60it/s] 11%|█ | 41060/371472 [3:18:47<24:46:35, 3.70it/s] {'loss': 4.3715, 'learning_rate': 9.009513240966192e-07, 'epoch': 1.77} + 11%|█ | 41060/371472 [3:18:47<24:46:35, 3.70it/s] 11%|█ | 41061/371472 [3:18:48<26:07:08, 3.51it/s] 11%|█ | 41062/371472 [3:18:48<25:42:11, 3.57it/s] 11%|█ | 41063/371472 [3:18:48<26:05:14, 3.52it/s] 11%|█ | 41064/371472 [3:18:48<25:47:58, 3.56it/s] 11%|█ | 41065/371472 [3:18:49<25:45:48, 3.56it/s] 11%|█ | 41066/371472 [3:18:49<25:36:30, 3.58it/s] 11%|█ | 41067/371472 [3:18:49<25:07:41, 3.65it/s] 11%|█ | 41068/371472 [3:18:50<26:06:47, 3.51it/s] 11%|█ | 41069/371472 [3:18:50<25:50:52, 3.55it/s] 11%|█ | 41070/371472 [3:18:50<25:11:52, 3.64it/s] 11%|█ | 41071/371472 [3:18:50<26:08:52, 3.51it/s] 11%|█ | 41072/371472 [3:18:51<26:23:50, 3.48it/s] 11%|█ | 41073/371472 [3:18:51<26:07:44, 3.51it/s] 11%|█ | 41074/371472 [3:18:51<25:55:46, 3.54it/s] 11%|█ | 41075/371472 [3:18:52<25:18:28, 3.63it/s] 11%|█ | 41076/371472 [3:18:52<25:24:20, 3.61it/s] 11%|█ | 41077/371472 [3:18:52<25:47:00, 3.56it/s] 11%|█ | 41078/371472 [3:18:52<26:34:04, 3.45it/s] 11%|█ | 41079/371472 [3:18:53<26:11:00, 3.51it/s] 11%|█ | 41080/371472 [3:18:53<26:45:07, 3.43it/s] {'loss': 4.3307, 'learning_rate': 9.009028421211403e-07, 'epoch': 1.77} + 11%|█ | 41080/371472 [3:18:53<26:45:07, 3.43it/s] 11%|█ | 41081/371472 [3:18:53<26:16:52, 3.49it/s] 11%|█ | 41082/371472 [3:18:54<26:04:03, 3.52it/s] 11%|█ | 41083/371472 [3:18:54<25:45:54, 3.56it/s] 11%|█ | 41084/371472 [3:18:54<25:25:51, 3.61it/s] 11%|█ | 41085/371472 [3:18:54<25:31:13, 3.60it/s] 11%|█ | 41086/371472 [3:18:55<24:54:16, 3.69it/s] 11%|█ | 41087/371472 [3:18:55<24:23:26, 3.76it/s] 11%|█ | 41088/371472 [3:18:55<24:18:51, 3.77it/s] 11%|█ | 41089/371472 [3:18:55<25:37:57, 3.58it/s] 11%|█ | 41090/371472 [3:18:56<25:21:54, 3.62it/s] 11%|█ | 41091/371472 [3:18:56<25:32:23, 3.59it/s] 11%|█ | 41092/371472 [3:18:56<25:02:54, 3.66it/s] 11%|█ | 41093/371472 [3:18:56<24:23:19, 3.76it/s] 11%|█ | 41094/371472 [3:18:57<24:14:27, 3.79it/s] 11%|█ | 41095/371472 [3:18:57<23:51:56, 3.85it/s] 11%|█ | 41096/371472 [3:18:57<25:58:04, 3.53it/s] 11%|█ | 41097/371472 [3:18:58<25:50:02, 3.55it/s] 11%|█ | 41098/371472 [3:18:58<27:46:18, 3.30it/s] 11%|█ | 41099/371472 [3:18:58<28:24:47, 3.23it/s] 11%|█ | 41100/371472 [3:18:59<26:47:05, 3.43it/s] {'loss': 4.4411, 'learning_rate': 9.008543601456614e-07, 'epoch': 1.77} + 11%|█ | 41100/371472 [3:18:59<26:47:05, 3.43it/s] 11%|█ | 41101/371472 [3:18:59<26:35:09, 3.45it/s] 11%|█ | 41102/371472 [3:18:59<27:23:09, 3.35it/s] 11%|█ | 41103/371472 [3:18:59<26:08:59, 3.51it/s] 11%|█ | 41104/371472 [3:19:00<26:37:19, 3.45it/s] 11%|█ | 41105/371472 [3:19:00<26:39:26, 3.44it/s] 11%|█ | 41106/371472 [3:19:00<26:45:05, 3.43it/s] 11%|█ | 41107/371472 [3:19:01<26:55:46, 3.41it/s] 11%|█ | 41108/371472 [3:19:01<27:17:16, 3.36it/s] 11%|█ | 41109/371472 [3:19:01<27:05:30, 3.39it/s] 11%|█ | 41110/371472 [3:19:01<26:44:59, 3.43it/s] 11%|█ | 41111/371472 [3:19:02<26:40:47, 3.44it/s] 11%|█ | 41112/371472 [3:19:02<25:49:52, 3.55it/s] 11%|█ | 41113/371472 [3:19:02<26:02:51, 3.52it/s] 11%|█ | 41114/371472 [3:19:03<24:56:16, 3.68it/s] 11%|█ | 41115/371472 [3:19:03<27:21:54, 3.35it/s] 11%|█ | 41116/371472 [3:19:03<26:54:19, 3.41it/s] 11%|█ | 41117/371472 [3:19:03<26:18:33, 3.49it/s] 11%|█ | 41118/371472 [3:19:04<25:28:25, 3.60it/s] 11%|█ | 41119/371472 [3:19:04<25:59:26, 3.53it/s] 11%|█ | 41120/371472 [3:19:04<25:44:02, 3.57it/s] {'loss': 4.5383, 'learning_rate': 9.008058781701824e-07, 'epoch': 1.77} + 11%|█ | 41120/371472 [3:19:04<25:44:02, 3.57it/s] 11%|█ | 41121/371472 [3:19:05<25:48:30, 3.56it/s] 11%|█ | 41122/371472 [3:19:05<25:13:26, 3.64it/s] 11%|█ | 41123/371472 [3:19:05<25:21:00, 3.62it/s] 11%|█ | 41124/371472 [3:19:05<25:08:18, 3.65it/s] 11%|█ | 41125/371472 [3:19:06<26:59:10, 3.40it/s] 11%|█ | 41126/371472 [3:19:06<26:00:10, 3.53it/s] 11%|█ | 41127/371472 [3:19:06<26:30:21, 3.46it/s] 11%|█ | 41128/371472 [3:19:07<29:01:14, 3.16it/s] 11%|█ | 41129/371472 [3:19:07<28:04:37, 3.27it/s] 11%|█ | 41130/371472 [3:19:07<27:17:00, 3.36it/s] 11%|█ | 41131/371472 [3:19:08<29:25:27, 3.12it/s] 11%|█ | 41132/371472 [3:19:08<27:38:34, 3.32it/s] 11%|█ | 41133/371472 [3:19:08<27:41:14, 3.31it/s] 11%|█ | 41134/371472 [3:19:08<27:09:50, 3.38it/s] 11%|█ | 41135/371472 [3:19:09<26:12:54, 3.50it/s] 11%|█ | 41136/371472 [3:19:09<25:25:27, 3.61it/s] 11%|█ | 41137/371472 [3:19:09<25:13:34, 3.64it/s] 11%|█ | 41138/371472 [3:19:10<27:28:21, 3.34it/s] 11%|█ | 41139/371472 [3:19:10<27:09:11, 3.38it/s] 11%|█ | 41140/371472 [3:19:10<26:36:23, 3.45it/s] {'loss': 4.4853, 'learning_rate': 9.007573961947035e-07, 'epoch': 1.77} + 11%|█ | 41140/371472 [3:19:10<26:36:23, 3.45it/s] 11%|█ | 41141/371472 [3:19:10<26:20:05, 3.48it/s] 11%|█ | 41142/371472 [3:19:11<26:50:29, 3.42it/s] 11%|█ | 41143/371472 [3:19:11<26:26:18, 3.47it/s] 11%|█ | 41144/371472 [3:19:11<26:28:32, 3.47it/s] 11%|█ | 41145/371472 [3:19:12<26:51:09, 3.42it/s] 11%|█ | 41146/371472 [3:19:12<26:47:02, 3.43it/s] 11%|█ | 41147/371472 [3:19:12<26:19:33, 3.49it/s] 11%|█ | 41148/371472 [3:19:12<26:19:19, 3.49it/s] 11%|█ | 41149/371472 [3:19:13<25:35:49, 3.58it/s] 11%|█ | 41150/371472 [3:19:13<26:14:37, 3.50it/s] 11%|█ | 41151/371472 [3:19:13<26:55:36, 3.41it/s] 11%|█ | 41152/371472 [3:19:14<25:58:47, 3.53it/s] 11%|█ | 41153/371472 [3:19:14<25:39:39, 3.58it/s] 11%|█ | 41154/371472 [3:19:14<24:53:49, 3.69it/s] 11%|█ | 41155/371472 [3:19:14<24:21:57, 3.77it/s] 11%|█ | 41156/371472 [3:19:15<25:44:14, 3.57it/s] 11%|█ | 41157/371472 [3:19:15<25:59:06, 3.53it/s] 11%|█ | 41158/371472 [3:19:15<30:36:25, 3.00it/s] 11%|█ | 41159/371472 [3:19:16<30:55:19, 2.97it/s] 11%|█ | 41160/371472 [3:19:16<31:40:29, 2.90it/s] {'loss': 4.2566, 'learning_rate': 9.007089142192247e-07, 'epoch': 1.77} + 11%|█ | 41160/371472 [3:19:16<31:40:29, 2.90it/s] 11%|█ | 41161/371472 [3:19:16<30:11:15, 3.04it/s] 11%|█ | 41162/371472 [3:19:17<29:23:44, 3.12it/s] 11%|█ | 41163/371472 [3:19:17<28:52:32, 3.18it/s] 11%|█ | 41164/371472 [3:19:17<29:07:41, 3.15it/s] 11%|█ | 41165/371472 [3:19:18<27:12:32, 3.37it/s] 11%|█ | 41166/371472 [3:19:18<27:10:03, 3.38it/s] 11%|█ | 41167/371472 [3:19:18<26:22:21, 3.48it/s] 11%|█ | 41168/371472 [3:19:18<26:20:36, 3.48it/s] 11%|█ | 41169/371472 [3:19:19<25:55:22, 3.54it/s] 11%|█ | 41170/371472 [3:19:19<27:56:04, 3.28it/s] 11%|█ | 41171/371472 [3:19:19<28:33:57, 3.21it/s] 11%|█ | 41172/371472 [3:19:20<28:04:23, 3.27it/s] 11%|█ | 41173/371472 [3:19:20<26:39:01, 3.44it/s] 11%|█ | 41174/371472 [3:19:20<27:26:54, 3.34it/s] 11%|█ | 41175/371472 [3:19:21<27:09:01, 3.38it/s] 11%|█ | 41176/371472 [3:19:21<26:05:23, 3.52it/s] 11%|█ | 41177/371472 [3:19:21<26:05:06, 3.52it/s] 11%|█ | 41178/371472 [3:19:21<25:19:52, 3.62it/s] 11%|█ | 41179/371472 [3:19:22<26:53:01, 3.41it/s] 11%|█ | 41180/371472 [3:19:22<26:52:01, 3.41it/s] {'loss': 4.3662, 'learning_rate': 9.006604322437459e-07, 'epoch': 1.77} + 11%|█ | 41180/371472 [3:19:22<26:52:01, 3.41it/s] 11%|█ | 41181/371472 [3:19:22<27:11:47, 3.37it/s] 11%|█ | 41182/371472 [3:19:23<26:06:13, 3.51it/s] 11%|█ | 41183/371472 [3:19:23<25:13:36, 3.64it/s] 11%|█ | 41184/371472 [3:19:23<25:01:18, 3.67it/s] 11%|█ | 41185/371472 [3:19:23<25:13:27, 3.64it/s] 11%|█ | 41186/371472 [3:19:24<25:15:18, 3.63it/s] 11%|█ | 41187/371472 [3:19:24<25:42:57, 3.57it/s] 11%|█ | 41188/371472 [3:19:24<25:56:05, 3.54it/s] 11%|█ | 41189/371472 [3:19:24<25:44:24, 3.56it/s] 11%|█ | 41190/371472 [3:19:25<25:10:31, 3.64it/s] 11%|█ | 41191/371472 [3:19:25<27:49:27, 3.30it/s] 11%|█ | 41192/371472 [3:19:25<27:43:26, 3.31it/s] 11%|█ | 41193/371472 [3:19:26<28:01:02, 3.27it/s] 11%|█ | 41194/371472 [3:19:26<27:05:23, 3.39it/s] 11%|█ | 41195/371472 [3:19:26<27:51:11, 3.29it/s] 11%|█ | 41196/371472 [3:19:27<26:58:14, 3.40it/s] 11%|█ | 41197/371472 [3:19:27<25:56:22, 3.54it/s] 11%|█ | 41198/371472 [3:19:27<25:53:21, 3.54it/s] 11%|█ | 41199/371472 [3:19:27<25:15:53, 3.63it/s] 11%|█ | 41200/371472 [3:19:28<26:12:10, 3.50it/s] {'loss': 4.293, 'learning_rate': 9.006119502682669e-07, 'epoch': 1.77} + 11%|█ | 41200/371472 [3:19:28<26:12:10, 3.50it/s] 11%|█ | 41201/371472 [3:19:28<26:58:25, 3.40it/s] 11%|█ | 41202/371472 [3:19:28<27:08:44, 3.38it/s] 11%|█ | 41203/371472 [3:19:29<26:59:35, 3.40it/s] 11%|█ | 41204/371472 [3:19:29<26:05:21, 3.52it/s] 11%|█ | 41205/371472 [3:19:29<26:43:49, 3.43it/s] 11%|█ | 41206/371472 [3:19:29<25:42:33, 3.57it/s] 11%|█ | 41207/371472 [3:19:30<26:30:34, 3.46it/s] 11%|█ | 41208/371472 [3:19:30<26:07:56, 3.51it/s] 11%|█ | 41209/371472 [3:19:30<25:18:08, 3.63it/s] 11%|█ | 41210/371472 [3:19:31<26:14:04, 3.50it/s] 11%|█ | 41211/371472 [3:19:31<27:25:51, 3.34it/s] 11%|█ | 41212/371472 [3:19:31<26:17:20, 3.49it/s] 11%|█ | 41213/371472 [3:19:31<25:38:07, 3.58it/s] 11%|█ | 41214/371472 [3:19:32<25:53:21, 3.54it/s] 11%|█ | 41215/371472 [3:19:32<25:52:45, 3.54it/s] 11%|█ | 41216/371472 [3:19:32<25:02:40, 3.66it/s] 11%|█ | 41217/371472 [3:19:33<25:51:49, 3.55it/s] 11%|█ | 41218/371472 [3:19:33<29:59:40, 3.06it/s] 11%|█ | 41219/371472 [3:19:33<28:54:09, 3.17it/s] 11%|█ | 41220/371472 [3:19:34<29:22:12, 3.12it/s] {'loss': 4.442, 'learning_rate': 9.00563468292788e-07, 'epoch': 1.78} + 11%|█ | 41220/371472 [3:19:34<29:22:12, 3.12it/s] 11%|█ | 41221/371472 [3:19:34<27:36:38, 3.32it/s] 11%|█ | 41222/371472 [3:19:34<27:37:37, 3.32it/s] 11%|█ | 41223/371472 [3:19:34<27:14:09, 3.37it/s] 11%|█ | 41224/371472 [3:19:35<27:28:18, 3.34it/s] 11%|█ | 41225/371472 [3:19:35<28:10:00, 3.26it/s] 11%|█ | 41226/371472 [3:19:35<26:54:18, 3.41it/s] 11%|█ | 41227/371472 [3:19:36<26:36:37, 3.45it/s] 11%|█ | 41228/371472 [3:19:36<26:07:31, 3.51it/s] 11%|█ | 41229/371472 [3:19:36<26:23:02, 3.48it/s] 11%|█ | 41230/371472 [3:19:36<25:29:36, 3.60it/s] 11%|█ | 41231/371472 [3:19:37<26:24:57, 3.47it/s] 11%|█ | 41232/371472 [3:19:37<25:42:12, 3.57it/s] 11%|█ | 41233/371472 [3:19:37<25:05:29, 3.66it/s] 11%|█ | 41234/371472 [3:19:38<24:28:09, 3.75it/s] 11%|█ | 41235/371472 [3:19:38<25:09:53, 3.65it/s] 11%|█ | 41236/371472 [3:19:38<25:47:03, 3.56it/s] 11%|█ | 41237/371472 [3:19:38<28:05:51, 3.26it/s] 11%|█ | 41238/371472 [3:19:39<27:27:04, 3.34it/s] 11%|█ | 41239/371472 [3:19:39<26:28:21, 3.47it/s] 11%|█ | 41240/371472 [3:19:39<26:41:19, 3.44it/s] {'loss': 4.4136, 'learning_rate': 9.005149863173091e-07, 'epoch': 1.78} + 11%|█ | 41240/371472 [3:19:39<26:41:19, 3.44it/s] 11%|█ | 41241/371472 [3:19:40<26:20:07, 3.48it/s] 11%|█ | 41242/371472 [3:19:40<28:32:04, 3.21it/s] 11%|█ | 41243/371472 [3:19:40<27:02:35, 3.39it/s] 11%|█ | 41244/371472 [3:19:40<26:56:53, 3.40it/s] 11%|█ | 41245/371472 [3:19:41<25:45:54, 3.56it/s] 11%|█ | 41246/371472 [3:19:41<26:08:38, 3.51it/s] 11%|█ | 41247/371472 [3:19:41<26:18:10, 3.49it/s] 11%|█ | 41248/371472 [3:19:42<25:33:58, 3.59it/s] 11%|█ | 41249/371472 [3:19:42<26:27:34, 3.47it/s] 11%|█ | 41250/371472 [3:19:42<26:21:04, 3.48it/s] 11%|█ | 41251/371472 [3:19:42<25:16:06, 3.63it/s] 11%|█ | 41252/371472 [3:19:43<24:37:19, 3.73it/s] 11%|█ | 41253/371472 [3:19:43<25:44:14, 3.56it/s] 11%|█ | 41254/371472 [3:19:43<25:32:12, 3.59it/s] 11%|█ | 41255/371472 [3:19:44<25:07:07, 3.65it/s] 11%|█ | 41256/371472 [3:19:44<25:48:00, 3.56it/s] 11%|█ | 41257/371472 [3:19:44<27:26:56, 3.34it/s] 11%|█ | 41258/371472 [3:19:44<28:09:27, 3.26it/s] 11%|█ | 41259/371472 [3:19:45<27:53:02, 3.29it/s] 11%|█ | 41260/371472 [3:19:45<27:18:54, 3.36it/s] {'loss': 4.4029, 'learning_rate': 9.004665043418301e-07, 'epoch': 1.78} + 11%|█ | 41260/371472 [3:19:45<27:18:54, 3.36it/s] 11%|█ | 41261/371472 [3:19:45<27:02:31, 3.39it/s] 11%|█ | 41262/371472 [3:19:46<27:34:06, 3.33it/s] 11%|█ | 41263/371472 [3:19:46<26:28:34, 3.46it/s] 11%|█ | 41264/371472 [3:19:46<25:36:34, 3.58it/s] 11%|█ | 41265/371472 [3:19:47<26:41:44, 3.44it/s] 11%|█ | 41266/371472 [3:19:47<25:44:15, 3.56it/s] 11%|█ | 41267/371472 [3:19:47<26:03:06, 3.52it/s] 11%|█ | 41268/371472 [3:19:47<28:33:46, 3.21it/s] 11%|█ | 41269/371472 [3:19:48<27:54:38, 3.29it/s] 11%|█ | 41270/371472 [3:19:48<26:30:32, 3.46it/s] 11%|█ | 41271/371472 [3:19:48<26:09:02, 3.51it/s] 11%|█ | 41272/371472 [3:19:49<27:44:09, 3.31it/s] 11%|█ | 41273/371472 [3:19:49<27:13:03, 3.37it/s] 11%|█ | 41274/371472 [3:19:49<26:38:42, 3.44it/s] 11%|█ | 41275/371472 [3:19:49<27:35:50, 3.32it/s] 11%|█ | 41276/371472 [3:19:50<26:01:40, 3.52it/s] 11%|█ | 41277/371472 [3:19:50<26:08:19, 3.51it/s] 11%|█ | 41278/371472 [3:19:50<26:26:30, 3.47it/s] 11%|█ | 41279/371472 [3:19:51<26:45:38, 3.43it/s] 11%|█ | 41280/371472 [3:19:51<27:55:49, 3.28it/s] {'loss': 4.3309, 'learning_rate': 9.004180223663513e-07, 'epoch': 1.78} + 11%|█ | 41280/371472 [3:19:51<27:55:49, 3.28it/s] 11%|█ | 41281/371472 [3:19:51<28:01:12, 3.27it/s] 11%|█ | 41282/371472 [3:19:52<27:34:20, 3.33it/s] 11%|█ | 41283/371472 [3:19:52<26:46:05, 3.43it/s] 11%|█ | 41284/371472 [3:19:52<27:51:34, 3.29it/s] 11%|█ | 41285/371472 [3:19:52<28:14:07, 3.25it/s] 11%|█ | 41286/371472 [3:19:53<26:42:37, 3.43it/s] 11%|█ | 41287/371472 [3:19:53<25:49:39, 3.55it/s] 11%|█ | 41288/371472 [3:19:53<25:47:47, 3.56it/s] 11%|█ | 41289/371472 [3:19:54<27:19:55, 3.36it/s] 11%|█ | 41290/371472 [3:19:54<26:42:28, 3.43it/s] 11%|█ | 41291/371472 [3:19:54<28:51:45, 3.18it/s] 11%|█ | 41292/371472 [3:19:54<27:00:54, 3.39it/s] 11%|█ | 41293/371472 [3:19:55<26:17:55, 3.49it/s] 11%|█ | 41294/371472 [3:19:55<26:33:29, 3.45it/s] 11%|█ | 41295/371472 [3:19:55<29:37:36, 3.10it/s] 11%|█ | 41296/371472 [3:19:56<31:28:00, 2.91it/s] 11%|█ | 41297/371472 [3:19:56<29:29:30, 3.11it/s] 11%|█ | 41298/371472 [3:19:56<28:56:03, 3.17it/s] 11%|█ | 41299/371472 [3:19:57<29:11:03, 3.14it/s] 11%|█ | 41300/371472 [3:19:57<28:41:02, 3.20it/s] {'loss': 4.5112, 'learning_rate': 9.003695403908724e-07, 'epoch': 1.78} + 11%|█ | 41300/371472 [3:19:57<28:41:02, 3.20it/s] 11%|█ | 41301/371472 [3:19:57<27:28:00, 3.34it/s] 11%|█ | 41302/371472 [3:19:58<26:16:03, 3.49it/s] 11%|█ | 41303/371472 [3:19:58<26:32:25, 3.46it/s] 11%|█ | 41304/371472 [3:19:58<26:08:58, 3.51it/s] 11%|█ | 41305/371472 [3:19:58<27:50:17, 3.29it/s] 11%|█ | 41306/371472 [3:19:59<26:16:33, 3.49it/s] 11%|█ | 41307/371472 [3:19:59<26:09:01, 3.51it/s] 11%|█ | 41308/371472 [3:19:59<25:44:45, 3.56it/s] 11%|█ | 41309/371472 [3:20:00<25:26:07, 3.61it/s] 11%|█ | 41310/371472 [3:20:00<25:02:30, 3.66it/s] 11%|█ | 41311/371472 [3:20:00<26:35:23, 3.45it/s] 11%|█ | 41312/371472 [3:20:00<26:01:27, 3.52it/s] 11%|█ | 41313/371472 [3:20:01<25:47:33, 3.56it/s] 11%|█ | 41314/371472 [3:20:01<26:49:55, 3.42it/s] 11%|█ | 41315/371472 [3:20:01<28:20:35, 3.24it/s] 11%|█ | 41316/371472 [3:20:02<27:17:43, 3.36it/s] 11%|█ | 41317/371472 [3:20:02<26:49:55, 3.42it/s] 11%|█ | 41318/371472 [3:20:02<27:48:36, 3.30it/s] 11%|█ | 41319/371472 [3:20:02<26:29:43, 3.46it/s] 11%|█ | 41320/371472 [3:20:03<26:23:21, 3.48it/s] {'loss': 4.2248, 'learning_rate': 9.003210584153936e-07, 'epoch': 1.78} + 11%|█ | 41320/371472 [3:20:03<26:23:21, 3.48it/s] 11%|█ | 41321/371472 [3:20:03<25:45:33, 3.56it/s] 11%|█ | 41322/371472 [3:20:03<28:03:41, 3.27it/s] 11%|█ | 41323/371472 [3:20:04<27:17:23, 3.36it/s] 11%|█ | 41324/371472 [3:20:04<27:01:04, 3.39it/s] 11%|█ | 41325/371472 [3:20:04<26:00:10, 3.53it/s] 11%|█ | 41326/371472 [3:20:05<28:40:41, 3.20it/s] 11%|█ | 41327/371472 [3:20:05<27:55:26, 3.28it/s] 11%|█ | 41328/371472 [3:20:05<28:13:16, 3.25it/s] 11%|█ | 41329/371472 [3:20:05<27:54:33, 3.29it/s] 11%|█ | 41330/371472 [3:20:06<30:11:57, 3.04it/s] 11%|█ | 41331/371472 [3:20:06<29:07:16, 3.15it/s] 11%|█ | 41332/371472 [3:20:06<29:08:00, 3.15it/s] 11%|█ | 41333/371472 [3:20:07<30:55:43, 2.97it/s] 11%|█ | 41334/371472 [3:20:07<29:15:47, 3.13it/s] 11%|█ | 41335/371472 [3:20:07<27:53:45, 3.29it/s] 11%|█ | 41336/371472 [3:20:08<29:26:21, 3.12it/s] 11%|█ | 41337/371472 [3:20:08<27:41:38, 3.31it/s] 11%|█ | 41338/371472 [3:20:08<27:28:53, 3.34it/s] 11%|█ | 41339/371472 [3:20:09<29:08:22, 3.15it/s] 11%|█ | 41340/371472 [3:20:09<27:11:57, 3.37it/s] {'loss': 4.4054, 'learning_rate': 9.002725764399146e-07, 'epoch': 1.78} + 11%|█ | 41340/371472 [3:20:09<27:11:57, 3.37it/s] 11%|█ | 41341/371472 [3:20:09<27:42:09, 3.31it/s] 11%|█ | 41342/371472 [3:20:10<27:00:45, 3.39it/s] 11%|█ | 41343/371472 [3:20:10<27:13:23, 3.37it/s] 11%|█ | 41344/371472 [3:20:10<26:07:59, 3.51it/s] 11%|█ | 41345/371472 [3:20:10<26:59:43, 3.40it/s] 11%|█ | 41346/371472 [3:20:11<27:48:20, 3.30it/s] 11%|█ | 41347/371472 [3:20:11<26:28:07, 3.46it/s] 11%|█ | 41348/371472 [3:20:11<26:17:07, 3.49it/s] 11%|█ | 41349/371472 [3:20:12<26:23:50, 3.47it/s] 11%|█ | 41350/371472 [3:20:12<26:10:28, 3.50it/s] 11%|█ | 41351/371472 [3:20:12<27:18:36, 3.36it/s] 11%|█ | 41352/371472 [3:20:12<26:35:00, 3.45it/s] 11%|█ | 41353/371472 [3:20:13<27:03:59, 3.39it/s] 11%|█ | 41354/371472 [3:20:13<28:00:26, 3.27it/s] 11%|█ | 41355/371472 [3:20:13<27:22:10, 3.35it/s] 11%|█ | 41356/371472 [3:20:14<29:06:01, 3.15it/s] 11%|█ | 41357/371472 [3:20:14<28:01:37, 3.27it/s] 11%|█ | 41358/371472 [3:20:14<26:22:24, 3.48it/s] 11%|█ | 41359/371472 [3:20:15<26:26:35, 3.47it/s] 11%|█ | 41360/371472 [3:20:15<27:10:12, 3.37it/s] {'loss': 4.3054, 'learning_rate': 9.002240944644357e-07, 'epoch': 1.78} + 11%|█ | 41360/371472 [3:20:15<27:10:12, 3.37it/s] 11%|█ | 41361/371472 [3:20:15<26:10:41, 3.50it/s] 11%|█ | 41362/371472 [3:20:15<25:04:18, 3.66it/s] 11%|█ | 41363/371472 [3:20:16<25:03:18, 3.66it/s] 11%|█ | 41364/371472 [3:20:16<26:29:17, 3.46it/s] 11%|█ | 41365/371472 [3:20:16<25:59:21, 3.53it/s] 11%|█ | 41366/371472 [3:20:17<28:22:46, 3.23it/s] 11%|█ | 41367/371472 [3:20:17<28:29:48, 3.22it/s] 11%|█ | 41368/371472 [3:20:17<28:58:56, 3.16it/s] 11%|█ | 41369/371472 [3:20:18<28:02:11, 3.27it/s] 11%|█ | 41370/371472 [3:20:18<27:50:57, 3.29it/s] 11%|█ | 41371/371472 [3:20:18<27:00:46, 3.39it/s] 11%|█ | 41372/371472 [3:20:18<26:04:35, 3.52it/s] 11%|█ | 41373/371472 [3:20:19<27:28:28, 3.34it/s] 11%|█ | 41374/371472 [3:20:19<27:22:38, 3.35it/s] 11%|█ | 41375/371472 [3:20:19<26:27:25, 3.47it/s] 11%|█ | 41376/371472 [3:20:19<25:22:16, 3.61it/s] 11%|█ | 41377/371472 [3:20:20<25:32:03, 3.59it/s] 11%|█ | 41378/371472 [3:20:20<26:02:02, 3.52it/s] 11%|█ | 41379/371472 [3:20:20<26:13:47, 3.50it/s] 11%|█ | 41380/371472 [3:20:21<25:29:50, 3.60it/s] {'loss': 4.4377, 'learning_rate': 9.001756124889568e-07, 'epoch': 1.78} + 11%|█ | 41380/371472 [3:20:21<25:29:50, 3.60it/s] 11%|█ | 41381/371472 [3:20:21<25:27:58, 3.60it/s] 11%|█ | 41382/371472 [3:20:21<24:49:17, 3.69it/s] 11%|█ | 41383/371472 [3:20:21<24:57:10, 3.67it/s] 11%|█ | 41384/371472 [3:20:22<25:48:36, 3.55it/s] 11%|█ | 41385/371472 [3:20:22<25:18:41, 3.62it/s] 11%|█ | 41386/371472 [3:20:22<25:45:46, 3.56it/s] 11%|█ | 41387/371472 [3:20:23<26:10:37, 3.50it/s] 11%|█ | 41388/371472 [3:20:23<25:48:23, 3.55it/s] 11%|█ | 41389/371472 [3:20:23<25:17:17, 3.63it/s] 11%|█ | 41390/371472 [3:20:23<27:15:01, 3.36it/s] 11%|█ | 41391/371472 [3:20:24<26:40:03, 3.44it/s] 11%|█ | 41392/371472 [3:20:24<25:16:08, 3.63it/s] 11%|█ | 41393/371472 [3:20:24<24:41:53, 3.71it/s] 11%|█ | 41394/371472 [3:20:24<24:12:38, 3.79it/s] 11%|█ | 41395/371472 [3:20:25<25:19:02, 3.62it/s] 11%|█ | 41396/371472 [3:20:25<25:51:43, 3.55it/s] 11%|█ | 41397/371472 [3:20:25<25:31:11, 3.59it/s] 11%|█ | 41398/371472 [3:20:26<25:01:03, 3.66it/s] 11%|█ | 41399/371472 [3:20:26<24:57:12, 3.67it/s] 11%|█ | 41400/371472 [3:20:26<26:08:49, 3.51it/s] {'loss': 4.6227, 'learning_rate': 9.00127130513478e-07, 'epoch': 1.78} + 11%|█ | 41400/371472 [3:20:26<26:08:49, 3.51it/s] 11%|█ | 41401/371472 [3:20:26<26:05:38, 3.51it/s] 11%|█ | 41402/371472 [3:20:27<25:07:01, 3.65it/s] 11%|█ | 41403/371472 [3:20:27<24:44:46, 3.71it/s] 11%|█ | 41404/371472 [3:20:27<25:09:50, 3.64it/s] 11%|█ | 41405/371472 [3:20:28<26:46:21, 3.42it/s] 11%|█ | 41406/371472 [3:20:28<26:22:26, 3.48it/s] 11%|█ | 41407/371472 [3:20:28<28:56:21, 3.17it/s] 11%|█ | 41408/371472 [3:20:29<28:31:30, 3.21it/s] 11%|█ | 41409/371472 [3:20:29<29:02:57, 3.16it/s] 11%|█ | 41410/371472 [3:20:29<28:32:22, 3.21it/s] 11%|█ | 41411/371472 [3:20:30<28:13:55, 3.25it/s] 11%|█ | 41412/371472 [3:20:30<27:10:38, 3.37it/s] 11%|█ | 41413/371472 [3:20:30<26:11:13, 3.50it/s] 11%|█ | 41414/371472 [3:20:30<25:31:42, 3.59it/s] 11%|█ | 41415/371472 [3:20:31<25:24:48, 3.61it/s] 11%|█ | 41416/371472 [3:20:31<27:22:35, 3.35it/s] 11%|█ | 41417/371472 [3:20:31<26:55:39, 3.40it/s] 11%|█ | 41418/371472 [3:20:32<27:45:18, 3.30it/s] 11%|█ | 41419/371472 [3:20:32<26:49:38, 3.42it/s] 11%|█ | 41420/371472 [3:20:32<26:39:38, 3.44it/s] {'loss': 4.4275, 'learning_rate': 9.000786485379989e-07, 'epoch': 1.78} + 11%|█ | 41420/371472 [3:20:32<26:39:38, 3.44it/s] 11%|█ | 41421/371472 [3:20:32<26:37:59, 3.44it/s] 11%|█ | 41422/371472 [3:20:33<28:40:09, 3.20it/s] 11%|█ | 41423/371472 [3:20:33<29:06:19, 3.15it/s] 11%|█ | 41424/371472 [3:20:33<31:06:59, 2.95it/s] 11%|█ | 41425/371472 [3:20:34<28:48:38, 3.18it/s] 11%|█ | 41426/371472 [3:20:34<27:29:38, 3.33it/s] 11%|█ | 41427/371472 [3:20:34<27:41:59, 3.31it/s] 11%|█ | 41428/371472 [3:20:35<27:12:47, 3.37it/s] 11%|█ | 41429/371472 [3:20:35<25:50:19, 3.55it/s] 11%|█ | 41430/371472 [3:20:35<25:44:21, 3.56it/s] 11%|█ | 41431/371472 [3:20:35<25:42:05, 3.57it/s] 11%|█ | 41432/371472 [3:20:36<24:57:33, 3.67it/s] 11%|█ | 41433/371472 [3:20:36<25:08:55, 3.65it/s] 11%|█ | 41434/371472 [3:20:36<25:24:43, 3.61it/s] 11%|█ | 41435/371472 [3:20:36<25:56:07, 3.53it/s] 11%|█ | 41436/371472 [3:20:37<25:32:17, 3.59it/s] 11%|█ | 41437/371472 [3:20:37<24:55:46, 3.68it/s] 11%|█ | 41438/371472 [3:20:37<24:30:41, 3.74it/s] 11%|█ | 41439/371472 [3:20:38<27:30:14, 3.33it/s] 11%|█ | 41440/371472 [3:20:38<27:31:35, 3.33it/s] {'loss': 4.2365, 'learning_rate': 9.000301665625202e-07, 'epoch': 1.78} + 11%|█ | 41440/371472 [3:20:38<27:31:35, 3.33it/s] 11%|█ | 41441/371472 [3:20:38<30:17:23, 3.03it/s] 11%|█ | 41442/371472 [3:20:39<30:06:05, 3.05it/s] 11%|█ | 41443/371472 [3:20:39<28:17:16, 3.24it/s] 11%|█ | 41444/371472 [3:20:39<27:59:31, 3.28it/s] 11%|█ | 41445/371472 [3:20:40<27:17:59, 3.36it/s] 11%|█ | 41446/371472 [3:20:40<26:10:58, 3.50it/s] 11%|█ | 41447/371472 [3:20:40<25:35:20, 3.58it/s] 11%|█ | 41448/371472 [3:20:40<25:12:40, 3.64it/s] 11%|█ | 41449/371472 [3:20:41<25:23:06, 3.61it/s] 11%|█ | 41450/371472 [3:20:41<25:35:28, 3.58it/s] 11%|█ | 41451/371472 [3:20:41<25:05:20, 3.65it/s] 11%|█ | 41452/371472 [3:20:41<24:47:47, 3.70it/s] 11%|█ | 41453/371472 [3:20:42<24:43:56, 3.71it/s] 11%|█ | 41454/371472 [3:20:42<27:21:28, 3.35it/s] 11%|█ | 41455/371472 [3:20:42<26:21:33, 3.48it/s] 11%|█ | 41456/371472 [3:20:43<25:57:02, 3.53it/s] 11%|█ | 41457/371472 [3:20:43<25:10:45, 3.64it/s] 11%|█ | 41458/371472 [3:20:43<24:35:13, 3.73it/s] 11%|█ | 41459/371472 [3:20:43<26:54:03, 3.41it/s] 11%|█ | 41460/371472 [3:20:44<26:42:29, 3.43it/s] {'loss': 4.2151, 'learning_rate': 8.999816845870413e-07, 'epoch': 1.79} + 11%|█ | 41460/371472 [3:20:44<26:42:29, 3.43it/s] 11%|█ | 41461/371472 [3:20:44<25:46:10, 3.56it/s] 11%|█ | 41462/371472 [3:20:44<25:47:57, 3.55it/s] 11%|█ | 41463/371472 [3:20:45<27:00:19, 3.39it/s] 11%|█ | 41464/371472 [3:20:45<27:39:42, 3.31it/s] 11%|█ | 41465/371472 [3:20:45<26:26:37, 3.47it/s] 11%|█ | 41466/371472 [3:20:45<25:50:13, 3.55it/s] 11%|█ | 41467/371472 [3:20:46<25:47:56, 3.55it/s] 11%|█ | 41468/371472 [3:20:46<26:55:42, 3.40it/s] 11%|█ | 41469/371472 [3:20:46<25:54:02, 3.54it/s] 11%|█ | 41470/371472 [3:20:47<25:24:43, 3.61it/s] 11%|█ | 41471/371472 [3:20:47<28:16:59, 3.24it/s] 11%|█ | 41472/371472 [3:20:47<26:45:52, 3.42it/s] 11%|█ | 41473/371472 [3:20:47<27:33:15, 3.33it/s] 11%|█ | 41474/371472 [3:20:48<27:45:10, 3.30it/s] 11%|█ | 41475/371472 [3:20:48<28:21:09, 3.23it/s] 11%|█ | 41476/371472 [3:20:48<27:04:14, 3.39it/s] 11%|█ | 41477/371472 [3:20:49<26:11:59, 3.50it/s] 11%|█ | 41478/371472 [3:20:49<25:45:02, 3.56it/s] 11%|█ | 41479/371472 [3:20:49<25:43:25, 3.56it/s] 11%|█ | 41480/371472 [3:20:49<26:13:22, 3.50it/s] {'loss': 4.4628, 'learning_rate': 8.999332026115625e-07, 'epoch': 1.79} + 11%|█ | 41480/371472 [3:20:49<26:13:22, 3.50it/s] 11%|█ | 41481/371472 [3:20:50<26:16:10, 3.49it/s] 11%|█ | 41482/371472 [3:20:50<26:07:07, 3.51it/s] 11%|█ | 41483/371472 [3:20:50<25:39:08, 3.57it/s] 11%|█ | 41484/371472 [3:20:51<25:30:35, 3.59it/s] 11%|█ | 41485/371472 [3:20:51<26:52:48, 3.41it/s] 11%|█ | 41486/371472 [3:20:51<26:15:43, 3.49it/s] 11%|█ | 41487/371472 [3:20:52<26:31:53, 3.45it/s] 11%|█ | 41488/371472 [3:20:52<27:47:36, 3.30it/s] 11%|█ | 41489/371472 [3:20:52<26:41:16, 3.43it/s] 11%|█ | 41490/371472 [3:20:52<26:30:32, 3.46it/s] 11%|█ | 41491/371472 [3:20:53<27:32:56, 3.33it/s] 11%|█ | 41492/371472 [3:20:53<26:44:12, 3.43it/s] 11%|█ | 41493/371472 [3:20:53<26:58:05, 3.40it/s] 11%|█ | 41494/371472 [3:20:54<26:22:58, 3.47it/s] 11%|█ | 41495/371472 [3:20:54<29:34:03, 3.10it/s] 11%|█ | 41496/371472 [3:20:54<27:58:55, 3.28it/s] 11%|█ | 41497/371472 [3:20:55<28:10:54, 3.25it/s] 11%|█ | 41498/371472 [3:20:55<28:17:56, 3.24it/s] 11%|█ | 41499/371472 [3:20:55<27:35:26, 3.32it/s] 11%|█ | 41500/371472 [3:20:55<26:32:43, 3.45it/s] {'loss': 4.1836, 'learning_rate': 8.998847206360834e-07, 'epoch': 1.79} + 11%|█ | 41500/371472 [3:20:55<26:32:43, 3.45it/s] 11%|█ | 41501/371472 [3:20:56<26:39:58, 3.44it/s] 11%|█ | 41502/371472 [3:20:56<25:23:54, 3.61it/s] 11%|█ | 41503/371472 [3:20:56<26:18:04, 3.48it/s] 11%|█ | 41504/371472 [3:20:57<25:41:36, 3.57it/s] 11%|█ | 41505/371472 [3:20:57<25:39:01, 3.57it/s] 11%|█ | 41506/371472 [3:20:57<27:28:35, 3.34it/s] 11%|█ | 41507/371472 [3:20:57<26:43:30, 3.43it/s] 11%|█ | 41508/371472 [3:20:58<27:19:05, 3.36it/s] 11%|█ | 41509/371472 [3:20:58<26:16:23, 3.49it/s] 11%|█ | 41510/371472 [3:20:58<29:15:33, 3.13it/s] 11%|█ | 41511/371472 [3:20:59<28:51:41, 3.18it/s] 11%|█ | 41512/371472 [3:20:59<29:44:57, 3.08it/s] 11%|█ | 41513/371472 [3:20:59<28:39:10, 3.20it/s] 11%|█ | 41514/371472 [3:21:00<29:33:20, 3.10it/s] 11%|█ | 41515/371472 [3:21:00<30:03:22, 3.05it/s] 11%|█ | 41516/371472 [3:21:00<33:01:28, 2.78it/s] 11%|█ | 41517/371472 [3:21:01<30:43:16, 2.98it/s] 11%|█ | 41518/371472 [3:21:01<30:09:03, 3.04it/s] 11%|█ | 41519/371472 [3:21:01<28:35:32, 3.21it/s] 11%|█ | 41520/371472 [3:21:02<27:28:34, 3.34it/s] {'loss': 4.2967, 'learning_rate': 8.998362386606047e-07, 'epoch': 1.79} + 11%|█ | 41520/371472 [3:21:02<27:28:34, 3.34it/s] 11%|█ | 41521/371472 [3:21:02<26:21:40, 3.48it/s] 11%|█ | 41522/371472 [3:21:02<26:18:36, 3.48it/s] 11%|█ | 41523/371472 [3:21:02<26:53:32, 3.41it/s] 11%|█ | 41524/371472 [3:21:03<25:57:58, 3.53it/s] 11%|█ | 41525/371472 [3:21:03<25:48:29, 3.55it/s] 11%|█ | 41526/371472 [3:21:03<26:06:53, 3.51it/s] 11%|█ | 41527/371472 [3:21:04<25:27:56, 3.60it/s] 11%|█ | 41528/371472 [3:21:04<24:38:38, 3.72it/s] 11%|█ | 41529/371472 [3:21:04<24:35:45, 3.73it/s] 11%|█ | 41530/371472 [3:21:04<25:06:21, 3.65it/s] 11%|█ | 41531/371472 [3:21:05<24:53:37, 3.68it/s] 11%|█ | 41532/371472 [3:21:05<26:43:27, 3.43it/s] 11%|█ | 41533/371472 [3:21:05<26:04:32, 3.51it/s] 11%|█ | 41534/371472 [3:21:05<25:44:21, 3.56it/s] 11%|█ | 41535/371472 [3:21:06<24:50:33, 3.69it/s] 11%|█ | 41536/371472 [3:21:06<26:17:14, 3.49it/s] 11%|█ | 41537/371472 [3:21:06<26:39:30, 3.44it/s] 11%|█ | 41538/371472 [3:21:07<27:11:26, 3.37it/s] 11%|█ | 41539/371472 [3:21:07<26:16:36, 3.49it/s] 11%|█ | 41540/371472 [3:21:07<25:41:37, 3.57it/s] {'loss': 4.4711, 'learning_rate': 8.997877566851257e-07, 'epoch': 1.79} + 11%|█ | 41540/371472 [3:21:07<25:41:37, 3.57it/s] 11%|█ | 41541/371472 [3:21:07<25:07:19, 3.65it/s] 11%|█ | 41542/371472 [3:21:08<25:06:16, 3.65it/s] 11%|█ | 41543/371472 [3:21:08<24:20:56, 3.76it/s] 11%|█ | 41544/371472 [3:21:08<26:22:28, 3.47it/s] 11%|█ | 41545/371472 [3:21:09<25:38:25, 3.57it/s] 11%|█ | 41546/371472 [3:21:09<25:40:38, 3.57it/s] 11%|█ | 41547/371472 [3:21:09<24:49:31, 3.69it/s] 11%|█ | 41548/371472 [3:21:09<24:19:49, 3.77it/s] 11%|█ | 41549/371472 [3:21:10<25:29:05, 3.60it/s] 11%|█ | 41550/371472 [3:21:10<26:33:56, 3.45it/s] 11%|█ | 41551/371472 [3:21:10<26:52:37, 3.41it/s] 11%|█ | 41552/371472 [3:21:11<26:55:09, 3.40it/s] 11%|█ | 41553/371472 [3:21:11<25:58:47, 3.53it/s] 11%|█ | 41554/371472 [3:21:11<25:26:59, 3.60it/s] 11%|█ | 41555/371472 [3:21:11<26:14:41, 3.49it/s] 11%|█ | 41556/371472 [3:21:12<26:23:17, 3.47it/s] 11%|█ | 41557/371472 [3:21:12<25:10:32, 3.64it/s] 11%|█ | 41558/371472 [3:21:12<26:04:39, 3.51it/s] 11%|█ | 41559/371472 [3:21:13<26:17:07, 3.49it/s] 11%|█ | 41560/371472 [3:21:13<25:19:17, 3.62it/s] {'loss': 4.3517, 'learning_rate': 8.997392747096468e-07, 'epoch': 1.79} + 11%|█ | 41560/371472 [3:21:13<25:19:17, 3.62it/s] 11%|█ | 41561/371472 [3:21:13<25:40:13, 3.57it/s] 11%|█ | 41562/371472 [3:21:13<25:47:42, 3.55it/s] 11%|█ | 41563/371472 [3:21:14<25:11:20, 3.64it/s] 11%|█ | 41564/371472 [3:21:14<26:20:38, 3.48it/s] 11%|█ | 41565/371472 [3:21:14<25:55:34, 3.53it/s] 11%|█ | 41566/371472 [3:21:14<26:05:16, 3.51it/s] 11%|█ | 41567/371472 [3:21:15<26:22:03, 3.48it/s] 11%|█ | 41568/371472 [3:21:15<25:56:48, 3.53it/s] 11%|█ | 41569/371472 [3:21:15<27:19:18, 3.35it/s] 11%|█ | 41570/371472 [3:21:16<26:39:27, 3.44it/s] 11%|█ | 41571/371472 [3:21:16<26:47:17, 3.42it/s] 11%|█ | 41572/371472 [3:21:16<25:45:46, 3.56it/s] 11%|█ | 41573/371472 [3:21:16<25:34:04, 3.58it/s] 11%|█ | 41574/371472 [3:21:17<26:35:11, 3.45it/s] 11%|█ | 41575/371472 [3:21:17<25:27:06, 3.60it/s] 11%|█ | 41576/371472 [3:21:17<25:09:41, 3.64it/s] 11%|█ | 41577/371472 [3:21:18<26:45:44, 3.42it/s] 11%|█ | 41578/371472 [3:21:18<27:00:14, 3.39it/s] 11%|█ | 41579/371472 [3:21:18<26:19:47, 3.48it/s] 11%|█ | 41580/371472 [3:21:18<25:27:26, 3.60it/s] {'loss': 4.4242, 'learning_rate': 8.996907927341679e-07, 'epoch': 1.79} + 11%|█ | 41580/371472 [3:21:18<25:27:26, 3.60it/s] 11%|█ | 41581/371472 [3:21:19<24:59:12, 3.67it/s] 11%|█ | 41582/371472 [3:21:19<24:55:38, 3.68it/s] 11%|█ | 41583/371472 [3:21:19<25:08:59, 3.64it/s] 11%|█ | 41584/371472 [3:21:20<24:12:14, 3.79it/s] 11%|█ | 41585/371472 [3:21:20<26:35:13, 3.45it/s] 11%|█ | 41586/371472 [3:21:20<26:17:22, 3.49it/s] 11%|█ | 41587/371472 [3:21:20<27:13:31, 3.37it/s] 11%|█ | 41588/371472 [3:21:21<27:07:54, 3.38it/s] 11%|█ | 41589/371472 [3:21:21<26:54:54, 3.40it/s] 11%|█ | 41590/371472 [3:21:21<26:56:35, 3.40it/s] 11%|█ | 41591/371472 [3:21:22<25:55:49, 3.53it/s] 11%|█ | 41592/371472 [3:21:22<28:54:04, 3.17it/s] 11%|█ | 41593/371472 [3:21:22<29:53:43, 3.07it/s] 11%|█ | 41594/371472 [3:21:23<28:22:30, 3.23it/s] 11%|█ | 41595/371472 [3:21:23<27:01:18, 3.39it/s] 11%|█ | 41596/371472 [3:21:23<26:48:03, 3.42it/s] 11%|█ | 41597/371472 [3:21:23<26:05:30, 3.51it/s] 11%|█ | 41598/371472 [3:21:24<25:29:31, 3.59it/s] 11%|█ | 41599/371472 [3:21:24<25:52:00, 3.54it/s] 11%|█ | 41600/371472 [3:21:24<25:29:41, 3.59it/s] {'loss': 4.2808, 'learning_rate': 8.996423107586891e-07, 'epoch': 1.79} + 11%|█ | 41600/371472 [3:21:24<25:29:41, 3.59it/s] 11%|█ | 41601/371472 [3:21:25<25:35:00, 3.58it/s] 11%|█ | 41602/371472 [3:21:25<26:19:22, 3.48it/s] 11%|█ | 41603/371472 [3:21:25<26:00:07, 3.52it/s] 11%|█ | 41604/371472 [3:21:25<25:58:04, 3.53it/s] 11%|█ | 41605/371472 [3:21:26<25:32:18, 3.59it/s] 11%|█ | 41606/371472 [3:21:26<25:06:30, 3.65it/s] 11%|█ | 41607/371472 [3:21:26<24:55:16, 3.68it/s] 11%|█ | 41608/371472 [3:21:26<24:04:35, 3.81it/s] 11%|█ | 41609/371472 [3:21:27<24:28:05, 3.74it/s] 11%|█ | 41610/371472 [3:21:27<24:53:07, 3.68it/s] 11%|█ | 41611/371472 [3:21:27<28:47:04, 3.18it/s] 11%|█ | 41612/371472 [3:21:28<28:02:31, 3.27it/s] 11%|█ | 41613/371472 [3:21:28<26:08:49, 3.50it/s] 11%|█ | 41614/371472 [3:21:28<27:06:07, 3.38it/s] 11%|█ | 41615/371472 [3:21:29<26:58:18, 3.40it/s] 11%|█ | 41616/371472 [3:21:29<26:12:27, 3.50it/s] 11%|█ | 41617/371472 [3:21:29<26:14:51, 3.49it/s] 11%|█ | 41618/371472 [3:21:29<25:50:33, 3.55it/s] 11%|█ | 41619/371472 [3:21:30<26:22:05, 3.47it/s] 11%|█ | 41620/371472 [3:21:30<27:46:25, 3.30it/s] {'loss': 4.2009, 'learning_rate': 8.995938287832101e-07, 'epoch': 1.79} + 11%|█ | 41620/371472 [3:21:30<27:46:25, 3.30it/s] 11%|█ | 41621/371472 [3:21:30<29:02:58, 3.15it/s] 11%|█ | 41622/371472 [3:21:31<27:40:21, 3.31it/s] 11%|█ | 41623/371472 [3:21:31<29:33:44, 3.10it/s] 11%|█ | 41624/371472 [3:21:31<28:43:57, 3.19it/s] 11%|█ | 41625/371472 [3:21:32<28:49:47, 3.18it/s] 11%|█ | 41626/371472 [3:21:32<27:59:50, 3.27it/s] 11%|█ | 41627/371472 [3:21:32<30:01:24, 3.05it/s] 11%|█ | 41628/371472 [3:21:33<29:45:24, 3.08it/s] 11%|█ | 41629/371472 [3:21:33<28:03:51, 3.26it/s] 11%|█ | 41630/371472 [3:21:33<27:08:17, 3.38it/s] 11%|█ | 41631/371472 [3:21:33<26:59:35, 3.39it/s] 11%|█ | 41632/371472 [3:21:34<26:26:23, 3.47it/s] 11%|█ | 41633/371472 [3:21:34<25:46:51, 3.55it/s] 11%|█ | 41634/371472 [3:21:34<26:07:37, 3.51it/s] 11%|█ | 41635/371472 [3:21:35<25:39:42, 3.57it/s] 11%|█ | 41636/371472 [3:21:35<26:05:15, 3.51it/s] 11%|█ | 41637/371472 [3:21:35<25:31:05, 3.59it/s] 11%|█ | 41638/371472 [3:21:35<26:32:45, 3.45it/s] 11%|█ | 41639/371472 [3:21:36<25:42:18, 3.56it/s] 11%|█ | 41640/371472 [3:21:36<25:17:55, 3.62it/s] {'loss': 4.3003, 'learning_rate': 8.995453468077312e-07, 'epoch': 1.79} + 11%|█ | 41640/371472 [3:21:36<25:17:55, 3.62it/s] 11%|█ | 41641/371472 [3:21:36<25:25:56, 3.60it/s] 11%|█ | 41642/371472 [3:21:36<24:58:20, 3.67it/s] 11%|█ | 41643/371472 [3:21:37<25:07:41, 3.65it/s] 11%|█ | 41644/371472 [3:21:37<25:00:27, 3.66it/s] 11%|█ | 41645/371472 [3:21:37<25:28:53, 3.60it/s] 11%|█ | 41646/371472 [3:21:38<24:46:35, 3.70it/s] 11%|█ | 41647/371472 [3:21:38<25:26:50, 3.60it/s] 11%|█ | 41648/371472 [3:21:38<24:49:04, 3.69it/s] 11%|█ | 41649/371472 [3:21:38<25:02:07, 3.66it/s] 11%|█ | 41650/371472 [3:21:39<24:38:46, 3.72it/s] 11%|█ | 41651/371472 [3:21:39<25:14:42, 3.63it/s] 11%|█ | 41652/371472 [3:21:39<26:01:35, 3.52it/s] 11%|█ | 41653/371472 [3:21:39<25:14:13, 3.63it/s] 11%|█ | 41654/371472 [3:21:40<25:06:34, 3.65it/s] 11%|█ | 41655/371472 [3:21:40<24:45:22, 3.70it/s] 11%|█ | 41656/371472 [3:21:40<25:09:20, 3.64it/s] 11%|█ | 41657/371472 [3:21:41<24:34:56, 3.73it/s] 11%|█ | 41658/371472 [3:21:41<24:21:55, 3.76it/s] 11%|█ | 41659/371472 [3:21:41<24:27:40, 3.75it/s] 11%|█ | 41660/371472 [3:21:41<24:27:38, 3.75it/s] {'loss': 4.3452, 'learning_rate': 8.994968648322523e-07, 'epoch': 1.79} + 11%|█ | 41660/371472 [3:21:41<24:27:38, 3.75it/s] 11%|█ | 41661/371472 [3:21:42<25:21:31, 3.61it/s] 11%|█ | 41662/371472 [3:21:42<27:35:50, 3.32it/s] 11%|█ | 41663/371472 [3:21:42<26:45:12, 3.42it/s] 11%|█ | 41664/371472 [3:21:43<26:12:10, 3.50it/s] 11%|█ | 41665/371472 [3:21:43<26:10:56, 3.50it/s] 11%|█ | 41666/371472 [3:21:43<26:17:53, 3.48it/s] 11%|█ | 41667/371472 [3:21:43<26:22:47, 3.47it/s] 11%|█ | 41668/371472 [3:21:44<26:15:48, 3.49it/s] 11%|█ | 41669/371472 [3:21:44<25:50:55, 3.54it/s] 11%|█ | 41670/371472 [3:21:44<26:23:05, 3.47it/s] 11%|█ | 41671/371472 [3:21:45<26:07:13, 3.51it/s] 11%|█ | 41672/371472 [3:21:45<26:09:43, 3.50it/s] 11%|█ | 41673/371472 [3:21:45<26:01:26, 3.52it/s] 11%|█ | 41674/371472 [3:21:45<25:03:31, 3.66it/s] 11%|█ | 41675/371472 [3:21:46<24:58:59, 3.67it/s] 11%|█ | 41676/371472 [3:21:46<25:13:07, 3.63it/s] 11%|█ | 41677/371472 [3:21:46<26:14:06, 3.49it/s] 11%|█ | 41678/371472 [3:21:47<26:48:16, 3.42it/s] 11%|█ | 41679/371472 [3:21:47<25:34:53, 3.58it/s] 11%|█ | 41680/371472 [3:21:47<25:16:14, 3.63it/s] {'loss': 4.2786, 'learning_rate': 8.994483828567734e-07, 'epoch': 1.8} + 11%|█ | 41680/371472 [3:21:47<25:16:14, 3.63it/s] 11%|█ | 41681/371472 [3:21:47<25:38:51, 3.57it/s] 11%|█ | 41682/371472 [3:21:48<24:41:41, 3.71it/s] 11%|█ | 41683/371472 [3:21:48<25:29:48, 3.59it/s] 11%|█ | 41684/371472 [3:21:48<27:34:33, 3.32it/s] 11%|█ | 41685/371472 [3:21:49<26:10:13, 3.50it/s] 11%|█ | 41686/371472 [3:21:49<25:42:08, 3.56it/s] 11%|█ | 41687/371472 [3:21:49<26:05:13, 3.51it/s] 11%|█ | 41688/371472 [3:21:49<26:29:09, 3.46it/s] 11%|█ | 41689/371472 [3:21:50<26:11:52, 3.50it/s] 11%|█ | 41690/371472 [3:21:50<26:06:39, 3.51it/s] 11%|█ | 41691/371472 [3:21:50<26:06:19, 3.51it/s] 11%|█ | 41692/371472 [3:21:50<25:11:28, 3.64it/s] 11%|█ | 41693/371472 [3:21:51<25:06:17, 3.65it/s] 11%|█ | 41694/371472 [3:21:51<24:52:04, 3.68it/s] 11%|█ | 41695/371472 [3:21:51<24:31:23, 3.74it/s] 11%|█ | 41696/371472 [3:21:52<26:07:45, 3.51it/s] 11%|█ | 41697/371472 [3:21:52<26:24:10, 3.47it/s] 11%|█ | 41698/371472 [3:21:52<26:58:58, 3.39it/s] 11%|█ | 41699/371472 [3:21:53<28:16:36, 3.24it/s] 11%|█ | 41700/371472 [3:21:53<27:40:19, 3.31it/s] {'loss': 4.2059, 'learning_rate': 8.993999008812946e-07, 'epoch': 1.8} + 11%|█ | 41700/371472 [3:21:53<27:40:19, 3.31it/s] 11%|█ | 41701/371472 [3:21:53<26:51:26, 3.41it/s] 11%|█ | 41702/371472 [3:21:54<30:24:36, 3.01it/s] 11%|█ | 41703/371472 [3:21:54<32:14:15, 2.84it/s] 11%|█ | 41704/371472 [3:21:54<30:13:14, 3.03it/s] 11%|█ | 41705/371472 [3:21:54<28:04:09, 3.26it/s] 11%|█ | 41706/371472 [3:21:55<27:26:31, 3.34it/s] 11%|█ | 41707/371472 [3:21:55<26:24:13, 3.47it/s] 11%|█ | 41708/371472 [3:21:55<25:20:26, 3.61it/s] 11%|█ | 41709/371472 [3:21:56<25:07:23, 3.65it/s] 11%|█ | 41710/371472 [3:21:56<25:11:24, 3.64it/s] 11%|█ | 41711/371472 [3:21:56<25:00:20, 3.66it/s] 11%|█ | 41712/371472 [3:21:56<25:49:33, 3.55it/s] 11%|█ | 41713/371472 [3:21:57<25:34:34, 3.58it/s] 11%|█ | 41714/371472 [3:21:57<27:04:44, 3.38it/s] 11%|█ | 41715/371472 [3:21:57<26:47:28, 3.42it/s] 11%|█ | 41716/371472 [3:21:58<26:04:56, 3.51it/s] 11%|█ | 41717/371472 [3:21:58<25:49:12, 3.55it/s] 11%|█ | 41718/371472 [3:21:58<25:30:36, 3.59it/s] 11%|█ | 41719/371472 [3:21:58<26:19:21, 3.48it/s] 11%|█ | 41720/371472 [3:21:59<26:14:42, 3.49it/s] {'loss': 4.2892, 'learning_rate': 8.993514189058157e-07, 'epoch': 1.8} + 11%|█ | 41720/371472 [3:21:59<26:14:42, 3.49it/s] 11%|█ | 41721/371472 [3:21:59<25:34:24, 3.58it/s] 11%|█ | 41722/371472 [3:21:59<25:20:32, 3.61it/s] 11%|█ | 41723/371472 [3:21:59<25:50:57, 3.54it/s] 11%|█ | 41724/371472 [3:22:00<26:05:11, 3.51it/s] 11%|█ | 41725/371472 [3:22:00<26:07:57, 3.51it/s] 11%|█ | 41726/371472 [3:22:00<27:52:14, 3.29it/s] 11%|█ | 41727/371472 [3:22:01<27:33:31, 3.32it/s] 11%|█ | 41728/371472 [3:22:01<26:23:23, 3.47it/s] 11%|█ | 41729/371472 [3:22:01<25:38:41, 3.57it/s] 11%|█ | 41730/371472 [3:22:02<25:38:39, 3.57it/s] 11%|█ | 41731/371472 [3:22:02<27:19:07, 3.35it/s] 11%|█ | 41732/371472 [3:22:02<27:15:48, 3.36it/s] 11%|█ | 41733/371472 [3:22:02<26:11:43, 3.50it/s] 11%|█ | 41734/371472 [3:22:03<28:07:30, 3.26it/s] 11%|█ | 41735/371472 [3:22:03<27:13:01, 3.37it/s] 11%|█ | 41736/371472 [3:22:03<27:35:52, 3.32it/s] 11%|█ | 41737/371472 [3:22:04<27:19:25, 3.35it/s] 11%|█ | 41738/371472 [3:22:04<28:00:49, 3.27it/s] 11%|█ | 41739/371472 [3:22:04<27:06:59, 3.38it/s] 11%|█ | 41740/371472 [3:22:05<26:47:24, 3.42it/s] {'loss': 4.4366, 'learning_rate': 8.993029369303367e-07, 'epoch': 1.8} + 11%|█ | 41740/371472 [3:22:05<26:47:24, 3.42it/s] 11%|█ | 41741/371472 [3:22:05<26:27:37, 3.46it/s] 11%|█ | 41742/371472 [3:22:05<26:27:17, 3.46it/s] 11%|█ | 41743/371472 [3:22:05<26:29:48, 3.46it/s] 11%|█ | 41744/371472 [3:22:06<26:08:52, 3.50it/s] 11%|█ | 41745/371472 [3:22:06<26:00:52, 3.52it/s] 11%|█ | 41746/371472 [3:22:06<28:39:38, 3.20it/s] 11%|█ | 41747/371472 [3:22:07<27:55:16, 3.28it/s] 11%|█ | 41748/371472 [3:22:07<27:08:22, 3.37it/s] 11%|█ | 41749/371472 [3:22:07<27:14:52, 3.36it/s] 11%|█ | 41750/371472 [3:22:07<27:07:26, 3.38it/s] 11%|█ | 41751/371472 [3:22:08<26:47:59, 3.42it/s] 11%|█ | 41752/371472 [3:22:08<26:56:30, 3.40it/s] 11%|█ | 41753/371472 [3:22:08<26:34:21, 3.45it/s] 11%|█ | 41754/371472 [3:22:09<26:50:51, 3.41it/s] 11%|█ | 41755/371472 [3:22:09<25:37:37, 3.57it/s] 11%|█ | 41756/371472 [3:22:09<25:45:12, 3.56it/s] 11%|█ | 41757/371472 [3:22:09<25:09:41, 3.64it/s] 11%|█ | 41758/371472 [3:22:10<24:44:30, 3.70it/s] 11%|█ | 41759/371472 [3:22:10<24:11:36, 3.79it/s] 11%|█ | 41760/371472 [3:22:10<24:58:08, 3.67it/s] {'loss': 4.2924, 'learning_rate': 8.992544549548578e-07, 'epoch': 1.8} + 11%|█ | 41760/371472 [3:22:10<24:58:08, 3.67it/s] 11%|█ | 41761/371472 [3:22:11<27:00:35, 3.39it/s] 11%|█ | 41762/371472 [3:22:11<26:43:33, 3.43it/s] 11%|█ | 41763/371472 [3:22:11<26:19:03, 3.48it/s] 11%|█ | 41764/371472 [3:22:11<26:47:36, 3.42it/s] 11%|█ | 41765/371472 [3:22:12<26:23:46, 3.47it/s] 11%|█ | 41766/371472 [3:22:12<26:32:33, 3.45it/s] 11%|█ | 41767/371472 [3:22:12<29:00:12, 3.16it/s] 11%|█ | 41768/371472 [3:22:13<27:35:45, 3.32it/s] 11%|█ | 41769/371472 [3:22:13<26:32:14, 3.45it/s] 11%|█ | 41770/371472 [3:22:13<25:21:48, 3.61it/s] 11%|█ | 41771/371472 [3:22:13<26:41:52, 3.43it/s] 11%|█ | 41772/371472 [3:22:14<27:39:09, 3.31it/s] 11%|█ | 41773/371472 [3:22:14<26:34:12, 3.45it/s] 11%|█ | 41774/371472 [3:22:14<26:50:40, 3.41it/s] 11%|█ | 41775/371472 [3:22:15<26:15:56, 3.49it/s] 11%|█ | 41776/371472 [3:22:15<26:05:09, 3.51it/s] 11%|█ | 41777/371472 [3:22:15<25:27:05, 3.60it/s] 11%|█ | 41778/371472 [3:22:15<26:10:53, 3.50it/s] 11%|█ | 41779/371472 [3:22:16<25:42:39, 3.56it/s] 11%|█ | 41780/371472 [3:22:16<25:27:09, 3.60it/s] {'loss': 4.4312, 'learning_rate': 8.99205972979379e-07, 'epoch': 1.8} + 11%|█ | 41780/371472 [3:22:16<25:27:09, 3.60it/s] 11%|█ | 41781/371472 [3:22:16<25:13:07, 3.63it/s] 11%|█ | 41782/371472 [3:22:17<25:11:22, 3.64it/s] 11%|█ | 41783/371472 [3:22:17<27:32:57, 3.32it/s] 11%|█ | 41784/371472 [3:22:17<26:34:37, 3.45it/s] 11%|█ | 41785/371472 [3:22:17<25:48:55, 3.55it/s] 11%|█ | 41786/371472 [3:22:18<26:27:29, 3.46it/s] 11%|█ | 41787/371472 [3:22:18<27:39:19, 3.31it/s] 11%|█ | 41788/371472 [3:22:18<26:19:36, 3.48it/s] 11%|█ | 41789/371472 [3:22:19<26:42:51, 3.43it/s] 11%|█ | 41790/371472 [3:22:19<27:03:10, 3.39it/s] 11%|█▏ | 41791/371472 [3:22:19<26:14:15, 3.49it/s] 11%|█▏ | 41792/371472 [3:22:20<25:55:36, 3.53it/s] 11%|█▏ | 41793/371472 [3:22:20<26:01:51, 3.52it/s] 11%|█▏ | 41794/371472 [3:22:20<27:12:41, 3.37it/s] 11%|█▏ | 41795/371472 [3:22:21<30:12:11, 3.03it/s] 11%|█▏ | 41796/371472 [3:22:21<29:07:26, 3.14it/s] 11%|█▏ | 41797/371472 [3:22:21<27:50:52, 3.29it/s] 11%|█▏ | 41798/371472 [3:22:21<27:09:30, 3.37it/s] 11%|█▏ | 41799/371472 [3:22:22<26:23:59, 3.47it/s] 11%|█▏ | 41800/371472 [3:22:22<25:47:57, 3.55it/s] {'loss': 4.2705, 'learning_rate': 8.991574910039e-07, 'epoch': 1.8} + 11%|█▏ | 41800/371472 [3:22:22<25:47:57, 3.55it/s] 11%|█▏ | 41801/371472 [3:22:22<25:21:38, 3.61it/s] 11%|█▏ | 41802/371472 [3:22:22<26:17:32, 3.48it/s] 11%|█▏ | 41803/371472 [3:22:23<25:51:02, 3.54it/s] 11%|█▏ | 41804/371472 [3:22:23<25:14:02, 3.63it/s] 11%|█▏ | 41805/371472 [3:22:23<25:47:02, 3.55it/s] 11%|█▏ | 41806/371472 [3:22:24<25:27:02, 3.60it/s] 11%|█▏ | 41807/371472 [3:22:24<25:24:40, 3.60it/s] 11%|█▏ | 41808/371472 [3:22:24<24:20:46, 3.76it/s] 11%|█▏ | 41809/371472 [3:22:24<25:42:11, 3.56it/s] 11%|█▏ | 41810/371472 [3:22:25<26:53:33, 3.41it/s] 11%|█▏ | 41811/371472 [3:22:25<26:03:47, 3.51it/s] 11%|█▏ | 41812/371472 [3:22:25<26:12:23, 3.49it/s] 11%|█▏ | 41813/371472 [3:22:26<25:21:12, 3.61it/s] 11%|█▏ | 41814/371472 [3:22:26<25:04:48, 3.65it/s] 11%|█▏ | 41815/371472 [3:22:26<25:23:08, 3.61it/s] 11%|█▏ | 41816/371472 [3:22:26<27:48:07, 3.29it/s] 11%|█▏ | 41817/371472 [3:22:27<26:52:13, 3.41it/s] 11%|█▏ | 41818/371472 [3:22:27<31:04:20, 2.95it/s] 11%|█▏ | 41819/371472 [3:22:27<28:50:07, 3.18it/s] 11%|█▏ | 41820/371472 [3:22:28<27:13:24, 3.36it/s] {'loss': 4.5428, 'learning_rate': 8.991090090284212e-07, 'epoch': 1.8} + 11%|█▏ | 41820/371472 [3:22:28<27:13:24, 3.36it/s] 11%|█▏ | 41821/371472 [3:22:28<26:53:03, 3.41it/s] 11%|█▏ | 41822/371472 [3:22:28<27:22:19, 3.35it/s] 11%|█▏ | 41823/371472 [3:22:29<26:22:31, 3.47it/s] 11%|█▏ | 41824/371472 [3:22:29<26:27:46, 3.46it/s] 11%|█▏ | 41825/371472 [3:22:29<25:59:56, 3.52it/s] 11%|█▏ | 41826/371472 [3:22:29<25:36:30, 3.58it/s] 11%|█▏ | 41827/371472 [3:22:30<25:22:03, 3.61it/s] 11%|█▏ | 41828/371472 [3:22:30<25:42:11, 3.56it/s] 11%|█▏ | 41829/371472 [3:22:30<25:37:27, 3.57it/s] 11%|█▏ | 41830/371472 [3:22:30<24:45:38, 3.70it/s] 11%|█▏ | 41831/371472 [3:22:31<25:02:41, 3.66it/s] 11%|█▏ | 41832/371472 [3:22:31<25:26:57, 3.60it/s] 11%|█▏ | 41833/371472 [3:22:31<25:49:10, 3.55it/s] 11%|█▏ | 41834/371472 [3:22:32<25:08:04, 3.64it/s] 11%|█▏ | 41835/371472 [3:22:32<25:15:13, 3.63it/s] 11%|█▏ | 41836/371472 [3:22:32<26:12:00, 3.49it/s] 11%|█▏ | 41837/371472 [3:22:32<26:12:24, 3.49it/s] 11%|█▏ | 41838/371472 [3:22:33<25:29:16, 3.59it/s] 11%|█▏ | 41839/371472 [3:22:33<26:36:56, 3.44it/s] 11%|█▏ | 41840/371472 [3:22:33<26:25:18, 3.47it/s] {'loss': 4.2956, 'learning_rate': 8.990605270529423e-07, 'epoch': 1.8} + 11%|█▏ | 41840/371472 [3:22:33<26:25:18, 3.47it/s] 11%|█▏ | 41841/371472 [3:22:34<26:33:17, 3.45it/s] 11%|█▏ | 41842/371472 [3:22:34<26:14:10, 3.49it/s] 11%|█▏ | 41843/371472 [3:22:34<26:06:25, 3.51it/s] 11%|█▏ | 41844/371472 [3:22:35<27:19:40, 3.35it/s] 11%|█▏ | 41845/371472 [3:22:35<26:44:11, 3.42it/s] 11%|█▏ | 41846/371472 [3:22:35<26:29:29, 3.46it/s] 11%|█▏ | 41847/371472 [3:22:35<25:53:32, 3.54it/s] 11%|█▏ | 41848/371472 [3:22:36<25:35:12, 3.58it/s] 11%|█▏ | 41849/371472 [3:22:36<25:42:21, 3.56it/s] 11%|█▏ | 41850/371472 [3:22:36<25:01:02, 3.66it/s] 11%|█▏ | 41851/371472 [3:22:36<24:24:59, 3.75it/s] 11%|█▏ | 41852/371472 [3:22:37<26:34:31, 3.45it/s] 11%|█▏ | 41853/371472 [3:22:37<26:29:52, 3.46it/s] 11%|█▏ | 41854/371472 [3:22:37<26:59:56, 3.39it/s] 11%|█▏ | 41855/371472 [3:22:38<27:13:54, 3.36it/s] 11%|█▏ | 41856/371472 [3:22:38<25:59:11, 3.52it/s] 11%|█▏ | 41857/371472 [3:22:38<27:09:30, 3.37it/s] 11%|█▏ | 41858/371472 [3:22:39<28:52:15, 3.17it/s] 11%|█▏ | 41859/371472 [3:22:39<27:40:13, 3.31it/s] 11%|█▏ | 41860/371472 [3:22:39<28:31:06, 3.21it/s] {'loss': 4.3577, 'learning_rate': 8.990120450774634e-07, 'epoch': 1.8} + 11%|█▏ | 41860/371472 [3:22:39<28:31:06, 3.21it/s] 11%|█▏ | 41861/371472 [3:22:39<28:06:28, 3.26it/s] 11%|█▏ | 41862/371472 [3:22:40<28:12:12, 3.25it/s] 11%|█▏ | 41863/371472 [3:22:40<28:06:49, 3.26it/s] 11%|█▏ | 41864/371472 [3:22:40<27:17:41, 3.35it/s] 11%|█▏ | 41865/371472 [3:22:41<26:28:06, 3.46it/s] 11%|█▏ | 41866/371472 [3:22:41<26:08:08, 3.50it/s] 11%|█▏ | 41867/371472 [3:22:41<26:00:38, 3.52it/s] 11%|█▏ | 41868/371472 [3:22:41<25:41:20, 3.56it/s] 11%|█▏ | 41869/371472 [3:22:42<25:56:27, 3.53it/s] 11%|█▏ | 41870/371472 [3:22:42<25:59:01, 3.52it/s] 11%|█▏ | 41871/371472 [3:22:42<25:41:21, 3.56it/s] 11%|█▏ | 41872/371472 [3:22:43<25:58:50, 3.52it/s] 11%|█▏ | 41873/371472 [3:22:43<26:05:10, 3.51it/s] 11%|█▏ | 41874/371472 [3:22:43<26:38:42, 3.44it/s] 11%|█▏ | 41875/371472 [3:22:43<25:46:46, 3.55it/s] 11%|█▏ | 41876/371472 [3:22:44<25:26:04, 3.60it/s] 11%|█▏ | 41877/371472 [3:22:44<24:32:17, 3.73it/s] 11%|█▏ | 41878/371472 [3:22:44<25:46:09, 3.55it/s] 11%|█▏ | 41879/371472 [3:22:45<26:32:30, 3.45it/s] 11%|█▏ | 41880/371472 [3:22:45<26:08:15, 3.50it/s] {'loss': 4.543, 'learning_rate': 8.989635631019844e-07, 'epoch': 1.8} + 11%|█▏ | 41880/371472 [3:22:45<26:08:15, 3.50it/s] 11%|█▏ | 41881/371472 [3:22:45<25:37:28, 3.57it/s] 11%|█▏ | 41882/371472 [3:22:45<26:52:18, 3.41it/s] 11%|█▏ | 41883/371472 [3:22:46<26:56:37, 3.40it/s] 11%|█▏ | 41884/371472 [3:22:46<25:58:36, 3.52it/s] 11%|█▏ | 41885/371472 [3:22:46<25:34:00, 3.58it/s] 11%|█▏ | 41886/371472 [3:22:47<24:43:46, 3.70it/s] 11%|█▏ | 41887/371472 [3:22:47<24:23:45, 3.75it/s] 11%|█▏ | 41888/371472 [3:22:47<24:54:10, 3.68it/s] 11%|█▏ | 41889/371472 [3:22:47<24:48:16, 3.69it/s] 11%|█▏ | 41890/371472 [3:22:48<26:36:50, 3.44it/s] 11%|█▏ | 41891/371472 [3:22:48<26:22:48, 3.47it/s] 11%|█▏ | 41892/371472 [3:22:48<26:49:33, 3.41it/s] 11%|█▏ | 41893/371472 [3:22:49<26:35:24, 3.44it/s] 11%|█▏ | 41894/371472 [3:22:49<25:44:34, 3.56it/s] 11%|█▏ | 41895/371472 [3:22:49<25:17:48, 3.62it/s] 11%|█▏ | 41896/371472 [3:22:49<26:29:39, 3.46it/s] 11%|█▏ | 41897/371472 [3:22:50<26:07:33, 3.50it/s] 11%|█▏ | 41898/371472 [3:22:50<26:24:26, 3.47it/s] 11%|█▏ | 41899/371472 [3:22:50<25:36:43, 3.57it/s] 11%|█▏ | 41900/371472 [3:22:50<24:32:22, 3.73it/s] {'loss': 4.7283, 'learning_rate': 8.989150811265056e-07, 'epoch': 1.8} + 11%|█▏ | 41900/371472 [3:22:50<24:32:22, 3.73it/s] 11%|█▏ | 41901/371472 [3:22:51<24:31:24, 3.73it/s] 11%|█▏ | 41902/371472 [3:22:51<24:25:56, 3.75it/s] 11%|█▏ | 41903/371472 [3:22:51<25:50:14, 3.54it/s] 11%|█▏ | 41904/371472 [3:22:52<25:44:25, 3.56it/s] 11%|█▏ | 41905/371472 [3:22:52<25:39:10, 3.57it/s] 11%|█▏ | 41906/371472 [3:22:52<25:11:27, 3.63it/s] 11%|█▏ | 41907/371472 [3:22:52<24:46:16, 3.70it/s] 11%|█▏ | 41908/371472 [3:22:53<26:33:51, 3.45it/s] 11%|█▏ | 41909/371472 [3:22:53<25:37:21, 3.57it/s] 11%|█▏ | 41910/371472 [3:22:53<27:28:30, 3.33it/s] 11%|█▏ | 41911/371472 [3:22:54<27:13:43, 3.36it/s] 11%|█▏ | 41912/371472 [3:22:54<26:38:52, 3.44it/s] 11%|█▏ | 41913/371472 [3:22:54<25:50:33, 3.54it/s] 11%|█▏ | 41914/371472 [3:22:54<25:27:33, 3.60it/s] 11%|█▏ | 41915/371472 [3:22:55<25:51:47, 3.54it/s] 11%|█▏ | 41916/371472 [3:22:55<26:59:23, 3.39it/s] 11%|█▏ | 41917/371472 [3:22:55<25:59:22, 3.52it/s] 11%|█▏ | 41918/371472 [3:22:56<26:08:49, 3.50it/s] 11%|█▏ | 41919/371472 [3:22:56<27:29:25, 3.33it/s] 11%|█▏ | 41920/371472 [3:22:56<27:44:25, 3.30it/s] {'loss': 4.2655, 'learning_rate': 8.988665991510267e-07, 'epoch': 1.81} + 11%|█▏ | 41920/371472 [3:22:56<27:44:25, 3.30it/s] 11%|█▏ | 41921/371472 [3:22:56<26:07:14, 3.50it/s] 11%|█▏ | 41922/371472 [3:22:57<25:31:13, 3.59it/s] 11%|█▏ | 41923/371472 [3:22:57<24:59:35, 3.66it/s] 11%|█▏ | 41924/371472 [3:22:57<24:34:20, 3.73it/s] 11%|█▏ | 41925/371472 [3:22:58<24:32:10, 3.73it/s] 11%|█▏ | 41926/371472 [3:22:58<24:48:46, 3.69it/s] 11%|█▏ | 41927/371472 [3:22:58<24:48:42, 3.69it/s] 11%|█▏ | 41928/371472 [3:22:58<26:30:43, 3.45it/s] 11%|█▏ | 41929/371472 [3:22:59<27:14:53, 3.36it/s] 11%|█▏ | 41930/371472 [3:22:59<27:27:40, 3.33it/s] 11%|█▏ | 41931/371472 [3:22:59<26:08:57, 3.50it/s] 11%|█▏ | 41932/371472 [3:23:00<26:39:49, 3.43it/s] 11%|█▏ | 41933/371472 [3:23:00<26:13:41, 3.49it/s] 11%|█▏ | 41934/371472 [3:23:00<26:57:36, 3.40it/s] 11%|█▏ | 41935/371472 [3:23:01<28:05:03, 3.26it/s] 11%|█▏ | 41936/371472 [3:23:01<26:51:14, 3.41it/s] 11%|█▏ | 41937/371472 [3:23:01<26:13:40, 3.49it/s] 11%|█▏ | 41938/371472 [3:23:01<26:37:00, 3.44it/s] 11%|█▏ | 41939/371472 [3:23:02<26:23:30, 3.47it/s] 11%|█▏ | 41940/371472 [3:23:02<26:30:55, 3.45it/s] {'loss': 4.3562, 'learning_rate': 8.988181171755477e-07, 'epoch': 1.81} + 11%|█▏ | 41940/371472 [3:23:02<26:30:55, 3.45it/s] 11%|█▏ | 41941/371472 [3:23:02<29:09:23, 3.14it/s] 11%|█▏ | 41942/371472 [3:23:03<27:44:46, 3.30it/s] 11%|█▏ | 41943/371472 [3:23:03<26:41:46, 3.43it/s] 11%|█▏ | 41944/371472 [3:23:03<27:40:24, 3.31it/s] 11%|█▏ | 41945/371472 [3:23:03<26:48:01, 3.42it/s] 11%|█▏ | 41946/371472 [3:23:04<25:27:30, 3.60it/s] 11%|█▏ | 41947/371472 [3:23:04<25:46:40, 3.55it/s] 11%|█▏ | 41948/371472 [3:23:04<26:20:14, 3.48it/s] 11%|█▏ | 41949/371472 [3:23:05<28:16:11, 3.24it/s] 11%|█▏ | 41950/371472 [3:23:05<27:49:05, 3.29it/s] 11%|█▏ | 41951/371472 [3:23:05<27:09:51, 3.37it/s] 11%|█▏ | 41952/371472 [3:23:06<27:37:45, 3.31it/s] 11%|█▏ | 41953/371472 [3:23:06<26:06:11, 3.51it/s] 11%|█▏ | 41954/371472 [3:23:06<27:20:08, 3.35it/s] 11%|█▏ | 41955/371472 [3:23:06<27:24:16, 3.34it/s] 11%|█▏ | 41956/371472 [3:23:07<26:42:33, 3.43it/s] 11%|█▏ | 41957/371472 [3:23:07<25:48:41, 3.55it/s] 11%|█▏ | 41958/371472 [3:23:07<25:32:52, 3.58it/s] 11%|█▏ | 41959/371472 [3:23:07<25:11:47, 3.63it/s] 11%|█▏ | 41960/371472 [3:23:08<28:07:07, 3.26it/s] {'loss': 4.3716, 'learning_rate': 8.987696352000689e-07, 'epoch': 1.81} + 11%|█▏ | 41960/371472 [3:23:08<28:07:07, 3.26it/s] 11%|█▏ | 41961/371472 [3:23:08<27:03:47, 3.38it/s] 11%|█▏ | 41962/371472 [3:23:08<26:17:45, 3.48it/s] 11%|█▏ | 41963/371472 [3:23:09<26:25:01, 3.46it/s] 11%|█▏ | 41964/371472 [3:23:09<25:45:00, 3.55it/s] 11%|█▏ | 41965/371472 [3:23:09<24:56:18, 3.67it/s] 11%|█▏ | 41966/371472 [3:23:09<24:42:58, 3.70it/s] 11%|█▏ | 41967/371472 [3:23:10<24:32:27, 3.73it/s] 11%|█▏ | 41968/371472 [3:23:10<25:35:57, 3.58it/s] 11%|█▏ | 41969/371472 [3:23:10<25:29:11, 3.59it/s] 11%|█▏ | 41970/371472 [3:23:11<24:47:58, 3.69it/s] 11%|█▏ | 41971/371472 [3:23:11<26:29:59, 3.45it/s] 11%|█▏ | 41972/371472 [3:23:11<26:32:36, 3.45it/s] 11%|█▏ | 41973/371472 [3:23:11<25:54:20, 3.53it/s] 11%|█▏ | 41974/371472 [3:23:12<26:14:32, 3.49it/s] 11%|█▏ | 41975/371472 [3:23:12<25:21:12, 3.61it/s] 11%|█▏ | 41976/371472 [3:23:12<25:00:03, 3.66it/s] 11%|█▏ | 41977/371472 [3:23:13<25:48:52, 3.55it/s] 11%|█▏ | 41978/371472 [3:23:13<25:41:38, 3.56it/s] 11%|█▏ | 41979/371472 [3:23:13<25:38:40, 3.57it/s] 11%|█▏ | 41980/371472 [3:23:13<26:07:51, 3.50it/s] {'loss': 4.4538, 'learning_rate': 8.987211532245901e-07, 'epoch': 1.81} + 11%|█▏ | 41980/371472 [3:23:13<26:07:51, 3.50it/s] 11%|█▏ | 41981/371472 [3:23:14<25:10:31, 3.64it/s] 11%|█▏ | 41982/371472 [3:23:14<25:11:06, 3.63it/s] 11%|█▏ | 41983/371472 [3:23:14<24:49:10, 3.69it/s] 11%|█▏ | 41984/371472 [3:23:14<24:32:54, 3.73it/s] 11%|█▏ | 41985/371472 [3:23:15<26:14:45, 3.49it/s] 11%|█▏ | 41986/371472 [3:23:15<26:06:46, 3.50it/s] 11%|█▏ | 41987/371472 [3:23:15<27:31:38, 3.32it/s] 11%|█▏ | 41988/371472 [3:23:16<26:09:49, 3.50it/s] 11%|█▏ | 41989/371472 [3:23:16<27:45:21, 3.30it/s] 11%|█▏ | 41990/371472 [3:23:16<27:08:59, 3.37it/s] 11%|█▏ | 41991/371472 [3:23:17<27:00:00, 3.39it/s] 11%|█▏ | 41992/371472 [3:23:17<29:37:44, 3.09it/s] 11%|█▏ | 41993/371472 [3:23:17<27:50:19, 3.29it/s] 11%|█▏ | 41994/371472 [3:23:18<26:59:31, 3.39it/s] 11%|█▏ | 41995/371472 [3:23:18<25:22:34, 3.61it/s] 11%|█▏ | 41996/371472 [3:23:18<27:12:43, 3.36it/s] 11%|█▏ | 41997/371472 [3:23:18<26:22:51, 3.47it/s] 11%|█▏ | 41998/371472 [3:23:19<26:13:58, 3.49it/s] 11%|█▏ | 41999/371472 [3:23:19<25:49:37, 3.54it/s] 11%|█▏ | 42000/371472 [3:23:19<27:25:15, 3.34it/s] {'loss': 4.5135, 'learning_rate': 8.986726712491111e-07, 'epoch': 1.81} + 11%|█▏ | 42000/371472 [3:23:19<27:25:15, 3.34it/s] 11%|█▏ | 42001/371472 [3:23:20<26:19:23, 3.48it/s] 11%|█▏ | 42002/371472 [3:23:20<25:55:32, 3.53it/s] 11%|█▏ | 42003/371472 [3:23:20<25:32:50, 3.58it/s] 11%|█▏ | 42004/371472 [3:23:20<25:57:22, 3.53it/s] 11%|█▏ | 42005/371472 [3:23:21<26:39:51, 3.43it/s] 11%|█▏ | 42006/371472 [3:23:21<26:51:19, 3.41it/s] 11%|█▏ | 42007/371472 [3:23:21<25:55:33, 3.53it/s] 11%|█▏ | 42008/371472 [3:23:21<25:31:50, 3.58it/s] 11%|█▏ | 42009/371472 [3:23:22<25:20:47, 3.61it/s] 11%|█▏ | 42010/371472 [3:23:22<25:27:14, 3.60it/s] 11%|█▏ | 42011/371472 [3:23:22<26:05:30, 3.51it/s] 11%|█▏ | 42012/371472 [3:23:23<25:57:11, 3.53it/s] 11%|█▏ | 42013/371472 [3:23:23<25:43:59, 3.56it/s] 11%|█▏ | 42014/371472 [3:23:23<25:29:03, 3.59it/s] 11%|█▏ | 42015/371472 [3:23:23<25:18:38, 3.62it/s] 11%|█▏ | 42016/371472 [3:23:24<25:24:56, 3.60it/s] 11%|█▏ | 42017/371472 [3:23:24<25:13:51, 3.63it/s] 11%|█▏ | 42018/371472 [3:23:24<24:28:47, 3.74it/s] 11%|█▏ | 42019/371472 [3:23:25<24:40:01, 3.71it/s] 11%|█▏ | 42020/371472 [3:23:25<26:41:41, 3.43it/s] {'loss': 4.2465, 'learning_rate': 8.986241892736322e-07, 'epoch': 1.81} + 11%|█▏ | 42020/371472 [3:23:25<26:41:41, 3.43it/s] 11%|█▏ | 42021/371472 [3:23:25<27:21:22, 3.35it/s] 11%|█▏ | 42022/371472 [3:23:26<29:19:44, 3.12it/s] 11%|█▏ | 42023/371472 [3:23:26<28:17:26, 3.23it/s] 11%|█▏ | 42024/371472 [3:23:26<26:46:37, 3.42it/s] 11%|█▏ | 42025/371472 [3:23:26<27:00:07, 3.39it/s] 11%|█▏ | 42026/371472 [3:23:27<26:19:26, 3.48it/s] 11%|█▏ | 42027/371472 [3:23:27<27:22:40, 3.34it/s] 11%|█▏ | 42028/371472 [3:23:27<26:05:38, 3.51it/s] 11%|█▏ | 42029/371472 [3:23:27<24:58:59, 3.66it/s] 11%|█▏ | 42030/371472 [3:23:28<24:18:05, 3.77it/s] 11%|█▏ | 42031/371472 [3:23:28<23:48:54, 3.84it/s] 11%|█▏ | 42032/371472 [3:23:28<23:52:57, 3.83it/s] 11%|█▏ | 42033/371472 [3:23:28<23:39:53, 3.87it/s] 11%|█▏ | 42034/371472 [3:23:29<24:51:31, 3.68it/s] 11%|█▏ | 42035/371472 [3:23:29<28:51:40, 3.17it/s] 11%|█▏ | 42036/371472 [3:23:30<28:13:55, 3.24it/s] 11%|█▏ | 42037/371472 [3:23:30<27:41:26, 3.30it/s] 11%|█▏ | 42038/371472 [3:23:30<28:47:37, 3.18it/s] 11%|█▏ | 42039/371472 [3:23:30<29:23:19, 3.11it/s] 11%|█▏ | 42040/371472 [3:23:31<28:45:27, 3.18it/s] {'loss': 4.6041, 'learning_rate': 8.985757072981533e-07, 'epoch': 1.81} + 11%|█▏ | 42040/371472 [3:23:31<28:45:27, 3.18it/s] 11%|█▏ | 42041/371472 [3:23:31<27:54:54, 3.28it/s] 11%|█▏ | 42042/371472 [3:23:31<27:18:17, 3.35it/s] 11%|█▏ | 42043/371472 [3:23:32<26:34:00, 3.44it/s] 11%|█▏ | 42044/371472 [3:23:32<25:25:43, 3.60it/s] 11%|█▏ | 42045/371472 [3:23:32<24:46:59, 3.69it/s] 11%|█▏ | 42046/371472 [3:23:32<24:34:26, 3.72it/s] 11%|█▏ | 42047/371472 [3:23:33<24:25:23, 3.75it/s] 11%|█▏ | 42048/371472 [3:23:33<24:13:50, 3.78it/s] 11%|█▏ | 42049/371472 [3:23:33<24:24:15, 3.75it/s] 11%|█▏ | 42050/371472 [3:23:33<25:23:08, 3.60it/s] 11%|█▏ | 42051/371472 [3:23:34<25:45:32, 3.55it/s] 11%|█▏ | 42052/371472 [3:23:34<26:14:18, 3.49it/s] 11%|█▏ | 42053/371472 [3:23:34<25:59:17, 3.52it/s] 11%|█▏ | 42054/371472 [3:23:35<25:06:19, 3.64it/s] 11%|█▏ | 42055/371472 [3:23:35<25:25:18, 3.60it/s] 11%|█▏ | 42056/371472 [3:23:35<24:30:23, 3.73it/s] 11%|█▏ | 42057/371472 [3:23:35<25:19:48, 3.61it/s] 11%|█▏ | 42058/371472 [3:23:36<25:32:58, 3.58it/s] 11%|█▏ | 42059/371472 [3:23:36<25:54:55, 3.53it/s] 11%|█▏ | 42060/371472 [3:23:36<25:44:30, 3.55it/s] {'loss': 4.7304, 'learning_rate': 8.985272253226745e-07, 'epoch': 1.81} + 11%|█▏ | 42060/371472 [3:23:36<25:44:30, 3.55it/s] 11%|█▏ | 42061/371472 [3:23:37<25:35:37, 3.58it/s] 11%|█▏ | 42062/371472 [3:23:37<25:32:37, 3.58it/s] 11%|█▏ | 42063/371472 [3:23:37<26:28:12, 3.46it/s] 11%|█▏ | 42064/371472 [3:23:37<26:09:17, 3.50it/s] 11%|█▏ | 42065/371472 [3:23:38<26:40:11, 3.43it/s] 11%|█▏ | 42066/371472 [3:23:38<27:35:39, 3.32it/s] 11%|█▏ | 42067/371472 [3:23:38<26:40:16, 3.43it/s] 11%|█▏ | 42068/371472 [3:23:39<26:18:41, 3.48it/s] 11%|█▏ | 42069/371472 [3:23:39<26:19:21, 3.48it/s] 11%|█▏ | 42070/371472 [3:23:39<25:26:28, 3.60it/s] 11%|█▏ | 42071/371472 [3:23:39<25:33:53, 3.58it/s] 11%|█▏ | 42072/371472 [3:23:40<25:47:30, 3.55it/s] 11%|█▏ | 42073/371472 [3:23:40<24:58:01, 3.66it/s] 11%|█▏ | 42074/371472 [3:23:40<24:53:56, 3.67it/s] 11%|█▏ | 42075/371472 [3:23:41<25:54:09, 3.53it/s] 11%|█▏ | 42076/371472 [3:23:41<25:19:20, 3.61it/s] 11%|█▏ | 42077/371472 [3:23:41<24:28:55, 3.74it/s] 11%|█▏ | 42078/371472 [3:23:41<26:18:18, 3.48it/s] 11%|█▏ | 42079/371472 [3:23:42<25:53:51, 3.53it/s] 11%|█▏ | 42080/371472 [3:23:42<25:16:16, 3.62it/s] {'loss': 4.4824, 'learning_rate': 8.984787433471956e-07, 'epoch': 1.81} + 11%|█▏ | 42080/371472 [3:23:42<25:16:16, 3.62it/s] 11%|█▏ | 42081/371472 [3:23:42<25:30:51, 3.59it/s] 11%|█▏ | 42082/371472 [3:23:42<25:43:02, 3.56it/s] 11%|█▏ | 42083/371472 [3:23:43<24:48:13, 3.69it/s] 11%|█▏ | 42084/371472 [3:23:43<25:38:31, 3.57it/s] 11%|█▏ | 42085/371472 [3:23:43<26:51:09, 3.41it/s] 11%|█▏ | 42086/371472 [3:23:44<26:18:55, 3.48it/s] 11%|█▏ | 42087/371472 [3:23:44<25:49:17, 3.54it/s] 11%|█▏ | 42088/371472 [3:23:44<25:05:25, 3.65it/s] 11%|█▏ | 42089/371472 [3:23:44<25:32:49, 3.58it/s] 11%|█▏ | 42090/371472 [3:23:45<24:40:06, 3.71it/s] 11%|█▏ | 42091/371472 [3:23:45<24:48:46, 3.69it/s] 11%|█▏ | 42092/371472 [3:23:45<25:38:27, 3.57it/s] 11%|█▏ | 42093/371472 [3:23:46<25:57:59, 3.52it/s] 11%|█▏ | 42094/371472 [3:23:46<26:13:31, 3.49it/s] 11%|█▏ | 42095/371472 [3:23:46<25:46:08, 3.55it/s] 11%|█▏ | 42096/371472 [3:23:46<26:01:20, 3.52it/s] 11%|█▏ | 42097/371472 [3:23:47<25:26:26, 3.60it/s] 11%|█▏ | 42098/371472 [3:23:47<24:34:34, 3.72it/s] 11%|█▏ | 42099/371472 [3:23:47<25:07:07, 3.64it/s] 11%|█▏ | 42100/371472 [3:23:47<24:36:42, 3.72it/s] {'loss': 4.4615, 'learning_rate': 8.984302613717167e-07, 'epoch': 1.81} + 11%|█▏ | 42100/371472 [3:23:47<24:36:42, 3.72it/s] 11%|█▏ | 42101/371472 [3:23:48<27:51:39, 3.28it/s] 11%|█▏ | 42102/371472 [3:23:48<27:11:26, 3.36it/s] 11%|█▏ | 42103/371472 [3:23:48<26:32:55, 3.45it/s] 11%|█▏ | 42104/371472 [3:23:49<26:17:13, 3.48it/s] 11%|█▏ | 42105/371472 [3:23:49<25:00:14, 3.66it/s] 11%|█▏ | 42106/371472 [3:23:49<27:33:54, 3.32it/s] 11%|█▏ | 42107/371472 [3:23:50<26:28:54, 3.45it/s] 11%|█▏ | 42108/371472 [3:23:50<26:51:21, 3.41it/s] 11%|█▏ | 42109/371472 [3:23:50<26:36:37, 3.44it/s] 11%|█▏ | 42110/371472 [3:23:50<25:34:39, 3.58it/s] 11%|█▏ | 42111/371472 [3:23:51<25:05:14, 3.65it/s] 11%|█▏ | 42112/371472 [3:23:51<24:48:03, 3.69it/s] 11%|█▏ | 42113/371472 [3:23:51<25:50:49, 3.54it/s] 11%|█▏ | 42114/371472 [3:23:52<26:02:24, 3.51it/s] 11%|█▏ | 42115/371472 [3:23:52<26:16:09, 3.48it/s] 11%|█▏ | 42116/371472 [3:23:52<25:35:51, 3.57it/s] 11%|█▏ | 42117/371472 [3:23:52<25:32:42, 3.58it/s] 11%|█▏ | 42118/371472 [3:23:53<24:55:32, 3.67it/s] 11%|█▏ | 42119/371472 [3:23:53<25:32:28, 3.58it/s] 11%|█▏ | 42120/371472 [3:23:53<26:24:50, 3.46it/s] {'loss': 4.3076, 'learning_rate': 8.983817793962378e-07, 'epoch': 1.81} + 11%|█▏ | 42120/371472 [3:23:53<26:24:50, 3.46it/s] 11%|█▏ | 42121/371472 [3:23:54<27:55:11, 3.28it/s] 11%|█▏ | 42122/371472 [3:23:54<26:51:08, 3.41it/s] 11%|█▏ | 42123/371472 [3:23:54<26:37:05, 3.44it/s] 11%|█▏ | 42124/371472 [3:23:54<26:13:23, 3.49it/s] 11%|█▏ | 42125/371472 [3:23:55<26:26:01, 3.46it/s] 11%|█▏ | 42126/371472 [3:23:55<25:48:16, 3.55it/s] 11%|█▏ | 42127/371472 [3:23:55<26:33:39, 3.44it/s] 11%|█▏ | 42128/371472 [3:23:56<27:54:58, 3.28it/s] 11%|█▏ | 42129/371472 [3:23:56<26:32:48, 3.45it/s] 11%|█▏ | 42130/371472 [3:23:56<26:22:21, 3.47it/s] 11%|█▏ | 42131/371472 [3:23:56<26:47:37, 3.41it/s] 11%|█▏ | 42132/371472 [3:23:57<26:10:01, 3.50it/s] 11%|█▏ | 42133/371472 [3:23:57<25:49:59, 3.54it/s] 11%|█▏ | 42134/371472 [3:23:57<27:03:42, 3.38it/s] 11%|█▏ | 42135/371472 [3:23:58<26:55:22, 3.40it/s] 11%|█▏ | 42136/371472 [3:23:58<25:45:43, 3.55it/s] 11%|█▏ | 42137/371472 [3:23:58<27:01:42, 3.38it/s] 11%|█▏ | 42138/371472 [3:23:58<25:44:02, 3.55it/s] 11%|█▏ | 42139/371472 [3:23:59<25:24:40, 3.60it/s] 11%|█▏ | 42140/371472 [3:23:59<24:43:13, 3.70it/s] {'loss': 4.4122, 'learning_rate': 8.983332974207589e-07, 'epoch': 1.82} + 11%|█▏ | 42140/371472 [3:23:59<24:43:13, 3.70it/s] 11%|█▏ | 42141/371472 [3:23:59<26:19:30, 3.48it/s] 11%|█▏ | 42142/371472 [3:24:00<25:48:48, 3.54it/s] 11%|█▏ | 42143/371472 [3:24:00<25:29:23, 3.59it/s] 11%|█▏ | 42144/371472 [3:24:00<29:46:38, 3.07it/s] 11%|█▏ | 42145/371472 [3:24:01<29:24:13, 3.11it/s] 11%|█▏ | 42146/371472 [3:24:01<29:15:25, 3.13it/s] 11%|█▏ | 42147/371472 [3:24:01<27:54:44, 3.28it/s] 11%|█▏ | 42148/371472 [3:24:01<27:04:33, 3.38it/s] 11%|█▏ | 42149/371472 [3:24:02<25:40:00, 3.56it/s] 11%|█▏ | 42150/371472 [3:24:02<25:26:26, 3.60it/s] 11%|█▏ | 42151/371472 [3:24:02<25:02:09, 3.65it/s] 11%|█▏ | 42152/371472 [3:24:03<25:55:04, 3.53it/s] 11%|█▏ | 42153/371472 [3:24:03<25:22:41, 3.60it/s] 11%|█▏ | 42154/371472 [3:24:03<26:08:57, 3.50it/s] 11%|█▏ | 42155/371472 [3:24:03<26:35:38, 3.44it/s] 11%|█▏ | 42156/371472 [3:24:04<26:55:08, 3.40it/s] 11%|█▏ | 42157/371472 [3:24:04<25:54:29, 3.53it/s] 11%|█▏ | 42158/371472 [3:24:04<25:22:28, 3.61it/s] 11%|█▏ | 42159/371472 [3:24:04<25:11:13, 3.63it/s] 11%|█▏ | 42160/371472 [3:24:05<25:20:31, 3.61it/s] {'loss': 4.2969, 'learning_rate': 8.982848154452799e-07, 'epoch': 1.82} + 11%|█▏ | 42160/371472 [3:24:05<25:20:31, 3.61it/s] 11%|█▏ | 42161/371472 [3:24:05<25:06:22, 3.64it/s] 11%|█▏ | 42162/371472 [3:24:05<26:16:04, 3.48it/s] 11%|█▏ | 42163/371472 [3:24:06<26:07:20, 3.50it/s] 11%|█▏ | 42164/371472 [3:24:06<26:50:29, 3.41it/s] 11%|█▏ | 42165/371472 [3:24:06<28:13:55, 3.24it/s] 11%|█▏ | 42166/371472 [3:24:07<28:32:57, 3.20it/s] 11%|█▏ | 42167/371472 [3:24:07<27:57:39, 3.27it/s] 11%|█▏ | 42168/371472 [3:24:07<26:14:28, 3.49it/s] 11%|█▏ | 42169/371472 [3:24:07<25:45:52, 3.55it/s] 11%|█▏ | 42170/371472 [3:24:08<29:01:03, 3.15it/s] 11%|█▏ | 42171/371472 [3:24:08<27:22:56, 3.34it/s] 11%|█▏ | 42172/371472 [3:24:08<26:14:36, 3.49it/s] 11%|█▏ | 42173/371472 [3:24:09<25:50:21, 3.54it/s] 11%|█▏ | 42174/371472 [3:24:09<28:04:45, 3.26it/s] 11%|█▏ | 42175/371472 [3:24:09<29:09:30, 3.14it/s] 11%|█▏ | 42176/371472 [3:24:10<27:57:35, 3.27it/s] 11%|█▏ | 42177/371472 [3:24:10<28:46:37, 3.18it/s] 11%|█▏ | 42178/371472 [3:24:10<27:30:20, 3.33it/s] 11%|█▏ | 42179/371472 [3:24:11<27:51:17, 3.28it/s] 11%|█▏ | 42180/371472 [3:24:11<26:35:33, 3.44it/s] {'loss': 4.3677, 'learning_rate': 8.982363334698011e-07, 'epoch': 1.82} + 11%|█▏ | 42180/371472 [3:24:11<26:35:33, 3.44it/s] 11%|█▏ | 42181/371472 [3:24:11<27:02:52, 3.38it/s] 11%|█▏ | 42182/371472 [3:24:11<26:11:42, 3.49it/s] 11%|█▏ | 42183/371472 [3:24:12<27:05:38, 3.38it/s] 11%|█▏ | 42184/371472 [3:24:12<26:27:38, 3.46it/s] 11%|█▏ | 42185/371472 [3:24:12<26:12:52, 3.49it/s] 11%|█▏ | 42186/371472 [3:24:12<24:56:03, 3.67it/s] 11%|█▏ | 42187/371472 [3:24:13<25:12:14, 3.63it/s] 11%|█▏ | 42188/371472 [3:24:13<24:47:00, 3.69it/s] 11%|█▏ | 42189/371472 [3:24:13<25:09:33, 3.64it/s] 11%|█▏ | 42190/371472 [3:24:14<24:50:46, 3.68it/s] 11%|█▏ | 42191/371472 [3:24:14<25:46:18, 3.55it/s] 11%|█▏ | 42192/371472 [3:24:14<25:18:45, 3.61it/s] 11%|█▏ | 42193/371472 [3:24:14<26:53:39, 3.40it/s] 11%|█▏ | 42194/371472 [3:24:15<25:44:58, 3.55it/s] 11%|█▏ | 42195/371472 [3:24:15<24:53:39, 3.67it/s] 11%|█▏ | 42196/371472 [3:24:15<24:59:29, 3.66it/s] 11%|█▏ | 42197/371472 [3:24:16<25:33:33, 3.58it/s] 11%|█▏ | 42198/371472 [3:24:16<24:39:11, 3.71it/s] 11%|█▏ | 42199/371472 [3:24:16<25:27:03, 3.59it/s] 11%|█▏ | 42200/371472 [3:24:16<27:24:19, 3.34it/s] {'loss': 4.5101, 'learning_rate': 8.981878514943222e-07, 'epoch': 1.82} + 11%|█▏ | 42200/371472 [3:24:16<27:24:19, 3.34it/s] 11%|█▏ | 42201/371472 [3:24:17<26:58:35, 3.39it/s] 11%|█▏ | 42202/371472 [3:24:17<26:19:15, 3.47it/s] 11%|█▏ | 42203/371472 [3:24:17<25:50:48, 3.54it/s] 11%|█▏ | 42204/371472 [3:24:18<26:37:56, 3.43it/s] 11%|█▏ | 42205/371472 [3:24:18<26:03:07, 3.51it/s] 11%|█▏ | 42206/371472 [3:24:18<26:03:47, 3.51it/s] 11%|█▏ | 42207/371472 [3:24:18<26:18:00, 3.48it/s] 11%|█▏ | 42208/371472 [3:24:19<25:55:15, 3.53it/s] 11%|█▏ | 42209/371472 [3:24:19<25:45:11, 3.55it/s] 11%|█▏ | 42210/371472 [3:24:19<25:49:57, 3.54it/s] 11%|█▏ | 42211/371472 [3:24:20<27:45:13, 3.30it/s] 11%|█▏ | 42212/371472 [3:24:20<27:35:39, 3.31it/s] 11%|█▏ | 42213/371472 [3:24:20<26:42:48, 3.42it/s] 11%|█▏ | 42214/371472 [3:24:20<26:10:51, 3.49it/s] 11%|█▏ | 42215/371472 [3:24:21<25:59:37, 3.52it/s] 11%|█▏ | 42216/371472 [3:24:21<26:27:40, 3.46it/s] 11%|█▏ | 42217/371472 [3:24:21<25:37:12, 3.57it/s] 11%|█▏ | 42218/371472 [3:24:22<27:46:29, 3.29it/s] 11%|█▏ | 42219/371472 [3:24:22<26:37:33, 3.43it/s] 11%|█▏ | 42220/371472 [3:24:22<28:41:04, 3.19it/s] {'loss': 4.4752, 'learning_rate': 8.981393695188433e-07, 'epoch': 1.82} + 11%|█▏ | 42220/371472 [3:24:22<28:41:04, 3.19it/s] 11%|█▏ | 42221/371472 [3:24:23<29:11:33, 3.13it/s] 11%|█▏ | 42222/371472 [3:24:23<28:19:17, 3.23it/s] 11%|█▏ | 42223/371472 [3:24:23<31:17:21, 2.92it/s] 11%|█▏ | 42224/371472 [3:24:24<30:00:04, 3.05it/s] 11%|█▏ | 42225/371472 [3:24:24<28:44:34, 3.18it/s] 11%|█▏ | 42226/371472 [3:24:24<31:14:39, 2.93it/s] 11%|█▏ | 42227/371472 [3:24:25<30:45:12, 2.97it/s] 11%|█▏ | 42228/371472 [3:24:25<28:32:55, 3.20it/s] 11%|█▏ | 42229/371472 [3:24:25<27:03:09, 3.38it/s] 11%|█▏ | 42230/371472 [3:24:25<26:04:59, 3.51it/s] 11%|█▏ | 42231/371472 [3:24:26<25:22:16, 3.60it/s] 11%|█▏ | 42232/371472 [3:24:26<24:26:09, 3.74it/s] 11%|█▏ | 42233/371472 [3:24:26<25:35:30, 3.57it/s] 11%|█▏ | 42234/371472 [3:24:27<29:22:36, 3.11it/s] 11%|█▏ | 42235/371472 [3:24:27<29:43:00, 3.08it/s] 11%|█▏ | 42236/371472 [3:24:27<30:02:24, 3.04it/s] 11%|█▏ | 42237/371472 [3:24:28<28:18:40, 3.23it/s] 11%|█▏ | 42238/371472 [3:24:28<28:34:26, 3.20it/s] 11%|█▏ | 42239/371472 [3:24:28<28:11:45, 3.24it/s] 11%|█▏ | 42240/371472 [3:24:28<27:34:59, 3.32it/s] {'loss': 4.7016, 'learning_rate': 8.980908875433644e-07, 'epoch': 1.82} + 11%|█▏ | 42240/371472 [3:24:28<27:34:59, 3.32it/s] 11%|█▏ | 42241/371472 [3:24:29<26:21:48, 3.47it/s] 11%|█▏ | 42242/371472 [3:24:29<26:41:36, 3.43it/s] 11%|█▏ | 42243/371472 [3:24:29<25:55:43, 3.53it/s] 11%|█▏ | 42244/371472 [3:24:30<26:00:10, 3.52it/s] 11%|█▏ | 42245/371472 [3:24:30<26:13:02, 3.49it/s] 11%|█▏ | 42246/371472 [3:24:30<26:26:42, 3.46it/s] 11%|█▏ | 42247/371472 [3:24:30<25:54:56, 3.53it/s] 11%|█▏ | 42248/371472 [3:24:31<25:50:26, 3.54it/s] 11%|█▏ | 42249/371472 [3:24:31<27:03:35, 3.38it/s] 11%|█▏ | 42250/371472 [3:24:31<27:13:07, 3.36it/s] 11%|█▏ | 42251/371472 [3:24:32<26:29:04, 3.45it/s] 11%|█▏ | 42252/371472 [3:24:32<25:24:46, 3.60it/s] 11%|█▏ | 42253/371472 [3:24:32<24:51:39, 3.68it/s] 11%|█▏ | 42254/371472 [3:24:32<24:42:29, 3.70it/s] 11%|█▏ | 42255/371472 [3:24:33<26:27:55, 3.46it/s] 11%|█▏ | 42256/371472 [3:24:33<25:05:21, 3.64it/s] 11%|█▏ | 42257/371472 [3:24:33<25:22:19, 3.60it/s] 11%|█▏ | 42258/371472 [3:24:34<27:44:51, 3.30it/s] 11%|█▏ | 42259/371472 [3:24:34<26:34:49, 3.44it/s] 11%|█▏ | 42260/371472 [3:24:34<25:59:52, 3.52it/s] {'loss': 4.6251, 'learning_rate': 8.980424055678855e-07, 'epoch': 1.82} + 11%|█▏ | 42260/371472 [3:24:34<25:59:52, 3.52it/s] 11%|█▏ | 42261/371472 [3:24:34<26:29:07, 3.45it/s] 11%|█▏ | 42262/371472 [3:24:35<26:12:23, 3.49it/s] 11%|█▏ | 42263/371472 [3:24:35<25:27:16, 3.59it/s] 11%|█▏ | 42264/371472 [3:24:35<26:00:30, 3.52it/s] 11%|█▏ | 42265/371472 [3:24:36<26:11:25, 3.49it/s] 11%|█▏ | 42266/371472 [3:24:36<25:41:17, 3.56it/s] 11%|█▏ | 42267/371472 [3:24:36<25:16:19, 3.62it/s] 11%|█▏ | 42268/371472 [3:24:36<24:59:20, 3.66it/s] 11%|█▏ | 42269/371472 [3:24:37<24:51:27, 3.68it/s] 11%|█▏ | 42270/371472 [3:24:37<25:44:48, 3.55it/s] 11%|█▏ | 42271/371472 [3:24:37<25:18:55, 3.61it/s] 11%|█▏ | 42272/371472 [3:24:37<25:59:08, 3.52it/s] 11%|█▏ | 42273/371472 [3:24:38<25:15:14, 3.62it/s] 11%|█▏ | 42274/371472 [3:24:38<26:10:59, 3.49it/s] 11%|█▏ | 42275/371472 [3:24:38<25:09:30, 3.63it/s] 11%|█▏ | 42276/371472 [3:24:39<24:37:37, 3.71it/s] 11%|█▏ | 42277/371472 [3:24:39<24:32:52, 3.73it/s] 11%|█▏ | 42278/371472 [3:24:39<25:49:05, 3.54it/s] 11%|█▏ | 42279/371472 [3:24:39<26:14:38, 3.48it/s] 11%|█▏ | 42280/371472 [3:24:40<25:33:42, 3.58it/s] {'loss': 4.4877, 'learning_rate': 8.979939235924066e-07, 'epoch': 1.82} + 11%|█▏ | 42280/371472 [3:24:40<25:33:42, 3.58it/s] 11%|█▏ | 42281/371472 [3:24:40<26:33:53, 3.44it/s] 11%|█▏ | 42282/371472 [3:24:40<27:03:02, 3.38it/s] 11%|█▏ | 42283/371472 [3:24:41<28:01:52, 3.26it/s] 11%|█▏ | 42284/371472 [3:24:41<27:37:43, 3.31it/s] 11%|█▏ | 42285/371472 [3:24:41<26:53:15, 3.40it/s] 11%|█▏ | 42286/371472 [3:24:41<26:33:45, 3.44it/s] 11%|█▏ | 42287/371472 [3:24:42<25:59:43, 3.52it/s] 11%|█▏ | 42288/371472 [3:24:42<25:14:37, 3.62it/s] 11%|█▏ | 42289/371472 [3:24:42<24:18:53, 3.76it/s] 11%|█▏ | 42290/371472 [3:24:43<26:21:17, 3.47it/s] 11%|█▏ | 42291/371472 [3:24:43<26:39:08, 3.43it/s] 11%|█▏ | 42292/371472 [3:24:43<26:12:02, 3.49it/s] 11%|█▏ | 42293/371472 [3:24:43<25:53:59, 3.53it/s] 11%|█▏ | 42294/371472 [3:24:44<26:14:53, 3.48it/s] 11%|█▏ | 42295/371472 [3:24:44<26:13:33, 3.49it/s] 11%|█▏ | 42296/371472 [3:24:44<25:46:15, 3.55it/s] 11%|█▏ | 42297/371472 [3:24:45<25:25:00, 3.60it/s] 11%|█▏ | 42298/371472 [3:24:45<26:02:31, 3.51it/s] 11%|█▏ | 42299/371472 [3:24:45<25:38:36, 3.57it/s] 11%|█▏ | 42300/371472 [3:24:45<25:16:45, 3.62it/s] {'loss': 4.2176, 'learning_rate': 8.979454416169277e-07, 'epoch': 1.82} + 11%|█▏ | 42300/371472 [3:24:45<25:16:45, 3.62it/s] 11%|█▏ | 42301/371472 [3:24:46<26:53:14, 3.40it/s] 11%|█▏ | 42302/371472 [3:24:46<25:44:08, 3.55it/s] 11%|█▏ | 42303/371472 [3:24:46<26:22:15, 3.47it/s] 11%|█▏ | 42304/371472 [3:24:47<25:43:58, 3.55it/s] 11%|█▏ | 42305/371472 [3:24:47<25:03:18, 3.65it/s] 11%|█▏ | 42306/371472 [3:24:47<25:19:58, 3.61it/s] 11%|█▏ | 42307/371472 [3:24:47<24:48:01, 3.69it/s] 11%|█▏ | 42308/371472 [3:24:48<24:48:03, 3.69it/s] 11%|█▏ | 42309/371472 [3:24:48<23:59:23, 3.81it/s] 11%|█▏ | 42310/371472 [3:24:48<24:33:20, 3.72it/s] 11%|█▏ | 42311/371472 [3:24:48<24:25:03, 3.74it/s] 11%|█▏ | 42312/371472 [3:24:49<24:28:16, 3.74it/s] 11%|█▏ | 42313/371472 [3:24:49<24:03:38, 3.80it/s] 11%|█▏ | 42314/371472 [3:24:49<24:15:00, 3.77it/s] 11%|█▏ | 42315/371472 [3:24:50<24:33:32, 3.72it/s] 11%|█▏ | 42316/371472 [3:24:50<25:07:44, 3.64it/s] 11%|█▏ | 42317/371472 [3:24:50<24:33:16, 3.72it/s] 11%|█▏ | 42318/371472 [3:24:50<24:41:32, 3.70it/s] 11%|█▏ | 42319/371472 [3:24:51<24:51:11, 3.68it/s] 11%|█▏ | 42320/371472 [3:24:51<24:23:19, 3.75it/s] {'loss': 4.5683, 'learning_rate': 8.978969596414488e-07, 'epoch': 1.82} + 11%|█▏ | 42320/371472 [3:24:51<24:23:19, 3.75it/s] 11%|█▏ | 42321/371472 [3:24:51<25:32:16, 3.58it/s] 11%|█▏ | 42322/371472 [3:24:51<26:50:09, 3.41it/s] 11%|█▏ | 42323/371472 [3:24:52<27:20:22, 3.34it/s] 11%|█▏ | 42324/371472 [3:24:52<26:51:26, 3.40it/s] 11%|█▏ | 42325/371472 [3:24:52<27:23:29, 3.34it/s] 11%|█▏ | 42326/371472 [3:24:53<27:18:15, 3.35it/s] 11%|█▏ | 42327/371472 [3:24:53<26:40:26, 3.43it/s] 11%|█▏ | 42328/371472 [3:24:53<26:00:23, 3.52it/s] 11%|█▏ | 42329/371472 [3:24:54<26:18:59, 3.47it/s] 11%|█▏ | 42330/371472 [3:24:54<25:28:40, 3.59it/s] 11%|█▏ | 42331/371472 [3:24:54<26:29:05, 3.45it/s] 11%|█▏ | 42332/371472 [3:24:54<25:57:00, 3.52it/s] 11%|█▏ | 42333/371472 [3:24:55<25:30:02, 3.59it/s] 11%|█▏ | 42334/371472 [3:24:55<25:48:50, 3.54it/s] 11%|█▏ | 42335/371472 [3:24:55<24:44:36, 3.69it/s] 11%|█▏ | 42336/371472 [3:24:55<24:42:17, 3.70it/s] 11%|█▏ | 42337/371472 [3:24:56<25:30:05, 3.59it/s] 11%|█▏ | 42338/371472 [3:24:56<26:50:28, 3.41it/s] 11%|█▏ | 42339/371472 [3:24:56<25:50:15, 3.54it/s] 11%|█▏ | 42340/371472 [3:24:57<25:43:48, 3.55it/s] {'loss': 4.3706, 'learning_rate': 8.978484776659699e-07, 'epoch': 1.82} + 11%|█▏ | 42340/371472 [3:24:57<25:43:48, 3.55it/s] 11%|█▏ | 42341/371472 [3:24:57<25:14:01, 3.62it/s] 11%|█▏ | 42342/371472 [3:24:57<24:51:57, 3.68it/s] 11%|█▏ | 42343/371472 [3:24:57<24:24:57, 3.74it/s] 11%|█▏ | 42344/371472 [3:24:58<26:53:21, 3.40it/s] 11%|█▏ | 42345/371472 [3:24:58<26:43:48, 3.42it/s] 11%|█▏ | 42346/371472 [3:24:58<25:59:08, 3.52it/s] 11%|█▏ | 42347/371472 [3:24:59<25:10:59, 3.63it/s] 11%|█▏ | 42348/371472 [3:24:59<26:55:05, 3.40it/s] 11%|█▏ | 42349/371472 [3:24:59<26:59:55, 3.39it/s] 11%|█▏ | 42350/371472 [3:24:59<26:05:34, 3.50it/s] 11%|█▏ | 42351/371472 [3:25:00<25:24:42, 3.60it/s] 11%|█▏ | 42352/371472 [3:25:00<25:16:28, 3.62it/s] 11%|█▏ | 42353/371472 [3:25:00<25:26:21, 3.59it/s] 11%|█▏ | 42354/371472 [3:25:01<26:47:55, 3.41it/s] 11%|█▏ | 42355/371472 [3:25:01<25:56:22, 3.52it/s] 11%|█▏ | 42356/371472 [3:25:01<27:18:21, 3.35it/s] 11%|█▏ | 42357/371472 [3:25:01<26:56:45, 3.39it/s] 11%|█▏ | 42358/371472 [3:25:02<26:49:32, 3.41it/s] 11%|█▏ | 42359/371472 [3:25:02<27:07:02, 3.37it/s] 11%|█▏ | 42360/371472 [3:25:02<26:46:15, 3.41it/s] {'loss': 4.3926, 'learning_rate': 8.977999956904911e-07, 'epoch': 1.82} + 11%|█▏ | 42360/371472 [3:25:02<26:46:15, 3.41it/s] 11%|█▏ | 42361/371472 [3:25:03<25:50:27, 3.54it/s] 11%|█▏ | 42362/371472 [3:25:03<24:58:13, 3.66it/s] 11%|█▏ | 42363/371472 [3:25:03<25:18:11, 3.61it/s] 11%|█▏ | 42364/371472 [3:25:03<27:06:36, 3.37it/s] 11%|█▏ | 42365/371472 [3:25:04<26:34:36, 3.44it/s] 11%|█▏ | 42366/371472 [3:25:04<27:19:49, 3.34it/s] 11%|█▏ | 42367/371472 [3:25:04<27:21:52, 3.34it/s] 11%|█▏ | 42368/371472 [3:25:05<28:21:19, 3.22it/s] 11%|█▏ | 42369/371472 [3:25:05<27:00:19, 3.39it/s] 11%|█▏ | 42370/371472 [3:25:05<27:04:44, 3.38it/s] 11%|█▏ | 42371/371472 [3:25:06<25:39:22, 3.56it/s] 11%|█▏ | 42372/371472 [3:25:06<26:22:56, 3.47it/s] 11%|█▏ | 42373/371472 [3:25:06<26:06:27, 3.50it/s] 11%|█▏ | 42374/371472 [3:25:06<25:21:22, 3.61it/s] 11%|█▏ | 42375/371472 [3:25:07<24:42:14, 3.70it/s] 11%|█▏ | 42376/371472 [3:25:07<25:06:40, 3.64it/s] 11%|█▏ | 42377/371472 [3:25:07<25:40:30, 3.56it/s] 11%|█▏ | 42378/371472 [3:25:07<25:49:19, 3.54it/s] 11%|█▏ | 42379/371472 [3:25:08<27:21:30, 3.34it/s] 11%|█▏ | 42380/371472 [3:25:08<27:36:19, 3.31it/s] {'loss': 4.4718, 'learning_rate': 8.97751513715012e-07, 'epoch': 1.83} + 11%|█▏ | 42380/371472 [3:25:08<27:36:19, 3.31it/s] 11%|█▏ | 42381/371472 [3:25:08<27:12:07, 3.36it/s] 11%|█▏ | 42382/371472 [3:25:09<26:06:52, 3.50it/s] 11%|█▏ | 42383/371472 [3:25:09<26:19:46, 3.47it/s] 11%|█▏ | 42384/371472 [3:25:09<26:28:12, 3.45it/s] 11%|█▏ | 42385/371472 [3:25:10<25:41:25, 3.56it/s] 11%|█▏ | 42386/371472 [3:25:10<25:16:41, 3.62it/s] 11%|█▏ | 42387/371472 [3:25:10<26:10:31, 3.49it/s] 11%|█▏ | 42388/371472 [3:25:10<25:07:04, 3.64it/s] 11%|█▏ | 42389/371472 [3:25:11<25:11:22, 3.63it/s] 11%|█▏ | 42390/371472 [3:25:11<25:16:40, 3.62it/s] 11%|█▏ | 42391/371472 [3:25:11<28:55:53, 3.16it/s] 11%|█▏ | 42392/371472 [3:25:12<28:23:14, 3.22it/s] 11%|█▏ | 42393/371472 [3:25:12<28:00:44, 3.26it/s] 11%|█▏ | 42394/371472 [3:25:12<27:52:17, 3.28it/s] 11%|█▏ | 42395/371472 [3:25:12<27:08:35, 3.37it/s] 11%|█▏ | 42396/371472 [3:25:13<26:32:24, 3.44it/s] 11%|█▏ | 42397/371472 [3:25:13<25:41:45, 3.56it/s] 11%|█▏ | 42398/371472 [3:25:13<26:20:02, 3.47it/s] 11%|█▏ | 42399/371472 [3:25:14<26:09:03, 3.50it/s] 11%|█▏ | 42400/371472 [3:25:14<27:27:11, 3.33it/s] {'loss': 4.2747, 'learning_rate': 8.977030317395332e-07, 'epoch': 1.83} + 11%|█▏ | 42400/371472 [3:25:14<27:27:11, 3.33it/s] 11%|█▏ | 42401/371472 [3:25:14<25:57:30, 3.52it/s] 11%|█▏ | 42402/371472 [3:25:14<25:19:11, 3.61it/s] 11%|█▏ | 42403/371472 [3:25:15<24:50:28, 3.68it/s] 11%|█▏ | 42404/371472 [3:25:15<24:31:34, 3.73it/s] 11%|█▏ | 42405/371472 [3:25:15<24:57:34, 3.66it/s] 11%|█▏ | 42406/371472 [3:25:16<25:40:12, 3.56it/s] 11%|█▏ | 42407/371472 [3:25:16<25:47:08, 3.54it/s] 11%|█▏ | 42408/371472 [3:25:16<25:50:31, 3.54it/s] 11%|█▏ | 42409/371472 [3:25:16<25:02:33, 3.65it/s] 11%|█▏ | 42410/371472 [3:25:17<26:12:12, 3.49it/s] 11%|█▏ | 42411/371472 [3:25:17<25:36:31, 3.57it/s] 11%|█▏ | 42412/371472 [3:25:17<24:50:56, 3.68it/s] 11%|█▏ | 42413/371472 [3:25:17<24:55:16, 3.67it/s] 11%|█▏ | 42414/371472 [3:25:18<24:36:45, 3.71it/s] 11%|█▏ | 42415/371472 [3:25:18<25:27:16, 3.59it/s] 11%|█▏ | 42416/371472 [3:25:18<26:51:37, 3.40it/s] 11%|█▏ | 42417/371472 [3:25:19<26:50:02, 3.41it/s] 11%|█▏ | 42418/371472 [3:25:19<26:39:25, 3.43it/s] 11%|█▏ | 42419/371472 [3:25:19<26:27:17, 3.46it/s] 11%|█▏ | 42420/371472 [3:25:19<25:39:56, 3.56it/s] {'loss': 4.4391, 'learning_rate': 8.976545497640543e-07, 'epoch': 1.83} + 11%|█▏ | 42420/371472 [3:25:20<25:39:56, 3.56it/s] 11%|█▏ | 42421/371472 [3:25:20<25:37:19, 3.57it/s] 11%|█▏ | 42422/371472 [3:25:20<25:19:55, 3.61it/s] 11%|█▏ | 42423/371472 [3:25:20<25:44:09, 3.55it/s] 11%|█▏ | 42424/371472 [3:25:21<25:11:22, 3.63it/s] 11%|█▏ | 42425/371472 [3:25:21<26:20:18, 3.47it/s] 11%|█▏ | 42426/371472 [3:25:21<25:27:34, 3.59it/s] 11%|█▏ | 42427/371472 [3:25:21<25:02:12, 3.65it/s] 11%|█▏ | 42428/371472 [3:25:22<26:23:40, 3.46it/s] 11%|█▏ | 42429/371472 [3:25:22<25:38:00, 3.57it/s] 11%|█▏ | 42430/371472 [3:25:22<24:30:35, 3.73it/s] 11%|█▏ | 42431/371472 [3:25:23<25:18:18, 3.61it/s] 11%|█▏ | 42432/371472 [3:25:23<24:46:10, 3.69it/s] 11%|█▏ | 42433/371472 [3:25:23<26:10:59, 3.49it/s] 11%|█▏ | 42434/371472 [3:25:23<27:10:00, 3.36it/s] 11%|█▏ | 42435/371472 [3:25:24<27:46:22, 3.29it/s] 11%|█▏ | 42436/371472 [3:25:24<28:17:30, 3.23it/s] 11%|█▏ | 42437/371472 [3:25:24<27:27:53, 3.33it/s] 11%|█▏ | 42438/371472 [3:25:25<26:29:52, 3.45it/s] 11%|█▏ | 42439/371472 [3:25:25<26:00:33, 3.51it/s] 11%|█▏ | 42440/371472 [3:25:25<26:08:29, 3.50it/s] {'loss': 4.5646, 'learning_rate': 8.976060677885755e-07, 'epoch': 1.83} + 11%|█▏ | 42440/371472 [3:25:25<26:08:29, 3.50it/s] 11%|█▏ | 42441/371472 [3:25:25<25:55:48, 3.52it/s] 11%|█▏ | 42442/371472 [3:25:26<25:08:03, 3.64it/s] 11%|█▏ | 42443/371472 [3:25:26<25:00:31, 3.65it/s] 11%|█▏ | 42444/371472 [3:25:26<25:30:03, 3.58it/s] 11%|█▏ | 42445/371472 [3:25:27<26:39:03, 3.43it/s] 11%|█▏ | 42446/371472 [3:25:27<25:54:26, 3.53it/s] 11%|█▏ | 42447/371472 [3:25:27<25:15:52, 3.62it/s] 11%|█▏ | 42448/371472 [3:25:27<24:56:30, 3.66it/s] 11%|█▏ | 42449/371472 [3:25:28<24:47:08, 3.69it/s] 11%|█▏ | 42450/371472 [3:25:28<24:48:16, 3.68it/s] 11%|█▏ | 42451/371472 [3:25:28<24:26:32, 3.74it/s] 11%|█▏ | 42452/371472 [3:25:28<24:41:24, 3.70it/s] 11%|█▏ | 42453/371472 [3:25:29<25:16:57, 3.61it/s] 11%|█▏ | 42454/371472 [3:25:29<25:46:12, 3.55it/s] 11%|█▏ | 42455/371472 [3:25:29<26:35:40, 3.44it/s] 11%|█▏ | 42456/371472 [3:25:30<25:55:46, 3.52it/s] 11%|█▏ | 42457/371472 [3:25:30<25:37:29, 3.57it/s] 11%|█▏ | 42458/371472 [3:25:30<27:56:28, 3.27it/s] 11%|█▏ | 42459/371472 [3:25:31<28:42:12, 3.18it/s] 11%|█▏ | 42460/371472 [3:25:31<30:43:38, 2.97it/s] {'loss': 4.2667, 'learning_rate': 8.975575858130965e-07, 'epoch': 1.83} + 11%|█▏ | 42460/371472 [3:25:31<30:43:38, 2.97it/s] 11%|█▏ | 42461/371472 [3:25:31<29:20:53, 3.11it/s] 11%|█▏ | 42462/371472 [3:25:32<28:12:44, 3.24it/s] 11%|█▏ | 42463/371472 [3:25:32<28:00:09, 3.26it/s] 11%|█▏ | 42464/371472 [3:25:32<26:54:03, 3.40it/s] 11%|█▏ | 42465/371472 [3:25:32<25:47:18, 3.54it/s] 11%|█▏ | 42466/371472 [3:25:33<24:50:14, 3.68it/s] 11%|█▏ | 42467/371472 [3:25:33<30:26:10, 3.00it/s] 11%|█▏ | 42468/371472 [3:25:33<29:36:23, 3.09it/s] 11%|█▏ | 42469/371472 [3:25:34<29:46:39, 3.07it/s] 11%|█▏ | 42470/371472 [3:25:34<29:44:37, 3.07it/s] 11%|█▏ | 42471/371472 [3:25:34<28:48:04, 3.17it/s] 11%|█▏ | 42472/371472 [3:25:35<27:27:50, 3.33it/s] 11%|█▏ | 42473/371472 [3:25:35<26:41:45, 3.42it/s] 11%|█▏ | 42474/371472 [3:25:35<25:32:23, 3.58it/s] 11%|█▏ | 42475/371472 [3:25:36<27:44:40, 3.29it/s] 11%|█▏ | 42476/371472 [3:25:36<26:29:43, 3.45it/s] 11%|█▏ | 42477/371472 [3:25:36<27:28:07, 3.33it/s] 11%|█▏ | 42478/371472 [3:25:36<29:10:25, 3.13it/s] 11%|█▏ | 42479/371472 [3:25:37<27:22:57, 3.34it/s] 11%|█▏ | 42480/371472 [3:25:37<26:25:33, 3.46it/s] {'loss': 4.4766, 'learning_rate': 8.975091038376177e-07, 'epoch': 1.83} + 11%|█▏ | 42480/371472 [3:25:37<26:25:33, 3.46it/s] 11%|█▏ | 42481/371472 [3:25:37<26:08:23, 3.50it/s] 11%|█▏ | 42482/371472 [3:25:38<25:44:16, 3.55it/s] 11%|█▏ | 42483/371472 [3:25:38<26:19:04, 3.47it/s] 11%|█▏ | 42484/371472 [3:25:38<26:25:46, 3.46it/s] 11%|█▏ | 42485/371472 [3:25:38<27:09:11, 3.37it/s] 11%|█▏ | 42486/371472 [3:25:39<26:22:29, 3.46it/s] 11%|█▏ | 42487/371472 [3:25:39<29:43:40, 3.07it/s] 11%|█▏ | 42488/371472 [3:25:39<29:08:02, 3.14it/s] 11%|█▏ | 42489/371472 [3:25:40<31:09:18, 2.93it/s] 11%|█▏ | 42490/371472 [3:25:40<28:57:41, 3.16it/s] 11%|█▏ | 42491/371472 [3:25:40<27:51:08, 3.28it/s] 11%|█▏ | 42492/371472 [3:25:41<28:24:21, 3.22it/s] 11%|█▏ | 42493/371472 [3:25:41<28:47:17, 3.17it/s] 11%|█▏ | 42494/371472 [3:25:41<28:36:55, 3.19it/s] 11%|█▏ | 42495/371472 [3:25:42<28:14:15, 3.24it/s] 11%|█▏ | 42496/371472 [3:25:42<28:35:11, 3.20it/s] 11%|█▏ | 42497/371472 [3:25:42<27:39:37, 3.30it/s] 11%|█▏ | 42498/371472 [3:25:42<26:26:26, 3.46it/s] 11%|█▏ | 42499/371472 [3:25:43<29:54:20, 3.06it/s] 11%|█▏ | 42500/371472 [3:25:43<29:16:28, 3.12it/s] {'loss': 4.2121, 'learning_rate': 8.974606218621388e-07, 'epoch': 1.83} + 11%|█▏ | 42500/371472 [3:25:43<29:16:28, 3.12it/s] 11%|█▏ | 42501/371472 [3:25:44<29:06:42, 3.14it/s] 11%|█▏ | 42502/371472 [3:25:44<28:01:44, 3.26it/s] 11%|█▏ | 42503/371472 [3:25:44<28:48:44, 3.17it/s] 11%|█▏ | 42504/371472 [3:25:44<28:33:59, 3.20it/s] 11%|█▏ | 42505/371472 [3:25:45<26:41:32, 3.42it/s] 11%|█▏ | 42506/371472 [3:25:45<26:20:23, 3.47it/s] 11%|█▏ | 42507/371472 [3:25:45<26:15:26, 3.48it/s] 11%|█▏ | 42508/371472 [3:25:46<28:40:20, 3.19it/s] 11%|█▏ | 42509/371472 [3:25:46<27:04:08, 3.38it/s] 11%|█▏ | 42510/371472 [3:25:46<26:42:20, 3.42it/s] 11%|█▏ | 42511/371472 [3:25:46<26:16:02, 3.48it/s] 11%|█▏ | 42512/371472 [3:25:47<26:07:41, 3.50it/s] 11%|█▏ | 42513/371472 [3:25:47<26:17:18, 3.48it/s] 11%|█▏ | 42514/371472 [3:25:47<26:12:33, 3.49it/s] 11%|█▏ | 42515/371472 [3:25:48<26:47:23, 3.41it/s] 11%|█▏ | 42516/371472 [3:25:48<27:22:07, 3.34it/s] 11%|█▏ | 42517/371472 [3:25:48<26:27:00, 3.45it/s] 11%|█▏ | 42518/371472 [3:25:48<27:06:50, 3.37it/s] 11%|█▏ | 42519/371472 [3:25:49<25:56:46, 3.52it/s] 11%|█▏ | 42520/371472 [3:25:49<25:18:11, 3.61it/s] {'loss': 4.4128, 'learning_rate': 8.974121398866599e-07, 'epoch': 1.83} + 11%|█▏ | 42520/371472 [3:25:49<25:18:11, 3.61it/s] 11%|█▏ | 42521/371472 [3:25:49<26:00:43, 3.51it/s] 11%|█▏ | 42522/371472 [3:25:50<24:48:53, 3.68it/s] 11%|█▏ | 42523/371472 [3:25:50<25:29:07, 3.59it/s] 11%|█▏ | 42524/371472 [3:25:50<25:20:39, 3.61it/s] 11%|█▏ | 42525/371472 [3:25:50<25:28:39, 3.59it/s] 11%|█▏ | 42526/371472 [3:25:51<25:10:14, 3.63it/s] 11%|█▏ | 42527/371472 [3:25:51<26:34:14, 3.44it/s] 11%|█▏ | 42528/371472 [3:25:51<25:13:06, 3.62it/s] 11%|█▏ | 42529/371472 [3:25:52<27:01:21, 3.38it/s] 11%|█▏ | 42530/371472 [3:25:52<26:10:32, 3.49it/s] 11%|█▏ | 42531/371472 [3:25:52<26:02:38, 3.51it/s] 11%|█▏ | 42532/371472 [3:25:52<26:31:00, 3.45it/s] 11%|█▏ | 42533/371472 [3:25:53<27:35:06, 3.31it/s] 11%|█▏ | 42534/371472 [3:25:53<26:20:02, 3.47it/s] 11%|█▏ | 42535/371472 [3:25:53<26:37:05, 3.43it/s] 11%|█▏ | 42536/371472 [3:25:54<27:55:52, 3.27it/s] 11%|█▏ | 42537/371472 [3:25:54<27:23:40, 3.34it/s] 11%|█▏ | 42538/371472 [3:25:54<27:30:21, 3.32it/s] 11%|█▏ | 42539/371472 [3:25:55<27:08:30, 3.37it/s] 11%|█▏ | 42540/371472 [3:25:55<26:27:59, 3.45it/s] {'loss': 4.2765, 'learning_rate': 8.973636579111809e-07, 'epoch': 1.83} + 11%|█▏ | 42540/371472 [3:25:55<26:27:59, 3.45it/s] 11%|█▏ | 42541/371472 [3:25:55<26:30:00, 3.45it/s] 11%|█▏ | 42542/371472 [3:25:55<25:45:34, 3.55it/s] 11%|█▏ | 42543/371472 [3:25:56<26:14:30, 3.48it/s] 11%|█▏ | 42544/371472 [3:25:56<25:19:06, 3.61it/s] 11%|█▏ | 42545/371472 [3:25:56<27:17:31, 3.35it/s] 11%|█▏ | 42546/371472 [3:25:57<27:22:45, 3.34it/s] 11%|█▏ | 42547/371472 [3:25:57<26:02:38, 3.51it/s] 11%|█▏ | 42548/371472 [3:25:57<25:26:46, 3.59it/s] 11%|█▏ | 42549/371472 [3:25:57<25:55:39, 3.52it/s] 11%|█▏ | 42550/371472 [3:25:58<25:14:48, 3.62it/s] 11%|█▏ | 42551/371472 [3:25:58<24:13:19, 3.77it/s] 11%|█▏ | 42552/371472 [3:25:58<25:21:17, 3.60it/s] 11%|█▏ | 42553/371472 [3:25:58<25:34:11, 3.57it/s] 11%|█▏ | 42554/371472 [3:25:59<27:40:22, 3.30it/s] 11%|█▏ | 42555/371472 [3:25:59<27:43:52, 3.29it/s] 11%|█▏ | 42556/371472 [3:25:59<28:26:53, 3.21it/s] 11%|█▏ | 42557/371472 [3:26:00<26:59:03, 3.39it/s] 11%|█▏ | 42558/371472 [3:26:00<26:38:08, 3.43it/s] 11%|█▏ | 42559/371472 [3:26:00<26:46:31, 3.41it/s] 11%|█▏ | 42560/371472 [3:26:01<26:29:26, 3.45it/s] {'loss': 4.5529, 'learning_rate': 8.973151759357021e-07, 'epoch': 1.83} + 11%|█▏ | 42560/371472 [3:26:01<26:29:26, 3.45it/s] 11%|█▏ | 42561/371472 [3:26:01<25:36:05, 3.57it/s] 11%|█▏ | 42562/371472 [3:26:01<25:29:53, 3.58it/s] 11%|█▏ | 42563/371472 [3:26:01<25:46:50, 3.54it/s] 11%|█▏ | 42564/371472 [3:26:02<25:49:57, 3.54it/s] 11%|█▏ | 42565/371472 [3:26:02<25:46:20, 3.55it/s] 11%|█▏ | 42566/371472 [3:26:02<24:49:39, 3.68it/s] 11%|█▏ | 42567/371472 [3:26:03<26:33:17, 3.44it/s] 11%|█▏ | 42568/371472 [3:26:03<26:37:32, 3.43it/s] 11%|█▏ | 42569/371472 [3:26:03<26:18:32, 3.47it/s] 11%|█▏ | 42570/371472 [3:26:03<26:46:05, 3.41it/s] 11%|█▏ | 42571/371472 [3:26:04<26:20:16, 3.47it/s] 11%|█▏ | 42572/371472 [3:26:04<25:30:28, 3.58it/s] 11%|█▏ | 42573/371472 [3:26:04<24:43:42, 3.69it/s] 11%|█▏ | 42574/371472 [3:26:04<24:25:53, 3.74it/s] 11%|█▏ | 42575/371472 [3:26:05<24:51:52, 3.67it/s] 11%|█▏ | 42576/371472 [3:26:05<24:34:43, 3.72it/s] 11%|█▏ | 42577/371472 [3:26:05<26:48:22, 3.41it/s] 11%|█▏ | 42578/371472 [3:26:06<26:26:45, 3.45it/s] 11%|█▏ | 42579/371472 [3:26:06<26:08:45, 3.49it/s] 11%|█▏ | 42580/371472 [3:26:06<27:26:47, 3.33it/s] {'loss': 4.3912, 'learning_rate': 8.972666939602232e-07, 'epoch': 1.83} + 11%|█▏ | 42580/371472 [3:26:06<27:26:47, 3.33it/s] 11%|█▏ | 42581/371472 [3:26:07<27:14:25, 3.35it/s] 11%|█▏ | 42582/371472 [3:26:07<30:26:20, 3.00it/s] 11%|█▏ | 42583/371472 [3:26:07<29:04:47, 3.14it/s] 11%|█▏ | 42584/371472 [3:26:08<28:04:50, 3.25it/s] 11%|█▏ | 42585/371472 [3:26:08<26:59:18, 3.39it/s] 11%|█▏ | 42586/371472 [3:26:08<26:30:45, 3.45it/s] 11%|█▏ | 42587/371472 [3:26:08<28:01:31, 3.26it/s] 11%|█▏ | 42588/371472 [3:26:09<27:35:06, 3.31it/s] 11%|█▏ | 42589/371472 [3:26:09<26:12:43, 3.49it/s] 11%|█▏ | 42590/371472 [3:26:09<27:29:21, 3.32it/s] 11%|█▏ | 42591/371472 [3:26:10<28:42:11, 3.18it/s] 11%|█▏ | 42592/371472 [3:26:10<27:20:50, 3.34it/s] 11%|█▏ | 42593/371472 [3:26:10<25:57:41, 3.52it/s] 11%|█▏ | 42594/371472 [3:26:10<25:22:24, 3.60it/s] 11%|█▏ | 42595/371472 [3:26:11<25:23:09, 3.60it/s] 11%|█▏ | 42596/371472 [3:26:11<24:55:24, 3.67it/s] 11%|█▏ | 42597/371472 [3:26:11<24:55:50, 3.66it/s] 11%|█▏ | 42598/371472 [3:26:12<25:56:49, 3.52it/s] 11%|█▏ | 42599/371472 [3:26:12<25:16:11, 3.62it/s] 11%|█▏ | 42600/371472 [3:26:12<25:42:20, 3.55it/s] {'loss': 4.5823, 'learning_rate': 8.972182119847444e-07, 'epoch': 1.83} + 11%|█▏ | 42600/371472 [3:26:12<25:42:20, 3.55it/s] 11%|█▏ | 42601/371472 [3:26:12<24:59:59, 3.65it/s] 11%|█▏ | 42602/371472 [3:26:13<25:17:58, 3.61it/s] 11%|█▏ | 42603/371472 [3:26:13<25:14:21, 3.62it/s] 11%|█▏ | 42604/371472 [3:26:13<25:09:56, 3.63it/s] 11%|█▏ | 42605/371472 [3:26:13<25:23:21, 3.60it/s] 11%|█▏ | 42606/371472 [3:26:14<28:24:34, 3.22it/s] 11%|█▏ | 42607/371472 [3:26:14<29:57:58, 3.05it/s] 11%|█▏ | 42608/371472 [3:26:14<27:42:53, 3.30it/s] 11%|█▏ | 42609/371472 [3:26:15<27:09:05, 3.36it/s] 11%|█▏ | 42610/371472 [3:26:15<26:32:51, 3.44it/s] 11%|█▏ | 42611/371472 [3:26:15<26:49:15, 3.41it/s] 11%|█▏ | 42612/371472 [3:26:16<25:39:20, 3.56it/s] 11%|█▏ | 42613/371472 [3:26:16<26:34:20, 3.44it/s] 11%|█▏ | 42614/371472 [3:26:16<26:57:28, 3.39it/s] 11%|█▏ | 42615/371472 [3:26:17<27:26:32, 3.33it/s] 11%|█▏ | 42616/371472 [3:26:17<26:21:13, 3.47it/s] 11%|█▏ | 42617/371472 [3:26:17<25:38:08, 3.56it/s] 11%|█▏ | 42618/371472 [3:26:17<25:01:58, 3.65it/s] 11%|█▏ | 42619/371472 [3:26:18<25:04:08, 3.64it/s] 11%|█▏ | 42620/371472 [3:26:18<25:24:26, 3.60it/s] {'loss': 4.3639, 'learning_rate': 8.971697300092654e-07, 'epoch': 1.84} + 11%|█▏ | 42620/371472 [3:26:18<25:24:26, 3.60it/s] 11%|█▏ | 42621/371472 [3:26:18<29:11:29, 3.13it/s] 11%|█▏ | 42622/371472 [3:26:19<28:25:53, 3.21it/s] 11%|█▏ | 42623/371472 [3:26:19<26:53:28, 3.40it/s] 11%|█▏ | 42624/371472 [3:26:19<25:45:55, 3.55it/s] 11%|█▏ | 42625/371472 [3:26:19<25:33:50, 3.57it/s] 11%|█▏ | 42626/371472 [3:26:20<26:06:24, 3.50it/s] 11%|█▏ | 42627/371472 [3:26:20<25:34:47, 3.57it/s] 11%|█▏ | 42628/371472 [3:26:20<25:10:17, 3.63it/s] 11%|█▏ | 42629/371472 [3:26:20<25:49:55, 3.54it/s] 11%|█▏ | 42630/371472 [3:26:21<25:56:15, 3.52it/s] 11%|█▏ | 42631/371472 [3:26:21<26:47:10, 3.41it/s] 11%|█▏ | 42632/371472 [3:26:21<28:20:49, 3.22it/s] 11%|█▏ | 42633/371472 [3:26:22<30:29:10, 3.00it/s] 11%|█▏ | 42634/371472 [3:26:22<30:01:58, 3.04it/s] 11%|█▏ | 42635/371472 [3:26:22<29:37:55, 3.08it/s] 11%|█▏ | 42636/371472 [3:26:23<28:01:50, 3.26it/s] 11%|█▏ | 42637/371472 [3:26:23<27:19:20, 3.34it/s] 11%|█▏ | 42638/371472 [3:26:23<27:25:27, 3.33it/s] 11%|█▏ | 42639/371472 [3:26:24<27:09:20, 3.36it/s] 11%|█▏ | 42640/371472 [3:26:24<26:45:11, 3.41it/s] {'loss': 4.301, 'learning_rate': 8.971212480337865e-07, 'epoch': 1.84} + 11%|█▏ | 42640/371472 [3:26:24<26:45:11, 3.41it/s] 11%|█▏ | 42641/371472 [3:26:24<25:53:27, 3.53it/s] 11%|█▏ | 42642/371472 [3:26:24<25:07:58, 3.63it/s] 11%|█▏ | 42643/371472 [3:26:25<26:46:26, 3.41it/s] 11%|█▏ | 42644/371472 [3:26:25<26:17:34, 3.47it/s] 11%|█▏ | 42645/371472 [3:26:25<25:25:37, 3.59it/s] 11%|█▏ | 42646/371472 [3:26:26<25:04:16, 3.64it/s] 11%|█▏ | 42647/371472 [3:26:26<25:39:48, 3.56it/s] 11%|█▏ | 42648/371472 [3:26:26<25:03:14, 3.65it/s] 11%|█▏ | 42649/371472 [3:26:26<24:33:10, 3.72it/s] 11%|█▏ | 42650/371472 [3:26:27<25:34:24, 3.57it/s] 11%|█▏ | 42651/371472 [3:26:27<25:31:36, 3.58it/s] 11%|█▏ | 42652/371472 [3:26:27<25:36:44, 3.57it/s] 11%|█▏ | 42653/371472 [3:26:27<25:26:55, 3.59it/s] 11%|█▏ | 42654/371472 [3:26:28<25:20:43, 3.60it/s] 11%|█▏ | 42655/371472 [3:26:28<25:45:20, 3.55it/s] 11%|█▏ | 42656/371472 [3:26:28<28:03:12, 3.26it/s] 11%|█▏ | 42657/371472 [3:26:29<27:15:07, 3.35it/s] 11%|█▏ | 42658/371472 [3:26:29<28:01:37, 3.26it/s] 11%|█▏ | 42659/371472 [3:26:29<26:47:56, 3.41it/s] 11%|█▏ | 42660/371472 [3:26:30<26:03:00, 3.51it/s] {'loss': 4.282, 'learning_rate': 8.970727660583076e-07, 'epoch': 1.84} + 11%|█▏ | 42660/371472 [3:26:30<26:03:00, 3.51it/s] 11%|█▏ | 42661/371472 [3:26:30<28:20:32, 3.22it/s] 11%|█▏ | 42662/371472 [3:26:30<26:43:24, 3.42it/s] 11%|█▏ | 42663/371472 [3:26:30<26:04:17, 3.50it/s] 11%|█▏ | 42664/371472 [3:26:31<26:20:51, 3.47it/s] 11%|█▏ | 42665/371472 [3:26:31<26:07:01, 3.50it/s] 11%|█▏ | 42666/371472 [3:26:31<27:59:57, 3.26it/s] 11%|█▏ | 42667/371472 [3:26:32<27:36:18, 3.31it/s] 11%|█▏ | 42668/371472 [3:26:32<26:34:41, 3.44it/s] 11%|█▏ | 42669/371472 [3:26:32<25:51:26, 3.53it/s] 11%|█▏ | 42670/371472 [3:26:32<25:41:01, 3.56it/s] 11%|█▏ | 42671/371472 [3:26:33<25:41:04, 3.56it/s] 11%|█▏ | 42672/371472 [3:26:33<25:17:05, 3.61it/s] 11%|█▏ | 42673/371472 [3:26:33<25:59:03, 3.51it/s] 11%|█▏ | 42674/371472 [3:26:34<25:05:07, 3.64it/s] 11%|█▏ | 42675/371472 [3:26:34<25:23:40, 3.60it/s] 11%|█▏ | 42676/371472 [3:26:34<26:49:28, 3.40it/s] 11%|█▏ | 42677/371472 [3:26:34<25:56:50, 3.52it/s] 11%|█▏ | 42678/371472 [3:26:35<25:49:36, 3.54it/s] 11%|█▏ | 42679/371472 [3:26:35<27:14:36, 3.35it/s] 11%|█▏ | 42680/371472 [3:26:35<27:30:04, 3.32it/s] {'loss': 4.4277, 'learning_rate': 8.970242840828288e-07, 'epoch': 1.84} + 11%|█▏ | 42680/371472 [3:26:35<27:30:04, 3.32it/s] 11%|█▏ | 42681/371472 [3:26:36<28:03:49, 3.25it/s] 11%|█▏ | 42682/371472 [3:26:36<27:10:49, 3.36it/s] 11%|█▏ | 42683/371472 [3:26:36<28:58:06, 3.15it/s] 11%|█▏ | 42684/371472 [3:26:37<29:21:31, 3.11it/s] 11%|█▏ | 42685/371472 [3:26:37<30:05:46, 3.03it/s] 11%|█▏ | 42686/371472 [3:26:37<31:01:30, 2.94it/s] 11%|█▏ | 42687/371472 [3:26:38<31:28:05, 2.90it/s] 11%|█▏ | 42688/371472 [3:26:38<31:26:46, 2.90it/s] 11%|█▏ | 42689/371472 [3:26:38<28:48:49, 3.17it/s] 11%|█▏ | 42690/371472 [3:26:39<27:12:23, 3.36it/s] 11%|█▏ | 42691/371472 [3:26:39<26:47:35, 3.41it/s] 11%|█▏ | 42692/371472 [3:26:39<25:50:45, 3.53it/s] 11%|█▏ | 42693/371472 [3:26:39<26:32:54, 3.44it/s] 11%|█▏ | 42694/371472 [3:26:40<27:18:32, 3.34it/s] 11%|█▏ | 42695/371472 [3:26:40<26:50:34, 3.40it/s] 11%|█▏ | 42696/371472 [3:26:40<26:20:27, 3.47it/s] 11%|█▏ | 42697/371472 [3:26:41<26:09:32, 3.49it/s] 11%|█▏ | 42698/371472 [3:26:41<27:51:14, 3.28it/s] 11%|█▏ | 42699/371472 [3:26:41<26:43:23, 3.42it/s] 11%|█▏ | 42700/371472 [3:26:41<26:30:59, 3.44it/s] {'loss': 4.5123, 'learning_rate': 8.969758021073498e-07, 'epoch': 1.84} + 11%|█▏ | 42700/371472 [3:26:41<26:30:59, 3.44it/s] 11%|█▏ | 42701/371472 [3:26:42<28:23:26, 3.22it/s] 11%|█▏ | 42702/371472 [3:26:42<28:05:46, 3.25it/s] 11%|█▏ | 42703/371472 [3:26:42<27:01:00, 3.38it/s] 11%|█▏ | 42704/371472 [3:26:43<26:15:37, 3.48it/s] 11%|█▏ | 42705/371472 [3:26:43<25:45:42, 3.54it/s] 11%|█▏ | 42706/371472 [3:26:43<27:24:03, 3.33it/s] 11%|█▏ | 42707/371472 [3:26:44<27:45:41, 3.29it/s] 11%|█▏ | 42708/371472 [3:26:44<27:00:57, 3.38it/s] 11%|█▏ | 42709/371472 [3:26:44<26:49:45, 3.40it/s] 11%|█▏ | 42710/371472 [3:26:44<26:52:18, 3.40it/s] 11%|█▏ | 42711/371472 [3:26:45<26:53:26, 3.40it/s] 11%|█▏ | 42712/371472 [3:26:45<28:19:52, 3.22it/s] 11%|█▏ | 42713/371472 [3:26:45<27:26:50, 3.33it/s] 11%|█▏ | 42714/371472 [3:26:46<27:22:40, 3.34it/s] 11%|█▏ | 42715/371472 [3:26:46<26:14:59, 3.48it/s] 11%|█▏ | 42716/371472 [3:26:46<27:18:43, 3.34it/s] 11%|█▏ | 42717/371472 [3:26:47<26:57:24, 3.39it/s] 11%|█▏ | 42718/371472 [3:26:47<26:17:53, 3.47it/s] 11%|█▏ | 42719/371472 [3:26:47<26:04:03, 3.50it/s] 12%|█▏ | 42720/371472 [3:26:47<26:02:06, 3.51it/s] {'loss': 4.4411, 'learning_rate': 8.96927320131871e-07, 'epoch': 1.84} + 12%|█▏ | 42720/371472 [3:26:47<26:02:06, 3.51it/s] 12%|█▏ | 42721/371472 [3:26:48<26:14:50, 3.48it/s] 12%|█▏ | 42722/371472 [3:26:48<25:35:05, 3.57it/s] 12%|█▏ | 42723/371472 [3:26:48<24:48:48, 3.68it/s] 12%|█▏ | 42724/371472 [3:26:49<28:01:01, 3.26it/s] 12%|█▏ | 42725/371472 [3:26:49<27:09:28, 3.36it/s] 12%|█▏ | 42726/371472 [3:26:49<27:33:43, 3.31it/s] 12%|█▏ | 42727/371472 [3:26:49<26:49:46, 3.40it/s] 12%|█▏ | 42728/371472 [3:26:50<28:40:01, 3.19it/s] 12%|█▏ | 42729/371472 [3:26:50<27:00:55, 3.38it/s] 12%|█▏ | 42730/371472 [3:26:50<27:00:45, 3.38it/s] 12%|█▏ | 42731/371472 [3:26:51<26:25:34, 3.46it/s] 12%|█▏ | 42732/371472 [3:26:51<25:51:25, 3.53it/s] 12%|█▏ | 42733/371472 [3:26:51<25:25:26, 3.59it/s] 12%|█▏ | 42734/371472 [3:26:51<25:22:53, 3.60it/s] 12%|█▏ | 42735/371472 [3:26:52<25:29:38, 3.58it/s] 12%|█▏ | 42736/371472 [3:26:52<27:45:09, 3.29it/s] 12%|█▏ | 42737/371472 [3:26:52<27:08:44, 3.36it/s] 12%|█▏ | 42738/371472 [3:26:53<26:49:31, 3.40it/s] 12%|█▏ | 42739/371472 [3:26:53<26:15:32, 3.48it/s] 12%|█▏ | 42740/371472 [3:26:53<28:17:40, 3.23it/s] {'loss': 4.4547, 'learning_rate': 8.968788381563921e-07, 'epoch': 1.84} + 12%|█▏ | 42740/371472 [3:26:53<28:17:40, 3.23it/s] 12%|█▏ | 42741/371472 [3:26:54<26:39:46, 3.42it/s] 12%|█▏ | 42742/371472 [3:26:54<27:42:11, 3.30it/s] 12%|█▏ | 42743/371472 [3:26:54<28:55:34, 3.16it/s] 12%|█▏ | 42744/371472 [3:26:55<29:13:46, 3.12it/s] 12%|█▏ | 42745/371472 [3:26:55<27:58:24, 3.26it/s] 12%|█▏ | 42746/371472 [3:26:55<27:05:13, 3.37it/s] 12%|█▏ | 42747/371472 [3:26:55<27:14:40, 3.35it/s] 12%|█▏ | 42748/371472 [3:26:56<25:41:52, 3.55it/s] 12%|���▏ | 42749/371472 [3:26:56<25:06:59, 3.64it/s] 12%|█▏ | 42750/371472 [3:26:56<25:14:23, 3.62it/s] 12%|█▏ | 42751/371472 [3:26:56<25:20:02, 3.60it/s] 12%|█▏ | 42752/371472 [3:26:57<25:27:59, 3.59it/s] 12%|█▏ | 42753/371472 [3:26:57<25:07:33, 3.63it/s] 12%|█▏ | 42754/371472 [3:26:57<25:33:08, 3.57it/s] 12%|█▏ | 42755/371472 [3:26:58<25:25:29, 3.59it/s] 12%|█▏ | 42756/371472 [3:26:58<25:33:55, 3.57it/s] 12%|█▏ | 42757/371472 [3:26:58<26:21:40, 3.46it/s] 12%|█▏ | 42758/371472 [3:26:58<26:09:34, 3.49it/s] 12%|█▏ | 42759/371472 [3:26:59<25:31:05, 3.58it/s] 12%|█▏ | 42760/371472 [3:26:59<25:56:16, 3.52it/s] {'loss': 4.2648, 'learning_rate': 8.968303561809131e-07, 'epoch': 1.84} + 12%|█▏ | 42760/371472 [3:26:59<25:56:16, 3.52it/s] 12%|█▏ | 42761/371472 [3:26:59<25:46:30, 3.54it/s] 12%|█▏ | 42762/371472 [3:27:00<24:44:39, 3.69it/s] 12%|█▏ | 42763/371472 [3:27:00<25:06:52, 3.64it/s] 12%|█▏ | 42764/371472 [3:27:00<25:44:59, 3.55it/s] 12%|█▏ | 42765/371472 [3:27:00<25:52:16, 3.53it/s] 12%|█▏ | 42766/371472 [3:27:01<25:22:02, 3.60it/s] 12%|█▏ | 42767/371472 [3:27:01<25:42:01, 3.55it/s] 12%|█▏ | 42768/371472 [3:27:01<26:01:33, 3.51it/s] 12%|█▏ | 42769/371472 [3:27:01<25:22:01, 3.60it/s] 12%|█▏ | 42770/371472 [3:27:02<25:35:27, 3.57it/s] 12%|█▏ | 42771/371472 [3:27:02<24:35:21, 3.71it/s] 12%|█▏ | 42772/371472 [3:27:02<25:25:32, 3.59it/s] 12%|█▏ | 42773/371472 [3:27:03<27:17:13, 3.35it/s] 12%|█▏ | 42774/371472 [3:27:03<26:04:16, 3.50it/s] 12%|█▏ | 42775/371472 [3:27:03<26:32:15, 3.44it/s] 12%|█▏ | 42776/371472 [3:27:04<27:34:14, 3.31it/s] 12%|█▏ | 42777/371472 [3:27:04<26:45:40, 3.41it/s] 12%|█▏ | 42778/371472 [3:27:04<27:00:18, 3.38it/s] 12%|█▏ | 42779/371472 [3:27:04<26:25:19, 3.46it/s] 12%|█▏ | 42780/371472 [3:27:05<27:24:28, 3.33it/s] {'loss': 4.4489, 'learning_rate': 8.967818742054341e-07, 'epoch': 1.84} + 12%|█▏ | 42780/371472 [3:27:05<27:24:28, 3.33it/s] 12%|█▏ | 42781/371472 [3:27:05<26:13:33, 3.48it/s] 12%|█▏ | 42782/371472 [3:27:05<25:40:04, 3.56it/s] 12%|█▏ | 42783/371472 [3:27:06<26:10:00, 3.49it/s] 12%|█▏ | 42784/371472 [3:27:06<25:19:09, 3.61it/s] 12%|█▏ | 42785/371472 [3:27:06<25:38:49, 3.56it/s] 12%|█▏ | 42786/371472 [3:27:06<25:33:52, 3.57it/s] 12%|█▏ | 42787/371472 [3:27:07<25:25:06, 3.59it/s] 12%|█▏ | 42788/371472 [3:27:07<25:14:02, 3.62it/s] 12%|█▏ | 42789/371472 [3:27:07<26:03:46, 3.50it/s] 12%|█▏ | 42790/371472 [3:27:08<25:51:52, 3.53it/s] 12%|█▏ | 42791/371472 [3:27:08<28:01:11, 3.26it/s] 12%|█▏ | 42792/371472 [3:27:08<26:21:45, 3.46it/s] 12%|█▏ | 42793/371472 [3:27:08<26:41:06, 3.42it/s] 12%|█▏ | 42794/371472 [3:27:09<30:13:48, 3.02it/s] 12%|█▏ | 42795/371472 [3:27:09<28:47:22, 3.17it/s] 12%|█▏ | 42796/371472 [3:27:09<27:47:41, 3.28it/s] 12%|█▏ | 42797/371472 [3:27:10<26:59:10, 3.38it/s] 12%|█▏ | 42798/371472 [3:27:10<25:52:54, 3.53it/s] 12%|█▏ | 42799/371472 [3:27:10<25:42:27, 3.55it/s] 12%|█▏ | 42800/371472 [3:27:10<26:07:34, 3.49it/s] {'loss': 4.4945, 'learning_rate': 8.967333922299554e-07, 'epoch': 1.84} + 12%|█▏ | 42800/371472 [3:27:10<26:07:34, 3.49it/s] 12%|█▏ | 42801/371472 [3:27:11<27:06:40, 3.37it/s] 12%|█▏ | 42802/371472 [3:27:11<27:38:48, 3.30it/s] 12%|█▏ | 42803/371472 [3:27:11<28:11:57, 3.24it/s] 12%|█▏ | 42804/371472 [3:27:12<27:54:21, 3.27it/s] 12%|█▏ | 42805/371472 [3:27:12<26:58:10, 3.39it/s] 12%|█▏ | 42806/371472 [3:27:12<26:25:11, 3.46it/s] 12%|█▏ | 42807/371472 [3:27:13<25:32:19, 3.57it/s] 12%|█▏ | 42808/371472 [3:27:13<24:41:32, 3.70it/s] 12%|█▏ | 42809/371472 [3:27:13<24:31:16, 3.72it/s] 12%|█▏ | 42810/371472 [3:27:13<24:34:56, 3.71it/s] 12%|█▏ | 42811/371472 [3:27:14<27:16:57, 3.35it/s] 12%|█▏ | 42812/371472 [3:27:14<27:45:33, 3.29it/s] 12%|█▏ | 42813/371472 [3:27:14<27:26:49, 3.33it/s] 12%|█▏ | 42814/371472 [3:27:15<27:03:06, 3.37it/s] 12%|█▏ | 42815/371472 [3:27:15<26:28:12, 3.45it/s] 12%|█▏ | 42816/371472 [3:27:15<25:47:43, 3.54it/s] 12%|█▏ | 42817/371472 [3:27:15<25:26:16, 3.59it/s] 12%|█▏ | 42818/371472 [3:27:16<25:18:24, 3.61it/s] 12%|█▏ | 42819/371472 [3:27:16<28:18:22, 3.23it/s] 12%|█▏ | 42820/371472 [3:27:16<30:17:56, 3.01it/s] {'loss': 4.2532, 'learning_rate': 8.966849102544765e-07, 'epoch': 1.84} + 12%|█▏ | 42820/371472 [3:27:16<30:17:56, 3.01it/s] 12%|█▏ | 42821/371472 [3:27:17<29:57:52, 3.05it/s] 12%|█▏ | 42822/371472 [3:27:17<28:24:27, 3.21it/s] 12%|█▏ | 42823/371472 [3:27:17<28:15:58, 3.23it/s] 12%|█▏ | 42824/371472 [3:27:18<31:53:41, 2.86it/s] 12%|█▏ | 42825/371472 [3:27:18<29:57:22, 3.05it/s] 12%|█▏ | 42826/371472 [3:27:18<28:35:15, 3.19it/s] 12%|█▏ | 42827/371472 [3:27:19<27:11:55, 3.36it/s] 12%|█▏ | 42828/371472 [3:27:19<26:06:34, 3.50it/s] 12%|█▏ | 42829/371472 [3:27:19<26:34:10, 3.44it/s] 12%|█▏ | 42830/371472 [3:27:19<25:54:57, 3.52it/s] 12%|█▏ | 42831/371472 [3:27:20<24:55:02, 3.66it/s] 12%|█▏ | 42832/371472 [3:27:20<24:23:44, 3.74it/s] 12%|█▏ | 42833/371472 [3:27:20<24:55:44, 3.66it/s] 12%|█▏ | 42834/371472 [3:27:20<24:39:35, 3.70it/s] 12%|█▏ | 42835/371472 [3:27:21<25:24:32, 3.59it/s] 12%|█▏ | 42836/371472 [3:27:21<24:59:20, 3.65it/s] 12%|█▏ | 42837/371472 [3:27:21<25:06:48, 3.64it/s] 12%|█▏ | 42838/371472 [3:27:22<24:59:48, 3.65it/s] 12%|█▏ | 42839/371472 [3:27:22<24:48:34, 3.68it/s] 12%|█▏ | 42840/371472 [3:27:22<25:02:24, 3.65it/s] {'loss': 4.2342, 'learning_rate': 8.966364282789975e-07, 'epoch': 1.85} + 12%|█▏ | 42840/371472 [3:27:22<25:02:24, 3.65it/s] 12%|█▏ | 42841/371472 [3:27:22<25:18:13, 3.61it/s] 12%|█▏ | 42842/371472 [3:27:23<26:05:04, 3.50it/s] 12%|█▏ | 42843/371472 [3:27:23<25:46:20, 3.54it/s] 12%|█▏ | 42844/371472 [3:27:23<25:51:36, 3.53it/s] 12%|█▏ | 42845/371472 [3:27:24<27:15:16, 3.35it/s] 12%|█▏ | 42846/371472 [3:27:24<26:18:12, 3.47it/s] 12%|█▏ | 42847/371472 [3:27:24<26:28:49, 3.45it/s] 12%|█▏ | 42848/371472 [3:27:24<26:04:59, 3.50it/s] 12%|█▏ | 42849/371472 [3:27:25<26:45:46, 3.41it/s] 12%|█▏ | 42850/371472 [3:27:25<25:37:14, 3.56it/s] 12%|█▏ | 42851/371472 [3:27:25<25:13:31, 3.62it/s] 12%|█▏ | 42852/371472 [3:27:26<25:05:01, 3.64it/s] 12%|█▏ | 42853/371472 [3:27:26<24:55:30, 3.66it/s] 12%|█▏ | 42854/371472 [3:27:26<26:14:33, 3.48it/s] 12%|█▏ | 42855/371472 [3:27:26<27:25:11, 3.33it/s] 12%|█▏ | 42856/371472 [3:27:27<26:11:47, 3.48it/s] 12%|█▏ | 42857/371472 [3:27:27<25:42:22, 3.55it/s] 12%|█▏ | 42858/371472 [3:27:27<26:26:23, 3.45it/s] 12%|█▏ | 42859/371472 [3:27:28<25:58:32, 3.51it/s] 12%|█▏ | 42860/371472 [3:27:28<26:11:04, 3.49it/s] {'loss': 4.677, 'learning_rate': 8.965879463035187e-07, 'epoch': 1.85} + 12%|█▏ | 42860/371472 [3:27:28<26:11:04, 3.49it/s] 12%|█▏ | 42861/371472 [3:27:28<27:35:44, 3.31it/s] 12%|█▏ | 42862/371472 [3:27:28<26:11:00, 3.49it/s] 12%|█▏ | 42863/371472 [3:27:29<25:37:36, 3.56it/s] 12%|█▏ | 42864/371472 [3:27:29<26:29:00, 3.45it/s] 12%|█▏ | 42865/371472 [3:27:29<26:42:36, 3.42it/s] 12%|█▏ | 42866/371472 [3:27:30<25:54:12, 3.52it/s] 12%|█▏ | 42867/371472 [3:27:30<26:00:30, 3.51it/s] 12%|█▏ | 42868/371472 [3:27:30<26:08:23, 3.49it/s] 12%|█▏ | 42869/371472 [3:27:30<25:42:47, 3.55it/s] 12%|█▏ | 42870/371472 [3:27:31<26:00:12, 3.51it/s] 12%|█▏ | 42871/371472 [3:27:31<28:17:58, 3.23it/s] 12%|█▏ | 42872/371472 [3:27:32<30:41:59, 2.97it/s] 12%|█▏ | 42873/371472 [3:27:32<30:11:53, 3.02it/s] 12%|█▏ | 42874/371472 [3:27:32<28:42:05, 3.18it/s] 12%|█▏ | 42875/371472 [3:27:32<27:29:13, 3.32it/s] 12%|█▏ | 42876/371472 [3:27:33<26:31:29, 3.44it/s] 12%|█▏ | 42877/371472 [3:27:33<26:07:24, 3.49it/s] 12%|█▏ | 42878/371472 [3:27:33<25:42:41, 3.55it/s] 12%|█▏ | 42879/371472 [3:27:33<25:45:56, 3.54it/s] 12%|█▏ | 42880/371472 [3:27:34<25:22:17, 3.60it/s] {'loss': 4.2939, 'learning_rate': 8.965394643280399e-07, 'epoch': 1.85} + 12%|█▏ | 42880/371472 [3:27:34<25:22:17, 3.60it/s] 12%|█▏ | 42881/371472 [3:27:34<25:11:29, 3.62it/s] 12%|█▏ | 42882/371472 [3:27:34<25:26:07, 3.59it/s] 12%|█▏ | 42883/371472 [3:27:35<24:38:36, 3.70it/s] 12%|█▏ | 42884/371472 [3:27:35<27:21:14, 3.34it/s] 12%|█▏ | 42885/371472 [3:27:35<26:57:29, 3.39it/s] 12%|█▏ | 42886/371472 [3:27:35<26:35:47, 3.43it/s] 12%|█▏ | 42887/371472 [3:27:36<25:30:15, 3.58it/s] 12%|█▏ | 42888/371472 [3:27:36<26:36:50, 3.43it/s] 12%|█▏ | 42889/371472 [3:27:36<27:23:38, 3.33it/s] 12%|█▏ | 42890/371472 [3:27:37<28:02:01, 3.26it/s] 12%|█▏ | 42891/371472 [3:27:37<27:48:58, 3.28it/s] 12%|█▏ | 42892/371472 [3:27:37<27:10:55, 3.36it/s] 12%|█▏ | 42893/371472 [3:27:38<26:03:10, 3.50it/s] 12%|█▏ | 42894/371472 [3:27:38<27:35:44, 3.31it/s] 12%|█▏ | 42895/371472 [3:27:38<27:37:59, 3.30it/s] 12%|█▏ | 42896/371472 [3:27:38<26:16:54, 3.47it/s] 12%|█▏ | 42897/371472 [3:27:39<25:57:02, 3.52it/s] 12%|█▏ | 42898/371472 [3:27:39<26:15:34, 3.48it/s] 12%|█▏ | 42899/371472 [3:27:39<26:54:40, 3.39it/s] 12%|█▏ | 42900/371472 [3:27:40<26:07:12, 3.49it/s] {'loss': 4.4327, 'learning_rate': 8.964909823525609e-07, 'epoch': 1.85} + 12%|█▏ | 42900/371472 [3:27:40<26:07:12, 3.49it/s] 12%|█▏ | 42901/371472 [3:27:40<26:14:33, 3.48it/s] 12%|█▏ | 42902/371472 [3:27:40<27:31:11, 3.32it/s] 12%|█▏ | 42903/371472 [3:27:40<26:20:52, 3.46it/s] 12%|█▏ | 42904/371472 [3:27:41<25:48:02, 3.54it/s] 12%|█▏ | 42905/371472 [3:27:41<26:12:10, 3.48it/s] 12%|█▏ | 42906/371472 [3:27:41<27:18:06, 3.34it/s] 12%|█▏ | 42907/371472 [3:27:42<26:55:53, 3.39it/s] 12%|█▏ | 42908/371472 [3:27:42<26:41:03, 3.42it/s] 12%|█▏ | 42909/371472 [3:27:42<26:40:37, 3.42it/s] 12%|█▏ | 42910/371472 [3:27:42<25:46:54, 3.54it/s] 12%|█▏ | 42911/371472 [3:27:43<25:05:36, 3.64it/s] 12%|█▏ | 42912/371472 [3:27:43<28:33:38, 3.20it/s] 12%|█▏ | 42913/371472 [3:27:43<28:20:52, 3.22it/s] 12%|█▏ | 42914/371472 [3:27:44<27:25:25, 3.33it/s] 12%|█▏ | 42915/371472 [3:27:44<28:00:48, 3.26it/s] 12%|█▏ | 42916/371472 [3:27:44<27:03:06, 3.37it/s] 12%|█▏ | 42917/371472 [3:27:45<25:49:18, 3.53it/s] 12%|█▏ | 42918/371472 [3:27:45<25:24:13, 3.59it/s] 12%|█▏ | 42919/371472 [3:27:45<27:34:39, 3.31it/s] 12%|█▏ | 42920/371472 [3:27:45<26:17:48, 3.47it/s] {'loss': 4.5367, 'learning_rate': 8.964425003770819e-07, 'epoch': 1.85} + 12%|█▏ | 42920/371472 [3:27:45<26:17:48, 3.47it/s] 12%|█▏ | 42921/371472 [3:27:46<25:20:04, 3.60it/s] 12%|█▏ | 42922/371472 [3:27:46<26:04:27, 3.50it/s] 12%|█▏ | 42923/371472 [3:27:46<26:06:59, 3.49it/s] 12%|█▏ | 42924/371472 [3:27:47<26:30:41, 3.44it/s] 12%|█▏ | 42925/371472 [3:27:47<25:40:40, 3.55it/s] 12%|█▏ | 42926/371472 [3:27:47<25:15:54, 3.61it/s] 12%|█▏ | 42927/371472 [3:27:47<25:36:09, 3.56it/s] 12%|█▏ | 42928/371472 [3:27:48<25:00:05, 3.65it/s] 12%|█▏ | 42929/371472 [3:27:48<24:54:34, 3.66it/s] 12%|█▏ | 42930/371472 [3:27:48<24:24:57, 3.74it/s] 12%|█▏ | 42931/371472 [3:27:48<24:47:29, 3.68it/s] 12%|█▏ | 42932/371472 [3:27:49<26:21:01, 3.46it/s] 12%|█▏ | 42933/371472 [3:27:49<25:47:57, 3.54it/s] 12%|█▏ | 42934/371472 [3:27:49<25:42:13, 3.55it/s] 12%|█▏ | 42935/371472 [3:27:50<25:47:04, 3.54it/s] 12%|█▏ | 42936/371472 [3:27:50<26:11:51, 3.48it/s] 12%|█▏ | 42937/371472 [3:27:50<26:12:31, 3.48it/s] 12%|█▏ | 42938/371472 [3:27:51<25:44:36, 3.54it/s] 12%|█▏ | 42939/371472 [3:27:51<25:08:31, 3.63it/s] 12%|█▏ | 42940/371472 [3:27:51<24:38:37, 3.70it/s] {'loss': 4.4483, 'learning_rate': 8.963940184016031e-07, 'epoch': 1.85} + 12%|█▏ | 42940/371472 [3:27:51<24:38:37, 3.70it/s] 12%|█▏ | 42941/371472 [3:27:52<32:40:06, 2.79it/s] 12%|█▏ | 42942/371472 [3:27:52<31:50:14, 2.87it/s] 12%|█▏ | 42943/371472 [3:27:52<29:13:29, 3.12it/s] 12%|█▏ | 42944/371472 [3:27:52<28:30:28, 3.20it/s] 12%|█▏ | 42945/371472 [3:27:53<30:26:27, 3.00it/s] 12%|█▏ | 42946/371472 [3:27:53<29:03:42, 3.14it/s] 12%|█▏ | 42947/371472 [3:27:53<28:21:25, 3.22it/s] 12%|█▏ | 42948/371472 [3:27:54<26:59:35, 3.38it/s] 12%|█▏ | 42949/371472 [3:27:54<26:00:31, 3.51it/s] 12%|█▏ | 42950/371472 [3:27:54<26:14:48, 3.48it/s] 12%|█▏ | 42951/371472 [3:27:54<25:35:30, 3.57it/s] 12%|█▏ | 42952/371472 [3:27:55<27:26:43, 3.32it/s] 12%|█▏ | 42953/371472 [3:27:55<26:25:19, 3.45it/s] 12%|█▏ | 42954/371472 [3:27:55<26:11:43, 3.48it/s] 12%|█▏ | 42955/371472 [3:27:56<25:50:14, 3.53it/s] 12%|█▏ | 42956/371472 [3:27:56<26:27:38, 3.45it/s] 12%|█▏ | 42957/371472 [3:27:56<25:49:21, 3.53it/s] 12%|█▏ | 42958/371472 [3:27:56<25:03:51, 3.64it/s] 12%|█▏ | 42959/371472 [3:27:57<24:54:08, 3.66it/s] 12%|█▏ | 42960/371472 [3:27:57<24:52:29, 3.67it/s] {'loss': 4.3877, 'learning_rate': 8.963455364261242e-07, 'epoch': 1.85} + 12%|█▏ | 42960/371472 [3:27:57<24:52:29, 3.67it/s] 12%|█▏ | 42961/371472 [3:27:57<24:50:43, 3.67it/s] 12%|█▏ | 42962/371472 [3:27:58<24:30:24, 3.72it/s] 12%|█▏ | 42963/371472 [3:27:58<24:29:55, 3.72it/s] 12%|█▏ | 42964/371472 [3:27:58<24:23:00, 3.74it/s] 12%|█▏ | 42965/371472 [3:27:58<25:20:01, 3.60it/s] 12%|█▏ | 42966/371472 [3:27:59<25:15:23, 3.61it/s] 12%|█▏ | 42967/371472 [3:27:59<24:50:51, 3.67it/s] 12%|█▏ | 42968/371472 [3:27:59<24:41:15, 3.70it/s] 12%|█▏ | 42969/371472 [3:27:59<25:17:21, 3.61it/s] 12%|█▏ | 42970/371472 [3:28:00<25:26:18, 3.59it/s] 12%|█▏ | 42971/371472 [3:28:00<25:16:43, 3.61it/s] 12%|█▏ | 42972/371472 [3:28:00<25:53:55, 3.52it/s] 12%|█▏ | 42973/371472 [3:28:01<25:10:10, 3.63it/s] 12%|█▏ | 42974/371472 [3:28:01<26:24:33, 3.46it/s] 12%|█▏ | 42975/371472 [3:28:01<26:13:30, 3.48it/s] 12%|█▏ | 42976/371472 [3:28:01<25:09:00, 3.63it/s] 12%|█▏ | 42977/371472 [3:28:02<25:32:05, 3.57it/s] 12%|█▏ | 42978/371472 [3:28:02<26:52:05, 3.40it/s] 12%|█▏ | 42979/371472 [3:28:02<25:40:38, 3.55it/s] 12%|█▏ | 42980/371472 [3:28:03<26:00:12, 3.51it/s] {'loss': 4.3075, 'learning_rate': 8.962970544506454e-07, 'epoch': 1.85} + 12%|█▏ | 42980/371472 [3:28:03<26:00:12, 3.51it/s] 12%|█▏ | 42981/371472 [3:28:03<27:07:32, 3.36it/s] 12%|█▏ | 42982/371472 [3:28:03<26:06:48, 3.49it/s] 12%|█▏ | 42983/371472 [3:28:03<25:51:23, 3.53it/s] 12%|█▏ | 42984/371472 [3:28:04<25:05:31, 3.64it/s] 12%|█▏ | 42985/371472 [3:28:04<25:34:34, 3.57it/s] 12%|█▏ | 42986/371472 [3:28:04<24:54:52, 3.66it/s] 12%|█▏ | 42987/371472 [3:28:05<25:27:15, 3.58it/s] 12%|█▏ | 42988/371472 [3:28:05<26:12:27, 3.48it/s] 12%|█▏ | 42989/371472 [3:28:05<25:47:50, 3.54it/s] 12%|█▏ | 42990/371472 [3:28:05<25:23:29, 3.59it/s] 12%|█▏ | 42991/371472 [3:28:06<24:52:55, 3.67it/s] 12%|█▏ | 42992/371472 [3:28:06<24:59:51, 3.65it/s] 12%|█▏ | 42993/371472 [3:28:06<24:51:14, 3.67it/s] 12%|█▏ | 42994/371472 [3:28:07<24:41:35, 3.70it/s] 12%|█▏ | 42995/371472 [3:28:07<25:49:34, 3.53it/s] 12%|█▏ | 42996/371472 [3:28:07<26:20:47, 3.46it/s] 12%|█▏ | 42997/371472 [3:28:07<25:52:22, 3.53it/s] 12%|█▏ | 42998/371472 [3:28:08<27:04:07, 3.37it/s] 12%|█▏ | 42999/371472 [3:28:08<26:59:15, 3.38it/s] 12%|█▏ | 43000/371472 [3:28:08<26:05:55, 3.50it/s] {'loss': 4.4414, 'learning_rate': 8.962485724751664e-07, 'epoch': 1.85} + 12%|█▏ | 43000/371472 [3:28:08<26:05:55, 3.50it/s] 12%|█▏ | 43001/371472 [3:28:09<25:24:35, 3.59it/s] 12%|█▏ | 43002/371472 [3:28:09<24:31:03, 3.72it/s] 12%|█▏ | 43003/371472 [3:28:09<24:15:04, 3.76it/s] 12%|█▏ | 43004/371472 [3:28:09<25:44:47, 3.54it/s] 12%|█▏ | 43005/371472 [3:28:10<24:48:25, 3.68it/s] 12%|█▏ | 43006/371472 [3:28:10<25:41:20, 3.55it/s] 12%|█▏ | 43007/371472 [3:28:10<26:33:50, 3.43it/s] 12%|█▏ | 43008/371472 [3:28:11<26:22:22, 3.46it/s] 12%|█▏ | 43009/371472 [3:28:11<25:52:59, 3.53it/s] 12%|█▏ | 43010/371472 [3:28:11<25:37:45, 3.56it/s] 12%|█▏ | 43011/371472 [3:28:11<25:47:54, 3.54it/s] 12%|█▏ | 43012/371472 [3:28:12<24:48:27, 3.68it/s] 12%|█▏ | 43013/371472 [3:28:12<24:30:25, 3.72it/s] 12%|█▏ | 43014/371472 [3:28:12<24:07:44, 3.78it/s] 12%|█▏ | 43015/371472 [3:28:12<24:09:36, 3.78it/s] 12%|█▏ | 43016/371472 [3:28:13<24:17:00, 3.76it/s] 12%|█▏ | 43017/371472 [3:28:13<24:38:33, 3.70it/s] 12%|█▏ | 43018/371472 [3:28:13<24:15:30, 3.76it/s] 12%|█▏ | 43019/371472 [3:28:13<25:25:27, 3.59it/s] 12%|█▏ | 43020/371472 [3:28:14<25:15:04, 3.61it/s] {'loss': 4.2825, 'learning_rate': 8.962000904996875e-07, 'epoch': 1.85} + 12%|█▏ | 43020/371472 [3:28:14<25:15:04, 3.61it/s] 12%|█▏ | 43021/371472 [3:28:14<28:20:26, 3.22it/s] 12%|█▏ | 43022/371472 [3:28:14<28:28:04, 3.20it/s] 12%|█▏ | 43023/371472 [3:28:15<27:50:53, 3.28it/s] 12%|█▏ | 43024/371472 [3:28:15<26:08:01, 3.49it/s] 12%|█▏ | 43025/371472 [3:28:15<26:13:12, 3.48it/s] 12%|█▏ | 43026/371472 [3:28:16<26:02:22, 3.50it/s] 12%|█▏ | 43027/371472 [3:28:16<25:50:33, 3.53it/s] 12%|█▏ | 43028/371472 [3:28:16<25:02:30, 3.64it/s] 12%|█▏ | 43029/371472 [3:28:16<24:28:40, 3.73it/s] 12%|█▏ | 43030/371472 [3:28:17<24:48:52, 3.68it/s] 12%|█▏ | 43031/371472 [3:28:17<25:43:12, 3.55it/s] 12%|█▏ | 43032/371472 [3:28:17<25:08:20, 3.63it/s] 12%|█▏ | 43033/371472 [3:28:17<24:43:17, 3.69it/s] 12%|█▏ | 43034/371472 [3:28:18<24:38:41, 3.70it/s] 12%|█▏ | 43035/371472 [3:28:18<25:29:06, 3.58it/s] 12%|█▏ | 43036/371472 [3:28:18<25:46:26, 3.54it/s] 12%|█▏ | 43037/371472 [3:28:19<26:08:34, 3.49it/s] 12%|█▏ | 43038/371472 [3:28:19<25:57:09, 3.52it/s] 12%|█▏ | 43039/371472 [3:28:19<26:02:07, 3.50it/s] 12%|█▏ | 43040/371472 [3:28:20<29:08:42, 3.13it/s] {'loss': 4.3867, 'learning_rate': 8.961516085242086e-07, 'epoch': 1.85} + 12%|█▏ | 43040/371472 [3:28:20<29:08:42, 3.13it/s] 12%|█▏ | 43041/371472 [3:28:20<28:49:19, 3.17it/s] 12%|█▏ | 43042/371472 [3:28:20<28:37:18, 3.19it/s] 12%|█▏ | 43043/371472 [3:28:20<27:41:28, 3.29it/s] 12%|█▏ | 43044/371472 [3:28:21<31:17:51, 2.91it/s] 12%|█▏ | 43045/371472 [3:28:21<30:31:10, 2.99it/s] 12%|█▏ | 43046/371472 [3:28:22<30:08:14, 3.03it/s] 12%|█▏ | 43047/371472 [3:28:22<29:08:34, 3.13it/s] 12%|█▏ | 43048/371472 [3:28:22<29:06:12, 3.13it/s] 12%|█▏ | 43049/371472 [3:28:22<28:00:10, 3.26it/s] 12%|█▏ | 43050/371472 [3:28:23<27:07:58, 3.36it/s] 12%|█▏ | 43051/371472 [3:28:23<26:37:03, 3.43it/s] 12%|█▏ | 43052/371472 [3:28:23<26:11:44, 3.48it/s] 12%|█▏ | 43053/371472 [3:28:24<25:35:22, 3.57it/s] 12%|█▏ | 43054/371472 [3:28:24<25:18:15, 3.61it/s] 12%|█▏ | 43055/371472 [3:28:24<29:13:23, 3.12it/s] 12%|█▏ | 43056/371472 [3:28:24<28:06:35, 3.25it/s] 12%|█▏ | 43057/371472 [3:28:25<28:34:06, 3.19it/s] 12%|█▏ | 43058/371472 [3:28:25<27:14:22, 3.35it/s] 12%|█▏ | 43059/371472 [3:28:25<26:44:21, 3.41it/s] 12%|█▏ | 43060/371472 [3:28:26<25:48:45, 3.53it/s] {'loss': 4.3749, 'learning_rate': 8.961031265487297e-07, 'epoch': 1.85} + 12%|█▏ | 43060/371472 [3:28:26<25:48:45, 3.53it/s] 12%|█▏ | 43061/371472 [3:28:26<25:52:24, 3.53it/s] 12%|█▏ | 43062/371472 [3:28:26<25:25:08, 3.59it/s] 12%|█▏ | 43063/371472 [3:28:26<24:18:17, 3.75it/s] 12%|█▏ | 43064/371472 [3:28:27<25:03:35, 3.64it/s] 12%|█▏ | 43065/371472 [3:28:27<25:31:29, 3.57it/s] 12%|█▏ | 43066/371472 [3:28:27<24:57:21, 3.66it/s] 12%|█▏ | 43067/371472 [3:28:28<25:30:19, 3.58it/s] 12%|█▏ | 43068/371472 [3:28:28<26:34:23, 3.43it/s] 12%|█▏ | 43069/371472 [3:28:28<28:13:01, 3.23it/s] 12%|█▏ | 43070/371472 [3:28:28<27:07:23, 3.36it/s] 12%|█▏ | 43071/371472 [3:28:29<26:15:32, 3.47it/s] 12%|█▏ | 43072/371472 [3:28:29<30:57:47, 2.95it/s] 12%|█▏ | 43073/371472 [3:28:29<28:44:37, 3.17it/s] 12%|█▏ | 43074/371472 [3:28:30<27:28:12, 3.32it/s] 12%|█▏ | 43075/371472 [3:28:30<27:02:21, 3.37it/s] 12%|█▏ | 43076/371472 [3:28:30<26:23:50, 3.46it/s] 12%|█▏ | 43077/371472 [3:28:31<26:22:48, 3.46it/s] 12%|█▏ | 43078/371472 [3:28:31<25:59:04, 3.51it/s] 12%|█▏ | 43079/371472 [3:28:31<27:26:59, 3.32it/s] 12%|█▏ | 43080/371472 [3:28:32<29:15:06, 3.12it/s] {'loss': 4.4202, 'learning_rate': 8.960546445732508e-07, 'epoch': 1.86} + 12%|█▏ | 43080/371472 [3:28:32<29:15:06, 3.12it/s] 12%|█▏ | 43081/371472 [3:28:32<27:42:16, 3.29it/s] 12%|█▏ | 43082/371472 [3:28:32<26:48:51, 3.40it/s] 12%|█▏ | 43083/371472 [3:28:32<27:00:55, 3.38it/s] 12%|█▏ | 43084/371472 [3:28:33<25:48:29, 3.53it/s] 12%|█▏ | 43085/371472 [3:28:33<28:36:21, 3.19it/s] 12%|█▏ | 43086/371472 [3:28:33<28:14:14, 3.23it/s] 12%|█▏ | 43087/371472 [3:28:34<27:12:46, 3.35it/s] 12%|█▏ | 43088/371472 [3:28:34<26:10:57, 3.48it/s] 12%|█▏ | 43089/371472 [3:28:34<25:27:56, 3.58it/s] 12%|█▏ | 43090/371472 [3:28:34<26:50:23, 3.40it/s] 12%|█▏ | 43091/371472 [3:28:35<26:41:55, 3.42it/s] 12%|█▏ | 43092/371472 [3:28:35<26:35:02, 3.43it/s] 12%|█▏ | 43093/371472 [3:28:35<27:32:36, 3.31it/s] 12%|█▏ | 43094/371472 [3:28:36<25:59:25, 3.51it/s] 12%|█▏ | 43095/371472 [3:28:36<25:16:02, 3.61it/s] 12%|█▏ | 43096/371472 [3:28:36<25:59:38, 3.51it/s] 12%|█▏ | 43097/371472 [3:28:36<25:14:09, 3.61it/s] 12%|█▏ | 43098/371472 [3:28:37<26:10:39, 3.48it/s] 12%|█▏ | 43099/371472 [3:28:37<25:49:17, 3.53it/s] 12%|█▏ | 43100/371472 [3:28:37<26:13:43, 3.48it/s] {'loss': 4.3727, 'learning_rate': 8.96006162597772e-07, 'epoch': 1.86} + 12%|█▏ | 43100/371472 [3:28:37<26:13:43, 3.48it/s] 12%|█▏ | 43101/371472 [3:28:38<26:33:52, 3.43it/s] 12%|█▏ | 43102/371472 [3:28:38<25:52:57, 3.52it/s] 12%|█▏ | 43103/371472 [3:28:38<26:44:01, 3.41it/s] 12%|█▏ | 43104/371472 [3:28:39<27:01:31, 3.38it/s] 12%|█▏ | 43105/371472 [3:28:39<26:13:07, 3.48it/s] 12%|█▏ | 43106/371472 [3:28:39<27:00:43, 3.38it/s] 12%|█▏ | 43107/371472 [3:28:39<26:37:09, 3.43it/s] 12%|█▏ | 43108/371472 [3:28:40<26:41:26, 3.42it/s] 12%|█▏ | 43109/371472 [3:28:40<25:52:37, 3.52it/s] 12%|█▏ | 43110/371472 [3:28:40<24:59:05, 3.65it/s] 12%|█▏ | 43111/371472 [3:28:40<24:10:10, 3.77it/s] 12%|█▏ | 43112/371472 [3:28:41<25:24:41, 3.59it/s] 12%|█▏ | 43113/371472 [3:28:41<24:59:43, 3.65it/s] 12%|█▏ | 43114/371472 [3:28:41<24:46:34, 3.68it/s] 12%|█▏ | 43115/371472 [3:28:42<26:39:23, 3.42it/s] 12%|█▏ | 43116/371472 [3:28:42<27:06:43, 3.36it/s] 12%|█▏ | 43117/371472 [3:28:42<26:10:58, 3.48it/s] 12%|█▏ | 43118/371472 [3:28:42<25:54:07, 3.52it/s] 12%|█▏ | 43119/371472 [3:28:43<25:19:16, 3.60it/s] 12%|█▏ | 43120/371472 [3:28:43<25:36:00, 3.56it/s] {'loss': 4.4348, 'learning_rate': 8.959576806222931e-07, 'epoch': 1.86} + 12%|█▏ | 43120/371472 [3:28:43<25:36:00, 3.56it/s] 12%|█▏ | 43121/371472 [3:28:43<26:32:28, 3.44it/s] 12%|█▏ | 43122/371472 [3:28:44<26:23:40, 3.46it/s] 12%|█▏ | 43123/371472 [3:28:44<25:58:27, 3.51it/s] 12%|█▏ | 43124/371472 [3:28:44<24:48:04, 3.68it/s] 12%|█▏ | 43125/371472 [3:28:44<24:34:14, 3.71it/s] 12%|█▏ | 43126/371472 [3:28:45<24:25:59, 3.73it/s] 12%|█▏ | 43127/371472 [3:28:45<24:25:07, 3.74it/s] 12%|█▏ | 43128/371472 [3:28:45<24:46:07, 3.68it/s] 12%|█▏ | 43129/371472 [3:28:45<24:02:55, 3.79it/s] 12%|█▏ | 43130/371472 [3:28:46<24:04:42, 3.79it/s] 12%|█▏ | 43131/371472 [3:28:46<24:38:50, 3.70it/s] 12%|█▏ | 43132/371472 [3:28:46<24:41:27, 3.69it/s] 12%|█▏ | 43133/371472 [3:28:47<25:38:07, 3.56it/s] 12%|█▏ | 43134/371472 [3:28:47<30:35:19, 2.98it/s] 12%|█▏ | 43135/371472 [3:28:47<29:02:17, 3.14it/s] 12%|█▏ | 43136/371472 [3:28:48<29:14:38, 3.12it/s] 12%|█▏ | 43137/371472 [3:28:48<28:09:08, 3.24it/s] 12%|█▏ | 43138/371472 [3:28:48<29:55:57, 3.05it/s] 12%|█▏ | 43139/371472 [3:28:49<28:17:17, 3.22it/s] 12%|█▏ | 43140/371472 [3:28:49<27:33:50, 3.31it/s] {'loss': 4.1969, 'learning_rate': 8.959091986468141e-07, 'epoch': 1.86} + 12%|█▏ | 43140/371472 [3:28:49<27:33:50, 3.31it/s] 12%|█▏ | 43141/371472 [3:28:49<27:17:36, 3.34it/s] 12%|█▏ | 43142/371472 [3:28:49<26:12:24, 3.48it/s] 12%|█▏ | 43143/371472 [3:28:50<26:09:16, 3.49it/s] 12%|█▏ | 43144/371472 [3:28:50<28:50:17, 3.16it/s] 12%|█▏ | 43145/371472 [3:28:50<28:33:04, 3.19it/s] 12%|█▏ | 43146/371472 [3:28:51<26:58:15, 3.38it/s] 12%|█▏ | 43147/371472 [3:28:51<26:13:35, 3.48it/s] 12%|█▏ | 43148/371472 [3:28:51<26:00:56, 3.51it/s] 12%|█▏ | 43149/371472 [3:28:51<25:33:16, 3.57it/s] 12%|█▏ | 43150/371472 [3:28:52<28:24:20, 3.21it/s] 12%|█▏ | 43151/371472 [3:28:52<27:56:38, 3.26it/s] 12%|█▏ | 43152/371472 [3:28:52<28:51:10, 3.16it/s] 12%|█▏ | 43153/371472 [3:28:53<27:45:38, 3.29it/s] 12%|█▏ | 43154/371472 [3:28:53<29:50:06, 3.06it/s] 12%|█▏ | 43155/371472 [3:28:53<30:39:23, 2.97it/s] 12%|█▏ | 43156/371472 [3:28:54<29:34:30, 3.08it/s] 12%|█▏ | 43157/371472 [3:28:54<28:15:20, 3.23it/s] 12%|█▏ | 43158/371472 [3:28:54<27:19:07, 3.34it/s] 12%|█▏ | 43159/371472 [3:28:55<26:45:34, 3.41it/s] 12%|█▏ | 43160/371472 [3:28:55<26:29:04, 3.44it/s] {'loss': 4.2814, 'learning_rate': 8.958607166713352e-07, 'epoch': 1.86} + 12%|█▏ | 43160/371472 [3:28:55<26:29:04, 3.44it/s] 12%|█▏ | 43161/371472 [3:28:55<27:29:20, 3.32it/s] 12%|█▏ | 43162/371472 [3:28:55<26:41:00, 3.42it/s] 12%|█▏ | 43163/371472 [3:28:56<26:05:19, 3.50it/s] 12%|█�� | 43164/371472 [3:28:56<25:26:15, 3.59it/s] 12%|█▏ | 43165/371472 [3:28:56<27:24:34, 3.33it/s] 12%|█▏ | 43166/371472 [3:28:57<28:48:41, 3.17it/s] 12%|█▏ | 43167/371472 [3:28:57<28:33:43, 3.19it/s] 12%|█▏ | 43168/371472 [3:28:57<29:40:53, 3.07it/s] 12%|█▏ | 43169/371472 [3:28:58<29:24:37, 3.10it/s] 12%|█▏ | 43170/371472 [3:28:58<28:07:18, 3.24it/s] 12%|█▏ | 43171/371472 [3:28:58<29:18:47, 3.11it/s] 12%|█▏ | 43172/371472 [3:28:59<29:54:13, 3.05it/s] 12%|█▏ | 43173/371472 [3:28:59<27:57:23, 3.26it/s] 12%|█▏ | 43174/371472 [3:28:59<29:01:18, 3.14it/s] 12%|█▏ | 43175/371472 [3:29:00<27:37:22, 3.30it/s] 12%|█▏ | 43176/371472 [3:29:00<28:30:29, 3.20it/s] 12%|█▏ | 43177/371472 [3:29:00<28:09:12, 3.24it/s] 12%|█▏ | 43178/371472 [3:29:00<26:47:46, 3.40it/s] 12%|█▏ | 43179/371472 [3:29:01<27:37:29, 3.30it/s] 12%|█▏ | 43180/371472 [3:29:01<26:46:48, 3.41it/s] {'loss': 4.4414, 'learning_rate': 8.958122346958564e-07, 'epoch': 1.86} + 12%|█▏ | 43180/371472 [3:29:01<26:46:48, 3.41it/s] 12%|█▏ | 43181/371472 [3:29:01<25:48:53, 3.53it/s] 12%|█▏ | 43182/371472 [3:29:02<24:56:32, 3.66it/s] 12%|█▏ | 43183/371472 [3:29:02<24:32:58, 3.71it/s] 12%|█▏ | 43184/371472 [3:29:02<24:25:24, 3.73it/s] 12%|█▏ | 43185/371472 [3:29:02<25:09:40, 3.62it/s] 12%|█▏ | 43186/371472 [3:29:03<28:46:00, 3.17it/s] 12%|█▏ | 43187/371472 [3:29:03<27:36:09, 3.30it/s] 12%|█▏ | 43188/371472 [3:29:03<27:54:04, 3.27it/s] 12%|█▏ | 43189/371472 [3:29:04<27:29:58, 3.32it/s] 12%|█▏ | 43190/371472 [3:29:04<26:47:56, 3.40it/s] 12%|█▏ | 43191/371472 [3:29:04<28:09:42, 3.24it/s] 12%|█▏ | 43192/371472 [3:29:05<26:49:14, 3.40it/s] 12%|█▏ | 43193/371472 [3:29:05<25:46:18, 3.54it/s] 12%|█▏ | 43194/371472 [3:29:05<27:23:32, 3.33it/s] 12%|█▏ | 43195/371472 [3:29:05<27:21:34, 3.33it/s] 12%|█▏ | 43196/371472 [3:29:06<26:59:45, 3.38it/s] 12%|█▏ | 43197/371472 [3:29:06<26:28:18, 3.44it/s] 12%|█▏ | 43198/371472 [3:29:06<25:38:33, 3.56it/s] 12%|█▏ | 43199/371472 [3:29:07<25:38:46, 3.56it/s] 12%|█▏ | 43200/371472 [3:29:07<26:00:27, 3.51it/s] {'loss': 4.2574, 'learning_rate': 8.957637527203775e-07, 'epoch': 1.86} + 12%|█▏ | 43200/371472 [3:29:07<26:00:27, 3.51it/s] 12%|█▏ | 43201/371472 [3:29:07<27:25:54, 3.32it/s] 12%|█▏ | 43202/371472 [3:29:07<27:08:57, 3.36it/s] 12%|█▏ | 43203/371472 [3:29:08<26:16:09, 3.47it/s] 12%|█▏ | 43204/371472 [3:29:08<27:30:12, 3.32it/s] 12%|█▏ | 43205/371472 [3:29:08<26:48:15, 3.40it/s] 12%|█▏ | 43206/371472 [3:29:09<27:27:48, 3.32it/s] 12%|█▏ | 43207/371472 [3:29:09<27:39:07, 3.30it/s] 12%|█▏ | 43208/371472 [3:29:09<27:22:57, 3.33it/s] 12%|█▏ | 43209/371472 [3:29:10<26:43:49, 3.41it/s] 12%|█▏ | 43210/371472 [3:29:10<25:54:39, 3.52it/s] 12%|█▏ | 43211/371472 [3:29:10<25:35:47, 3.56it/s] 12%|█▏ | 43212/371472 [3:29:10<25:27:07, 3.58it/s] 12%|█▏ | 43213/371472 [3:29:11<24:25:09, 3.73it/s] 12%|█▏ | 43214/371472 [3:29:11<25:46:14, 3.54it/s] 12%|█▏ | 43215/371472 [3:29:11<25:28:14, 3.58it/s] 12%|█▏ | 43216/371472 [3:29:11<25:03:59, 3.64it/s] 12%|█▏ | 43217/371472 [3:29:12<25:05:12, 3.63it/s] 12%|█▏ | 43218/371472 [3:29:12<25:05:18, 3.63it/s] 12%|█▏ | 43219/371472 [3:29:12<24:45:05, 3.68it/s] 12%|█▏ | 43220/371472 [3:29:13<25:19:52, 3.60it/s] {'loss': 4.3493, 'learning_rate': 8.957152707448986e-07, 'epoch': 1.86} + 12%|█▏ | 43220/371472 [3:29:13<25:19:52, 3.60it/s] 12%|█▏ | 43221/371472 [3:29:13<24:53:26, 3.66it/s] 12%|█▏ | 43222/371472 [3:29:13<25:01:54, 3.64it/s] 12%|█▏ | 43223/371472 [3:29:13<28:01:43, 3.25it/s] 12%|█▏ | 43224/371472 [3:29:14<27:48:06, 3.28it/s] 12%|█▏ | 43225/371472 [3:29:14<29:47:44, 3.06it/s] 12%|█▏ | 43226/371472 [3:29:14<28:41:12, 3.18it/s] 12%|█▏ | 43227/371472 [3:29:15<28:20:05, 3.22it/s] 12%|█▏ | 43228/371472 [3:29:15<26:55:17, 3.39it/s] 12%|█▏ | 43229/371472 [3:29:15<26:28:56, 3.44it/s] 12%|█▏ | 43230/371472 [3:29:16<26:18:29, 3.47it/s] 12%|█▏ | 43231/371472 [3:29:16<26:11:52, 3.48it/s] 12%|█▏ | 43232/371472 [3:29:16<25:18:22, 3.60it/s] 12%|█▏ | 43233/371472 [3:29:16<25:22:31, 3.59it/s] 12%|█▏ | 43234/371472 [3:29:17<25:45:24, 3.54it/s] 12%|█▏ | 43235/371472 [3:29:17<25:18:01, 3.60it/s] 12%|█▏ | 43236/371472 [3:29:17<25:35:33, 3.56it/s] 12%|█▏ | 43237/371472 [3:29:17<25:40:39, 3.55it/s] 12%|█▏ | 43238/371472 [3:29:18<25:39:05, 3.55it/s] 12%|█▏ | 43239/371472 [3:29:18<25:47:39, 3.53it/s] 12%|█▏ | 43240/371472 [3:29:18<24:46:04, 3.68it/s] {'loss': 4.3951, 'learning_rate': 8.956667887694197e-07, 'epoch': 1.86} + 12%|█▏ | 43240/371472 [3:29:18<24:46:04, 3.68it/s] 12%|█▏ | 43241/371472 [3:29:19<25:55:49, 3.52it/s] 12%|█▏ | 43242/371472 [3:29:19<26:21:00, 3.46it/s] 12%|█▏ | 43243/371472 [3:29:19<26:40:39, 3.42it/s] 12%|█▏ | 43244/371472 [3:29:20<26:37:02, 3.43it/s] 12%|█▏ | 43245/371472 [3:29:20<26:11:57, 3.48it/s] 12%|█▏ | 43246/371472 [3:29:20<25:26:53, 3.58it/s] 12%|█▏ | 43247/371472 [3:29:20<26:21:17, 3.46it/s] 12%|█▏ | 43248/371472 [3:29:21<26:15:13, 3.47it/s] 12%|█▏ | 43249/371472 [3:29:21<27:02:35, 3.37it/s] 12%|█▏ | 43250/371472 [3:29:21<27:11:16, 3.35it/s] 12%|█▏ | 43251/371472 [3:29:22<26:35:10, 3.43it/s] 12%|█▏ | 43252/371472 [3:29:22<25:17:25, 3.60it/s] 12%|█▏ | 43253/371472 [3:29:22<25:35:19, 3.56it/s] 12%|█▏ | 43254/371472 [3:29:22<24:40:54, 3.69it/s] 12%|█▏ | 43255/371472 [3:29:23<24:35:11, 3.71it/s] 12%|█▏ | 43256/371472 [3:29:23<26:10:04, 3.48it/s] 12%|█▏ | 43257/371472 [3:29:23<25:43:03, 3.55it/s] 12%|█▏ | 43258/371472 [3:29:23<25:08:10, 3.63it/s] 12%|█▏ | 43259/371472 [3:29:24<25:31:05, 3.57it/s] 12%|█▏ | 43260/371472 [3:29:24<26:18:22, 3.47it/s] {'loss': 4.2069, 'learning_rate': 8.956183067939409e-07, 'epoch': 1.86} + 12%|█▏ | 43260/371472 [3:29:24<26:18:22, 3.47it/s] 12%|█▏ | 43261/371472 [3:29:24<28:16:52, 3.22it/s] 12%|█▏ | 43262/371472 [3:29:25<28:05:59, 3.24it/s] 12%|█▏ | 43263/371472 [3:29:25<28:18:16, 3.22it/s] 12%|█▏ | 43264/371472 [3:29:25<29:26:56, 3.10it/s] 12%|█▏ | 43265/371472 [3:29:26<27:15:05, 3.35it/s] 12%|█▏ | 43266/371472 [3:29:26<26:25:33, 3.45it/s] 12%|█▏ | 43267/371472 [3:29:26<27:02:21, 3.37it/s] 12%|█▏ | 43268/371472 [3:29:26<25:52:40, 3.52it/s] 12%|█▏ | 43269/371472 [3:29:27<24:38:04, 3.70it/s] 12%|█▏ | 43270/371472 [3:29:27<25:50:07, 3.53it/s] 12%|█▏ | 43271/371472 [3:29:27<24:55:59, 3.66it/s] 12%|█▏ | 43272/371472 [3:29:28<24:26:13, 3.73it/s] 12%|█▏ | 43273/371472 [3:29:28<25:00:27, 3.65it/s] 12%|█▏ | 43274/371472 [3:29:28<25:24:36, 3.59it/s] 12%|█▏ | 43275/371472 [3:29:28<24:31:10, 3.72it/s] 12%|█▏ | 43276/371472 [3:29:29<25:39:17, 3.55it/s] 12%|█▏ | 43277/371472 [3:29:29<25:39:26, 3.55it/s] 12%|█▏ | 43278/371472 [3:29:29<27:07:21, 3.36it/s] 12%|█▏ | 43279/371472 [3:29:30<26:38:48, 3.42it/s] 12%|█▏ | 43280/371472 [3:29:30<26:27:05, 3.45it/s] {'loss': 4.2305, 'learning_rate': 8.955698248184619e-07, 'epoch': 1.86} + 12%|█▏ | 43280/371472 [3:29:30<26:27:05, 3.45it/s] 12%|█▏ | 43281/371472 [3:29:30<25:50:24, 3.53it/s] 12%|█▏ | 43282/371472 [3:29:30<25:59:41, 3.51it/s] 12%|█▏ | 43283/371472 [3:29:31<25:27:12, 3.58it/s] 12%|█▏ | 43284/371472 [3:29:31<27:49:48, 3.28it/s] 12%|█▏ | 43285/371472 [3:29:31<27:10:35, 3.35it/s] 12%|█▏ | 43286/371472 [3:29:32<26:29:13, 3.44it/s] 12%|█▏ | 43287/371472 [3:29:32<26:13:25, 3.48it/s] 12%|█▏ | 43288/371472 [3:29:32<27:13:49, 3.35it/s] 12%|█▏ | 43289/371472 [3:29:32<27:23:35, 3.33it/s] 12%|█▏ | 43290/371472 [3:29:33<26:24:26, 3.45it/s] 12%|█▏ | 43291/371472 [3:29:33<25:24:11, 3.59it/s] 12%|█▏ | 43292/371472 [3:29:33<26:20:26, 3.46it/s] 12%|█▏ | 43293/371472 [3:29:34<25:54:15, 3.52it/s] 12%|█▏ | 43294/371472 [3:29:34<25:21:03, 3.60it/s] 12%|█▏ | 43295/371472 [3:29:34<25:46:42, 3.54it/s] 12%|█▏ | 43296/371472 [3:29:34<25:25:45, 3.58it/s] 12%|█▏ | 43297/371472 [3:29:35<24:52:04, 3.67it/s] 12%|█▏ | 43298/371472 [3:29:35<26:23:09, 3.45it/s] 12%|█▏ | 43299/371472 [3:29:35<25:35:34, 3.56it/s] 12%|█▏ | 43300/371472 [3:29:36<25:53:44, 3.52it/s] {'loss': 4.2136, 'learning_rate': 8.955213428429829e-07, 'epoch': 1.87} + 12%|█▏ | 43300/371472 [3:29:36<25:53:44, 3.52it/s] 12%|█▏ | 43301/371472 [3:29:36<25:48:39, 3.53it/s] 12%|█▏ | 43302/371472 [3:29:36<25:56:10, 3.51it/s] 12%|█▏ | 43303/371472 [3:29:36<25:44:58, 3.54it/s] 12%|█▏ | 43304/371472 [3:29:37<25:22:11, 3.59it/s] 12%|█▏ | 43305/371472 [3:29:37<25:21:19, 3.60it/s] 12%|█▏ | 43306/371472 [3:29:37<25:00:20, 3.65it/s] 12%|█▏ | 43307/371472 [3:29:37<24:46:11, 3.68it/s] 12%|█▏ | 43308/371472 [3:29:38<25:38:32, 3.55it/s] 12%|█▏ | 43309/371472 [3:29:38<26:02:30, 3.50it/s] 12%|█▏ | 43310/371472 [3:29:38<26:48:09, 3.40it/s] 12%|█▏ | 43311/371472 [3:29:39<26:19:15, 3.46it/s] 12%|█▏ | 43312/371472 [3:29:39<26:45:06, 3.41it/s] 12%|█▏ | 43313/371472 [3:29:39<25:51:36, 3.52it/s] 12%|█▏ | 43314/371472 [3:29:40<26:36:32, 3.43it/s] 12%|█▏ | 43315/371472 [3:29:40<26:14:04, 3.47it/s] 12%|█▏ | 43316/371472 [3:29:40<25:19:42, 3.60it/s] 12%|█▏ | 43317/371472 [3:29:40<25:39:37, 3.55it/s] 12%|█▏ | 43318/371472 [3:29:41<26:17:20, 3.47it/s] 12%|█▏ | 43319/371472 [3:29:41<25:45:44, 3.54it/s] 12%|█▏ | 43320/371472 [3:29:41<25:08:53, 3.62it/s] {'loss': 4.3903, 'learning_rate': 8.954728608675041e-07, 'epoch': 1.87} + 12%|█▏ | 43320/371472 [3:29:41<25:08:53, 3.62it/s] 12%|█▏ | 43321/371472 [3:29:41<24:40:05, 3.70it/s] 12%|█▏ | 43322/371472 [3:29:42<23:52:42, 3.82it/s] 12%|█▏ | 43323/371472 [3:29:42<25:44:32, 3.54it/s] 12%|█▏ | 43324/371472 [3:29:42<27:09:13, 3.36it/s] 12%|█▏ | 43325/371472 [3:29:43<26:09:02, 3.49it/s] 12%|█▏ | 43326/371472 [3:29:43<26:04:35, 3.50it/s] 12%|█▏ | 43327/371472 [3:29:43<26:06:17, 3.49it/s] 12%|█▏ | 43328/371472 [3:29:44<27:10:44, 3.35it/s] 12%|█▏ | 43329/371472 [3:29:44<26:36:00, 3.43it/s] 12%|█▏ | 43330/371472 [3:29:44<26:14:41, 3.47it/s] 12%|█▏ | 43331/371472 [3:29:44<25:31:00, 3.57it/s] 12%|█▏ | 43332/371472 [3:29:45<25:01:40, 3.64it/s] 12%|█▏ | 43333/371472 [3:29:45<24:44:22, 3.68it/s] 12%|█▏ | 43334/371472 [3:29:45<24:31:39, 3.72it/s] 12%|█▏ | 43335/371472 [3:29:45<24:02:14, 3.79it/s] 12%|█▏ | 43336/371472 [3:29:46<23:31:27, 3.87it/s] 12%|█▏ | 43337/371472 [3:29:46<24:10:38, 3.77it/s] 12%|█▏ | 43338/371472 [3:29:46<24:19:23, 3.75it/s] 12%|█▏ | 43339/371472 [3:29:46<24:09:26, 3.77it/s] 12%|█▏ | 43340/371472 [3:29:47<24:37:24, 3.70it/s] {'loss': 4.6111, 'learning_rate': 8.954243788920253e-07, 'epoch': 1.87} + 12%|█▏ | 43340/371472 [3:29:47<24:37:24, 3.70it/s] 12%|█▏ | 43341/371472 [3:29:47<27:17:47, 3.34it/s] 12%|█▏ | 43342/371472 [3:29:47<27:00:32, 3.37it/s] 12%|█▏ | 43343/371472 [3:29:48<26:21:46, 3.46it/s] 12%|█▏ | 43344/371472 [3:29:48<25:14:23, 3.61it/s] 12%|█▏ | 43345/371472 [3:29:48<24:35:17, 3.71it/s] 12%|█▏ | 43346/371472 [3:29:48<26:51:21, 3.39it/s] 12%|█▏ | 43347/371472 [3:29:49<26:22:58, 3.45it/s] 12%|█▏ | 43348/371472 [3:29:49<25:44:48, 3.54it/s] 12%|█▏ | 43349/371472 [3:29:49<25:55:11, 3.52it/s] 12%|█▏ | 43350/371472 [3:29:50<25:43:47, 3.54it/s] 12%|█▏ | 43351/371472 [3:29:50<25:35:47, 3.56it/s] 12%|█▏ | 43352/371472 [3:29:50<24:44:45, 3.68it/s] 12%|█▏ | 43353/371472 [3:29:50<24:05:13, 3.78it/s] 12%|█▏ | 43354/371472 [3:29:51<24:12:23, 3.77it/s] 12%|█▏ | 43355/371472 [3:29:51<25:13:13, 3.61it/s] 12%|█▏ | 43356/371472 [3:29:51<24:46:34, 3.68it/s] 12%|█▏ | 43357/371472 [3:29:51<24:57:20, 3.65it/s] 12%|█▏ | 43358/371472 [3:29:52<24:28:53, 3.72it/s] 12%|█▏ | 43359/371472 [3:29:52<24:08:29, 3.78it/s] 12%|█▏ | 43360/371472 [3:29:52<25:45:51, 3.54it/s] {'loss': 4.4162, 'learning_rate': 8.953758969165464e-07, 'epoch': 1.87} + 12%|█▏ | 43360/371472 [3:29:52<25:45:51, 3.54it/s] 12%|█▏ | 43361/371472 [3:29:53<25:20:42, 3.60it/s] 12%|█▏ | 43362/371472 [3:29:53<24:55:21, 3.66it/s] 12%|█▏ | 43363/371472 [3:29:53<24:52:00, 3.67it/s] 12%|█▏ | 43364/371472 [3:29:53<26:33:12, 3.43it/s] 12%|█▏ | 43365/371472 [3:29:54<25:26:02, 3.58it/s] 12%|█▏ | 43366/371472 [3:29:54<25:25:04, 3.59it/s] 12%|█▏ | 43367/371472 [3:29:54<25:18:49, 3.60it/s] 12%|█▏ | 43368/371472 [3:29:55<28:29:11, 3.20it/s] 12%|█▏ | 43369/371472 [3:29:55<28:22:42, 3.21it/s] 12%|█▏ | 43370/371472 [3:29:55<27:32:40, 3.31it/s] 12%|█▏ | 43371/371472 [3:29:56<27:54:21, 3.27it/s] 12%|█▏ | 43372/371472 [3:29:56<26:34:34, 3.43it/s] 12%|█▏ | 43373/371472 [3:29:56<28:09:06, 3.24it/s] 12%|█▏ | 43374/371472 [3:29:56<26:55:35, 3.38it/s] 12%|█▏ | 43375/371472 [3:29:57<25:39:32, 3.55it/s] 12%|█▏ | 43376/371472 [3:29:57<25:22:49, 3.59it/s] 12%|█▏ | 43377/371472 [3:29:57<25:41:08, 3.55it/s] 12%|█▏ | 43378/371472 [3:29:58<26:24:00, 3.45it/s] 12%|█▏ | 43379/371472 [3:29:58<26:37:35, 3.42it/s] 12%|█▏ | 43380/371472 [3:29:58<26:21:53, 3.46it/s] {'loss': 4.438, 'learning_rate': 8.953274149410674e-07, 'epoch': 1.87} + 12%|█▏ | 43380/371472 [3:29:58<26:21:53, 3.46it/s] 12%|█▏ | 43381/371472 [3:29:58<26:06:37, 3.49it/s] 12%|█▏ | 43382/371472 [3:29:59<27:06:24, 3.36it/s] 12%|█▏ | 43383/371472 [3:29:59<26:25:30, 3.45it/s] 12%|█▏ | 43384/371472 [3:29:59<25:13:16, 3.61it/s] 12%|█▏ | 43385/371472 [3:30:00<24:21:09, 3.74it/s] 12%|█▏ | 43386/371472 [3:30:00<26:30:23, 3.44it/s] 12%|█▏ | 43387/371472 [3:30:00<26:30:19, 3.44it/s] 12%|█▏ | 43388/371472 [3:30:00<25:32:21, 3.57it/s] 12%|█▏ | 43389/371472 [3:30:01<25:37:36, 3.56it/s] 12%|█▏ | 43390/371472 [3:30:01<25:52:58, 3.52it/s] 12%|█▏ | 43391/371472 [3:30:01<25:28:41, 3.58it/s] 12%|█▏ | 43392/371472 [3:30:01<24:48:55, 3.67it/s] 12%|█▏ | 43393/371472 [3:30:02<24:57:51, 3.65it/s] 12%|█▏ | 43394/371472 [3:30:02<26:41:47, 3.41it/s] 12%|█▏ | 43395/371472 [3:30:02<27:40:24, 3.29it/s] 12%|█▏ | 43396/371472 [3:30:03<27:18:39, 3.34it/s] 12%|█▏ | 43397/371472 [3:30:03<26:16:58, 3.47it/s] 12%|█▏ | 43398/371472 [3:30:03<26:19:22, 3.46it/s] 12%|█▏ | 43399/371472 [3:30:04<27:10:41, 3.35it/s] 12%|█▏ | 43400/371472 [3:30:04<25:52:21, 3.52it/s] {'loss': 4.2809, 'learning_rate': 8.952789329655885e-07, 'epoch': 1.87} + 12%|█▏ | 43400/371472 [3:30:04<25:52:21, 3.52it/s] 12%|█▏ | 43401/371472 [3:30:04<26:02:10, 3.50it/s] 12%|█▏ | 43402/371472 [3:30:04<27:17:31, 3.34it/s] 12%|█▏ | 43403/371472 [3:30:05<26:45:08, 3.41it/s] 12%|█▏ | 43404/371472 [3:30:05<26:23:37, 3.45it/s] 12%|█▏ | 43405/371472 [3:30:05<26:58:01, 3.38it/s] 12%|█▏ | 43406/371472 [3:30:06<26:17:24, 3.47it/s] 12%|█▏ | 43407/371472 [3:30:06<27:42:59, 3.29it/s] 12%|█▏ | 43408/371472 [3:30:06<26:40:19, 3.42it/s] 12%|█▏ | 43409/371472 [3:30:06<25:53:37, 3.52it/s] 12%|█▏ | 43410/371472 [3:30:07<27:05:20, 3.36it/s] 12%|█▏ | 43411/371472 [3:30:07<26:50:35, 3.39it/s] 12%|█▏ | 43412/371472 [3:30:07<26:33:48, 3.43it/s] 12%|█▏ | 43413/371472 [3:30:08<25:59:45, 3.51it/s] 12%|█▏ | 43414/371472 [3:30:08<25:35:23, 3.56it/s] 12%|█▏ | 43415/371472 [3:30:08<25:54:38, 3.52it/s] 12%|█▏ | 43416/371472 [3:30:08<25:22:09, 3.59it/s] 12%|█▏ | 43417/371472 [3:30:09<27:17:52, 3.34it/s] 12%|█▏ | 43418/371472 [3:30:09<26:09:26, 3.48it/s] 12%|█▏ | 43419/371472 [3:30:09<25:42:33, 3.54it/s] 12%|█▏ | 43420/371472 [3:30:10<26:13:21, 3.48it/s] {'loss': 4.2481, 'learning_rate': 8.952304509901096e-07, 'epoch': 1.87} + 12%|█▏ | 43420/371472 [3:30:10<26:13:21, 3.48it/s] 12%|█▏ | 43421/371472 [3:30:10<26:04:53, 3.49it/s] 12%|█▏ | 43422/371472 [3:30:10<25:19:44, 3.60it/s] 12%|█▏ | 43423/371472 [3:30:11<30:00:31, 3.04it/s] 12%|█▏ | 43424/371472 [3:30:11<29:48:29, 3.06it/s] 12%|█▏ | 43425/371472 [3:30:11<28:41:38, 3.18it/s] 12%|█▏ | 43426/371472 [3:30:12<28:49:31, 3.16it/s] 12%|█▏ | 43427/371472 [3:30:12<31:31:39, 2.89it/s] 12%|█▏ | 43428/371472 [3:30:12<31:04:11, 2.93it/s] 12%|█▏ | 43429/371472 [3:30:13<28:59:26, 3.14it/s] 12%|█▏ | 43430/371472 [3:30:13<27:16:18, 3.34it/s] 12%|█▏ | 43431/371472 [3:30:13<27:31:01, 3.31it/s] 12%|█▏ | 43432/371472 [3:30:13<26:47:19, 3.40it/s] 12%|█▏ | 43433/371472 [3:30:14<26:51:38, 3.39it/s] 12%|█▏ | 43434/371472 [3:30:14<26:19:03, 3.46it/s] 12%|█▏ | 43435/371472 [3:30:14<26:24:46, 3.45it/s] 12%|█▏ | 43436/371472 [3:30:15<25:40:11, 3.55it/s] 12%|█▏ | 43437/371472 [3:30:15<27:04:24, 3.37it/s] 12%|█▏ | 43438/371472 [3:30:15<26:14:36, 3.47it/s] 12%|█▏ | 43439/371472 [3:30:15<26:16:56, 3.47it/s] 12%|█▏ | 43440/371472 [3:30:16<25:36:41, 3.56it/s] {'loss': 4.3533, 'learning_rate': 8.951819690146307e-07, 'epoch': 1.87} + 12%|█▏ | 43440/371472 [3:30:16<25:36:41, 3.56it/s] 12%|█▏ | 43441/371472 [3:30:16<25:09:45, 3.62it/s] 12%|█▏ | 43442/371472 [3:30:16<24:52:20, 3.66it/s] 12%|█▏ | 43443/371472 [3:30:17<25:10:01, 3.62it/s] 12%|█▏ | 43444/371472 [3:30:17<25:48:52, 3.53it/s] 12%|█▏ | 43445/371472 [3:30:17<25:46:32, 3.54it/s] 12%|█▏ | 43446/371472 [3:30:17<25:54:51, 3.52it/s] 12%|█▏ | 43447/371472 [3:30:18<31:36:00, 2.88it/s] 12%|█▏ | 43448/371472 [3:30:18<29:46:55, 3.06it/s] 12%|█▏ | 43449/371472 [3:30:18<28:28:43, 3.20it/s] 12%|█▏ | 43450/371472 [3:30:19<29:52:00, 3.05it/s] 12%|█▏ | 43451/371472 [3:30:19<31:15:16, 2.92it/s] 12%|█▏ | 43452/371472 [3:30:19<30:09:10, 3.02it/s] 12%|█▏ | 43453/371472 [3:30:20<28:12:00, 3.23it/s] 12%|█▏ | 43454/371472 [3:30:20<27:00:22, 3.37it/s] 12%|█▏ | 43455/371472 [3:30:20<25:43:07, 3.54it/s] 12%|█▏ | 43456/371472 [3:30:21<24:59:34, 3.65it/s] 12%|█▏ | 43457/371472 [3:30:21<26:24:34, 3.45it/s] 12%|█▏ | 43458/371472 [3:30:21<26:33:27, 3.43it/s] 12%|█▏ | 43459/371472 [3:30:21<26:25:08, 3.45it/s] 12%|█▏ | 43460/371472 [3:30:22<26:41:10, 3.41it/s] {'loss': 4.3064, 'learning_rate': 8.951334870391518e-07, 'epoch': 1.87} + 12%|█▏ | 43460/371472 [3:30:22<26:41:10, 3.41it/s] 12%|█▏ | 43461/371472 [3:30:22<26:18:45, 3.46it/s] 12%|█▏ | 43462/371472 [3:30:22<26:18:37, 3.46it/s] 12%|█▏ | 43463/371472 [3:30:23<27:31:55, 3.31it/s] 12%|█▏ | 43464/371472 [3:30:23<27:43:25, 3.29it/s] 12%|█▏ | 43465/371472 [3:30:23<26:25:58, 3.45it/s] 12%|█▏ | 43466/371472 [3:30:24<27:24:53, 3.32it/s] 12%|█▏ | 43467/371472 [3:30:24<26:54:37, 3.39it/s] 12%|█▏ | 43468/371472 [3:30:24<27:11:44, 3.35it/s] 12%|█▏ | 43469/371472 [3:30:24<27:02:58, 3.37it/s] 12%|█▏ | 43470/371472 [3:30:25<27:37:21, 3.30it/s] 12%|█▏ | 43471/371472 [3:30:25<27:06:46, 3.36it/s] 12%|█▏ | 43472/371472 [3:30:25<27:00:26, 3.37it/s] 12%|█▏ | 43473/371472 [3:30:26<26:05:17, 3.49it/s] 12%|█▏ | 43474/371472 [3:30:26<25:24:09, 3.59it/s] 12%|█▏ | 43475/371472 [3:30:26<25:44:30, 3.54it/s] 12%|█▏ | 43476/371472 [3:30:26<25:21:09, 3.59it/s] 12%|█▏ | 43477/371472 [3:30:27<24:57:21, 3.65it/s] 12%|█▏ | 43478/371472 [3:30:27<26:43:21, 3.41it/s] 12%|█▏ | 43479/371472 [3:30:27<26:29:53, 3.44it/s] 12%|█▏ | 43480/371472 [3:30:28<26:36:38, 3.42it/s] {'loss': 4.3509, 'learning_rate': 8.95085005063673e-07, 'epoch': 1.87} + 12%|█▏ | 43480/371472 [3:30:28<26:36:38, 3.42it/s] 12%|█▏ | 43481/371472 [3:30:28<26:20:15, 3.46it/s] 12%|█▏ | 43482/371472 [3:30:28<25:22:02, 3.59it/s] 12%|█▏ | 43483/371472 [3:30:28<25:51:47, 3.52it/s] 12%|█▏ | 43484/371472 [3:30:29<24:42:11, 3.69it/s] 12%|█▏ | 43485/371472 [3:30:29<26:24:47, 3.45it/s] 12%|█▏ | 43486/371472 [3:30:29<28:39:36, 3.18it/s] 12%|█▏ | 43487/371472 [3:30:30<27:51:14, 3.27it/s] 12%|█▏ | 43488/371472 [3:30:30<27:54:42, 3.26it/s] 12%|█▏ | 43489/371472 [3:30:30<27:12:05, 3.35it/s] 12%|█▏ | 43490/371472 [3:30:30<26:17:55, 3.46it/s] 12%|█▏ | 43491/371472 [3:30:31<26:04:25, 3.49it/s] 12%|█▏ | 43492/371472 [3:30:31<26:25:14, 3.45it/s] 12%|█▏ | 43493/371472 [3:30:31<25:44:27, 3.54it/s] 12%|█▏ | 43494/371472 [3:30:32<26:48:43, 3.40it/s] 12%|█▏ | 43495/371472 [3:30:32<26:15:30, 3.47it/s] 12%|█▏ | 43496/371472 [3:30:32<25:37:52, 3.55it/s] 12%|█▏ | 43497/371472 [3:30:32<25:55:17, 3.51it/s] 12%|█▏ | 43498/371472 [3:30:33<25:42:42, 3.54it/s] 12%|█▏ | 43499/371472 [3:30:33<26:17:09, 3.47it/s] 12%|█▏ | 43500/371472 [3:30:33<26:15:07, 3.47it/s] {'loss': 4.2292, 'learning_rate': 8.950365230881941e-07, 'epoch': 1.87} + 12%|█▏ | 43500/371472 [3:30:33<26:15:07, 3.47it/s] 12%|█▏ | 43501/371472 [3:30:34<25:23:18, 3.59it/s] 12%|█▏ | 43502/371472 [3:30:34<25:37:02, 3.56it/s] 12%|█▏ | 43503/371472 [3:30:34<25:21:00, 3.59it/s] 12%|█▏ | 43504/371472 [3:30:34<25:31:08, 3.57it/s] 12%|█▏ | 43505/371472 [3:30:35<24:48:48, 3.67it/s] 12%|█▏ | 43506/371472 [3:30:35<25:19:53, 3.60it/s] 12%|█▏ | 43507/371472 [3:30:35<26:11:18, 3.48it/s] 12%|█▏ | 43508/371472 [3:30:36<25:55:01, 3.52it/s] 12%|█▏ | 43509/371472 [3:30:36<25:56:30, 3.51it/s] 12%|█▏ | 43510/371472 [3:30:36<28:06:58, 3.24it/s] 12%|█▏ | 43511/371472 [3:30:37<27:48:51, 3.28it/s] 12%|█▏ | 43512/371472 [3:30:37<28:15:59, 3.22it/s] 12%|█▏ | 43513/371472 [3:30:37<28:00:48, 3.25it/s] 12%|█▏ | 43514/371472 [3:30:37<27:42:11, 3.29it/s] 12%|█▏ | 43515/371472 [3:30:38<26:14:59, 3.47it/s] 12%|█▏ | 43516/371472 [3:30:38<26:08:21, 3.49it/s] 12%|█▏ | 43517/371472 [3:30:38<25:08:29, 3.62it/s] 12%|█▏ | 43518/371472 [3:30:38<24:56:33, 3.65it/s] 12%|█▏ | 43519/371472 [3:30:39<26:42:02, 3.41it/s] 12%|█▏ | 43520/371472 [3:30:39<25:56:28, 3.51it/s] {'loss': 4.321, 'learning_rate': 8.949880411127151e-07, 'epoch': 1.87} + 12%|█▏ | 43520/371472 [3:30:39<25:56:28, 3.51it/s] 12%|█▏ | 43521/371472 [3:30:39<26:09:56, 3.48it/s] 12%|█▏ | 43522/371472 [3:30:40<29:17:30, 3.11it/s] 12%|█▏ | 43523/371472 [3:30:40<28:59:02, 3.14it/s] 12%|█▏ | 43524/371472 [3:30:40<28:22:55, 3.21it/s] 12%|█▏ | 43525/371472 [3:30:41<27:27:50, 3.32it/s] 12%|█▏ | 43526/371472 [3:30:41<26:29:26, 3.44it/s] 12%|█▏ | 43527/371472 [3:30:41<27:20:22, 3.33it/s] 12%|█▏ | 43528/371472 [3:30:42<27:36:43, 3.30it/s] 12%|█▏ | 43529/371472 [3:30:42<26:35:08, 3.43it/s] 12%|█▏ | 43530/371472 [3:30:42<25:37:49, 3.55it/s] 12%|█▏ | 43531/371472 [3:30:42<24:52:53, 3.66it/s] 12%|█▏ | 43532/371472 [3:30:43<25:06:01, 3.63it/s] 12%|█▏ | 43533/371472 [3:30:43<24:57:07, 3.65it/s] 12%|█▏ | 43534/371472 [3:30:43<24:58:44, 3.65it/s] 12%|█▏ | 43535/371472 [3:30:43<25:34:06, 3.56it/s] 12%|█▏ | 43536/371472 [3:30:44<25:30:10, 3.57it/s] 12%|█▏ | 43537/371472 [3:30:44<25:40:05, 3.55it/s] 12%|█▏ | 43538/371472 [3:30:44<25:01:51, 3.64it/s] 12%|█▏ | 43539/371472 [3:30:45<24:36:51, 3.70it/s] 12%|█▏ | 43540/371472 [3:30:45<24:25:49, 3.73it/s] {'loss': 4.2077, 'learning_rate': 8.949395591372362e-07, 'epoch': 1.88} + 12%|█▏ | 43540/371472 [3:30:45<24:25:49, 3.73it/s] 12%|█▏ | 43541/371472 [3:30:45<23:44:17, 3.84it/s] 12%|█▏ | 43542/371472 [3:30:45<23:47:12, 3.83it/s] 12%|█▏ | 43543/371472 [3:30:46<23:57:20, 3.80it/s] 12%|█▏ | 43544/371472 [3:30:46<24:54:05, 3.66it/s] 12%|█▏ | 43545/371472 [3:30:46<26:39:00, 3.42it/s] 12%|█▏ | 43546/371472 [3:30:47<27:46:16, 3.28it/s] 12%|█▏ | 43547/371472 [3:30:47<27:05:47, 3.36it/s] 12%|█▏ | 43548/371472 [3:30:47<27:40:10, 3.29it/s] 12%|█▏ | 43549/371472 [3:30:47<27:29:09, 3.31it/s] 12%|█▏ | 43550/371472 [3:30:48<27:30:21, 3.31it/s] 12%|█▏ | 43551/371472 [3:30:48<27:24:40, 3.32it/s] 12%|█▏ | 43552/371472 [3:30:48<27:40:11, 3.29it/s] 12%|█▏ | 43553/371472 [3:30:49<26:01:50, 3.50it/s] 12%|█▏ | 43554/371472 [3:30:49<25:34:53, 3.56it/s] 12%|█▏ | 43555/371472 [3:30:49<28:34:14, 3.19it/s] 12%|█▏ | 43556/371472 [3:30:50<27:58:51, 3.26it/s] 12%|█▏ | 43557/371472 [3:30:50<26:50:08, 3.39it/s] 12%|█▏ | 43558/371472 [3:30:50<26:23:19, 3.45it/s] 12%|█▏ | 43559/371472 [3:30:50<27:39:32, 3.29it/s] 12%|█▏ | 43560/371472 [3:30:51<29:11:16, 3.12it/s] {'loss': 4.2244, 'learning_rate': 8.948910771617574e-07, 'epoch': 1.88} + 12%|█▏ | 43560/371472 [3:30:51<29:11:16, 3.12it/s] 12%|█▏ | 43561/371472 [3:30:51<28:10:05, 3.23it/s] 12%|█▏ | 43562/371472 [3:30:51<26:20:10, 3.46it/s] 12%|█▏ | 43563/371472 [3:30:52<26:20:38, 3.46it/s] 12%|█▏ | 43564/371472 [3:30:52<25:36:11, 3.56it/s] 12%|█▏ | 43565/371472 [3:30:52<25:03:12, 3.64it/s] 12%|█▏ | 43566/371472 [3:30:52<25:26:43, 3.58it/s] 12%|█▏ | 43567/371472 [3:30:53<25:29:59, 3.57it/s] 12%|█▏ | 43568/371472 [3:30:53<25:00:28, 3.64it/s] 12%|█▏ | 43569/371472 [3:30:53<25:14:33, 3.61it/s] 12%|█▏ | 43570/371472 [3:30:54<27:26:17, 3.32it/s] 12%|█▏ | 43571/371472 [3:30:54<27:03:06, 3.37it/s] 12%|█▏ | 43572/371472 [3:30:54<26:16:10, 3.47it/s] 12%|█▏ | 43573/371472 [3:30:55<27:52:19, 3.27it/s] 12%|█▏ | 43574/371472 [3:30:55<27:16:52, 3.34it/s] 12%|█▏ | 43575/371472 [3:30:55<26:57:11, 3.38it/s] 12%|█▏ | 43576/371472 [3:30:55<25:53:21, 3.52it/s] 12%|█▏ | 43577/371472 [3:30:56<26:13:26, 3.47it/s] 12%|█▏ | 43578/371472 [3:30:56<26:25:30, 3.45it/s] 12%|█▏ | 43579/371472 [3:30:56<25:37:16, 3.55it/s] 12%|█▏ | 43580/371472 [3:30:56<25:18:02, 3.60it/s] {'loss': 4.3698, 'learning_rate': 8.948425951862785e-07, 'epoch': 1.88} + 12%|█▏ | 43580/371472 [3:30:56<25:18:02, 3.60it/s] 12%|█▏ | 43581/371472 [3:30:57<28:16:03, 3.22it/s] 12%|█▏ | 43582/371472 [3:30:57<26:51:51, 3.39it/s] 12%|█▏ | 43583/371472 [3:30:57<26:15:11, 3.47it/s] 12%|█▏ | 43584/371472 [3:30:58<25:51:14, 3.52it/s] 12%|█▏ | 43585/371472 [3:30:58<24:54:24, 3.66it/s] 12%|█▏ | 43586/371472 [3:30:58<24:26:06, 3.73it/s] 12%|█▏ | 43587/371472 [3:30:59<26:22:03, 3.45it/s] 12%|█▏ | 43588/371472 [3:30:59<27:02:33, 3.37it/s] 12%|█▏ | 43589/371472 [3:30:59<27:08:58, 3.35it/s] 12%|█▏ | 43590/371472 [3:30:59<26:43:33, 3.41it/s] 12%|█▏ | 43591/371472 [3:31:00<26:16:47, 3.47it/s] 12%|█▏ | 43592/371472 [3:31:00<26:07:15, 3.49it/s] 12%|█▏ | 43593/371472 [3:31:00<25:48:29, 3.53it/s] 12%|█▏ | 43594/371472 [3:31:01<25:39:35, 3.55it/s] 12%|█▏ | 43595/371472 [3:31:01<25:31:32, 3.57it/s] 12%|█▏ | 43596/371472 [3:31:01<25:29:57, 3.57it/s] 12%|█▏ | 43597/371472 [3:31:01<25:18:26, 3.60it/s] 12%|█▏ | 43598/371472 [3:31:02<26:51:54, 3.39it/s] 12%|█▏ | 43599/371472 [3:31:02<26:35:18, 3.43it/s] 12%|█▏ | 43600/371472 [3:31:02<26:16:20, 3.47it/s] {'loss': 4.1308, 'learning_rate': 8.947941132107996e-07, 'epoch': 1.88} + 12%|█▏ | 43600/371472 [3:31:02<26:16:20, 3.47it/s] 12%|█▏ | 43601/371472 [3:31:03<25:38:24, 3.55it/s] 12%|█▏ | 43602/371472 [3:31:03<27:40:12, 3.29it/s] 12%|█▏ | 43603/371472 [3:31:03<26:21:07, 3.46it/s] 12%|█▏ | 43604/371472 [3:31:03<26:17:54, 3.46it/s] 12%|█▏ | 43605/371472 [3:31:04<31:47:40, 2.86it/s] 12%|█▏ | 43606/371472 [3:31:04<31:06:10, 2.93it/s] 12%|█▏ | 43607/371472 [3:31:05<29:25:18, 3.10it/s] 12%|█▏ | 43608/371472 [3:31:05<27:44:53, 3.28it/s] 12%|█▏ | 43609/371472 [3:31:05<26:57:33, 3.38it/s] 12%|█▏ | 43610/371472 [3:31:05<25:43:47, 3.54it/s] 12%|█▏ | 43611/371472 [3:31:06<25:12:04, 3.61it/s] 12%|█▏ | 43612/371472 [3:31:06<24:56:56, 3.65it/s] 12%|█▏ | 43613/371472 [3:31:06<24:28:50, 3.72it/s] 12%|█▏ | 43614/371472 [3:31:06<24:13:59, 3.76it/s] 12%|█▏ | 43615/371472 [3:31:07<25:06:21, 3.63it/s] 12%|█▏ | 43616/371472 [3:31:07<25:54:37, 3.51it/s] 12%|█▏ | 43617/371472 [3:31:07<27:11:37, 3.35it/s] 12%|█▏ | 43618/371472 [3:31:08<25:32:23, 3.57it/s] 12%|█▏ | 43619/371472 [3:31:08<25:21:28, 3.59it/s] 12%|█▏ | 43620/371472 [3:31:08<24:52:49, 3.66it/s] {'loss': 4.2336, 'learning_rate': 8.947456312353207e-07, 'epoch': 1.88} + 12%|█▏ | 43620/371472 [3:31:08<24:52:49, 3.66it/s] 12%|█▏ | 43621/371472 [3:31:08<24:31:14, 3.71it/s] 12%|█▏ | 43622/371472 [3:31:09<25:03:25, 3.63it/s] 12%|█▏ | 43623/371472 [3:31:09<26:43:11, 3.41it/s] 12%|█▏ | 43624/371472 [3:31:09<26:40:28, 3.41it/s] 12%|█▏ | 43625/371472 [3:31:09<25:42:18, 3.54it/s] 12%|█▏ | 43626/371472 [3:31:10<25:44:00, 3.54it/s] 12%|█▏ | 43627/371472 [3:31:10<24:50:43, 3.67it/s] 12%|█▏ | 43628/371472 [3:31:10<26:43:02, 3.41it/s] 12%|█▏ | 43629/371472 [3:31:11<26:10:45, 3.48it/s] 12%|█▏ | 43630/371472 [3:31:11<25:29:41, 3.57it/s] 12%|█▏ | 43631/371472 [3:31:11<26:13:06, 3.47it/s] 12%|█▏ | 43632/371472 [3:31:11<25:44:47, 3.54it/s] 12%|█▏ | 43633/371472 [3:31:12<25:42:02, 3.54it/s] 12%|█▏ | 43634/371472 [3:31:12<27:16:57, 3.34it/s] 12%|█▏ | 43635/371472 [3:31:12<26:55:40, 3.38it/s] 12%|█▏ | 43636/371472 [3:31:13<28:28:09, 3.20it/s] 12%|█▏ | 43637/371472 [3:31:13<27:18:20, 3.34it/s] 12%|█▏ | 43638/371472 [3:31:13<26:38:25, 3.42it/s] 12%|█▏ | 43639/371472 [3:31:14<28:36:35, 3.18it/s] 12%|█▏ | 43640/371472 [3:31:14<27:44:47, 3.28it/s] {'loss': 4.3006, 'learning_rate': 8.946971492598419e-07, 'epoch': 1.88} + 12%|█▏ | 43640/371472 [3:31:14<27:44:47, 3.28it/s] 12%|█▏ | 43641/371472 [3:31:14<26:56:40, 3.38it/s] 12%|█▏ | 43642/371472 [3:31:15<27:30:34, 3.31it/s] 12%|█▏ | 43643/371472 [3:31:15<26:23:32, 3.45it/s] 12%|█▏ | 43644/371472 [3:31:15<28:10:58, 3.23it/s] 12%|█▏ | 43645/371472 [3:31:15<27:16:39, 3.34it/s] 12%|█▏ | 43646/371472 [3:31:16<25:52:45, 3.52it/s] 12%|█▏ | 43647/371472 [3:31:16<27:26:15, 3.32it/s] 12%|█▏ | 43648/371472 [3:31:16<29:38:00, 3.07it/s] 12%|█▏ | 43649/371472 [3:31:17<28:22:36, 3.21it/s] 12%|█▏ | 43650/371472 [3:31:17<27:37:35, 3.30it/s] 12%|█▏ | 43651/371472 [3:31:17<26:50:24, 3.39it/s] 12%|█▏ | 43652/371472 [3:31:17<26:28:27, 3.44it/s] 12%|█▏ | 43653/371472 [3:31:18<25:22:28, 3.59it/s] 12%|█▏ | 43654/371472 [3:31:18<24:41:28, 3.69it/s] 12%|█▏ | 43655/371472 [3:31:18<24:06:23, 3.78it/s] 12%|█▏ | 43656/371472 [3:31:19<24:03:03, 3.79it/s] 12%|█▏ | 43657/371472 [3:31:19<26:17:22, 3.46it/s] 12%|█▏ | 43658/371472 [3:31:19<25:12:57, 3.61it/s] 12%|█▏ | 43659/371472 [3:31:19<24:56:43, 3.65it/s] 12%|█▏ | 43660/371472 [3:31:20<24:12:15, 3.76it/s] {'loss': 4.3912, 'learning_rate': 8.946486672843628e-07, 'epoch': 1.88} + 12%|█▏ | 43660/371472 [3:31:20<24:12:15, 3.76it/s] 12%|█▏ | 43661/371472 [3:31:20<28:55:28, 3.15it/s] 12%|█▏ | 43662/371472 [3:31:20<27:37:40, 3.30it/s] 12%|█▏ | 43663/371472 [3:31:21<26:28:39, 3.44it/s] 12%|█▏ | 43664/371472 [3:31:21<26:02:35, 3.50it/s] 12%|█▏ | 43665/371472 [3:31:21<24:55:36, 3.65it/s] 12%|█▏ | 43666/371472 [3:31:21<24:41:52, 3.69it/s] 12%|█▏ | 43667/371472 [3:31:22<24:35:30, 3.70it/s] 12%|█▏ | 43668/371472 [3:31:22<24:40:03, 3.69it/s] 12%|█▏ | 43669/371472 [3:31:22<25:01:40, 3.64it/s] 12%|█▏ | 43670/371472 [3:31:22<25:28:34, 3.57it/s] 12%|█▏ | 43671/371472 [3:31:23<25:52:20, 3.52it/s] 12%|█▏ | 43672/371472 [3:31:23<26:01:28, 3.50it/s] 12%|█▏ | 43673/371472 [3:31:23<26:50:51, 3.39it/s] 12%|█▏ | 43674/371472 [3:31:24<26:22:34, 3.45it/s] 12%|█▏ | 43675/371472 [3:31:24<26:24:18, 3.45it/s] 12%|█▏ | 43676/371472 [3:31:24<26:34:03, 3.43it/s] 12%|█▏ | 43677/371472 [3:31:25<26:11:45, 3.48it/s] 12%|█▏ | 43678/371472 [3:31:25<30:33:26, 2.98it/s] 12%|█▏ | 43679/371472 [3:31:25<28:19:06, 3.22it/s] 12%|█▏ | 43680/371472 [3:31:25<26:49:48, 3.39it/s] {'loss': 4.3259, 'learning_rate': 8.94600185308884e-07, 'epoch': 1.88} + 12%|█▏ | 43680/371472 [3:31:25<26:49:48, 3.39it/s] 12%|█▏ | 43681/371472 [3:31:26<26:13:43, 3.47it/s] 12%|█▏ | 43682/371472 [3:31:26<25:50:18, 3.52it/s] 12%|█▏ | 43683/371472 [3:31:26<25:26:25, 3.58it/s] 12%|█▏ | 43684/371472 [3:31:27<25:10:58, 3.62it/s] 12%|█▏ | 43685/371472 [3:31:27<25:53:50, 3.52it/s] 12%|█▏ | 43686/371472 [3:31:27<27:31:49, 3.31it/s] 12%|█▏ | 43687/371472 [3:31:28<28:29:19, 3.20it/s] 12%|█▏ | 43688/371472 [3:31:28<27:14:07, 3.34it/s] 12%|█▏ | 43689/371472 [3:31:28<27:23:43, 3.32it/s] 12%|█▏ | 43690/371472 [3:31:28<26:41:16, 3.41it/s] 12%|█▏ | 43691/371472 [3:31:29<25:24:44, 3.58it/s] 12%|█▏ | 43692/371472 [3:31:29<25:20:20, 3.59it/s] 12%|█▏ | 43693/371472 [3:31:29<25:21:28, 3.59it/s] 12%|█▏ | 43694/371472 [3:31:29<25:09:25, 3.62it/s] 12%|█▏ | 43695/371472 [3:31:30<29:19:14, 3.11it/s] 12%|█▏ | 43696/371472 [3:31:30<28:02:34, 3.25it/s] 12%|█▏ | 43697/371472 [3:31:30<26:57:54, 3.38it/s] 12%|█▏ | 43698/371472 [3:31:31<26:14:16, 3.47it/s] 12%|█▏ | 43699/371472 [3:31:31<25:25:22, 3.58it/s] 12%|█▏ | 43700/371472 [3:31:31<25:49:37, 3.53it/s] {'loss': 4.456, 'learning_rate': 8.945517033334051e-07, 'epoch': 1.88} + 12%|█▏ | 43700/371472 [3:31:31<25:49:37, 3.53it/s] 12%|█▏ | 43701/371472 [3:31:32<25:56:07, 3.51it/s] 12%|█▏ | 43702/371472 [3:31:32<24:58:56, 3.64it/s] 12%|█▏ | 43703/371472 [3:31:32<25:11:11, 3.61it/s] 12%|█▏ | 43704/371472 [3:31:32<24:35:34, 3.70it/s] 12%|█▏ | 43705/371472 [3:31:33<26:14:41, 3.47it/s] 12%|█▏ | 43706/371472 [3:31:33<27:05:40, 3.36it/s] 12%|█▏ | 43707/371472 [3:31:33<28:12:05, 3.23it/s] 12%|█▏ | 43708/371472 [3:31:34<26:58:50, 3.37it/s] 12%|█▏ | 43709/371472 [3:31:34<25:56:00, 3.51it/s] 12%|█▏ | 43710/371472 [3:31:34<27:04:42, 3.36it/s] 12%|█▏ | 43711/371472 [3:31:34<26:55:47, 3.38it/s] 12%|█▏ | 43712/371472 [3:31:35<28:52:52, 3.15it/s] 12%|█▏ | 43713/371472 [3:31:35<27:30:43, 3.31it/s] 12%|█▏ | 43714/371472 [3:31:35<26:20:33, 3.46it/s] 12%|█▏ | 43715/371472 [3:31:36<24:59:29, 3.64it/s] 12%|█▏ | 43716/371472 [3:31:36<24:26:32, 3.72it/s] 12%|█▏ | 43717/371472 [3:31:36<24:29:50, 3.72it/s] 12%|█▏ | 43718/371472 [3:31:36<24:09:22, 3.77it/s] 12%|█▏ | 43719/371472 [3:31:37<24:15:09, 3.75it/s] 12%|█▏ | 43720/371472 [3:31:37<24:48:58, 3.67it/s] {'loss': 4.2251, 'learning_rate': 8.945032213579262e-07, 'epoch': 1.88} + 12%|█▏ | 43720/371472 [3:31:37<24:48:58, 3.67it/s] 12%|█▏ | 43721/371472 [3:31:37<24:37:56, 3.70it/s] 12%|█▏ | 43722/371472 [3:31:38<25:16:41, 3.60it/s] 12%|█▏ | 43723/371472 [3:31:38<26:43:11, 3.41it/s] 12%|█▏ | 43724/371472 [3:31:38<26:34:04, 3.43it/s] 12%|█▏ | 43725/371472 [3:31:38<25:53:04, 3.52it/s] 12%|█▏ | 43726/371472 [3:31:39<25:38:08, 3.55it/s] 12%|█▏ | 43727/371472 [3:31:39<26:55:31, 3.38it/s] 12%|█▏ | 43728/371472 [3:31:39<26:56:14, 3.38it/s] 12%|█▏ | 43729/371472 [3:31:40<26:34:48, 3.43it/s] 12%|█▏ | 43730/371472 [3:31:40<27:14:17, 3.34it/s] 12%|█▏ | 43731/371472 [3:31:40<27:46:44, 3.28it/s] 12%|█▏ | 43732/371472 [3:31:40<26:48:50, 3.40it/s] 12%|█▏ | 43733/371472 [3:31:41<25:49:10, 3.53it/s] 12%|█▏ | 43734/371472 [3:31:41<26:27:34, 3.44it/s] 12%|█▏ | 43735/371472 [3:31:41<26:10:53, 3.48it/s] 12%|█▏ | 43736/371472 [3:31:42<25:26:13, 3.58it/s] 12%|█▏ | 43737/371472 [3:31:42<25:08:36, 3.62it/s] 12%|█▏ | 43738/371472 [3:31:42<24:26:05, 3.73it/s] 12%|█▏ | 43739/371472 [3:31:42<25:17:45, 3.60it/s] 12%|█▏ | 43740/371472 [3:31:43<27:35:14, 3.30it/s] {'loss': 4.5059, 'learning_rate': 8.944547393824473e-07, 'epoch': 1.88} + 12%|█▏ | 43740/371472 [3:31:43<27:35:14, 3.30it/s] 12%|█▏ | 43741/371472 [3:31:43<26:56:04, 3.38it/s] 12%|█▏ | 43742/371472 [3:31:43<26:27:54, 3.44it/s] 12%|█▏ | 43743/371472 [3:31:44<26:59:05, 3.37it/s] 12%|█▏ | 43744/371472 [3:31:44<26:20:07, 3.46it/s] 12%|█▏ | 43745/371472 [3:31:44<26:02:50, 3.49it/s] 12%|█▏ | 43746/371472 [3:31:44<26:00:28, 3.50it/s] 12%|█▏ | 43747/371472 [3:31:45<26:14:34, 3.47it/s] 12%|█▏ | 43748/371472 [3:31:45<28:31:57, 3.19it/s] 12%|█▏ | 43749/371472 [3:31:45<27:21:02, 3.33it/s] 12%|█▏ | 43750/371472 [3:31:46<25:53:50, 3.52it/s] 12%|█▏ | 43751/371472 [3:31:46<25:23:45, 3.58it/s] 12%|█▏ | 43752/371472 [3:31:46<26:27:01, 3.44it/s] 12%|█▏ | 43753/371472 [3:31:47<26:48:21, 3.40it/s] 12%|█▏ | 43754/371472 [3:31:47<26:24:28, 3.45it/s] 12%|█▏ | 43755/371472 [3:31:47<25:59:52, 3.50it/s] 12%|█▏ | 43756/371472 [3:31:47<26:59:56, 3.37it/s] 12%|█▏ | 43757/371472 [3:31:48<26:50:15, 3.39it/s] 12%|█▏ | 43758/371472 [3:31:48<26:18:47, 3.46it/s] 12%|█▏ | 43759/371472 [3:31:48<25:31:28, 3.57it/s] 12%|█▏ | 43760/371472 [3:31:49<28:04:19, 3.24it/s] {'loss': 4.3112, 'learning_rate': 8.944062574069685e-07, 'epoch': 1.88} + 12%|█▏ | 43760/371472 [3:31:49<28:04:19, 3.24it/s] 12%|█▏ | 43761/371472 [3:31:49<26:14:53, 3.47it/s] 12%|█▏ | 43762/371472 [3:31:49<25:51:07, 3.52it/s] 12%|█▏ | 43763/371472 [3:31:49<25:20:38, 3.59it/s] 12%|█▏ | 43764/371472 [3:31:50<26:45:34, 3.40it/s] 12%|█▏ | 43765/371472 [3:31:50<26:55:33, 3.38it/s] 12%|█▏ | 43766/371472 [3:31:50<27:54:17, 3.26it/s] 12%|█▏ | 43767/371472 [3:31:51<26:44:50, 3.40it/s] 12%|█▏ | 43768/371472 [3:31:51<25:32:20, 3.56it/s] 12%|█▏ | 43769/371472 [3:31:51<24:50:36, 3.66it/s] 12%|█▏ | 43770/371472 [3:31:51<24:24:04, 3.73it/s] 12%|█▏ | 43771/371472 [3:31:52<26:06:26, 3.49it/s] 12%|█▏ | 43772/371472 [3:31:52<26:08:02, 3.48it/s] 12%|█▏ | 43773/371472 [3:31:52<25:41:33, 3.54it/s] 12%|█▏ | 43774/371472 [3:31:53<24:55:06, 3.65it/s] 12%|█▏ | 43775/371472 [3:31:53<24:24:33, 3.73it/s] 12%|█▏ | 43776/371472 [3:31:53<24:15:16, 3.75it/s] 12%|█▏ | 43777/371472 [3:31:53<25:45:05, 3.53it/s] 12%|█▏ | 43778/371472 [3:31:54<25:44:50, 3.54it/s] 12%|█▏ | 43779/371472 [3:31:54<24:45:17, 3.68it/s] 12%|█▏ | 43780/371472 [3:31:54<25:18:06, 3.60it/s] {'loss': 4.353, 'learning_rate': 8.943577754314895e-07, 'epoch': 1.89} + 12%|█▏ | 43780/371472 [3:31:54<25:18:06, 3.60it/s] 12%|█▏ | 43781/371472 [3:31:54<25:17:31, 3.60it/s] 12%|█▏ | 43782/371472 [3:31:55<24:36:08, 3.70it/s] 12%|█▏ | 43783/371472 [3:31:55<24:54:01, 3.66it/s] 12%|█▏ | 43784/371472 [3:31:55<25:04:32, 3.63it/s] 12%|█▏ | 43785/371472 [3:31:56<25:17:30, 3.60it/s] 12%|█▏ | 43786/371472 [3:31:56<25:13:02, 3.61it/s] 12%|█▏ | 43787/371472 [3:31:56<27:52:44, 3.26it/s] 12%|█▏ | 43788/371472 [3:31:57<27:58:27, 3.25it/s] 12%|█▏ | 43789/371472 [3:31:57<26:54:50, 3.38it/s] 12%|█▏ | 43790/371472 [3:31:57<27:44:20, 3.28it/s] 12%|█▏ | 43791/371472 [3:31:57<27:34:57, 3.30it/s] 12%|█▏ | 43792/371472 [3:31:58<27:20:30, 3.33it/s] 12%|█▏ | 43793/371472 [3:31:58<26:50:45, 3.39it/s] 12%|█▏ | 43794/371472 [3:31:58<26:43:38, 3.41it/s] 12%|█▏ | 43795/371472 [3:31:59<29:32:17, 3.08it/s] 12%|█▏ | 43796/371472 [3:31:59<28:50:26, 3.16it/s] 12%|█▏ | 43797/371472 [3:31:59<28:21:04, 3.21it/s] 12%|█▏ | 43798/371472 [3:32:00<27:07:51, 3.35it/s] 12%|█▏ | 43799/371472 [3:32:00<25:50:38, 3.52it/s] 12%|█▏ | 43800/371472 [3:32:00<25:48:39, 3.53it/s] {'loss': 4.3783, 'learning_rate': 8.943092934560107e-07, 'epoch': 1.89} + 12%|█▏ | 43800/371472 [3:32:00<25:48:39, 3.53it/s] 12%|█▏ | 43801/371472 [3:32:00<26:45:39, 3.40it/s] 12%|█▏ | 43802/371472 [3:32:01<26:21:31, 3.45it/s] 12%|█▏ | 43803/371472 [3:32:01<27:32:58, 3.30it/s] 12%|█▏ | 43804/371472 [3:32:01<27:36:52, 3.30it/s] 12%|█▏ | 43805/371472 [3:32:02<26:46:34, 3.40it/s] 12%|█▏ | 43806/371472 [3:32:02<27:12:09, 3.35it/s] 12%|█▏ | 43807/371472 [3:32:02<26:51:11, 3.39it/s] 12%|█▏ | 43808/371472 [3:32:02<26:39:17, 3.41it/s] 12%|█▏ | 43809/371472 [3:32:03<25:55:44, 3.51it/s] 12%|█▏ | 43810/371472 [3:32:03<25:51:49, 3.52it/s] 12%|█▏ | 43811/371472 [3:32:03<26:11:10, 3.48it/s] 12%|█▏ | 43812/371472 [3:32:04<25:31:30, 3.57it/s] 12%|█▏ | 43813/371472 [3:32:04<25:30:11, 3.57it/s] 12%|█▏ | 43814/371472 [3:32:04<25:18:27, 3.60it/s] 12%|█▏ | 43815/371472 [3:32:04<25:25:17, 3.58it/s] 12%|█▏ | 43816/371472 [3:32:05<25:14:30, 3.61it/s] 12%|█▏ | 43817/371472 [3:32:05<24:26:47, 3.72it/s] 12%|█▏ | 43818/371472 [3:32:05<26:27:56, 3.44it/s] 12%|█▏ | 43819/371472 [3:32:06<26:17:43, 3.46it/s] 12%|█▏ | 43820/371472 [3:32:06<25:42:02, 3.54it/s] {'loss': 4.3289, 'learning_rate': 8.942608114805317e-07, 'epoch': 1.89} + 12%|█▏ | 43820/371472 [3:32:06<25:42:02, 3.54it/s] 12%|█▏ | 43821/371472 [3:32:06<25:20:50, 3.59it/s] 12%|█▏ | 43822/371472 [3:32:06<24:55:19, 3.65it/s] 12%|█▏ | 43823/371472 [3:32:07<24:16:55, 3.75it/s] 12%|█▏ | 43824/371472 [3:32:07<23:54:08, 3.81it/s] 12%|█▏ | 43825/371472 [3:32:07<23:41:52, 3.84it/s] 12%|█▏ | 43826/371472 [3:32:07<25:34:11, 3.56it/s] 12%|█▏ | 43827/371472 [3:32:08<25:06:26, 3.62it/s] 12%|█▏ | 43828/371472 [3:32:08<24:48:07, 3.67it/s] 12%|█▏ | 43829/371472 [3:32:08<25:30:42, 3.57it/s] 12%|█▏ | 43830/371472 [3:32:09<26:12:31, 3.47it/s] 12%|█▏ | 43831/371472 [3:32:09<26:15:25, 3.47it/s] 12%|█▏ | 43832/371472 [3:32:09<25:08:24, 3.62it/s] 12%|█▏ | 43833/371472 [3:32:09<25:37:22, 3.55it/s] 12%|█▏ | 43834/371472 [3:32:10<26:55:07, 3.38it/s] 12%|█▏ | 43835/371472 [3:32:10<27:07:06, 3.36it/s] 12%|█▏ | 43836/371472 [3:32:10<26:42:51, 3.41it/s] 12%|█▏ | 43837/371472 [3:32:11<30:14:25, 3.01it/s] 12%|█▏ | 43838/371472 [3:32:11<28:20:08, 3.21it/s] 12%|█▏ | 43839/371472 [3:32:11<27:37:30, 3.29it/s] 12%|█▏ | 43840/371472 [3:32:12<26:44:46, 3.40it/s] {'loss': 4.519, 'learning_rate': 8.942123295050528e-07, 'epoch': 1.89} + 12%|█▏ | 43840/371472 [3:32:12<26:44:46, 3.40it/s] 12%|█▏ | 43841/371472 [3:32:12<25:56:42, 3.51it/s] 12%|█▏ | 43842/371472 [3:32:12<26:25:55, 3.44it/s] 12%|█▏ | 43843/371472 [3:32:12<25:36:35, 3.55it/s] 12%|█▏ | 43844/371472 [3:32:13<26:19:08, 3.46it/s] 12%|█▏ | 43845/371472 [3:32:13<26:35:31, 3.42it/s] 12%|█▏ | 43846/371472 [3:32:13<26:52:11, 3.39it/s] 12%|█▏ | 43847/371472 [3:32:14<25:43:49, 3.54it/s] 12%|█▏ | 43848/371472 [3:32:14<24:56:56, 3.65it/s] 12%|█▏ | 43849/371472 [3:32:14<24:29:45, 3.72it/s] 12%|█▏ | 43850/371472 [3:32:14<24:49:26, 3.67it/s] 12%|█▏ | 43851/371472 [3:32:15<26:34:03, 3.43it/s] 12%|█▏ | 43852/371472 [3:32:15<26:34:32, 3.42it/s] 12%|█▏ | 43853/371472 [3:32:15<26:22:31, 3.45it/s] 12%|█▏ | 43854/371472 [3:32:16<25:32:57, 3.56it/s] 12%|█▏ | 43855/371472 [3:32:16<27:01:50, 3.37it/s] 12%|█▏ | 43856/371472 [3:32:16<26:02:01, 3.50it/s] 12%|█▏ | 43857/371472 [3:32:16<25:27:44, 3.57it/s] 12%|█▏ | 43858/371472 [3:32:17<25:39:52, 3.55it/s] 12%|█▏ | 43859/371472 [3:32:17<26:52:11, 3.39it/s] 12%|█▏ | 43860/371472 [3:32:17<25:53:09, 3.52it/s] {'loss': 4.2126, 'learning_rate': 8.94163847529574e-07, 'epoch': 1.89} + 12%|█▏ | 43860/371472 [3:32:17<25:53:09, 3.52it/s] 12%|█▏ | 43861/371472 [3:32:18<25:22:11, 3.59it/s] 12%|█▏ | 43862/371472 [3:32:18<26:12:19, 3.47it/s] 12%|█▏ | 43863/371472 [3:32:18<25:47:42, 3.53it/s] 12%|█▏ | 43864/371472 [3:32:18<25:38:01, 3.55it/s] 12%|█▏ | 43865/371472 [3:32:19<25:01:58, 3.64it/s] 12%|█▏ | 43866/371472 [3:32:19<25:39:01, 3.55it/s] 12%|█▏ | 43867/371472 [3:32:19<25:44:19, 3.54it/s] 12%|█▏ | 43868/371472 [3:32:20<26:11:43, 3.47it/s] 12%|█▏ | 43869/371472 [3:32:20<27:37:01, 3.30it/s] 12%|█▏ | 43870/371472 [3:32:20<27:02:17, 3.37it/s] 12%|█▏ | 43871/371472 [3:32:20<26:06:00, 3.49it/s] 12%|█▏ | 43872/371472 [3:32:21<26:25:15, 3.44it/s] 12%|█▏ | 43873/371472 [3:32:21<26:19:57, 3.46it/s] 12%|█▏ | 43874/371472 [3:32:21<28:08:15, 3.23it/s] 12%|█▏ | 43875/371472 [3:32:22<29:45:03, 3.06it/s] 12%|█▏ | 43876/371472 [3:32:22<27:59:06, 3.25it/s] 12%|█▏ | 43877/371472 [3:32:22<28:35:08, 3.18it/s] 12%|█▏ | 43878/371472 [3:32:23<30:25:27, 2.99it/s] 12%|█▏ | 43879/371472 [3:32:23<28:56:53, 3.14it/s] 12%|█▏ | 43880/371472 [3:32:23<27:22:21, 3.32it/s] {'loss': 4.2742, 'learning_rate': 8.941153655540952e-07, 'epoch': 1.89} + 12%|█▏ | 43880/371472 [3:32:23<27:22:21, 3.32it/s] 12%|█▏ | 43881/371472 [3:32:24<28:48:05, 3.16it/s] 12%|█▏ | 43882/371472 [3:32:24<27:37:05, 3.29it/s] 12%|█▏ | 43883/371472 [3:32:24<27:28:16, 3.31it/s] 12%|█▏ | 43884/371472 [3:32:24<26:35:35, 3.42it/s] 12%|█▏ | 43885/371472 [3:32:25<25:55:29, 3.51it/s] 12%|█▏ | 43886/371472 [3:32:25<26:23:19, 3.45it/s] 12%|█▏ | 43887/371472 [3:32:25<26:07:16, 3.48it/s] 12%|█▏ | 43888/371472 [3:32:26<27:01:31, 3.37it/s] 12%|█▏ | 43889/371472 [3:32:26<26:48:56, 3.39it/s] 12%|█▏ | 43890/371472 [3:32:26<27:58:10, 3.25it/s] 12%|█▏ | 43891/371472 [3:32:27<28:50:06, 3.16it/s] 12%|█▏ | 43892/371472 [3:32:27<29:41:37, 3.06it/s] 12%|█▏ | 43893/371472 [3:32:27<27:51:44, 3.27it/s] 12%|█▏ | 43894/371472 [3:32:27<26:34:26, 3.42it/s] 12%|█▏ | 43895/371472 [3:32:28<27:26:49, 3.32it/s] 12%|█▏ | 43896/371472 [3:32:28<28:10:16, 3.23it/s] 12%|█▏ | 43897/371472 [3:32:28<27:42:32, 3.28it/s] 12%|█▏ | 43898/371472 [3:32:29<26:28:13, 3.44it/s] 12%|█▏ | 43899/371472 [3:32:29<25:55:05, 3.51it/s] 12%|█▏ | 43900/371472 [3:32:29<25:15:03, 3.60it/s] {'loss': 4.3156, 'learning_rate': 8.940668835786162e-07, 'epoch': 1.89} + 12%|█▏ | 43900/371472 [3:32:29<25:15:03, 3.60it/s] 12%|█▏ | 43901/371472 [3:32:29<26:19:24, 3.46it/s] 12%|█▏ | 43902/371472 [3:32:30<26:04:52, 3.49it/s] 12%|█▏ | 43903/371472 [3:32:30<25:19:34, 3.59it/s] 12%|█▏ | 43904/371472 [3:32:30<25:20:24, 3.59it/s] 12%|█▏ | 43905/371472 [3:32:31<26:40:36, 3.41it/s] 12%|█▏ | 43906/371472 [3:32:31<26:37:41, 3.42it/s] 12%|█▏ | 43907/371472 [3:32:31<26:33:58, 3.43it/s] 12%|█▏ | 43908/371472 [3:32:32<27:09:28, 3.35it/s] 12%|█▏ | 43909/371472 [3:32:32<26:14:25, 3.47it/s] 12%|█▏ | 43910/371472 [3:32:32<26:06:13, 3.49it/s] 12%|█▏ | 43911/371472 [3:32:32<25:17:05, 3.60it/s] 12%|█▏ | 43912/371472 [3:32:33<26:23:09, 3.45it/s] 12%|█▏ | 43913/371472 [3:32:33<29:04:49, 3.13it/s] 12%|█▏ | 43914/371472 [3:32:33<29:04:09, 3.13it/s] 12%|█▏ | 43915/371472 [3:32:34<27:30:13, 3.31it/s] 12%|█▏ | 43916/371472 [3:32:34<26:26:00, 3.44it/s] 12%|█▏ | 43917/371472 [3:32:34<25:37:16, 3.55it/s] 12%|█▏ | 43918/371472 [3:32:34<27:05:54, 3.36it/s] 12%|█▏ | 43919/371472 [3:32:35<27:54:53, 3.26it/s] 12%|█▏ | 43920/371472 [3:32:35<27:59:45, 3.25it/s] {'loss': 4.2147, 'learning_rate': 8.940184016031372e-07, 'epoch': 1.89} + 12%|█▏ | 43920/371472 [3:32:35<27:59:45, 3.25it/s] 12%|█▏ | 43921/371472 [3:32:35<26:58:12, 3.37it/s] 12%|█▏ | 43922/371472 [3:32:36<26:25:27, 3.44it/s] 12%|█▏ | 43923/371472 [3:32:36<25:17:54, 3.60it/s] 12%|█▏ | 43924/371472 [3:32:36<24:50:23, 3.66it/s] 12%|█▏ | 43925/371472 [3:32:36<24:51:49, 3.66it/s] 12%|█▏ | 43926/371472 [3:32:37<25:32:48, 3.56it/s] 12%|█▏ | 43927/371472 [3:32:37<25:24:10, 3.58it/s] 12%|█▏ | 43928/371472 [3:32:37<25:13:08, 3.61it/s] 12%|█▏ | 43929/371472 [3:32:38<24:59:20, 3.64it/s] 12%|█▏ | 43930/371472 [3:32:38<26:41:50, 3.41it/s] 12%|█▏ | 43931/371472 [3:32:38<26:02:56, 3.49it/s] 12%|█▏ | 43932/371472 [3:32:38<26:52:25, 3.39it/s] 12%|█▏ | 43933/371472 [3:32:39<26:26:29, 3.44it/s] 12%|█▏ | 43934/371472 [3:32:39<26:43:35, 3.40it/s] 12%|█▏ | 43935/371472 [3:32:39<26:10:40, 3.48it/s] 12%|█▏ | 43936/371472 [3:32:40<26:44:44, 3.40it/s] 12%|█▏ | 43937/371472 [3:32:40<25:40:47, 3.54it/s] 12%|█▏ | 43938/371472 [3:32:40<28:13:10, 3.22it/s] 12%|█▏ | 43939/371472 [3:32:41<27:55:28, 3.26it/s] 12%|█▏ | 43940/371472 [3:32:41<26:08:40, 3.48it/s] {'loss': 4.5395, 'learning_rate': 8.939699196276584e-07, 'epoch': 1.89} + 12%|█▏ | 43940/371472 [3:32:41<26:08:40, 3.48it/s] 12%|█▏ | 43941/371472 [3:32:41<26:19:45, 3.46it/s] 12%|█▏ | 43942/371472 [3:32:41<26:04:56, 3.49it/s] 12%|█▏ | 43943/371472 [3:32:42<25:50:13, 3.52it/s] 12%|█▏ | 43944/371472 [3:32:42<26:52:47, 3.38it/s] 12%|█▏ | 43945/371472 [3:32:42<26:20:05, 3.45it/s] 12%|█▏ | 43946/371472 [3:32:43<26:00:05, 3.50it/s] 12%|█▏ | 43947/371472 [3:32:43<25:27:21, 3.57it/s] 12%|█▏ | 43948/371472 [3:32:43<26:14:25, 3.47it/s] 12%|█▏ | 43949/371472 [3:32:43<26:23:09, 3.45it/s] 12%|█▏ | 43950/371472 [3:32:44<26:08:33, 3.48it/s] 12%|█▏ | 43951/371472 [3:32:44<25:58:43, 3.50it/s] 12%|█▏ | 43952/371472 [3:32:44<26:41:20, 3.41it/s] 12%|█▏ | 43953/371472 [3:32:45<26:08:41, 3.48it/s] 12%|█▏ | 43954/371472 [3:32:45<25:17:19, 3.60it/s] 12%|█▏ | 43955/371472 [3:32:45<25:31:19, 3.56it/s] 12%|█▏ | 43956/371472 [3:32:45<24:50:45, 3.66it/s] 12%|█▏ | 43957/371472 [3:32:46<25:37:21, 3.55it/s] 12%|█▏ | 43958/371472 [3:32:46<28:00:12, 3.25it/s] 12%|█▏ | 43959/371472 [3:32:46<26:32:06, 3.43it/s] 12%|█▏ | 43960/371472 [3:32:47<28:09:38, 3.23it/s] {'loss': 4.2764, 'learning_rate': 8.939214376521794e-07, 'epoch': 1.89} + 12%|█▏ | 43960/371472 [3:32:47<28:09:38, 3.23it/s] 12%|█▏ | 43961/371472 [3:32:47<27:04:49, 3.36it/s] 12%|█▏ | 43962/371472 [3:32:47<28:09:20, 3.23it/s] 12%|█▏ | 43963/371472 [3:32:48<27:28:00, 3.31it/s] 12%|█▏ | 43964/371472 [3:32:48<27:55:44, 3.26it/s] 12%|█▏ | 43965/371472 [3:32:48<26:55:36, 3.38it/s] 12%|█▏ | 43966/371472 [3:32:48<27:03:08, 3.36it/s] 12%|█▏ | 43967/371472 [3:32:49<27:15:56, 3.34it/s] 12%|█▏ | 43968/371472 [3:32:49<26:18:19, 3.46it/s] 12%|█▏ | 43969/371472 [3:32:49<25:55:25, 3.51it/s] 12%|█▏ | 43970/371472 [3:32:50<26:08:30, 3.48it/s] 12%|█▏ | 43971/371472 [3:32:50<25:33:40, 3.56it/s] 12%|█▏ | 43972/371472 [3:32:50<28:00:48, 3.25it/s] 12%|█▏ | 43973/371472 [3:32:50<26:22:29, 3.45it/s] 12%|█▏ | 43974/371472 [3:32:51<25:47:42, 3.53it/s] 12%|█▏ | 43975/371472 [3:32:51<25:18:58, 3.59it/s] 12%|█▏ | 43976/371472 [3:32:51<25:25:51, 3.58it/s] 12%|█▏ | 43977/371472 [3:32:52<25:13:29, 3.61it/s] 12%|█▏ | 43978/371472 [3:32:52<25:12:17, 3.61it/s] 12%|█▏ | 43979/371472 [3:32:52<26:43:32, 3.40it/s] 12%|█▏ | 43980/371472 [3:32:52<26:03:54, 3.49it/s] {'loss': 4.4255, 'learning_rate': 8.938729556767006e-07, 'epoch': 1.89} + 12%|█▏ | 43980/371472 [3:32:52<26:03:54, 3.49it/s] 12%|█▏ | 43981/371472 [3:32:53<25:47:25, 3.53it/s] 12%|█▏ | 43982/371472 [3:32:53<25:54:36, 3.51it/s] 12%|█▏ | 43983/371472 [3:32:53<26:24:22, 3.44it/s] 12%|█▏ | 43984/371472 [3:32:54<27:15:38, 3.34it/s] 12%|█▏ | 43985/371472 [3:32:54<26:00:10, 3.50it/s] 12%|█▏ | 43986/371472 [3:32:54<25:46:39, 3.53it/s] 12%|█▏ | 43987/371472 [3:32:54<25:33:44, 3.56it/s] 12%|█▏ | 43988/371472 [3:32:55<25:35:33, 3.55it/s] 12%|█▏ | 43989/371472 [3:32:55<27:15:28, 3.34it/s] 12%|█▏ | 43990/371472 [3:32:55<26:24:52, 3.44it/s] 12%|█▏ | 43991/371472 [3:32:56<26:11:56, 3.47it/s] 12%|█▏ | 43992/371472 [3:32:56<25:28:25, 3.57it/s] 12%|█▏ | 43993/371472 [3:32:56<26:16:04, 3.46it/s] 12%|█▏ | 43994/371472 [3:32:56<25:25:24, 3.58it/s] 12%|█▏ | 43995/371472 [3:32:57<27:17:30, 3.33it/s] 12%|█▏ | 43996/371472 [3:32:57<27:09:27, 3.35it/s] 12%|█▏ | 43997/371472 [3:32:57<27:13:40, 3.34it/s] 12%|█▏ | 43998/371472 [3:32:58<26:24:40, 3.44it/s] 12%|█▏ | 43999/371472 [3:32:58<26:35:14, 3.42it/s] 12%|█▏ | 44000/371472 [3:32:58<26:36:03, 3.42it/s] {'loss': 4.4142, 'learning_rate': 8.938244737012217e-07, 'epoch': 1.9} + 12%|█▏ | 44000/371472 [3:32:58<26:36:03, 3.42it/s] 12%|█▏ | 44001/371472 [3:32:58<25:58:41, 3.50it/s] 12%|█▏ | 44002/371472 [3:32:59<26:05:07, 3.49it/s] 12%|█▏ | 44003/371472 [3:32:59<25:24:26, 3.58it/s] 12%|█▏ | 44004/371472 [3:32:59<24:55:29, 3.65it/s] 12%|█▏ | 44005/371472 [3:33:00<24:32:10, 3.71it/s] 12%|█▏ | 44006/371472 [3:33:00<24:43:34, 3.68it/s] 12%|█▏ | 44007/371472 [3:33:00<24:32:52, 3.71it/s] 12%|█▏ | 44008/371472 [3:33:00<25:40:38, 3.54it/s] 12%|█▏ | 44009/371472 [3:33:01<25:06:58, 3.62it/s] 12%|█▏ | 44010/371472 [3:33:01<24:39:40, 3.69it/s] 12%|█▏ | 44011/371472 [3:33:01<24:56:38, 3.65it/s] 12%|█▏ | 44012/371472 [3:33:01<25:04:51, 3.63it/s] 12%|█▏ | 44013/371472 [3:33:02<24:46:06, 3.67it/s] 12%|█▏ | 44014/371472 [3:33:02<25:17:02, 3.60it/s] 12%|█▏ | 44015/371472 [3:33:02<25:02:36, 3.63it/s] 12%|█▏ | 44016/371472 [3:33:03<25:15:33, 3.60it/s] 12%|█▏ | 44017/371472 [3:33:03<24:38:45, 3.69it/s] 12%|█▏ | 44018/371472 [3:33:03<24:48:55, 3.67it/s] 12%|█▏ | 44019/371472 [3:33:03<24:39:15, 3.69it/s] 12%|█▏ | 44020/371472 [3:33:04<23:59:52, 3.79it/s] {'loss': 4.6664, 'learning_rate': 8.937759917257429e-07, 'epoch': 1.9} + 12%|█▏ | 44020/371472 [3:33:04<23:59:52, 3.79it/s] 12%|█▏ | 44021/371472 [3:33:04<24:24:43, 3.73it/s] 12%|█▏ | 44022/371472 [3:33:04<24:55:08, 3.65it/s] 12%|█▏ | 44023/371472 [3:33:04<24:58:47, 3.64it/s] 12%|█▏ | 44024/371472 [3:33:05<25:15:47, 3.60it/s] 12%|█▏ | 44025/371472 [3:33:05<25:06:44, 3.62it/s] 12%|█▏ | 44026/371472 [3:33:05<25:16:09, 3.60it/s] 12%|█▏ | 44027/371472 [3:33:06<25:20:16, 3.59it/s] 12%|█▏ | 44028/371472 [3:33:06<25:00:57, 3.64it/s] 12%|█▏ | 44029/371472 [3:33:06<26:28:03, 3.44it/s] 12%|█▏ | 44030/371472 [3:33:06<25:38:55, 3.55it/s] 12%|█▏ | 44031/371472 [3:33:07<25:30:23, 3.57it/s] 12%|█▏ | 44032/371472 [3:33:07<26:58:42, 3.37it/s] 12%|█▏ | 44033/371472 [3:33:07<25:41:13, 3.54it/s] 12%|█▏ | 44034/371472 [3:33:08<25:39:50, 3.54it/s] 12%|█▏ | 44035/371472 [3:33:08<25:59:44, 3.50it/s] 12%|█▏ | 44036/371472 [3:33:08<25:10:20, 3.61it/s] 12%|█▏ | 44037/371472 [3:33:09<28:24:09, 3.20it/s] 12%|█▏ | 44038/371472 [3:33:09<27:26:20, 3.31it/s] 12%|█▏ | 44039/371472 [3:33:09<26:07:46, 3.48it/s] 12%|█▏ | 44040/371472 [3:33:09<26:28:15, 3.44it/s] {'loss': 4.4711, 'learning_rate': 8.937275097502637e-07, 'epoch': 1.9} + 12%|█▏ | 44040/371472 [3:33:09<26:28:15, 3.44it/s] 12%|█▏ | 44041/371472 [3:33:10<27:51:56, 3.26it/s] 12%|█▏ | 44042/371472 [3:33:10<26:39:22, 3.41it/s] 12%|█▏ | 44043/371472 [3:33:10<27:29:38, 3.31it/s] 12%|█▏ | 44044/371472 [3:33:11<27:12:00, 3.34it/s] 12%|█▏ | 44045/371472 [3:33:11<25:59:27, 3.50it/s] 12%|█▏ | 44046/371472 [3:33:11<25:53:24, 3.51it/s] 12%|█▏ | 44047/371472 [3:33:11<25:42:20, 3.54it/s] 12%|█▏ | 44048/371472 [3:33:12<26:05:33, 3.49it/s] 12%|█▏ | 44049/371472 [3:33:12<26:06:46, 3.48it/s] 12%|█▏ | 44050/371472 [3:33:12<26:06:17, 3.48it/s] 12%|█▏ | 44051/371472 [3:33:13<26:45:49, 3.40it/s] 12%|█▏ | 44052/371472 [3:33:13<26:44:56, 3.40it/s] 12%|█▏ | 44053/371472 [3:33:13<25:46:20, 3.53it/s] 12%|█▏ | 44054/371472 [3:33:13<26:35:25, 3.42it/s] 12%|█▏ | 44055/371472 [3:33:14<26:54:49, 3.38it/s] 12%|█▏ | 44056/371472 [3:33:14<25:46:21, 3.53it/s] 12%|█▏ | 44057/371472 [3:33:14<28:18:14, 3.21it/s] 12%|█▏ | 44058/371472 [3:33:15<27:13:15, 3.34it/s] 12%|█▏ | 44059/371472 [3:33:15<25:49:08, 3.52it/s] 12%|█▏ | 44060/371472 [3:33:15<26:07:18, 3.48it/s] {'loss': 4.2542, 'learning_rate': 8.93679027774785e-07, 'epoch': 1.9} + 12%|█▏ | 44060/371472 [3:33:15<26:07:18, 3.48it/s] 12%|█▏ | 44061/371472 [3:33:15<25:13:03, 3.61it/s] 12%|█▏ | 44062/371472 [3:33:16<24:47:59, 3.67it/s] 12%|█▏ | 44063/371472 [3:33:16<24:57:11, 3.64it/s] 12%|█▏ | 44064/371472 [3:33:16<24:48:47, 3.67it/s] 12%|█��� | 44065/371472 [3:33:17<24:53:26, 3.65it/s] 12%|█▏ | 44066/371472 [3:33:17<25:33:21, 3.56it/s] 12%|█▏ | 44067/371472 [3:33:17<26:41:05, 3.41it/s] 12%|█▏ | 44068/371472 [3:33:17<26:08:04, 3.48it/s] 12%|█▏ | 44069/371472 [3:33:18<26:20:27, 3.45it/s] 12%|█▏ | 44070/371472 [3:33:18<28:13:19, 3.22it/s] 12%|█▏ | 44071/371472 [3:33:19<31:40:53, 2.87it/s] 12%|█▏ | 44072/371472 [3:33:19<30:06:31, 3.02it/s] 12%|█▏ | 44073/371472 [3:33:19<30:17:58, 3.00it/s] 12%|█▏ | 44074/371472 [3:33:19<28:02:35, 3.24it/s] 12%|█▏ | 44075/371472 [3:33:20<27:16:16, 3.33it/s] 12%|█▏ | 44076/371472 [3:33:20<26:04:37, 3.49it/s] 12%|█▏ | 44077/371472 [3:33:20<25:40:27, 3.54it/s] 12%|█▏ | 44078/371472 [3:33:21<26:06:54, 3.48it/s] 12%|█▏ | 44079/371472 [3:33:21<26:17:40, 3.46it/s] 12%|█▏ | 44080/371472 [3:33:21<25:30:18, 3.57it/s] {'loss': 4.2999, 'learning_rate': 8.936305457993061e-07, 'epoch': 1.9} + 12%|█▏ | 44080/371472 [3:33:21<25:30:18, 3.57it/s] 12%|█▏ | 44081/371472 [3:33:21<24:55:24, 3.65it/s] 12%|█▏ | 44082/371472 [3:33:22<26:42:59, 3.40it/s] 12%|█▏ | 44083/371472 [3:33:22<26:11:05, 3.47it/s] 12%|█▏ | 44084/371472 [3:33:22<28:03:24, 3.24it/s] 12%|█▏ | 44085/371472 [3:33:23<26:59:42, 3.37it/s] 12%|█▏ | 44086/371472 [3:33:23<26:28:56, 3.43it/s] 12%|█▏ | 44087/371472 [3:33:23<25:43:12, 3.54it/s] 12%|█▏ | 44088/371472 [3:33:23<26:57:02, 3.37it/s] 12%|█▏ | 44089/371472 [3:33:24<26:37:31, 3.42it/s] 12%|█▏ | 44090/371472 [3:33:24<27:36:09, 3.29it/s] 12%|█▏ | 44091/371472 [3:33:24<27:06:35, 3.35it/s] 12%|█▏ | 44092/371472 [3:33:25<26:02:26, 3.49it/s] 12%|█▏ | 44093/371472 [3:33:25<25:03:04, 3.63it/s] 12%|█▏ | 44094/371472 [3:33:25<27:28:27, 3.31it/s] 12%|█▏ | 44095/371472 [3:33:26<27:14:51, 3.34it/s] 12%|█▏ | 44096/371472 [3:33:26<28:19:51, 3.21it/s] 12%|█▏ | 44097/371472 [3:33:26<28:21:37, 3.21it/s] 12%|█▏ | 44098/371472 [3:33:26<27:04:46, 3.36it/s] 12%|█▏ | 44099/371472 [3:33:27<27:17:44, 3.33it/s] 12%|█▏ | 44100/371472 [3:33:27<27:05:54, 3.36it/s] {'loss': 4.5732, 'learning_rate': 8.935820638238273e-07, 'epoch': 1.9} + 12%|█▏ | 44100/371472 [3:33:27<27:05:54, 3.36it/s] 12%|█▏ | 44101/371472 [3:33:27<27:48:58, 3.27it/s] 12%|█▏ | 44102/371472 [3:33:28<27:36:19, 3.29it/s] 12%|█▏ | 44103/371472 [3:33:28<27:11:31, 3.34it/s] 12%|█▏ | 44104/371472 [3:33:28<26:33:29, 3.42it/s] 12%|█▏ | 44105/371472 [3:33:29<27:17:47, 3.33it/s] 12%|█▏ | 44106/371472 [3:33:29<26:23:19, 3.45it/s] 12%|█▏ | 44107/371472 [3:33:29<25:49:26, 3.52it/s] 12%|█▏ | 44108/371472 [3:33:29<25:55:38, 3.51it/s] 12%|█▏ | 44109/371472 [3:33:30<25:48:31, 3.52it/s] 12%|█▏ | 44110/371472 [3:33:30<27:24:15, 3.32it/s] 12%|█▏ | 44111/371472 [3:33:30<27:42:36, 3.28it/s] 12%|█▏ | 44112/371472 [3:33:31<27:19:59, 3.33it/s] 12%|█▏ | 44113/371472 [3:33:31<26:24:52, 3.44it/s] 12%|█▏ | 44114/371472 [3:33:31<25:56:55, 3.50it/s] 12%|█▏ | 44115/371472 [3:33:31<25:49:06, 3.52it/s] 12%|█▏ | 44116/371472 [3:33:32<26:34:13, 3.42it/s] 12%|█▏ | 44117/371472 [3:33:32<25:22:51, 3.58it/s] 12%|█▏ | 44118/371472 [3:33:32<24:49:18, 3.66it/s] 12%|█▏ | 44119/371472 [3:33:32<24:30:56, 3.71it/s] 12%|█▏ | 44120/371472 [3:33:33<25:16:12, 3.60it/s] {'loss': 4.4929, 'learning_rate': 8.935335818483483e-07, 'epoch': 1.9} + 12%|█▏ | 44120/371472 [3:33:33<25:16:12, 3.60it/s] 12%|█▏ | 44121/371472 [3:33:33<26:50:29, 3.39it/s] 12%|█▏ | 44122/371472 [3:33:33<26:56:57, 3.37it/s] 12%|█▏ | 44123/371472 [3:33:34<27:01:50, 3.36it/s] 12%|█▏ | 44124/371472 [3:33:34<27:45:44, 3.28it/s] 12%|█▏ | 44125/371472 [3:33:34<27:46:30, 3.27it/s] 12%|█▏ | 44126/371472 [3:33:35<26:09:37, 3.48it/s] 12%|█▏ | 44127/371472 [3:33:35<26:12:45, 3.47it/s] 12%|█▏ | 44128/371472 [3:33:35<26:49:48, 3.39it/s] 12%|█▏ | 44129/371472 [3:33:36<28:11:43, 3.22it/s] 12%|█▏ | 44130/371472 [3:33:36<28:15:34, 3.22it/s] 12%|█▏ | 44131/371472 [3:33:36<27:57:14, 3.25it/s] 12%|█▏ | 44132/371472 [3:33:36<27:12:56, 3.34it/s] 12%|█▏ | 44133/371472 [3:33:37<26:38:45, 3.41it/s] 12%|█▏ | 44134/371472 [3:33:37<26:21:15, 3.45it/s] 12%|█▏ | 44135/371472 [3:33:37<25:56:57, 3.50it/s] 12%|█▏ | 44136/371472 [3:33:38<26:46:26, 3.40it/s] 12%|█▏ | 44137/371472 [3:33:38<27:42:00, 3.28it/s] 12%|█▏ | 44138/371472 [3:33:38<27:12:56, 3.34it/s] 12%|█▏ | 44139/371472 [3:33:38<26:49:13, 3.39it/s] 12%|█▏ | 44140/371472 [3:33:39<26:12:37, 3.47it/s] {'loss': 4.2182, 'learning_rate': 8.934850998728695e-07, 'epoch': 1.9} + 12%|█▏ | 44140/371472 [3:33:39<26:12:37, 3.47it/s] 12%|█▏ | 44141/371472 [3:33:39<26:49:39, 3.39it/s] 12%|█▏ | 44142/371472 [3:33:39<27:03:28, 3.36it/s] 12%|█▏ | 44143/371472 [3:33:40<27:14:46, 3.34it/s] 12%|█▏ | 44144/371472 [3:33:40<26:17:02, 3.46it/s] 12%|█▏ | 44145/371472 [3:33:40<25:55:13, 3.51it/s] 12%|█▏ | 44146/371472 [3:33:40<25:30:29, 3.56it/s] 12%|█▏ | 44147/371472 [3:33:41<25:12:52, 3.61it/s] 12%|█▏ | 44148/371472 [3:33:41<25:28:38, 3.57it/s] 12%|█▏ | 44149/371472 [3:33:41<24:36:55, 3.69it/s] 12%|█▏ | 44150/371472 [3:33:42<24:45:54, 3.67it/s] 12%|█▏ | 44151/371472 [3:33:42<24:29:40, 3.71it/s] 12%|█▏ | 44152/371472 [3:33:42<23:57:01, 3.80it/s] 12%|█▏ | 44153/371472 [3:33:42<24:48:41, 3.66it/s] 12%|█▏ | 44154/371472 [3:33:43<25:21:20, 3.59it/s] 12%|█▏ | 44155/371472 [3:33:43<24:59:37, 3.64it/s] 12%|█▏ | 44156/371472 [3:33:43<25:06:17, 3.62it/s] 12%|█▏ | 44157/371472 [3:33:44<25:37:47, 3.55it/s] 12%|█▏ | 44158/371472 [3:33:44<25:21:31, 3.59it/s] 12%|█▏ | 44159/371472 [3:33:44<26:05:49, 3.48it/s] 12%|█▏ | 44160/371472 [3:33:44<25:02:08, 3.63it/s] {'loss': 4.4137, 'learning_rate': 8.934366178973905e-07, 'epoch': 1.9} + 12%|█▏ | 44160/371472 [3:33:44<25:02:08, 3.63it/s] 12%|█▏ | 44161/371472 [3:33:45<24:33:35, 3.70it/s] 12%|█▏ | 44162/371472 [3:33:45<25:24:38, 3.58it/s] 12%|█▏ | 44163/371472 [3:33:45<25:11:17, 3.61it/s] 12%|█▏ | 44164/371472 [3:33:45<26:08:20, 3.48it/s] 12%|█▏ | 44165/371472 [3:33:46<26:44:28, 3.40it/s] 12%|█▏ | 44166/371472 [3:33:46<26:00:24, 3.50it/s] 12%|█▏ | 44167/371472 [3:33:46<25:41:50, 3.54it/s] 12%|█▏ | 44168/371472 [3:33:47<26:50:13, 3.39it/s] 12%|█▏ | 44169/371472 [3:33:47<26:05:54, 3.48it/s] 12%|█▏ | 44170/371472 [3:33:47<26:02:14, 3.49it/s] 12%|█▏ | 44171/371472 [3:33:47<25:55:34, 3.51it/s] 12%|█▏ | 44172/371472 [3:33:48<25:23:31, 3.58it/s] 12%|█▏ | 44173/371472 [3:33:48<25:50:28, 3.52it/s] 12%|█▏ | 44174/371472 [3:33:48<27:32:04, 3.30it/s] 12%|█▏ | 44175/371472 [3:33:49<26:24:29, 3.44it/s] 12%|█▏ | 44176/371472 [3:33:49<25:39:26, 3.54it/s] 12%|█▏ | 44177/371472 [3:33:49<25:56:43, 3.50it/s] 12%|█▏ | 44178/371472 [3:33:50<27:54:41, 3.26it/s] 12%|█▏ | 44179/371472 [3:33:50<27:49:17, 3.27it/s] 12%|█▏ | 44180/371472 [3:33:50<28:52:15, 3.15it/s] {'loss': 4.3164, 'learning_rate': 8.933881359219117e-07, 'epoch': 1.9} + 12%|█▏ | 44180/371472 [3:33:50<28:52:15, 3.15it/s] 12%|█▏ | 44181/371472 [3:33:51<29:21:56, 3.10it/s] 12%|█▏ | 44182/371472 [3:33:51<27:30:40, 3.30it/s] 12%|█▏ | 44183/371472 [3:33:51<28:11:40, 3.22it/s] 12%|█▏ | 44184/371472 [3:33:51<27:52:15, 3.26it/s] 12%|█▏ | 44185/371472 [3:33:52<27:11:26, 3.34it/s] 12%|█▏ | 44186/371472 [3:33:52<26:43:42, 3.40it/s] 12%|█▏ | 44187/371472 [3:33:52<27:54:30, 3.26it/s] 12%|█▏ | 44188/371472 [3:33:53<27:18:15, 3.33it/s] 12%|█▏ | 44189/371472 [3:33:53<26:27:26, 3.44it/s] 12%|█▏ | 44190/371472 [3:33:53<26:27:37, 3.44it/s] 12%|█▏ | 44191/371472 [3:33:53<26:05:44, 3.48it/s] 12%|█▏ | 44192/371472 [3:33:54<26:14:33, 3.46it/s] 12%|█▏ | 44193/371472 [3:33:54<27:30:39, 3.30it/s] 12%|█▏ | 44194/371472 [3:33:54<26:51:25, 3.38it/s] 12%|█▏ | 44195/371472 [3:33:55<26:08:49, 3.48it/s] 12%|█▏ | 44196/371472 [3:33:55<25:49:10, 3.52it/s] 12%|█▏ | 44197/371472 [3:33:55<25:21:11, 3.59it/s] 12%|█▏ | 44198/371472 [3:33:55<25:38:12, 3.55it/s] 12%|█▏ | 44199/371472 [3:33:56<25:47:47, 3.52it/s] 12%|█▏ | 44200/371472 [3:33:56<26:39:07, 3.41it/s] {'loss': 4.1988, 'learning_rate': 8.933396539464327e-07, 'epoch': 1.9} + 12%|█▏ | 44200/371472 [3:33:56<26:39:07, 3.41it/s] 12%|█▏ | 44201/371472 [3:33:56<25:35:29, 3.55it/s] 12%|█▏ | 44202/371472 [3:33:57<25:13:01, 3.61it/s] 12%|█▏ | 44203/371472 [3:33:57<24:24:36, 3.72it/s] 12%|█▏ | 44204/371472 [3:33:57<24:52:17, 3.66it/s] 12%|█▏ | 44205/371472 [3:33:57<26:24:24, 3.44it/s] 12%|█▏ | 44206/371472 [3:33:58<26:40:09, 3.41it/s] 12%|█▏ | 44207/371472 [3:33:58<25:38:52, 3.54it/s] 12%|█▏ | 44208/371472 [3:33:58<25:24:07, 3.58it/s] 12%|█▏ | 44209/371472 [3:33:59<24:44:00, 3.68it/s] 12%|█▏ | 44210/371472 [3:33:59<24:46:43, 3.67it/s] 12%|█▏ | 44211/371472 [3:33:59<28:04:31, 3.24it/s] 12%|█▏ | 44212/371472 [3:33:59<26:58:36, 3.37it/s] 12%|█▏ | 44213/371472 [3:34:00<26:17:55, 3.46it/s] 12%|█▏ | 44214/371472 [3:34:00<27:07:00, 3.35it/s] 12%|█▏ | 44215/371472 [3:34:00<26:39:54, 3.41it/s] 12%|█▏ | 44216/371472 [3:34:01<27:03:11, 3.36it/s] 12%|█▏ | 44217/371472 [3:34:01<26:08:41, 3.48it/s] 12%|█▏ | 44218/371472 [3:34:01<26:18:46, 3.45it/s] 12%|█▏ | 44219/371472 [3:34:01<25:56:41, 3.50it/s] 12%|█▏ | 44220/371472 [3:34:02<27:02:44, 3.36it/s] {'loss': 4.281, 'learning_rate': 8.932911719709539e-07, 'epoch': 1.9} + 12%|█▏ | 44220/371472 [3:34:02<27:02:44, 3.36it/s] 12%|█▏ | 44221/371472 [3:34:02<25:57:56, 3.50it/s] 12%|█▏ | 44222/371472 [3:34:02<27:57:02, 3.25it/s] 12%|█▏ | 44223/371472 [3:34:03<27:52:30, 3.26it/s] 12%|█▏ | 44224/371472 [3:34:03<26:59:26, 3.37it/s] 12%|█▏ | 44225/371472 [3:34:03<27:23:10, 3.32it/s] 12%|█▏ | 44226/371472 [3:34:04<26:11:05, 3.47it/s] 12%|█▏ | 44227/371472 [3:34:04<24:43:37, 3.68it/s] 12%|█▏ | 44228/371472 [3:34:04<27:07:34, 3.35it/s] 12%|█▏ | 44229/371472 [3:34:04<27:00:21, 3.37it/s] 12%|█▏ | 44230/371472 [3:34:05<26:19:21, 3.45it/s] 12%|█▏ | 44231/371472 [3:34:05<27:16:55, 3.33it/s] 12%|█▏ | 44232/371472 [3:34:05<25:46:35, 3.53it/s] 12%|█▏ | 44233/371472 [3:34:06<27:19:21, 3.33it/s] 12%|█▏ | 44234/371472 [3:34:06<25:53:30, 3.51it/s] 12%|█▏ | 44235/371472 [3:34:06<25:03:31, 3.63it/s] 12%|█▏ | 44236/371472 [3:34:06<24:18:27, 3.74it/s] 12%|█▏ | 44237/371472 [3:34:07<25:50:25, 3.52it/s] 12%|█▏ | 44238/371472 [3:34:07<25:08:19, 3.62it/s] 12%|█▏ | 44239/371472 [3:34:07<25:51:30, 3.52it/s] 12%|█▏ | 44240/371472 [3:34:08<25:37:28, 3.55it/s] {'loss': 4.5675, 'learning_rate': 8.93242689995475e-07, 'epoch': 1.91} + 12%|█▏ | 44240/371472 [3:34:08<25:37:28, 3.55it/s] 12%|█▏ | 44241/371472 [3:34:08<25:22:28, 3.58it/s] 12%|█▏ | 44242/371472 [3:34:08<24:49:34, 3.66it/s] 12%|█▏ | 44243/371472 [3:34:08<26:09:16, 3.48it/s] 12%|█▏ | 44244/371472 [3:34:09<28:29:42, 3.19it/s] 12%|█▏ | 44245/371472 [3:34:09<27:41:25, 3.28it/s] 12%|█▏ | 44246/371472 [3:34:09<27:02:59, 3.36it/s] 12%|█▏ | 44247/371472 [3:34:10<26:07:53, 3.48it/s] 12%|█▏ | 44248/371472 [3:34:10<26:37:46, 3.41it/s] 12%|█▏ | 44249/371472 [3:34:10<26:01:02, 3.49it/s] 12%|█▏ | 44250/371472 [3:34:10<25:46:12, 3.53it/s] 12%|█▏ | 44251/371472 [3:34:11<26:04:13, 3.49it/s] 12%|█▏ | 44252/371472 [3:34:11<24:48:07, 3.66it/s] 12%|█▏ | 44253/371472 [3:34:11<24:25:22, 3.72it/s] 12%|█▏ | 44254/371472 [3:34:12<23:46:42, 3.82it/s] 12%|█▏ | 44255/371472 [3:34:12<24:56:05, 3.65it/s] 12%|█▏ | 44256/371472 [3:34:12<25:37:28, 3.55it/s] 12%|█▏ | 44257/371472 [3:34:12<25:08:55, 3.61it/s] 12%|█▏ | 44258/371472 [3:34:13<25:02:15, 3.63it/s] 12%|█▏ | 44259/371472 [3:34:13<24:43:03, 3.68it/s] 12%|█▏ | 44260/371472 [3:34:13<24:19:45, 3.74it/s] {'loss': 4.4851, 'learning_rate': 8.931942080199962e-07, 'epoch': 1.91} + 12%|█▏ | 44260/371472 [3:34:13<24:19:45, 3.74it/s] 12%|█▏ | 44261/371472 [3:34:13<24:29:17, 3.71it/s] 12%|█▏ | 44262/371472 [3:34:14<26:23:28, 3.44it/s] 12%|█▏ | 44263/371472 [3:34:14<25:47:16, 3.52it/s] 12%|█▏ | 44264/371472 [3:34:14<26:39:55, 3.41it/s] 12%|█▏ | 44265/371472 [3:34:15<25:58:20, 3.50it/s] 12%|█▏ | 44266/371472 [3:34:15<26:06:55, 3.48it/s] 12%|█▏ | 44267/371472 [3:34:15<25:24:58, 3.58it/s] 12%|█▏ | 44268/371472 [3:34:15<24:36:50, 3.69it/s] 12%|█▏ | 44269/371472 [3:34:16<23:59:06, 3.79it/s] 12%|█▏ | 44270/371472 [3:34:16<27:05:55, 3.35it/s] 12%|█▏ | 44271/371472 [3:34:16<27:35:31, 3.29it/s] 12%|█▏ | 44272/371472 [3:34:17<26:06:29, 3.48it/s] 12%|█▏ | 44273/371472 [3:34:17<25:27:10, 3.57it/s] 12%|█▏ | 44274/371472 [3:34:17<26:10:20, 3.47it/s] 12%|█▏ | 44275/371472 [3:34:17<25:24:24, 3.58it/s] 12%|█▏ | 44276/371472 [3:34:18<25:15:56, 3.60it/s] 12%|█▏ | 44277/371472 [3:34:18<24:58:32, 3.64it/s] 12%|█▏ | 44278/371472 [3:34:18<24:55:59, 3.65it/s] 12%|█▏ | 44279/371472 [3:34:19<25:16:36, 3.60it/s] 12%|█▏ | 44280/371472 [3:34:19<24:51:13, 3.66it/s] {'loss': 4.5483, 'learning_rate': 8.931457260445172e-07, 'epoch': 1.91} + 12%|█▏ | 44280/371472 [3:34:19<24:51:13, 3.66it/s] 12%|█▏ | 44281/371472 [3:34:19<25:06:31, 3.62it/s] 12%|█▏ | 44282/371472 [3:34:19<25:37:43, 3.55it/s] 12%|█▏ | 44283/371472 [3:34:20<28:24:24, 3.20it/s] 12%|█▏ | 44284/371472 [3:34:20<28:19:08, 3.21it/s] 12%|█▏ | 44285/371472 [3:34:20<26:47:21, 3.39it/s] 12%|█▏ | 44286/371472 [3:34:21<26:13:18, 3.47it/s] 12%|█▏ | 44287/371472 [3:34:21<29:17:31, 3.10it/s] 12%|█▏ | 44288/371472 [3:34:21<27:51:54, 3.26it/s] 12%|█▏ | 44289/371472 [3:34:22<27:13:59, 3.34it/s] 12%|█▏ | 44290/371472 [3:34:22<26:01:21, 3.49it/s] 12%|█▏ | 44291/371472 [3:34:22<25:36:59, 3.55it/s] 12%|█▏ | 44292/371472 [3:34:22<25:34:27, 3.55it/s] 12%|█▏ | 44293/371472 [3:34:23<26:22:51, 3.45it/s] 12%|█▏ | 44294/371472 [3:34:23<25:34:39, 3.55it/s] 12%|█▏ | 44295/371472 [3:34:23<25:26:07, 3.57it/s] 12%|█▏ | 44296/371472 [3:34:24<26:34:09, 3.42it/s] 12%|█▏ | 44297/371472 [3:34:24<27:21:29, 3.32it/s] 12%|█▏ | 44298/371472 [3:34:24<26:09:41, 3.47it/s] 12%|█▏ | 44299/371472 [3:34:24<25:12:56, 3.60it/s] 12%|█▏ | 44300/371472 [3:34:25<24:58:43, 3.64it/s] {'loss': 4.3183, 'learning_rate': 8.930972440690383e-07, 'epoch': 1.91} + 12%|█▏ | 44300/371472 [3:34:25<24:58:43, 3.64it/s] 12%|█▏ | 44301/371472 [3:34:25<24:22:30, 3.73it/s] 12%|█▏ | 44302/371472 [3:34:25<24:31:57, 3.70it/s] 12%|█▏ | 44303/371472 [3:34:25<24:40:09, 3.68it/s] 12%|█▏ | 44304/371472 [3:34:26<24:53:12, 3.65it/s] 12%|█▏ | 44305/371472 [3:34:26<25:09:17, 3.61it/s] 12%|█▏ | 44306/371472 [3:34:26<25:14:55, 3.60it/s] 12%|█▏ | 44307/371472 [3:34:27<25:36:22, 3.55it/s] 12%|█▏ | 44308/371472 [3:34:27<25:08:20, 3.62it/s] 12%|█▏ | 44309/371472 [3:34:27<28:08:33, 3.23it/s] 12%|█▏ | 44310/371472 [3:34:28<26:43:12, 3.40it/s] 12%|█▏ | 44311/371472 [3:34:28<25:34:55, 3.55it/s] 12%|█▏ | 44312/371472 [3:34:28<26:35:35, 3.42it/s] 12%|█▏ | 44313/371472 [3:34:28<25:33:17, 3.56it/s] 12%|█▏ | 44314/371472 [3:34:29<25:10:57, 3.61it/s] 12%|█▏ | 44315/371472 [3:34:29<25:46:08, 3.53it/s] 12%|█▏ | 44316/371472 [3:34:29<25:45:03, 3.53it/s] 12%|█▏ | 44317/371472 [3:34:30<27:43:55, 3.28it/s] 12%|█▏ | 44318/371472 [3:34:30<26:26:37, 3.44it/s] 12%|█▏ | 44319/371472 [3:34:30<26:19:07, 3.45it/s] 12%|█▏ | 44320/371472 [3:34:30<26:36:56, 3.41it/s] {'loss': 4.5662, 'learning_rate': 8.930487620935594e-07, 'epoch': 1.91} + 12%|█▏ | 44320/371472 [3:34:30<26:36:56, 3.41it/s] 12%|█▏ | 44321/371472 [3:34:31<26:05:46, 3.48it/s] 12%|█▏ | 44322/371472 [3:34:31<25:24:05, 3.58it/s] 12%|█▏ | 44323/371472 [3:34:31<24:59:08, 3.64it/s] 12%|█▏ | 44324/371472 [3:34:31<25:11:36, 3.61it/s] 12%|█▏ | 44325/371472 [3:34:32<25:36:09, 3.55it/s] 12%|█▏ | 44326/371472 [3:34:32<25:04:08, 3.62it/s] 12%|█▏ | 44327/371472 [3:34:32<25:28:50, 3.57it/s] 12%|█▏ | 44328/371472 [3:34:33<25:03:26, 3.63it/s] 12%|█▏ | 44329/371472 [3:34:33<25:30:13, 3.56it/s] 12%|█▏ | 44330/371472 [3:34:33<26:29:49, 3.43it/s] 12%|█▏ | 44331/371472 [3:34:33<26:16:26, 3.46it/s] 12%|█▏ | 44332/371472 [3:34:34<26:25:14, 3.44it/s] 12%|█▏ | 44333/371472 [3:34:34<26:09:17, 3.47it/s] 12%|█▏ | 44334/371472 [3:34:34<26:23:17, 3.44it/s] 12%|█▏ | 44335/371472 [3:34:35<27:26:21, 3.31it/s] 12%|█▏ | 44336/371472 [3:34:35<26:46:20, 3.39it/s] 12%|█▏ | 44337/371472 [3:34:35<26:09:11, 3.47it/s] 12%|█▏ | 44338/371472 [3:34:35<25:56:15, 3.50it/s] 12%|█▏ | 44339/371472 [3:34:36<27:06:07, 3.35it/s] 12%|█▏ | 44340/371472 [3:34:36<25:51:22, 3.51it/s] {'loss': 4.1618, 'learning_rate': 8.930002801180805e-07, 'epoch': 1.91} + 12%|█▏ | 44340/371472 [3:34:36<25:51:22, 3.51it/s] 12%|█▏ | 44341/371472 [3:34:36<25:48:10, 3.52it/s] 12%|█▏ | 44342/371472 [3:34:37<25:03:19, 3.63it/s] 12%|█▏ | 44343/371472 [3:34:37<24:47:55, 3.66it/s] 12%|█▏ | 44344/371472 [3:34:37<24:45:29, 3.67it/s] 12%|█▏ | 44345/371472 [3:34:37<25:02:57, 3.63it/s] 12%|█▏ | 44346/371472 [3:34:38<25:26:31, 3.57it/s] 12%|█▏ | 44347/371472 [3:34:38<25:12:59, 3.60it/s] 12%|█▏ | 44348/371472 [3:34:38<24:06:45, 3.77it/s] 12%|█▏ | 44349/371472 [3:34:38<23:43:15, 3.83it/s] 12%|█▏ | 44350/371472 [3:34:39<25:09:52, 3.61it/s] 12%|█▏ | 44351/371472 [3:34:39<26:13:13, 3.47it/s] 12%|█▏ | 44352/371472 [3:34:39<27:17:41, 3.33it/s] 12%|█▏ | 44353/371472 [3:34:40<26:36:20, 3.42it/s] 12%|█▏ | 44354/371472 [3:34:40<26:46:34, 3.39it/s] 12%|█▏ | 44355/371472 [3:34:40<27:01:11, 3.36it/s] 12%|█▏ | 44356/371472 [3:34:41<25:49:31, 3.52it/s] 12%|█▏ | 44357/371472 [3:34:41<24:50:11, 3.66it/s] 12%|█▏ | 44358/371472 [3:34:41<27:25:14, 3.31it/s] 12%|█▏ | 44359/371472 [3:34:42<28:42:19, 3.17it/s] 12%|█▏ | 44360/371472 [3:34:42<28:53:51, 3.14it/s] {'loss': 4.5918, 'learning_rate': 8.929517981426016e-07, 'epoch': 1.91} + 12%|█▏ | 44360/371472 [3:34:42<28:53:51, 3.14it/s] 12%|█▏ | 44361/371472 [3:34:42<27:29:34, 3.31it/s] 12%|█▏ | 44362/371472 [3:34:42<26:36:38, 3.41it/s] 12%|█▏ | 44363/371472 [3:34:43<25:24:01, 3.58it/s] 12%|█▏ | 44364/371472 [3:34:43<24:57:45, 3.64it/s] 12%|█▏ | 44365/371472 [3:34:43<25:02:00, 3.63it/s] 12%|█▏ | 44366/371472 [3:34:43<24:28:10, 3.71it/s] 12%|█▏ | 44367/371472 [3:34:44<24:02:19, 3.78it/s] 12%|█▏ | 44368/371472 [3:34:44<24:23:21, 3.73it/s] 12%|█▏ | 44369/371472 [3:34:44<24:09:41, 3.76it/s] 12%|█▏ | 44370/371472 [3:34:44<23:39:00, 3.84it/s] 12%|█▏ | 44371/371472 [3:34:45<24:05:19, 3.77it/s] 12%|█▏ | 44372/371472 [3:34:45<24:18:29, 3.74it/s] 12%|█▏ | 44373/371472 [3:34:45<24:56:56, 3.64it/s] 12%|█▏ | 44374/371472 [3:34:46<24:20:57, 3.73it/s] 12%|█▏ | 44375/371472 [3:34:46<25:43:24, 3.53it/s] 12%|█▏ | 44376/371472 [3:34:46<25:21:20, 3.58it/s] 12%|█▏ | 44377/371472 [3:34:46<25:27:00, 3.57it/s] 12%|█▏ | 44378/371472 [3:34:47<25:11:55, 3.61it/s] 12%|█▏ | 44379/371472 [3:34:47<24:46:39, 3.67it/s] 12%|█▏ | 44380/371472 [3:34:47<25:12:07, 3.61it/s] {'loss': 4.41, 'learning_rate': 8.929033161671227e-07, 'epoch': 1.91} + 12%|█▏ | 44380/371472 [3:34:47<25:12:07, 3.61it/s] 12%|█▏ | 44381/371472 [3:34:48<25:05:29, 3.62it/s] 12%|█▏ | 44382/371472 [3:34:48<24:20:36, 3.73it/s] 12%|█▏ | 44383/371472 [3:34:48<24:49:46, 3.66it/s] 12%|█▏ | 44384/371472 [3:34:48<25:44:02, 3.53it/s] 12%|█▏ | 44385/371472 [3:34:49<25:40:10, 3.54it/s] 12%|█▏ | 44386/371472 [3:34:49<25:54:48, 3.51it/s] 12%|█▏ | 44387/371472 [3:34:49<25:36:57, 3.55it/s] 12%|█▏ | 44388/371472 [3:34:49<24:38:15, 3.69it/s] 12%|█▏ | 44389/371472 [3:34:50<26:09:09, 3.47it/s] 12%|█▏ | 44390/371472 [3:34:50<25:31:36, 3.56it/s] 12%|█▏ | 44391/371472 [3:34:50<25:25:59, 3.57it/s] 12%|█▏ | 44392/371472 [3:34:51<26:33:13, 3.42it/s] 12%|█▏ | 44393/371472 [3:34:51<26:38:29, 3.41it/s] 12%|█▏ | 44394/371472 [3:34:51<26:21:20, 3.45it/s] 12%|█▏ | 44395/371472 [3:34:52<25:51:57, 3.51it/s] 12%|█▏ | 44396/371472 [3:34:52<25:41:58, 3.54it/s] 12%|█▏ | 44397/371472 [3:34:52<25:06:58, 3.62it/s] 12%|█▏ | 44398/371472 [3:34:52<24:43:44, 3.67it/s] 12%|█▏ | 44399/371472 [3:34:53<25:26:38, 3.57it/s] 12%|█▏ | 44400/371472 [3:34:53<24:31:48, 3.70it/s] {'loss': 4.3782, 'learning_rate': 8.928548341916439e-07, 'epoch': 1.91} + 12%|█▏ | 44400/371472 [3:34:53<24:31:48, 3.70it/s] 12%|█▏ | 44401/371472 [3:34:53<25:01:47, 3.63it/s] 12%|█▏ | 44402/371472 [3:34:53<26:04:17, 3.48it/s] 12%|█▏ | 44403/371472 [3:34:54<26:34:09, 3.42it/s] 12%|█▏ | 44404/371472 [3:34:54<28:12:45, 3.22it/s] 12%|█▏ | 44405/371472 [3:34:54<28:03:50, 3.24it/s] 12%|█▏ | 44406/371472 [3:34:55<28:25:56, 3.20it/s] 12%|█▏ | 44407/371472 [3:34:55<27:17:52, 3.33it/s] 12%|█▏ | 44408/371472 [3:34:55<27:40:08, 3.28it/s] 12%|█▏ | 44409/371472 [3:34:56<26:57:32, 3.37it/s] 12%|█▏ | 44410/371472 [3:34:56<27:01:06, 3.36it/s] 12%|█▏ | 44411/371472 [3:34:56<25:54:48, 3.51it/s] 12%|█▏ | 44412/371472 [3:34:57<27:21:46, 3.32it/s] 12%|█▏ | 44413/371472 [3:34:57<25:41:19, 3.54it/s] 12%|█▏ | 44414/371472 [3:34:57<24:42:44, 3.68it/s] 12%|█▏ | 44415/371472 [3:34:57<24:26:41, 3.72it/s] 12%|█▏ | 44416/371472 [3:34:58<24:19:53, 3.73it/s] 12%|█▏ | 44417/371472 [3:34:58<26:14:17, 3.46it/s] 12%|█▏ | 44418/371472 [3:34:58<25:26:33, 3.57it/s] 12%|█▏ | 44419/371472 [3:34:58<24:35:22, 3.69it/s] 12%|█▏ | 44420/371472 [3:34:59<23:54:57, 3.80it/s] {'loss': 4.518, 'learning_rate': 8.928063522161649e-07, 'epoch': 1.91} + 12%|█▏ | 44420/371472 [3:34:59<23:54:57, 3.80it/s] 12%|█▏ | 44421/371472 [3:34:59<24:28:25, 3.71it/s] 12%|█▏ | 44422/371472 [3:34:59<25:29:49, 3.56it/s] 12%|█▏ | 44423/371472 [3:34:59<24:33:44, 3.70it/s] 12%|█▏ | 44424/371472 [3:35:00<26:38:20, 3.41it/s] 12%|█▏ | 44425/371472 [3:35:00<27:31:31, 3.30it/s] 12%|█▏ | 44426/371472 [3:35:00<26:49:49, 3.39it/s] 12%|█▏ | 44427/371472 [3:35:01<26:35:34, 3.42it/s] 12%|█▏ | 44428/371472 [3:35:01<26:10:36, 3.47it/s] 12%|█▏ | 44429/371472 [3:35:01<25:36:43, 3.55it/s] 12%|█▏ | 44430/371472 [3:35:02<27:13:56, 3.34it/s] 12%|█▏ | 44431/371472 [3:35:02<28:45:55, 3.16it/s] 12%|█▏ | 44432/371472 [3:35:02<27:42:56, 3.28it/s] 12%|█▏ | 44433/371472 [3:35:03<29:37:56, 3.07it/s] 12%|█▏ | 44434/371472 [3:35:03<27:43:26, 3.28it/s] 12%|█▏ | 44435/371472 [3:35:03<26:55:48, 3.37it/s] 12%|█▏ | 44436/371472 [3:35:03<27:28:58, 3.31it/s] 12%|█▏ | 44437/371472 [3:35:04<26:13:00, 3.47it/s] 12%|█▏ | 44438/371472 [3:35:04<25:07:37, 3.62it/s] 12%|█▏ | 44439/371472 [3:35:04<24:33:39, 3.70it/s] 12%|█▏ | 44440/371472 [3:35:04<25:04:23, 3.62it/s] {'loss': 4.4662, 'learning_rate': 8.92757870240686e-07, 'epoch': 1.91} + 12%|█▏ | 44440/371472 [3:35:04<25:04:23, 3.62it/s] 12%|█▏ | 44441/371472 [3:35:05<28:02:41, 3.24it/s] 12%|█▏ | 44442/371472 [3:35:05<27:18:43, 3.33it/s] 12%|█▏ | 44443/371472 [3:35:05<26:15:59, 3.46it/s] 12%|█▏ | 44444/371472 [3:35:06<27:18:46, 3.33it/s] 12%|█▏ | 44445/371472 [3:35:06<25:46:17, 3.52it/s] 12%|█▏ | 44446/371472 [3:35:06<25:11:42, 3.61it/s] 12%|█▏ | 44447/371472 [3:35:07<25:10:18, 3.61it/s] 12%|█▏ | 44448/371472 [3:35:07<25:04:26, 3.62it/s] 12%|█▏ | 44449/371472 [3:35:07<25:25:53, 3.57it/s] 12%|█▏ | 44450/371472 [3:35:07<25:00:53, 3.63it/s] 12%|█▏ | 44451/371472 [3:35:08<24:34:20, 3.70it/s] 12%|█▏ | 44452/371472 [3:35:08<25:13:29, 3.60it/s] 12%|█▏ | 44453/371472 [3:35:08<25:20:31, 3.58it/s] 12%|█▏ | 44454/371472 [3:35:08<26:14:35, 3.46it/s] 12%|█▏ | 44455/371472 [3:35:09<25:37:41, 3.54it/s] 12%|█▏ | 44456/371472 [3:35:09<25:08:58, 3.61it/s] 12%|█▏ | 44457/371472 [3:35:09<27:22:44, 3.32it/s] 12%|█▏ | 44458/371472 [3:35:10<26:12:05, 3.47it/s] 12%|█▏ | 44459/371472 [3:35:10<25:43:11, 3.53it/s] 12%|█▏ | 44460/371472 [3:35:10<26:10:55, 3.47it/s] {'loss': 4.2872, 'learning_rate': 8.927093882652071e-07, 'epoch': 1.91} + 12%|█▏ | 44460/371472 [3:35:10<26:10:55, 3.47it/s] 12%|█▏ | 44461/371472 [3:35:11<27:09:59, 3.34it/s] 12%|█▏ | 44462/371472 [3:35:11<27:20:48, 3.32it/s] 12%|█▏ | 44463/371472 [3:35:11<28:46:53, 3.16it/s] 12%|█▏ | 44464/371472 [3:35:12<29:07:53, 3.12it/s] 12%|█▏ | 44465/371472 [3:35:12<27:20:09, 3.32it/s] 12%|█▏ | 44466/371472 [3:35:12<28:59:09, 3.13it/s] 12%|█▏ | 44467/371472 [3:35:12<28:29:49, 3.19it/s] 12%|█▏ | 44468/371472 [3:35:13<28:03:37, 3.24it/s] 12%|█▏ | 44469/371472 [3:35:13<26:45:08, 3.40it/s] 12%|█▏ | 44470/371472 [3:35:13<28:08:11, 3.23it/s] 12%|█▏ | 44471/371472 [3:35:14<27:10:17, 3.34it/s] 12%|█▏ | 44472/371472 [3:35:14<26:58:17, 3.37it/s] 12%|█▏ | 44473/371472 [3:35:14<26:21:56, 3.45it/s] 12%|█▏ | 44474/371472 [3:35:15<27:31:46, 3.30it/s] 12%|█▏ | 44475/371472 [3:35:15<26:46:54, 3.39it/s] 12%|█▏ | 44476/371472 [3:35:15<27:15:30, 3.33it/s] 12%|█▏ | 44477/371472 [3:35:15<26:06:44, 3.48it/s] 12%|█▏ | 44478/371472 [3:35:16<25:31:19, 3.56it/s] 12%|█▏ | 44479/371472 [3:35:16<25:05:54, 3.62it/s] 12%|█▏ | 44480/371472 [3:35:16<25:58:34, 3.50it/s] {'loss': 4.3866, 'learning_rate': 8.926609062897283e-07, 'epoch': 1.92} + 12%|█▏ | 44480/371472 [3:35:16<25:58:34, 3.50it/s] 12%|█▏ | 44481/371472 [3:35:17<27:25:22, 3.31it/s] 12%|█▏ | 44482/371472 [3:35:17<26:33:14, 3.42it/s] 12%|█▏ | 44483/371472 [3:35:17<26:05:55, 3.48it/s] 12%|█▏ | 44484/371472 [3:35:17<28:03:08, 3.24it/s] 12%|█▏ | 44485/371472 [3:35:18<28:24:17, 3.20it/s] 12%|█▏ | 44486/371472 [3:35:18<26:55:09, 3.37it/s] 12%|█▏ | 44487/371472 [3:35:18<25:48:08, 3.52it/s] 12%|█▏ | 44488/371472 [3:35:19<26:20:05, 3.45it/s] 12%|█▏ | 44489/371472 [3:35:19<27:09:36, 3.34it/s] 12%|█▏ | 44490/371472 [3:35:19<28:17:08, 3.21it/s] 12%|█▏ | 44491/371472 [3:35:20<27:24:53, 3.31it/s] 12%|█▏ | 44492/371472 [3:35:20<26:38:48, 3.41it/s] 12%|█▏ | 44493/371472 [3:35:20<26:53:35, 3.38it/s] 12%|█▏ | 44494/371472 [3:35:20<26:34:59, 3.42it/s] 12%|█▏ | 44495/371472 [3:35:21<26:09:54, 3.47it/s] 12%|█▏ | 44496/371472 [3:35:21<25:19:01, 3.59it/s] 12%|█▏ | 44497/371472 [3:35:21<25:07:59, 3.61it/s] 12%|█▏ | 44498/371472 [3:35:21<24:46:40, 3.67it/s] 12%|█▏ | 44499/371472 [3:35:22<25:28:30, 3.57it/s] 12%|█▏ | 44500/371472 [3:35:22<24:43:15, 3.67it/s] {'loss': 4.436, 'learning_rate': 8.926124243142493e-07, 'epoch': 1.92} + 12%|█▏ | 44500/371472 [3:35:22<24:43:15, 3.67it/s] 12%|█▏ | 44501/371472 [3:35:22<24:24:34, 3.72it/s] 12%|█▏ | 44502/371472 [3:35:23<24:17:39, 3.74it/s] 12%|█▏ | 44503/371472 [3:35:23<24:40:31, 3.68it/s] 12%|█▏ | 44504/371472 [3:35:23<25:49:02, 3.52it/s] 12%|█▏ | 44505/371472 [3:35:23<26:07:20, 3.48it/s] 12%|█▏ | 44506/371472 [3:35:24<27:10:18, 3.34it/s] 12%|█▏ | 44507/371472 [3:35:24<26:23:00, 3.44it/s] 12%|█▏ | 44508/371472 [3:35:24<27:25:06, 3.31it/s] 12%|█▏ | 44509/371472 [3:35:25<26:51:49, 3.38it/s] 12%|█▏ | 44510/371472 [3:35:25<26:11:08, 3.47it/s] 12%|█▏ | 44511/371472 [3:35:25<25:55:39, 3.50it/s] 12%|█▏ | 44512/371472 [3:35:25<25:45:34, 3.53it/s] 12%|█▏ | 44513/371472 [3:35:26<25:20:31, 3.58it/s] 12%|█▏ | 44514/371472 [3:35:26<24:24:00, 3.72it/s] 12%|█▏ | 44515/371472 [3:35:26<24:59:07, 3.63it/s] 12%|█▏ | 44516/371472 [3:35:27<26:50:55, 3.38it/s] 12%|█▏ | 44517/371472 [3:35:27<26:30:58, 3.43it/s] 12%|█▏ | 44518/371472 [3:35:27<26:03:26, 3.49it/s] 12%|█▏ | 44519/371472 [3:35:27<24:56:33, 3.64it/s] 12%|█▏ | 44520/371472 [3:35:28<28:00:36, 3.24it/s] {'loss': 4.3438, 'learning_rate': 8.925639423387705e-07, 'epoch': 1.92} + 12%|█▏ | 44520/371472 [3:35:28<28:00:36, 3.24it/s] 12%|█▏ | 44521/371472 [3:35:28<28:23:02, 3.20it/s] 12%|█▏ | 44522/371472 [3:35:28<28:10:51, 3.22it/s] 12%|█▏ | 44523/371472 [3:35:29<26:44:38, 3.40it/s] 12%|█▏ | 44524/371472 [3:35:29<26:04:50, 3.48it/s] 12%|█▏ | 44525/371472 [3:35:29<25:44:54, 3.53it/s] 12%|█▏ | 44526/371472 [3:35:29<25:05:56, 3.62it/s] 12%|█▏ | 44527/371472 [3:35:30<25:12:34, 3.60it/s] 12%|█▏ | 44528/371472 [3:35:30<24:35:58, 3.69it/s] 12%|█▏ | 44529/371472 [3:35:30<24:28:12, 3.71it/s] 12%|█▏ | 44530/371472 [3:35:31<25:02:26, 3.63it/s] 12%|█▏ | 44531/371472 [3:35:31<24:58:44, 3.64it/s] 12%|█▏ | 44532/371472 [3:35:31<24:36:23, 3.69it/s] 12%|█▏ | 44533/371472 [3:35:31<24:29:55, 3.71it/s] 12%|█▏ | 44534/371472 [3:35:32<25:11:14, 3.61it/s] 12%|█▏ | 44535/371472 [3:35:32<27:24:34, 3.31it/s] 12%|█▏ | 44536/371472 [3:35:32<26:06:08, 3.48it/s] 12%|█▏ | 44537/371472 [3:35:33<26:00:40, 3.49it/s] 12%|█▏ | 44538/371472 [3:35:33<25:38:36, 3.54it/s] 12%|█▏ | 44539/371472 [3:35:33<26:46:52, 3.39it/s] 12%|█▏ | 44540/371472 [3:35:33<27:17:57, 3.33it/s] {'loss': 4.4794, 'learning_rate': 8.925154603632916e-07, 'epoch': 1.92} + 12%|█▏ | 44540/371472 [3:35:33<27:17:57, 3.33it/s] 12%|█▏ | 44541/371472 [3:35:34<26:55:35, 3.37it/s] 12%|█▏ | 44542/371472 [3:35:34<27:32:57, 3.30it/s] 12%|█▏ | 44543/371472 [3:35:34<27:19:58, 3.32it/s] 12%|█▏ | 44544/371472 [3:35:35<26:44:26, 3.40it/s] 12%|█▏ | 44545/371472 [3:35:35<25:53:08, 3.51it/s] 12%|█▏ | 44546/371472 [3:35:35<24:52:18, 3.65it/s] 12%|█▏ | 44547/371472 [3:35:36<26:13:30, 3.46it/s] 12%|█▏ | 44548/371472 [3:35:36<26:43:32, 3.40it/s] 12%|█▏ | 44549/371472 [3:35:36<26:29:40, 3.43it/s] 12%|█▏ | 44550/371472 [3:35:36<25:35:35, 3.55it/s] 12%|█▏ | 44551/371472 [3:35:37<25:08:11, 3.61it/s] 12%|█▏ | 44552/371472 [3:35:37<25:13:40, 3.60it/s] 12%|█▏ | 44553/371472 [3:35:37<24:54:16, 3.65it/s] 12%|█▏ | 44554/371472 [3:35:38<28:08:24, 3.23it/s] 12%|█▏ | 44555/371472 [3:35:38<27:12:27, 3.34it/s] 12%|█▏ | 44556/371472 [3:35:38<26:09:17, 3.47it/s] 12%|█▏ | 44557/371472 [3:35:38<25:49:14, 3.52it/s] 12%|█▏ | 44558/371472 [3:35:39<24:54:04, 3.65it/s] 12%|█▏ | 44559/371472 [3:35:39<24:42:58, 3.67it/s] 12%|█▏ | 44560/371472 [3:35:39<25:49:59, 3.52it/s] {'loss': 4.2837, 'learning_rate': 8.924669783878125e-07, 'epoch': 1.92} + 12%|█▏ | 44560/371472 [3:35:39<25:49:59, 3.52it/s] 12%|█▏ | 44561/371472 [3:35:40<26:57:05, 3.37it/s] 12%|█▏ | 44562/371472 [3:35:40<29:00:12, 3.13it/s] 12%|█▏ | 44563/371472 [3:35:40<28:05:15, 3.23it/s] 12%|█▏ | 44564/371472 [3:35:40<27:08:53, 3.34it/s] 12%|█▏ | 44565/371472 [3:35:41<27:10:44, 3.34it/s] 12%|█▏ | 44566/371472 [3:35:41<26:54:41, 3.37it/s] 12%|█▏ | 44567/371472 [3:35:41<26:12:00, 3.47it/s] 12%|█▏ | 44568/371472 [3:35:42<28:24:59, 3.20it/s] 12%|█▏ | 44569/371472 [3:35:42<27:32:15, 3.30it/s] 12%|█▏ | 44570/371472 [3:35:42<29:05:16, 3.12it/s] 12%|█▏ | 44571/371472 [3:35:43<31:01:18, 2.93it/s] 12%|█▏ | 44572/371472 [3:35:43<30:11:27, 3.01it/s] 12%|█▏ | 44573/371472 [3:35:43<31:07:29, 2.92it/s] 12%|█▏ | 44574/371472 [3:35:44<28:47:37, 3.15it/s] 12%|█▏ | 44575/371472 [3:35:44<27:47:37, 3.27it/s] 12%|█▏ | 44576/371472 [3:35:44<26:40:26, 3.40it/s] 12%|█▏ | 44577/371472 [3:35:44<26:03:34, 3.48it/s] 12%|█▏ | 44578/371472 [3:35:45<25:22:06, 3.58it/s] 12%|█▏ | 44579/371472 [3:35:45<25:50:19, 3.51it/s] 12%|█▏ | 44580/371472 [3:35:45<26:39:46, 3.41it/s] {'loss': 4.1781, 'learning_rate': 8.924184964123337e-07, 'epoch': 1.92} + 12%|█▏ | 44580/371472 [3:35:45<26:39:46, 3.41it/s] 12%|█▏ | 44581/371472 [3:35:46<26:57:06, 3.37it/s] 12%|█▏ | 44582/371472 [3:35:46<26:45:26, 3.39it/s] 12%|█▏ | 44583/371472 [3:35:46<26:16:39, 3.46it/s] 12%|█▏ | 44584/371472 [3:35:46<25:57:22, 3.50it/s] 12%|█▏ | 44585/371472 [3:35:47<24:47:43, 3.66it/s] 12%|█▏ | 44586/371472 [3:35:47<24:04:39, 3.77it/s] 12%|█▏ | 44587/371472 [3:35:47<25:09:33, 3.61it/s] 12%|█▏ | 44588/371472 [3:35:48<24:53:25, 3.65it/s] 12%|█▏ | 44589/371472 [3:35:48<25:49:29, 3.52it/s] 12%|█▏ | 44590/371472 [3:35:48<25:23:39, 3.58it/s] 12%|█▏ | 44591/371472 [3:35:48<26:59:02, 3.36it/s] 12%|█▏ | 44592/371472 [3:35:49<27:57:29, 3.25it/s] 12%|█▏ | 44593/371472 [3:35:49<27:17:07, 3.33it/s] 12%|█▏ | 44594/371472 [3:35:49<26:56:50, 3.37it/s] 12%|█▏ | 44595/371472 [3:35:50<28:11:01, 3.22it/s] 12%|█▏ | 44596/371472 [3:35:50<27:27:01, 3.31it/s] 12%|█▏ | 44597/371472 [3:35:50<25:56:43, 3.50it/s] 12%|█▏ | 44598/371472 [3:35:51<25:21:51, 3.58it/s] 12%|█▏ | 44599/371472 [3:35:51<28:53:20, 3.14it/s] 12%|█▏ | 44600/371472 [3:35:51<27:53:00, 3.26it/s] {'loss': 4.244, 'learning_rate': 8.923700144368549e-07, 'epoch': 1.92} + 12%|█▏ | 44600/371472 [3:35:51<27:53:00, 3.26it/s] 12%|█▏ | 44601/371472 [3:35:51<26:41:00, 3.40it/s] 12%|█▏ | 44602/371472 [3:35:52<27:09:58, 3.34it/s] 12%|█▏ | 44603/371472 [3:35:52<27:19:52, 3.32it/s] 12%|█▏ | 44604/371472 [3:35:52<26:47:12, 3.39it/s] 12%|█▏ | 44605/371472 [3:35:53<26:55:43, 3.37it/s] 12%|█▏ | 44606/371472 [3:35:53<26:31:08, 3.42it/s] 12%|█▏ | 44607/371472 [3:35:53<25:51:57, 3.51it/s] 12%|█▏ | 44608/371472 [3:35:54<25:51:04, 3.51it/s] 12%|█▏ | 44609/371472 [3:35:54<25:55:28, 3.50it/s] 12%|█▏ | 44610/371472 [3:35:54<25:57:09, 3.50it/s] 12%|█▏ | 44611/371472 [3:35:54<25:39:31, 3.54it/s] 12%|█▏ | 44612/371472 [3:35:55<26:22:31, 3.44it/s] 12%|█▏ | 44613/371472 [3:35:55<26:27:19, 3.43it/s] 12%|█▏ | 44614/371472 [3:35:55<26:28:24, 3.43it/s] 12%|█▏ | 44615/371472 [3:35:56<25:38:28, 3.54it/s] 12%|█▏ | 44616/371472 [3:35:56<25:32:18, 3.56it/s] 12%|█▏ | 44617/371472 [3:35:56<26:09:27, 3.47it/s] 12%|█▏ | 44618/371472 [3:35:56<25:48:26, 3.52it/s] 12%|█▏ | 44619/371472 [3:35:57<25:20:10, 3.58it/s] 12%|█▏ | 44620/371472 [3:35:57<25:46:41, 3.52it/s] {'loss': 4.2507, 'learning_rate': 8.92321532461376e-07, 'epoch': 1.92} + 12%|█▏ | 44620/371472 [3:35:57<25:46:41, 3.52it/s] 12%|█▏ | 44621/371472 [3:35:57<25:38:52, 3.54it/s] 12%|█▏ | 44622/371472 [3:35:57<25:14:03, 3.60it/s] 12%|█▏ | 44623/371472 [3:35:58<24:48:04, 3.66it/s] 12%|█▏ | 44624/371472 [3:35:58<25:02:52, 3.62it/s] 12%|█▏ | 44625/371472 [3:35:58<26:04:19, 3.48it/s] 12%|█▏ | 44626/371472 [3:35:59<25:21:09, 3.58it/s] 12%|█▏ | 44627/371472 [3:35:59<27:09:53, 3.34it/s] 12%|█▏ | 44628/371472 [3:35:59<26:29:59, 3.43it/s] 12%|█▏ | 44629/371472 [3:36:00<27:08:45, 3.34it/s] 12%|█▏ | 44630/371472 [3:36:00<30:50:25, 2.94it/s] 12%|█▏ | 44631/371472 [3:36:00<28:57:47, 3.13it/s] 12%|█▏ | 44632/371472 [3:36:00<27:06:26, 3.35it/s] 12%|█▏ | 44633/371472 [3:36:01<27:48:36, 3.26it/s] 12%|█▏ | 44634/371472 [3:36:01<28:03:32, 3.24it/s] 12%|█▏ | 44635/371472 [3:36:01<26:53:21, 3.38it/s] 12%|█▏ | 44636/371472 [3:36:02<27:46:52, 3.27it/s] 12%|█▏ | 44637/371472 [3:36:02<26:28:42, 3.43it/s] 12%|█▏ | 44638/371472 [3:36:02<25:47:12, 3.52it/s] 12%|█▏ | 44639/371472 [3:36:02<25:00:36, 3.63it/s] 12%|█▏ | 44640/371472 [3:36:03<25:36:42, 3.54it/s] {'loss': 4.3202, 'learning_rate': 8.922730504858971e-07, 'epoch': 1.92} + 12%|█▏ | 44640/371472 [3:36:03<25:36:42, 3.54it/s] 12%|█▏ | 44641/371472 [3:36:03<25:47:05, 3.52it/s] 12%|█▏ | 44642/371472 [3:36:03<25:33:23, 3.55it/s] 12%|█▏ | 44643/371472 [3:36:04<25:55:31, 3.50it/s] 12%|█▏ | 44644/371472 [3:36:04<26:04:09, 3.48it/s] 12%|█▏ | 44645/371472 [3:36:04<25:42:23, 3.53it/s] 12%|█▏ | 44646/371472 [3:36:04<25:10:41, 3.61it/s] 12%|█▏ | 44647/371472 [3:36:05<27:35:16, 3.29it/s] 12%|█▏ | 44648/371472 [3:36:05<27:59:25, 3.24it/s] 12%|█▏ | 44649/371472 [3:36:05<26:26:54, 3.43it/s] 12%|█▏ | 44650/371472 [3:36:06<25:40:41, 3.54it/s] 12%|█▏ | 44651/371472 [3:36:06<24:55:10, 3.64it/s] 12%|█▏ | 44652/371472 [3:36:06<24:55:10, 3.64it/s] 12%|█▏ | 44653/371472 [3:36:06<24:43:33, 3.67it/s] 12%|█▏ | 44654/371472 [3:36:07<26:06:56, 3.48it/s] 12%|█▏ | 44655/371472 [3:36:07<26:12:41, 3.46it/s] 12%|█▏ | 44656/371472 [3:36:07<26:33:48, 3.42it/s] 12%|█▏ | 44657/371472 [3:36:08<26:23:38, 3.44it/s] 12%|█▏ | 44658/371472 [3:36:08<25:13:04, 3.60it/s] 12%|█▏ | 44659/371472 [3:36:08<25:37:39, 3.54it/s] 12%|█▏ | 44660/371472 [3:36:08<25:09:37, 3.61it/s] {'loss': 4.3428, 'learning_rate': 8.922245685104183e-07, 'epoch': 1.92} + 12%|█▏ | 44660/371472 [3:36:08<25:09:37, 3.61it/s] 12%|█▏ | 44661/371472 [3:36:09<26:40:33, 3.40it/s] 12%|█▏ | 44662/371472 [3:36:09<26:21:48, 3.44it/s] 12%|█▏ | 44663/371472 [3:36:09<25:54:04, 3.50it/s] 12%|█▏ | 44664/371472 [3:36:10<25:54:50, 3.50it/s] 12%|█▏ | 44665/371472 [3:36:10<26:18:55, 3.45it/s] 12%|█▏ | 44666/371472 [3:36:10<27:42:02, 3.28it/s] 12%|█▏ | 44667/371472 [3:36:11<28:04:20, 3.23it/s] 12%|█▏ | 44668/371472 [3:36:11<26:34:40, 3.42it/s] 12%|█▏ | 44669/371472 [3:36:11<26:34:43, 3.42it/s] 12%|█▏ | 44670/371472 [3:36:11<25:32:43, 3.55it/s] 12%|█▏ | 44671/371472 [3:36:12<25:00:19, 3.63it/s] 12%|█▏ | 44672/371472 [3:36:12<24:30:10, 3.70it/s] 12%|█▏ | 44673/371472 [3:36:12<26:46:46, 3.39it/s] 12%|█▏ | 44674/371472 [3:36:13<26:16:31, 3.45it/s] 12%|█▏ | 44675/371472 [3:36:13<26:30:49, 3.42it/s] 12%|█▏ | 44676/371472 [3:36:13<27:52:08, 3.26it/s] 12%|█▏ | 44677/371472 [3:36:13<27:16:09, 3.33it/s] 12%|█▏ | 44678/371472 [3:36:14<27:10:17, 3.34it/s] 12%|█▏ | 44679/371472 [3:36:14<26:45:25, 3.39it/s] 12%|█▏ | 44680/371472 [3:36:14<26:35:53, 3.41it/s] {'loss': 4.2586, 'learning_rate': 8.921760865349393e-07, 'epoch': 1.92} + 12%|█▏ | 44680/371472 [3:36:14<26:35:53, 3.41it/s] 12%|█▏ | 44681/371472 [3:36:15<26:18:05, 3.45it/s] 12%|█▏ | 44682/371472 [3:36:15<26:37:30, 3.41it/s] 12%|█▏ | 44683/371472 [3:36:15<25:36:04, 3.55it/s] 12%|█▏ | 44684/371472 [3:36:15<25:07:19, 3.61it/s] 12%|█▏ | 44685/371472 [3:36:16<25:29:31, 3.56it/s] 12%|█▏ | 44686/371472 [3:36:16<24:35:31, 3.69it/s] 12%|█▏ | 44687/371472 [3:36:16<25:08:02, 3.61it/s] 12%|█▏ | 44688/371472 [3:36:17<26:55:09, 3.37it/s] 12%|█▏ | 44689/371472 [3:36:17<26:54:37, 3.37it/s] 12%|█▏ | 44690/371472 [3:36:17<25:24:28, 3.57it/s] 12%|█▏ | 44691/371472 [3:36:17<24:57:00, 3.64it/s] 12%|█▏ | 44692/371472 [3:36:18<25:18:15, 3.59it/s] 12%|█▏ | 44693/371472 [3:36:18<25:38:59, 3.54it/s] 12%|█▏ | 44694/371472 [3:36:18<25:41:27, 3.53it/s] 12%|█▏ | 44695/371472 [3:36:19<25:23:57, 3.57it/s] 12%|█▏ | 44696/371472 [3:36:19<24:47:39, 3.66it/s] 12%|█▏ | 44697/371472 [3:36:19<24:22:57, 3.72it/s] 12%|█▏ | 44698/371472 [3:36:19<24:03:55, 3.77it/s] 12%|█▏ | 44699/371472 [3:36:20<27:10:08, 3.34it/s] 12%|█▏ | 44700/371472 [3:36:20<26:56:39, 3.37it/s] {'loss': 4.2734, 'learning_rate': 8.921276045594604e-07, 'epoch': 1.93} + 12%|█▏ | 44700/371472 [3:36:20<26:56:39, 3.37it/s] 12%|█▏ | 44701/371472 [3:36:20<26:51:42, 3.38it/s] 12%|█▏ | 44702/371472 [3:36:21<26:49:13, 3.38it/s] 12%|█▏ | 44703/371472 [3:36:21<26:16:23, 3.45it/s] 12%|█▏ | 44704/371472 [3:36:21<25:36:12, 3.55it/s] 12%|█▏ | 44705/371472 [3:36:21<26:22:05, 3.44it/s] 12%|█▏ | 44706/371472 [3:36:22<25:34:41, 3.55it/s] 12%|█▏ | 44707/371472 [3:36:22<27:15:50, 3.33it/s] 12%|█▏ | 44708/371472 [3:36:22<26:27:43, 3.43it/s] 12%|█▏ | 44709/371472 [3:36:23<26:46:12, 3.39it/s] 12%|█▏ | 44710/371472 [3:36:23<26:30:14, 3.42it/s] 12%|█▏ | 44711/371472 [3:36:23<25:42:24, 3.53it/s] 12%|█▏ | 44712/371472 [3:36:23<24:43:52, 3.67it/s] 12%|█▏ | 44713/371472 [3:36:24<24:05:28, 3.77it/s] 12%|█▏ | 44714/371472 [3:36:24<25:11:39, 3.60it/s] 12%|█▏ | 44715/371472 [3:36:24<25:33:01, 3.55it/s] 12%|█▏ | 44716/371472 [3:36:25<27:07:54, 3.35it/s] 12%|█▏ | 44717/371472 [3:36:25<25:47:42, 3.52it/s] 12%|█▏ | 44718/371472 [3:36:25<28:47:38, 3.15it/s] 12%|█▏ | 44719/371472 [3:36:26<26:52:59, 3.38it/s] 12%|█▏ | 44720/371472 [3:36:26<28:03:36, 3.23it/s] {'loss': 4.2984, 'learning_rate': 8.920791225839815e-07, 'epoch': 1.93} + 12%|█▏ | 44720/371472 [3:36:26<28:03:36, 3.23it/s] 12%|█▏ | 44721/371472 [3:36:26<26:44:10, 3.39it/s] 12%|█▏ | 44722/371472 [3:36:26<25:26:29, 3.57it/s] 12%|█▏ | 44723/371472 [3:36:27<24:57:06, 3.64it/s] 12%|█▏ | 44724/371472 [3:36:27<24:46:59, 3.66it/s] 12%|█▏ | 44725/371472 [3:36:27<25:00:55, 3.63it/s] 12%|█▏ | 44726/371472 [3:36:27<24:11:26, 3.75it/s] 12%|█▏ | 44727/371472 [3:36:28<24:04:41, 3.77it/s] 12%|█▏ | 44728/371472 [3:36:28<24:22:13, 3.72it/s] 12%|█▏ | 44729/371472 [3:36:28<24:15:54, 3.74it/s] 12%|█▏ | 44730/371472 [3:36:29<26:19:56, 3.45it/s] 12%|█▏ | 44731/371472 [3:36:29<26:48:13, 3.39it/s] 12%|█▏ | 44732/371472 [3:36:29<26:47:54, 3.39it/s] 12%|█▏ | 44733/371472 [3:36:29<26:17:02, 3.45it/s] 12%|█▏ | 44734/371472 [3:36:30<26:21:53, 3.44it/s] 12%|█▏ | 44735/371472 [3:36:30<25:20:26, 3.58it/s] 12%|█▏ | 44736/371472 [3:36:30<25:34:55, 3.55it/s] 12%|█▏ | 44737/371472 [3:36:31<25:28:58, 3.56it/s] 12%|█▏ | 44738/371472 [3:36:31<25:01:29, 3.63it/s] 12%|█▏ | 44739/371472 [3:36:31<25:06:43, 3.61it/s] 12%|█▏ | 44740/371472 [3:36:31<26:28:52, 3.43it/s] {'loss': 4.4429, 'learning_rate': 8.920306406085026e-07, 'epoch': 1.93} + 12%|█▏ | 44740/371472 [3:36:31<26:28:52, 3.43it/s] 12%|█▏ | 44741/371472 [3:36:32<25:42:05, 3.53it/s] 12%|█▏ | 44742/371472 [3:36:32<26:18:19, 3.45it/s] 12%|█▏ | 44743/371472 [3:36:32<25:45:58, 3.52it/s] 12%|█▏ | 44744/371472 [3:36:33<25:44:33, 3.53it/s] 12%|█▏ | 44745/371472 [3:36:33<25:40:02, 3.54it/s] 12%|█▏ | 44746/371472 [3:36:33<28:01:27, 3.24it/s] 12%|█▏ | 44747/371472 [3:36:34<28:37:09, 3.17it/s] 12%|█▏ | 44748/371472 [3:36:34<30:26:40, 2.98it/s] 12%|█▏ | 44749/371472 [3:36:34<28:20:06, 3.20it/s] 12%|█▏ | 44750/371472 [3:36:34<28:19:37, 3.20it/s] 12%|█▏ | 44751/371472 [3:36:35<27:57:50, 3.25it/s] 12%|█▏ | 44752/371472 [3:36:35<28:16:24, 3.21it/s] 12%|█▏ | 44753/371472 [3:36:35<30:28:54, 2.98it/s] 12%|█▏ | 44754/371472 [3:36:36<27:59:31, 3.24it/s] 12%|█▏ | 44755/371472 [3:36:36<28:18:59, 3.21it/s] 12%|█▏ | 44756/371472 [3:36:36<26:37:11, 3.41it/s] 12%|█▏ | 44757/371472 [3:36:37<26:05:29, 3.48it/s] 12%|█▏ | 44758/371472 [3:36:37<24:59:02, 3.63it/s] 12%|█▏ | 44759/371472 [3:36:37<25:09:21, 3.61it/s] 12%|█▏ | 44760/371472 [3:36:37<25:04:35, 3.62it/s] {'loss': 4.0907, 'learning_rate': 8.919821586330238e-07, 'epoch': 1.93} + 12%|█▏ | 44760/371472 [3:36:37<25:04:35, 3.62it/s] 12%|█▏ | 44761/371472 [3:36:38<25:26:35, 3.57it/s] 12%|█▏ | 44762/371472 [3:36:38<24:26:15, 3.71it/s] 12%|█▏ | 44763/371472 [3:36:38<25:40:40, 3.53it/s] 12%|█▏ | 44764/371472 [3:36:38<24:38:36, 3.68it/s] 12%|█▏ | 44765/371472 [3:36:39<28:35:52, 3.17it/s] 12%|█▏ | 44766/371472 [3:36:39<27:54:29, 3.25it/s] 12%|█▏ | 44767/371472 [3:36:39<27:20:30, 3.32it/s] 12%|█▏ | 44768/371472 [3:36:40<26:38:41, 3.41it/s] 12%|█▏ | 44769/371472 [3:36:40<27:54:59, 3.25it/s] 12%|█▏ | 44770/371472 [3:36:40<26:52:56, 3.38it/s] 12%|█▏ | 44771/371472 [3:36:41<26:24:57, 3.44it/s] 12%|█▏ | 44772/371472 [3:36:41<26:32:18, 3.42it/s] 12%|█▏ | 44773/371472 [3:36:41<26:59:22, 3.36it/s] 12%|█▏ | 44774/371472 [3:36:42<27:23:48, 3.31it/s] 12%|█▏ | 44775/371472 [3:36:42<26:55:46, 3.37it/s] 12%|█▏ | 44776/371472 [3:36:42<27:09:14, 3.34it/s] 12%|█▏ | 44777/371472 [3:36:42<25:50:30, 3.51it/s] 12%|█▏ | 44778/371472 [3:36:43<25:01:58, 3.63it/s] 12%|█▏ | 44779/371472 [3:36:43<25:10:29, 3.60it/s] 12%|█▏ | 44780/371472 [3:36:43<25:06:42, 3.61it/s] {'loss': 4.4002, 'learning_rate': 8.919336766575449e-07, 'epoch': 1.93} + 12%|█▏ | 44780/371472 [3:36:43<25:06:42, 3.61it/s] 12%|█▏ | 44781/371472 [3:36:44<25:57:26, 3.50it/s] 12%|█▏ | 44782/371472 [3:36:44<25:50:10, 3.51it/s] 12%|█▏ | 44783/371472 [3:36:44<28:38:51, 3.17it/s] 12%|█▏ | 44784/371472 [3:36:44<27:47:24, 3.27it/s] 12%|█▏ | 44785/371472 [3:36:45<27:19:32, 3.32it/s] 12%|█▏ | 44786/371472 [3:36:45<29:20:30, 3.09it/s] 12%|█▏ | 44787/371472 [3:36:45<28:44:32, 3.16it/s] 12%|█▏ | 44788/371472 [3:36:46<27:40:39, 3.28it/s] 12%|█▏ | 44789/371472 [3:36:46<26:43:24, 3.40it/s] 12%|█▏ | 44790/371472 [3:36:46<27:09:42, 3.34it/s] 12%|█▏ | 44791/371472 [3:36:47<27:08:25, 3.34it/s] 12%|█▏ | 44792/371472 [3:36:47<26:32:42, 3.42it/s] 12%|█▏ | 44793/371472 [3:36:47<26:46:22, 3.39it/s] 12%|█▏ | 44794/371472 [3:36:48<30:06:06, 3.01it/s] 12%|█▏ | 44795/371472 [3:36:48<28:15:18, 3.21it/s] 12%|█▏ | 44796/371472 [3:36:48<27:49:11, 3.26it/s] 12%|█▏ | 44797/371472 [3:36:48<29:00:48, 3.13it/s] 12%|█▏ | 44798/371472 [3:36:49<28:04:38, 3.23it/s] 12%|█▏ | 44799/371472 [3:36:49<27:50:33, 3.26it/s] 12%|█▏ | 44800/371472 [3:36:49<27:35:31, 3.29it/s] {'loss': 4.2448, 'learning_rate': 8.918851946820659e-07, 'epoch': 1.93} + 12%|█▏ | 44800/371472 [3:36:49<27:35:31, 3.29it/s] 12%|█▏ | 44801/371472 [3:36:50<26:35:02, 3.41it/s] 12%|█▏ | 44802/371472 [3:36:50<26:01:35, 3.49it/s] 12%|█▏ | 44803/371472 [3:36:50<25:36:31, 3.54it/s] 12%|█▏ | 44804/371472 [3:36:50<25:11:53, 3.60it/s] 12%|█▏ | 44805/371472 [3:36:51<24:55:57, 3.64it/s] 12%|█▏ | 44806/371472 [3:36:51<26:15:40, 3.46it/s] 12%|█▏ | 44807/371472 [3:36:51<25:10:55, 3.60it/s] 12%|█▏ | 44808/371472 [3:36:52<25:20:28, 3.58it/s] 12%|█▏ | 44809/371472 [3:36:52<25:37:01, 3.54it/s] 12%|█▏ | 44810/371472 [3:36:52<25:56:15, 3.50it/s] 12%|█▏ | 44811/371472 [3:36:52<25:53:52, 3.50it/s] 12%|█▏ | 44812/371472 [3:36:53<25:48:41, 3.52it/s] 12%|█▏ | 44813/371472 [3:36:53<26:03:04, 3.48it/s] 12%|█▏ | 44814/371472 [3:36:53<27:11:19, 3.34it/s] 12%|█▏ | 44815/371472 [3:36:54<26:30:58, 3.42it/s] 12%|█▏ | 44816/371472 [3:36:54<26:24:19, 3.44it/s] 12%|█▏ | 44817/371472 [3:36:54<25:36:06, 3.54it/s] 12%|█▏ | 44818/371472 [3:36:54<25:25:01, 3.57it/s] 12%|█▏ | 44819/371472 [3:36:55<26:04:51, 3.48it/s] 12%|█▏ | 44820/371472 [3:36:55<25:46:01, 3.52it/s] {'loss': 4.302, 'learning_rate': 8.91836712706587e-07, 'epoch': 1.93} + 12%|█▏ | 44820/371472 [3:36:55<25:46:01, 3.52it/s] 12%|█▏ | 44821/371472 [3:36:55<25:18:12, 3.59it/s] 12%|█▏ | 44822/371472 [3:36:56<25:27:11, 3.56it/s] 12%|█▏ | 44823/371472 [3:36:56<25:10:22, 3.60it/s] 12%|█▏ | 44824/371472 [3:36:56<24:33:57, 3.69it/s] 12%|█▏ | 44825/371472 [3:36:56<25:34:11, 3.55it/s] 12%|█▏ | 44826/371472 [3:36:57<26:06:43, 3.47it/s] 12%|█▏ | 44827/371472 [3:36:57<25:28:17, 3.56it/s] 12%|█▏ | 44828/371472 [3:36:57<25:02:16, 3.62it/s] 12%|█▏ | 44829/371472 [3:36:58<24:32:37, 3.70it/s] 12%|█▏ | 44830/371472 [3:36:58<26:25:06, 3.43it/s] 12%|█▏ | 44831/371472 [3:36:58<28:14:03, 3.21it/s] 12%|█▏ | 44832/371472 [3:36:58<27:35:27, 3.29it/s] 12%|█▏ | 44833/371472 [3:36:59<27:56:21, 3.25it/s] 12%|█▏ | 44834/371472 [3:36:59<27:56:11, 3.25it/s] 12%|█▏ | 44835/371472 [3:36:59<27:24:06, 3.31it/s] 12%|█▏ | 44836/371472 [3:37:00<29:32:23, 3.07it/s] 12%|█▏ | 44837/371472 [3:37:00<27:52:07, 3.26it/s] 12%|█▏ | 44838/371472 [3:37:00<27:08:39, 3.34it/s] 12%|█▏ | 44839/371472 [3:37:01<30:53:43, 2.94it/s] 12%|█▏ | 44840/371472 [3:37:01<28:58:01, 3.13it/s] {'loss': 4.322, 'learning_rate': 8.917882307311082e-07, 'epoch': 1.93} + 12%|█▏ | 44840/371472 [3:37:01<28:58:01, 3.13it/s] 12%|█▏ | 44841/371472 [3:37:01<29:54:56, 3.03it/s] 12%|█▏ | 44842/371472 [3:37:02<29:05:39, 3.12it/s] 12%|█▏ | 44843/371472 [3:37:02<28:00:38, 3.24it/s] 12%|█▏ | 44844/371472 [3:37:02<26:34:58, 3.41it/s] 12%|█▏ | 44845/371472 [3:37:02<25:58:52, 3.49it/s] 12%|█▏ | 44846/371472 [3:37:03<25:28:33, 3.56it/s] 12%|█▏ | 44847/371472 [3:37:03<24:41:28, 3.67it/s] 12%|█▏ | 44848/371472 [3:37:03<26:20:35, 3.44it/s] 12%|█▏ | 44849/371472 [3:37:04<24:55:05, 3.64it/s] 12%|█▏ | 44850/371472 [3:37:04<24:57:58, 3.63it/s] 12%|█▏ | 44851/371472 [3:37:04<25:26:56, 3.57it/s] 12%|█▏ | 44852/371472 [3:37:04<25:12:00, 3.60it/s] 12%|█▏ | 44853/371472 [3:37:05<25:50:52, 3.51it/s] 12%|█▏ | 44854/371472 [3:37:05<26:14:19, 3.46it/s] 12%|█▏ | 44855/371472 [3:37:05<25:41:23, 3.53it/s] 12%|█▏ | 44856/371472 [3:37:06<25:22:26, 3.58it/s] 12%|█▏ | 44857/371472 [3:37:06<25:52:35, 3.51it/s] 12%|█▏ | 44858/371472 [3:37:06<25:03:51, 3.62it/s] 12%|█▏ | 44859/371472 [3:37:06<24:52:58, 3.65it/s] 12%|█▏ | 44860/371472 [3:37:07<24:25:41, 3.71it/s] {'loss': 4.2744, 'learning_rate': 8.917397487556293e-07, 'epoch': 1.93} + 12%|█▏ | 44860/371472 [3:37:07<24:25:41, 3.71it/s] 12%|█▏ | 44861/371472 [3:37:07<24:23:01, 3.72it/s] 12%|█▏ | 44862/371472 [3:37:07<24:41:23, 3.67it/s] 12%|█▏ | 44863/371472 [3:37:07<24:00:51, 3.78it/s] 12%|█▏ | 44864/371472 [3:37:08<25:06:15, 3.61it/s] 12%|█▏ | 44865/371472 [3:37:08<25:23:43, 3.57it/s] 12%|█▏ | 44866/371472 [3:37:08<24:50:13, 3.65it/s] 12%|█▏ | 44867/371472 [3:37:09<24:37:54, 3.68it/s] 12%|█▏ | 44868/371472 [3:37:09<25:34:08, 3.55it/s] 12%|█▏ | 44869/371472 [3:37:09<24:31:59, 3.70it/s] 12%|█▏ | 44870/371472 [3:37:09<24:13:30, 3.74it/s] 12%|█▏ | 44871/371472 [3:37:10<24:58:44, 3.63it/s] 12%|█▏ | 44872/371472 [3:37:10<25:11:43, 3.60it/s] 12%|█▏ | 44873/371472 [3:37:10<25:25:19, 3.57it/s] 12%|█▏ | 44874/371472 [3:37:11<25:13:28, 3.60it/s] 12%|█▏ | 44875/371472 [3:37:11<25:17:35, 3.59it/s] 12%|█▏ | 44876/371472 [3:37:11<24:39:43, 3.68it/s] 12%|█▏ | 44877/371472 [3:37:11<23:57:47, 3.79it/s] 12%|█▏ | 44878/371472 [3:37:12<24:30:44, 3.70it/s] 12%|█▏ | 44879/371472 [3:37:12<25:37:13, 3.54it/s] 12%|█▏ | 44880/371472 [3:37:12<26:07:21, 3.47it/s] {'loss': 4.5504, 'learning_rate': 8.916912667801504e-07, 'epoch': 1.93} + 12%|█▏ | 44880/371472 [3:37:12<26:07:21, 3.47it/s] 12%|█▏ | 44881/371472 [3:37:12<26:03:33, 3.48it/s] 12%|█▏ | 44882/371472 [3:37:13<25:03:42, 3.62it/s] 12%|█▏ | 44883/371472 [3:37:13<24:57:49, 3.63it/s] 12%|█▏ | 44884/371472 [3:37:13<25:20:37, 3.58it/s] 12%|█▏ | 44885/371472 [3:37:14<25:14:26, 3.59it/s] 12%|█▏ | 44886/371472 [3:37:14<25:08:20, 3.61it/s] 12%|█▏ | 44887/371472 [3:37:14<24:37:34, 3.68it/s] 12%|█▏ | 44888/371472 [3:37:14<24:17:04, 3.74it/s] 12%|█▏ | 44889/371472 [3:37:15<24:30:04, 3.70it/s] 12%|█▏ | 44890/371472 [3:37:15<24:18:15, 3.73it/s] 12%|█▏ | 44891/371472 [3:37:15<24:13:13, 3.75it/s] 12%|█▏ | 44892/371472 [3:37:15<23:49:07, 3.81it/s] 12%|█▏ | 44893/371472 [3:37:16<24:10:34, 3.75it/s] 12%|█▏ | 44894/371472 [3:37:16<24:31:42, 3.70it/s] 12%|█▏ | 44895/371472 [3:37:16<24:54:26, 3.64it/s] 12%|█▏ | 44896/371472 [3:37:17<25:53:39, 3.50it/s] 12%|█▏ | 44897/371472 [3:37:17<27:42:19, 3.27it/s] 12%|█▏ | 44898/371472 [3:37:17<28:40:37, 3.16it/s] 12%|█▏ | 44899/371472 [3:37:18<28:12:42, 3.22it/s] 12%|█▏ | 44900/371472 [3:37:18<26:48:40, 3.38it/s] {'loss': 4.3332, 'learning_rate': 8.916427848046715e-07, 'epoch': 1.93} + 12%|█▏ | 44900/371472 [3:37:18<26:48:40, 3.38it/s] 12%|█▏ | 44901/371472 [3:37:18<27:32:26, 3.29it/s] 12%|█▏ | 44902/371472 [3:37:18<27:19:28, 3.32it/s] 12%|█▏ | 44903/371472 [3:37:19<26:46:45, 3.39it/s] 12%|█▏ | 44904/371472 [3:37:19<25:40:24, 3.53it/s] 12%|█▏ | 44905/371472 [3:37:19<26:54:40, 3.37it/s] 12%|█▏ | 44906/371472 [3:37:20<27:28:39, 3.30it/s] 12%|█▏ | 44907/371472 [3:37:20<27:24:31, 3.31it/s] 12%|█▏ | 44908/371472 [3:37:20<26:25:56, 3.43it/s] 12%|█▏ | 44909/371472 [3:37:20<27:21:17, 3.32it/s] 12%|█▏ | 44910/371472 [3:37:21<26:14:43, 3.46it/s] 12%|█▏ | 44911/371472 [3:37:21<25:45:57, 3.52it/s] 12%|█▏ | 44912/371472 [3:37:21<27:13:41, 3.33it/s] 12%|█▏ | 44913/371472 [3:37:22<26:30:34, 3.42it/s] 12%|█▏ | 44914/371472 [3:37:22<26:50:45, 3.38it/s] 12%|█▏ | 44915/371472 [3:37:22<26:28:07, 3.43it/s] 12%|█▏ | 44916/371472 [3:37:23<26:05:17, 3.48it/s] 12%|█▏ | 44917/371472 [3:37:23<24:55:07, 3.64it/s] 12%|█▏ | 44918/371472 [3:37:23<24:25:28, 3.71it/s] 12%|█▏ | 44919/371472 [3:37:23<27:42:10, 3.27it/s] 12%|█▏ | 44920/371472 [3:37:24<27:26:58, 3.30it/s] {'loss': 4.4607, 'learning_rate': 8.915943028291926e-07, 'epoch': 1.93} + 12%|█▏ | 44920/371472 [3:37:24<27:26:58, 3.30it/s] 12%|█▏ | 44921/371472 [3:37:24<26:44:01, 3.39it/s] 12%|█▏ | 44922/371472 [3:37:24<25:56:11, 3.50it/s] 12%|█▏ | 44923/371472 [3:37:25<26:47:50, 3.38it/s] 12%|█▏ | 44924/371472 [3:37:25<25:32:50, 3.55it/s] 12%|█▏ | 44925/371472 [3:37:25<26:30:52, 3.42it/s] 12%|█▏ | 44926/371472 [3:37:25<25:50:09, 3.51it/s] 12%|█▏ | 44927/371472 [3:37:26<26:52:55, 3.37it/s] 12%|█▏ | 44928/371472 [3:37:26<27:22:00, 3.31it/s] 12%|█▏ | 44929/371472 [3:37:26<27:21:06, 3.32it/s] 12%|█▏ | 44930/371472 [3:37:27<28:14:22, 3.21it/s] 12%|█▏ | 44931/371472 [3:37:27<27:15:42, 3.33it/s] 12%|█▏ | 44932/371472 [3:37:27<27:33:48, 3.29it/s] 12%|█▏ | 44933/371472 [3:37:28<26:19:52, 3.44it/s] 12%|█▏ | 44934/371472 [3:37:28<26:08:01, 3.47it/s] 12%|█▏ | 44935/371472 [3:37:28<26:18:57, 3.45it/s] 12%|█▏ | 44936/371472 [3:37:28<25:28:15, 3.56it/s] 12%|█▏ | 44937/371472 [3:37:29<26:05:29, 3.48it/s] 12%|█▏ | 44938/371472 [3:37:29<26:06:46, 3.47it/s] 12%|█▏ | 44939/371472 [3:37:29<26:44:46, 3.39it/s] 12%|█▏ | 44940/371472 [3:37:30<26:47:51, 3.38it/s] {'loss': 4.293, 'learning_rate': 8.915458208537136e-07, 'epoch': 1.94} + 12%|█▏ | 44940/371472 [3:37:30<26:47:51, 3.38it/s] 12%|█▏ | 44941/371472 [3:37:30<29:45:10, 3.05it/s] 12%|█▏ | 44942/371472 [3:37:30<27:39:02, 3.28it/s] 12%|█▏ | 44943/371472 [3:37:30<25:54:28, 3.50it/s] 12%|█▏ | 44944/371472 [3:37:31<25:21:13, 3.58it/s] 12%|█▏ | 44945/371472 [3:37:31<25:44:00, 3.52it/s] 12%|█▏ | 44946/371472 [3:37:31<26:32:41, 3.42it/s] 12%|█▏ | 44947/371472 [3:37:32<26:28:03, 3.43it/s] 12%|█▏ | 44948/371472 [3:37:32<25:58:57, 3.49it/s] 12%|█▏ | 44949/371472 [3:37:32<26:07:24, 3.47it/s] 12%|█▏ | 44950/371472 [3:37:32<27:15:25, 3.33it/s] 12%|█▏ | 44951/371472 [3:37:33<27:45:32, 3.27it/s] 12%|█▏ | 44952/371472 [3:37:33<26:51:25, 3.38it/s] 12%|█▏ | 44953/371472 [3:37:33<29:33:01, 3.07it/s] 12%|█▏ | 44954/371472 [3:37:34<27:56:07, 3.25it/s] 12%|█▏ | 44955/371472 [3:37:34<27:21:01, 3.32it/s] 12%|█▏ | 44956/371472 [3:37:34<26:24:11, 3.44it/s] 12%|█▏ | 44957/371472 [3:37:35<25:21:04, 3.58it/s] 12%|█▏ | 44958/371472 [3:37:35<24:46:57, 3.66it/s] 12%|█▏ | 44959/371472 [3:37:35<24:36:57, 3.68it/s] 12%|█▏ | 44960/371472 [3:37:35<25:08:40, 3.61it/s] {'loss': 4.3819, 'learning_rate': 8.914973388782348e-07, 'epoch': 1.94} + 12%|█▏ | 44960/371472 [3:37:35<25:08:40, 3.61it/s] 12%|█▏ | 44961/371472 [3:37:36<24:30:05, 3.70it/s] 12%|█▏ | 44962/371472 [3:37:36<23:51:58, 3.80it/s] 12%|█▏ | 44963/371472 [3:37:36<24:17:39, 3.73it/s] 12%|█▏ | 44964/371472 [3:37:36<24:26:35, 3.71it/s] 12%|█▏ | 44965/371472 [3:37:37<25:37:52, 3.54it/s] 12%|█▏ | 44966/371472 [3:37:37<24:57:48, 3.63it/s] 12%|█▏ | 44967/371472 [3:37:37<26:23:57, 3.44it/s] 12%|█▏ | 44968/371472 [3:37:38<25:53:20, 3.50it/s] 12%|█▏ | 44969/371472 [3:37:38<25:54:09, 3.50it/s] 12%|█▏ | 44970/371472 [3:37:38<24:43:46, 3.67it/s] 12%|█▏ | 44971/371472 [3:37:39<29:26:08, 3.08it/s] 12%|█▏ | 44972/371472 [3:37:39<28:40:59, 3.16it/s] 12%|█▏ | 44973/371472 [3:37:39<28:02:35, 3.23it/s] 12%|█▏ | 44974/371472 [3:37:39<27:50:16, 3.26it/s] 12%|█▏ | 44975/371472 [3:37:40<27:04:53, 3.35it/s] 12%|█▏ | 44976/371472 [3:37:40<26:17:02, 3.45it/s] 12%|█▏ | 44977/371472 [3:37:40<27:07:40, 3.34it/s] 12%|█▏ | 44978/371472 [3:37:41<25:52:47, 3.50it/s] 12%|█▏ | 44979/371472 [3:37:41<24:33:34, 3.69it/s] 12%|█▏ | 44980/371472 [3:37:41<27:07:25, 3.34it/s] {'loss': 4.5778, 'learning_rate': 8.914488569027559e-07, 'epoch': 1.94} + 12%|█▏ | 44980/371472 [3:37:41<27:07:25, 3.34it/s] 12%|█▏ | 44981/371472 [3:37:41<27:10:35, 3.34it/s] 12%|█▏ | 44982/371472 [3:37:42<26:03:14, 3.48it/s] 12%|█▏ | 44983/371472 [3:37:42<27:00:34, 3.36it/s] 12%|█▏ | 44984/371472 [3:37:42<26:54:40, 3.37it/s] 12%|█▏ | 44985/371472 [3:37:43<28:35:20, 3.17it/s] 12%|█▏ | 44986/371472 [3:37:43<27:36:18, 3.29it/s] 12%|█▏ | 44987/371472 [3:37:43<27:35:23, 3.29it/s] 12%|█▏ | 44988/371472 [3:37:44<28:53:48, 3.14it/s] 12%|█▏ | 44989/371472 [3:37:44<27:19:01, 3.32it/s] 12%|█▏ | 44990/371472 [3:37:44<27:13:19, 3.33it/s] 12%|█▏ | 44991/371472 [3:37:45<27:11:44, 3.33it/s] 12%|█▏ | 44992/371472 [3:37:45<26:05:24, 3.48it/s] 12%|█▏ | 44993/371472 [3:37:45<25:40:22, 3.53it/s] 12%|█▏ | 44994/371472 [3:37:45<25:07:19, 3.61it/s] 12%|█▏ | 44995/371472 [3:37:46<24:57:26, 3.63it/s] 12%|█▏ | 44996/371472 [3:37:46<25:05:01, 3.62it/s] 12%|█▏ | 44997/371472 [3:37:46<25:29:41, 3.56it/s] 12%|█▏ | 44998/371472 [3:37:46<24:41:55, 3.67it/s] 12%|█▏ | 44999/371472 [3:37:47<24:02:39, 3.77it/s] 12%|█▏ | 45000/371472 [3:37:47<24:20:03, 3.73it/s] {'loss': 4.4136, 'learning_rate': 8.91400374927277e-07, 'epoch': 1.94} + 12%|█▏ | 45000/371472 [3:37:47<24:20:03, 3.73it/s] 12%|█▏ | 45001/371472 [3:37:47<23:59:00, 3.78it/s] 12%|█▏ | 45002/371472 [3:37:47<24:37:47, 3.68it/s] 12%|█▏ | 45003/371472 [3:37:48<24:35:24, 3.69it/s] 12%|█▏ | 45004/371472 [3:37:48<24:21:11, 3.72it/s] 12%|█▏ | 45005/371472 [3:37:48<25:30:37, 3.55it/s] 12%|█▏ | 45006/371472 [3:37:49<25:16:05, 3.59it/s] 12%|█▏ | 45007/371472 [3:37:49<25:17:17, 3.59it/s] 12%|█▏ | 45008/371472 [3:37:49<24:59:27, 3.63it/s] 12%|█▏ | 45009/371472 [3:37:50<28:36:24, 3.17it/s] 12%|█▏ | 45010/371472 [3:37:50<28:11:06, 3.22it/s] 12%|█▏ | 45011/371472 [3:37:50<28:09:49, 3.22it/s] 12%|█▏ | 45012/371472 [3:37:50<27:34:48, 3.29it/s] 12%|█▏ | 45013/371472 [3:37:51<27:17:36, 3.32it/s] 12%|█▏ | 45014/371472 [3:37:51<26:48:26, 3.38it/s] 12%|█▏ | 45015/371472 [3:37:51<26:27:29, 3.43it/s] 12%|█▏ | 45016/371472 [3:37:52<25:56:03, 3.50it/s] 12%|█▏ | 45017/371472 [3:37:52<25:49:59, 3.51it/s] 12%|█▏ | 45018/371472 [3:37:52<28:25:26, 3.19it/s] 12%|█▏ | 45019/371472 [3:37:52<27:04:29, 3.35it/s] 12%|█▏ | 45020/371472 [3:37:53<27:40:52, 3.28it/s] {'loss': 4.3509, 'learning_rate': 8.913518929517981e-07, 'epoch': 1.94} + 12%|█▏ | 45020/371472 [3:37:53<27:40:52, 3.28it/s] 12%|█▏ | 45021/371472 [3:37:53<27:36:13, 3.29it/s] 12%|█▏ | 45022/371472 [3:37:53<28:54:16, 3.14it/s] 12%|█▏ | 45023/371472 [3:37:54<28:09:37, 3.22it/s] 12%|█▏ | 45024/371472 [3:37:54<27:38:22, 3.28it/s] 12%|█▏ | 45025/371472 [3:37:54<28:24:47, 3.19it/s] 12%|█▏ | 45026/371472 [3:37:55<28:36:42, 3.17it/s] 12%|█▏ | 45027/371472 [3:37:55<27:21:01, 3.32it/s] 12%|█▏ | 45028/371472 [3:37:55<26:49:20, 3.38it/s] 12%|█▏ | 45029/371472 [3:37:56<25:58:58, 3.49it/s] 12%|█▏ | 45030/371472 [3:37:56<26:04:41, 3.48it/s] 12%|█▏ | 45031/371472 [3:37:56<26:36:51, 3.41it/s] 12%|█▏ | 45032/371472 [3:37:56<26:03:59, 3.48it/s] 12%|█▏ | 45033/371472 [3:37:57<25:02:39, 3.62it/s] 12%|█▏ | 45034/371472 [3:37:57<24:27:08, 3.71it/s] 12%|█▏ | 45035/371472 [3:37:57<26:10:46, 3.46it/s] 12%|█▏ | 45036/371472 [3:37:58<26:03:15, 3.48it/s] 12%|█▏ | 45037/371472 [3:37:58<26:04:15, 3.48it/s] 12%|█▏ | 45038/371472 [3:37:58<26:19:15, 3.45it/s] 12%|█▏ | 45039/371472 [3:37:58<26:39:28, 3.40it/s] 12%|█▏ | 45040/371472 [3:37:59<25:42:13, 3.53it/s] {'loss': 4.5572, 'learning_rate': 8.913034109763192e-07, 'epoch': 1.94} + 12%|█▏ | 45040/371472 [3:37:59<25:42:13, 3.53it/s] 12%|█▏ | 45041/371472 [3:37:59<26:13:17, 3.46it/s] 12%|█▏ | 45042/371472 [3:37:59<27:33:55, 3.29it/s] 12%|█▏ | 45043/371472 [3:38:00<27:20:49, 3.32it/s] 12%|█▏ | 45044/371472 [3:38:00<26:40:15, 3.40it/s] 12%|█▏ | 45045/371472 [3:38:00<25:48:19, 3.51it/s] 12%|█▏ | 45046/371472 [3:38:00<25:12:00, 3.60it/s] 12%|█▏ | 45047/371472 [3:38:01<24:58:53, 3.63it/s] 12%|█▏ | 45048/371472 [3:38:01<24:39:36, 3.68it/s] 12%|█▏ | 45049/371472 [3:38:01<25:20:29, 3.58it/s] 12%|█▏ | 45050/371472 [3:38:02<25:09:49, 3.60it/s] 12%|█▏ | 45051/371472 [3:38:02<25:14:58, 3.59it/s] 12%|█▏ | 45052/371472 [3:38:02<26:59:10, 3.36it/s] 12%|█▏ | 45053/371472 [3:38:02<26:43:58, 3.39it/s] 12%|█▏ | 45054/371472 [3:38:03<26:20:30, 3.44it/s] 12%|█▏ | 45055/371472 [3:38:03<25:45:39, 3.52it/s] 12%|█▏ | 45056/371472 [3:38:03<24:51:40, 3.65it/s] 12%|█▏ | 45057/371472 [3:38:04<25:03:30, 3.62it/s] 12%|█▏ | 45058/371472 [3:38:04<25:04:25, 3.62it/s] 12%|█▏ | 45059/371472 [3:38:04<25:43:40, 3.52it/s] 12%|█▏ | 45060/371472 [3:38:04<25:08:44, 3.61it/s] {'loss': 4.479, 'learning_rate': 8.912549290008403e-07, 'epoch': 1.94} + 12%|█▏ | 45060/371472 [3:38:04<25:08:44, 3.61it/s] 12%|█▏ | 45061/371472 [3:38:05<24:35:39, 3.69it/s] 12%|█▏ | 45062/371472 [3:38:05<24:52:46, 3.64it/s] 12%|█▏ | 45063/371472 [3:38:05<27:06:12, 3.35it/s] 12%|█▏ | 45064/371472 [3:38:06<27:29:07, 3.30it/s] 12%|█▏ | 45065/371472 [3:38:06<27:42:37, 3.27it/s] 12%|█▏ | 45066/371472 [3:38:06<27:43:32, 3.27it/s] 12%|█▏ | 45067/371472 [3:38:06<27:28:07, 3.30it/s] 12%|█▏ | 45068/371472 [3:38:07<26:32:38, 3.42it/s] 12%|█▏ | 45069/371472 [3:38:07<25:38:33, 3.54it/s] 12%|█▏ | 45070/371472 [3:38:07<25:27:23, 3.56it/s] 12%|█▏ | 45071/371472 [3:38:08<25:38:29, 3.54it/s] 12%|█▏ | 45072/371472 [3:38:08<25:00:43, 3.62it/s] 12%|█▏ | 45073/371472 [3:38:08<25:12:19, 3.60it/s] 12%|█▏ | 45074/371472 [3:38:08<25:33:21, 3.55it/s] 12%|█▏ | 45075/371472 [3:38:09<25:49:00, 3.51it/s] 12%|█▏ | 45076/371472 [3:38:09<27:26:05, 3.30it/s] 12%|█▏ | 45077/371472 [3:38:09<27:33:51, 3.29it/s] 12%|█▏ | 45078/371472 [3:38:10<26:24:03, 3.43it/s] 12%|█▏ | 45079/371472 [3:38:10<25:57:06, 3.49it/s] 12%|█▏ | 45080/371472 [3:38:10<25:10:46, 3.60it/s] {'loss': 4.2881, 'learning_rate': 8.912064470253614e-07, 'epoch': 1.94} + 12%|█▏ | 45080/371472 [3:38:10<25:10:46, 3.60it/s] 12%|█▏ | 45081/371472 [3:38:10<24:32:31, 3.69it/s] 12%|█▏ | 45082/371472 [3:38:11<24:29:47, 3.70it/s] 12%|█▏ | 45083/371472 [3:38:11<25:05:13, 3.61it/s] 12%|█▏ | 45084/371472 [3:38:11<24:48:03, 3.66it/s] 12%|█▏ | 45085/371472 [3:38:11<25:14:24, 3.59it/s] 12%|█▏ | 45086/371472 [3:38:12<25:29:42, 3.56it/s] 12%|█▏ | 45087/371472 [3:38:12<26:37:58, 3.40it/s] 12%|█▏ | 45088/371472 [3:38:12<26:42:41, 3.39it/s] 12%|█▏ | 45089/371472 [3:38:13<28:17:58, 3.20it/s] 12%|█▏ | 45090/371472 [3:38:13<26:43:28, 3.39it/s] 12%|█▏ | 45091/371472 [3:38:13<26:47:27, 3.38it/s] 12%|█▏ | 45092/371472 [3:38:14<26:45:02, 3.39it/s] 12%|█▏ | 45093/371472 [3:38:14<25:53:24, 3.50it/s] 12%|█▏ | 45094/371472 [3:38:14<28:41:10, 3.16it/s] 12%|█▏ | 45095/371472 [3:38:15<28:45:18, 3.15it/s] 12%|█▏ | 45096/371472 [3:38:15<27:51:09, 3.25it/s] 12%|█▏ | 45097/371472 [3:38:15<26:38:43, 3.40it/s] 12%|█▏ | 45098/371472 [3:38:15<26:25:15, 3.43it/s] 12%|█▏ | 45099/371472 [3:38:16<25:45:32, 3.52it/s] 12%|█▏ | 45100/371472 [3:38:16<25:52:37, 3.50it/s] {'loss': 4.3303, 'learning_rate': 8.911579650498825e-07, 'epoch': 1.94} + 12%|█▏ | 45100/371472 [3:38:16<25:52:37, 3.50it/s] 12%|█▏ | 45101/371472 [3:38:16<24:51:56, 3.65it/s] 12%|█▏ | 45102/371472 [3:38:16<24:22:20, 3.72it/s] 12%|█▏ | 45103/371472 [3:38:17<25:21:05, 3.58it/s] 12%|█▏ | 45104/371472 [3:38:17<25:17:10, 3.59it/s] 12%|█▏ | 45105/371472 [3:38:17<26:09:48, 3.47it/s] 12%|█▏ | 45106/371472 [3:38:18<27:03:50, 3.35it/s] 12%|█▏ | 45107/371472 [3:38:18<25:59:48, 3.49it/s] 12%|█▏ | 45108/371472 [3:38:18<25:32:41, 3.55it/s] 12%|█▏ | 45109/371472 [3:38:18<25:43:27, 3.52it/s] 12%|█▏ | 45110/371472 [3:38:19<24:33:12, 3.69it/s] 12%|█▏ | 45111/371472 [3:38:19<26:23:10, 3.44it/s] 12%|█▏ | 45112/371472 [3:38:19<26:42:51, 3.39it/s] 12%|█▏ | 45113/371472 [3:38:20<28:20:22, 3.20it/s] 12%|█▏ | 45114/371472 [3:38:20<27:10:18, 3.34it/s] 12%|█▏ | 45115/371472 [3:38:20<27:50:57, 3.26it/s] 12%|█▏ | 45116/371472 [3:38:21<26:56:22, 3.37it/s] 12%|█▏ | 45117/371472 [3:38:21<25:47:26, 3.51it/s] 12%|█▏ | 45118/371472 [3:38:21<26:34:38, 3.41it/s] 12%|█▏ | 45119/371472 [3:38:21<27:15:54, 3.32it/s] 12%|█▏ | 45120/371472 [3:38:22<27:58:34, 3.24it/s] {'loss': 4.2722, 'learning_rate': 8.911094830744036e-07, 'epoch': 1.94} + 12%|█▏ | 45120/371472 [3:38:22<27:58:34, 3.24it/s] 12%|█▏ | 45121/371472 [3:38:22<26:40:18, 3.40it/s] 12%|█▏ | 45122/371472 [3:38:22<25:54:25, 3.50it/s] 12%|█▏ | 45123/371472 [3:38:23<25:16:52, 3.59it/s] 12%|█▏ | 45124/371472 [3:38:23<26:34:47, 3.41it/s] 12%|█▏ | 45125/371472 [3:38:23<26:20:07, 3.44it/s] 12%|█▏ | 45126/371472 [3:38:23<25:36:27, 3.54it/s] 12%|█▏ | 45127/371472 [3:38:24<25:53:20, 3.50it/s] 12%|█▏ | 45128/371472 [3:38:24<27:30:18, 3.30it/s] 12%|█▏ | 45129/371472 [3:38:24<27:02:29, 3.35it/s] 12%|█▏ | 45130/371472 [3:38:25<26:25:49, 3.43it/s] 12%|█▏ | 45131/371472 [3:38:25<26:04:13, 3.48it/s] 12%|█▏ | 45132/371472 [3:38:25<25:33:55, 3.55it/s] 12%|█▏ | 45133/371472 [3:38:25<24:47:17, 3.66it/s] 12%|█▏ | 45134/371472 [3:38:26<24:41:28, 3.67it/s] 12%|█▏ | 45135/371472 [3:38:26<24:25:40, 3.71it/s] 12%|█▏ | 45136/371472 [3:38:26<26:03:02, 3.48it/s] 12%|█▏ | 45137/371472 [3:38:27<27:32:58, 3.29it/s] 12%|█▏ | 45138/371472 [3:38:27<26:46:58, 3.38it/s] 12%|█▏ | 45139/371472 [3:38:27<27:58:28, 3.24it/s] 12%|█▏ | 45140/371472 [3:38:28<27:04:15, 3.35it/s] {'loss': 4.4041, 'learning_rate': 8.910610010989248e-07, 'epoch': 1.94} + 12%|█▏ | 45140/371472 [3:38:28<27:04:15, 3.35it/s] 12%|█▏ | 45141/371472 [3:38:28<26:41:01, 3.40it/s] 12%|█▏ | 45142/371472 [3:38:28<25:50:07, 3.51it/s] 12%|█▏ | 45143/371472 [3:38:28<26:31:44, 3.42it/s] 12%|█▏ | 45144/371472 [3:38:29<26:11:59, 3.46it/s] 12%|█▏ | 45145/371472 [3:38:29<25:48:13, 3.51it/s] 12%|█▏ | 45146/371472 [3:38:29<25:27:31, 3.56it/s] 12%|█▏ | 45147/371472 [3:38:30<26:19:47, 3.44it/s] 12%|█▏ | 45148/371472 [3:38:30<26:32:56, 3.41it/s] 12%|█▏ | 45149/371472 [3:38:30<25:46:23, 3.52it/s] 12%|█▏ | 45150/371472 [3:38:30<25:54:54, 3.50it/s] 12%|█▏ | 45151/371472 [3:38:31<25:25:58, 3.56it/s] 12%|█▏ | 45152/371472 [3:38:31<25:12:48, 3.60it/s] 12%|█▏ | 45153/371472 [3:38:31<25:15:33, 3.59it/s] 12%|█▏ | 45154/371472 [3:38:32<24:38:49, 3.68it/s] 12%|█▏ | 45155/371472 [3:38:32<24:23:13, 3.72it/s] 12%|█▏ | 45156/371472 [3:38:32<26:48:31, 3.38it/s] 12%|█▏ | 45157/371472 [3:38:32<26:29:44, 3.42it/s] 12%|█▏ | 45158/371472 [3:38:33<26:18:00, 3.45it/s] 12%|█▏ | 45159/371472 [3:38:33<27:16:39, 3.32it/s] 12%|█▏ | 45160/371472 [3:38:33<26:48:52, 3.38it/s] {'loss': 4.3474, 'learning_rate': 8.910125191234459e-07, 'epoch': 1.95} + 12%|█▏ | 45160/371472 [3:38:33<26:48:52, 3.38it/s] 12%|█▏ | 45161/371472 [3:38:34<28:27:30, 3.19it/s] 12%|█▏ | 45162/371472 [3:38:34<26:47:51, 3.38it/s] 12%|█▏ | 45163/371472 [3:38:34<27:01:12, 3.35it/s] 12%|█▏ | 45164/371472 [3:38:35<27:00:12, 3.36it/s] 12%|█▏ | 45165/371472 [3:38:35<26:08:10, 3.47it/s] 12%|█▏ | 45166/371472 [3:38:35<25:07:47, 3.61it/s] 12%|█▏ | 45167/371472 [3:38:35<26:07:07, 3.47it/s] 12%|█▏ | 45168/371472 [3:38:36<25:46:05, 3.52it/s] 12%|█▏ | 45169/371472 [3:38:36<25:02:54, 3.62it/s] 12%|█▏ | 45170/371472 [3:38:36<26:04:21, 3.48it/s] 12%|█▏ | 45171/371472 [3:38:36<25:49:41, 3.51it/s] 12%|█▏ | 45172/371472 [3:38:37<25:11:26, 3.60it/s] 12%|█▏ | 45173/371472 [3:38:37<28:25:25, 3.19it/s] 12%|█▏ | 45174/371472 [3:38:37<28:16:10, 3.21it/s] 12%|█▏ | 45175/371472 [3:38:38<26:25:52, 3.43it/s] 12%|█▏ | 45176/371472 [3:38:38<26:46:20, 3.39it/s] 12%|█▏ | 45177/371472 [3:38:38<26:22:33, 3.44it/s] 12%|█▏ | 45178/371472 [3:38:39<25:43:00, 3.52it/s] 12%|█▏ | 45179/371472 [3:38:39<25:39:24, 3.53it/s] 12%|█▏ | 45180/371472 [3:38:39<25:00:17, 3.62it/s] {'loss': 4.4535, 'learning_rate': 8.909640371479669e-07, 'epoch': 1.95} + 12%|█▏ | 45180/371472 [3:38:39<25:00:17, 3.62it/s] 12%|█▏ | 45181/371472 [3:38:39<25:12:40, 3.60it/s] 12%|█▏ | 45182/371472 [3:38:40<24:35:11, 3.69it/s] 12%|█▏ | 45183/371472 [3:38:40<25:10:37, 3.60it/s] 12%|█▏ | 45184/371472 [3:38:40<24:40:53, 3.67it/s] 12%|█▏ | 45185/371472 [3:38:40<25:45:04, 3.52it/s] 12%|█▏ | 45186/371472 [3:38:41<25:08:20, 3.61it/s] 12%|█▏ | 45187/371472 [3:38:41<25:30:59, 3.55it/s] 12%|█▏ | 45188/371472 [3:38:41<25:44:30, 3.52it/s] 12%|█▏ | 45189/371472 [3:38:42<25:16:19, 3.59it/s] 12%|█▏ | 45190/371472 [3:38:42<24:32:38, 3.69it/s] 12%|█▏ | 45191/371472 [3:38:42<24:41:01, 3.67it/s] 12%|█▏ | 45192/371472 [3:38:42<24:26:48, 3.71it/s] 12%|█▏ | 45193/371472 [3:38:43<24:08:02, 3.76it/s] 12%|█▏ | 45194/371472 [3:38:43<24:18:05, 3.73it/s] 12%|█▏ | 45195/371472 [3:38:43<25:28:00, 3.56it/s] 12%|█▏ | 45196/371472 [3:38:44<25:33:17, 3.55it/s] 12%|█▏ | 45197/371472 [3:38:44<25:16:03, 3.59it/s] 12%|█▏ | 45198/371472 [3:38:44<24:46:43, 3.66it/s] 12%|█▏ | 45199/371472 [3:38:44<24:49:32, 3.65it/s] 12%|█▏ | 45200/371472 [3:38:45<24:25:05, 3.71it/s] {'loss': 4.5766, 'learning_rate': 8.90915555172488e-07, 'epoch': 1.95} + 12%|█▏ | 45200/371472 [3:38:45<24:25:05, 3.71it/s] 12%|█▏ | 45201/371472 [3:38:45<24:13:55, 3.74it/s] 12%|█▏ | 45202/371472 [3:38:45<23:59:14, 3.78it/s] 12%|█▏ | 45203/371472 [3:38:45<24:33:31, 3.69it/s] 12%|█▏ | 45204/371472 [3:38:46<24:25:28, 3.71it/s] 12%|█▏ | 45205/371472 [3:38:46<25:26:10, 3.56it/s] 12%|█▏ | 45206/371472 [3:38:46<27:53:33, 3.25it/s] 12%|█▏ | 45207/371472 [3:38:47<27:21:37, 3.31it/s] 12%|█▏ | 45208/371472 [3:38:47<25:47:29, 3.51it/s] 12%|█▏ | 45209/371472 [3:38:47<25:42:36, 3.53it/s] 12%|█▏ | 45210/371472 [3:38:47<25:12:41, 3.59it/s] 12%|█▏ | 45211/371472 [3:38:48<27:00:08, 3.36it/s] 12%|█▏ | 45212/371472 [3:38:48<27:25:45, 3.30it/s] 12%|█▏ | 45213/371472 [3:38:48<25:57:50, 3.49it/s] 12%|█▏ | 45214/371472 [3:38:49<25:31:03, 3.55it/s] 12%|█▏ | 45215/371472 [3:38:49<26:48:01, 3.38it/s] 12%|█▏ | 45216/371472 [3:38:49<28:31:39, 3.18it/s] 12%|█▏ | 45217/371472 [3:38:50<27:04:30, 3.35it/s] 12%|█▏ | 45218/371472 [3:38:50<26:15:24, 3.45it/s] 12%|█▏ | 45219/371472 [3:38:50<25:52:36, 3.50it/s] 12%|█▏ | 45220/371472 [3:38:50<26:06:51, 3.47it/s] {'loss': 4.4793, 'learning_rate': 8.908670731970092e-07, 'epoch': 1.95} + 12%|█▏ | 45220/371472 [3:38:50<26:06:51, 3.47it/s] 12%|█▏ | 45221/371472 [3:38:51<26:49:14, 3.38it/s] 12%|█▏ | 45222/371472 [3:38:51<26:07:38, 3.47it/s] 12%|█▏ | 45223/371472 [3:38:51<27:52:32, 3.25it/s] 12%|█▏ | 45224/371472 [3:38:52<28:00:33, 3.24it/s] 12%|█▏ | 45225/371472 [3:38:52<26:47:23, 3.38it/s] 12%|█▏ | 45226/371472 [3:38:52<27:37:19, 3.28it/s] 12%|█▏ | 45227/371472 [3:38:53<28:12:29, 3.21it/s] 12%|█▏ | 45228/371472 [3:38:53<26:42:13, 3.39it/s] 12%|█▏ | 45229/371472 [3:38:53<26:10:20, 3.46it/s] 12%|█▏ | 45230/371472 [3:38:53<26:34:54, 3.41it/s] 12%|█▏ | 45231/371472 [3:38:54<25:45:11, 3.52it/s] 12%|█▏ | 45232/371472 [3:38:54<25:29:19, 3.56it/s] 12%|█▏ | 45233/371472 [3:38:54<25:18:26, 3.58it/s] 12%|█▏ | 45234/371472 [3:38:55<27:22:55, 3.31it/s] 12%|█▏ | 45235/371472 [3:38:55<26:10:32, 3.46it/s] 12%|█▏ | 45236/371472 [3:38:55<25:25:00, 3.57it/s] 12%|█▏ | 45237/371472 [3:38:55<25:54:05, 3.50it/s] 12%|█▏ | 45238/371472 [3:38:56<25:25:40, 3.56it/s] 12%|█▏ | 45239/371472 [3:38:56<25:01:53, 3.62it/s] 12%|█▏ | 45240/371472 [3:38:56<24:16:36, 3.73it/s] {'loss': 4.3069, 'learning_rate': 8.908185912215302e-07, 'epoch': 1.95} + 12%|█▏ | 45240/371472 [3:38:56<24:16:36, 3.73it/s] 12%|█▏ | 45241/371472 [3:38:56<24:31:00, 3.70it/s] 12%|█▏ | 45242/371472 [3:38:57<24:36:07, 3.68it/s] 12%|█▏ | 45243/371472 [3:38:57<24:32:39, 3.69it/s] 12%|█▏ | 45244/371472 [3:38:57<25:13:57, 3.59it/s] 12%|█▏ | 45245/371472 [3:38:58<27:09:36, 3.34it/s] 12%|█▏ | 45246/371472 [3:38:58<26:19:48, 3.44it/s] 12%|█▏ | 45247/371472 [3:38:58<25:44:19, 3.52it/s] 12%|█▏ | 45248/371472 [3:38:58<25:34:43, 3.54it/s] 12%|█▏ | 45249/371472 [3:38:59<24:46:41, 3.66it/s] 12%|█▏ | 45250/371472 [3:38:59<23:54:09, 3.79it/s] 12%|█▏ | 45251/371472 [3:38:59<24:06:42, 3.76it/s] 12%|█▏ | 45252/371472 [3:38:59<25:16:51, 3.58it/s] 12%|█▏ | 45253/371472 [3:39:00<26:02:35, 3.48it/s] 12%|█▏ | 45254/371472 [3:39:00<25:24:02, 3.57it/s] 12%|█▏ | 45255/371472 [3:39:00<25:48:43, 3.51it/s] 12%|█▏ | 45256/371472 [3:39:01<25:43:57, 3.52it/s] 12%|█▏ | 45257/371472 [3:39:01<28:16:06, 3.21it/s] 12%|█▏ | 45258/371472 [3:39:01<28:19:46, 3.20it/s] 12%|█▏ | 45259/371472 [3:39:02<26:41:36, 3.39it/s] 12%|█▏ | 45260/371472 [3:39:02<28:33:38, 3.17it/s] {'loss': 4.3347, 'learning_rate': 8.907701092460514e-07, 'epoch': 1.95} + 12%|█▏ | 45260/371472 [3:39:02<28:33:38, 3.17it/s] 12%|█▏ | 45261/371472 [3:39:02<27:26:50, 3.30it/s] 12%|█▏ | 45262/371472 [3:39:03<28:11:52, 3.21it/s] 12%|█▏ | 45263/371472 [3:39:03<26:50:42, 3.38it/s] 12%|█▏ | 45264/371472 [3:39:03<26:04:12, 3.48it/s] 12%|█▏ | 45265/371472 [3:39:03<25:18:37, 3.58it/s] 12%|█▏ | 45266/371472 [3:39:04<26:01:19, 3.48it/s] 12%|█▏ | 45267/371472 [3:39:04<25:08:15, 3.60it/s] 12%|█▏ | 45268/371472 [3:39:04<24:54:55, 3.64it/s] 12%|█▏ | 45269/371472 [3:39:04<24:12:40, 3.74it/s] 12%|█▏ | 45270/371472 [3:39:05<24:30:53, 3.70it/s] 12%|█▏ | 45271/371472 [3:39:05<24:46:54, 3.66it/s] 12%|█▏ | 45272/371472 [3:39:05<24:08:33, 3.75it/s] 12%|█▏ | 45273/371472 [3:39:06<24:24:12, 3.71it/s] 12%|█▏ | 45274/371472 [3:39:06<25:41:56, 3.53it/s] 12%|█▏ | 45275/371472 [3:39:06<25:50:03, 3.51it/s] 12%|█▏ | 45276/371472 [3:39:06<25:10:27, 3.60it/s] 12%|█▏ | 45277/371472 [3:39:07<24:32:55, 3.69it/s] 12%|█▏ | 45278/371472 [3:39:07<24:37:56, 3.68it/s] 12%|█▏ | 45279/371472 [3:39:07<24:21:33, 3.72it/s] 12%|█▏ | 45280/371472 [3:39:07<25:44:38, 3.52it/s] {'loss': 4.3836, 'learning_rate': 8.907216272705725e-07, 'epoch': 1.95} + 12%|█▏ | 45280/371472 [3:39:07<25:44:38, 3.52it/s] 12%|█▏ | 45281/371472 [3:39:08<28:07:26, 3.22it/s] 12%|█▏ | 45282/371472 [3:39:08<27:51:18, 3.25it/s] 12%|█▏ | 45283/371472 [3:39:08<26:41:37, 3.39it/s] 12%|█▏ | 45284/371472 [3:39:09<26:19:11, 3.44it/s] 12%|█▏ | 45285/371472 [3:39:09<25:21:44, 3.57it/s] 12%|█▏ | 45286/371472 [3:39:09<24:36:31, 3.68it/s] 12%|█▏ | 45287/371472 [3:39:09<24:47:13, 3.66it/s] 12%|█▏ | 45288/371472 [3:39:10<25:16:10, 3.59it/s] 12%|█▏ | 45289/371472 [3:39:10<25:30:17, 3.55it/s] 12%|█▏ | 45290/371472 [3:39:10<28:03:16, 3.23it/s] 12%|█▏ | 45291/371472 [3:39:11<29:42:03, 3.05it/s] 12%|█▏ | 45292/371472 [3:39:11<27:58:49, 3.24it/s] 12%|█▏ | 45293/371472 [3:39:11<28:15:02, 3.21it/s] 12%|█▏ | 45294/371472 [3:39:12<26:52:59, 3.37it/s] 12%|█▏ | 45295/371472 [3:39:12<27:48:56, 3.26it/s] 12%|█▏ | 45296/371472 [3:39:12<26:36:19, 3.41it/s] 12%|█▏ | 45297/371472 [3:39:13<26:41:21, 3.39it/s] 12%|█▏ | 45298/371472 [3:39:13<25:39:31, 3.53it/s] 12%|█▏ | 45299/371472 [3:39:13<25:39:22, 3.53it/s] 12%|█▏ | 45300/371472 [3:39:13<25:41:21, 3.53it/s] {'loss': 4.2522, 'learning_rate': 8.906731452950936e-07, 'epoch': 1.95} + 12%|█▏ | 45300/371472 [3:39:13<25:41:21, 3.53it/s] 12%|█▏ | 45301/371472 [3:39:14<25:24:11, 3.57it/s] 12%|█▏ | 45302/371472 [3:39:14<25:51:09, 3.50it/s] 12%|█▏ | 45303/371472 [3:39:14<25:21:07, 3.57it/s] 12%|█▏ | 45304/371472 [3:39:14<24:46:02, 3.66it/s] 12%|█▏ | 45305/371472 [3:39:15<24:41:07, 3.67it/s] 12%|█▏ | 45306/371472 [3:39:15<24:19:38, 3.72it/s] 12%|█▏ | 45307/371472 [3:39:15<25:34:29, 3.54it/s] 12%|█▏ | 45308/371472 [3:39:16<25:25:16, 3.56it/s] 12%|█▏ | 45309/371472 [3:39:16<25:15:29, 3.59it/s] 12%|█▏ | 45310/371472 [3:39:16<24:52:09, 3.64it/s] 12%|█▏ | 45311/371472 [3:39:16<25:47:16, 3.51it/s] 12%|█▏ | 45312/371472 [3:39:17<25:59:21, 3.49it/s] 12%|█▏ | 45313/371472 [3:39:17<26:06:38, 3.47it/s] 12%|█▏ | 45314/371472 [3:39:17<26:18:53, 3.44it/s] 12%|█▏ | 45315/371472 [3:39:18<26:26:42, 3.43it/s] 12%|█▏ | 45316/371472 [3:39:18<25:39:31, 3.53it/s] 12%|█▏ | 45317/371472 [3:39:18<27:31:41, 3.29it/s] 12%|█▏ | 45318/371472 [3:39:18<26:25:41, 3.43it/s] 12%|█▏ | 45319/371472 [3:39:19<25:33:37, 3.54it/s] 12%|█▏ | 45320/371472 [3:39:19<25:22:07, 3.57it/s] {'loss': 4.1439, 'learning_rate': 8.906246633196146e-07, 'epoch': 1.95} + 12%|█▏ | 45320/371472 [3:39:19<25:22:07, 3.57it/s] 12%|█▏ | 45321/371472 [3:39:19<25:56:17, 3.49it/s] 12%|█▏ | 45322/371472 [3:39:20<26:35:38, 3.41it/s] 12%|█▏ | 45323/371472 [3:39:20<26:28:01, 3.42it/s] 12%|█▏ | 45324/371472 [3:39:20<28:27:31, 3.18it/s] 12%|█▏ | 45325/371472 [3:39:21<27:16:08, 3.32it/s] 12%|█▏ | 45326/371472 [3:39:21<27:00:39, 3.35it/s] 12%|█▏ | 45327/371472 [3:39:21<25:47:11, 3.51it/s] 12%|█▏ | 45328/371472 [3:39:21<25:50:26, 3.51it/s] 12%|█▏ | 45329/371472 [3:39:22<25:34:16, 3.54it/s] 12%|█▏ | 45330/371472 [3:39:22<25:25:09, 3.56it/s] 12%|█▏ | 45331/371472 [3:39:22<25:23:31, 3.57it/s] 12%|█▏ | 45332/371472 [3:39:23<25:42:52, 3.52it/s] 12%|█▏ | 45333/371472 [3:39:23<25:30:00, 3.55it/s] 12%|█▏ | 45334/371472 [3:39:23<25:09:37, 3.60it/s] 12%|█▏ | 45335/371472 [3:39:23<25:59:55, 3.48it/s] 12%|█▏ | 45336/371472 [3:39:24<26:14:41, 3.45it/s] 12%|█▏ | 45337/371472 [3:39:24<25:41:08, 3.53it/s] 12%|█▏ | 45338/371472 [3:39:24<25:31:00, 3.55it/s] 12%|█▏ | 45339/371472 [3:39:24<25:47:18, 3.51it/s] 12%|█▏ | 45340/371472 [3:39:25<26:37:45, 3.40it/s] {'loss': 4.3673, 'learning_rate': 8.905761813441358e-07, 'epoch': 1.95} + 12%|█▏ | 45340/371472 [3:39:25<26:37:45, 3.40it/s] 12%|█▏ | 45341/371472 [3:39:25<27:54:56, 3.25it/s] 12%|█▏ | 45342/371472 [3:39:25<28:20:11, 3.20it/s] 12%|█▏ | 45343/371472 [3:39:26<28:11:10, 3.21it/s] 12%|█▏ | 45344/371472 [3:39:26<26:56:03, 3.36it/s] 12%|█▏ | 45345/371472 [3:39:26<26:08:37, 3.47it/s] 12%|█▏ | 45346/371472 [3:39:27<25:30:43, 3.55it/s] 12%|█▏ | 45347/371472 [3:39:27<24:44:03, 3.66it/s] 12%|█▏ | 45348/371472 [3:39:27<24:50:35, 3.65it/s] 12%|█▏ | 45349/371472 [3:39:27<25:16:09, 3.58it/s] 12%|█▏ | 45350/371472 [3:39:28<26:25:48, 3.43it/s] 12%|█▏ | 45351/371472 [3:39:28<26:02:07, 3.48it/s] 12%|█▏ | 45352/371472 [3:39:28<25:27:50, 3.56it/s] 12%|█▏ | 45353/371472 [3:39:29<24:33:22, 3.69it/s] 12%|█▏ | 45354/371472 [3:39:29<24:24:36, 3.71it/s] 12%|█▏ | 45355/371472 [3:39:29<25:56:01, 3.49it/s] 12%|█▏ | 45356/371472 [3:39:29<25:43:12, 3.52it/s] 12%|█▏ | 45357/371472 [3:39:30<25:37:39, 3.53it/s] 12%|█▏ | 45358/371472 [3:39:30<25:17:37, 3.58it/s] 12%|█▏ | 45359/371472 [3:39:30<25:22:51, 3.57it/s] 12%|█▏ | 45360/371472 [3:39:31<26:17:01, 3.45it/s] {'loss': 4.3393, 'learning_rate': 8.905276993686569e-07, 'epoch': 1.95} + 12%|█▏ | 45360/371472 [3:39:31<26:17:01, 3.45it/s] 12%|█▏ | 45361/371472 [3:39:31<25:11:54, 3.59it/s] 12%|█▏ | 45362/371472 [3:39:31<25:10:14, 3.60it/s] 12%|█▏ | 45363/371472 [3:39:31<24:28:02, 3.70it/s] 12%|█▏ | 45364/371472 [3:39:32<24:02:44, 3.77it/s] 12%|█▏ | 45365/371472 [3:39:32<24:01:34, 3.77it/s] 12%|█▏ | 45366/371472 [3:39:32<25:11:00, 3.60it/s] 12%|█▏ | 45367/371472 [3:39:32<25:22:25, 3.57it/s] 12%|█▏ | 45368/371472 [3:39:33<25:42:23, 3.52it/s] 12%|█▏ | 45369/371472 [3:39:33<25:16:04, 3.58it/s] 12%|█▏ | 45370/371472 [3:39:33<24:47:40, 3.65it/s] 12%|█▏ | 45371/371472 [3:39:34<24:49:26, 3.65it/s] 12%|█▏ | 45372/371472 [3:39:34<24:17:06, 3.73it/s] 12%|█▏ | 45373/371472 [3:39:34<24:41:26, 3.67it/s] 12%|█▏ | 45374/371472 [3:39:34<25:10:49, 3.60it/s] 12%|█▏ | 45375/371472 [3:39:35<25:32:15, 3.55it/s] 12%|█▏ | 45376/371472 [3:39:35<26:32:39, 3.41it/s] 12%|█▏ | 45377/371472 [3:39:35<25:57:24, 3.49it/s] 12%|█▏ | 45378/371472 [3:39:35<25:26:24, 3.56it/s] 12%|█▏ | 45379/371472 [3:39:36<24:36:14, 3.68it/s] 12%|█▏ | 45380/371472 [3:39:36<25:38:32, 3.53it/s] {'loss': 4.4877, 'learning_rate': 8.904792173931781e-07, 'epoch': 1.95} + 12%|█▏ | 45380/371472 [3:39:36<25:38:32, 3.53it/s] 12%|█▏ | 45381/371472 [3:39:36<25:27:20, 3.56it/s] 12%|█▏ | 45382/371472 [3:39:37<25:55:43, 3.49it/s] 12%|█▏ | 45383/371472 [3:39:37<27:00:24, 3.35it/s] 12%|█▏ | 45384/371472 [3:39:37<27:46:16, 3.26it/s] 12%|█▏ | 45385/371472 [3:39:38<27:09:07, 3.34it/s] 12%|█▏ | 45386/371472 [3:39:38<26:00:02, 3.48it/s] 12%|█▏ | 45387/371472 [3:39:38<26:55:47, 3.36it/s] 12%|█▏ | 45388/371472 [3:39:38<26:18:49, 3.44it/s] 12%|█▏ | 45389/371472 [3:39:39<26:07:17, 3.47it/s] 12%|█▏ | 45390/371472 [3:39:39<25:44:21, 3.52it/s] 12%|█▏ | 45391/371472 [3:39:39<26:21:37, 3.44it/s] 12%|█▏ | 45392/371472 [3:39:40<25:14:51, 3.59it/s] 12%|█▏ | 45393/371472 [3:39:40<24:17:50, 3.73it/s] 12%|█▏ | 45394/371472 [3:39:40<26:55:13, 3.36it/s] 12%|█▏ | 45395/371472 [3:39:40<27:14:25, 3.33it/s] 12%|█▏ | 45396/371472 [3:39:41<26:29:30, 3.42it/s] 12%|█▏ | 45397/371472 [3:39:41<25:49:38, 3.51it/s] 12%|█▏ | 45398/371472 [3:39:41<26:30:19, 3.42it/s] 12%|█▏ | 45399/371472 [3:39:42<26:05:44, 3.47it/s] 12%|█▏ | 45400/371472 [3:39:42<25:43:28, 3.52it/s] {'loss': 4.6196, 'learning_rate': 8.904307354176991e-07, 'epoch': 1.96} + 12%|█▏ | 45400/371472 [3:39:42<25:43:28, 3.52it/s] 12%|█▏ | 45401/371472 [3:39:42<25:22:22, 3.57it/s] 12%|█▏ | 45402/371472 [3:39:42<26:04:21, 3.47it/s] 12%|█▏ | 45403/371472 [3:39:43<25:37:32, 3.53it/s] 12%|█▏ | 45404/371472 [3:39:43<27:38:40, 3.28it/s] 12%|█▏ | 45405/371472 [3:39:43<27:31:42, 3.29it/s] 12%|█▏ | 45406/371472 [3:39:44<26:37:24, 3.40it/s] 12%|█▏ | 45407/371472 [3:39:44<27:02:41, 3.35it/s] 12%|█▏ | 45408/371472 [3:39:44<26:41:06, 3.39it/s] 12%|█▏ | 45409/371472 [3:39:44<26:04:11, 3.47it/s] 12%|█▏ | 45410/371472 [3:39:45<26:03:23, 3.48it/s] 12%|█▏ | 45411/371472 [3:39:45<26:28:49, 3.42it/s] 12%|█▏ | 45412/371472 [3:39:45<25:57:59, 3.49it/s] 12%|█▏ | 45413/371472 [3:39:46<25:11:57, 3.59it/s] 12%|█▏ | 45414/371472 [3:39:46<25:48:57, 3.51it/s] 12%|█▏ | 45415/371472 [3:39:46<25:23:18, 3.57it/s] 12%|█▏ | 45416/371472 [3:39:46<25:57:35, 3.49it/s] 12%|█▏ | 45417/371472 [3:39:47<25:58:54, 3.49it/s] 12%|█▏ | 45418/371472 [3:39:47<25:33:10, 3.54it/s] 12%|█▏ | 45419/371472 [3:39:47<25:15:07, 3.59it/s] 12%|█▏ | 45420/371472 [3:39:48<25:00:03, 3.62it/s] {'loss': 4.3089, 'learning_rate': 8.903822534422203e-07, 'epoch': 1.96} + 12%|█▏ | 45420/371472 [3:39:48<25:00:03, 3.62it/s] 12%|█▏ | 45421/371472 [3:39:48<24:19:20, 3.72it/s] 12%|█▏ | 45422/371472 [3:39:48<24:26:59, 3.70it/s] 12%|█▏ | 45423/371472 [3:39:48<24:09:06, 3.75it/s] 12%|█▏ | 45424/371472 [3:39:49<26:46:14, 3.38it/s] 12%|█▏ | 45425/371472 [3:39:49<28:08:30, 3.22it/s] 12%|█▏ | 45426/371472 [3:39:49<27:31:50, 3.29it/s] 12%|█▏ | 45427/371472 [3:39:50<27:01:49, 3.35it/s] 12%|█▏ | 45428/371472 [3:39:50<26:36:02, 3.40it/s] 12%|█▏ | 45429/371472 [3:39:50<26:01:55, 3.48it/s] 12%|█▏ | 45430/371472 [3:39:50<25:44:20, 3.52it/s] 12%|█▏ | 45431/371472 [3:39:51<25:44:17, 3.52it/s] 12%|█▏ | 45432/371472 [3:39:51<25:49:37, 3.51it/s] 12%|█▏ | 45433/371472 [3:39:51<25:10:01, 3.60it/s] 12%|█▏ | 45434/371472 [3:39:52<25:21:53, 3.57it/s] 12%|█▏ | 45435/371472 [3:39:52<25:06:03, 3.61it/s] 12%|█▏ | 45436/371472 [3:39:52<27:28:41, 3.30it/s] 12%|█▏ | 45437/371472 [3:39:53<27:17:09, 3.32it/s] 12%|█▏ | 45438/371472 [3:39:53<25:54:34, 3.50it/s] 12%|█▏ | 45439/371472 [3:39:53<25:09:57, 3.60it/s] 12%|█▏ | 45440/371472 [3:39:53<25:44:36, 3.52it/s] {'loss': 4.363, 'learning_rate': 8.903337714667413e-07, 'epoch': 1.96} + 12%|█▏ | 45440/371472 [3:39:53<25:44:36, 3.52it/s] 12%|█▏ | 45441/371472 [3:39:54<26:05:27, 3.47it/s] 12%|█▏ | 45442/371472 [3:39:54<27:33:58, 3.29it/s] 12%|█▏ | 45443/371472 [3:39:54<26:32:51, 3.41it/s] 12%|█▏ | 45444/371472 [3:39:55<26:06:47, 3.47it/s] 12%|█▏ | 45445/371472 [3:39:55<27:02:11, 3.35it/s] 12%|█▏ | 45446/371472 [3:39:55<25:43:36, 3.52it/s] 12%|█▏ | 45447/371472 [3:39:55<26:02:05, 3.48it/s] 12%|█▏ | 45448/371472 [3:39:56<24:47:43, 3.65it/s] 12%|█▏ | 45449/371472 [3:39:56<25:48:34, 3.51it/s] 12%|█▏ | 45450/371472 [3:39:56<25:41:49, 3.52it/s] 12%|█▏ | 45451/371472 [3:39:57<25:35:17, 3.54it/s] 12%|█▏ | 45452/371472 [3:39:57<25:13:01, 3.59it/s] 12%|█▏ | 45453/371472 [3:39:57<24:51:22, 3.64it/s] 12%|█▏ | 45454/371472 [3:39:57<25:04:22, 3.61it/s] 12%|█▏ | 45455/371472 [3:39:58<25:28:11, 3.56it/s] 12%|█▏ | 45456/371472 [3:39:58<24:49:05, 3.65it/s] 12%|█▏ | 45457/371472 [3:39:58<24:35:05, 3.68it/s] 12%|█▏ | 45458/371472 [3:39:58<25:23:27, 3.57it/s] 12%|█▏ | 45459/371472 [3:39:59<24:56:31, 3.63it/s] 12%|█▏ | 45460/371472 [3:39:59<24:54:26, 3.64it/s] {'loss': 4.329, 'learning_rate': 8.902852894912624e-07, 'epoch': 1.96} + 12%|█▏ | 45460/371472 [3:39:59<24:54:26, 3.64it/s] 12%|█▏ | 45461/371472 [3:39:59<26:24:19, 3.43it/s] 12%|█▏ | 45462/371472 [3:40:00<25:17:57, 3.58it/s] 12%|█▏ | 45463/371472 [3:40:00<24:55:14, 3.63it/s] 12%|█▏ | 45464/371472 [3:40:00<24:24:27, 3.71it/s] 12%|█▏ | 45465/371472 [3:40:00<24:30:45, 3.69it/s] 12%|█▏ | 45466/371472 [3:40:01<24:34:26, 3.69it/s] 12%|█▏ | 45467/371472 [3:40:01<26:34:42, 3.41it/s] 12%|█▏ | 45468/371472 [3:40:01<26:54:54, 3.36it/s] 12%|█▏ | 45469/371472 [3:40:02<25:50:36, 3.50it/s] 12%|█▏ | 45470/371472 [3:40:02<25:45:06, 3.52it/s] 12%|█▏ | 45471/371472 [3:40:02<25:55:32, 3.49it/s] 12%|█▏ | 45472/371472 [3:40:02<25:41:51, 3.52it/s] 12%|█▏ | 45473/371472 [3:40:03<26:05:24, 3.47it/s] 12%|█▏ | 45474/371472 [3:40:03<26:25:53, 3.43it/s] 12%|█▏ | 45475/371472 [3:40:03<27:29:53, 3.29it/s] 12%|█▏ | 45476/371472 [3:40:04<27:07:10, 3.34it/s] 12%|█▏ | 45477/371472 [3:40:04<28:37:27, 3.16it/s] 12%|█▏ | 45478/371472 [3:40:04<28:29:14, 3.18it/s] 12%|█▏ | 45479/371472 [3:40:05<27:17:59, 3.32it/s] 12%|█▏ | 45480/371472 [3:40:05<26:07:21, 3.47it/s] {'loss': 4.4342, 'learning_rate': 8.902368075157835e-07, 'epoch': 1.96} + 12%|█▏ | 45480/371472 [3:40:05<26:07:21, 3.47it/s] 12%|█▏ | 45481/371472 [3:40:05<25:33:58, 3.54it/s] 12%|█▏ | 45482/371472 [3:40:05<28:16:17, 3.20it/s] 12%|█▏ | 45483/371472 [3:40:06<27:36:11, 3.28it/s] 12%|█▏ | 45484/371472 [3:40:06<26:44:44, 3.39it/s] 12%|█▏ | 45485/371472 [3:40:06<28:14:49, 3.21it/s] 12%|█▏ | 45486/371472 [3:40:07<26:51:19, 3.37it/s] 12%|█▏ | 45487/371472 [3:40:07<27:10:09, 3.33it/s] 12%|█▏ | 45488/371472 [3:40:07<26:24:29, 3.43it/s] 12%|█▏ | 45489/371472 [3:40:07<25:50:42, 3.50it/s] 12%|█▏ | 45490/371472 [3:40:08<26:43:37, 3.39it/s] 12%|█▏ | 45491/371472 [3:40:08<25:17:21, 3.58it/s] 12%|█▏ | 45492/371472 [3:40:08<25:27:43, 3.56it/s] 12%|█▏ | 45493/371472 [3:40:09<25:01:00, 3.62it/s] 12%|█▏ | 45494/371472 [3:40:09<28:10:26, 3.21it/s] 12%|█▏ | 45495/371472 [3:40:09<29:04:30, 3.11it/s] 12%|█▏ | 45496/371472 [3:40:10<29:57:24, 3.02it/s] 12%|█▏ | 45497/371472 [3:40:10<28:21:34, 3.19it/s] 12%|█▏ | 45498/371472 [3:40:10<28:13:56, 3.21it/s] 12%|█▏ | 45499/371472 [3:40:11<27:39:09, 3.27it/s] 12%|█▏ | 45500/371472 [3:40:11<26:53:04, 3.37it/s] {'loss': 4.4552, 'learning_rate': 8.901883255403046e-07, 'epoch': 1.96} + 12%|█▏ | 45500/371472 [3:40:11<26:53:04, 3.37it/s] 12%|█▏ | 45501/371472 [3:40:11<26:14:55, 3.45it/s] 12%|█▏ | 45502/371472 [3:40:11<25:37:43, 3.53it/s] 12%|█▏ | 45503/371472 [3:40:12<25:04:43, 3.61it/s] 12%|█▏ | 45504/371472 [3:40:12<27:48:41, 3.26it/s] 12%|█▏ | 45505/371472 [3:40:12<28:36:14, 3.17it/s] 12%|█▏ | 45506/371472 [3:40:13<27:47:04, 3.26it/s] 12%|█▏ | 45507/371472 [3:40:13<27:24:07, 3.30it/s] 12%|█▏ | 45508/371472 [3:40:13<27:09:55, 3.33it/s] 12%|█▏ | 45509/371472 [3:40:13<26:37:02, 3.40it/s] 12%|█▏ | 45510/371472 [3:40:14<26:07:59, 3.46it/s] 12%|█▏ | 45511/371472 [3:40:14<26:12:06, 3.46it/s] 12%|█▏ | 45512/371472 [3:40:14<25:34:21, 3.54it/s] 12%|█▏ | 45513/371472 [3:40:15<27:18:44, 3.32it/s] 12%|█▏ | 45514/371472 [3:40:15<26:42:26, 3.39it/s] 12%|█▏ | 45515/371472 [3:40:15<27:24:18, 3.30it/s] 12%|█▏ | 45516/371472 [3:40:16<26:31:11, 3.41it/s] 12%|█▏ | 45517/371472 [3:40:16<26:28:04, 3.42it/s] 12%|█▏ | 45518/371472 [3:40:16<25:19:38, 3.57it/s] 12%|█▏ | 45519/371472 [3:40:16<25:49:31, 3.51it/s] 12%|█▏ | 45520/371472 [3:40:17<25:03:32, 3.61it/s] {'loss': 4.3011, 'learning_rate': 8.901398435648258e-07, 'epoch': 1.96} + 12%|█▏ | 45520/371472 [3:40:17<25:03:32, 3.61it/s] 12%|█▏ | 45521/371472 [3:40:17<25:06:32, 3.61it/s] 12%|█▏ | 45522/371472 [3:40:17<25:25:54, 3.56it/s] 12%|█▏ | 45523/371472 [3:40:17<25:31:10, 3.55it/s] 12%|█▏ | 45524/371472 [3:40:18<26:02:42, 3.48it/s] 12%|█▏ | 45525/371472 [3:40:18<26:13:04, 3.45it/s] 12%|█▏ | 45526/371472 [3:40:18<26:39:07, 3.40it/s] 12%|█▏ | 45527/371472 [3:40:19<29:11:59, 3.10it/s] 12%|█▏ | 45528/371472 [3:40:19<27:21:48, 3.31it/s] 12%|█▏ | 45529/371472 [3:40:19<26:30:21, 3.42it/s] 12%|█▏ | 45530/371472 [3:40:20<26:40:33, 3.39it/s] 12%|█▏ | 45531/371472 [3:40:20<25:55:02, 3.49it/s] 12%|█▏ | 45532/371472 [3:40:20<24:52:02, 3.64it/s] 12%|█▏ | 45533/371472 [3:40:20<24:25:11, 3.71it/s] 12%|█▏ | 45534/371472 [3:40:21<24:36:40, 3.68it/s] 12%|█▏ | 45535/371472 [3:40:21<24:47:51, 3.65it/s] 12%|█▏ | 45536/371472 [3:40:21<25:40:14, 3.53it/s] 12%|█▏ | 45537/371472 [3:40:22<25:31:36, 3.55it/s] 12%|█▏ | 45538/371472 [3:40:22<24:37:54, 3.68it/s] 12%|█▏ | 45539/371472 [3:40:22<25:02:13, 3.62it/s] 12%|█▏ | 45540/371472 [3:40:22<25:15:46, 3.58it/s] {'loss': 4.3669, 'learning_rate': 8.900913615893468e-07, 'epoch': 1.96} + 12%|█▏ | 45540/371472 [3:40:22<25:15:46, 3.58it/s] 12%|█▏ | 45541/371472 [3:40:23<25:38:51, 3.53it/s] 12%|█▏ | 45542/371472 [3:40:23<26:47:50, 3.38it/s] 12%|█▏ | 45543/371472 [3:40:23<27:48:06, 3.26it/s] 12%|█▏ | 45544/371472 [3:40:24<26:28:58, 3.42it/s] 12%|█▏ | 45545/371472 [3:40:24<25:50:59, 3.50it/s] 12%|█▏ | 45546/371472 [3:40:24<26:18:45, 3.44it/s] 12%|█▏ | 45547/371472 [3:40:24<25:31:58, 3.55it/s] 12%|█▏ | 45548/371472 [3:40:25<26:27:12, 3.42it/s] 12%|█▏ | 45549/371472 [3:40:25<26:01:41, 3.48it/s] 12%|█▏ | 45550/371472 [3:40:25<26:48:28, 3.38it/s] 12%|█▏ | 45551/371472 [3:40:26<26:07:33, 3.47it/s] 12%|█▏ | 45552/371472 [3:40:26<25:44:27, 3.52it/s] 12%|█▏ | 45553/371472 [3:40:26<26:05:47, 3.47it/s] 12%|█▏ | 45554/371472 [3:40:26<26:38:14, 3.40it/s] 12%|█▏ | 45555/371472 [3:40:27<25:44:23, 3.52it/s] 12%|█▏ | 45556/371472 [3:40:27<26:00:23, 3.48it/s] 12%|█▏ | 45557/371472 [3:40:27<27:13:24, 3.33it/s] 12%|█▏ | 45558/371472 [3:40:28<26:31:59, 3.41it/s] 12%|█▏ | 45559/371472 [3:40:28<30:45:19, 2.94it/s] 12%|█▏ | 45560/371472 [3:40:28<29:22:08, 3.08it/s] {'loss': 4.2394, 'learning_rate': 8.900428796138679e-07, 'epoch': 1.96} + 12%|█▏ | 45560/371472 [3:40:28<29:22:08, 3.08it/s] 12%|█▏ | 45561/371472 [3:40:29<27:39:01, 3.27it/s] 12%|█▏ | 45562/371472 [3:40:29<28:19:24, 3.20it/s] 12%|█▏ | 45563/371472 [3:40:29<27:24:11, 3.30it/s] 12%|█▏ | 45564/371472 [3:40:29<26:33:27, 3.41it/s] 12%|█▏ | 45565/371472 [3:40:30<28:50:29, 3.14it/s] 12%|█▏ | 45566/371472 [3:40:30<27:29:28, 3.29it/s] 12%|█▏ | 45567/371472 [3:40:30<27:27:38, 3.30it/s] 12%|█▏ | 45568/371472 [3:40:31<26:12:09, 3.45it/s] 12%|█▏ | 45569/371472 [3:40:31<25:56:46, 3.49it/s] 12%|█▏ | 45570/371472 [3:40:31<31:23:20, 2.88it/s] 12%|█▏ | 45571/371472 [3:40:32<29:26:27, 3.07it/s] 12%|█▏ | 45572/371472 [3:40:32<27:45:38, 3.26it/s] 12%|█▏ | 45573/371472 [3:40:32<26:26:00, 3.42it/s] 12%|█▏ | 45574/371472 [3:40:33<27:45:32, 3.26it/s] 12%|█▏ | 45575/371472 [3:40:33<26:51:32, 3.37it/s] 12%|█▏ | 45576/371472 [3:40:33<26:44:55, 3.38it/s] 12%|█▏ | 45577/371472 [3:40:33<27:04:33, 3.34it/s] 12%|█▏ | 45578/371472 [3:40:34<25:53:56, 3.50it/s] 12%|█▏ | 45579/371472 [3:40:34<27:10:46, 3.33it/s] 12%|█▏ | 45580/371472 [3:40:34<26:26:57, 3.42it/s] {'loss': 4.2049, 'learning_rate': 8.89994397638389e-07, 'epoch': 1.96} + 12%|█▏ | 45580/371472 [3:40:34<26:26:57, 3.42it/s] 12%|█▏ | 45581/371472 [3:40:35<25:49:23, 3.51it/s] 12%|█▏ | 45582/371472 [3:40:35<29:31:12, 3.07it/s] 12%|█▏ | 45583/371472 [3:40:35<28:12:50, 3.21it/s] 12%|█▏ | 45584/371472 [3:40:36<27:43:12, 3.27it/s] 12%|█▏ | 45585/371472 [3:40:36<27:45:21, 3.26it/s] 12%|█▏ | 45586/371472 [3:40:36<27:11:18, 3.33it/s] 12%|█▏ | 45587/371472 [3:40:36<25:52:55, 3.50it/s] 12%|█▏ | 45588/371472 [3:40:37<25:18:46, 3.58it/s] 12%|█▏ | 45589/371472 [3:40:37<25:19:03, 3.58it/s] 12%|█▏ | 45590/371472 [3:40:37<25:11:47, 3.59it/s] 12%|█▏ | 45591/371472 [3:40:38<24:50:00, 3.65it/s] 12%|█▏ | 45592/371472 [3:40:38<25:07:07, 3.60it/s] 12%|█▏ | 45593/371472 [3:40:38<24:37:11, 3.68it/s] 12%|█▏ | 45594/371472 [3:40:38<24:29:31, 3.70it/s] 12%|█▏ | 45595/371472 [3:40:39<25:51:01, 3.50it/s] 12%|█▏ | 45596/371472 [3:40:39<25:40:07, 3.53it/s] 12%|█▏ | 45597/371472 [3:40:39<25:35:54, 3.54it/s] 12%|█▏ | 45598/371472 [3:40:40<26:20:46, 3.44it/s] 12%|█▏ | 45599/371472 [3:40:40<25:37:43, 3.53it/s] 12%|█▏ | 45600/371472 [3:40:40<25:01:47, 3.62it/s] {'loss': 4.2717, 'learning_rate': 8.899459156629102e-07, 'epoch': 1.96} + 12%|█▏ | 45600/371472 [3:40:40<25:01:47, 3.62it/s] 12%|█▏ | 45601/371472 [3:40:40<24:39:11, 3.67it/s] 12%|█▏ | 45602/371472 [3:40:41<24:28:01, 3.70it/s] 12%|█▏ | 45603/371472 [3:40:41<24:20:59, 3.72it/s] 12%|█▏ | 45604/371472 [3:40:41<26:59:36, 3.35it/s] 12%|█▏ | 45605/371472 [3:40:41<26:19:28, 3.44it/s] 12%|█▏ | 45606/371472 [3:40:42<25:48:14, 3.51it/s] 12%|█▏ | 45607/371472 [3:40:42<25:55:37, 3.49it/s] 12%|█▏ | 45608/371472 [3:40:42<26:55:28, 3.36it/s] 12%|█▏ | 45609/371472 [3:40:43<26:05:03, 3.47it/s] 12%|█▏ | 45610/371472 [3:40:43<29:33:42, 3.06it/s] 12%|█▏ | 45611/371472 [3:40:43<28:44:03, 3.15it/s] 12%|█▏ | 45612/371472 [3:40:44<28:19:20, 3.20it/s] 12%|█▏ | 45613/371472 [3:40:44<28:21:19, 3.19it/s] 12%|█▏ | 45614/371472 [3:40:44<27:19:34, 3.31it/s] 12%|█▏ | 45615/371472 [3:40:44<25:59:28, 3.48it/s] 12%|█▏ | 45616/371472 [3:40:45<25:20:01, 3.57it/s] 12%|█▏ | 45617/371472 [3:40:45<24:27:29, 3.70it/s] 12%|█▏ | 45618/371472 [3:40:45<25:34:05, 3.54it/s] 12%|█▏ | 45619/371472 [3:40:46<25:22:17, 3.57it/s] 12%|█▏ | 45620/371472 [3:40:46<25:38:18, 3.53it/s] {'loss': 4.5402, 'learning_rate': 8.898974336874312e-07, 'epoch': 1.96} + 12%|█▏ | 45620/371472 [3:40:46<25:38:18, 3.53it/s] 12%|█▏ | 45621/371472 [3:40:46<25:19:21, 3.57it/s] 12%|█▏ | 45622/371472 [3:40:46<24:57:07, 3.63it/s] 12%|█▏ | 45623/371472 [3:40:47<25:12:53, 3.59it/s] 12%|█▏ | 45624/371472 [3:40:47<26:28:07, 3.42it/s] 12%|█▏ | 45625/371472 [3:40:47<25:54:00, 3.49it/s] 12%|█▏ | 45626/371472 [3:40:48<25:52:34, 3.50it/s] 12%|█▏ | 45627/371472 [3:40:48<25:44:09, 3.52it/s] 12%|█▏ | 45628/371472 [3:40:48<27:02:15, 3.35it/s] 12%|█▏ | 45629/371472 [3:40:49<27:31:10, 3.29it/s] 12%|█▏ | 45630/371472 [3:40:49<27:36:10, 3.28it/s] 12%|█▏ | 45631/371472 [3:40:49<27:23:56, 3.30it/s] 12%|█▏ | 45632/371472 [3:40:49<27:05:41, 3.34it/s] 12%|█▏ | 45633/371472 [3:40:50<26:42:10, 3.39it/s] 12%|█▏ | 45634/371472 [3:40:50<25:44:11, 3.52it/s] 12%|█▏ | 45635/371472 [3:40:50<25:07:03, 3.60it/s] 12%|█▏ | 45636/371472 [3:40:50<24:35:01, 3.68it/s] 12%|█▏ | 45637/371472 [3:40:51<25:26:15, 3.56it/s] 12%|█▏ | 45638/371472 [3:40:51<25:51:58, 3.50it/s] 12%|█▏ | 45639/371472 [3:40:51<25:30:11, 3.55it/s] 12%|█▏ | 45640/371472 [3:40:52<29:46:42, 3.04it/s] {'loss': 4.304, 'learning_rate': 8.898489517119524e-07, 'epoch': 1.97} + 12%|█▏ | 45640/371472 [3:40:52<29:46:42, 3.04it/s] 12%|█▏ | 45641/371472 [3:40:52<28:18:50, 3.20it/s] 12%|█▏ | 45642/371472 [3:40:52<27:49:17, 3.25it/s] 12%|█▏ | 45643/371472 [3:40:53<27:08:16, 3.34it/s] 12%|█▏ | 45644/371472 [3:40:53<28:32:10, 3.17it/s] 12%|█▏ | 45645/371472 [3:40:53<28:08:58, 3.22it/s] 12%|█▏ | 45646/371472 [3:40:54<26:50:02, 3.37it/s] 12%|█▏ | 45647/371472 [3:40:54<26:57:38, 3.36it/s] 12%|█▏ | 45648/371472 [3:40:54<25:37:36, 3.53it/s] 12%|█▏ | 45649/371472 [3:40:54<25:19:17, 3.57it/s] 12%|█▏ | 45650/371472 [3:40:55<25:54:25, 3.49it/s] 12%|█▏ | 45651/371472 [3:40:55<25:28:51, 3.55it/s] 12%|█▏ | 45652/371472 [3:40:55<26:58:20, 3.36it/s] 12%|█▏ | 45653/371472 [3:40:56<26:05:51, 3.47it/s] 12%|█▏ | 45654/371472 [3:40:56<31:43:28, 2.85it/s] 12%|█▏ | 45655/371472 [3:40:56<30:20:26, 2.98it/s] 12%|█▏ | 45656/371472 [3:40:57<28:25:15, 3.18it/s] 12%|█▏ | 45657/371472 [3:40:57<27:32:45, 3.29it/s] 12%|█▏ | 45658/371472 [3:40:57<26:36:52, 3.40it/s] 12%|█▏ | 45659/371472 [3:40:57<25:58:56, 3.48it/s] 12%|█▏ | 45660/371472 [3:40:58<25:23:35, 3.56it/s] {'loss': 4.3852, 'learning_rate': 8.898004697364735e-07, 'epoch': 1.97} + 12%|█▏ | 45660/371472 [3:40:58<25:23:35, 3.56it/s] 12%|█▏ | 45661/371472 [3:40:58<25:24:22, 3.56it/s] 12%|█▏ | 45662/371472 [3:40:58<27:15:40, 3.32it/s] 12%|█▏ | 45663/371472 [3:40:59<29:36:23, 3.06it/s] 12%|█▏ | 45664/371472 [3:40:59<32:28:33, 2.79it/s] 12%|█▏ | 45665/371472 [3:40:59<31:25:12, 2.88it/s] 12%|█▏ | 45666/371472 [3:41:00<29:07:06, 3.11it/s] 12%|█▏ | 45667/371472 [3:41:00<30:29:54, 2.97it/s] 12%|█▏ | 45668/371472 [3:41:00<29:07:14, 3.11it/s] 12%|█▏ | 45669/371472 [3:41:01<29:00:40, 3.12it/s] 12%|█▏ | 45670/371472 [3:41:01<27:33:31, 3.28it/s] 12%|█▏ | 45671/371472 [3:41:01<26:27:35, 3.42it/s] 12%|█▏ | 45672/371472 [3:41:02<26:25:43, 3.42it/s] 12%|█▏ | 45673/371472 [3:41:02<25:31:41, 3.55it/s] 12%|█▏ | 45674/371472 [3:41:02<25:19:38, 3.57it/s] 12%|█▏ | 45675/371472 [3:41:02<25:06:17, 3.60it/s] 12%|█▏ | 45676/371472 [3:41:03<24:14:48, 3.73it/s] 12%|█▏ | 45677/371472 [3:41:03<23:51:32, 3.79it/s] 12%|█▏ | 45678/371472 [3:41:03<23:32:21, 3.84it/s] 12%|█▏ | 45679/371472 [3:41:03<24:02:30, 3.76it/s] 12%|█▏ | 45680/371472 [3:41:04<23:36:55, 3.83it/s] {'loss': 4.3731, 'learning_rate': 8.897519877609947e-07, 'epoch': 1.97} + 12%|█▏ | 45680/371472 [3:41:04<23:36:55, 3.83it/s] 12%|█▏ | 45681/371472 [3:41:04<23:46:58, 3.81it/s] 12%|█▏ | 45682/371472 [3:41:04<25:41:46, 3.52it/s] 12%|█▏ | 45683/371472 [3:41:05<28:34:44, 3.17it/s] 12%|█▏ | 45684/371472 [3:41:05<28:22:28, 3.19it/s] 12%|█▏ | 45685/371472 [3:41:05<26:51:45, 3.37it/s] 12%|█▏ | 45686/371472 [3:41:06<27:55:22, 3.24it/s] 12%|█▏ | 45687/371472 [3:41:06<26:25:43, 3.42it/s] 12%|█▏ | 45688/371472 [3:41:06<25:53:38, 3.49it/s] 12%|█▏ | 45689/371472 [3:41:06<25:21:11, 3.57it/s] 12%|█▏ | 45690/371472 [3:41:07<27:06:50, 3.34it/s] 12%|█▏ | 45691/371472 [3:41:07<26:15:15, 3.45it/s] 12%|█▏ | 45692/371472 [3:41:07<25:08:27, 3.60it/s] 12%|█▏ | 45693/371472 [3:41:07<24:56:37, 3.63it/s] 12%|█▏ | 45694/371472 [3:41:08<24:47:27, 3.65it/s] 12%|█▏ | 45695/371472 [3:41:08<25:27:20, 3.55it/s] 12%|█▏ | 45696/371472 [3:41:08<26:06:29, 3.47it/s] 12%|█▏ | 45697/371472 [3:41:09<25:07:25, 3.60it/s] 12%|█▏ | 45698/371472 [3:41:09<25:25:24, 3.56it/s] 12%|█▏ | 45699/371472 [3:41:09<25:17:46, 3.58it/s] 12%|█▏ | 45700/371472 [3:41:09<25:51:23, 3.50it/s] {'loss': 4.3931, 'learning_rate': 8.897035057855156e-07, 'epoch': 1.97} + 12%|█▏ | 45700/371472 [3:41:09<25:51:23, 3.50it/s] 12%|█▏ | 45701/371472 [3:41:10<25:48:36, 3.51it/s] 12%|█▏ | 45702/371472 [3:41:10<25:10:04, 3.60it/s] 12%|█▏ | 45703/371472 [3:41:10<24:34:09, 3.68it/s] 12%|█▏ | 45704/371472 [3:41:10<24:02:48, 3.76it/s] 12%|█▏ | 45705/371472 [3:41:11<25:51:02, 3.50it/s] 12%|█▏ | 45706/371472 [3:41:11<26:56:53, 3.36it/s] 12%|█▏ | 45707/371472 [3:41:11<26:22:06, 3.43it/s] 12%|█▏ | 45708/371472 [3:41:12<25:28:27, 3.55it/s] 12%|█▏ | 45709/371472 [3:41:12<25:35:57, 3.53it/s] 12%|█▏ | 45710/371472 [3:41:12<27:20:10, 3.31it/s] 12%|█▏ | 45711/371472 [3:41:13<26:16:56, 3.44it/s] 12%|█▏ | 45712/371472 [3:41:13<25:14:27, 3.58it/s] 12%|█▏ | 45713/371472 [3:41:13<24:37:09, 3.68it/s] 12%|█▏ | 45714/371472 [3:41:13<24:15:38, 3.73it/s] 12%|█▏ | 45715/371472 [3:41:14<24:33:27, 3.68it/s] 12%|█▏ | 45716/371472 [3:41:14<24:47:01, 3.65it/s] 12%|█▏ | 45717/371472 [3:41:14<24:16:42, 3.73it/s] 12%|█▏ | 45718/371472 [3:41:14<24:58:23, 3.62it/s] 12%|█▏ | 45719/371472 [3:41:15<25:01:32, 3.62it/s] 12%|█▏ | 45720/371472 [3:41:15<25:12:35, 3.59it/s] {'loss': 4.2081, 'learning_rate': 8.896550238100368e-07, 'epoch': 1.97} + 12%|█▏ | 45720/371472 [3:41:15<25:12:35, 3.59it/s] 12%|█▏ | 45721/371472 [3:41:15<24:54:24, 3.63it/s] 12%|█▏ | 45722/371472 [3:41:16<26:40:57, 3.39it/s] 12%|█▏ | 45723/371472 [3:41:16<25:34:46, 3.54it/s] 12%|█▏ | 45724/371472 [3:41:16<26:13:42, 3.45it/s] 12%|█▏ | 45725/371472 [3:41:16<27:01:07, 3.35it/s] 12%|█▏ | 45726/371472 [3:41:17<25:47:20, 3.51it/s] 12%|█▏ | 45727/371472 [3:41:17<24:45:19, 3.66it/s] 12%|█▏ | 45728/371472 [3:41:17<24:52:06, 3.64it/s] 12%|█▏ | 45729/371472 [3:41:18<26:40:44, 3.39it/s] 12%|█▏ | 45730/371472 [3:41:18<27:00:35, 3.35it/s] 12%|█▏ | 45731/371472 [3:41:18<25:37:21, 3.53it/s] 12%|█▏ | 45732/371472 [3:41:18<25:05:23, 3.61it/s] 12%|█▏ | 45733/371472 [3:41:19<24:16:34, 3.73it/s] 12%|█▏ | 45734/371472 [3:41:19<24:37:03, 3.68it/s] 12%|█▏ | 45735/371472 [3:41:19<24:10:54, 3.74it/s] 12%|█▏ | 45736/371472 [3:41:19<24:32:29, 3.69it/s] 12%|█▏ | 45737/371472 [3:41:20<25:09:39, 3.60it/s] 12%|█▏ | 45738/371472 [3:41:20<25:52:04, 3.50it/s] 12%|█▏ | 45739/371472 [3:41:20<26:45:48, 3.38it/s] 12%|█▏ | 45740/371472 [3:41:21<25:43:57, 3.52it/s] {'loss': 4.1342, 'learning_rate': 8.896065418345579e-07, 'epoch': 1.97} + 12%|█▏ | 45740/371472 [3:41:21<25:43:57, 3.52it/s] 12%|█▏ | 45741/371472 [3:41:21<25:39:28, 3.53it/s] 12%|█▏ | 45742/371472 [3:41:21<26:13:34, 3.45it/s] 12%|█▏ | 45743/371472 [3:41:22<27:26:45, 3.30it/s] 12%|█▏ | 45744/371472 [3:41:22<27:24:32, 3.30it/s] 12%|█▏ | 45745/371472 [3:41:22<26:15:35, 3.45it/s] 12%|█▏ | 45746/371472 [3:41:22<25:10:11, 3.59it/s] 12%|█▏ | 45747/371472 [3:41:23<26:00:22, 3.48it/s] 12%|█▏ | 45748/371472 [3:41:23<25:52:44, 3.50it/s] 12%|█▏ | 45749/371472 [3:41:23<25:13:23, 3.59it/s] 12%|█▏ | 45750/371472 [3:41:24<24:44:19, 3.66it/s] 12%|█▏ | 45751/371472 [3:41:24<24:45:49, 3.65it/s] 12%|█▏ | 45752/371472 [3:41:24<25:14:25, 3.58it/s] 12%|█▏ | 45753/371472 [3:41:24<25:52:06, 3.50it/s] 12%|█▏ | 45754/371472 [3:41:25<26:09:47, 3.46it/s] 12%|█▏ | 45755/371472 [3:41:25<25:18:44, 3.57it/s] 12%|█▏ | 45756/371472 [3:41:25<26:17:24, 3.44it/s] 12%|█▏ | 45757/371472 [3:41:26<26:22:27, 3.43it/s] 12%|█▏ | 45758/371472 [3:41:26<25:55:23, 3.49it/s] 12%|█▏ | 45759/371472 [3:41:26<27:01:48, 3.35it/s] 12%|█▏ | 45760/371472 [3:41:26<26:14:04, 3.45it/s] {'loss': 4.3196, 'learning_rate': 8.895580598590791e-07, 'epoch': 1.97} + 12%|█▏ | 45760/371472 [3:41:26<26:14:04, 3.45it/s] 12%|█▏ | 45761/371472 [3:41:27<26:34:05, 3.41it/s] 12%|█▏ | 45762/371472 [3:41:27<26:52:20, 3.37it/s] 12%|█▏ | 45763/371472 [3:41:27<27:00:21, 3.35it/s] 12%|█▏ | 45764/371472 [3:41:28<26:03:10, 3.47it/s] 12%|█▏ | 45765/371472 [3:41:28<27:13:23, 3.32it/s] 12%|█▏ | 45766/371472 [3:41:28<27:40:57, 3.27it/s] 12%|█▏ | 45767/371472 [3:41:29<27:22:53, 3.30it/s] 12%|█▏ | 45768/371472 [3:41:29<27:29:45, 3.29it/s] 12%|█▏ | 45769/371472 [3:41:29<27:48:54, 3.25it/s] 12%|█▏ | 45770/371472 [3:41:29<26:26:32, 3.42it/s] 12%|█▏ | 45771/371472 [3:41:30<25:36:34, 3.53it/s] 12%|█▏ | 45772/371472 [3:41:30<25:04:33, 3.61it/s] 12%|█▏ | 45773/371472 [3:41:30<24:37:12, 3.67it/s] 12%|█▏ | 45774/371472 [3:41:30<24:03:30, 3.76it/s] 12%|█▏ | 45775/371472 [3:41:31<23:58:51, 3.77it/s] 12%|█▏ | 45776/371472 [3:41:31<24:02:14, 3.76it/s] 12%|█▏ | 45777/371472 [3:41:31<24:06:12, 3.75it/s] 12%|█▏ | 45778/371472 [3:41:32<24:20:00, 3.72it/s] 12%|█▏ | 45779/371472 [3:41:32<26:45:13, 3.38it/s] 12%|█▏ | 45780/371472 [3:41:32<27:34:43, 3.28it/s] {'loss': 4.2608, 'learning_rate': 8.895095778836001e-07, 'epoch': 1.97} + 12%|█▏ | 45780/371472 [3:41:32<27:34:43, 3.28it/s] 12%|█▏ | 45781/371472 [3:41:32<27:03:55, 3.34it/s] 12%|█▏ | 45782/371472 [3:41:33<26:37:30, 3.40it/s] 12%|█▏ | 45783/371472 [3:41:33<26:44:20, 3.38it/s] 12%|█▏ | 45784/371472 [3:41:33<27:00:11, 3.35it/s] 12%|█▏ | 45785/371472 [3:41:34<26:13:08, 3.45it/s] 12%|█▏ | 45786/371472 [3:41:34<26:33:53, 3.41it/s] 12%|█▏ | 45787/371472 [3:41:34<25:32:34, 3.54it/s] 12%|█▏ | 45788/371472 [3:41:35<25:43:12, 3.52it/s] 12%|█▏ | 45789/371472 [3:41:35<26:47:38, 3.38it/s] 12%|█▏ | 45790/371472 [3:41:35<26:09:11, 3.46it/s] 12%|█▏ | 45791/371472 [3:41:35<25:35:48, 3.53it/s] 12%|█▏ | 45792/371472 [3:41:36<26:50:03, 3.37it/s] 12%|█▏ | 45793/371472 [3:41:36<26:37:33, 3.40it/s] 12%|█▏ | 45794/371472 [3:41:36<26:35:54, 3.40it/s] 12%|█▏ | 45795/371472 [3:41:37<25:20:14, 3.57it/s] 12%|█▏ | 45796/371472 [3:41:37<26:59:53, 3.35it/s] 12%|█▏ | 45797/371472 [3:41:37<25:32:33, 3.54it/s] 12%|█▏ | 45798/371472 [3:41:37<24:25:08, 3.70it/s] 12%|█▏ | 45799/371472 [3:41:38<24:24:46, 3.71it/s] 12%|█▏ | 45800/371472 [3:41:38<23:47:44, 3.80it/s] {'loss': 4.2443, 'learning_rate': 8.894610959081213e-07, 'epoch': 1.97} + 12%|█▏ | 45800/371472 [3:41:38<23:47:44, 3.80it/s] 12%|█▏ | 45801/371472 [3:41:38<24:12:43, 3.74it/s] 12%|█▏ | 45802/371472 [3:41:38<25:30:44, 3.55it/s] 12%|█▏ | 45803/371472 [3:41:39<27:31:01, 3.29it/s] 12%|█▏ | 45804/371472 [3:41:39<26:17:21, 3.44it/s] 12%|█▏ | 45805/371472 [3:41:39<25:36:07, 3.53it/s] 12%|█▏ | 45806/371472 [3:41:40<26:46:58, 3.38it/s] 12%|█▏ | 45807/371472 [3:41:40<26:15:17, 3.45it/s] 12%|█▏ | 45808/371472 [3:41:40<25:53:03, 3.49it/s] 12%|█▏ | 45809/371472 [3:41:40<25:28:22, 3.55it/s] 12%|█▏ | 45810/371472 [3:41:41<25:43:47, 3.52it/s] 12%|█▏ | 45811/371472 [3:41:41<26:13:02, 3.45it/s] 12%|█▏ | 45812/371472 [3:41:41<26:18:54, 3.44it/s] 12%|█▏ | 45813/371472 [3:41:42<25:17:04, 3.58it/s] 12%|█▏ | 45814/371472 [3:41:42<25:47:35, 3.51it/s] 12%|█▏ | 45815/371472 [3:41:42<25:13:29, 3.59it/s] 12%|█▏ | 45816/371472 [3:41:43<26:12:45, 3.45it/s] 12%|█▏ | 45817/371472 [3:41:43<25:41:22, 3.52it/s] 12%|█▏ | 45818/371472 [3:41:43<25:33:45, 3.54it/s] 12%|█▏ | 45819/371472 [3:41:43<25:41:48, 3.52it/s] 12%|█▏ | 45820/371472 [3:41:44<25:40:15, 3.52it/s] {'loss': 4.5963, 'learning_rate': 8.894126139326422e-07, 'epoch': 1.97} + 12%|█▏ | 45820/371472 [3:41:44<25:40:15, 3.52it/s] 12%|█▏ | 45821/371472 [3:41:44<24:55:07, 3.63it/s] 12%|█▏ | 45822/371472 [3:41:44<24:44:43, 3.66it/s] 12%|█▏ | 45823/371472 [3:41:44<24:31:36, 3.69it/s] 12%|█▏ | 45824/371472 [3:41:45<24:49:34, 3.64it/s] 12%|█▏ | 45825/371472 [3:41:45<24:28:12, 3.70it/s] 12%|█▏ | 45826/371472 [3:41:45<25:20:20, 3.57it/s] 12%|█▏ | 45827/371472 [3:41:46<25:06:28, 3.60it/s] 12%|█▏ | 45828/371472 [3:41:46<27:07:40, 3.33it/s] 12%|█▏ | 45829/371472 [3:41:46<26:27:32, 3.42it/s] 12%|█▏ | 45830/371472 [3:41:46<27:08:55, 3.33it/s] 12%|█▏ | 45831/371472 [3:41:47<27:15:16, 3.32it/s] 12%|█▏ | 45832/371472 [3:41:47<27:32:50, 3.28it/s] 12%|█▏ | 45833/371472 [3:41:47<26:16:45, 3.44it/s] 12%|█▏ | 45834/371472 [3:41:48<26:27:02, 3.42it/s] 12%|█▏ | 45835/371472 [3:41:48<25:51:21, 3.50it/s] 12%|█▏ | 45836/371472 [3:41:48<25:53:09, 3.49it/s] 12%|█▏ | 45837/371472 [3:41:49<26:05:11, 3.47it/s] 12%|█▏ | 45838/371472 [3:41:49<25:57:37, 3.48it/s] 12%|█▏ | 45839/371472 [3:41:49<26:05:33, 3.47it/s] 12%|█▏ | 45840/371472 [3:41:49<25:55:28, 3.49it/s] {'loss': 4.1681, 'learning_rate': 8.893641319571635e-07, 'epoch': 1.97} + 12%|█▏ | 45840/371472 [3:41:49<25:55:28, 3.49it/s] 12%|█▏ | 45841/371472 [3:41:50<27:07:59, 3.33it/s] 12%|█▏ | 45842/371472 [3:41:50<27:00:00, 3.35it/s] 12%|█▏ | 45843/371472 [3:41:50<27:25:22, 3.30it/s] 12%|█▏ | 45844/371472 [3:41:51<26:24:40, 3.42it/s] 12%|█▏ | 45845/371472 [3:41:51<28:08:57, 3.21it/s] 12%|█▏ | 45846/371472 [3:41:51<26:58:21, 3.35it/s] 12%|█▏ | 45847/371472 [3:41:51<25:57:34, 3.48it/s] 12%|█▏ | 45848/371472 [3:41:52<25:26:14, 3.56it/s] 12%|█▏ | 45849/371472 [3:41:52<25:43:27, 3.52it/s] 12%|█▏ | 45850/371472 [3:41:52<25:00:01, 3.62it/s] 12%|█▏ | 45851/371472 [3:41:53<24:31:15, 3.69it/s] 12%|█▏ | 45852/371472 [3:41:53<24:00:12, 3.77it/s] 12%|█▏ | 45853/371472 [3:41:53<24:31:20, 3.69it/s] 12%|█▏ | 45854/371472 [3:41:53<24:43:26, 3.66it/s] 12%|█▏ | 45855/371472 [3:41:54<24:12:59, 3.73it/s] 12%|█▏ | 45856/371472 [3:41:54<24:18:16, 3.72it/s] 12%|█▏ | 45857/371472 [3:41:54<23:51:32, 3.79it/s] 12%|█▏ | 45858/371472 [3:41:54<24:44:02, 3.66it/s] 12%|█▏ | 45859/371472 [3:41:55<25:18:10, 3.57it/s] 12%|█▏ | 45860/371472 [3:41:55<24:25:15, 3.70it/s] {'loss': 4.5358, 'learning_rate': 8.893156499816845e-07, 'epoch': 1.98} + 12%|█▏ | 45860/371472 [3:41:55<24:25:15, 3.70it/s] 12%|█▏ | 45861/371472 [3:41:55<25:36:21, 3.53it/s] 12%|█▏ | 45862/371472 [3:41:56<25:04:22, 3.61it/s] 12%|█▏ | 45863/371472 [3:41:56<26:02:08, 3.47it/s] 12%|█▏ | 45864/371472 [3:41:56<25:25:28, 3.56it/s] 12%|█▏ | 45865/371472 [3:41:56<24:43:07, 3.66it/s] 12%|█▏ | 45866/371472 [3:41:57<24:29:19, 3.69it/s] 12%|█��� | 45867/371472 [3:41:57<25:14:08, 3.58it/s] 12%|█▏ | 45868/371472 [3:41:57<24:41:47, 3.66it/s] 12%|█▏ | 45869/371472 [3:41:57<24:00:54, 3.77it/s] 12%|█▏ | 45870/371472 [3:41:58<25:01:54, 3.61it/s] 12%|█▏ | 45871/371472 [3:41:58<25:22:04, 3.57it/s] 12%|█▏ | 45872/371472 [3:41:58<26:14:19, 3.45it/s] 12%|█▏ | 45873/371472 [3:41:59<25:33:40, 3.54it/s] 12%|█▏ | 45874/371472 [3:41:59<25:26:56, 3.55it/s] 12%|█▏ | 45875/371472 [3:41:59<25:29:29, 3.55it/s] 12%|█▏ | 45876/371472 [3:42:00<26:44:58, 3.38it/s] 12%|█▏ | 45877/371472 [3:42:00<26:26:02, 3.42it/s] 12%|█▏ | 45878/371472 [3:42:00<27:27:40, 3.29it/s] 12%|█▏ | 45879/371472 [3:42:00<27:29:05, 3.29it/s] 12%|█▏ | 45880/371472 [3:42:01<32:07:33, 2.82it/s] {'loss': 4.2574, 'learning_rate': 8.892671680062057e-07, 'epoch': 1.98} + 12%|█▏ | 45880/371472 [3:42:01<32:07:33, 2.82it/s] 12%|█▏ | 45881/371472 [3:42:01<31:19:20, 2.89it/s] 12%|█▏ | 45882/371472 [3:42:02<32:26:31, 2.79it/s] 12%|█▏ | 45883/371472 [3:42:02<31:47:21, 2.85it/s] 12%|█▏ | 45884/371472 [3:42:02<31:11:03, 2.90it/s] 12%|█▏ | 45885/371472 [3:42:03<28:59:48, 3.12it/s] 12%|█▏ | 45886/371472 [3:42:03<29:49:07, 3.03it/s] 12%|█▏ | 45887/371472 [3:42:03<29:07:30, 3.11it/s] 12%|█▏ | 45888/371472 [3:42:03<27:58:59, 3.23it/s] 12%|█▏ | 45889/371472 [3:42:04<26:24:28, 3.42it/s] 12%|█▏ | 45890/371472 [3:42:04<25:48:04, 3.51it/s] 12%|█▏ | 45891/371472 [3:42:04<24:53:35, 3.63it/s] 12%|█▏ | 45892/371472 [3:42:05<28:01:12, 3.23it/s] 12%|█▏ | 45893/371472 [3:42:05<27:10:29, 3.33it/s] 12%|█▏ | 45894/371472 [3:42:05<27:19:28, 3.31it/s] 12%|█▏ | 45895/371472 [3:42:05<25:56:34, 3.49it/s] 12%|█▏ | 45896/371472 [3:42:06<24:56:45, 3.63it/s] 12%|█▏ | 45897/371472 [3:42:06<24:17:18, 3.72it/s] 12%|█▏ | 45898/371472 [3:42:06<25:07:15, 3.60it/s] 12%|█▏ | 45899/371472 [3:42:07<27:14:30, 3.32it/s] 12%|█▏ | 45900/371472 [3:42:07<27:01:10, 3.35it/s] {'loss': 4.2747, 'learning_rate': 8.892186860307268e-07, 'epoch': 1.98} + 12%|█▏ | 45900/371472 [3:42:07<27:01:10, 3.35it/s] 12%|█▏ | 45901/371472 [3:42:07<26:53:01, 3.36it/s] 12%|█▏ | 45902/371472 [3:42:08<27:24:40, 3.30it/s] 12%|█▏ | 45903/371472 [3:42:08<25:49:08, 3.50it/s] 12%|█▏ | 45904/371472 [3:42:08<26:42:30, 3.39it/s] 12%|█▏ | 45905/371472 [3:42:08<28:53:26, 3.13it/s] 12%|█▏ | 45906/371472 [3:42:09<27:25:25, 3.30it/s] 12%|█▏ | 45907/371472 [3:42:09<27:19:31, 3.31it/s] 12%|█▏ | 45908/371472 [3:42:09<25:52:01, 3.50it/s] 12%|█▏ | 45909/371472 [3:42:10<25:26:19, 3.55it/s] 12%|█▏ | 45910/371472 [3:42:10<25:02:12, 3.61it/s] 12%|█▏ | 45911/371472 [3:42:10<25:47:32, 3.51it/s] 12%|█▏ | 45912/371472 [3:42:10<26:57:56, 3.35it/s] 12%|█▏ | 45913/371472 [3:42:11<26:11:18, 3.45it/s] 12%|█▏ | 45914/371472 [3:42:11<27:11:55, 3.32it/s] 12%|█▏ | 45915/371472 [3:42:11<27:42:26, 3.26it/s] 12%|█▏ | 45916/371472 [3:42:12<27:13:31, 3.32it/s] 12%|█▏ | 45917/371472 [3:42:12<28:42:15, 3.15it/s] 12%|█▏ | 45918/371472 [3:42:12<27:51:15, 3.25it/s] 12%|█▏ | 45919/371472 [3:42:13<26:56:52, 3.36it/s] 12%|█▏ | 45920/371472 [3:42:13<26:37:06, 3.40it/s] {'loss': 4.4428, 'learning_rate': 8.89170204055248e-07, 'epoch': 1.98} + 12%|█▏ | 45920/371472 [3:42:13<26:37:06, 3.40it/s] 12%|█▏ | 45921/371472 [3:42:13<26:57:34, 3.35it/s] 12%|█▏ | 45922/371472 [3:42:14<28:04:17, 3.22it/s] 12%|█▏ | 45923/371472 [3:42:14<29:07:40, 3.10it/s] 12%|█▏ | 45924/371472 [3:42:14<27:52:37, 3.24it/s] 12%|█▏ | 45925/371472 [3:42:14<26:48:20, 3.37it/s] 12%|█▏ | 45926/371472 [3:42:15<27:23:54, 3.30it/s] 12%|█▏ | 45927/371472 [3:42:15<26:51:06, 3.37it/s] 12%|█▏ | 45928/371472 [3:42:15<26:42:30, 3.39it/s] 12%|█▏ | 45929/371472 [3:42:16<25:40:00, 3.52it/s] 12%|█▏ | 45930/371472 [3:42:16<25:17:49, 3.57it/s] 12%|█▏ | 45931/371472 [3:42:16<28:09:04, 3.21it/s] 12%|█▏ | 45932/371472 [3:42:16<27:21:29, 3.31it/s] 12%|█▏ | 45933/371472 [3:42:17<25:31:09, 3.54it/s] 12%|█▏ | 45934/371472 [3:42:17<24:48:24, 3.65it/s] 12%|█▏ | 45935/371472 [3:42:17<24:46:17, 3.65it/s] 12%|█▏ | 45936/371472 [3:42:18<24:56:44, 3.62it/s] 12%|█▏ | 45937/371472 [3:42:18<25:42:37, 3.52it/s] 12%|█▏ | 45938/371472 [3:42:18<25:16:15, 3.58it/s] 12%|█▏ | 45939/371472 [3:42:18<26:45:50, 3.38it/s] 12%|█▏ | 45940/371472 [3:42:19<27:00:37, 3.35it/s] {'loss': 4.3901, 'learning_rate': 8.891217220797689e-07, 'epoch': 1.98} + 12%|█▏ | 45940/371472 [3:42:19<27:00:37, 3.35it/s] 12%|█▏ | 45941/371472 [3:42:19<26:11:50, 3.45it/s] 12%|█▏ | 45942/371472 [3:42:19<27:46:58, 3.25it/s] 12%|█▏ | 45943/371472 [3:42:20<28:51:35, 3.13it/s] 12%|█▏ | 45944/371472 [3:42:20<27:32:56, 3.28it/s] 12%|█▏ | 45945/371472 [3:42:20<27:28:35, 3.29it/s] 12%|█▏ | 45946/371472 [3:42:21<26:39:10, 3.39it/s] 12%|█▏ | 45947/371472 [3:42:21<26:32:06, 3.41it/s] 12%|█▏ | 45948/371472 [3:42:21<26:43:24, 3.38it/s] 12%|█▏ | 45949/371472 [3:42:21<25:51:20, 3.50it/s] 12%|█▏ | 45950/371472 [3:42:22<25:16:01, 3.58it/s] 12%|█▏ | 45951/371472 [3:42:22<24:49:04, 3.64it/s] 12%|█▏ | 45952/371472 [3:42:22<27:14:07, 3.32it/s] 12%|█▏ | 45953/371472 [3:42:23<27:51:43, 3.25it/s] 12%|█▏ | 45954/371472 [3:42:23<27:07:27, 3.33it/s] 12%|█▏ | 45955/371472 [3:42:23<26:28:48, 3.41it/s] 12%|█▏ | 45956/371472 [3:42:23<26:26:56, 3.42it/s] 12%|█▏ | 45957/371472 [3:42:24<25:23:01, 3.56it/s] 12%|█▏ | 45958/371472 [3:42:24<24:55:10, 3.63it/s] 12%|█▏ | 45959/371472 [3:42:24<25:49:35, 3.50it/s] 12%|█▏ | 45960/371472 [3:42:25<25:44:49, 3.51it/s] {'loss': 4.508, 'learning_rate': 8.890732401042901e-07, 'epoch': 1.98} + 12%|█▏ | 45960/371472 [3:42:25<25:44:49, 3.51it/s] 12%|█▏ | 45961/371472 [3:42:25<24:57:15, 3.62it/s] 12%|█▏ | 45962/371472 [3:42:25<25:17:36, 3.57it/s] 12%|█▏ | 45963/371472 [3:42:25<27:09:21, 3.33it/s] 12%|█▏ | 45964/371472 [3:42:26<26:20:00, 3.43it/s] 12%|█▏ | 45965/371472 [3:42:26<25:58:56, 3.48it/s] 12%|█▏ | 45966/371472 [3:42:26<25:49:51, 3.50it/s] 12%|█▏ | 45967/371472 [3:42:27<25:46:54, 3.51it/s] 12%|█▏ | 45968/371472 [3:42:27<26:19:01, 3.44it/s] 12%|█▏ | 45969/371472 [3:42:27<25:31:50, 3.54it/s] 12%|█▏ | 45970/371472 [3:42:27<24:44:47, 3.65it/s] 12%|█▏ | 45971/371472 [3:42:28<24:52:10, 3.64it/s] 12%|█▏ | 45972/371472 [3:42:28<24:46:35, 3.65it/s] 12%|█▏ | 45973/371472 [3:42:28<24:56:16, 3.63it/s] 12%|█▏ | 45974/371472 [3:42:29<25:28:39, 3.55it/s] 12%|█▏ | 45975/371472 [3:42:29<25:21:32, 3.57it/s] 12%|█▏ | 45976/371472 [3:42:29<26:40:42, 3.39it/s] 12%|█▏ | 45977/371472 [3:42:29<25:40:38, 3.52it/s] 12%|█▏ | 45978/371472 [3:42:30<28:42:12, 3.15it/s] 12%|█▏ | 45979/371472 [3:42:30<27:59:18, 3.23it/s] 12%|█▏ | 45980/371472 [3:42:30<28:07:56, 3.21it/s] {'loss': 4.3826, 'learning_rate': 8.890247581288112e-07, 'epoch': 1.98} + 12%|█▏ | 45980/371472 [3:42:30<28:07:56, 3.21it/s] 12%|█▏ | 45981/371472 [3:42:31<27:00:12, 3.35it/s] 12%|█▏ | 45982/371472 [3:42:31<27:19:05, 3.31it/s] 12%|█▏ | 45983/371472 [3:42:31<26:42:10, 3.39it/s] 12%|█▏ | 45984/371472 [3:42:32<27:36:03, 3.28it/s] 12%|█▏ | 45985/371472 [3:42:32<26:29:23, 3.41it/s] 12%|█▏ | 45986/371472 [3:42:32<26:23:55, 3.42it/s] 12%|█▏ | 45987/371472 [3:42:32<25:52:42, 3.49it/s] 12%|█▏ | 45988/371472 [3:42:33<25:57:17, 3.48it/s] 12%|█▏ | 45989/371472 [3:42:33<35:33:28, 2.54it/s] 12%|█▏ | 45990/371472 [3:42:34<32:18:58, 2.80it/s] 12%|█▏ | 45991/371472 [3:42:34<30:17:51, 2.98it/s] 12%|█▏ | 45992/371472 [3:42:34<28:17:29, 3.20it/s] 12%|█▏ | 45993/371472 [3:42:34<26:49:11, 3.37it/s] 12%|█▏ | 45994/371472 [3:42:35<26:32:30, 3.41it/s] 12%|█▏ | 45995/371472 [3:42:35<27:42:49, 3.26it/s] 12%|█▏ | 45996/371472 [3:42:35<28:54:26, 3.13it/s] 12%|█▏ | 45997/371472 [3:42:36<28:24:58, 3.18it/s] 12%|█▏ | 45998/371472 [3:42:36<27:26:14, 3.30it/s] 12%|█▏ | 45999/371472 [3:42:36<26:20:38, 3.43it/s] 12%|█▏ | 46000/371472 [3:42:36<25:19:46, 3.57it/s] {'loss': 4.355, 'learning_rate': 8.889762761533322e-07, 'epoch': 1.98} + 12%|█▏ | 46000/371472 [3:42:36<25:19:46, 3.57it/s] 12%|█▏ | 46001/371472 [3:42:37<25:20:33, 3.57it/s] 12%|█▏ | 46002/371472 [3:42:37<25:08:12, 3.60it/s] 12%|█▏ | 46003/371472 [3:42:37<24:57:01, 3.62it/s] 12%|█▏ | 46004/371472 [3:42:38<24:14:57, 3.73it/s] 12%|█▏ | 46005/371472 [3:42:38<26:51:09, 3.37it/s] 12%|█▏ | 46006/371472 [3:42:38<26:50:14, 3.37it/s] 12%|█▏ | 46007/371472 [3:42:39<28:13:41, 3.20it/s] 12%|█▏ | 46008/371472 [3:42:39<27:13:47, 3.32it/s] 12%|█▏ | 46009/371472 [3:42:39<27:54:50, 3.24it/s] 12%|█▏ | 46010/371472 [3:42:39<27:13:10, 3.32it/s] 12%|█▏ | 46011/371472 [3:42:40<26:01:59, 3.47it/s] 12%|█▏ | 46012/371472 [3:42:40<26:58:20, 3.35it/s] 12%|█▏ | 46013/371472 [3:42:40<26:12:20, 3.45it/s] 12%|█▏ | 46014/371472 [3:42:41<25:29:01, 3.55it/s] 12%|█▏ | 46015/371472 [3:42:41<27:12:15, 3.32it/s] 12%|█▏ | 46016/371472 [3:42:41<25:58:55, 3.48it/s] 12%|█▏ | 46017/371472 [3:42:41<26:13:32, 3.45it/s] 12%|█▏ | 46018/371472 [3:42:42<26:07:09, 3.46it/s] 12%|█▏ | 46019/371472 [3:42:42<27:23:43, 3.30it/s] 12%|█▏ | 46020/371472 [3:42:42<27:04:17, 3.34it/s] {'loss': 4.2947, 'learning_rate': 8.889277941778534e-07, 'epoch': 1.98} + 12%|█▏ | 46020/371472 [3:42:42<27:04:17, 3.34it/s] 12%|█▏ | 46021/371472 [3:42:43<29:04:36, 3.11it/s] 12%|█▏ | 46022/371472 [3:42:43<27:19:59, 3.31it/s] 12%|█▏ | 46023/371472 [3:42:43<26:50:17, 3.37it/s] 12%|█▏ | 46024/371472 [3:42:44<27:35:16, 3.28it/s] 12%|█▏ | 46025/371472 [3:42:44<27:03:54, 3.34it/s] 12%|█▏ | 46026/371472 [3:42:44<26:10:47, 3.45it/s] 12%|█▏ | 46027/371472 [3:42:44<25:51:10, 3.50it/s] 12%|█▏ | 46028/371472 [3:42:45<25:50:19, 3.50it/s] 12%|█▏ | 46029/371472 [3:42:45<25:21:11, 3.57it/s] 12%|█▏ | 46030/371472 [3:42:45<27:12:20, 3.32it/s] 12%|█▏ | 46031/371472 [3:42:46<26:20:58, 3.43it/s] 12%|█▏ | 46032/371472 [3:42:46<25:33:51, 3.54it/s] 12%|█▏ | 46033/371472 [3:42:46<26:59:15, 3.35it/s] 12%|█▏ | 46034/371472 [3:42:46<26:07:37, 3.46it/s] 12%|█▏ | 46035/371472 [3:42:47<28:26:47, 3.18it/s] 12%|█▏ | 46036/371472 [3:42:47<27:15:04, 3.32it/s] 12%|█▏ | 46037/371472 [3:42:47<27:28:04, 3.29it/s] 12%|█▏ | 46038/371472 [3:42:48<26:25:21, 3.42it/s] 12%|█▏ | 46039/371472 [3:42:48<26:31:42, 3.41it/s] 12%|█▏ | 46040/371472 [3:42:48<27:18:33, 3.31it/s] {'loss': 4.5955, 'learning_rate': 8.888793122023746e-07, 'epoch': 1.98} + 12%|█▏ | 46040/371472 [3:42:48<27:18:33, 3.31it/s] 12%|█▏ | 46041/371472 [3:42:49<28:21:56, 3.19it/s] 12%|█▏ | 46042/371472 [3:42:49<28:20:44, 3.19it/s] 12%|█▏ | 46043/371472 [3:42:49<28:31:17, 3.17it/s] 12%|█▏ | 46044/371472 [3:42:50<28:33:11, 3.17it/s] 12%|█▏ | 46045/371472 [3:42:50<26:50:21, 3.37it/s] 12%|█▏ | 46046/371472 [3:42:50<27:01:04, 3.35it/s] 12%|█▏ | 46047/371472 [3:42:50<25:50:26, 3.50it/s] 12%|█▏ | 46048/371472 [3:42:51<25:48:46, 3.50it/s] 12%|█▏ | 46049/371472 [3:42:51<25:37:01, 3.53it/s] 12%|█▏ | 46050/371472 [3:42:51<25:13:35, 3.58it/s] 12%|█▏ | 46051/371472 [3:42:52<24:19:44, 3.72it/s] 12%|█▏ | 46052/371472 [3:42:52<24:12:40, 3.73it/s] 12%|█▏ | 46053/371472 [3:42:52<24:06:13, 3.75it/s] 12%|█▏ | 46054/371472 [3:42:52<25:41:58, 3.52it/s] 12%|█▏ | 46055/371472 [3:42:53<25:03:06, 3.61it/s] 12%|█▏ | 46056/371472 [3:42:53<26:00:10, 3.48it/s] 12%|█▏ | 46057/371472 [3:42:53<25:57:07, 3.48it/s] 12%|█▏ | 46058/371472 [3:42:54<26:17:39, 3.44it/s] 12%|█▏ | 46059/371472 [3:42:54<26:28:56, 3.41it/s] 12%|█▏ | 46060/371472 [3:42:54<26:27:57, 3.42it/s] {'loss': 4.3466, 'learning_rate': 8.888308302268957e-07, 'epoch': 1.98} + 12%|█▏ | 46060/371472 [3:42:54<26:27:57, 3.42it/s] 12%|█▏ | 46061/371472 [3:42:54<26:05:42, 3.46it/s] 12%|█▏ | 46062/371472 [3:42:55<26:34:47, 3.40it/s] 12%|█▏ | 46063/371472 [3:42:55<26:35:23, 3.40it/s] 12%|█▏ | 46064/371472 [3:42:55<25:36:03, 3.53it/s] 12%|█▏ | 46065/371472 [3:42:56<26:20:28, 3.43it/s] 12%|█▏ | 46066/371472 [3:42:56<27:07:24, 3.33it/s] 12%|█▏ | 46067/371472 [3:42:56<26:34:59, 3.40it/s] 12%|█▏ | 46068/371472 [3:42:56<26:01:15, 3.47it/s] 12%|█▏ | 46069/371472 [3:42:57<25:42:21, 3.52it/s] 12%|█▏ | 46070/371472 [3:42:57<26:36:14, 3.40it/s] 12%|█▏ | 46071/371472 [3:42:57<26:55:05, 3.36it/s] 12%|█▏ | 46072/371472 [3:42:58<27:21:23, 3.30it/s] 12%|█▏ | 46073/371472 [3:42:58<28:06:02, 3.22it/s] 12%|█▏ | 46074/371472 [3:42:58<26:53:13, 3.36it/s] 12%|█▏ | 46075/371472 [3:42:59<26:32:27, 3.41it/s] 12%|█▏ | 46076/371472 [3:42:59<26:19:38, 3.43it/s] 12%|█▏ | 46077/371472 [3:42:59<26:11:51, 3.45it/s] 12%|█▏ | 46078/371472 [3:42:59<26:05:17, 3.46it/s] 12%|█▏ | 46079/371472 [3:43:00<25:19:09, 3.57it/s] 12%|█▏ | 46080/371472 [3:43:00<26:53:48, 3.36it/s] {'loss': 4.2408, 'learning_rate': 8.887823482514166e-07, 'epoch': 1.98} + 12%|█▏ | 46080/371472 [3:43:00<26:53:48, 3.36it/s] 12%|█▏ | 46081/371472 [3:43:00<26:09:44, 3.45it/s] 12%|█▏ | 46082/371472 [3:43:01<27:37:59, 3.27it/s] 12%|█▏ | 46083/371472 [3:43:01<27:06:18, 3.33it/s] 12%|█▏ | 46084/371472 [3:43:01<26:25:50, 3.42it/s] 12%|█▏ | 46085/371472 [3:43:01<25:17:02, 3.57it/s] 12%|█▏ | 46086/371472 [3:43:02<30:35:58, 2.95it/s] 12%|█▏ | 46087/371472 [3:43:02<28:44:00, 3.15it/s] 12%|█▏ | 46088/371472 [3:43:02<27:46:41, 3.25it/s] 12%|█▏ | 46089/371472 [3:43:03<26:55:58, 3.36it/s] 12%|█▏ | 46090/371472 [3:43:03<27:17:36, 3.31it/s] 12%|█▏ | 46091/371472 [3:43:03<27:03:02, 3.34it/s] 12%|█▏ | 46092/371472 [3:43:04<26:24:21, 3.42it/s] 12%|█▏ | 46093/371472 [3:43:04<26:32:59, 3.40it/s] 12%|█▏ | 46094/371472 [3:43:04<26:31:40, 3.41it/s] 12%|█▏ | 46095/371472 [3:43:04<25:16:17, 3.58it/s] 12%|█▏ | 46096/371472 [3:43:05<24:26:36, 3.70it/s] 12%|█▏ | 46097/371472 [3:43:05<24:39:59, 3.66it/s] 12%|█▏ | 46098/371472 [3:43:05<25:11:03, 3.59it/s] 12%|█▏ | 46099/371472 [3:43:06<25:33:38, 3.54it/s] 12%|█▏ | 46100/371472 [3:43:06<25:43:00, 3.51it/s] {'loss': 4.5384, 'learning_rate': 8.887338662759378e-07, 'epoch': 1.99} + 12%|█▏ | 46100/371472 [3:43:06<25:43:00, 3.51it/s] 12%|█▏ | 46101/371472 [3:43:06<25:09:32, 3.59it/s] 12%|█▏ | 46102/371472 [3:43:06<27:12:44, 3.32it/s] 12%|█▏ | 46103/371472 [3:43:07<26:49:21, 3.37it/s] 12%|█▏ | 46104/371472 [3:43:07<28:30:10, 3.17it/s] 12%|█▏ | 46105/371472 [3:43:07<27:40:52, 3.27it/s] 12%|█▏ | 46106/371472 [3:43:08<27:44:01, 3.26it/s] 12%|█▏ | 46107/371472 [3:43:08<27:58:35, 3.23it/s] 12%|█▏ | 46108/371472 [3:43:08<26:28:22, 3.41it/s] 12%|█▏ | 46109/371472 [3:43:09<25:48:44, 3.50it/s] 12%|█▏ | 46110/371472 [3:43:09<25:37:29, 3.53it/s] 12%|█▏ | 46111/371472 [3:43:09<24:45:00, 3.65it/s] 12%|█▏ | 46112/371472 [3:43:09<26:28:22, 3.41it/s] 12%|█▏ | 46113/371472 [3:43:10<25:51:57, 3.49it/s] 12%|█▏ | 46114/371472 [3:43:10<26:38:51, 3.39it/s] 12%|█▏ | 46115/371472 [3:43:10<26:30:34, 3.41it/s] 12%|█▏ | 46116/371472 [3:43:11<25:55:17, 3.49it/s] 12%|█▏ | 46117/371472 [3:43:11<26:09:36, 3.45it/s] 12%|█▏ | 46118/371472 [3:43:11<26:01:14, 3.47it/s] 12%|█▏ | 46119/371472 [3:43:11<25:39:32, 3.52it/s] 12%|█▏ | 46120/371472 [3:43:12<24:45:44, 3.65it/s] {'loss': 4.2042, 'learning_rate': 8.886853843004589e-07, 'epoch': 1.99} + 12%|█▏ | 46120/371472 [3:43:12<24:45:44, 3.65it/s] 12%|█▏ | 46121/371472 [3:43:12<24:40:54, 3.66it/s] 12%|█▏ | 46122/371472 [3:43:12<25:09:23, 3.59it/s] 12%|█▏ | 46123/371472 [3:43:13<26:21:32, 3.43it/s] 12%|█▏ | 46124/371472 [3:43:13<26:27:00, 3.42it/s] 12%|█▏ | 46125/371472 [3:43:13<26:20:02, 3.43it/s] 12%|█▏ | 46126/371472 [3:43:13<26:08:43, 3.46it/s] 12%|█▏ | 46127/371472 [3:43:14<25:20:36, 3.57it/s] 12%|█▏ | 46128/371472 [3:43:14<26:21:16, 3.43it/s] 12%|█▏ | 46129/371472 [3:43:14<26:41:34, 3.39it/s] 12%|█▏ | 46130/371472 [3:43:15<25:42:51, 3.51it/s] 12%|█▏ | 46131/371472 [3:43:15<25:23:17, 3.56it/s] 12%|█▏ | 46132/371472 [3:43:15<25:52:10, 3.49it/s] 12%|█▏ | 46133/371472 [3:43:15<25:51:24, 3.50it/s] 12%|█▏ | 46134/371472 [3:43:16<25:57:00, 3.48it/s] 12%|█▏ | 46135/371472 [3:43:16<25:34:22, 3.53it/s] 12%|█▏ | 46136/371472 [3:43:16<26:17:58, 3.44it/s] 12%|█▏ | 46137/371472 [3:43:17<25:37:01, 3.53it/s] 12%|█▏ | 46138/371472 [3:43:17<24:36:57, 3.67it/s] 12%|█▏ | 46139/371472 [3:43:17<24:36:54, 3.67it/s] 12%|█▏ | 46140/371472 [3:43:17<25:03:39, 3.61it/s] {'loss': 4.2729, 'learning_rate': 8.8863690232498e-07, 'epoch': 1.99} + 12%|█▏ | 46140/371472 [3:43:17<25:03:39, 3.61it/s] 12%|█▏ | 46141/371472 [3:43:18<25:22:12, 3.56it/s] 12%|█▏ | 46142/371472 [3:43:18<25:20:03, 3.57it/s] 12%|█▏ | 46143/371472 [3:43:18<26:08:41, 3.46it/s] 12%|█▏ | 46144/371472 [3:43:19<26:56:08, 3.35it/s] 12%|█▏ | 46145/371472 [3:43:19<25:42:55, 3.51it/s] 12%|█▏ | 46146/371472 [3:43:19<26:23:00, 3.43it/s] 12%|█▏ | 46147/371472 [3:43:19<25:15:13, 3.58it/s] 12%|█▏ | 46148/371472 [3:43:20<25:28:11, 3.55it/s] 12%|█▏ | 46149/371472 [3:43:20<25:58:04, 3.48it/s] 12%|█▏ | 46150/371472 [3:43:20<25:53:46, 3.49it/s] 12%|█▏ | 46151/371472 [3:43:21<26:21:12, 3.43it/s] 12%|█▏ | 46152/371472 [3:43:21<26:17:46, 3.44it/s] 12%|█▏ | 46153/371472 [3:43:21<25:16:36, 3.58it/s] 12%|█▏ | 46154/371472 [3:43:21<26:07:58, 3.46it/s] 12%|█▏ | 46155/371472 [3:43:22<26:19:27, 3.43it/s] 12%|█▏ | 46156/371472 [3:43:22<25:39:47, 3.52it/s] 12%|█▏ | 46157/371472 [3:43:22<26:13:13, 3.45it/s] 12%|█▏ | 46158/371472 [3:43:22<24:57:07, 3.62it/s] 12%|█▏ | 46159/371472 [3:43:23<27:48:07, 3.25it/s] 12%|█▏ | 46160/371472 [3:43:23<26:59:17, 3.35it/s] {'loss': 4.3113, 'learning_rate': 8.885884203495011e-07, 'epoch': 1.99} + 12%|█▏ | 46160/371472 [3:43:23<26:59:17, 3.35it/s] 12%|█▏ | 46161/371472 [3:43:23<26:25:19, 3.42it/s] 12%|█▏ | 46162/371472 [3:43:24<25:51:19, 3.49it/s] 12%|█▏ | 46163/371472 [3:43:24<25:56:18, 3.48it/s] 12%|█▏ | 46164/371472 [3:43:24<25:37:46, 3.53it/s] 12%|█▏ | 46165/371472 [3:43:25<24:56:59, 3.62it/s] 12%|█▏ | 46166/371472 [3:43:25<24:54:58, 3.63it/s] 12%|█▏ | 46167/371472 [3:43:25<24:24:54, 3.70it/s] 12%|█▏ | 46168/371472 [3:43:25<23:35:07, 3.83it/s] 12%|█▏ | 46169/371472 [3:43:26<23:40:03, 3.82it/s] 12%|█▏ | 46170/371472 [3:43:26<24:34:36, 3.68it/s] 12%|█▏ | 46171/371472 [3:43:26<25:05:31, 3.60it/s] 12%|█▏ | 46172/371472 [3:43:26<25:17:51, 3.57it/s] 12%|█▏ | 46173/371472 [3:43:27<24:41:34, 3.66it/s] 12%|█▏ | 46174/371472 [3:43:27<24:08:42, 3.74it/s] 12%|█▏ | 46175/371472 [3:43:27<25:39:39, 3.52it/s] 12%|█▏ | 46176/371472 [3:43:28<25:01:47, 3.61it/s] 12%|█▏ | 46177/371472 [3:43:28<24:26:44, 3.70it/s] 12%|█▏ | 46178/371472 [3:43:28<24:15:12, 3.73it/s] 12%|█▏ | 46179/371472 [3:43:28<27:08:49, 3.33it/s] 12%|█▏ | 46180/371472 [3:43:29<26:44:14, 3.38it/s] {'loss': 4.5347, 'learning_rate': 8.885399383740223e-07, 'epoch': 1.99} + 12%|█▏ | 46180/371472 [3:43:29<26:44:14, 3.38it/s] 12%|█▏ | 46181/371472 [3:43:29<27:57:19, 3.23it/s] 12%|█▏ | 46182/371472 [3:43:29<28:18:58, 3.19it/s] 12%|█▏ | 46183/371472 [3:43:30<29:10:24, 3.10it/s] 12%|█▏ | 46184/371472 [3:43:30<27:51:40, 3.24it/s] 12%|█▏ | 46185/371472 [3:43:30<26:13:24, 3.45it/s] 12%|█▏ | 46186/371472 [3:43:30<25:16:54, 3.57it/s] 12%|█▏ | 46187/371472 [3:43:31<25:40:01, 3.52it/s] 12%|█▏ | 46188/371472 [3:43:31<25:11:01, 3.59it/s] 12%|█▏ | 46189/371472 [3:43:31<24:47:32, 3.64it/s] 12%|█▏ | 46190/371472 [3:43:32<24:38:25, 3.67it/s] 12%|█▏ | 46191/371472 [3:43:32<24:43:41, 3.65it/s] 12%|█▏ | 46192/371472 [3:43:32<24:38:50, 3.67it/s] 12%|█▏ | 46193/371472 [3:43:32<26:00:40, 3.47it/s] 12%|█▏ | 46194/371472 [3:43:33<24:49:04, 3.64it/s] 12%|█▏ | 46195/371472 [3:43:33<24:21:01, 3.71it/s] 12%|█▏ | 46196/371472 [3:43:33<24:32:39, 3.68it/s] 12%|█▏ | 46197/371472 [3:43:34<24:28:14, 3.69it/s] 12%|█▏ | 46198/371472 [3:43:34<24:05:13, 3.75it/s] 12%|█▏ | 46199/371472 [3:43:34<23:58:35, 3.77it/s] 12%|█▏ | 46200/371472 [3:43:34<24:55:53, 3.62it/s] {'loss': 4.6352, 'learning_rate': 8.884914563985433e-07, 'epoch': 1.99} + 12%|█▏ | 46200/371472 [3:43:34<24:55:53, 3.62it/s] 12%|█▏ | 46201/371472 [3:43:35<25:14:53, 3.58it/s] 12%|█▏ | 46202/371472 [3:43:35<27:21:32, 3.30it/s] 12%|█▏ | 46203/371472 [3:43:35<30:36:51, 2.95it/s] 12%|█▏ | 46204/371472 [3:43:36<28:53:46, 3.13it/s] 12%|█▏ | 46205/371472 [3:43:36<28:54:36, 3.13it/s] 12%|█▏ | 46206/371472 [3:43:36<27:42:13, 3.26it/s] 12%|█▏ | 46207/371472 [3:43:37<26:31:06, 3.41it/s] 12%|█▏ | 46208/371472 [3:43:37<26:32:44, 3.40it/s] 12%|█▏ | 46209/371472 [3:43:37<26:13:58, 3.44it/s] 12%|█▏ | 46210/371472 [3:43:37<25:13:11, 3.58it/s] 12%|█▏ | 46211/371472 [3:43:38<26:29:51, 3.41it/s] 12%|█▏ | 46212/371472 [3:43:38<26:21:43, 3.43it/s] 12%|█▏ | 46213/371472 [3:43:38<26:15:02, 3.44it/s] 12%|█▏ | 46214/371472 [3:43:39<25:40:27, 3.52it/s] 12%|█▏ | 46215/371472 [3:43:39<25:35:40, 3.53it/s] 12%|█▏ | 46216/371472 [3:43:39<25:03:15, 3.61it/s] 12%|█▏ | 46217/371472 [3:43:39<25:09:09, 3.59it/s] 12%|█▏ | 46218/371472 [3:43:40<25:38:40, 3.52it/s] 12%|█▏ | 46219/371472 [3:43:40<26:30:48, 3.41it/s] 12%|█▏ | 46220/371472 [3:43:40<27:20:44, 3.30it/s] {'loss': 4.1104, 'learning_rate': 8.884429744230644e-07, 'epoch': 1.99} + 12%|█▏ | 46220/371472 [3:43:40<27:20:44, 3.30it/s] 12%|█▏ | 46221/371472 [3:43:41<26:25:32, 3.42it/s] 12%|█▏ | 46222/371472 [3:43:41<27:40:37, 3.26it/s] 12%|█▏ | 46223/371472 [3:43:41<26:55:16, 3.36it/s] 12%|█▏ | 46224/371472 [3:43:41<26:30:31, 3.41it/s] 12%|█▏ | 46225/371472 [3:43:42<27:02:39, 3.34it/s] 12%|█▏ | 46226/371472 [3:43:42<27:01:20, 3.34it/s] 12%|█▏ | 46227/371472 [3:43:42<26:46:41, 3.37it/s] 12%|█▏ | 46228/371472 [3:43:43<27:30:58, 3.28it/s] 12%|█▏ | 46229/371472 [3:43:43<26:39:43, 3.39it/s] 12%|█▏ | 46230/371472 [3:43:43<25:50:43, 3.50it/s] 12%|█▏ | 46231/371472 [3:43:44<26:56:02, 3.35it/s] 12%|█▏ | 46232/371472 [3:43:44<26:30:50, 3.41it/s] 12%|█▏ | 46233/371472 [3:43:44<28:24:30, 3.18it/s] 12%|█▏ | 46234/371472 [3:43:44<26:42:12, 3.38it/s] 12%|█▏ | 46235/371472 [3:43:45<26:09:39, 3.45it/s] 12%|█▏ | 46236/371472 [3:43:45<26:17:36, 3.44it/s] 12%|█▏ | 46237/371472 [3:43:45<25:56:28, 3.48it/s] 12%|█▏ | 46238/371472 [3:43:46<25:44:36, 3.51it/s] 12%|█▏ | 46239/371472 [3:43:46<25:37:44, 3.53it/s] 12%|█▏ | 46240/371472 [3:43:46<25:19:39, 3.57it/s] {'loss': 4.314, 'learning_rate': 8.883944924475855e-07, 'epoch': 1.99} + 12%|█▏ | 46240/371472 [3:43:46<25:19:39, 3.57it/s] 12%|█▏ | 46241/371472 [3:43:46<25:38:40, 3.52it/s] 12%|█▏ | 46242/371472 [3:43:47<25:45:40, 3.51it/s] 12%|█▏ | 46243/371472 [3:43:47<25:40:44, 3.52it/s] 12%|█▏ | 46244/371472 [3:43:47<24:53:39, 3.63it/s] 12%|█▏ | 46245/371472 [3:43:48<25:59:25, 3.48it/s] 12%|█▏ | 46246/371472 [3:43:48<25:14:37, 3.58it/s] 12%|█▏ | 46247/371472 [3:43:48<25:22:05, 3.56it/s] 12%|█▏ | 46248/371472 [3:43:48<24:59:45, 3.61it/s] 12%|█▏ | 46249/371472 [3:43:49<25:15:32, 3.58it/s] 12%|█▏ | 46250/371472 [3:43:49<25:40:17, 3.52it/s] 12%|█▏ | 46251/371472 [3:43:49<25:00:19, 3.61it/s] 12%|█▏ | 46252/371472 [3:43:49<24:34:22, 3.68it/s] 12%|█▏ | 46253/371472 [3:43:50<26:48:11, 3.37it/s] 12%|█▏ | 46254/371472 [3:43:50<26:37:27, 3.39it/s] 12%|█▏ | 46255/371472 [3:43:50<28:04:55, 3.22it/s] 12%|█▏ | 46256/371472 [3:43:51<26:52:44, 3.36it/s] 12%|█▏ | 46257/371472 [3:43:51<25:33:12, 3.54it/s] 12%|█▏ | 46258/371472 [3:43:51<24:37:01, 3.67it/s] 12%|█▏ | 46259/371472 [3:43:52<24:51:18, 3.63it/s] 12%|█▏ | 46260/371472 [3:43:52<24:30:37, 3.69it/s] {'loss': 4.3375, 'learning_rate': 8.883460104721067e-07, 'epoch': 1.99} + 12%|█▏ | 46260/371472 [3:43:52<24:30:37, 3.69it/s] 12%|█▏ | 46261/371472 [3:43:52<24:27:00, 3.69it/s] 12%|█▏ | 46262/371472 [3:43:52<24:14:39, 3.73it/s] 12%|█▏ | 46263/371472 [3:43:53<25:11:42, 3.59it/s] 12%|█▏ | 46264/371472 [3:43:53<24:32:31, 3.68it/s] 12%|█▏ | 46265/371472 [3:43:53<25:10:25, 3.59it/s] 12%|█▏ | 46266/371472 [3:43:53<25:38:28, 3.52it/s] 12%|█▏ | 46267/371472 [3:43:54<24:52:09, 3.63it/s] 12%|█▏ | 46268/371472 [3:43:54<26:10:02, 3.45it/s] 12%|█▏ | 46269/371472 [3:43:54<25:45:02, 3.51it/s] 12%|█▏ | 46270/371472 [3:43:55<24:52:35, 3.63it/s] 12%|█▏ | 46271/371472 [3:43:55<24:21:27, 3.71it/s] 12%|█▏ | 46272/371472 [3:43:55<23:55:44, 3.78it/s] 12%|█▏ | 46273/371472 [3:43:55<25:25:40, 3.55it/s] 12%|█▏ | 46274/371472 [3:43:56<25:17:13, 3.57it/s] 12%|█▏ | 46275/371472 [3:43:56<26:04:34, 3.46it/s] 12%|█▏ | 46276/371472 [3:43:56<25:34:44, 3.53it/s] 12%|█▏ | 46277/371472 [3:43:57<25:25:26, 3.55it/s] 12%|█▏ | 46278/371472 [3:43:57<25:07:30, 3.60it/s] 12%|█▏ | 46279/371472 [3:43:57<26:03:25, 3.47it/s] 12%|█▏ | 46280/371472 [3:43:57<27:06:49, 3.33it/s] {'loss': 4.2552, 'learning_rate': 8.882975284966278e-07, 'epoch': 1.99} + 12%|█▏ | 46280/371472 [3:43:57<27:06:49, 3.33it/s] 12%|█▏ | 46281/371472 [3:43:58<26:40:16, 3.39it/s] 12%|█▏ | 46282/371472 [3:43:58<25:15:19, 3.58it/s] 12%|█▏ | 46283/371472 [3:43:58<25:35:33, 3.53it/s] 12%|█▏ | 46284/371472 [3:43:59<26:13:49, 3.44it/s] 12%|█▏ | 46285/371472 [3:43:59<26:02:30, 3.47it/s] 12%|█▏ | 46286/371472 [3:43:59<25:55:25, 3.48it/s] 12%|█▏ | 46287/371472 [3:43:59<24:59:04, 3.62it/s] 12%|█▏ | 46288/371472 [3:44:00<25:12:22, 3.58it/s] 12%|█▏ | 46289/371472 [3:44:00<24:51:28, 3.63it/s] 12%|█▏ | 46290/371472 [3:44:00<24:35:39, 3.67it/s] 12%|█▏ | 46291/371472 [3:44:01<26:26:38, 3.42it/s] 12%|█▏ | 46292/371472 [3:44:01<25:58:50, 3.48it/s] 12%|█▏ | 46293/371472 [3:44:01<24:56:46, 3.62it/s] 12%|█▏ | 46294/371472 [3:44:01<24:30:01, 3.69it/s] 12%|█▏ | 46295/371472 [3:44:02<24:19:57, 3.71it/s] 12%|█▏ | 46296/371472 [3:44:02<24:14:58, 3.72it/s] 12%|█▏ | 46297/371472 [3:44:02<25:02:40, 3.61it/s] 12%|█▏ | 46298/371472 [3:44:02<25:19:33, 3.57it/s] 12%|█▏ | 46299/371472 [3:44:03<24:35:31, 3.67it/s] 12%|█▏ | 46300/371472 [3:44:03<24:11:17, 3.73it/s] {'loss': 4.4266, 'learning_rate': 8.882490465211489e-07, 'epoch': 1.99} + 12%|█▏ | 46300/371472 [3:44:03<24:11:17, 3.73it/s] 12%|█▏ | 46301/371472 [3:44:03<24:11:24, 3.73it/s] 12%|█▏ | 46302/371472 [3:44:03<24:14:37, 3.73it/s] 12%|█▏ | 46303/371472 [3:44:04<23:36:01, 3.83it/s] 12%|█▏ | 46304/371472 [3:44:04<24:43:47, 3.65it/s] 12%|█▏ | 46305/371472 [3:44:04<26:09:54, 3.45it/s] 12%|█▏ | 46306/371472 [3:44:05<26:27:24, 3.41it/s] 12%|█▏ | 46307/371472 [3:44:05<26:19:34, 3.43it/s] 12%|█▏ | 46308/371472 [3:44:05<25:54:38, 3.49it/s] 12%|█▏ | 46309/371472 [3:44:06<25:51:29, 3.49it/s] 12%|█▏ | 46310/371472 [3:44:06<25:53:00, 3.49it/s] 12%|█▏ | 46311/371472 [3:44:06<25:12:46, 3.58it/s] 12%|█▏ | 46312/371472 [3:44:06<25:51:45, 3.49it/s] 12%|█▏ | 46313/371472 [3:44:07<26:14:20, 3.44it/s] 12%|█▏ | 46314/371472 [3:44:07<26:39:33, 3.39it/s] 12%|█▏ | 46315/371472 [3:44:07<26:11:07, 3.45it/s] 12%|█▏ | 46316/371472 [3:44:08<28:26:54, 3.17it/s] 12%|█▏ | 46317/371472 [3:44:08<27:25:42, 3.29it/s] 12%|█▏ | 46318/371472 [3:44:08<28:47:53, 3.14it/s] 12%|█▏ | 46319/371472 [3:44:09<27:13:41, 3.32it/s] 12%|█▏ | 46320/371472 [3:44:09<27:45:29, 3.25it/s] {'loss': 4.2751, 'learning_rate': 8.8820056454567e-07, 'epoch': 2.0} + 12%|█▏ | 46320/371472 [3:44:09<27:45:29, 3.25it/s] 12%|█▏ | 46321/371472 [3:44:09<27:51:54, 3.24it/s] 12%|█▏ | 46322/371472 [3:44:09<27:32:16, 3.28it/s] 12%|█▏ | 46323/371472 [3:44:10<27:32:58, 3.28it/s] 12%|█▏ | 46324/371472 [3:44:10<28:36:10, 3.16it/s] 12%|█▏ | 46325/371472 [3:44:10<28:30:21, 3.17it/s] 12%|█▏ | 46326/371472 [3:44:11<27:31:03, 3.28it/s] 12%|█▏ | 46327/371472 [3:44:11<26:19:30, 3.43it/s] 12%|█▏ | 46328/371472 [3:44:11<26:05:23, 3.46it/s] 12%|█▏ | 46329/371472 [3:44:12<25:41:52, 3.51it/s] 12%|█▏ | 46330/371472 [3:44:12<26:14:13, 3.44it/s] 12%|█▏ | 46331/371472 [3:44:12<25:35:33, 3.53it/s] 12%|█▏ | 46332/371472 [3:44:12<25:07:49, 3.59it/s] 12%|█▏ | 46333/371472 [3:44:13<24:17:45, 3.72it/s] 12%|█▏ | 46334/371472 [3:44:13<25:04:28, 3.60it/s] 12%|█▏ | 46335/371472 [3:44:13<29:03:41, 3.11it/s] 12%|█▏ | 46336/371472 [3:44:14<28:21:51, 3.18it/s] 12%|█▏ | 46337/371472 [3:44:14<28:31:31, 3.17it/s] 12%|█▏ | 46338/371472 [3:44:14<27:42:45, 3.26it/s] 12%|█▏ | 46339/371472 [3:44:15<27:39:55, 3.26it/s] 12%|█▏ | 46340/371472 [3:44:15<26:59:26, 3.35it/s] {'loss': 4.1229, 'learning_rate': 8.88152082570191e-07, 'epoch': 2.0} + 12%|█▏ | 46340/371472 [3:44:15<26:59:26, 3.35it/s] 12%|█▏ | 46341/371472 [3:44:15<26:00:33, 3.47it/s] 12%|█▏ | 46342/371472 [3:44:15<25:19:36, 3.57it/s] 12%|█▏ | 46343/371472 [3:44:16<26:37:23, 3.39it/s] 12%|█▏ | 46344/371472 [3:44:16<28:44:59, 3.14it/s] 12%|█▏ | 46345/371472 [3:44:16<28:29:21, 3.17it/s] 12%|█▏ | 46346/371472 [3:44:17<27:03:12, 3.34it/s] 12%|█▏ | 46347/371472 [3:44:17<27:25:03, 3.29it/s] 12%|█▏ | 46348/371472 [3:44:17<28:38:02, 3.15it/s] 12%|█▏ | 46349/371472 [3:44:18<27:32:47, 3.28it/s] 12%|█▏ | 46350/371472 [3:44:18<27:26:52, 3.29it/s] 12%|█▏ | 46351/371472 [3:44:18<26:07:29, 3.46it/s] 12%|█▏ | 46352/371472 [3:44:18<26:55:44, 3.35it/s] 12%|█▏ | 46353/371472 [3:44:19<25:56:34, 3.48it/s] 12%|█▏ | 46354/371472 [3:44:19<25:25:59, 3.55it/s] 12%|█▏ | 46355/371472 [3:44:19<25:24:33, 3.55it/s] 12%|█▏ | 46356/371472 [3:44:19<25:03:41, 3.60it/s] 12%|█▏ | 46357/371472 [3:44:20<25:07:56, 3.59it/s] 12%|█▏ | 46358/371472 [3:44:20<24:30:49, 3.68it/s] 12%|█▏ | 46359/371472 [3:44:20<24:10:52, 3.73it/s] 12%|█▏ | 46360/371472 [3:44:21<24:09:56, 3.74it/s] {'loss': 4.6671, 'learning_rate': 8.881036005947122e-07, 'epoch': 2.0} + 12%|█▏ | 46360/371472 [3:44:21<24:09:56, 3.74it/s] 12%|█▏ | 46361/371472 [3:44:21<25:15:07, 3.58it/s] 12%|█▏ | 46362/371472 [3:44:21<26:48:04, 3.37it/s] 12%|█▏ | 46363/371472 [3:44:21<26:08:54, 3.45it/s] 12%|█▏ | 46364/371472 [3:44:22<24:52:07, 3.63it/s] 12%|█▏ | 46365/371472 [3:44:22<26:15:37, 3.44it/s] 12%|█▏ | 46366/371472 [3:44:22<25:37:59, 3.52it/s] 12%|█▏ | 46367/371472 [3:44:23<25:07:33, 3.59it/s] 12%|█▏ | 46368/371472 [3:44:23<24:37:41, 3.67it/s] 12%|█▏ | 46369/371472 [3:44:23<26:54:57, 3.36it/s] 12%|█▏ | 46370/371472 [3:44:24<27:37:40, 3.27it/s] 12%|█▏ | 46371/371472 [3:44:24<26:57:06, 3.35it/s] 12%|█▏ | 46372/371472 [3:44:24<26:16:41, 3.44it/s] 12%|█▏ | 46373/371472 [3:44:24<26:06:53, 3.46it/s] 12%|█▏ | 46374/371472 [3:44:25<26:10:37, 3.45it/s] 12%|█▏ | 46375/371472 [3:44:25<25:25:53, 3.55it/s] 12%|█▏ | 46376/371472 [3:44:25<28:37:25, 3.15it/s] 12%|█▏ | 46377/371472 [3:44:26<27:32:12, 3.28it/s] 12%|█▏ | 46378/371472 [3:44:26<28:46:35, 3.14it/s] 12%|█▏ | 46379/371472 [3:44:26<29:00:36, 3.11it/s] 12%|█▏ | 46380/371472 [3:44:27<29:38:21, 3.05it/s] {'loss': 4.4372, 'learning_rate': 8.880551186192333e-07, 'epoch': 2.0} + 12%|█▏ | 46380/371472 [3:44:27<29:38:21, 3.05it/s] 12%|█▏ | 46381/371472 [3:44:27<30:26:22, 2.97it/s] 12%|█▏ | 46382/371472 [3:44:27<30:10:39, 2.99it/s] 12%|█▏ | 46383/371472 [3:44:28<30:20:05, 2.98it/s] 12%|█▏ | 46384/371472 [3:44:28<28:19:45, 3.19it/s] 12%|█▏ | 46385/371472 [3:44:28<27:46:43, 3.25it/s] 12%|█▏ | 46386/371472 [3:44:29<28:28:04, 3.17it/s] 12%|█▏ | 46387/371472 [3:44:29<27:29:07, 3.29it/s] 12%|█▏ | 46388/371472 [3:44:29<26:33:33, 3.40it/s] 12%|█▏ | 46389/371472 [3:44:29<25:03:50, 3.60it/s] 12%|█▏ | 46390/371472 [3:44:30<25:32:38, 3.54it/s] 12%|█▏ | 46391/371472 [3:44:30<25:06:22, 3.60it/s] 12%|█▏ | 46392/371472 [3:44:30<27:21:16, 3.30it/s] 12%|█▏ | 46393/371472 [3:44:30<26:06:42, 3.46it/s] 12%|█▏ | 46394/371472 [3:44:31<25:41:36, 3.51it/s] 12%|█▏ | 46395/371472 [3:44:31<25:02:32, 3.61it/s] 12%|█▏ | 46396/371472 [3:44:31<24:58:59, 3.61it/s] 12%|█▏ | 46397/371472 [3:44:32<26:18:23, 3.43it/s] 12%|█▏ | 46398/371472 [3:44:32<25:47:31, 3.50it/s] 12%|█▏ | 46399/371472 [3:44:32<25:16:51, 3.57it/s] 12%|█▏ | 46400/371472 [3:44:33<28:20:58, 3.19it/s] {'loss': 4.2064, 'learning_rate': 8.880066366437544e-07, 'epoch': 2.0} + 12%|█▏ | 46400/371472 [3:44:33<28:20:58, 3.19it/s] 12%|█▏ | 46401/371472 [3:44:33<28:40:37, 3.15it/s] 12%|█▏ | 46402/371472 [3:44:33<29:30:23, 3.06it/s] 12%|█▏ | 46403/371472 [3:44:34<29:07:41, 3.10it/s] 12%|█▏ | 46404/371472 [3:44:34<27:57:23, 3.23it/s] 12%|█▏ | 46405/371472 [3:44:34<26:52:43, 3.36it/s] 12%|█▏ | 46406/371472 [3:44:34<26:01:15, 3.47it/s] 12%|█▏ | 46407/371472 [3:44:35<25:30:45, 3.54it/s] 12%|█▏ | 46408/371472 [3:44:35<25:52:37, 3.49it/s] 12%|█▏ | 46409/371472 [3:44:35<26:34:01, 3.40it/s] 12%|█▏ | 46410/371472 [3:44:36<25:57:17, 3.48it/s] 12%|█▏ | 46411/371472 [3:44:36<25:29:09, 3.54it/s] 12%|█▏ | 46412/371472 [3:44:36<26:22:20, 3.42it/s] 12%|█▏ | 46413/371472 [3:44:36<25:50:00, 3.50it/s] 12%|█▏ | 46414/371472 [3:44:37<26:18:37, 3.43it/s] 12%|█▏ | 46415/371472 [3:44:37<26:58:31, 3.35it/s] 12%|█▏ | 46416/371472 [3:44:37<26:38:12, 3.39it/s] 12%|█▏ | 46417/371472 [3:44:38<25:53:12, 3.49it/s] 12%|█▏ | 46418/371472 [3:44:38<25:10:35, 3.59it/s] 12%|█▏ | 46419/371472 [3:44:38<25:39:55, 3.52it/s] 12%|█▏ | 46420/371472 [3:44:38<25:55:45, 3.48it/s] {'loss': 4.3167, 'learning_rate': 8.879581546682756e-07, 'epoch': 2.0} + 12%|█▏ | 46420/371472 [3:44:38<25:55:45, 3.48it/s] 12%|█▏ | 46421/371472 [3:44:39<27:09:24, 3.32it/s] 12%|█▏ | 46422/371472 [3:44:39<27:09:23, 3.32it/s] 12%|█▏ | 46423/371472 [3:44:39<26:22:40, 3.42it/s] 12%|█▏ | 46424/371472 [3:44:40<27:33:41, 3.28it/s] 12%|█▏ | 46425/371472 [3:44:40<26:31:02, 3.40it/s] 12%|█▏ | 46426/371472 [3:44:40<27:17:34, 3.31it/s] 12%|█▏ | 46427/371472 [3:44:40<26:21:03, 3.43it/s] 12%|█▏ | 46428/371472 [3:44:41<26:35:39, 3.40it/s] 12%|█▏ | 46429/371472 [3:44:41<26:25:03, 3.42it/s] 12%|█▏ | 46430/371472 [3:44:41<27:53:36, 3.24it/s] 12%|█▏ | 46431/371472 [3:44:42<27:02:02, 3.34it/s] 12%|█▏ | 46432/371472 [3:44:42<26:38:49, 3.39it/s] 12%|█▏ | 46433/371472 [3:44:42<25:40:50, 3.52it/s] 12%|█▎ | 46434/371472 [3:44:43<26:44:26, 3.38it/s]Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co./docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41. +Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2} +/opt/conda/lib/python3.10/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock. + self.pid = os.fork() + 13%|█▎ | 46435/371472 [3:45:10<761:11:43, 8.43s/it] 13%|█▎ | 46436/371472 [3:45:10<541:37:54, 6.00s/it] 13%|█▎ | 46437/371472 [3:45:11<387:49:17, 4.30s/it] 13%|█▎ | 46438/371472 [3:45:11<280:38:30, 3.11s/it] 13%|█▎ | 46439/371472 [3:45:11<205:35:21, 2.28s/it] 13%|█▎ | 46440/371472 [3:45:12<152:49:16, 1.69s/it] {'loss': 4.3422, 'learning_rate': 8.879096726927968e-07, 'epoch': 2.0} + 13%|█▎ | 46440/371472 [3:45:12<152:49:16, 1.69s/it] 13%|█▎ | 46441/371472 [3:45:12<114:29:32, 1.27s/it] 13%|█▎ | 46442/371472 [3:45:12<89:11:35, 1.01it/s] 13%|█▎ | 46443/371472 [3:45:13<71:40:47, 1.26it/s] 13%|█▎ | 46444/371472 [3:45:13<58:52:56, 1.53it/s] 13%|█▎ | 46445/371472 [3:45:13<51:07:41, 1.77it/s] 13%|█▎ | 46446/371472 [3:45:14<44:24:51, 2.03it/s] 13%|█▎ | 46447/371472 [3:45:14<39:37:13, 2.28it/s] 13%|█▎ | 46448/371472 [3:45:14<36:34:04, 2.47it/s] 13%|█▎ | 46449/371472 [3:45:15<35:00:33, 2.58it/s] 13%|█▎ | 46450/371472 [3:45:15<31:35:53, 2.86it/s] 13%|█▎ | 46451/371472 [3:45:15<32:48:14, 2.75it/s] 13%|█▎ | 46452/371472 [3:45:16<30:42:02, 2.94it/s] 13%|█▎ | 46453/371472 [3:45:16<30:27:23, 2.96it/s] 13%|█▎ | 46454/371472 [3:45:16<32:58:05, 2.74it/s] 13%|█▎ | 46455/371472 [3:45:17<31:19:09, 2.88it/s] 13%|█▎ | 46456/371472 [3:45:17<30:37:14, 2.95it/s] 13%|█▎ | 46457/371472 [3:45:17<29:10:56, 3.09it/s] 13%|█▎ | 46458/371472 [3:45:17<27:38:24, 3.27it/s] 13%|█▎ | 46459/371472 [3:45:18<27:12:19, 3.32it/s] 13%|█▎ | 46460/371472 [3:45:18<26:58:19, 3.35it/s] {'loss': 4.4733, 'learning_rate': 8.878611907173177e-07, 'epoch': 2.0} + 13%|█▎ | 46460/371472 [3:45:18<26:58:19, 3.35it/s] 13%|█▎ | 46461/371472 [3:45:18<26:12:38, 3.44it/s] 13%|█▎ | 46462/371472 [3:45:19<25:54:28, 3.48it/s] 13%|█▎ | 46463/371472 [3:45:19<26:16:32, 3.44it/s] 13%|█▎ | 46464/371472 [3:45:19<26:30:28, 3.41it/s] 13%|█▎ | 46465/371472 [3:45:19<26:06:32, 3.46it/s] 13%|█▎ | 46466/371472 [3:45:20<26:01:00, 3.47it/s] 13%|█▎ | 46467/371472 [3:45:20<25:52:20, 3.49it/s] 13%|█▎ | 46468/371472 [3:45:20<25:57:16, 3.48it/s] 13%|█▎ | 46469/371472 [3:45:21<25:45:29, 3.50it/s] 13%|█▎ | 46470/371472 [3:45:21<24:51:38, 3.63it/s] 13%|█▎ | 46471/371472 [3:45:21<25:33:56, 3.53it/s] 13%|█▎ | 46472/371472 [3:45:21<26:51:54, 3.36it/s] 13%|█▎ | 46473/371472 [3:45:22<26:37:31, 3.39it/s] 13%|█▎ | 46474/371472 [3:45:22<26:15:03, 3.44it/s] 13%|█▎ | 46475/371472 [3:45:22<25:15:52, 3.57it/s] 13%|█▎ | 46476/371472 [3:45:23<24:41:53, 3.66it/s] 13%|█▎ | 46477/371472 [3:45:23<24:20:15, 3.71it/s] 13%|█▎ | 46478/371472 [3:45:23<24:23:00, 3.70it/s] 13%|█▎ | 46479/371472 [3:45:23<25:52:38, 3.49it/s] 13%|█▎ | 46480/371472 [3:45:24<25:36:53, 3.52it/s] {'loss': 4.5121, 'learning_rate': 8.878127087418388e-07, 'epoch': 2.0} + 13%|█▎ | 46480/371472 [3:45:24<25:36:53, 3.52it/s] 13%|█▎ | 46481/371472 [3:45:24<25:39:49, 3.52it/s] 13%|█▎ | 46482/371472 [3:45:24<24:56:57, 3.62it/s] 13%|█▎ | 46483/371472 [3:45:25<24:48:46, 3.64it/s] 13%|█▎ | 46484/371472 [3:45:25<24:51:21, 3.63it/s] 13%|█▎ | 46485/371472 [3:45:25<24:17:26, 3.72it/s] 13%|█▎ | 46486/371472 [3:45:25<24:48:42, 3.64it/s] 13%|█▎ | 46487/371472 [3:45:26<25:06:48, 3.59it/s] 13%|█▎ | 46488/371472 [3:45:26<26:15:35, 3.44it/s] 13%|█▎ | 46489/371472 [3:45:26<26:30:47, 3.40it/s] 13%|█▎ | 46490/371472 [3:45:27<26:28:04, 3.41it/s] 13%|█▎ | 46491/371472 [3:45:27<25:35:23, 3.53it/s] 13%|█▎ | 46492/371472 [3:45:27<29:32:47, 3.06it/s] 13%|█▎ | 46493/371472 [3:45:28<28:11:52, 3.20it/s] 13%|█▎ | 46494/371472 [3:45:28<28:22:18, 3.18it/s] 13%|█▎ | 46495/371472 [3:45:28<26:59:37, 3.34it/s] 13%|█▎ | 46496/371472 [3:45:28<27:04:22, 3.33it/s] 13%|█▎ | 46497/371472 [3:45:29<26:19:35, 3.43it/s] 13%|█▎ | 46498/371472 [3:45:29<27:02:51, 3.34it/s] 13%|█▎ | 46499/371472 [3:45:29<26:19:19, 3.43it/s] 13%|█▎ | 46500/371472 [3:45:30<25:55:19, 3.48it/s] {'loss': 4.3565, 'learning_rate': 8.8776422676636e-07, 'epoch': 2.0} + 13%|█▎ | 46500/371472 [3:45:30<25:55:19, 3.48it/s] 13%|█▎ | 46501/371472 [3:45:30<26:05:51, 3.46it/s] 13%|█▎ | 46502/371472 [3:45:30<26:55:45, 3.35it/s] 13%|█▎ | 46503/371472 [3:45:30<26:55:26, 3.35it/s] 13%|█▎ | 46504/371472 [3:45:31<26:39:06, 3.39it/s] 13%|█▎ | 46505/371472 [3:45:31<25:33:58, 3.53it/s] 13%|█▎ | 46506/371472 [3:45:31<24:43:25, 3.65it/s] 13%|█▎ | 46507/371472 [3:45:32<25:19:20, 3.56it/s] 13%|█▎ | 46508/371472 [3:45:32<24:52:47, 3.63it/s] 13%|█▎ | 46509/371472 [3:45:32<25:22:24, 3.56it/s] 13%|█▎ | 46510/371472 [3:45:32<24:56:50, 3.62it/s] 13%|█▎ | 46511/371472 [3:45:33<26:02:13, 3.47it/s] 13%|█▎ | 46512/371472 [3:45:33<25:41:01, 3.51it/s] 13%|█▎ | 46513/371472 [3:45:33<25:33:06, 3.53it/s] 13%|█▎ | 46514/371472 [3:45:33<25:02:37, 3.60it/s] 13%|█▎ | 46515/371472 [3:45:34<25:56:28, 3.48it/s] 13%|█▎ | 46516/371472 [3:45:34<25:05:07, 3.60it/s] 13%|█▎ | 46517/371472 [3:45:34<24:29:53, 3.68it/s] 13%|█▎ | 46518/371472 [3:45:35<24:36:57, 3.67it/s] 13%|█▎ | 46519/371472 [3:45:35<24:59:13, 3.61it/s] 13%|█▎ | 46520/371472 [3:45:35<25:25:48, 3.55it/s] {'loss': 4.3452, 'learning_rate': 8.87715744790881e-07, 'epoch': 2.0} + 13%|█▎ | 46520/371472 [3:45:35<25:25:48, 3.55it/s] 13%|█▎ | 46521/371472 [3:45:35<24:46:50, 3.64it/s] 13%|█▎ | 46522/371472 [3:45:36<24:12:10, 3.73it/s] 13%|█▎ | 46523/371472 [3:45:36<26:51:08, 3.36it/s] 13%|█▎ | 46524/371472 [3:45:36<28:16:51, 3.19it/s] 13%|█▎ | 46525/371472 [3:45:37<27:23:56, 3.29it/s] 13%|█▎ | 46526/371472 [3:45:37<27:34:36, 3.27it/s] 13%|█▎ | 46527/371472 [3:45:37<26:33:52, 3.40it/s] 13%|█▎ | 46528/371472 [3:45:38<27:55:39, 3.23it/s] 13%|█▎ | 46529/371472 [3:45:38<28:15:57, 3.19it/s] 13%|█▎ | 46530/371472 [3:45:38<26:38:12, 3.39it/s] 13%|█▎ | 46531/371472 [3:45:38<26:29:09, 3.41it/s] 13%|█▎ | 46532/371472 [3:45:39<25:40:05, 3.52it/s] 13%|█▎ | 46533/371472 [3:45:39<25:54:11, 3.48it/s] 13%|█▎ | 46534/371472 [3:45:39<26:22:52, 3.42it/s] 13%|█▎ | 46535/371472 [3:45:40<25:31:31, 3.54it/s] 13%|█▎ | 46536/371472 [3:45:40<27:56:06, 3.23it/s] 13%|█▎ | 46537/371472 [3:45:40<26:28:04, 3.41it/s] 13%|█▎ | 46538/371472 [3:45:41<27:25:41, 3.29it/s] 13%|█▎ | 46539/371472 [3:45:41<27:14:04, 3.31it/s] 13%|█▎ | 46540/371472 [3:45:41<26:34:10, 3.40it/s] {'loss': 4.2917, 'learning_rate': 8.876672628154021e-07, 'epoch': 2.0} + 13%|█▎ | 46540/371472 [3:45:41<26:34:10, 3.40it/s] 13%|█▎ | 46541/371472 [3:45:41<25:23:19, 3.56it/s] 13%|█▎ | 46542/371472 [3:45:42<26:33:26, 3.40it/s] 13%|█▎ | 46543/371472 [3:45:42<27:27:02, 3.29it/s] 13%|█▎ | 46544/371472 [3:45:42<27:31:04, 3.28it/s] 13%|█▎ | 46545/371472 [3:45:43<27:37:31, 3.27it/s] 13%|█▎ | 46546/371472 [3:45:43<26:09:18, 3.45it/s] 13%|█▎ | 46547/371472 [3:45:43<26:42:01, 3.38it/s] 13%|█▎ | 46548/371472 [3:45:44<26:54:29, 3.35it/s] 13%|█▎ | 46549/371472 [3:45:44<27:04:18, 3.33it/s] 13%|█▎ | 46550/371472 [3:45:44<29:45:52, 3.03it/s] 13%|█▎ | 46551/371472 [3:45:45<29:13:30, 3.09it/s] 13%|█▎ | 46552/371472 [3:45:45<28:31:36, 3.16it/s] 13%|█▎ | 46553/371472 [3:45:45<27:40:18, 3.26it/s] 13%|█▎ | 46554/371472 [3:45:45<28:09:32, 3.21it/s] 13%|█▎ | 46555/371472 [3:45:46<28:29:57, 3.17it/s] 13%|█▎ | 46556/371472 [3:45:46<27:35:06, 3.27it/s] 13%|█▎ | 46557/371472 [3:45:46<26:43:10, 3.38it/s] 13%|█▎ | 46558/371472 [3:45:47<27:17:39, 3.31it/s] 13%|█▎ | 46559/371472 [3:45:47<26:14:11, 3.44it/s] 13%|█▎ | 46560/371472 [3:45:47<27:25:23, 3.29it/s] {'loss': 4.2508, 'learning_rate': 8.876187808399233e-07, 'epoch': 2.01} + 13%|█▎ | 46560/371472 [3:45:47<27:25:23, 3.29it/s] 13%|█▎ | 46561/371472 [3:45:47<26:35:12, 3.39it/s] 13%|█▎ | 46562/371472 [3:45:48<27:14:37, 3.31it/s] 13%|█▎ | 46563/371472 [3:45:48<27:15:17, 3.31it/s] 13%|█▎ | 46564/371472 [3:45:48<26:21:02, 3.43it/s] 13%|█▎ | 46565/371472 [3:45:49<25:53:10, 3.49it/s] 13%|█▎ | 46566/371472 [3:45:49<25:34:48, 3.53it/s] 13%|█▎ | 46567/371472 [3:45:49<26:00:08, 3.47it/s] 13%|█▎ | 46568/371472 [3:45:50<25:37:55, 3.52it/s] 13%|█▎ | 46569/371472 [3:45:50<25:54:00, 3.48it/s] 13%|█▎ | 46570/371472 [3:45:50<27:51:38, 3.24it/s] 13%|█▎ | 46571/371472 [3:45:51<29:22:08, 3.07it/s] 13%|█▎ | 46572/371472 [3:45:51<29:09:13, 3.10it/s] 13%|█▎ | 46573/371472 [3:45:51<29:08:18, 3.10it/s] 13%|█▎ | 46574/371472 [3:45:51<27:59:35, 3.22it/s] 13%|█▎ | 46575/371472 [3:45:52<26:32:07, 3.40it/s] 13%|█▎ | 46576/371472 [3:45:52<26:00:20, 3.47it/s] 13%|█▎ | 46577/371472 [3:45:52<27:26:33, 3.29it/s] 13%|█▎ | 46578/371472 [3:45:53<27:41:05, 3.26it/s] 13%|█▎ | 46579/371472 [3:45:53<27:35:20, 3.27it/s] 13%|█▎ | 46580/371472 [3:45:53<30:37:12, 2.95it/s] {'loss': 4.5191, 'learning_rate': 8.875702988644444e-07, 'epoch': 2.01} + 13%|█▎ | 46580/371472 [3:45:53<30:37:12, 2.95it/s] 13%|█▎ | 46581/371472 [3:45:54<29:00:41, 3.11it/s] 13%|█▎ | 46582/371472 [3:45:54<27:22:27, 3.30it/s] 13%|█▎ | 46583/371472 [3:45:54<27:50:52, 3.24it/s] 13%|█▎ | 46584/371472 [3:45:54<26:51:23, 3.36it/s] 13%|█▎ | 46585/371472 [3:45:55<26:07:59, 3.45it/s] 13%|█▎ | 46586/371472 [3:45:55<26:39:38, 3.38it/s] 13%|█▎ | 46587/371472 [3:45:55<26:07:01, 3.46it/s] 13%|█▎ | 46588/371472 [3:45:56<25:30:23, 3.54it/s] 13%|█▎ | 46589/371472 [3:45:56<26:10:52, 3.45it/s] 13%|█▎ | 46590/371472 [3:45:56<25:42:54, 3.51it/s] 13%|█▎ | 46591/371472 [3:45:56<26:31:27, 3.40it/s] 13%|█▎ | 46592/371472 [3:45:57<26:51:38, 3.36it/s] 13%|█▎ | 46593/371472 [3:45:57<26:37:51, 3.39it/s] 13%|█▎ | 46594/371472 [3:45:57<26:43:04, 3.38it/s] 13%|█▎ | 46595/371472 [3:45:58<28:07:57, 3.21it/s] 13%|█▎ | 46596/371472 [3:45:58<27:24:02, 3.29it/s] 13%|█▎ | 46597/371472 [3:45:58<28:30:31, 3.17it/s] 13%|█▎ | 46598/371472 [3:45:59<27:06:21, 3.33it/s] 13%|█▎ | 46599/371472 [3:45:59<26:41:47, 3.38it/s] 13%|█▎ | 46600/371472 [3:45:59<26:29:13, 3.41it/s] {'loss': 4.3219, 'learning_rate': 8.875218168889654e-07, 'epoch': 2.01} + 13%|█▎ | 46600/371472 [3:45:59<26:29:13, 3.41it/s] 13%|█▎ | 46601/371472 [3:45:59<25:34:41, 3.53it/s] 13%|█▎ | 46602/371472 [3:46:00<25:04:18, 3.60it/s] 13%|█▎ | 46603/371472 [3:46:00<25:14:31, 3.58it/s] 13%|█▎ | 46604/371472 [3:46:00<24:42:41, 3.65it/s] 13%|█▎ | 46605/371472 [3:46:01<25:35:06, 3.53it/s] 13%|█▎ | 46606/371472 [3:46:01<25:07:22, 3.59it/s] 13%|█▎ | 46607/371472 [3:46:01<25:42:33, 3.51it/s] 13%|█▎ | 46608/371472 [3:46:01<25:29:42, 3.54it/s] 13%|█▎ | 46609/371472 [3:46:02<26:00:52, 3.47it/s] 13%|█▎ | 46610/371472 [3:46:02<25:44:39, 3.51it/s] 13%|█▎ | 46611/371472 [3:46:02<24:53:29, 3.63it/s] 13%|█▎ | 46612/371472 [3:46:03<25:09:52, 3.59it/s] 13%|█▎ | 46613/371472 [3:46:03<25:30:58, 3.54it/s] 13%|█▎ | 46614/371472 [3:46:03<26:31:18, 3.40it/s] 13%|█▎ | 46615/371472 [3:46:03<26:46:56, 3.37it/s] 13%|█▎ | 46616/371472 [3:46:04<26:52:33, 3.36it/s] 13%|█▎ | 46617/371472 [3:46:04<26:26:12, 3.41it/s] 13%|█▎ | 46618/371472 [3:46:04<25:46:44, 3.50it/s] 13%|█▎ | 46619/371472 [3:46:05<25:20:02, 3.56it/s] 13%|█▎ | 46620/371472 [3:46:05<25:13:21, 3.58it/s] {'loss': 4.254, 'learning_rate': 8.874733349134865e-07, 'epoch': 2.01} + 13%|█▎ | 46620/371472 [3:46:05<25:13:21, 3.58it/s] 13%|█▎ | 46621/371472 [3:46:05<25:29:44, 3.54it/s] 13%|█▎ | 46622/371472 [3:46:05<26:24:44, 3.42it/s] 13%|█▎ | 46623/371472 [3:46:06<26:41:22, 3.38it/s] 13%|█▎ | 46624/371472 [3:46:06<30:10:08, 2.99it/s] 13%|█▎ | 46625/371472 [3:46:07<31:07:57, 2.90it/s] 13%|█▎ | 46626/371472 [3:46:07<30:32:10, 2.96it/s] 13%|█▎ | 46627/371472 [3:46:07<29:59:56, 3.01it/s] 13%|█▎ | 46628/371472 [3:46:07<28:43:53, 3.14it/s] 13%|█▎ | 46629/371472 [3:46:08<28:25:06, 3.18it/s] 13%|█▎ | 46630/371472 [3:46:08<28:35:49, 3.16it/s] 13%|█▎ | 46631/371472 [3:46:08<27:53:22, 3.24it/s] 13%|█▎ | 46632/371472 [3:46:09<28:40:51, 3.15it/s] 13%|█▎ | 46633/371472 [3:46:09<29:17:51, 3.08it/s] 13%|█▎ | 46634/371472 [3:46:09<29:50:17, 3.02it/s] 13%|█▎ | 46635/371472 [3:46:10<29:36:43, 3.05it/s] 13%|█▎ | 46636/371472 [3:46:10<29:26:32, 3.06it/s] 13%|█▎ | 46637/371472 [3:46:10<28:18:46, 3.19it/s] 13%|█▎ | 46638/371472 [3:46:11<26:39:41, 3.38it/s] 13%|█▎ | 46639/371472 [3:46:11<26:49:45, 3.36it/s] 13%|█▎ | 46640/371472 [3:46:11<26:16:38, 3.43it/s] {'loss': 4.2289, 'learning_rate': 8.874248529380077e-07, 'epoch': 2.01} + 13%|█▎ | 46640/371472 [3:46:11<26:16:38, 3.43it/s] 13%|█▎ | 46641/371472 [3:46:12<27:07:27, 3.33it/s] 13%|█▎ | 46642/371472 [3:46:12<25:50:01, 3.49it/s] 13%|█▎ | 46643/371472 [3:46:12<26:32:55, 3.40it/s] 13%|█▎ | 46644/371472 [3:46:12<26:22:15, 3.42it/s] 13%|█▎ | 46645/371472 [3:46:13<26:20:02, 3.43it/s] 13%|█▎ | 46646/371472 [3:46:13<25:59:04, 3.47it/s] 13%|█▎ | 46647/371472 [3:46:13<28:29:17, 3.17it/s] 13%|█▎ | 46648/371472 [3:46:14<27:41:08, 3.26it/s] 13%|█▎ | 46649/371472 [3:46:14<26:08:24, 3.45it/s] 13%|█▎ | 46650/371472 [3:46:14<26:34:24, 3.40it/s] 13%|█▎ | 46651/371472 [3:46:14<27:28:18, 3.28it/s] 13%|█▎ | 46652/371472 [3:46:15<26:25:32, 3.41it/s] 13%|█▎ | 46653/371472 [3:46:15<26:20:27, 3.43it/s] 13%|█▎ | 46654/371472 [3:46:15<26:48:17, 3.37it/s] 13%|█▎ | 46655/371472 [3:46:16<26:40:12, 3.38it/s] 13%|█▎ | 46656/371472 [3:46:16<26:54:51, 3.35it/s] 13%|█▎ | 46657/371472 [3:46:16<27:24:07, 3.29it/s] 13%|█▎ | 46658/371472 [3:46:17<27:06:56, 3.33it/s] 13%|█▎ | 46659/371472 [3:46:17<26:42:12, 3.38it/s] 13%|█▎ | 46660/371472 [3:46:17<26:34:57, 3.39it/s] {'loss': 4.454, 'learning_rate': 8.873763709625289e-07, 'epoch': 2.01} + 13%|█▎ | 46660/371472 [3:46:17<26:34:57, 3.39it/s] 13%|█▎ | 46661/371472 [3:46:17<25:48:12, 3.50it/s] 13%|█▎ | 46662/371472 [3:46:18<25:22:51, 3.55it/s] 13%|█▎ | 46663/371472 [3:46:18<24:36:56, 3.67it/s] 13%|█▎ | 46664/371472 [3:46:18<24:27:13, 3.69it/s] 13%|█▎ | 46665/371472 [3:46:18<24:48:33, 3.64it/s] 13%|█▎ | 46666/371472 [3:46:19<24:05:29, 3.75it/s] 13%|█▎ | 46667/371472 [3:46:19<24:42:54, 3.65it/s] 13%|█▎ | 46668/371472 [3:46:19<23:56:50, 3.77it/s] 13%|█▎ | 46669/371472 [3:46:20<24:33:18, 3.67it/s] 13%|█▎ | 46670/371472 [3:46:20<26:35:43, 3.39it/s] 13%|█▎ | 46671/371472 [3:46:20<28:54:43, 3.12it/s] 13%|█▎ | 46672/371472 [3:46:21<29:41:46, 3.04it/s] 13%|█▎ | 46673/371472 [3:46:21<32:46:16, 2.75it/s] 13%|█▎ | 46674/371472 [3:46:21<31:45:01, 2.84it/s] \ No newline at end of file