diff --git "a/run-2024-10-02T06:53:25+00:00.log" "b/run-2024-10-02T06:53:25+00:00.log" --- "a/run-2024-10-02T06:53:25+00:00.log" +++ "b/run-2024-10-02T06:53:25+00:00.log" @@ -820,4 +820,200 @@ Non-default generation parameters: {'max_length': 200, 'early_stopping': True, ' 25%|██▌ | 14406/57600 [2:31:33<21:15:10, 1.77s/it] 25%|██▌ | 14407/57600 [2:31:34<17:00:23, 1.42s/it] 25%|██▌ | 14408/57600 [2:31:34<14:26:23, 1.20s/it] 25%|██▌ | 14409/57600 [2:31:35<12:16:00, 1.02s/it] 25%|██▌ | 14410/57600 [2:31:35<10:46:02, 1.11it/s] 25%|██▌ | 14411/57600 [2:31:36<9:52:56, 1.21it/s] 25%|██▌ | 14412/57600 [2:31:37<9:31:03, 1.26it/s] 25%|██▌ | 14413/57600 [2:31:38<9:11:07, 1.31it/s] 25%|██▌ | 14414/57600 [2:31:38<8:46:11, 1.37it/s] 25%|██▌ | 14415/57600 [2:31:39<8:13:04, 1.46it/s] 25%|██▌ | 14416/57600 [2:31:39<8:06:47, 1.48it/s] 25%|██▌ | 14417/57600 [2:31:40<8:00:21, 1.50it/s] 25%|██▌ | 14418/57600 [2:31:41<8:02:46, 1.49it/s] 25%|██▌ | 14419/57600 [2:31:41<7:58:43, 1.50it/s] 25%|██▌ | 14420/57600 [2:31:42<7:48:02, 1.54it/s] {'loss': 1.2825, 'learning_rate': 7.770383275261323e-07, 'epoch': 159.16} 25%|██▌ | 14420/57600 [2:31:42<7:48:02, 1.54it/s] 25%|██▌ | 14421/57600 [2:31:43<7:57:24, 1.51it/s] 25%|██▌ | 14422/57600 [2:31:43<7:44:31, 1.55it/s] 25%|██▌ | 14423/57600 [2:31:44<7:48:24, 1.54it/s] 25%|██▌ | 14424/57600 [2:31:45<7:40:26, 1.56it/s] 25%|██▌ | 14425/57600 [2:31:45<7:32:19, 1.59it/s] 25%|██▌ | 14426/57600 [2:31:46<7:38:00, 1.57it/s] 25%|██▌ | 14427/57600 [2:31:46<7:21:54, 1.63it/s] 25%|██▌ | 14428/57600 [2:31:47<7:33:16, 1.59it/s] 25%|██▌ | 14429/57600 [2:31:48<7:44:30, 1.55it/s] 25%|██▌ | 14430/57600 [2:31:48<7:39:01, 1.57it/s] 25%|██▌ | 14431/57600 [2:31:49<7:44:01, 1.55it/s] 25%|██▌ | 14432/57600 [2:31:50<7:42:24, 1.56it/s] 25%|██▌ | 14433/57600 [2:31:50<7:30:10, 1.60it/s] 25%|██▌ | 14434/57600 [2:31:51<7:34:01, 1.58it/s] 25%|██▌ | 14435/57600 [2:31:52<7:34:22, 1.58it/s] 25%|██▌ | 14436/57600 [2:31:52<7:37:57, 1.57it/s] 25%|██▌ | 14437/57600 [2:31:53<7:55:38, 1.51it/s] 25%|██▌ | 14438/57600 [2:31:54<7:47:13, 1.54it/s] 25%|██▌ | 14439/57600 [2:31:54<7:39:37, 1.57it/s] 25%|██▌ | 14440/57600 [2:31:55<7:55:09, 1.51it/s] {'loss': 1.3146, 'learning_rate': 7.767247386759582e-07, 'epoch': 159.38} 25%|██▌ | 14440/57600 [2:31:55<7:55:09, 1.51it/s] 25%|██▌ | 14441/57600 [2:31:56<7:48:46, 1.53it/s] 25%|██▌ | 14442/57600 [2:31:56<7:49:35, 1.53it/s] 25%|██▌ | 14443/57600 [2:31:57<7:49:36, 1.53it/s] 25%|██▌ | 14444/57600 [2:31:57<7:39:20, 1.57it/s] 25%|██▌ | 14445/57600 [2:31:58<7:28:28, 1.60it/s] 25%|██▌ | 14446/57600 [2:31:59<7:20:04, 1.63it/s] 25%|██▌ | 14447/57600 [2:31:59<7:29:32, 1.60it/s] 25%|██▌ | 14448/57600 [2:32:00<7:25:49, 1.61it/s] 25%|██▌ | 14449/57600 [2:32:01<7:30:49, 1.60it/s] 25%|██▌ | 14450/57600 [2:32:01<7:31:12, 1.59it/s] 25%|██▌ | 14451/57600 [2:32:02<7:59:19, 1.50it/s] 25%|██▌ | 14452/57600 [2:32:03<8:12:50, 1.46it/s] 25%|██▌ | 14453/57600 [2:32:03<8:21:55, 1.43it/s] 25%|██▌ | 14454/57600 [2:32:04<8:39:06, 1.39it/s] 25%|██▌ | 14455/57600 [2:32:05<8:04:46, 1.48it/s] 25%|██▌ | 14456/57600 [2:32:05<7:48:54, 1.53it/s] 25%|██▌ | 14457/57600 [2:32:06<7:49:22, 1.53it/s] 25%|██▌ | 14458/57600 [2:32:07<7:33:15, 1.59it/s] 25%|██▌ | 14459/57600 [2:32:07<7:29:17, 1.60it/s] 25%|██▌ | 14460/57600 [2:32:08<7:31:25, 1.59it/s] {'loss': 1.3051, 'learning_rate': 7.76411149825784e-07, 'epoch': 159.6} - 25%|██▌ | 14460/57600 [2:32:08<7:31:25, 1.59it/s] \ No newline at end of file + 25%|██▌ | 14460/57600 [2:32:08<7:31:25, 1.59it/s] 25%|██▌ | 14461/57600 [2:32:08<7:16:50, 1.65it/s] 25%|██▌ | 14462/57600 [2:32:09<7:13:25, 1.66it/s] 25%|██▌ | 14463/57600 [2:32:10<7:13:54, 1.66it/s] 25%|██▌ | 14464/57600 [2:32:10<7:19:33, 1.64it/s] 25%|██▌ | 14465/57600 [2:32:11<7:19:05, 1.64it/s] 25%|██▌ | 14466/57600 [2:32:11<7:19:23, 1.64it/s] 25%|██▌ | 14467/57600 [2:32:12<7:14:52, 1.65it/s] 25%|██▌ | 14468/57600 [2:32:13<7:09:33, 1.67it/s] 25%|██▌ | 14469/57600 [2:32:13<7:22:21, 1.63it/s] 25%|██▌ | 14470/57600 [2:32:14<7:44:56, 1.55it/s] 25%|██▌ | 14471/57600 [2:32:15<7:30:24, 1.60it/s] 25%|██▌ | 14472/57600 [2:32:15<7:28:44, 1.60it/s] 25%|██▌ | 14473/57600 [2:32:16<7:12:44, 1.66it/s] 25%|██▌ | 14474/57600 [2:32:16<7:24:07, 1.62it/s] 25%|██▌ | 14475/57600 [2:32:17<7:20:09, 1.63it/s] 25%|██▌ | 14476/57600 [2:32:18<7:39:36, 1.56it/s] 25%|██▌ | 14477/57600 [2:32:18<7:36:52, 1.57it/s] 25%|██▌ | 14478/57600 [2:32:19<7:52:15, 1.52it/s] 25%|██▌ | 14479/57600 [2:32:20<7:48:28, 1.53it/s] 25%|██▌ | 14480/57600 [2:32:20<7:34:42, 1.58it/s] {'loss': 1.2975, 'learning_rate': 7.760975609756097e-07, 'epoch': 159.82} + 25%|██▌ | 14480/57600 [2:32:20<7:34:42, 1.58it/s] 25%|██▌ | 14481/57600 [2:32:21<7:27:48, 1.60it/s] 25%|██▌ | 14482/57600 [2:32:21<7:33:26, 1.58it/s] 25%|██▌ | 14483/57600 [2:32:22<7:29:37, 1.60it/s] 25%|██▌ | 14484/57600 [2:32:23<7:28:11, 1.60it/s] 25%|██▌ | 14485/57600 [2:32:23<7:26:11, 1.61it/s] 25%|██▌ | 14486/57600 [2:32:24<7:35:41, 1.58it/s] 25%|██▌ | 14487/57600 [2:32:25<7:30:49, 1.59it/s] 25%|██▌ | 14488/57600 [2:32:25<7:39:26, 1.56it/s] 25%|██▌ | 14489/57600 [2:32:26<7:50:04, 1.53it/s] 25%|██▌ | 14490/57600 [2:32:27<7:43:05, 1.55it/s] 25%|██▌ | 14491/57600 [2:32:27<7:35:19, 1.58it/s] 25%|██▌ | 14492/57600 [2:32:28<7:28:21, 1.60it/s] 25%|██▌ | 14493/57600 [2:32:28<7:21:33, 1.63it/s] 25%|██▌ | 14494/57600 [2:32:29<7:33:03, 1.59it/s] 25%|██▌ | 14495/57600 [2:32:30<7:24:44, 1.62it/s] 25%|██▌ | 14496/57600 [2:32:30<7:38:40, 1.57it/s] 25%|██▌ | 14497/57600 [2:32:31<8:06:09, 1.48it/s] 25%|██▌ | 14498/57600 [2:32:32<8:04:09, 1.48it/s] 25%|██▌ | 14499/57600 [2:32:32<7:57:34, 1.50it/s] 25%|██▌ | 14500/57600 [2:32:33<8:00:59, 1.49it/s] {'loss': 1.3279, 'learning_rate': 7.757839721254355e-07, 'epoch': 160.04} + 25%|██▌ | 14500/57600 [2:32:33<8:00:59, 1.49it/s] 25%|██▌ | 14501/57600 [2:32:34<7:54:01, 1.52it/s] 25%|██▌ | 14502/57600 [2:32:34<8:20:32, 1.44it/s] 25%|██▌ | 14503/57600 [2:32:35<8:34:23, 1.40it/s] 25%|██▌ | 14504/57600 [2:32:36<8:26:31, 1.42it/s] 25%|██▌ | 14505/57600 [2:32:37<8:03:07, 1.49it/s] 25%|██▌ | 14506/57600 [2:32:37<7:52:02, 1.52it/s] 25%|██▌ | 14507/57600 [2:32:38<7:51:49, 1.52it/s] 25%|██▌ | 14508/57600 [2:32:38<7:37:34, 1.57it/s] 25%|██▌ | 14509/57600 [2:32:39<7:36:39, 1.57it/s] 25%|██▌ | 14510/57600 [2:32:40<7:33:36, 1.58it/s] 25%|██▌ | 14511/57600 [2:32:40<7:20:28, 1.63it/s] 25%|██▌ | 14512/57600 [2:32:41<7:34:23, 1.58it/s] 25%|██▌ | 14513/57600 [2:32:42<7:32:31, 1.59it/s] 25%|██▌ | 14514/57600 [2:32:42<7:25:07, 1.61it/s] 25%|██▌ | 14515/57600 [2:32:43<7:28:59, 1.60it/s] 25%|██▌ | 14516/57600 [2:32:43<7:44:29, 1.55it/s] 25%|██▌ | 14517/57600 [2:32:44<7:47:39, 1.54it/s] 25%|██▌ | 14518/57600 [2:32:45<7:58:50, 1.50it/s] 25%|██▌ | 14519/57600 [2:32:45<7:50:53, 1.52it/s] 25%|██▌ | 14520/57600 [2:32:46<8:00:15, 1.50it/s] {'loss': 1.3128, 'learning_rate': 7.754703832752613e-07, 'epoch': 160.26} + 25%|██▌ | 14520/57600 [2:32:46<8:00:15, 1.50it/s] 25%|██▌ | 14521/57600 [2:32:47<7:53:19, 1.52it/s] 25%|██▌ | 14522/57600 [2:32:47<7:53:12, 1.52it/s] 25%|██▌ | 14523/57600 [2:32:48<7:48:42, 1.53it/s] 25%|██▌ | 14524/57600 [2:32:49<7:35:42, 1.58it/s] 25%|██▌ | 14525/57600 [2:32:49<7:33:30, 1.58it/s] 25%|██▌ | 14526/57600 [2:32:50<7:28:18, 1.60it/s] 25%|██▌ | 14527/57600 [2:32:51<7:29:14, 1.60it/s] 25%|██▌ | 14528/57600 [2:32:51<7:47:29, 1.54it/s] 25%|██▌ | 14529/57600 [2:32:52<8:00:11, 1.49it/s] 25%|██▌ | 14530/57600 [2:32:53<7:52:35, 1.52it/s] 25%|██▌ | 14531/57600 [2:32:53<7:58:40, 1.50it/s] 25%|██▌ | 14532/57600 [2:32:54<7:50:53, 1.52it/s] 25%|██▌ | 14533/57600 [2:32:54<7:33:20, 1.58it/s] 25%|██▌ | 14534/57600 [2:32:55<7:29:45, 1.60it/s] 25%|██▌ | 14535/57600 [2:32:56<7:39:49, 1.56it/s] 25%|██▌ | 14536/57600 [2:32:56<7:42:28, 1.55it/s] 25%|██▌ | 14537/57600 [2:32:57<7:57:06, 1.50it/s] 25%|██▌ | 14538/57600 [2:32:58<7:43:57, 1.55it/s] 25%|██▌ | 14539/57600 [2:32:58<7:45:38, 1.54it/s] 25%|██▌ | 14540/57600 [2:32:59<8:00:15, 1.49it/s] {'loss': 1.2796, 'learning_rate': 7.751567944250871e-07, 'epoch': 160.49} + 25%|██▌ | 14540/57600 [2:32:59<8:00:15, 1.49it/s] 25%|██▌ | 14541/57600 [2:33:00<7:32:49, 1.58it/s] 25%|██▌ | 14542/57600 [2:33:00<7:45:04, 1.54it/s] 25%|██▌ | 14543/57600 [2:33:01<7:46:55, 1.54it/s] 25%|██▌ | 14544/57600 [2:33:02<7:34:27, 1.58it/s] 25%|██▌ | 14545/57600 [2:33:02<7:55:55, 1.51it/s] 25%|██▌ | 14546/57600 [2:33:03<7:48:44, 1.53it/s] 25%|██▌ | 14547/57600 [2:33:04<7:44:49, 1.54it/s] 25%|██▌ | 14548/57600 [2:33:04<7:52:43, 1.52it/s] 25%|██▌ | 14549/57600 [2:33:05<7:53:14, 1.52it/s] 25%|██▌ | 14550/57600 [2:33:06<7:46:08, 1.54it/s] 25%|██▌ | 14551/57600 [2:33:06<8:21:56, 1.43it/s] 25%|██▌ | 14552/57600 [2:33:07<8:28:08, 1.41it/s] 25%|██▌ | 14553/57600 [2:33:08<8:10:54, 1.46it/s] 25%|██▌ | 14554/57600 [2:33:08<8:12:48, 1.46it/s] 25%|██▌ | 14555/57600 [2:33:09<8:09:45, 1.46it/s] 25%|██▌ | 14556/57600 [2:33:10<7:54:49, 1.51it/s] 25%|██▌ | 14557/57600 [2:33:10<7:54:14, 1.51it/s] 25%|██▌ | 14558/57600 [2:33:11<7:52:13, 1.52it/s] 25%|██▌ | 14559/57600 [2:33:12<7:37:24, 1.57it/s] 25%|██▌ | 14560/57600 [2:33:12<7:52:35, 1.52it/s] {'loss': 1.313, 'learning_rate': 7.748432055749128e-07, 'epoch': 160.71} + 25%|██▌ | 14560/57600 [2:33:12<7:52:35, 1.52it/s] 25%|██▌ | 14561/57600 [2:33:13<7:47:46, 1.53it/s] 25%|██▌ | 14562/57600 [2:33:14<7:45:32, 1.54it/s] 25%|██▌ | 14563/57600 [2:33:14<8:00:47, 1.49it/s] 25%|██▌ | 14564/57600 [2:33:15<8:12:00, 1.46it/s] 25%|██▌ | 14565/57600 [2:33:16<8:12:00, 1.46it/s] 25%|██▌ | 14566/57600 [2:33:16<7:57:57, 1.50it/s] 25%|██▌ | 14567/57600 [2:33:17<7:43:56, 1.55it/s] 25%|██▌ | 14568/57600 [2:33:18<7:43:05, 1.55it/s] 25%|██▌ | 14569/57600 [2:33:18<7:36:17, 1.57it/s] 25%|██▌ | 14570/57600 [2:33:19<7:29:47, 1.59it/s] 25%|██▌ | 14571/57600 [2:33:19<7:28:50, 1.60it/s] 25%|██▌ | 14572/57600 [2:33:20<7:29:24, 1.60it/s] 25%|██▌ | 14573/57600 [2:33:21<7:32:26, 1.58it/s] 25%|██▌ | 14574/57600 [2:33:21<7:27:47, 1.60it/s] 25%|██▌ | 14575/57600 [2:33:22<7:22:40, 1.62it/s] 25%|██▌ | 14576/57600 [2:33:22<7:18:48, 1.63it/s] 25%|██▌ | 14577/57600 [2:33:23<7:08:57, 1.67it/s] 25%|██▌ | 14578/57600 [2:33:24<7:04:13, 1.69it/s] 25%|██▌ | 14579/57600 [2:33:24<7:20:11, 1.63it/s] 25%|██▌ | 14580/57600 [2:33:25<7:22:18, 1.62it/s] {'loss': 1.2979, 'learning_rate': 7.745296167247387e-07, 'epoch': 160.93} + 25%|██▌ | 14580/57600 [2:33:25<7:22:18, 1.62it/s] 25%|██▌ | 14581/57600 [2:33:26<7:24:32, 1.61it/s] 25%|██▌ | 14582/57600 [2:33:26<7:36:39, 1.57it/s] 25%|██▌ | 14583/57600 [2:33:27<7:28:54, 1.60it/s] 25%|██▌ | 14584/57600 [2:33:27<7:24:05, 1.61it/s] 25%|██▌ | 14585/57600 [2:33:28<7:28:20, 1.60it/s] 25%|██▌ | 14586/57600 [2:33:29<7:31:17, 1.59it/s] 25%|██▌ | 14587/57600 [2:33:29<7:44:14, 1.54it/s] 25%|██▌ | 14588/57600 [2:33:30<7:53:41, 1.51it/s] 25%|██▌ | 14589/57600 [2:33:31<7:44:47, 1.54it/s] 25%|██▌ | 14590/57600 [2:33:31<7:41:49, 1.55it/s] 25%|██▌ | 14591/57600 [2:33:32<7:34:44, 1.58it/s] 25%|██▌ | 14592/57600 [2:33:33<7:43:48, 1.55it/s] 25%|██▌ | 14593/57600 [2:33:33<7:37:36, 1.57it/s] 25%|██▌ | 14594/57600 [2:33:34<7:32:33, 1.58it/s] 25%|██▌ | 14595/57600 [2:33:34<7:26:38, 1.60it/s] 25%|██▌ | 14596/57600 [2:33:35<7:33:57, 1.58it/s] 25%|██▌ | 14597/57600 [2:33:36<7:29:28, 1.59it/s] 25%|██▌ | 14598/57600 [2:33:36<7:34:58, 1.58it/s] 25%|██▌ | 14599/57600 [2:33:37<7:22:36, 1.62it/s] 25%|██▌ | 14600/57600 [2:33:38<7:38:10, 1.56it/s] {'loss': 1.3405, 'learning_rate': 7.742160278745645e-07, 'epoch': 161.15} + 25%|██▌ | 14600/57600 [2:33:38<7:38:10, 1.56it/s] 25%|██▌ | 14601/57600 [2:33:38<7:37:07, 1.57it/s] 25%|██▌ | 14602/57600 [2:33:39<7:45:52, 1.54it/s] 25%|██▌ | 14603/57600 [2:33:40<7:43:44, 1.55it/s] 25%|██▌ | 14604/57600 [2:33:40<7:41:22, 1.55it/s] 25%|██▌ | 14605/57600 [2:33:41<7:45:37, 1.54it/s] 25%|██▌ | 14606/57600 [2:33:42<7:52:58, 1.52it/s] 25%|██▌ | 14607/57600 [2:33:42<7:49:21, 1.53it/s] 25%|██▌ | 14608/57600 [2:33:43<7:37:22, 1.57it/s] 25%|██▌ | 14609/57600 [2:33:43<7:38:18, 1.56it/s] 25%|██▌ | 14610/57600 [2:33:44<7:34:40, 1.58it/s] 25%|██▌ | 14611/57600 [2:33:45<7:18:26, 1.63it/s] 25%|██▌ | 14612/57600 [2:33:45<7:11:01, 1.66it/s] 25%|██▌ | 14613/57600 [2:33:46<7:20:36, 1.63it/s] 25%|██▌ | 14614/57600 [2:33:46<7:14:58, 1.65it/s] 25%|██▌ | 14615/57600 [2:33:47<7:04:04, 1.69it/s] 25%|██▌ | 14616/57600 [2:33:48<7:07:19, 1.68it/s] 25%|██▌ | 14617/57600 [2:33:48<7:10:26, 1.66it/s] 25%|██▌ | 14618/57600 [2:33:49<6:58:58, 1.71it/s] 25%|██▌ | 14619/57600 [2:33:49<7:15:45, 1.64it/s] 25%|██▌ | 14620/57600 [2:33:50<7:23:56, 1.61it/s] {'loss': 1.2926, 'learning_rate': 7.739024390243903e-07, 'epoch': 161.37} + 25%|██▌ | 14620/57600 [2:33:50<7:23:56, 1.61it/s] 25%|██▌ | 14621/57600 [2:33:51<7:11:36, 1.66it/s] 25%|██▌ | 14622/57600 [2:33:51<7:14:24, 1.65it/s] 25%|██▌ | 14623/57600 [2:33:52<7:19:03, 1.63it/s] 25%|██▌ | 14624/57600 [2:33:53<7:29:19, 1.59it/s] 25%|██▌ | 14625/57600 [2:33:53<7:17:11, 1.64it/s] 25%|██▌ | 14626/57600 [2:33:54<7:08:42, 1.67it/s] 25%|██▌ | 14627/57600 [2:33:54<7:12:07, 1.66it/s] 25%|██▌ | 14628/57600 [2:33:55<7:06:55, 1.68it/s] 25%|██▌ | 14629/57600 [2:33:56<7:13:31, 1.65it/s] 25%|██▌ | 14630/57600 [2:33:56<7:16:32, 1.64it/s] 25%|██▌ | 14631/57600 [2:33:57<7:17:26, 1.64it/s] 25%|██▌ | 14632/57600 [2:33:57<7:16:37, 1.64it/s] 25%|██▌ | 14633/57600 [2:33:58<7:06:22, 1.68it/s] 25%|██▌ | 14634/57600 [2:33:59<7:05:31, 1.68it/s] 25%|██▌ | 14635/57600 [2:33:59<6:57:19, 1.72it/s] 25%|██▌ | 14636/57600 [2:34:00<6:53:48, 1.73it/s] 25%|██▌ | 14637/57600 [2:34:00<7:23:43, 1.61it/s] 25%|██▌ | 14638/57600 [2:34:01<7:14:32, 1.65it/s] 25%|██▌ | 14639/57600 [2:34:02<7:12:02, 1.66it/s] 25%|██▌ | 14640/57600 [2:34:02<7:17:52, 1.64it/s] {'loss': 1.3202, 'learning_rate': 7.735888501742159e-07, 'epoch': 161.59} + 25%|██▌ | 14640/57600 [2:34:02<7:17:52, 1.64it/s] 25%|██▌ | 14641/57600 [2:34:03<7:14:54, 1.65it/s] 25%|██▌ | 14642/57600 [2:34:03<7:24:56, 1.61it/s] 25%|██▌ | 14643/57600 [2:34:04<7:13:38, 1.65it/s] 25%|██▌ | 14644/57600 [2:34:05<7:23:06, 1.62it/s] 25%|██▌ | 14645/57600 [2:34:05<7:27:09, 1.60it/s] 25%|██▌ | 14646/57600 [2:34:06<7:28:09, 1.60it/s] 25%|██▌ | 14647/57600 [2:34:07<7:30:07, 1.59it/s] 25%|██▌ | 14648/57600 [2:34:07<7:21:11, 1.62it/s] 25%|██▌ | 14649/57600 [2:34:08<7:26:03, 1.60it/s] 25%|██▌ | 14650/57600 [2:34:08<7:16:37, 1.64it/s] 25%|██▌ | 14651/57600 [2:34:09<7:21:42, 1.62it/s] 25%|██▌ | 14652/57600 [2:34:10<7:20:41, 1.62it/s] 25%|██▌ | 14653/57600 [2:34:10<7:20:26, 1.63it/s] 25%|██▌ | 14654/57600 [2:34:11<7:23:15, 1.61it/s] 25%|██▌ | 14655/57600 [2:34:11<7:16:16, 1.64it/s] 25%|██▌ | 14656/57600 [2:34:12<7:14:34, 1.65it/s] 25%|██▌ | 14657/57600 [2:34:13<7:02:16, 1.69it/s] 25%|██▌ | 14658/57600 [2:34:13<7:03:47, 1.69it/s] 25%|██▌ | 14659/57600 [2:34:14<7:18:17, 1.63it/s] 25%|██▌ | 14660/57600 [2:34:14<7:20:56, 1.62it/s] {'loss': 1.3303, 'learning_rate': 7.732752613240418e-07, 'epoch': 161.81} + 25%|██▌ | 14660/57600 [2:34:14<7:20:56, 1.62it/s] 25%|██▌ | 14661/57600 [2:34:15<7:24:38, 1.61it/s] 25%|██▌ | 14662/57600 [2:34:16<7:27:37, 1.60it/s] 25%|██▌ | 14663/57600 [2:34:16<7:37:51, 1.56it/s] 25%|██▌ | 14664/57600 [2:34:17<7:28:43, 1.59it/s] 25%|██▌ | 14665/57600 [2:34:18<7:42:51, 1.55it/s] 25%|██▌ | 14666/57600 [2:34:18<7:54:59, 1.51it/s] 25%|██▌ | 14667/57600 [2:34:19<7:54:47, 1.51it/s] 25%|██▌ | 14668/57600 [2:34:20<7:37:45, 1.56it/s] 25%|██▌ | 14669/57600 [2:34:20<7:35:50, 1.57it/s] 25%|██▌ | 14670/57600 [2:34:21<7:29:55, 1.59it/s] 25%|██▌ | 14671/57600 [2:34:22<7:34:06, 1.58it/s] 25%|██▌ | 14672/57600 [2:34:22<7:33:16, 1.58it/s] 25%|██▌ | 14673/57600 [2:34:23<7:16:32, 1.64it/s] 25%|██▌ | 14674/57600 [2:34:23<7:30:27, 1.59it/s] 25%|██▌ | 14675/57600 [2:34:24<7:22:28, 1.62it/s] 25%|██▌ | 14676/57600 [2:34:25<7:19:57, 1.63it/s] 25%|██▌ | 14677/57600 [2:34:25<7:17:06, 1.64it/s] 25%|██▌ | 14678/57600 [2:34:26<7:44:31, 1.54it/s] 25%|██▌ | 14679/57600 [2:34:27<7:36:59, 1.57it/s] 25%|██▌ | 14680/57600 [2:34:27<7:44:26, 1.54it/s] {'loss': 1.271, 'learning_rate': 7.729616724738675e-07, 'epoch': 162.03} + 25%|██▌ | 14680/57600 [2:34:27<7:44:26, 1.54it/s] 25%|██▌ | 14681/57600 [2:34:28<7:28:52, 1.59it/s] 25%|██▌ | 14682/57600 [2:34:28<7:32:14, 1.58it/s] 25%|██▌ | 14683/57600 [2:34:29<7:43:26, 1.54it/s] 25%|██▌ | 14684/57600 [2:34:30<7:32:57, 1.58it/s] 25%|██▌ | 14685/57600 [2:34:30<7:34:30, 1.57it/s] 25%|██▌ | 14686/57600 [2:34:31<7:41:51, 1.55it/s] 25%|██▌ | 14687/57600 [2:34:32<7:24:05, 1.61it/s] 26%|██▌ | 14688/57600 [2:34:32<7:11:52, 1.66it/s] 26%|██▌ | 14689/57600 [2:34:33<7:27:10, 1.60it/s] 26%|██▌ | 14690/57600 [2:34:33<7:32:43, 1.58it/s] 26%|██▌ | 14691/57600 [2:34:34<7:14:51, 1.64it/s] 26%|██▌ | 14692/57600 [2:34:35<7:12:14, 1.65it/s] 26%|██▌ | 14693/57600 [2:34:35<7:08:52, 1.67it/s] 26%|██▌ | 14694/57600 [2:34:36<7:02:10, 1.69it/s] 26%|██▌ | 14695/57600 [2:34:36<7:15:40, 1.64it/s] 26%|██▌ | 14696/57600 [2:34:37<7:15:25, 1.64it/s] 26%|██▌ | 14697/57600 [2:34:38<7:13:58, 1.65it/s] 26%|██▌ | 14698/57600 [2:34:38<7:27:00, 1.60it/s] 26%|██▌ | 14699/57600 [2:34:39<7:49:24, 1.52it/s] 26%|██▌ | 14700/57600 [2:34:40<7:49:18, 1.52it/s] {'loss': 1.2651, 'learning_rate': 7.726480836236933e-07, 'epoch': 162.25} + 26%|██▌ | 14700/57600 [2:34:40<7:49:18, 1.52it/s] 26%|██▌ | 14701/57600 [2:34:40<7:39:13, 1.56it/s] 26%|██▌ | 14702/57600 [2:34:41<7:39:36, 1.56it/s] 26%|██▌ | 14703/57600 [2:34:42<7:48:03, 1.53it/s] 26%|██▌ | 14704/57600 [2:34:42<7:33:43, 1.58it/s] 26%|██▌ | 14705/57600 [2:34:43<7:30:21, 1.59it/s] 26%|██▌ | 14706/57600 [2:34:43<7:23:28, 1.61it/s] 26%|██▌ | 14707/57600 [2:34:44<7:12:25, 1.65it/s] 26%|██▌ | 14708/57600 [2:34:45<7:21:24, 1.62it/s] 26%|██▌ | 14709/57600 [2:34:45<7:22:44, 1.61it/s] 26%|██▌ | 14710/57600 [2:34:46<7:31:08, 1.58it/s] 26%|██▌ | 14711/57600 [2:34:47<7:26:24, 1.60it/s] 26%|██▌ | 14712/57600 [2:34:47<7:33:31, 1.58it/s] 26%|██▌ | 14713/57600 [2:34:48<7:17:14, 1.63it/s] 26%|██▌ | 14714/57600 [2:34:48<7:03:51, 1.69it/s] 26%|██▌ | 14715/57600 [2:34:49<7:15:56, 1.64it/s] 26%|██▌ | 14716/57600 [2:34:50<7:13:04, 1.65it/s] 26%|██▌ | 14717/57600 [2:34:50<7:09:17, 1.66it/s] 26%|██▌ | 14718/57600 [2:34:51<7:16:24, 1.64it/s] 26%|██▌ | 14719/57600 [2:34:51<7:33:31, 1.58it/s] 26%|██▌ | 14720/57600 [2:34:52<7:25:13, 1.61it/s] {'loss': 1.2809, 'learning_rate': 7.723344947735191e-07, 'epoch': 162.47} + 26%|██▌ | 14720/57600 [2:34:52<7:25:13, 1.61it/s] 26%|██▌ | 14721/57600 [2:34:53<7:13:25, 1.65it/s] 26%|██▌ | 14722/57600 [2:34:53<7:08:58, 1.67it/s] 26%|██▌ | 14723/57600 [2:34:54<7:16:22, 1.64it/s] 26%|██▌ | 14724/57600 [2:34:54<7:05:36, 1.68it/s] 26%|██▌ | 14725/57600 [2:34:55<7:02:15, 1.69it/s] 26%|██▌ | 14726/57600 [2:34:56<7:02:23, 1.69it/s] 26%|██▌ | 14727/57600 [2:34:56<7:05:19, 1.68it/s] 26%|██▌ | 14728/57600 [2:34:57<6:59:23, 1.70it/s] 26%|██▌ | 14729/57600 [2:34:57<7:12:23, 1.65it/s] 26%|██▌ | 14730/57600 [2:34:58<7:01:11, 1.70it/s] 26%|██▌ | 14731/57600 [2:34:59<6:53:11, 1.73it/s] 26%|██▌ | 14732/57600 [2:34:59<6:47:16, 1.75it/s] 26%|██▌ | 14733/57600 [2:35:00<7:01:52, 1.69it/s] 26%|██▌ | 14734/57600 [2:35:00<6:59:15, 1.70it/s] 26%|██▌ | 14735/57600 [2:35:01<6:59:59, 1.70it/s] 26%|██▌ | 14736/57600 [2:35:02<7:17:36, 1.63it/s] 26%|██▌ | 14737/57600 [2:35:02<7:15:29, 1.64it/s] 26%|██▌ | 14738/57600 [2:35:03<7:21:09, 1.62it/s] 26%|██▌ | 14739/57600 [2:35:03<7:18:09, 1.63it/s] 26%|██▌ | 14740/57600 [2:35:04<7:10:15, 1.66it/s] {'loss': 1.3978, 'learning_rate': 7.72020905923345e-07, 'epoch': 162.69} + 26%|██▌ | 14740/57600 [2:35:04<7:10:15, 1.66it/s] 26%|██▌ | 14741/57600 [2:35:05<7:00:54, 1.70it/s] 26%|██▌ | 14742/57600 [2:35:05<7:04:12, 1.68it/s] 26%|██▌ | 14743/57600 [2:35:06<7:03:56, 1.68it/s] 26%|██▌ | 14744/57600 [2:35:06<7:21:14, 1.62it/s] 26%|██▌ | 14745/57600 [2:35:07<7:22:49, 1.61it/s] 26%|██▌ | 14746/57600 [2:35:08<7:25:04, 1.60it/s] 26%|██▌ | 14747/57600 [2:35:08<7:48:52, 1.52it/s] 26%|██▌ | 14748/57600 [2:35:09<7:41:19, 1.55it/s] 26%|██▌ | 14749/57600 [2:35:10<7:28:41, 1.59it/s] 26%|██▌ | 14750/57600 [2:35:10<7:27:49, 1.59it/s] 26%|██▌ | 14751/57600 [2:35:11<7:07:31, 1.67it/s] 26%|██▌ | 14752/57600 [2:35:11<7:16:07, 1.64it/s] 26%|██▌ | 14753/57600 [2:35:12<7:29:36, 1.59it/s] 26%|██▌ | 14754/57600 [2:35:13<7:27:13, 1.60it/s] 26%|██▌ | 14755/57600 [2:35:13<7:30:17, 1.59it/s] 26%|██▌ | 14756/57600 [2:35:14<7:32:46, 1.58it/s] 26%|██▌ | 14757/57600 [2:35:15<7:18:54, 1.63it/s] 26%|██▌ | 14758/57600 [2:35:15<7:32:50, 1.58it/s] 26%|██▌ | 14759/57600 [2:35:16<7:28:37, 1.59it/s] 26%|██▌ | 14760/57600 [2:35:17<7:38:52, 1.56it/s] {'loss': 1.2972, 'learning_rate': 7.717073170731707e-07, 'epoch': 162.91} + 26%|██▌ | 14760/57600 [2:35:17<7:38:52, 1.56it/s] 26%|██▌ | 14761/57600 [2:35:17<7:44:00, 1.54it/s] 26%|██▌ | 14762/57600 [2:35:18<7:50:39, 1.52it/s] 26%|██▌ | 14763/57600 [2:35:18<7:26:05, 1.60it/s] 26%|██▌ | 14764/57600 [2:35:19<7:14:00, 1.64it/s] 26%|██▌ | 14765/57600 [2:35:20<7:22:59, 1.61it/s] 26%|██▌ | 14766/57600 [2:35:20<7:09:04, 1.66it/s] 26%|██▌ | 14767/57600 [2:35:21<7:03:24, 1.69it/s] 26%|██▌ | 14768/57600 [2:35:21<7:28:09, 1.59it/s] 26%|██▌ | 14769/57600 [2:35:22<7:24:32, 1.61it/s] 26%|██▌ | 14770/57600 [2:35:23<7:25:07, 1.60it/s] 26%|██▌ | 14771/57600 [2:35:23<7:25:32, 1.60it/s] 26%|██▌ | 14772/57600 [2:35:24<7:23:16, 1.61it/s] 26%|██▌ | 14773/57600 [2:35:25<7:15:45, 1.64it/s] 26%|██▌ | 14774/57600 [2:35:25<7:04:33, 1.68it/s] 26%|██▌ | 14775/57600 [2:35:26<6:58:33, 1.71it/s] 26%|██▌ | 14776/57600 [2:35:26<7:34:03, 1.57it/s] 26%|██▌ | 14777/57600 [2:35:27<7:24:18, 1.61it/s] 26%|██▌ | 14778/57600 [2:35:28<7:30:25, 1.58it/s] 26%|██▌ | 14779/57600 [2:35:28<7:34:24, 1.57it/s] 26%|██▌ | 14780/57600 [2:35:29<7:32:18, 1.58it/s] {'loss': 1.3079, 'learning_rate': 7.713937282229964e-07, 'epoch': 163.13} + 26%|██▌ | 14780/57600 [2:35:29<7:32:18, 1.58it/s] 26%|██▌ | 14781/57600 [2:35:30<7:35:36, 1.57it/s] 26%|██▌ | 14782/57600 [2:35:30<7:29:32, 1.59it/s] 26%|██▌ | 14783/57600 [2:35:31<7:33:47, 1.57it/s] 26%|██▌ | 14784/57600 [2:35:32<7:46:53, 1.53it/s] 26%|██▌ | 14785/57600 [2:35:32<7:33:42, 1.57it/s] 26%|██▌ | 14786/57600 [2:35:33<7:25:53, 1.60it/s] 26%|██▌ | 14787/57600 [2:35:33<7:18:08, 1.63it/s] 26%|██▌ | 14788/57600 [2:35:34<7:20:14, 1.62it/s] 26%|██▌ | 14789/57600 [2:35:35<7:15:34, 1.64it/s] 26%|██▌ | 14790/57600 [2:35:35<7:14:59, 1.64it/s] 26%|██▌ | 14791/57600 [2:35:36<7:12:58, 1.65it/s] 26%|██▌ | 14792/57600 [2:35:36<7:12:43, 1.65it/s] 26%|██▌ | 14793/57600 [2:35:37<7:05:14, 1.68it/s] 26%|██▌ | 14794/57600 [2:35:37<7:01:18, 1.69it/s] 26%|██▌ | 14795/57600 [2:35:38<7:02:43, 1.69it/s] 26%|██▌ | 14796/57600 [2:35:39<7:04:20, 1.68it/s] 26%|██▌ | 14797/57600 [2:35:39<7:16:53, 1.63it/s] 26%|██▌ | 14798/57600 [2:35:40<7:14:49, 1.64it/s] 26%|██▌ | 14799/57600 [2:35:41<7:24:24, 1.61it/s] 26%|██▌ | 14800/57600 [2:35:41<7:12:08, 1.65it/s] {'loss': 1.2769, 'learning_rate': 7.710801393728223e-07, 'epoch': 163.36} + 26%|██▌ | 14800/57600 [2:35:41<7:12:08, 1.65it/s] 26%|██▌ | 14801/57600 [2:35:42<7:30:57, 1.58it/s] 26%|██▌ | 14802/57600 [2:35:43<7:36:41, 1.56it/s] 26%|██▌ | 14803/57600 [2:35:43<7:25:34, 1.60it/s] 26%|██▌ | 14804/57600 [2:35:44<7:24:55, 1.60it/s] 26%|██▌ | 14805/57600 [2:35:44<7:13:42, 1.64it/s] 26%|██▌ | 14806/57600 [2:35:45<7:21:10, 1.62it/s] 26%|██▌ | 14807/57600 [2:35:46<7:15:45, 1.64it/s] 26%|██▌ | 14808/57600 [2:35:46<7:05:47, 1.68it/s] 26%|██▌ | 14809/57600 [2:35:47<6:57:23, 1.71it/s] 26%|██▌ | 14810/57600 [2:35:47<7:00:14, 1.70it/s] 26%|██▌ | 14811/57600 [2:35:48<6:56:19, 1.71it/s] 26%|██▌ | 14812/57600 [2:35:48<7:09:02, 1.66it/s] 26%|██▌ | 14813/57600 [2:35:49<6:56:54, 1.71it/s] 26%|██▌ | 14814/57600 [2:35:50<6:54:20, 1.72it/s] 26%|██▌ | 14815/57600 [2:35:50<6:53:31, 1.72it/s] 26%|██▌ | 14816/57600 [2:35:51<6:54:03, 1.72it/s] 26%|██▌ | 14817/57600 [2:35:51<6:53:57, 1.72it/s] 26%|██▌ | 14818/57600 [2:35:52<7:24:14, 1.61it/s] 26%|██▌ | 14819/57600 [2:35:53<7:15:27, 1.64it/s] 26%|██▌ | 14820/57600 [2:35:53<7:17:29, 1.63it/s] {'loss': 1.3133, 'learning_rate': 7.707665505226481e-07, 'epoch': 163.58} + 26%|██▌ | 14820/57600 [2:35:53<7:17:29, 1.63it/s] 26%|██▌ | 14821/57600 [2:35:54<7:31:26, 1.58it/s] 26%|██▌ | 14822/57600 [2:35:55<7:32:37, 1.58it/s] 26%|██▌ | 14823/57600 [2:35:55<7:32:08, 1.58it/s] 26%|██▌ | 14824/57600 [2:35:56<7:32:36, 1.58it/s] 26%|██▌ | 14825/57600 [2:35:56<7:32:45, 1.57it/s] 26%|██▌ | 14826/57600 [2:35:57<7:35:26, 1.57it/s] 26%|██▌ | 14827/57600 [2:35:58<7:23:06, 1.61it/s] 26%|██▌ | 14828/57600 [2:35:58<7:27:24, 1.59it/s] 26%|██▌ | 14829/57600 [2:35:59<7:48:35, 1.52it/s] 26%|██▌ | 14830/57600 [2:36:00<7:26:37, 1.60it/s] 26%|██▌ | 14831/57600 [2:36:00<7:13:49, 1.64it/s] 26%|██▌ | 14832/57600 [2:36:01<7:21:35, 1.61it/s] 26%|██▌ | 14833/57600 [2:36:01<7:15:26, 1.64it/s] 26%|██▌ | 14834/57600 [2:36:02<7:13:54, 1.64it/s] 26%|██▌ | 14835/57600 [2:36:03<7:12:17, 1.65it/s] 26%|██▌ | 14836/57600 [2:36:03<7:22:38, 1.61it/s] 26%|██▌ | 14837/57600 [2:36:04<7:08:36, 1.66it/s] 26%|██▌ | 14838/57600 [2:36:04<6:58:02, 1.70it/s] 26%|██▌ | 14839/57600 [2:36:05<7:14:15, 1.64it/s] 26%|██▌ | 14840/57600 [2:36:06<7:27:49, 1.59it/s] {'loss': 1.2618, 'learning_rate': 7.704529616724738e-07, 'epoch': 163.8} + 26%|██▌ | 14840/57600 [2:36:06<7:27:49, 1.59it/s] 26%|██▌ | 14841/57600 [2:36:06<7:31:05, 1.58it/s] 26%|██▌ | 14842/57600 [2:36:07<7:28:18, 1.59it/s] 26%|██▌ | 14843/57600 [2:36:08<7:31:28, 1.58it/s] 26%|██▌ | 14844/57600 [2:36:08<7:18:52, 1.62it/s] 26%|██▌ | 14845/57600 [2:36:09<7:12:28, 1.65it/s] 26%|██▌ | 14846/57600 [2:36:09<7:16:45, 1.63it/s] 26%|██▌ | 14847/57600 [2:36:10<7:28:45, 1.59it/s] 26%|██▌ | 14848/57600 [2:36:11<7:23:06, 1.61it/s] 26%|██▌ | 14849/57600 [2:36:11<7:39:42, 1.55it/s] 26%|██▌ | 14850/57600 [2:36:12<7:29:30, 1.59it/s] 26%|██▌ | 14851/57600 [2:36:13<7:38:02, 1.56it/s] 26%|██▌ | 14852/57600 [2:36:13<7:26:28, 1.60it/s] 26%|██▌ | 14853/57600 [2:36:14<7:13:08, 1.64it/s] 26%|██▌ | 14854/57600 [2:36:14<7:25:32, 1.60it/s] 26%|██▌ | 14855/57600 [2:36:15<7:15:24, 1.64it/s] 26%|██▌ | 14856/57600 [2:36:16<7:10:36, 1.65it/s] 26%|██▌ | 14857/57600 [2:36:16<7:13:15, 1.64it/s] 26%|██▌ | 14858/57600 [2:36:17<7:22:06, 1.61it/s] 26%|██▌ | 14859/57600 [2:36:18<7:45:43, 1.53it/s] 26%|██▌ | 14860/57600 [2:36:18<7:31:27, 1.58it/s] {'loss': 1.2768, 'learning_rate': 7.701393728222996e-07, 'epoch': 164.02} + 26%|██▌ | 14860/57600 [2:36:18<7:31:27, 1.58it/s] 26%|██▌ | 14861/57600 [2:36:19<7:31:18, 1.58it/s] 26%|██▌ | 14862/57600 [2:36:19<7:23:27, 1.61it/s] 26%|██▌ | 14863/57600 [2:36:20<7:25:17, 1.60it/s] 26%|██▌ | 14864/57600 [2:36:21<7:33:24, 1.57it/s] 26%|██▌ | 14865/57600 [2:36:21<7:28:55, 1.59it/s] 26%|██▌ | 14866/57600 [2:36:22<7:34:57, 1.57it/s] 26%|██▌ | 14867/57600 [2:36:23<7:42:08, 1.54it/s] 26%|██▌ | 14868/57600 [2:36:23<7:38:20, 1.55it/s] 26%|██▌ | 14869/57600 [2:36:24<7:25:08, 1.60it/s] 26%|██▌ | 14870/57600 [2:36:25<7:25:42, 1.60it/s] 26%|██▌ | 14871/57600 [2:36:25<7:19:11, 1.62it/s] 26%|██▌ | 14872/57600 [2:36:26<7:21:04, 1.61it/s] 26%|██▌ | 14873/57600 [2:36:26<7:14:48, 1.64it/s] 26%|██▌ | 14874/57600 [2:36:27<7:29:54, 1.58it/s] 26%|██▌ | 14875/57600 [2:36:28<7:42:18, 1.54it/s] 26%|██▌ | 14876/57600 [2:36:28<7:22:27, 1.61it/s] 26%|██▌ | 14877/57600 [2:36:29<7:07:52, 1.66it/s] 26%|██▌ | 14878/57600 [2:36:29<7:12:45, 1.65it/s] 26%|██▌ | 14879/57600 [2:36:30<7:04:22, 1.68it/s] 26%|██▌ | 14880/57600 [2:36:31<7:04:01, 1.68it/s] {'loss': 1.2407, 'learning_rate': 7.698257839721255e-07, 'epoch': 164.24} + 26%|██▌ | 14880/57600 [2:36:31<7:04:01, 1.68it/s] 26%|██▌ | 14881/57600 [2:36:31<7:14:36, 1.64it/s] 26%|██▌ | 14882/57600 [2:36:32<7:05:00, 1.68it/s] 26%|██▌ | 14883/57600 [2:36:32<7:10:03, 1.66it/s] 26%|██▌ | 14884/57600 [2:36:33<7:08:47, 1.66it/s] 26%|██▌ | 14885/57600 [2:36:34<7:11:39, 1.65it/s] 26%|██▌ | 14886/57600 [2:36:34<7:32:37, 1.57it/s] 26%|██▌ | 14887/57600 [2:36:35<7:32:40, 1.57it/s] 26%|██▌ | 14888/57600 [2:36:36<7:23:15, 1.61it/s] 26%|██▌ | 14889/57600 [2:36:36<7:10:37, 1.65it/s] 26%|██▌ | 14890/57600 [2:36:37<7:22:01, 1.61it/s] 26%|██▌ | 14891/57600 [2:36:37<7:12:57, 1.64it/s] 26%|██▌ | 14892/57600 [2:36:38<7:11:40, 1.65it/s] 26%|██▌ | 14893/57600 [2:36:39<7:02:03, 1.69it/s] 26%|██▌ | 14894/57600 [2:36:39<7:04:39, 1.68it/s] 26%|██▌ | 14895/57600 [2:36:40<7:03:32, 1.68it/s] 26%|██▌ | 14896/57600 [2:36:40<7:17:38, 1.63it/s] 26%|██▌ | 14897/57600 [2:36:41<7:37:25, 1.56it/s] 26%|██▌ | 14898/57600 [2:36:42<7:29:16, 1.58it/s] 26%|██▌ | 14899/57600 [2:36:42<7:26:14, 1.59it/s] 26%|██▌ | 14900/57600 [2:36:43<7:22:54, 1.61it/s] {'loss': 1.2921, 'learning_rate': 7.695121951219512e-07, 'epoch': 164.46} + 26%|██▌ | 14900/57600 [2:36:43<7:22:54, 1.61it/s] 26%|██▌ | 14901/57600 [2:36:44<7:26:23, 1.59it/s] 26%|██▌ | 14902/57600 [2:36:44<7:18:17, 1.62it/s] 26%|██▌ | 14903/57600 [2:36:45<7:09:39, 1.66it/s] 26%|██▌ | 14904/57600 [2:36:46<7:39:12, 1.55it/s] 26%|██▌ | 14905/57600 [2:36:46<7:43:16, 1.54it/s] 26%|██▌ | 14906/57600 [2:36:47<7:32:04, 1.57it/s] 26%|██▌ | 14907/57600 [2:36:47<7:26:32, 1.59it/s] 26%|██▌ | 14908/57600 [2:36:48<7:28:55, 1.58it/s] 26%|██▌ | 14909/57600 [2:36:49<7:20:53, 1.61it/s] 26%|██▌ | 14910/57600 [2:36:49<7:21:03, 1.61it/s] 26%|██▌ | 14911/57600 [2:36:50<7:14:18, 1.64it/s] 26%|██▌ | 14912/57600 [2:36:50<7:19:56, 1.62it/s] 26%|██▌ | 14913/57600 [2:36:51<7:18:34, 1.62it/s] 26%|██▌ | 14914/57600 [2:36:52<7:07:11, 1.67it/s] 26%|██▌ | 14915/57600 [2:36:52<7:13:01, 1.64it/s] 26%|██▌ | 14916/57600 [2:36:53<7:11:32, 1.65it/s] 26%|██▌ | 14917/57600 [2:36:53<7:01:04, 1.69it/s] 26%|██▌ | 14918/57600 [2:36:54<7:07:10, 1.67it/s] 26%|██▌ | 14919/57600 [2:36:55<7:03:07, 1.68it/s] 26%|██▌ | 14920/57600 [2:36:55<7:11:09, 1.65it/s] {'loss': 1.3135, 'learning_rate': 7.691986062717769e-07, 'epoch': 164.68} + 26%|██▌ | 14920/57600 [2:36:55<7:11:09, 1.65it/s] 26%|██▌ | 14921/57600 [2:36:56<7:12:28, 1.64it/s] 26%|██▌ | 14922/57600 [2:36:56<7:10:51, 1.65it/s] 26%|██▌ | 14923/57600 [2:36:57<7:27:54, 1.59it/s] 26%|██▌ | 14924/57600 [2:36:58<7:28:02, 1.59it/s] 26%|██▌ | 14925/57600 [2:36:58<7:29:59, 1.58it/s] 26%|██▌ | 14926/57600 [2:36:59<7:28:33, 1.59it/s] 26%|██▌ | 14927/57600 [2:37:00<7:32:43, 1.57it/s] 26%|██▌ | 14928/57600 [2:37:00<7:43:57, 1.53it/s] 26%|██▌ | 14929/57600 [2:37:01<7:47:21, 1.52it/s] 26%|██▌ | 14930/57600 [2:37:02<7:27:17, 1.59it/s] 26%|██▌ | 14931/57600 [2:37:02<7:13:08, 1.64it/s] 26%|██▌ | 14932/57600 [2:37:03<7:11:53, 1.65it/s] 26%|██▌ | 14933/57600 [2:37:03<7:23:07, 1.60it/s] 26%|██▌ | 14934/57600 [2:37:04<7:21:14, 1.61it/s] 26%|██▌ | 14935/57600 [2:37:05<7:40:58, 1.54it/s] 26%|██▌ | 14936/57600 [2:37:05<7:28:58, 1.58it/s] 26%|██▌ | 14937/57600 [2:37:06<7:23:37, 1.60it/s] 26%|██▌ | 14938/57600 [2:37:07<7:10:58, 1.65it/s] 26%|██▌ | 14939/57600 [2:37:07<7:11:03, 1.65it/s] 26%|██▌ | 14940/57600 [2:37:08<7:21:08, 1.61it/s] {'loss': 1.2811, 'learning_rate': 7.688850174216027e-07, 'epoch': 164.9} + 26%|██▌ | 14940/57600 [2:37:08<7:21:08, 1.61it/s] 26%|██▌ | 14941/57600 [2:37:08<7:15:56, 1.63it/s] 26%|██▌ | 14942/57600 [2:37:09<7:07:42, 1.66it/s] 26%|██▌ | 14943/57600 [2:37:10<6:59:42, 1.69it/s] 26%|██▌ | 14944/57600 [2:37:10<7:07:21, 1.66it/s] 26%|██▌ | 14945/57600 [2:37:11<7:21:40, 1.61it/s] 26%|██▌ | 14946/57600 [2:37:11<7:18:16, 1.62it/s] 26%|██▌ | 14947/57600 [2:37:12<7:30:54, 1.58it/s] 26%|██▌ | 14948/57600 [2:37:13<7:21:15, 1.61it/s] 26%|██▌ | 14949/57600 [2:37:13<7:25:56, 1.59it/s] 26%|██▌ | 14950/57600 [2:37:14<7:30:13, 1.58it/s] 26%|██▌ | 14951/57600 [2:37:15<7:17:38, 1.62it/s] 26%|██▌ | 14952/57600 [2:37:15<7:19:04, 1.62it/s] 26%|██▌ | 14953/57600 [2:37:16<7:08:41, 1.66it/s] 26%|██▌ | 14954/57600 [2:37:16<6:59:29, 1.69it/s] 26%|██▌ | 14955/57600 [2:37:17<7:04:16, 1.68it/s] 26%|██▌ | 14956/57600 [2:37:18<7:28:04, 1.59it/s] 26%|██▌ | 14957/57600 [2:37:18<7:13:23, 1.64it/s] 26%|██▌ | 14958/57600 [2:37:19<7:12:31, 1.64it/s] 26%|██▌ | 14959/57600 [2:37:19<7:07:35, 1.66it/s] 26%|██▌ | 14960/57600 [2:37:20<7:14:43, 1.63it/s] {'loss': 1.2964, 'learning_rate': 7.685714285714287e-07, 'epoch': 165.12} + 26%|██▌ | 14960/57600 [2:37:20<7:14:43, 1.63it/s] 26%|██▌ | 14961/57600 [2:37:21<7:35:48, 1.56it/s] 26%|██▌ | 14962/57600 [2:37:21<7:53:18, 1.50it/s] 26%|██▌ | 14963/57600 [2:37:22<7:44:55, 1.53it/s] 26%|██▌ | 14964/57600 [2:37:23<7:53:34, 1.50it/s] 26%|██▌ | 14965/57600 [2:37:23<7:57:48, 1.49it/s] 26%|██▌ | 14966/57600 [2:37:24<7:41:03, 1.54it/s] 26%|██▌ | 14967/57600 [2:37:25<7:37:44, 1.55it/s] 26%|██▌ | 14968/57600 [2:37:25<7:32:03, 1.57it/s] 26%|██▌ | 14969/57600 [2:37:26<7:35:18, 1.56it/s] 26%|██▌ | 14970/57600 [2:37:27<7:31:48, 1.57it/s] 26%|██▌ | 14971/57600 [2:37:27<7:30:14, 1.58it/s] 26%|██▌ | 14972/57600 [2:37:28<7:28:00, 1.59it/s] 26%|██▌ | 14973/57600 [2:37:28<7:27:16, 1.59it/s] 26%|██▌ | 14974/57600 [2:37:29<7:33:06, 1.57it/s] 26%|██▌ | 14975/57600 [2:37:30<7:32:26, 1.57it/s] 26%|██▌ | 14976/57600 [2:37:30<7:34:49, 1.56it/s] 26%|██▌ | 14977/57600 [2:37:31<7:31:16, 1.57it/s] 26%|██▌ | 14978/57600 [2:37:32<7:44:46, 1.53it/s] 26%|██▌ | 14979/57600 [2:37:32<7:43:27, 1.53it/s] 26%|██▌ | 14980/57600 [2:37:33<7:25:38, 1.59it/s] {'loss': 1.2674, 'learning_rate': 7.682578397212542e-07, 'epoch': 165.34} + 26%|██▌ | 14980/57600 [2:37:33<7:25:38, 1.59it/s] 26%|██▌ | 14981/57600 [2:37:34<7:18:11, 1.62it/s] 26%|██▌ | 14982/57600 [2:37:34<7:21:13, 1.61it/s] 26%|██▌ | 14983/57600 [2:37:35<7:27:17, 1.59it/s] 26%|██▌ | 14984/57600 [2:37:36<7:47:48, 1.52it/s] 26%|██▌ | 14985/57600 [2:37:36<7:30:26, 1.58it/s] 26%|██▌ | 14986/57600 [2:37:37<7:22:15, 1.61it/s] 26%|██▌ | 14987/57600 [2:37:37<7:21:44, 1.61it/s] 26%|██▌ | 14988/57600 [2:37:38<7:39:45, 1.54it/s] 26%|██▌ | 14989/57600 [2:37:39<7:45:12, 1.53it/s] 26%|██▌ | 14990/57600 [2:37:39<7:23:46, 1.60it/s] 26%|██▌ | 14991/57600 [2:37:40<7:20:42, 1.61it/s] 26%|██▌ | 14992/57600 [2:37:40<7:17:32, 1.62it/s] 26%|██▌ | 14993/57600 [2:37:41<7:16:29, 1.63it/s] 26%|██▌ | 14994/57600 [2:37:42<7:28:52, 1.58it/s] 26%|██▌ | 14995/57600 [2:37:42<7:20:23, 1.61it/s] 26%|██▌ | 14996/57600 [2:37:43<7:19:07, 1.62it/s] 26%|██▌ | 14997/57600 [2:37:44<7:40:33, 1.54it/s] 26%|██▌ | 14998/57600 [2:37:44<7:25:43, 1.59it/s] 26%|██▌ | 14999/57600 [2:37:45<7:19:33, 1.62it/s] 26%|██▌ | 15000/57600 [2:37:45<7:18:47, 1.62it/s] {'loss': 1.3111, 'learning_rate': 7.679442508710801e-07, 'epoch': 165.56} + 26%|██▌ | 15000/57600 [2:37:45<7:18:47, 1.62it/s] 26%|██▌ | 15001/57600 [2:37:46<7:32:34, 1.57it/s] 26%|██▌ | 15002/57600 [2:37:47<7:22:34, 1.60it/s] 26%|██▌ | 15003/57600 [2:37:47<7:18:15, 1.62it/s] 26%|██▌ | 15004/57600 [2:37:48<7:13:53, 1.64it/s] 26%|██▌ | 15005/57600 [2:37:49<7:24:24, 1.60it/s] 26%|██▌ | 15006/57600 [2:37:49<7:21:51, 1.61it/s] 26%|██▌ | 15007/57600 [2:37:50<7:28:09, 1.58it/s] 26%|██▌ | 15008/57600 [2:37:51<7:24:59, 1.60it/s] 26%|██▌ | 15009/57600 [2:37:51<7:32:29, 1.57it/s] 26%|██▌ | 15010/57600 [2:37:52<7:31:10, 1.57it/s] 26%|██▌ | 15011/57600 [2:37:52<7:28:14, 1.58it/s] 26%|██▌ | 15012/57600 [2:37:53<7:26:31, 1.59it/s] 26%|██▌ | 15013/57600 [2:37:54<7:25:43, 1.59it/s] 26%|██▌ | 15014/57600 [2:37:54<7:25:14, 1.59it/s] 26%|██▌ | 15015/57600 [2:37:55<7:11:21, 1.65it/s] 26%|██▌ | 15016/57600 [2:37:55<7:14:52, 1.63it/s] 26%|██▌ | 15017/57600 [2:37:56<7:18:03, 1.62it/s] 26%|██▌ | 15018/57600 [2:37:57<7:23:28, 1.60it/s] 26%|██▌ | 15019/57600 [2:37:57<7:21:53, 1.61it/s] 26%|██▌ | 15020/57600 [2:37:58<7:12:56, 1.64it/s] {'loss': 1.2993, 'learning_rate': 7.67630662020906e-07, 'epoch': 165.78} + 26%|██▌ | 15020/57600 [2:37:58<7:12:56, 1.64it/s] 26%|██▌ | 15021/57600 [2:37:59<7:16:25, 1.63it/s] 26%|██▌ | 15022/57600 [2:37:59<7:08:14, 1.66it/s] 26%|██▌ | 15023/57600 [2:38:00<7:23:01, 1.60it/s] 26%|██▌ | 15024/57600 [2:38:00<7:23:23, 1.60it/s] 26%|██▌ | 15025/57600 [2:38:01<7:22:11, 1.60it/s] 26%|██▌ | 15026/57600 [2:38:02<7:24:45, 1.60it/s] 26%|██▌ | 15027/57600 [2:38:02<7:32:35, 1.57it/s] 26%|██▌ | 15028/57600 [2:38:03<7:32:02, 1.57it/s] 26%|██▌ | 15029/57600 [2:38:04<7:38:53, 1.55it/s] 26%|██▌ | 15030/57600 [2:38:04<7:23:14, 1.60it/s] 26%|██▌ | 15031/57600 [2:38:05<7:27:21, 1.59it/s] 26%|██▌ | 15032/57600 [2:38:06<7:37:35, 1.55it/s] 26%|██▌ | 15033/57600 [2:38:06<7:35:09, 1.56it/s] 26%|██▌ | 15034/57600 [2:38:07<7:32:29, 1.57it/s] 26%|██▌ | 15035/57600 [2:38:07<7:31:57, 1.57it/s] 26%|██▌ | 15036/57600 [2:38:08<7:24:14, 1.60it/s] 26%|██▌ | 15037/57600 [2:38:09<7:23:13, 1.60it/s] 26%|██▌ | 15038/57600 [2:38:09<7:19:48, 1.61it/s] 26%|██▌ | 15039/57600 [2:38:10<7:08:57, 1.65it/s] 26%|██▌ | 15040/57600 [2:38:11<7:40:04, 1.54it/s] {'loss': 1.268, 'learning_rate': 7.673170731707317e-07, 'epoch': 166.0} + 26%|██▌ | 15040/57600 [2:38:11<7:40:04, 1.54it/s] 26%|██▌ | 15041/57600 [2:38:11<7:41:44, 1.54it/s] 26%|██▌ | 15042/57600 [2:38:12<7:49:41, 1.51it/s] 26%|██▌ | 15043/57600 [2:38:13<7:35:52, 1.56it/s] 26%|██▌ | 15044/57600 [2:38:13<7:34:26, 1.56it/s] 26%|██▌ | 15045/57600 [2:38:14<7:33:07, 1.57it/s] 26%|██▌ | 15046/57600 [2:38:14<7:37:14, 1.55it/s] 26%|██▌ | 15047/57600 [2:38:15<7:39:56, 1.54it/s] 26%|██▌ | 15048/57600 [2:38:16<7:45:38, 1.52it/s] 26%|██▌ | 15049/57600 [2:38:16<7:24:27, 1.60it/s] 26%|██▌ | 15050/57600 [2:38:17<7:21:07, 1.61it/s] 26%|██▌ | 15051/57600 [2:38:18<7:06:53, 1.66it/s] 26%|██▌ | 15052/57600 [2:38:18<7:23:08, 1.60it/s] 26%|██▌ | 15053/57600 [2:38:19<7:31:43, 1.57it/s] 26%|██▌ | 15054/57600 [2:38:19<7:12:13, 1.64it/s] 26%|██▌ | 15055/57600 [2:38:20<7:16:25, 1.62it/s] 26%|██▌ | 15056/57600 [2:38:21<7:20:56, 1.61it/s] 26%|██▌ | 15057/57600 [2:38:21<7:20:35, 1.61it/s] 26%|██▌ | 15058/57600 [2:38:22<7:12:43, 1.64it/s] 26%|██▌ | 15059/57600 [2:38:23<7:22:06, 1.60it/s] 26%|██▌ | 15060/57600 [2:38:23<7:18:14, 1.62it/s] {'loss': 1.3067, 'learning_rate': 7.670034843205574e-07, 'epoch': 166.23} + 26%|██▌ | 15060/57600 [2:38:23<7:18:14, 1.62it/s] 26%|██▌ | 15061/57600 [2:38:24<7:31:34, 1.57it/s] 26%|██▌ | 15062/57600 [2:38:24<7:30:05, 1.58it/s] 26%|██▌ | 15063/57600 [2:38:25<7:35:05, 1.56it/s] 26%|██▌ | 15064/57600 [2:38:26<7:24:48, 1.59it/s] 26%|██▌ | 15065/57600 [2:38:26<7:19:37, 1.61it/s] 26%|██▌ | 15066/57600 [2:38:27<7:31:09, 1.57it/s] 26%|██▌ | 15067/57600 [2:38:28<7:32:56, 1.57it/s] 26%|██▌ | 15068/57600 [2:38:28<7:27:56, 1.58it/s] 26%|██▌ | 15069/57600 [2:38:29<7:18:02, 1.62it/s] 26%|██▌ | 15070/57600 [2:38:29<7:14:02, 1.63it/s] 26%|██▌ | 15071/57600 [2:38:30<7:20:04, 1.61it/s] 26%|██▌ | 15072/57600 [2:38:31<7:15:41, 1.63it/s] 26%|██▌ | 15073/57600 [2:38:31<7:21:17, 1.61it/s] 26%|██▌ | 15074/57600 [2:38:32<7:10:14, 1.65it/s] 26%|██▌ | 15075/57600 [2:38:32<7:05:36, 1.67it/s] 26%|██▌ | 15076/57600 [2:38:33<7:03:42, 1.67it/s] 26%|██▌ | 15077/57600 [2:38:34<7:02:09, 1.68it/s] 26%|██▌ | 15078/57600 [2:38:34<6:56:00, 1.70it/s] 26%|██▌ | 15079/57600 [2:38:35<7:04:14, 1.67it/s] 26%|██▌ | 15080/57600 [2:38:35<7:02:38, 1.68it/s] {'loss': 1.3156, 'learning_rate': 7.666898954703832e-07, 'epoch': 166.45} + 26%|██▌ | 15080/57600 [2:38:35<7:02:38, 1.68it/s] 26%|██▌ | 15081/57600 [2:38:36<7:05:50, 1.66it/s] 26%|██▌ | 15082/57600 [2:38:37<7:08:41, 1.65it/s] 26%|██▌ | 15083/57600 [2:38:37<7:28:44, 1.58it/s] 26%|██▌ | 15084/57600 [2:38:38<7:23:12, 1.60it/s] 26%|██▌ | 15085/57600 [2:38:39<7:20:15, 1.61it/s] 26%|██▌ | 15086/57600 [2:38:39<7:18:39, 1.62it/s] 26%|██▌ | 15087/57600 [2:38:40<7:11:20, 1.64it/s] 26%|██▌ | 15088/57600 [2:38:40<7:14:26, 1.63it/s] 26%|██▌ | 15089/57600 [2:38:41<7:17:12, 1.62it/s] 26%|██▌ | 15090/57600 [2:38:42<7:09:38, 1.65it/s] 26%|██▌ | 15091/57600 [2:38:42<7:09:32, 1.65it/s] 26%|██▌ | 15092/57600 [2:38:43<7:10:05, 1.65it/s] 26%|██▌ | 15093/57600 [2:38:44<7:23:19, 1.60it/s] 26%|██▌ | 15094/57600 [2:38:44<7:23:10, 1.60it/s] 26%|██▌ | 15095/57600 [2:38:45<7:24:15, 1.59it/s] 26%|██▌ | 15096/57600 [2:38:45<7:19:11, 1.61it/s] 26%|██▌ | 15097/57600 [2:38:46<7:18:38, 1.61it/s] 26%|██▌ | 15098/57600 [2:38:47<7:21:00, 1.61it/s] 26%|██▌ | 15099/57600 [2:38:47<7:36:56, 1.55it/s] 26%|██▌ | 15100/57600 [2:38:48<7:34:48, 1.56it/s] {'loss': 1.3127, 'learning_rate': 7.663763066202091e-07, 'epoch': 166.67} + 26%|██▌ | 15100/57600 [2:38:48<7:34:48, 1.56it/s] 26%|██▌ | 15101/57600 [2:38:49<7:35:16, 1.56it/s] 26%|██▌ | 15102/57600 [2:38:49<7:27:35, 1.58it/s] 26%|██▌ | 15103/57600 [2:38:50<7:29:59, 1.57it/s] 26%|██▌ | 15104/57600 [2:38:50<7:31:30, 1.57it/s] 26%|██▌ | 15105/57600 [2:38:51<7:26:35, 1.59it/s] 26%|██▌ | 15106/57600 [2:38:52<7:31:06, 1.57it/s] 26%|██▌ | 15107/57600 [2:38:52<7:32:46, 1.56it/s] 26%|██▌ | 15108/57600 [2:38:53<7:26:38, 1.59it/s] 26%|██▌ | 15109/57600 [2:38:54<7:31:59, 1.57it/s] 26%|██▌ | 15110/57600 [2:38:54<7:37:44, 1.55it/s] 26%|██▌ | 15111/57600 [2:38:55<7:34:48, 1.56it/s] 26%|██▌ | 15112/57600 [2:38:56<7:45:20, 1.52it/s] 26%|██▌ | 15113/57600 [2:38:56<8:01:13, 1.47it/s] 26%|██▌ | 15114/57600 [2:38:57<7:50:24, 1.51it/s] 26%|██▌ | 15115/57600 [2:38:58<7:36:29, 1.55it/s] 26%|██▌ | 15116/57600 [2:38:58<7:26:05, 1.59it/s] 26%|██▌ | 15117/57600 [2:38:59<7:20:28, 1.61it/s] 26%|██▌ | 15118/57600 [2:38:59<7:25:39, 1.59it/s] 26%|██▌ | 15119/57600 [2:39:00<7:17:58, 1.62it/s] 26%|██▋ | 15120/57600 [2:39:01<7:16:33, 1.62it/s] {'loss': 1.2802, 'learning_rate': 7.660627177700348e-07, 'epoch': 166.89} + 26%|██▋ | 15120/57600 [2:39:01<7:16:33, 1.62it/s] 26%|██▋ | 15121/57600 [2:39:01<7:28:14, 1.58it/s] 26%|██▋ | 15122/57600 [2:39:02<7:31:01, 1.57it/s] 26%|██▋ | 15123/57600 [2:39:03<7:19:56, 1.61it/s] 26%|██▋ | 15124/57600 [2:39:03<7:22:54, 1.60it/s] 26%|██▋ | 15125/57600 [2:39:04<7:11:07, 1.64it/s] 26%|██▋ | 15126/57600 [2:39:04<7:18:27, 1.61it/s] 26%|██▋ | 15127/57600 [2:39:05<7:41:40, 1.53it/s] 26%|██▋ | 15128/57600 [2:39:06<7:45:09, 1.52it/s] 26%|██▋ | 15129/57600 [2:39:06<7:40:26, 1.54it/s] 26%|██▋ | 15130/57600 [2:39:07<7:29:23, 1.58it/s] 26%|██▋ | 15131/57600 [2:39:08<7:48:43, 1.51it/s] 26%|██▋ | 15132/57600 [2:39:08<7:43:09, 1.53it/s] 26%|██▋ | 15133/57600 [2:39:09<7:47:45, 1.51it/s] 26%|██▋ | 15134/57600 [2:39:10<7:40:01, 1.54it/s] 26%|██▋ | 15135/57600 [2:39:10<7:21:31, 1.60it/s] 26%|██▋ | 15136/57600 [2:39:11<7:26:16, 1.59it/s] 26%|██▋ | 15137/57600 [2:39:12<9:05:52, 1.30it/s] 26%|██▋ | 15138/57600 [2:39:13<8:29:56, 1.39it/s] 26%|██▋ | 15139/57600 [2:39:13<8:09:39, 1.45it/s] 26%|██▋ | 15140/57600 [2:39:14<7:49:52, 1.51it/s] {'loss': 1.252, 'learning_rate': 7.657491289198606e-07, 'epoch': 167.11} + 26%|██▋ | 15140/57600 [2:39:14<7:49:52, 1.51it/s] 26%|██▋ | 15141/57600 [2:39:14<7:40:37, 1.54it/s] 26%|██▋ | 15142/57600 [2:39:15<7:36:52, 1.55it/s] 26%|██▋ | 15143/57600 [2:39:16<7:39:33, 1.54it/s] 26%|██▋ | 15144/57600 [2:39:16<7:22:49, 1.60it/s] 26%|██▋ | 15145/57600 [2:39:17<7:24:41, 1.59it/s] 26%|██▋ | 15146/57600 [2:39:18<7:35:39, 1.55it/s] 26%|██▋ | 15147/57600 [2:39:18<7:13:32, 1.63it/s] 26%|██▋ | 15148/57600 [2:39:19<7:13:45, 1.63it/s] 26%|██▋ | 15149/57600 [2:39:19<6:58:01, 1.69it/s] 26%|██▋ | 15150/57600 [2:39:20<7:07:13, 1.66it/s] 26%|██▋ | 15151/57600 [2:39:21<7:08:30, 1.65it/s] 26%|██▋ | 15152/57600 [2:39:21<7:00:44, 1.68it/s] 26%|██▋ | 15153/57600 [2:39:22<7:03:01, 1.67it/s] 26%|██▋ | 15154/57600 [2:39:22<7:22:31, 1.60it/s] 26%|██▋ | 15155/57600 [2:39:23<7:36:33, 1.55it/s] 26%|██▋ | 15156/57600 [2:39:24<7:33:22, 1.56it/s] 26%|██▋ | 15157/57600 [2:39:24<7:41:03, 1.53it/s] 26%|██▋ | 15158/57600 [2:39:25<7:35:24, 1.55it/s] 26%|██▋ | 15159/57600 [2:39:26<7:43:03, 1.53it/s] 26%|██▋ | 15160/57600 [2:39:26<7:40:11, 1.54it/s] {'loss': 1.3094, 'learning_rate': 7.654355400696864e-07, 'epoch': 167.33} + 26%|██▋ | 15160/57600 [2:39:26<7:40:11, 1.54it/s] 26%|██▋ | 15161/57600 [2:39:27<7:22:12, 1.60it/s] 26%|██▋ | 15162/57600 [2:39:28<7:17:33, 1.62it/s] 26%|██▋ | 15163/57600 [2:39:28<7:23:46, 1.59it/s] 26%|██▋ | 15164/57600 [2:39:29<7:19:18, 1.61it/s] 26%|██▋ | 15165/57600 [2:39:29<7:23:49, 1.59it/s] 26%|██▋ | 15166/57600 [2:39:30<7:21:31, 1.60it/s] 26%|██▋ | 15167/57600 [2:39:31<7:22:23, 1.60it/s] 26%|██▋ | 15168/57600 [2:39:31<7:07:11, 1.66it/s] 26%|██▋ | 15169/57600 [2:39:32<6:59:36, 1.69it/s] 26%|██▋ | 15170/57600 [2:39:32<7:10:35, 1.64it/s] 26%|██▋ | 15171/57600 [2:39:33<7:10:33, 1.64it/s] 26%|██▋ | 15172/57600 [2:39:34<7:06:19, 1.66it/s] 26%|██▋ | 15173/57600 [2:39:34<7:09:56, 1.64it/s] 26%|██▋ | 15174/57600 [2:39:35<7:23:37, 1.59it/s] 26%|██▋ | 15175/57600 [2:39:36<7:11:28, 1.64it/s] 26%|██▋ | 15176/57600 [2:39:36<7:16:55, 1.62it/s] 26%|██▋ | 15177/57600 [2:39:37<7:23:04, 1.60it/s] 26%|██▋ | 15178/57600 [2:39:37<7:23:06, 1.60it/s] 26%|██▋ | 15179/57600 [2:39:38<7:31:30, 1.57it/s] 26%|██▋ | 15180/57600 [2:39:39<7:27:53, 1.58it/s] {'loss': 1.2655, 'learning_rate': 7.651219512195122e-07, 'epoch': 167.55} + 26%|██▋ | 15180/57600 [2:39:39<7:27:53, 1.58it/s] 26%|██▋ | 15181/57600 [2:39:39<7:25:01, 1.59it/s] 26%|██▋ | 15182/57600 [2:39:40<7:34:33, 1.56it/s] 26%|██▋ | 15183/57600 [2:39:41<7:12:35, 1.63it/s] 26%|██▋ | 15184/57600 [2:39:41<7:08:22, 1.65it/s] 26%|██▋ | 15185/57600 [2:39:42<6:56:07, 1.70it/s] 26%|██▋ | 15186/57600 [2:39:42<6:49:10, 1.73it/s] 26%|██▋ | 15187/57600 [2:39:43<7:14:53, 1.63it/s] 26%|██▋ | 15188/57600 [2:39:44<7:37:31, 1.55it/s] 26%|██▋ | 15189/57600 [2:39:44<7:25:46, 1.59it/s] 26%|██▋ | 15190/57600 [2:39:45<7:09:42, 1.64it/s] 26%|██▋ | 15191/57600 [2:39:45<7:04:37, 1.66it/s] 26%|██▋ | 15192/57600 [2:39:46<7:13:00, 1.63it/s] 26%|██▋ | 15193/57600 [2:39:47<7:16:49, 1.62it/s] 26%|██▋ | 15194/57600 [2:39:47<7:16:16, 1.62it/s] 26%|██▋ | 15195/57600 [2:39:48<7:18:02, 1.61it/s] 26%|██▋ | 15196/57600 [2:39:49<7:19:43, 1.61it/s] 26%|██▋ | 15197/57600 [2:39:49<7:05:48, 1.66it/s] 26%|██▋ | 15198/57600 [2:39:50<7:00:57, 1.68it/s] 26%|██▋ | 15199/57600 [2:39:50<7:00:53, 1.68it/s] 26%|██▋ | 15200/57600 [2:39:51<7:12:40, 1.63it/s] {'loss': 1.315, 'learning_rate': 7.648083623693378e-07, 'epoch': 167.77} + 26%|██▋ | 15200/57600 [2:39:51<7:12:40, 1.63it/s] 26%|██▋ | 15201/57600 [2:39:52<7:19:26, 1.61it/s] 26%|██▋ | 15202/57600 [2:39:52<7:15:37, 1.62it/s] 26%|██▋ | 15203/57600 [2:39:53<7:14:56, 1.62it/s] 26%|██▋ | 15204/57600 [2:39:53<7:10:43, 1.64it/s] 26%|██▋ | 15205/57600 [2:39:54<7:15:12, 1.62it/s] 26%|██▋ | 15206/57600 [2:39:55<7:18:29, 1.61it/s] 26%|██▋ | 15207/57600 [2:39:55<7:25:53, 1.58it/s] 26%|██▋ | 15208/57600 [2:39:56<7:40:22, 1.53it/s] 26%|██▋ | 15209/57600 [2:39:57<7:32:29, 1.56it/s] 26%|██▋ | 15210/57600 [2:39:57<7:21:26, 1.60it/s] 26%|██▋ | 15211/57600 [2:39:58<7:36:51, 1.55it/s] 26%|██▋ | 15212/57600 [2:39:59<7:30:50, 1.57it/s] 26%|██▋ | 15213/57600 [2:39:59<7:20:28, 1.60it/s] 26%|██▋ | 15214/57600 [2:40:00<7:15:38, 1.62it/s] 26%|██▋ | 15215/57600 [2:40:00<7:12:56, 1.63it/s] 26%|██▋ | 15216/57600 [2:40:01<7:11:50, 1.64it/s] 26%|██▋ | 15217/57600 [2:40:02<7:21:54, 1.60it/s] 26%|██▋ | 15218/57600 [2:40:02<7:28:21, 1.58it/s] 26%|██▋ | 15219/57600 [2:40:03<7:28:39, 1.57it/s] 26%|██▋ | 15220/57600 [2:40:03<7:15:56, 1.62it/s] {'loss': 1.2746, 'learning_rate': 7.644947735191637e-07, 'epoch': 167.99} + 26%|██▋ | 15220/57600 [2:40:03<7:15:56, 1.62it/s] 26%|██▋ | 15221/57600 [2:40:04<7:23:49, 1.59it/s] 26%|██▋ | 15222/57600 [2:40:05<7:28:15, 1.58it/s] 26%|██▋ | 15223/57600 [2:40:05<7:22:28, 1.60it/s] 26%|██▋ | 15224/57600 [2:40:06<7:36:36, 1.55it/s] 26%|██▋ | 15225/57600 [2:40:07<7:33:25, 1.56it/s] 26%|██▋ | 15226/57600 [2:40:07<7:22:53, 1.59it/s] 26%|██▋ | 15227/57600 [2:40:08<7:19:40, 1.61it/s] 26%|██▋ | 15228/57600 [2:40:09<7:14:40, 1.62it/s] 26%|██▋ | 15229/57600 [2:40:09<7:02:44, 1.67it/s] 26%|██▋ | 15230/57600 [2:40:10<6:54:19, 1.70it/s] 26%|██▋ | 15231/57600 [2:40:10<6:53:27, 1.71it/s] 26%|██▋ | 15232/57600 [2:40:11<6:50:03, 1.72it/s] 26%|██▋ | 15233/57600 [2:40:11<7:04:10, 1.66it/s] 26%|██▋ | 15234/57600 [2:40:12<7:07:20, 1.65it/s] 26%|██▋ | 15235/57600 [2:40:13<6:59:43, 1.68it/s] 26%|█���▋ | 15236/57600 [2:40:13<7:10:49, 1.64it/s] 26%|██▋ | 15237/57600 [2:40:14<7:02:37, 1.67it/s] 26%|██▋ | 15238/57600 [2:40:14<6:57:26, 1.69it/s] 26%|██▋ | 15239/57600 [2:40:15<7:13:29, 1.63it/s] 26%|██▋ | 15240/57600 [2:40:16<7:26:33, 1.58it/s] {'loss': 1.279, 'learning_rate': 7.641811846689895e-07, 'epoch': 168.21} + 26%|██▋ | 15240/57600 [2:40:16<7:26:33, 1.58it/s] 26%|██▋ | 15241/57600 [2:40:16<7:30:39, 1.57it/s] 26%|██▋ | 15242/57600 [2:40:17<7:44:27, 1.52it/s] 26%|██▋ | 15243/57600 [2:40:18<7:40:11, 1.53it/s] 26%|██▋ | 15244/57600 [2:40:18<7:30:10, 1.57it/s] 26%|██▋ | 15245/57600 [2:40:19<7:31:07, 1.56it/s] 26%|██▋ | 15246/57600 [2:40:20<7:34:31, 1.55it/s] 26%|██▋ | 15247/57600 [2:40:20<7:32:33, 1.56it/s] 26%|██▋ | 15248/57600 [2:40:21<7:28:52, 1.57it/s] 26%|██▋ | 15249/57600 [2:40:22<7:21:38, 1.60it/s] 26%|██▋ | 15250/57600 [2:40:22<7:07:02, 1.65it/s] 26%|██▋ | 15251/57600 [2:40:23<6:55:57, 1.70it/s] 26%|██▋ | 15252/57600 [2:40:23<7:07:23, 1.65it/s] 26%|██▋ | 15253/57600 [2:40:24<7:10:12, 1.64it/s] 26%|██▋ | 15254/57600 [2:40:25<7:16:39, 1.62it/s] 26%|██▋ | 15255/57600 [2:40:25<7:13:33, 1.63it/s] 26%|██▋ | 15256/57600 [2:40:26<7:19:33, 1.61it/s] 26%|██▋ | 15257/57600 [2:40:26<7:14:57, 1.62it/s] 26%|██▋ | 15258/57600 [2:40:27<7:09:10, 1.64it/s] 26%|██▋ | 15259/57600 [2:40:28<7:13:19, 1.63it/s] 26%|██▋ | 15260/57600 [2:40:28<7:23:20, 1.59it/s] {'loss': 1.288, 'learning_rate': 7.638675958188153e-07, 'epoch': 168.43} + 26%|██▋ | 15260/57600 [2:40:28<7:23:20, 1.59it/s] 26%|██▋ | 15261/57600 [2:40:29<7:15:49, 1.62it/s] 26%|██▋ | 15262/57600 [2:40:29<7:15:46, 1.62it/s] 26%|██▋ | 15263/57600 [2:40:30<7:08:44, 1.65it/s] 26%|██▋ | 15264/57600 [2:40:31<7:07:47, 1.65it/s] 27%|██▋ | 15265/57600 [2:40:31<7:01:08, 1.68it/s] 27%|██▋ | 15266/57600 [2:40:32<7:15:55, 1.62it/s] 27%|██▋ | 15267/57600 [2:40:32<7:16:12, 1.62it/s] 27%|██▋ | 15268/57600 [2:40:33<7:52:39, 1.49it/s] 27%|██▋ | 15269/57600 [2:40:34<7:40:24, 1.53it/s] 27%|██▋ | 15270/57600 [2:40:35<7:29:35, 1.57it/s] 27%|██▋ | 15271/57600 [2:40:35<7:27:44, 1.58it/s] 27%|██▋ | 15272/57600 [2:40:36<7:23:46, 1.59it/s] 27%|██▋ | 15273/57600 [2:40:36<7:19:13, 1.61it/s] 27%|██▋ | 15274/57600 [2:40:37<7:37:29, 1.54it/s] 27%|██▋ | 15275/57600 [2:40:38<7:28:59, 1.57it/s] 27%|██▋ | 15276/57600 [2:40:38<7:13:10, 1.63it/s] 27%|██▋ | 15277/57600 [2:40:39<7:05:13, 1.66it/s] 27%|██▋ | 15278/57600 [2:40:39<6:59:43, 1.68it/s] 27%|██▋ | 15279/57600 [2:40:40<7:06:05, 1.66it/s] 27%|██▋ | 15280/57600 [2:40:41<7:00:02, 1.68it/s] {'loss': 1.3345, 'learning_rate': 7.63554006968641e-07, 'epoch': 168.65} + 27%|██▋ | 15280/57600 [2:40:41<7:00:02, 1.68it/s] 27%|██▋ | 15281/57600 [2:40:41<6:59:17, 1.68it/s] 27%|██▋ | 15282/57600 [2:40:42<6:53:12, 1.71it/s] 27%|██▋ | 15283/57600 [2:40:42<6:59:51, 1.68it/s] 27%|██▋ | 15284/57600 [2:40:43<6:59:03, 1.68it/s] 27%|██▋ | 15285/57600 [2:40:44<6:57:18, 1.69it/s] 27%|██▋ | 15286/57600 [2:40:44<7:09:33, 1.64it/s] 27%|██▋ | 15287/57600 [2:40:45<7:01:37, 1.67it/s] 27%|██▋ | 15288/57600 [2:40:45<7:05:54, 1.66it/s] 27%|██▋ | 15289/57600 [2:40:46<7:14:32, 1.62it/s] 27%|██▋ | 15290/57600 [2:40:47<7:04:19, 1.66it/s] 27%|██▋ | 15291/57600 [2:40:47<6:57:19, 1.69it/s] 27%|██▋ | 15292/57600 [2:40:48<7:07:04, 1.65it/s] 27%|██▋ | 15293/57600 [2:40:49<7:28:06, 1.57it/s] 27%|██▋ | 15294/57600 [2:40:49<7:23:05, 1.59it/s] 27%|██▋ | 15295/57600 [2:40:50<7:35:00, 1.55it/s] 27%|██▋ | 15296/57600 [2:40:50<7:35:11, 1.55it/s] 27%|██▋ | 15297/57600 [2:40:51<7:38:03, 1.54it/s] 27%|██▋ | 15298/57600 [2:40:52<7:28:53, 1.57it/s] 27%|██▋ | 15299/57600 [2:40:52<7:19:46, 1.60it/s] 27%|██▋ | 15300/57600 [2:40:53<7:32:15, 1.56it/s] {'loss': 1.2753, 'learning_rate': 7.632404181184669e-07, 'epoch': 168.87} + 27%|██▋ | 15300/57600 [2:40:53<7:32:15, 1.56it/s] 27%|██▋ | 15301/57600 [2:40:54<7:15:29, 1.62it/s] 27%|██▋ | 15302/57600 [2:40:54<7:09:10, 1.64it/s] 27%|██▋ | 15303/57600 [2:40:55<7:13:05, 1.63it/s] 27%|██▋ | 15304/57600 [2:40:55<7:11:27, 1.63it/s] 27%|██▋ | 15305/57600 [2:40:56<7:16:52, 1.61it/s] 27%|██▋ | 15306/57600 [2:40:57<7:13:52, 1.62it/s] 27%|██▋ | 15307/57600 [2:40:57<7:13:33, 1.63it/s] 27%|██▋ | 15308/57600 [2:40:58<7:11:56, 1.63it/s] 27%|██▋ | 15309/57600 [2:40:59<7:23:04, 1.59it/s] 27%|██▋ | 15310/57600 [2:40:59<7:12:45, 1.63it/s] 27%|██▋ | 15311/57600 [2:41:00<7:26:21, 1.58it/s] 27%|██▋ | 15312/57600 [2:41:01<7:56:57, 1.48it/s] 27%|██▋ | 15313/57600 [2:41:01<7:46:32, 1.51it/s] 27%|██▋ | 15314/57600 [2:41:02<7:42:07, 1.53it/s] 27%|██▋ | 15315/57600 [2:41:02<7:27:58, 1.57it/s] 27%|██▋ | 15316/57600 [2:41:03<7:27:19, 1.58it/s] 27%|██▋ | 15317/57600 [2:41:04<7:19:19, 1.60it/s] 27%|██▋ | 15318/57600 [2:41:04<7:15:11, 1.62it/s] 27%|██▋ | 15319/57600 [2:41:05<7:05:35, 1.66it/s] 27%|██▋ | 15320/57600 [2:41:05<7:15:34, 1.62it/s] {'loss': 1.3172, 'learning_rate': 7.629268292682927e-07, 'epoch': 169.09} + 27%|██▋ | 15320/57600 [2:41:05<7:15:34, 1.62it/s] 27%|██▋ | 15321/57600 [2:41:06<7:19:15, 1.60it/s] 27%|██▋ | 15322/57600 [2:41:07<7:23:13, 1.59it/s] 27%|██▋ | 15323/57600 [2:41:07<7:24:11, 1.59it/s] 27%|██▋ | 15324/57600 [2:41:08<7:08:11, 1.65it/s] 27%|██▋ | 15325/57600 [2:41:09<7:21:51, 1.59it/s] 27%|██▋ | 15326/57600 [2:41:09<7:09:37, 1.64it/s] 27%|██▋ | 15327/57600 [2:41:10<7:15:13, 1.62it/s] 27%|██▋ | 15328/57600 [2:41:10<7:07:46, 1.65it/s] 27%|██▋ | 15329/57600 [2:41:11<7:20:45, 1.60it/s] 27%|██▋ | 15330/57600 [2:41:12<7:09:37, 1.64it/s] 27%|██▋ | 15331/57600 [2:41:12<6:59:52, 1.68it/s] 27%|██▋ | 15332/57600 [2:41:13<6:56:29, 1.69it/s] 27%|██▋ | 15333/57600 [2:41:13<7:17:53, 1.61it/s] 27%|██▋ | 15334/57600 [2:41:14<7:13:48, 1.62it/s] 27%|██▋ | 15335/57600 [2:41:15<7:36:15, 1.54it/s] 27%|██▋ | 15336/57600 [2:41:15<7:48:38, 1.50it/s] 27%|██▋ | 15337/57600 [2:41:16<7:46:02, 1.51it/s] 27%|██▋ | 15338/57600 [2:41:17<7:26:56, 1.58it/s] 27%|██▋ | 15339/57600 [2:41:17<7:22:21, 1.59it/s] 27%|██▋ | 15340/57600 [2:41:18<7:23:29, 1.59it/s] {'loss': 1.26, 'learning_rate': 7.626132404181184e-07, 'epoch': 169.32} + 27%|██▋ | 15340/57600 [2:41:18<7:23:29, 1.59it/s] 27%|██▋ | 15341/57600 [2:41:19<7:28:31, 1.57it/s] 27%|██▋ | 15342/57600 [2:41:19<7:15:45, 1.62it/s] 27%|██▋ | 15343/57600 [2:41:20<7:22:13, 1.59it/s] 27%|██▋ | 15344/57600 [2:41:20<7:26:42, 1.58it/s] 27%|██▋ | 15345/57600 [2:41:21<7:16:12, 1.61it/s] 27%|██▋ | 15346/57600 [2:41:22<7:20:06, 1.60it/s] 27%|██▋ | 15347/57600 [2:41:22<7:10:47, 1.63it/s] 27%|██▋ | 15348/57600 [2:41:23<7:19:13, 1.60it/s] 27%|██▋ | 15349/57600 [2:41:24<7:19:57, 1.60it/s] 27%|██▋ | 15350/57600 [2:41:24<7:21:53, 1.59it/s] 27%|██▋ | 15351/57600 [2:41:25<7:20:27, 1.60it/s] 27%|██▋ | 15352/57600 [2:41:25<7:11:36, 1.63it/s] 27%|██▋ | 15353/57600 [2:41:26<7:23:11, 1.59it/s] 27%|██▋ | 15354/57600 [2:41:27<7:30:40, 1.56it/s] 27%|██▋ | 15355/57600 [2:41:27<7:34:20, 1.55it/s] 27%|██▋ | 15356/57600 [2:41:28<7:25:37, 1.58it/s] 27%|██▋ | 15357/57600 [2:41:29<7:22:46, 1.59it/s] 27%|██▋ | 15358/57600 [2:41:29<7:25:45, 1.58it/s] 27%|██▋ | 15359/57600 [2:41:30<7:28:08, 1.57it/s] 27%|██▋ | 15360/57600 [2:41:31<7:22:35, 1.59it/s] {'loss': 1.2803, 'learning_rate': 7.622996515679443e-07, 'epoch': 169.54} + 27%|██▋ | 15360/57600 [2:41:31<7:22:35, 1.59it/s] 27%|██▋ | 15361/57600 [2:41:31<7:21:56, 1.59it/s] 27%|██▋ | 15362/57600 [2:41:32<7:10:29, 1.64it/s] 27%|██▋ | 15363/57600 [2:41:32<7:14:43, 1.62it/s] 27%|██▋ | 15364/57600 [2:41:33<7:19:34, 1.60it/s] 27%|██▋ | 15365/57600 [2:41:34<7:28:38, 1.57it/s] 27%|██▋ | 15366/57600 [2:41:34<7:08:54, 1.64it/s] 27%|██▋ | 15367/57600 [2:41:35<6:57:06, 1.69it/s] 27%|██▋ | 15368/57600 [2:41:35<6:48:20, 1.72it/s] 27%|██▋ | 15369/57600 [2:41:36<6:49:41, 1.72it/s] 27%|██▋ | 15370/57600 [2:41:37<7:06:50, 1.65it/s] 27%|██▋ | 15371/57600 [2:41:37<7:05:43, 1.65it/s] 27%|██▋ | 15372/57600 [2:41:38<6:57:58, 1.68it/s] 27%|██▋ | 15373/57600 [2:41:38<7:05:34, 1.65it/s] 27%|██▋ | 15374/57600 [2:41:39<7:01:05, 1.67it/s] 27%|██▋ | 15375/57600 [2:41:40<6:58:13, 1.68it/s] 27%|██▋ | 15376/57600 [2:41:40<7:30:16, 1.56it/s] 27%|██▋ | 15377/57600 [2:41:41<7:41:28, 1.52it/s] 27%|██▋ | 15378/57600 [2:41:42<7:25:27, 1.58it/s] 27%|██▋ | 15379/57600 [2:41:42<7:21:05, 1.60it/s] 27%|██▋ | 15380/57600 [2:41:43<7:06:57, 1.65it/s] {'loss': 1.2895, 'learning_rate': 7.6198606271777e-07, 'epoch': 169.76} + 27%|██▋ | 15380/57600 [2:41:43<7:06:57, 1.65it/s] 27%|██▋ | 15381/57600 [2:41:43<6:59:33, 1.68it/s] 27%|██▋ | 15382/57600 [2:41:44<7:00:46, 1.67it/s] 27%|██▋ | 15383/57600 [2:41:44<7:02:37, 1.66it/s] 27%|██▋ | 15384/57600 [2:41:45<6:51:46, 1.71it/s] 27%|██▋ | 15385/57600 [2:41:46<6:59:43, 1.68it/s] 27%|██▋ | 15386/57600 [2:41:46<7:11:34, 1.63it/s] 27%|██▋ | 15387/57600 [2:41:47<6:58:27, 1.68it/s] 27%|██▋ | 15388/57600 [2:41:47<6:56:55, 1.69it/s] 27%|██▋ | 15389/57600 [2:41:48<6:49:05, 1.72it/s] 27%|██▋ | 15390/57600 [2:41:49<7:17:00, 1.61it/s] 27%|██▋ | 15391/57600 [2:41:49<7:24:07, 1.58it/s] 27%|██▋ | 15392/57600 [2:41:50<7:08:35, 1.64it/s] 27%|██▋ | 15393/57600 [2:41:51<7:10:01, 1.64it/s] 27%|██▋ | 15394/57600 [2:41:51<7:03:36, 1.66it/s] 27%|██▋ | 15395/57600 [2:41:52<7:11:35, 1.63it/s] 27%|██▋ | 15396/57600 [2:41:52<7:02:07, 1.67it/s] 27%|██▋ | 15397/57600 [2:41:53<6:56:01, 1.69it/s] 27%|██▋ | 15398/57600 [2:41:53<6:53:01, 1.70it/s] 27%|██▋ | 15399/57600 [2:41:54<7:02:59, 1.66it/s] 27%|██▋ | 15400/57600 [2:41:55<7:08:54, 1.64it/s] {'loss': 1.3389, 'learning_rate': 7.616724738675958e-07, 'epoch': 169.98} + 27%|██▋ | 15400/57600 [2:41:55<7:08:54, 1.64it/s] 27%|██▋ | 15401/57600 [2:41:55<7:11:18, 1.63it/s] 27%|██▋ | 15402/57600 [2:41:56<7:16:40, 1.61it/s] 27%|██▋ | 15403/57600 [2:41:57<7:24:41, 1.58it/s] 27%|██▋ | 15404/57600 [2:41:57<7:19:20, 1.60it/s] 27%|██▋ | 15405/57600 [2:41:58<7:23:07, 1.59it/s] 27%|██▋ | 15406/57600 [2:41:59<7:15:05, 1.62it/s] 27%|██▋ | 15407/57600 [2:41:59<7:05:37, 1.65it/s] 27%|██▋ | 15408/57600 [2:42:00<7:02:29, 1.66it/s] 27%|██▋ | 15409/57600 [2:42:00<7:22:11, 1.59it/s] 27%|██▋ | 15410/57600 [2:42:01<7:16:46, 1.61it/s] 27%|██▋ | 15411/57600 [2:42:02<7:12:13, 1.63it/s] 27%|██▋ | 15412/57600 [2:42:02<7:22:37, 1.59it/s] 27%|██▋ | 15413/57600 [2:42:03<7:24:29, 1.58it/s] 27%|██▋ | 15414/57600 [2:42:04<7:32:29, 1.55it/s] 27%|██▋ | 15415/57600 [2:42:04<7:21:37, 1.59it/s] 27%|██▋ | 15416/57600 [2:42:05<7:22:59, 1.59it/s] 27%|██▋ | 15417/57600 [2:42:05<7:22:17, 1.59it/s] 27%|██▋ | 15418/57600 [2:42:06<7:31:23, 1.56it/s] 27%|██▋ | 15419/57600 [2:42:07<7:26:54, 1.57it/s] 27%|██▋ | 15420/57600 [2:42:07<7:21:47, 1.59it/s] {'loss': 1.2815, 'learning_rate': 7.613588850174215e-07, 'epoch': 170.2} + 27%|██▋ | 15420/57600 [2:42:07<7:21:47, 1.59it/s] 27%|██▋ | 15421/57600 [2:42:08<7:09:17, 1.64it/s] 27%|██▋ | 15422/57600 [2:42:09<7:14:41, 1.62it/s] 27%|██▋ | 15423/57600 [2:42:09<7:22:13, 1.59it/s] 27%|██▋ | 15424/57600 [2:42:10<7:19:50, 1.60it/s] 27%|██▋ | 15425/57600 [2:42:10<7:06:24, 1.65it/s] 27%|██▋ | 15426/57600 [2:42:11<7:06:11, 1.65it/s] 27%|██▋ | 15427/57600 [2:42:12<6:58:21, 1.68it/s] 27%|██▋ | 15428/57600 [2:42:12<6:57:47, 1.68it/s] 27%|██▋ | 15429/57600 [2:42:13<7:05:44, 1.65it/s] 27%|██▋ | 15430/57600 [2:42:13<7:16:49, 1.61it/s] 27%|██▋ | 15431/57600 [2:42:14<7:41:56, 1.52it/s] 27%|██▋ | 15432/57600 [2:42:15<7:38:17, 1.53it/s] 27%|██▋ | 15433/57600 [2:42:15<7:27:04, 1.57it/s] 27%|██▋ | 15434/57600 [2:42:16<7:19:16, 1.60it/s] 27%|██▋ | 15435/57600 [2:42:17<7:03:08, 1.66it/s] 27%|██▋ | 15436/57600 [2:42:17<7:01:25, 1.67it/s] 27%|██▋ | 15437/57600 [2:42:18<7:14:36, 1.62it/s] 27%|██▋ | 15438/57600 [2:42:18<7:25:44, 1.58it/s] 27%|██▋ | 15439/57600 [2:42:19<7:14:33, 1.62it/s] 27%|██▋ | 15440/57600 [2:42:20<7:21:23, 1.59it/s] {'loss': 1.2479, 'learning_rate': 7.610452961672474e-07, 'epoch': 170.42} + 27%|██▋ | 15440/57600 [2:42:20<7:21:23, 1.59it/s] 27%|██▋ | 15441/57600 [2:42:20<7:27:59, 1.57it/s] 27%|██▋ | 15442/57600 [2:42:21<7:19:38, 1.60it/s] 27%|██▋ | 15443/57600 [2:42:22<7:12:59, 1.62it/s] 27%|██▋ | 15444/57600 [2:42:22<7:09:58, 1.63it/s] 27%|██▋ | 15445/57600 [2:42:23<7:22:50, 1.59it/s] 27%|██▋ | 15446/57600 [2:42:24<7:32:19, 1.55it/s] 27%|██▋ | 15447/57600 [2:42:24<7:09:57, 1.63it/s] 27%|██▋ | 15448/57600 [2:42:25<7:38:11, 1.53it/s] 27%|██▋ | 15449/57600 [2:42:25<7:28:34, 1.57it/s] 27%|██▋ | 15450/57600 [2:42:26<7:23:44, 1.58it/s] 27%|██▋ | 15451/57600 [2:42:27<7:35:06, 1.54it/s] 27%|██▋ | 15452/57600 [2:42:27<7:18:34, 1.60it/s] 27%|██▋ | 15453/57600 [2:42:28<7:32:10, 1.55it/s] 27%|██▋ | 15454/57600 [2:42:29<7:19:46, 1.60it/s] 27%|██▋ | 15455/57600 [2:42:29<7:26:21, 1.57it/s] 27%|██▋ | 15456/57600 [2:42:30<7:19:51, 1.60it/s] 27%|██▋ | 15457/57600 [2:42:30<7:25:24, 1.58it/s] 27%|██▋ | 15458/57600 [2:42:31<7:32:18, 1.55it/s] 27%|██▋ | 15459/57600 [2:42:32<7:34:48, 1.54it/s] 27%|██▋ | 15460/57600 [2:42:32<7:26:32, 1.57it/s] {'loss': 1.2462, 'learning_rate': 7.607317073170731e-07, 'epoch': 170.64} + 27%|██▋ | 15460/57600 [2:42:32<7:26:32, 1.57it/s] 27%|██▋ | 15461/57600 [2:42:33<7:15:16, 1.61it/s] 27%|██▋ | 15462/57600 [2:42:34<7:12:50, 1.62it/s] 27%|██▋ | 15463/57600 [2:42:34<7:06:13, 1.65it/s] 27%|██▋ | 15464/57600 [2:42:35<7:05:47, 1.65it/s] 27%|██▋ | 15465/57600 [2:42:35<7:10:56, 1.63it/s] 27%|██▋ | 15466/57600 [2:42:36<7:13:58, 1.62it/s] 27%|██▋ | 15467/57600 [2:42:37<7:30:38, 1.56it/s] 27%|██▋ | 15468/57600 [2:42:37<7:28:51, 1.56it/s] 27%|██▋ | 15469/57600 [2:42:38<7:16:04, 1.61it/s] 27%|██▋ | 15470/57600 [2:42:39<7:12:53, 1.62it/s] 27%|██▋ | 15471/57600 [2:42:39<7:16:41, 1.61it/s] 27%|██▋ | 15472/57600 [2:42:40<7:14:08, 1.62it/s] 27%|██▋ | 15473/57600 [2:42:40<7:10:33, 1.63it/s] 27%|██▋ | 15474/57600 [2:42:41<7:08:29, 1.64it/s] 27%|██▋ | 15475/57600 [2:42:42<6:56:03, 1.69it/s] 27%|██▋ | 15476/57600 [2:42:42<6:59:19, 1.67it/s] 27%|██▋ | 15477/57600 [2:42:43<6:58:32, 1.68it/s] 27%|██▋ | 15478/57600 [2:42:43<6:48:48, 1.72it/s] 27%|██▋ | 15479/57600 [2:42:44<6:51:55, 1.70it/s] 27%|██▋ | 15480/57600 [2:42:44<6:52:22, 1.70it/s] {'loss': 1.2922, 'learning_rate': 7.604181184668989e-07, 'epoch': 170.86} + 27%|██▋ | 15480/57600 [2:42:44<6:52:22, 1.70it/s] 27%|██▋ | 15481/57600 [2:42:45<7:01:48, 1.66it/s] 27%|██▋ | 15482/57600 [2:42:46<6:50:46, 1.71it/s] 27%|██▋ | 15483/57600 [2:42:46<7:13:22, 1.62it/s] 27%|██▋ | 15484/57600 [2:42:47<7:05:43, 1.65it/s] 27%|██▋ | 15485/57600 [2:42:48<7:07:10, 1.64it/s] 27%|██▋ | 15486/57600 [2:42:48<7:01:47, 1.66it/s] 27%|██▋ | 15487/57600 [2:42:49<7:04:46, 1.65it/s] 27%|██▋ | 15488/57600 [2:42:49<6:58:41, 1.68it/s] 27%|██▋ | 15489/57600 [2:42:50<7:08:31, 1.64it/s] 27%|██▋ | 15490/57600 [2:42:51<7:01:58, 1.66it/s] 27%|██▋ | 15491/57600 [2:42:51<7:10:51, 1.63it/s] 27%|██▋ | 15492/57600 [2:42:52<7:16:43, 1.61it/s] 27%|██▋ | 15493/57600 [2:42:53<7:37:37, 1.53it/s] 27%|██▋ | 15494/57600 [2:42:53<7:24:51, 1.58it/s] 27%|██▋ | 15495/57600 [2:42:54<7:33:42, 1.55it/s] 27%|██▋ | 15496/57600 [2:42:54<7:20:40, 1.59it/s] 27%|██▋ | 15497/57600 [2:42:55<7:33:40, 1.55it/s] 27%|██▋ | 15498/57600 [2:42:56<7:21:48, 1.59it/s] 27%|██▋ | 15499/57600 [2:42:56<7:18:26, 1.60it/s] 27%|██▋ | 15500/57600 [2:42:57<7:15:02, 1.61it/s] {'loss': 1.3001, 'learning_rate': 7.601045296167247e-07, 'epoch': 171.08} + 27%|██▋ | 15500/57600 [2:42:57<7:15:02, 1.61it/s] 27%|██▋ | 15501/57600 [2:42:57<7:09:29, 1.63it/s] 27%|██▋ | 15502/57600 [2:42:58<7:01:37, 1.66it/s] 27%|██▋ | 15503/57600 [2:42:59<6:57:38, 1.68it/s] 27%|██▋ | 15504/57600 [2:42:59<6:57:02, 1.68it/s] 27%|██▋ | 15505/57600 [2:43:00<6:57:00, 1.68it/s] 27%|██▋ | 15506/57600 [2:43:00<6:51:22, 1.71it/s] 27%|██▋ | 15507/57600 [2:43:01<6:45:14, 1.73it/s] 27%|██▋ | 15508/57600 [2:43:02<6:59:32, 1.67it/s] 27%|██▋ | 15509/57600 [2:43:02<6:56:49, 1.68it/s] 27%|██▋ | 15510/57600 [2:43:03<7:06:22, 1.65it/s] 27%|██▋ | 15511/57600 [2:43:03<7:10:46, 1.63it/s] 27%|██▋ | 15512/57600 [2:43:04<7:16:07, 1.61it/s] 27%|██▋ | 15513/57600 [2:43:05<7:31:09, 1.55it/s] 27%|██▋ | 15514/57600 [2:43:05<7:12:44, 1.62it/s] 27%|██▋ | 15515/57600 [2:43:06<7:39:28, 1.53it/s] 27%|██▋ | 15516/57600 [2:43:07<7:42:30, 1.52it/s] 27%|██▋ | 15517/57600 [2:43:07<7:20:41, 1.59it/s] 27%|██▋ | 15518/57600 [2:43:08<7:30:09, 1.56it/s] 27%|██▋ | 15519/57600 [2:43:09<7:21:55, 1.59it/s] 27%|██▋ | 15520/57600 [2:43:09<7:27:47, 1.57it/s] {'loss': 1.2975, 'learning_rate': 7.597909407665505e-07, 'epoch': 171.3} + 27%|██▋ | 15520/57600 [2:43:09<7:27:47, 1.57it/s] 27%|██▋ | 15521/57600 [2:43:10<7:24:40, 1.58it/s] 27%|██▋ | 15522/57600 [2:43:11<7:22:54, 1.58it/s] 27%|██▋ | 15523/57600 [2:43:11<7:18:26, 1.60it/s] 27%|██▋ | 15524/57600 [2:43:12<7:15:39, 1.61it/s] 27%|██▋ | 15525/57600 [2:43:12<7:24:59, 1.58it/s] 27%|██▋ | 15526/57600 [2:43:13<7:14:45, 1.61it/s] 27%|██▋ | 15527/57600 [2:43:14<7:20:33, 1.59it/s] 27%|██▋ | 15528/57600 [2:43:14<7:17:12, 1.60it/s] 27%|██▋ | 15529/57600 [2:43:15<7:00:10, 1.67it/s] 27%|██▋ | 15530/57600 [2:43:15<7:13:58, 1.62it/s] 27%|██▋ | 15531/57600 [2:43:16<7:21:00, 1.59it/s] 27%|██▋ | 15532/57600 [2:43:17<7:13:48, 1.62it/s] 27%|██▋ | 15533/57600 [2:43:17<7:37:55, 1.53it/s] 27%|██▋ | 15534/57600 [2:43:18<7:17:34, 1.60it/s] 27%|██▋ | 15535/57600 [2:43:19<7:18:49, 1.60it/s] 27%|██▋ | 15536/57600 [2:43:19<7:04:01, 1.65it/s] 27%|██▋ | 15537/57600 [2:43:20<6:55:56, 1.69it/s] 27%|██▋ | 15538/57600 [2:43:20<6:54:57, 1.69it/s] 27%|██▋ | 15539/57600 [2:43:21<6:50:11, 1.71it/s] 27%|██▋ | 15540/57600 [2:43:21<6:49:17, 1.71it/s] {'loss': 1.2716, 'learning_rate': 7.594773519163762e-07, 'epoch': 171.52} + 27%|██▋ | 15540/57600 [2:43:21<6:49:17, 1.71it/s] 27%|██▋ | 15541/57600 [2:43:22<6:49:21, 1.71it/s] 27%|██▋ | 15542/57600 [2:43:23<6:58:52, 1.67it/s] 27%|██▋ | 15543/57600 [2:43:23<7:08:57, 1.63it/s] 27%|██▋ | 15544/57600 [2:43:24<7:11:14, 1.63it/s] 27%|██▋ | 15545/57600 [2:43:25<7:21:17, 1.59it/s] 27%|██▋ | 15546/57600 [2:43:25<7:23:27, 1.58it/s] 27%|██▋ | 15547/57600 [2:43:26<7:22:32, 1.58it/s] 27%|██▋ | 15548/57600 [2:43:26<7:09:08, 1.63it/s] 27%|██▋ | 15549/57600 [2:43:27<7:15:42, 1.61it/s] 27%|██▋ | 15550/57600 [2:43:28<7:12:43, 1.62it/s] 27%|██▋ | 15551/57600 [2:43:28<7:06:47, 1.64it/s] 27%|██▋ | 15552/57600 [2:43:29<6:59:54, 1.67it/s] 27%|██▋ | 15553/57600 [2:43:29<6:55:37, 1.69it/s] 27%|██▋ | 15554/57600 [2:43:30<7:06:54, 1.64it/s] 27%|██▋ | 15555/57600 [2:43:31<6:58:53, 1.67it/s] 27%|██▋ | 15556/57600 [2:43:31<6:47:37, 1.72it/s] 27%|██▋ | 15557/57600 [2:43:32<6:49:08, 1.71it/s] 27%|██▋ | 15558/57600 [2:43:32<7:06:34, 1.64it/s] 27%|██▋ | 15559/57600 [2:43:33<7:37:25, 1.53it/s] 27%|██▋ | 15560/57600 [2:43:34<7:45:29, 1.51it/s] {'loss': 1.2841, 'learning_rate': 7.591637630662021e-07, 'epoch': 171.74} + 27%|██▋ | 15560/57600 [2:43:34<7:45:29, 1.51it/s] 27%|██▋ | 15561/57600 [2:43:35<8:01:51, 1.45it/s] 27%|██▋ | 15562/57600 [2:43:35<7:41:38, 1.52it/s] 27%|██▋ | 15563/57600 [2:43:36<7:35:27, 1.54it/s] 27%|██▋ | 15564/57600 [2:43:36<7:12:35, 1.62it/s] 27%|██▋ | 15565/57600 [2:43:37<7:16:24, 1.61it/s] 27%|██▋ | 15566/57600 [2:43:38<7:05:59, 1.64it/s] 27%|██▋ | 15567/57600 [2:43:38<7:11:31, 1.62it/s] 27%|██▋ | 15568/57600 [2:43:39<7:23:11, 1.58it/s] 27%|██▋ | 15569/57600 [2:43:40<7:17:08, 1.60it/s] 27%|██▋ | 15570/57600 [2:43:40<7:27:37, 1.56it/s] 27%|██▋ | 15571/57600 [2:43:41<7:18:08, 1.60it/s] 27%|██▋ | 15572/57600 [2:43:41<7:09:05, 1.63it/s] 27%|██▋ | 15573/57600 [2:43:42<7:02:44, 1.66it/s] 27%|██▋ | 15574/57600 [2:43:43<7:01:55, 1.66it/s] 27%|██▋ | 15575/57600 [2:43:43<6:52:13, 1.70it/s] 27%|██▋ | 15576/57600 [2:43:44<6:52:50, 1.70it/s] 27%|██▋ | 15577/57600 [2:43:44<6:56:01, 1.68it/s] 27%|██▋ | 15578/57600 [2:43:45<7:20:56, 1.59it/s] 27%|██▋ | 15579/57600 [2:43:46<7:14:13, 1.61it/s] 27%|██▋ | 15580/57600 [2:43:46<7:04:07, 1.65it/s] {'loss': 1.2861, 'learning_rate': 7.588501742160279e-07, 'epoch': 171.96} + 27%|██▋ | 15580/57600 [2:43:46<7:04:07, 1.65it/s] 27%|██▋ | 15581/57600 [2:43:47<7:20:18, 1.59it/s] 27%|██▋ | 15582/57600 [2:43:48<7:23:05, 1.58it/s] 27%|██▋ | 15583/57600 [2:43:48<7:14:38, 1.61it/s] 27%|██▋ | 15584/57600 [2:43:49<7:29:35, 1.56it/s] 27%|██▋ | 15585/57600 [2:43:49<7:11:08, 1.62it/s] 27%|██▋ | 15586/57600 [2:43:50<7:08:55, 1.63it/s] 27%|██▋ | 15587/57600 [2:43:51<7:21:46, 1.58it/s] 27%|██▋ | 15588/57600 [2:43:51<7:21:11, 1.59it/s] 27%|██▋ | 15589/57600 [2:43:52<7:16:24, 1.60it/s] 27%|██▋ | 15590/57600 [2:43:53<7:14:59, 1.61it/s] 27%|██▋ | 15591/57600 [2:43:53<7:07:41, 1.64it/s] 27%|██▋ | 15592/57600 [2:43:54<7:23:04, 1.58it/s] 27%|██▋ | 15593/57600 [2:43:54<7:14:51, 1.61it/s] 27%|██▋ | 15594/57600 [2:43:55<7:19:10, 1.59it/s] 27%|██▋ | 15595/57600 [2:43:56<7:15:29, 1.61it/s] 27%|██▋ | 15596/57600 [2:43:56<7:09:18, 1.63it/s] 27%|██▋ | 15597/57600 [2:43:57<6:59:58, 1.67it/s] 27%|██▋ | 15598/57600 [2:43:57<7:02:04, 1.66it/s] 27%|██▋ | 15599/57600 [2:43:58<7:22:18, 1.58it/s] 27%|██▋ | 15600/57600 [2:43:59<7:24:12, 1.58it/s] {'loss': 1.2708, 'learning_rate': 7.585365853658536e-07, 'epoch': 172.19} + 27%|██▋ | 15600/57600 [2:43:59<7:24:12, 1.58it/s] 27%|██▋ | 15601/57600 [2:43:59<7:17:27, 1.60it/s] 27%|██▋ | 15602/57600 [2:44:00<7:08:58, 1.63it/s] 27%|██▋ | 15603/57600 [2:44:01<7:11:41, 1.62it/s] 27%|██▋ | 15604/57600 [2:44:01<7:12:35, 1.62it/s] 27%|██▋ | 15605/57600 [2:44:02<7:14:13, 1.61it/s] 27%|██▋ | 15606/57600 [2:44:02<7:01:10, 1.66it/s] 27%|██▋ | 15607/57600 [2:44:03<7:13:49, 1.61it/s] 27%|██▋ | 15608/57600 [2:44:04<7:30:56, 1.55it/s] 27%|██▋ | 15609/57600 [2:44:04<7:20:21, 1.59it/s] 27%|██▋ | 15610/57600 [2:44:05<7:04:11, 1.65it/s] 27%|██▋ | 15611/57600 [2:44:05<7:09:52, 1.63it/s] 27%|██▋ | 15612/57600 [2:44:06<7:14:09, 1.61it/s] 27%|██▋ | 15613/57600 [2:44:07<7:31:16, 1.55it/s] 27%|██▋ | 15614/57600 [2:44:07<7:14:41, 1.61it/s] 27%|██▋ | 15615/57600 [2:44:08<7:18:35, 1.60it/s] 27%|██▋ | 15616/57600 [2:44:09<7:15:37, 1.61it/s] 27%|██▋ | 15617/57600 [2:44:09<7:05:58, 1.64it/s] 27%|██▋ | 15618/57600 [2:44:10<7:09:02, 1.63it/s] 27%|██▋ | 15619/57600 [2:44:10<7:06:37, 1.64it/s] 27%|██▋ | 15620/57600 [2:44:11<7:05:54, 1.64it/s] {'loss': 1.2853, 'learning_rate': 7.582229965156794e-07, 'epoch': 172.41} + 27%|██▋ | 15620/57600 [2:44:11<7:05:54, 1.64it/s] 27%|██▋ | 15621/57600 [2:44:12<7:01:10, 1.66it/s] 27%|██▋ | 15622/57600 [2:44:12<7:28:14, 1.56it/s] 27%|██▋ | 15623/57600 [2:44:13<7:28:50, 1.56it/s] 27%|██▋ | 15624/57600 [2:44:14<7:33:32, 1.54it/s] 27%|██▋ | 15625/57600 [2:44:14<7:30:36, 1.55it/s] 27%|██▋ | 15626/57600 [2:44:15<7:14:24, 1.61it/s] 27%|██▋ | 15627/57600 [2:44:15<6:59:18, 1.67it/s] 27%|██▋ | 15628/57600 [2:44:16<6:56:37, 1.68it/s] 27%|██▋ | 15629/57600 [2:44:17<6:56:52, 1.68it/s] 27%|██▋ | 15630/57600 [2:44:17<7:01:11, 1.66it/s] 27%|██▋ | 15631/57600 [2:44:18<6:50:19, 1.70it/s] 27%|██▋ | 15632/57600 [2:44:18<6:49:43, 1.71it/s] 27%|██▋ | 15633/57600 [2:44:19<6:47:52, 1.71it/s] 27%|██▋ | 15634/57600 [2:44:19<6:39:16, 1.75it/s] 27%|██▋ | 15635/57600 [2:44:20<6:39:22, 1.75it/s] 27%|██▋ | 15636/57600 [2:44:21<6:45:26, 1.73it/s] 27%|██▋ | 15637/57600 [2:44:21<6:53:49, 1.69it/s] 27%|██▋ | 15638/57600 [2:44:22<7:00:39, 1.66it/s] 27%|██▋ | 15639/57600 [2:44:23<7:01:40, 1.66it/s] 27%|██▋ | 15640/57600 [2:44:23<7:14:35, 1.61it/s] {'loss': 1.2959, 'learning_rate': 7.579094076655052e-07, 'epoch': 172.63} + 27%|██▋ | 15640/57600 [2:44:23<7:14:35, 1.61it/s] 27%|██▋ | 15641/57600 [2:44:24<7:29:41, 1.56it/s] 27%|██▋ | 15642/57600 [2:44:24<7:18:55, 1.59it/s] 27%|██▋ | 15643/57600 [2:44:25<7:18:06, 1.60it/s] 27%|██▋ | 15644/57600 [2:44:26<7:24:01, 1.57it/s] 27%|██▋ | 15645/57600 [2:44:26<7:24:24, 1.57it/s] 27%|██▋ | 15646/57600 [2:44:27<7:21:21, 1.58it/s] 27%|██▋ | 15647/57600 [2:44:28<7:16:28, 1.60it/s] 27%|██▋ | 15648/57600 [2:44:28<7:13:54, 1.61it/s] 27%|██▋ | 15649/57600 [2:44:29<7:10:18, 1.62it/s] 27%|██▋ | 15650/57600 [2:44:29<6:57:38, 1.67it/s] 27%|██▋ | 15651/57600 [2:44:30<7:09:58, 1.63it/s] 27%|██▋ | 15652/57600 [2:44:31<6:55:33, 1.68it/s] 27%|██▋ | 15653/57600 [2:44:31<6:54:00, 1.69it/s] 27%|██▋ | 15654/57600 [2:44:32<6:50:09, 1.70it/s] 27%|██▋ | 15655/57600 [2:44:32<7:00:07, 1.66it/s] 27%|██▋ | 15656/57600 [2:44:33<6:57:32, 1.67it/s] 27%|██▋ | 15657/57600 [2:44:34<7:01:34, 1.66it/s] 27%|██▋ | 15658/57600 [2:44:34<7:08:51, 1.63it/s] 27%|██▋ | 15659/57600 [2:44:35<7:27:09, 1.56it/s] 27%|██▋ | 15660/57600 [2:44:36<7:34:11, 1.54it/s] {'loss': 1.2862, 'learning_rate': 7.57595818815331e-07, 'epoch': 172.85} + 27%|██▋ | 15660/57600 [2:44:36<7:34:11, 1.54it/s] 27%|██▋ | 15661/57600 [2:44:36<7:32:52, 1.54it/s] 27%|██▋ | 15662/57600 [2:44:37<7:36:04, 1.53it/s] 27%|██▋ | 15663/57600 [2:44:37<7:24:25, 1.57it/s] 27%|██▋ | 15664/57600 [2:44:38<7:15:39, 1.60it/s] 27%|██▋ | 15665/57600 [2:44:39<7:07:20, 1.64it/s] 27%|██▋ | 15666/57600 [2:44:39<7:08:02, 1.63it/s] 27%|██▋ | 15667/57600 [2:44:40<7:00:22, 1.66it/s] 27%|██▋ | 15668/57600 [2:44:41<7:11:12, 1.62it/s] 27%|██▋ | 15669/57600 [2:44:41<7:12:13, 1.62it/s] 27%|██▋ | 15670/57600 [2:44:42<7:18:00, 1.60it/s] 27%|██▋ | 15671/57600 [2:44:42<7:31:44, 1.55it/s] 27%|██▋ | 15672/57600 [2:44:43<7:24:09, 1.57it/s] 27%|██▋ | 15673/57600 [2:44:44<7:42:19, 1.51it/s] 27%|██▋ | 15674/57600 [2:44:44<7:44:48, 1.50it/s] 27%|██▋ | 15675/57600 [2:44:45<7:46:16, 1.50it/s] 27%|██▋ | 15676/57600 [2:44:46<7:25:27, 1.57it/s] 27%|██▋ | 15677/57600 [2:44:46<7:28:58, 1.56it/s] 27%|██▋ | 15678/57600 [2:44:47<7:10:32, 1.62it/s] 27%|██▋ | 15679/57600 [2:44:47<7:00:18, 1.66it/s] 27%|██▋ | 15680/57600 [2:44:48<6:53:33, 1.69it/s] {'loss': 1.287, 'learning_rate': 7.572822299651567e-07, 'epoch': 173.07} + 27%|██▋ | 15680/57600 [2:44:48<6:53:33, 1.69it/s] 27%|██▋ | 15681/57600 [2:44:49<7:16:35, 1.60it/s] 27%|██▋ | 15682/57600 [2:44:49<7:08:49, 1.63it/s] 27%|██▋ | 15683/57600 [2:44:50<7:06:02, 1.64it/s] 27%|██▋ | 15684/57600 [2:44:51<7:04:13, 1.65it/s] 27%|██▋ | 15685/57600 [2:44:51<7:04:57, 1.64it/s] 27%|██▋ | 15686/57600 [2:44:52<6:54:11, 1.69it/s] 27%|██▋ | 15687/57600 [2:44:52<6:49:41, 1.71it/s] 27%|██▋ | 15688/57600 [2:44:53<6:57:10, 1.67it/s] 27%|██▋ | 15689/57600 [2:44:54<7:09:14, 1.63it/s] 27%|██▋ | 15690/57600 [2:44:54<7:05:40, 1.64it/s] 27%|██▋ | 15691/57600 [2:44:55<6:58:54, 1.67it/s] 27%|██▋ | 15692/57600 [2:44:55<6:50:54, 1.70it/s] 27%|██▋ | 15693/57600 [2:44:56<6:53:44, 1.69it/s] 27%|██▋ | 15694/57600 [2:44:56<6:41:44, 1.74it/s] 27%|██▋ | 15695/57600 [2:44:57<6:59:34, 1.66it/s] 27%|██▋ | 15696/57600 [2:44:58<7:06:07, 1.64it/s] 27%|██▋ | 15697/57600 [2:44:58<7:12:52, 1.61it/s] 27%|██▋ | 15698/57600 [2:44:59<7:12:19, 1.62it/s] 27%|██▋ | 15699/57600 [2:45:00<7:16:22, 1.60it/s] 27%|██▋ | 15700/57600 [2:45:00<7:18:24, 1.59it/s] {'loss': 1.2946, 'learning_rate': 7.569686411149826e-07, 'epoch': 173.29} + 27%|██▋ | 15700/57600 [2:45:00<7:18:24, 1.59it/s] 27%|██▋ | 15701/57600 [2:45:01<7:19:27, 1.59it/s] 27%|██▋ | 15702/57600 [2:45:02<7:21:42, 1.58it/s] 27%|██▋ | 15703/57600 [2:45:02<7:21:24, 1.58it/s] 27%|██▋ | 15704/57600 [2:45:03<7:05:46, 1.64it/s] 27%|██▋ | 15705/57600 [2:45:03<7:02:11, 1.65it/s] 27%|██▋ | 15706/57600 [2:45:04<6:54:52, 1.68it/s] 27%|██▋ | 15707/57600 [2:45:05<6:58:50, 1.67it/s] 27%|██▋ | 15708/57600 [2:45:05<6:52:54, 1.69it/s] 27%|██▋ | 15709/57600 [2:45:06<7:12:42, 1.61it/s] 27%|██▋ | 15710/57600 [2:45:06<7:00:15, 1.66it/s] 27%|██▋ | 15711/57600 [2:45:07<7:07:43, 1.63it/s] 27%|██▋ | 15712/57600 [2:45:08<7:02:32, 1.65it/s] 27%|██▋ | 15713/57600 [2:45:08<6:58:53, 1.67it/s] 27%|██▋ | 15714/57600 [2:45:09<7:10:15, 1.62it/s] 27%|██▋ | 15715/57600 [2:45:09<7:04:35, 1.64it/s] 27%|██▋ | 15716/57600 [2:45:10<7:06:26, 1.64it/s] 27%|██▋ | 15717/57600 [2:45:11<7:23:09, 1.58it/s] 27%|██▋ | 15718/57600 [2:45:11<7:23:16, 1.57it/s] 27%|██▋ | 15719/57600 [2:45:12<7:28:04, 1.56it/s] 27%|██▋ | 15720/57600 [2:45:13<7:31:22, 1.55it/s] {'loss': 1.2862, 'learning_rate': 7.566550522648083e-07, 'epoch': 173.51} + 27%|██▋ | 15720/57600 [2:45:13<7:31:22, 1.55it/s] 27%|██▋ | 15721/57600 [2:45:13<7:41:03, 1.51it/s] 27%|██▋ | 15722/57600 [2:45:14<7:33:39, 1.54it/s] 27%|██▋ | 15723/57600 [2:45:15<7:23:26, 1.57it/s] 27%|██▋ | 15724/57600 [2:45:15<7:23:17, 1.57it/s] 27%|██▋ | 15725/57600 [2:45:16<7:09:55, 1.62it/s] 27%|██▋ | 15726/57600 [2:45:16<6:59:32, 1.66it/s] 27%|██▋ | 15727/57600 [2:45:17<6:57:40, 1.67it/s] 27%|██▋ | 15728/57600 [2:45:18<7:13:15, 1.61it/s] 27%|██▋ | 15729/57600 [2:45:18<7:40:57, 1.51it/s] 27%|██▋ | 15730/57600 [2:45:19<7:45:35, 1.50it/s] 27%|██▋ | 15731/57600 [2:45:20<7:30:05, 1.55it/s] 27%|██▋ | 15732/57600 [2:45:20<7:24:32, 1.57it/s] 27%|██▋ | 15733/57600 [2:45:21<7:11:50, 1.62it/s] 27%|██▋ | 15734/57600 [2:45:21<7:00:11, 1.66it/s] 27%|██▋ | 15735/57600 [2:45:22<7:00:16, 1.66it/s] 27%|██▋ | 15736/57600 [2:45:23<7:05:00, 1.64it/s] 27%|██▋ | 15737/57600 [2:45:23<7:15:41, 1.60it/s] 27%|██▋ | 15738/57600 [2:45:24<7:10:09, 1.62it/s] 27%|██▋ | 15739/57600 [2:45:24<7:11:51, 1.62it/s] 27%|██▋ | 15740/57600 [2:45:25<7:17:15, 1.60it/s] {'loss': 1.2715, 'learning_rate': 7.563414634146341e-07, 'epoch': 173.73} + 27%|██▋ | 15740/57600 [2:45:25<7:17:15, 1.60it/s] 27%|██▋ | 15741/57600 [2:45:26<7:17:21, 1.60it/s] 27%|██▋ | 15742/57600 [2:45:26<7:21:32, 1.58it/s] 27%|██▋ | 15743/57600 [2:45:27<7:19:39, 1.59it/s] 27%|██▋ | 15744/57600 [2:45:28<7:24:02, 1.57it/s] 27%|██▋ | 15745/57600 [2:45:28<7:40:57, 1.51it/s] 27%|██▋ | 15746/57600 [2:45:29<7:28:26, 1.56it/s] 27%|██▋ | 15747/57600 [2:45:30<7:15:19, 1.60it/s] 27%|██▋ | 15748/57600 [2:45:30<7:16:00, 1.60it/s] 27%|██▋ | 15749/57600 [2:45:31<7:12:01, 1.61it/s] 27%|██▋ | 15750/57600 [2:45:31<7:23:42, 1.57it/s] 27%|██▋ | 15751/57600 [2:45:32<7:12:51, 1.61it/s] 27%|██▋ | 15752/57600 [2:45:33<7:07:17, 1.63it/s] 27%|██▋ | 15753/57600 [2:45:33<7:15:50, 1.60it/s] 27%|██▋ | 15754/57600 [2:45:34<7:11:41, 1.62it/s] 27%|██▋ | 15755/57600 [2:45:35<7:10:52, 1.62it/s] 27%|██▋ | 15756/57600 [2:45:35<7:05:16, 1.64it/s] 27%|██▋ | 15757/57600 [2:45:36<7:02:13, 1.65it/s] 27%|██▋ | 15758/57600 [2:45:36<6:57:45, 1.67it/s] 27%|██▋ | 15759/57600 [2:45:37<6:48:05, 1.71it/s] 27%|██▋ | 15760/57600 [2:45:37<6:44:52, 1.72it/s] {'loss': 1.3114, 'learning_rate': 7.560278745644598e-07, 'epoch': 173.95} + 27%|██▋ | 15760/57600 [2:45:37<6:44:52, 1.72it/s] 27%|██▋ | 15761/57600 [2:45:38<6:37:25, 1.75it/s] 27%|██▋ | 15762/57600 [2:45:39<6:43:18, 1.73it/s] 27%|██▋ | 15763/57600 [2:45:39<6:36:16, 1.76it/s] 27%|██▋ | 15764/57600 [2:45:40<6:37:27, 1.75it/s] 27%|██▋ | 15765/57600 [2:45:40<6:57:40, 1.67it/s] 27%|██▋ | 15766/57600 [2:45:41<6:54:41, 1.68it/s] 27%|██▋ | 15767/57600 [2:45:42<7:24:17, 1.57it/s] 27%|██▋ | 15768/57600 [2:45:42<7:21:10, 1.58it/s] 27%|██▋ | 15769/57600 [2:45:43<7:05:06, 1.64it/s] 27%|██▋ | 15770/57600 [2:45:43<7:08:11, 1.63it/s] 27%|██▋ | 15771/57600 [2:45:44<7:01:57, 1.65it/s] 27%|██▋ | 15772/57600 [2:45:45<7:03:21, 1.65it/s] 27%|██▋ | 15773/57600 [2:45:45<7:03:19, 1.65it/s] 27%|██▋ | 15774/57600 [2:45:46<7:16:33, 1.60it/s] 27%|██▋ | 15775/57600 [2:45:47<7:11:03, 1.62it/s] 27%|██▋ | 15776/57600 [2:45:47<7:16:21, 1.60it/s] 27%|██▋ | 15777/57600 [2:45:48<7:05:22, 1.64it/s] 27%|██▋ | 15778/57600 [2:45:48<7:07:51, 1.63it/s] 27%|██▋ | 15779/57600 [2:45:49<7:09:11, 1.62it/s] 27%|██▋ | 15780/57600 [2:45:50<6:59:02, 1.66it/s] {'loss': 1.3279, 'learning_rate': 7.557142857142858e-07, 'epoch': 174.17} + 27%|██▋ | 15780/57600 [2:45:50<6:59:02, 1.66it/s] 27%|██▋ | 15781/57600 [2:45:50<7:14:20, 1.60it/s] 27%|██▋ | 15782/57600 [2:45:51<7:00:06, 1.66it/s] 27%|██▋ | 15783/57600 [2:45:51<7:09:55, 1.62it/s] 27%|██▋ | 15784/57600 [2:45:52<7:09:13, 1.62it/s] 27%|██▋ | 15785/57600 [2:45:53<7:02:34, 1.65it/s] 27%|██▋ | 15786/57600 [2:45:53<7:27:13, 1.56it/s] 27%|██▋ | 15787/57600 [2:45:54<7:14:37, 1.60it/s] 27%|██▋ | 15788/57600 [2:45:55<6:58:25, 1.67it/s] 27%|██▋ | 15789/57600 [2:45:55<7:02:50, 1.65it/s] 27%|██▋ | 15790/57600 [2:45:56<6:57:29, 1.67it/s] 27%|██▋ | 15791/57600 [2:45:56<7:04:06, 1.64it/s] 27%|██▋ | 15792/57600 [2:45:57<7:03:12, 1.65it/s] 27%|██▋ | 15793/57600 [2:45:58<7:04:56, 1.64it/s] 27%|██▋ | 15794/57600 [2:45:58<7:17:16, 1.59it/s] 27%|██▋ | 15795/57600 [2:45:59<7:24:43, 1.57it/s] 27%|██▋ | 15796/57600 [2:46:00<7:18:41, 1.59it/s] 27%|██▋ | 15797/57600 [2:46:00<7:18:01, 1.59it/s] 27%|██▋ | 15798/57600 [2:46:01<6:58:40, 1.66it/s] 27%|██▋ | 15799/57600 [2:46:01<6:55:54, 1.68it/s] 27%|██▋ | 15800/57600 [2:46:02<7:13:05, 1.61it/s] {'loss': 1.2848, 'learning_rate': 7.554006968641114e-07, 'epoch': 174.39} + 27%|██▋ | 15800/57600 [2:46:02<7:13:05, 1.61it/s] 27%|██▋ | 15801/57600 [2:46:03<7:07:13, 1.63it/s] 27%|██▋ | 15802/57600 [2:46:03<7:20:17, 1.58it/s] 27%|██▋ | 15803/57600 [2:46:04<7:16:57, 1.59it/s] 27%|██▋ | 15804/57600 [2:46:04<7:11:59, 1.61it/s] 27%|██▋ | 15805/57600 [2:46:05<7:09:25, 1.62it/s] 27%|██▋ | 15806/57600 [2:46:06<7:24:05, 1.57it/s] 27%|██▋ | 15807/57600 [2:46:06<7:13:51, 1.61it/s] 27%|██▋ | 15808/57600 [2:46:07<7:09:37, 1.62it/s] 27%|██▋ | 15809/57600 [2:46:08<7:07:16, 1.63it/s] 27%|██▋ | 15810/57600 [2:46:08<7:13:44, 1.61it/s] 27%|██▋ | 15811/57600 [2:46:09<6:57:59, 1.67it/s] 27%|██▋ | 15812/57600 [2:46:09<6:57:27, 1.67it/s] 27%|██▋ | 15813/57600 [2:46:10<6:55:38, 1.68it/s] 27%|██▋ | 15814/57600 [2:46:11<6:54:12, 1.68it/s] 27%|██▋ | 15815/57600 [2:46:11<6:58:17, 1.66it/s] 27%|██▋ | 15816/57600 [2:46:12<7:03:50, 1.64it/s] 27%|██▋ | 15817/57600 [2:46:12<7:10:38, 1.62it/s] 27%|██▋ | 15818/57600 [2:46:13<7:07:18, 1.63it/s] 27%|██▋ | 15819/57600 [2:46:14<6:54:03, 1.68it/s] 27%|██▋ | 15820/57600 [2:46:14<6:43:43, 1.72it/s] {'loss': 1.3029, 'learning_rate': 7.550871080139372e-07, 'epoch': 174.61} + 27%|██▋ | 15820/57600 [2:46:14<6:43:43, 1.72it/s] 27%|██▋ | 15821/57600 [2:46:15<7:01:06, 1.65it/s] 27%|██▋ | 15822/57600 [2:46:15<7:10:22, 1.62it/s] 27%|██▋ | 15823/57600 [2:46:16<7:01:15, 1.65it/s] 27%|██▋ | 15824/57600 [2:46:17<6:55:34, 1.68it/s] 27%|██▋ | 15825/57600 [2:46:17<7:05:01, 1.64it/s] 27%|██▋ | 15826/57600 [2:46:18<7:02:38, 1.65it/s] 27%|██▋ | 15827/57600 [2:46:18<7:05:53, 1.63it/s] 27%|██▋ | 15828/57600 [2:46:19<7:14:03, 1.60it/s] 27%|██▋ | 15829/57600 [2:46:20<7:07:10, 1.63it/s] 27%|██▋ | 15830/57600 [2:46:20<7:17:02, 1.59it/s] 27%|██▋ | 15831/57600 [2:46:21<7:09:13, 1.62it/s] 27%|██▋ | 15832/57600 [2:46:22<7:16:17, 1.60it/s] 27%|██▋ | 15833/57600 [2:46:22<7:07:58, 1.63it/s] 27%|██▋ | 15834/57600 [2:46:23<7:33:45, 1.53it/s] 27%|██▋ | 15835/57600 [2:46:23<7:20:57, 1.58it/s] 27%|██▋ | 15836/57600 [2:46:24<7:16:53, 1.59it/s] 27%|██▋ | 15837/57600 [2:46:25<7:24:32, 1.57it/s] 27%|██▋ | 15838/57600 [2:46:25<7:30:19, 1.55it/s] 27%|██▋ | 15839/57600 [2:46:26<7:21:26, 1.58it/s] 28%|██▊ | 15840/57600 [2:46:27<7:33:21, 1.54it/s] {'loss': 1.2794, 'learning_rate': 7.547735191637631e-07, 'epoch': 174.83} + 28%|██▊ | 15840/57600 [2:46:27<7:33:21, 1.54it/s] 28%|██▊ | 15841/57600 [2:46:27<7:32:19, 1.54it/s] 28%|██▊ | 15842/57600 [2:46:28<7:10:22, 1.62it/s] 28%|██▊ | 15843/57600 [2:46:29<7:07:39, 1.63it/s] 28%|██▊ | 15844/57600 [2:46:29<7:07:02, 1.63it/s] 28%|██▊ | 15845/57600 [2:46:30<7:05:56, 1.63it/s] 28%|██▊ | 15846/57600 [2:46:30<7:05:00, 1.64it/s] 28%|██▊ | 15847/57600 [2:46:31<6:59:20, 1.66it/s] 28%|██▊ | 15848/57600 [2:46:32<7:12:35, 1.61it/s] 28%|██▊ | 15849/57600 [2:46:32<7:10:03, 1.62it/s] 28%|██▊ | 15850/57600 [2:46:33<6:59:01, 1.66it/s] 28%|██▊ | 15851/57600 [2:46:33<6:52:23, 1.69it/s] 28%|██▊ | 15852/57600 [2:46:34<6:44:44, 1.72it/s] 28%|██▊ | 15853/57600 [2:46:35<6:53:11, 1.68it/s] 28%|██▊ | 15854/57600 [2:46:35<7:11:23, 1.61it/s] 28%|██▊ | 15855/57600 [2:46:36<7:00:00, 1.66it/s] 28%|██▊ | 15856/57600 [2:46:36<7:14:14, 1.60it/s] 28%|██▊ | 15857/57600 [2:46:37<7:04:51, 1.64it/s] 28%|██▊ | 15858/57600 [2:46:38<6:57:10, 1.67it/s] 28%|██▊ | 15859/57600 [2:46:38<7:10:22, 1.62it/s] 28%|██▊ | 15860/57600 [2:46:39<7:13:52, 1.60it/s] {'loss': 1.3058, 'learning_rate': 7.544599303135888e-07, 'epoch': 175.06} + 28%|██▊ | 15860/57600 [2:46:39<7:13:52, 1.60it/s] 28%|██▊ | 15861/57600 [2:46:40<7:24:53, 1.56it/s] 28%|██▊ | 15862/57600 [2:46:40<7:17:12, 1.59it/s] 28%|██▊ | 15863/57600 [2:46:41<7:34:03, 1.53it/s] 28%|██▊ | 15864/57600 [2:46:41<7:20:57, 1.58it/s] 28%|██▊ | 15865/57600 [2:46:42<7:20:37, 1.58it/s] 28%|██▊ | 15866/57600 [2:46:43<7:20:35, 1.58it/s] 28%|██▊ | 15867/57600 [2:46:43<7:24:42, 1.56it/s] 28%|██▊ | 15868/57600 [2:46:44<7:17:52, 1.59it/s] 28%|██▊ | 15869/57600 [2:46:45<7:34:29, 1.53it/s] 28%|██▊ | 15870/57600 [2:46:45<7:17:33, 1.59it/s] 28%|██▊ | 15871/57600 [2:46:46<7:20:56, 1.58it/s] 28%|██▊ | 15872/57600 [2:46:47<7:22:03, 1.57it/s] 28%|██▊ | 15873/57600 [2:46:47<7:12:50, 1.61it/s] 28%|██▊ | 15874/57600 [2:46:48<7:21:01, 1.58it/s] 28%|██▊ | 15875/57600 [2:46:48<7:18:18, 1.59it/s] 28%|██▊ | 15876/57600 [2:46:49<7:20:22, 1.58it/s] 28%|██▊ | 15877/57600 [2:46:50<7:17:36, 1.59it/s] 28%|██▊ | 15878/57600 [2:46:50<7:08:07, 1.62it/s] 28%|██▊ | 15879/57600 [2:46:51<7:03:32, 1.64it/s] 28%|██▊ | 15880/57600 [2:46:52<7:13:46, 1.60it/s] {'loss': 1.2509, 'learning_rate': 7.541463414634146e-07, 'epoch': 175.28} + 28%|██▊ | 15880/57600 [2:46:52<7:13:46, 1.60it/s] 28%|██▊ | 15881/57600 [2:46:52<7:10:54, 1.61it/s] 28%|██▊ | 15882/57600 [2:46:53<7:05:49, 1.63it/s] 28%|██▊ | 15883/57600 [2:46:53<7:18:58, 1.58it/s] 28%|██▊ | 15884/57600 [2:46:54<7:27:41, 1.55it/s] 28%|██▊ | 15885/57600 [2:46:55<7:15:58, 1.59it/s] 28%|██▊ | 15886/57600 [2:46:55<7:26:36, 1.56it/s] 28%|██▊ | 15887/57600 [2:46:56<7:23:33, 1.57it/s] 28%|██▊ | 15888/57600 [2:46:57<7:25:57, 1.56it/s] 28%|██▊ | 15889/57600 [2:46:57<7:22:19, 1.57it/s] 28%|██▊ | 15890/57600 [2:46:58<7:11:56, 1.61it/s] 28%|██▊ | 15891/57600 [2:46:59<7:24:59, 1.56it/s] 28%|██▊ | 15892/57600 [2:46:59<7:23:08, 1.57it/s] 28%|██▊ | 15893/57600 [2:47:00<7:10:46, 1.61it/s] 28%|██▊ | 15894/57600 [2:47:00<7:14:13, 1.60it/s] 28%|██▊ | 15895/57600 [2:47:01<7:06:25, 1.63it/s] 28%|██▊ | 15896/57600 [2:47:02<7:01:58, 1.65it/s] 28%|██▊ | 15897/57600 [2:47:02<7:10:44, 1.61it/s] 28%|██▊ | 15898/57600 [2:47:03<7:10:20, 1.62it/s] 28%|██▊ | 15899/57600 [2:47:03<7:03:07, 1.64it/s] 28%|██▊ | 15900/57600 [2:47:04<7:02:30, 1.64it/s] {'loss': 1.2677, 'learning_rate': 7.538327526132404e-07, 'epoch': 175.5} + 28%|██▊ | 15900/57600 [2:47:04<7:02:30, 1.64it/s] 28%|██▊ | 15901/57600 [2:47:05<7:10:23, 1.61it/s] 28%|██▊ | 15902/57600 [2:47:05<7:01:01, 1.65it/s] 28%|██▊ | 15903/57600 [2:47:06<7:07:03, 1.63it/s] 28%|██▊ | 15904/57600 [2:47:07<7:19:20, 1.58it/s] 28%|██▊ | 15905/57600 [2:47:07<7:10:07, 1.62it/s] 28%|██▊ | 15906/57600 [2:47:08<7:17:54, 1.59it/s] 28%|██▊ | 15907/57600 [2:47:08<7:31:34, 1.54it/s] 28%|██▊ | 15908/57600 [2:47:09<7:37:53, 1.52it/s] 28%|██▊ | 15909/57600 [2:47:10<7:28:49, 1.55it/s] 28%|██▊ | 15910/57600 [2:47:10<7:14:41, 1.60it/s] 28%|██▊ | 15911/57600 [2:47:11<7:19:52, 1.58it/s] 28%|██▊ | 15912/57600 [2:47:12<7:10:05, 1.62it/s] 28%|██▊ | 15913/57600 [2:47:12<7:07:37, 1.62it/s] 28%|██▊ | 15914/57600 [2:47:13<7:07:12, 1.63it/s] 28%|██▊ | 15915/57600 [2:47:13<7:18:43, 1.58it/s] 28%|██▊ | 15916/57600 [2:47:14<7:20:28, 1.58it/s] 28%|██▊ | 15917/57600 [2:47:15<7:38:01, 1.52it/s] 28%|██▊ | 15918/57600 [2:47:15<7:35:39, 1.52it/s] 28%|██▊ | 15919/57600 [2:47:16<7:32:53, 1.53it/s] 28%|██▊ | 15920/57600 [2:47:17<7:31:16, 1.54it/s] {'loss': 1.2854, 'learning_rate': 7.535191637630663e-07, 'epoch': 175.72} + 28%|██▊ | 15920/57600 [2:47:17<7:31:16, 1.54it/s] 28%|██▊ | 15921/57600 [2:47:17<7:18:59, 1.58it/s] 28%|██▊ | 15922/57600 [2:47:18<7:11:30, 1.61it/s] 28%|██▊ | 15923/57600 [2:47:19<7:11:44, 1.61it/s] 28%|██▊ | 15924/57600 [2:47:19<7:09:11, 1.62it/s] 28%|██▊ | 15925/57600 [2:47:20<7:17:28, 1.59it/s] 28%|██▊ | 15926/57600 [2:47:21<7:48:38, 1.48it/s] 28%|██▊ | 15927/57600 [2:47:21<7:47:34, 1.49it/s] 28%|██▊ | 15928/57600 [2:47:22<7:35:34, 1.52it/s] 28%|██▊ | 15929/57600 [2:47:23<7:33:03, 1.53it/s] 28%|██▊ | 15930/57600 [2:47:23<7:21:23, 1.57it/s] 28%|██▊ | 15931/57600 [2:47:24<7:15:30, 1.59it/s] 28%|██▊ | 15932/57600 [2:47:24<7:26:05, 1.56it/s] 28%|██▊ | 15933/57600 [2:47:25<7:33:04, 1.53it/s] 28%|██▊ | 15934/57600 [2:47:26<7:33:16, 1.53it/s] 28%|██▊ | 15935/57600 [2:47:26<7:30:58, 1.54it/s] 28%|██▊ | 15936/57600 [2:47:27<7:28:33, 1.55it/s] 28%|██▊ | 15937/57600 [2:47:28<7:36:44, 1.52it/s] 28%|██▊ | 15938/57600 [2:47:28<7:27:18, 1.55it/s] 28%|██▊ | 15939/57600 [2:47:29<7:21:49, 1.57it/s] 28%|██▊ | 15940/57600 [2:47:30<7:19:12, 1.58it/s] {'loss': 1.2456, 'learning_rate': 7.532055749128919e-07, 'epoch': 175.94} + 28%|██▊ | 15940/57600 [2:47:30<7:19:12, 1.58it/s] 28%|██▊ | 15941/57600 [2:47:30<7:28:09, 1.55it/s] 28%|██▊ | 15942/57600 [2:47:31<7:17:28, 1.59it/s] 28%|██▊ | 15943/57600 [2:47:31<7:14:35, 1.60it/s] 28%|██▊ | 15944/57600 [2:47:32<7:06:18, 1.63it/s] 28%|██▊ | 15945/57600 [2:47:33<7:09:30, 1.62it/s] 28%|██▊ | 15946/57600 [2:47:33<7:16:30, 1.59it/s] 28%|██▊ | 15947/57600 [2:47:34<7:08:35, 1.62it/s] 28%|██▊ | 15948/57600 [2:47:35<7:20:15, 1.58it/s] 28%|██▊ | 15949/57600 [2:47:35<7:17:22, 1.59it/s] 28%|██▊ | 15950/57600 [2:47:36<7:26:29, 1.55it/s] 28%|██▊ | 15951/57600 [2:47:37<7:27:59, 1.55it/s] 28%|██▊ | 15952/57600 [2:47:37<7:19:28, 1.58it/s] 28%|██▊ | 15953/57600 [2:47:38<7:26:14, 1.56it/s] 28%|██▊ | 15954/57600 [2:47:38<7:35:28, 1.52it/s] 28%|██▊ | 15955/57600 [2:47:39<7:35:49, 1.52it/s] 28%|██▊ | 15956/57600 [2:47:40<7:35:03, 1.53it/s] 28%|██▊ | 15957/57600 [2:47:40<7:33:45, 1.53it/s] 28%|██▊ | 15958/57600 [2:47:41<7:24:04, 1.56it/s] 28%|██▊ | 15959/57600 [2:47:42<7:25:49, 1.56it/s] 28%|██▊ | 15960/57600 [2:47:42<7:20:03, 1.58it/s] {'loss': 1.2687, 'learning_rate': 7.528919860627177e-07, 'epoch': 176.16} + 28%|██▊ | 15960/57600 [2:47:42<7:20:03, 1.58it/s] 28%|██▊ | 15961/57600 [2:47:43<7:40:56, 1.51it/s] 28%|██▊ | 15962/57600 [2:47:44<7:42:15, 1.50it/s] 28%|██▊ | 15963/57600 [2:47:44<7:40:29, 1.51it/s] 28%|██▊ | 15964/57600 [2:47:45<7:44:47, 1.49it/s] 28%|██▊ | 15965/57600 [2:47:46<7:41:57, 1.50it/s] 28%|██▊ | 15966/57600 [2:47:46<7:45:00, 1.49it/s] 28%|██▊ | 15967/57600 [2:47:47<7:36:09, 1.52it/s] 28%|██▊ | 15968/57600 [2:47:48<7:27:20, 1.55it/s] 28%|██▊ | 15969/57600 [2:47:48<7:47:55, 1.48it/s] 28%|██▊ | 15970/57600 [2:47:49<7:43:32, 1.50it/s] 28%|██▊ | 15971/57600 [2:47:50<7:35:26, 1.52it/s] 28%|██▊ | 15972/57600 [2:47:50<7:45:42, 1.49it/s] 28%|██▊ | 15973/57600 [2:47:51<7:40:33, 1.51it/s] 28%|██▊ | 15974/57600 [2:47:52<7:18:41, 1.58it/s] 28%|██▊ | 15975/57600 [2:47:52<7:10:17, 1.61it/s] 28%|██▊ | 15976/57600 [2:47:53<7:10:15, 1.61it/s] 28%|██▊ | 15977/57600 [2:47:53<7:08:47, 1.62it/s] 28%|██▊ | 15978/57600 [2:47:54<7:16:37, 1.59it/s] 28%|██▊ | 15979/57600 [2:47:55<7:31:10, 1.54it/s] 28%|██▊ | 15980/57600 [2:47:55<7:39:49, 1.51it/s] {'loss': 1.2569, 'learning_rate': 7.525783972125435e-07, 'epoch': 176.38} + 28%|██▊ | 15980/57600 [2:47:55<7:39:49, 1.51it/s] 28%|██▊ | 15981/57600 [2:47:56<7:29:15, 1.54it/s] 28%|██▊ | 15982/57600 [2:47:57<7:17:50, 1.58it/s] 28%|██▊ | 15983/57600 [2:47:57<7:10:13, 1.61it/s] 28%|██▊ | 15984/57600 [2:47:58<7:08:12, 1.62it/s] 28%|██▊ | 15985/57600 [2:47:59<7:11:55, 1.61it/s] 28%|██▊ | 15986/57600 [2:47:59<7:08:55, 1.62it/s] 28%|██▊ | 15987/57600 [2:48:00<7:05:57, 1.63it/s] 28%|██▊ | 15988/57600 [2:48:00<7:07:43, 1.62it/s] 28%|██▊ | 15989/57600 [2:48:01<7:06:32, 1.63it/s] 28%|██▊ | 15990/57600 [2:48:02<7:10:04, 1.61it/s] 28%|██▊ | 15991/57600 [2:48:02<7:03:58, 1.64it/s] 28%|██▊ | 15992/57600 [2:48:03<7:07:26, 1.62it/s] 28%|██▊ | 15993/57600 [2:48:03<7:08:01, 1.62it/s] 28%|██▊ | 15994/57600 [2:48:04<7:33:28, 1.53it/s] 28%|██▊ | 15995/57600 [2:48:05<7:29:51, 1.54it/s] 28%|██▊ | 15996/57600 [2:48:05<7:37:15, 1.52it/s] 28%|██▊ | 15997/57600 [2:48:06<7:27:31, 1.55it/s] 28%|██▊ | 15998/57600 [2:48:07<7:22:14, 1.57it/s] 28%|██▊ | 15999/57600 [2:48:07<7:24:48, 1.56it/s] 28%|██▊ | 16000/57600 [2:48:08<7:09:31, 1.61it/s] {'loss': 1.2962, 'learning_rate': 7.522648083623694e-07, 'epoch': 176.6} + 28%|██▊ | 16000/57600 [2:48:08<7:09:31, 1.61it/s] 28%|██▊ | 16001/57600 [2:48:09<7:31:29, 1.54it/s] 28%|██▊ | 16002/57600 [2:48:09<7:26:20, 1.55it/s] 28%|██▊ | 16003/57600 [2:48:10<7:18:58, 1.58it/s] 28%|██▊ | 16004/57600 [2:48:11<7:36:43, 1.52it/s] 28%|██▊ | 16005/57600 [2:48:11<7:18:52, 1.58it/s] 28%|██▊ | 16006/57600 [2:48:12<7:25:45, 1.56it/s] 28%|██▊ | 16007/57600 [2:48:12<7:14:46, 1.59it/s] 28%|██▊ | 16008/57600 [2:48:13<7:16:05, 1.59it/s] 28%|██▊ | 16009/57600 [2:48:14<7:11:52, 1.61it/s] 28%|██▊ | 16010/57600 [2:48:14<7:11:35, 1.61it/s] 28%|██▊ | 16011/57600 [2:48:15<7:26:09, 1.55it/s] 28%|██▊ | 16012/57600 [2:48:16<7:22:10, 1.57it/s] 28%|██▊ | 16013/57600 [2:48:16<7:14:33, 1.59it/s] 28%|██▊ | 16014/57600 [2:48:17<7:20:20, 1.57it/s] 28%|██▊ | 16015/57600 [2:48:18<7:17:31, 1.58it/s] 28%|██▊ | 16016/57600 [2:48:18<7:29:34, 1.54it/s] 28%|██▊ | 16017/57600 [2:48:19<7:12:03, 1.60it/s] 28%|██▊ | 16018/57600 [2:48:19<7:13:53, 1.60it/s] 28%|██▊ | 16019/57600 [2:48:20<7:12:31, 1.60it/s] 28%|██▊ | 16020/57600 [2:48:21<7:10:34, 1.61it/s] {'loss': 1.2896, 'learning_rate': 7.519512195121951e-07, 'epoch': 176.82} + 28%|██▊ | 16020/57600 [2:48:21<7:10:34, 1.61it/s] 28%|██▊ | 16021/57600 [2:48:21<7:04:40, 1.63it/s] 28%|██▊ | 16022/57600 [2:48:22<7:12:40, 1.60it/s] 28%|██▊ | 16023/57600 [2:48:22<7:07:00, 1.62it/s] 28%|██▊ | 16024/57600 [2:48:23<7:07:00, 1.62it/s] 28%|██▊ | 16025/57600 [2:48:24<7:13:20, 1.60it/s] 28%|██▊ | 16026/57600 [2:48:24<7:11:00, 1.61it/s] 28%|██▊ | 16027/57600 [2:48:25<7:15:57, 1.59it/s] 28%|██▊ | 16028/57600 [2:48:26<7:09:05, 1.61it/s] 28%|██▊ | 16029/57600 [2:48:26<7:01:13, 1.64it/s] 28%|██▊ | 16030/57600 [2:48:27<7:07:54, 1.62it/s] 28%|██▊ | 16031/57600 [2:48:27<7:05:17, 1.63it/s] 28%|██▊ | 16032/57600 [2:48:28<7:11:06, 1.61it/s] 28%|██▊ | 16033/57600 [2:48:29<7:07:25, 1.62it/s] 28%|██▊ | 16034/57600 [2:48:29<7:13:28, 1.60it/s] 28%|██▊ | 16035/57600 [2:48:30<7:06:41, 1.62it/s] 28%|██▊ | 16036/57600 [2:48:31<7:08:03, 1.62it/s] 28%|██▊ | 16037/57600 [2:48:31<7:23:17, 1.56it/s] 28%|██▊ | 16038/57600 [2:48:32<7:08:19, 1.62it/s] 28%|██▊ | 16039/57600 [2:48:32<7:12:10, 1.60it/s] 28%|██▊ | 16040/57600 [2:48:33<7:18:01, 1.58it/s] {'loss': 1.2979, 'learning_rate': 7.516376306620209e-07, 'epoch': 177.04} + 28%|██▊ | 16040/57600 [2:48:33<7:18:01, 1.58it/s] 28%|██▊ | 16041/57600 [2:48:34<7:16:32, 1.59it/s] 28%|██▊ | 16042/57600 [2:48:34<7:14:17, 1.59it/s] 28%|██▊ | 16043/57600 [2:48:35<7:28:01, 1.55it/s] 28%|██▊ | 16044/57600 [2:48:36<7:26:30, 1.55it/s] 28%|██▊ | 16045/57600 [2:48:36<7:41:59, 1.50it/s] 28%|██▊ | 16046/57600 [2:48:37<7:36:50, 1.52it/s] 28%|██▊ | 16047/57600 [2:48:38<7:27:39, 1.55it/s] 28%|██▊ | 16048/57600 [2:48:38<7:23:24, 1.56it/s] 28%|██▊ | 16049/57600 [2:48:39<7:11:45, 1.60it/s] 28%|██▊ | 16050/57600 [2:48:39<7:08:56, 1.61it/s] 28%|██▊ | 16051/57600 [2:48:40<7:06:17, 1.62it/s] 28%|██▊ | 16052/57600 [2:48:41<7:14:44, 1.59it/s] 28%|██▊ | 16053/57600 [2:48:41<7:23:03, 1.56it/s] 28%|██▊ | 16054/57600 [2:48:42<7:26:06, 1.55it/s] 28%|██▊ | 16055/57600 [2:48:43<7:21:48, 1.57it/s] 28%|██▊ | 16056/57600 [2:48:43<7:18:11, 1.58it/s] 28%|██▊ | 16057/57600 [2:48:44<7:09:29, 1.61it/s] 28%|██▊ | 16058/57600 [2:48:44<7:05:33, 1.63it/s] 28%|██▊ | 16059/57600 [2:48:45<7:02:38, 1.64it/s] 28%|██▊ | 16060/57600 [2:48:46<6:49:12, 1.69it/s] {'loss': 1.2901, 'learning_rate': 7.513240418118468e-07, 'epoch': 177.26} + 28%|██▊ | 16060/57600 [2:48:46<6:49:12, 1.69it/s] 28%|██▊ | 16061/57600 [2:48:46<7:03:08, 1.64it/s] 28%|██▊ | 16062/57600 [2:48:47<7:27:26, 1.55it/s] 28%|██▊ | 16063/57600 [2:48:48<7:23:58, 1.56it/s] 28%|██▊ | 16064/57600 [2:48:48<7:33:30, 1.53it/s] 28%|██▊ | 16065/57600 [2:48:49<7:26:48, 1.55it/s] 28%|██▊ | 16066/57600 [2:48:50<7:17:08, 1.58it/s] 28%|██▊ | 16067/57600 [2:48:50<7:24:27, 1.56it/s] 28%|██▊ | 16068/57600 [2:48:51<7:21:44, 1.57it/s] 28%|██▊ | 16069/57600 [2:48:52<7:48:58, 1.48it/s] 28%|██▊ | 16070/57600 [2:48:52<7:31:24, 1.53it/s] 28%|██▊ | 16071/57600 [2:48:53<7:20:49, 1.57it/s] 28%|██▊ | 16072/57600 [2:48:53<7:21:34, 1.57it/s] 28%|██▊ | 16073/57600 [2:48:54<7:16:08, 1.59it/s] 28%|██▊ | 16074/57600 [2:48:55<7:16:02, 1.59it/s] 28%|██▊ | 16075/57600 [2:48:55<7:11:46, 1.60it/s] 28%|██▊ | 16076/57600 [2:48:56<7:24:56, 1.56it/s] 28%|██▊ | 16077/57600 [2:48:57<7:23:32, 1.56it/s] 28%|██▊ | 16078/57600 [2:48:57<7:32:42, 1.53it/s] 28%|██▊ | 16079/57600 [2:48:58<7:21:40, 1.57it/s] 28%|██▊ | 16080/57600 [2:48:59<7:29:05, 1.54it/s] {'loss': 1.2471, 'learning_rate': 7.510104529616724e-07, 'epoch': 177.48} + 28%|██▊ | 16080/57600 [2:48:59<7:29:05, 1.54it/s] 28%|██▊ | 16081/57600 [2:48:59<7:32:20, 1.53it/s] 28%|██▊ | 16082/57600 [2:49:00<7:27:31, 1.55it/s] 28%|██▊ | 16083/57600 [2:49:01<7:30:55, 1.53it/s] 28%|██▊ | 16084/57600 [2:49:01<7:40:06, 1.50it/s] 28%|██▊ | 16085/57600 [2:49:02<7:30:56, 1.53it/s] 28%|██▊ | 16086/57600 [2:49:02<7:28:38, 1.54it/s] 28%|██▊ | 16087/57600 [2:49:03<7:13:26, 1.60it/s] 28%|██▊ | 16088/57600 [2:49:04<7:05:05, 1.63it/s] 28%|██▊ | 16089/57600 [2:49:04<7:09:32, 1.61it/s] 28%|██▊ | 16090/57600 [2:49:05<7:14:14, 1.59it/s] 28%|██▊ | 16091/57600 [2:49:06<7:17:57, 1.58it/s] 28%|██▊ | 16092/57600 [2:49:06<7:24:11, 1.56it/s] 28%|██▊ | 16093/57600 [2:49:07<7:43:30, 1.49it/s] 28%|██▊ | 16094/57600 [2:49:08<7:23:01, 1.56it/s] 28%|██▊ | 16095/57600 [2:49:08<7:10:43, 1.61it/s] 28%|██▊ | 16096/57600 [2:49:09<7:16:07, 1.59it/s] 28%|██▊ | 16097/57600 [2:49:09<7:22:53, 1.56it/s] 28%|██▊ | 16098/57600 [2:49:10<7:29:17, 1.54it/s] 28%|██▊ | 16099/57600 [2:49:11<7:24:39, 1.56it/s] 28%|██▊ | 16100/57600 [2:49:11<7:34:26, 1.52it/s] {'loss': 1.2514, 'learning_rate': 7.506968641114981e-07, 'epoch': 177.7} + 28%|██▊ | 16100/57600 [2:49:11<7:34:26, 1.52it/s] 28%|██▊ | 16101/57600 [2:49:12<7:22:56, 1.56it/s] 28%|██▊ | 16102/57600 [2:49:13<7:08:33, 1.61it/s] 28%|██▊ | 16103/57600 [2:49:13<7:16:53, 1.58it/s] 28%|██▊ | 16104/57600 [2:49:14<7:19:47, 1.57it/s] 28%|██▊ | 16105/57600 [2:49:14<7:00:40, 1.64it/s] 28%|██▊ | 16106/57600 [2:49:15<7:06:00, 1.62it/s] 28%|██▊ | 16107/57600 [2:49:16<6:58:02, 1.65it/s] 28%|██▊ | 16108/57600 [2:49:16<6:59:25, 1.65it/s] 28%|██▊ | 16109/57600 [2:49:17<7:03:43, 1.63it/s] 28%|██▊ | 16110/57600 [2:49:18<7:07:21, 1.62it/s] 28%|██▊ | 16111/57600 [2:49:18<7:15:51, 1.59it/s] 28%|██▊ | 16112/57600 [2:49:19<7:08:18, 1.61it/s] 28%|██▊ | 16113/57600 [2:49:19<7:12:00, 1.60it/s] 28%|██▊ | 16114/57600 [2:49:20<7:24:28, 1.56it/s] 28%|██▊ | 16115/57600 [2:49:21<7:25:52, 1.55it/s] 28%|██▊ | 16116/57600 [2:49:21<7:11:13, 1.60it/s] 28%|██▊ | 16117/57600 [2:49:22<7:10:18, 1.61it/s] 28%|██▊ | 16118/57600 [2:49:23<7:01:40, 1.64it/s] 28%|██▊ | 16119/57600 [2:49:23<6:53:19, 1.67it/s] 28%|██▊ | 16120/57600 [2:49:24<7:10:43, 1.61it/s] {'loss': 1.2979, 'learning_rate': 7.50383275261324e-07, 'epoch': 177.92} + 28%|██▊ | 16120/57600 [2:49:24<7:10:43, 1.61it/s] 28%|██▊ | 16121/57600 [2:49:24<6:58:36, 1.65it/s] 28%|██▊ | 16122/57600 [2:49:25<6:59:05, 1.65it/s] 28%|██▊ | 16123/57600 [2:49:26<7:05:20, 1.63it/s] 28%|██▊ | 16124/57600 [2:49:26<7:01:05, 1.64it/s] 28%|██▊ | 16125/57600 [2:49:27<7:07:59, 1.62it/s] 28%|██▊ | 16126/57600 [2:49:27<7:06:23, 1.62it/s] 28%|██▊ | 16127/57600 [2:49:28<7:40:51, 1.50it/s] 28%|██▊ | 16128/57600 [2:49:29<7:42:43, 1.49it/s] 28%|██▊ | 16129/57600 [2:49:30<7:35:45, 1.52it/s] 28%|██▊ | 16130/57600 [2:49:30<7:25:09, 1.55it/s] 28%|██▊ | 16131/57600 [2:49:31<7:11:06, 1.60it/s] 28%|██▊ | 16132/57600 [2:49:31<7:04:49, 1.63it/s] 28%|██▊ | 16133/57600 [2:49:32<7:08:54, 1.61it/s] 28%|██▊ | 16134/57600 [2:49:33<7:01:53, 1.64it/s] 28%|██▊ | 16135/57600 [2:49:33<6:52:42, 1.67it/s] 28%|██▊ | 16136/57600 [2:49:34<7:01:10, 1.64it/s] 28%|█��▊ | 16137/57600 [2:49:34<7:00:18, 1.64it/s] 28%|██▊ | 16138/57600 [2:49:35<7:16:41, 1.58it/s] 28%|██▊ | 16139/57600 [2:49:36<7:06:48, 1.62it/s] 28%|██▊ | 16140/57600 [2:49:36<6:59:00, 1.65it/s] {'loss': 1.275, 'learning_rate': 7.500696864111499e-07, 'epoch': 178.15} + 28%|██▊ | 16140/57600 [2:49:36<6:59:00, 1.65it/s] 28%|██▊ | 16141/57600 [2:49:37<6:55:32, 1.66it/s] 28%|██▊ | 16142/57600 [2:49:37<6:57:43, 1.65it/s] 28%|██▊ | 16143/57600 [2:49:38<7:05:19, 1.62it/s] 28%|██▊ | 16144/57600 [2:49:39<7:06:42, 1.62it/s] 28%|██▊ | 16145/57600 [2:49:39<7:16:40, 1.58it/s] 28%|██▊ | 16146/57600 [2:49:40<7:22:12, 1.56it/s] 28%|██▊ | 16147/57600 [2:49:41<7:12:09, 1.60it/s] 28%|██▊ | 16148/57600 [2:49:41<7:17:33, 1.58it/s] 28%|██▊ | 16149/57600 [2:49:42<7:22:37, 1.56it/s] 28%|██▊ | 16150/57600 [2:49:43<7:29:08, 1.54it/s] 28%|██▊ | 16151/57600 [2:49:43<7:22:05, 1.56it/s] 28%|██▊ | 16152/57600 [2:49:44<7:07:48, 1.61it/s] 28%|██▊ | 16153/57600 [2:49:44<7:15:39, 1.59it/s] 28%|██▊ | 16154/57600 [2:49:45<7:28:03, 1.54it/s] 28%|██▊ | 16155/57600 [2:49:46<7:22:33, 1.56it/s] 28%|██▊ | 16156/57600 [2:49:46<7:24:22, 1.55it/s] 28%|██▊ | 16157/57600 [2:49:47<7:20:14, 1.57it/s] 28%|██▊ | 16158/57600 [2:49:48<7:24:57, 1.55it/s] 28%|██▊ | 16159/57600 [2:49:48<7:17:30, 1.58it/s] 28%|██▊ | 16160/57600 [2:49:49<7:18:03, 1.58it/s] {'loss': 1.2554, 'learning_rate': 7.497560975609755e-07, 'epoch': 178.37} + 28%|██▊ | 16160/57600 [2:49:49<7:18:03, 1.58it/s] 28%|██▊ | 16161/57600 [2:49:50<7:14:50, 1.59it/s] 28%|██▊ | 16162/57600 [2:49:50<7:06:11, 1.62it/s] 28%|██▊ | 16163/57600 [2:49:51<7:10:48, 1.60it/s] 28%|██▊ | 16164/57600 [2:49:51<7:22:42, 1.56it/s] 28%|██▊ | 16165/57600 [2:49:52<7:22:49, 1.56it/s] 28%|██▊ | 16166/57600 [2:49:53<7:20:36, 1.57it/s] 28%|██▊ | 16167/57600 [2:49:53<7:11:18, 1.60it/s] 28%|██▊ | 16168/57600 [2:49:54<7:22:10, 1.56it/s] 28%|██▊ | 16169/57600 [2:49:55<7:16:27, 1.58it/s] 28%|██▊ | 16170/57600 [2:49:55<7:16:46, 1.58it/s] 28%|██▊ | 16171/57600 [2:49:56<7:10:21, 1.60it/s] 28%|██▊ | 16172/57600 [2:49:56<7:11:53, 1.60it/s] 28%|██▊ | 16173/57600 [2:49:57<7:10:11, 1.60it/s] 28%|██▊ | 16174/57600 [2:49:58<7:05:50, 1.62it/s] 28%|██▊ | 16175/57600 [2:49:58<7:08:49, 1.61it/s] 28%|██▊ | 16176/57600 [2:49:59<7:17:54, 1.58it/s] 28%|██▊ | 16177/57600 [2:50:00<7:09:00, 1.61it/s] 28%|██▊ | 16178/57600 [2:50:00<7:06:51, 1.62it/s] 28%|██▊ | 16179/57600 [2:50:01<7:02:23, 1.63it/s] 28%|██▊ | 16180/57600 [2:50:01<7:05:08, 1.62it/s] {'loss': 1.2885, 'learning_rate': 7.494425087108014e-07, 'epoch': 178.59} + 28%|██▊ | 16180/57600 [2:50:01<7:05:08, 1.62it/s] 28%|██▊ | 16181/57600 [2:50:02<7:11:09, 1.60it/s] 28%|██▊ | 16182/57600 [2:50:03<7:15:59, 1.58it/s] 28%|██▊ | 16183/57600 [2:50:03<7:28:03, 1.54it/s] 28%|██▊ | 16184/57600 [2:50:04<7:19:31, 1.57it/s] 28%|██▊ | 16185/57600 [2:50:05<7:23:25, 1.56it/s] 28%|██▊ | 16186/57600 [2:50:05<7:24:06, 1.55it/s] 28%|██▊ | 16187/57600 [2:50:06<7:18:56, 1.57it/s] 28%|██▊ | 16188/57600 [2:50:07<7:16:27, 1.58it/s] 28%|██▊ | 16189/57600 [2:50:07<7:28:01, 1.54it/s] 28%|██▊ | 16190/57600 [2:50:08<7:22:50, 1.56it/s] 28%|██▊ | 16191/57600 [2:50:08<7:23:33, 1.56it/s] 28%|██▊ | 16192/57600 [2:50:09<7:17:50, 1.58it/s] 28%|██▊ | 16193/57600 [2:50:10<7:33:33, 1.52it/s] 28%|██▊ | 16194/57600 [2:50:10<7:30:37, 1.53it/s] 28%|██▊ | 16195/57600 [2:50:11<7:32:54, 1.52it/s] 28%|██▊ | 16196/57600 [2:50:12<7:19:45, 1.57it/s] 28%|██▊ | 16197/57600 [2:50:12<7:16:30, 1.58it/s] 28%|██▊ | 16198/57600 [2:50:13<7:15:59, 1.58it/s] 28%|██▊ | 16199/57600 [2:50:14<7:09:27, 1.61it/s] 28%|██▊ | 16200/57600 [2:50:14<7:02:32, 1.63it/s] {'loss': 1.2416, 'learning_rate': 7.491289198606272e-07, 'epoch': 178.81} + 28%|██▊ | 16200/57600 [2:50:14<7:02:32, 1.63it/s] 28%|██▊ | 16201/57600 [2:50:15<7:24:06, 1.55it/s] 28%|██▊ | 16202/57600 [2:50:16<7:25:57, 1.55it/s] 28%|██▊ | 16203/57600 [2:50:16<7:20:28, 1.57it/s] 28%|██▊ | 16204/57600 [2:50:17<7:41:43, 1.49it/s] 28%|██▊ | 16205/57600 [2:50:17<7:29:37, 1.53it/s] 28%|██▊ | 16206/57600 [2:50:18<7:29:44, 1.53it/s] 28%|██▊ | 16207/57600 [2:50:19<7:21:38, 1.56it/s] 28%|██▊ | 16208/57600 [2:50:19<7:19:08, 1.57it/s] 28%|██▊ | 16209/57600 [2:50:20<7:17:19, 1.58it/s] 28%|██▊ | 16210/57600 [2:50:21<7:19:44, 1.57it/s] 28%|██▊ | 16211/57600 [2:50:21<7:15:05, 1.59it/s] 28%|██▊ | 16212/57600 [2:50:22<7:13:59, 1.59it/s] 28%|██▊ | 16213/57600 [2:50:23<7:15:36, 1.58it/s] 28%|██▊ | 16214/57600 [2:50:23<7:06:31, 1.62it/s] 28%|██▊ | 16215/57600 [2:50:24<7:20:11, 1.57it/s] 28%|██▊ | 16216/57600 [2:50:24<7:28:09, 1.54it/s] 28%|██▊ | 16217/57600 [2:50:25<7:14:19, 1.59it/s] 28%|██▊ | 16218/57600 [2:50:26<7:32:34, 1.52it/s] 28%|██▊ | 16219/57600 [2:50:26<7:18:35, 1.57it/s] 28%|██▊ | 16220/57600 [2:50:27<7:11:27, 1.60it/s] {'loss': 1.2668, 'learning_rate': 7.48815331010453e-07, 'epoch': 179.03} + 28%|██▊ | 16220/57600 [2:50:27<7:11:27, 1.60it/s] 28%|██▊ | 16221/57600 [2:50:28<7:09:16, 1.61it/s] 28%|██▊ | 16222/57600 [2:50:28<7:11:13, 1.60it/s] 28%|██▊ | 16223/57600 [2:50:29<7:00:39, 1.64it/s] 28%|██▊ | 16224/57600 [2:50:29<7:00:01, 1.64it/s] 28%|██▊ | 16225/57600 [2:50:30<7:09:42, 1.60it/s] 28%|██▊ | 16226/57600 [2:50:31<7:08:46, 1.61it/s] 28%|██▊ | 16227/57600 [2:50:31<7:00:48, 1.64it/s] 28%|██▊ | 16228/57600 [2:50:32<7:00:31, 1.64it/s] 28%|██▊ | 16229/57600 [2:50:32<6:59:45, 1.64it/s] 28%|██▊ | 16230/57600 [2:50:33<7:05:07, 1.62it/s] 28%|██▊ | 16231/57600 [2:50:34<7:00:38, 1.64it/s] 28%|██▊ | 16232/57600 [2:50:34<7:10:54, 1.60it/s] 28%|██▊ | 16233/57600 [2:50:35<7:04:58, 1.62it/s] 28%|██▊ | 16234/57600 [2:50:36<6:59:18, 1.64it/s] 28%|██▊ | 16235/57600 [2:50:36<6:57:47, 1.65it/s] 28%|██▊ | 16236/57600 [2:50:37<6:44:39, 1.70it/s] 28%|██▊ | 16237/57600 [2:50:37<6:58:58, 1.65it/s] 28%|██▊ | 16238/57600 [2:50:38<7:06:03, 1.62it/s] 28%|██▊ | 16239/57600 [2:50:39<7:17:48, 1.57it/s] 28%|██▊ | 16240/57600 [2:50:39<7:23:35, 1.55it/s] {'loss': 1.2995, 'learning_rate': 7.485017421602786e-07, 'epoch': 179.25} + 28%|██▊ | 16240/57600 [2:50:39<7:23:35, 1.55it/s] 28%|██▊ | 16241/57600 [2:50:40<7:28:47, 1.54it/s] 28%|██▊ | 16242/57600 [2:50:41<7:09:54, 1.60it/s] 28%|██▊ | 16243/57600 [2:50:41<7:02:39, 1.63it/s] 28%|██▊ | 16244/57600 [2:50:42<7:12:56, 1.59it/s] 28%|██▊ | 16245/57600 [2:50:42<7:22:22, 1.56it/s] 28%|██▊ | 16246/57600 [2:50:43<7:26:32, 1.54it/s] 28%|██▊ | 16247/57600 [2:50:44<7:15:29, 1.58it/s] 28%|██▊ | 16248/57600 [2:50:44<7:31:12, 1.53it/s] 28%|██▊ | 16249/57600 [2:50:45<7:21:11, 1.56it/s] 28%|██▊ | 16250/57600 [2:50:46<7:15:44, 1.58it/s] 28%|██▊ | 16251/57600 [2:50:46<7:20:21, 1.56it/s] 28%|██▊ | 16252/57600 [2:50:47<7:38:03, 1.50it/s] 28%|██▊ | 16253/57600 [2:50:48<7:30:10, 1.53it/s] 28%|██▊ | 16254/57600 [2:50:48<7:20:12, 1.57it/s] 28%|██▊ | 16255/57600 [2:50:49<7:27:44, 1.54it/s] 28%|██▊ | 16256/57600 [2:50:50<7:18:25, 1.57it/s] 28%|██▊ | 16257/57600 [2:50:50<7:12:46, 1.59it/s] 28%|██▊ | 16258/57600 [2:50:51<6:58:35, 1.65it/s] 28%|██▊ | 16259/57600 [2:50:51<6:58:23, 1.65it/s] 28%|██▊ | 16260/57600 [2:50:52<6:49:23, 1.68it/s] {'loss': 1.2525, 'learning_rate': 7.481881533101045e-07, 'epoch': 179.47} + 28%|██▊ | 16260/57600 [2:50:52<6:49:23, 1.68it/s] 28%|██▊ | 16261/57600 [2:50:53<6:56:30, 1.65it/s] 28%|██▊ | 16262/57600 [2:50:53<7:09:26, 1.60it/s] 28%|██▊ | 16263/57600 [2:50:54<7:15:49, 1.58it/s] 28%|██▊ | 16264/57600 [2:50:54<7:15:04, 1.58it/s] 28%|██▊ | 16265/57600 [2:50:55<7:17:33, 1.57it/s] 28%|██▊ | 16266/57600 [2:50:56<7:15:08, 1.58it/s] 28%|██▊ | 16267/57600 [2:50:56<7:20:10, 1.57it/s] 28%|██▊ | 16268/57600 [2:50:57<7:18:02, 1.57it/s] 28%|██▊ | 16269/57600 [2:50:58<7:26:29, 1.54it/s] 28%|██▊ | 16270/57600 [2:50:58<7:25:40, 1.55it/s] 28%|██▊ | 16271/57600 [2:50:59<7:08:07, 1.61it/s] 28%|██▊ | 16272/57600 [2:51:00<7:19:30, 1.57it/s] 28%|██▊ | 16273/57600 [2:51:00<7:08:56, 1.61it/s] 28%|██▊ | 16274/57600 [2:51:01<7:03:00, 1.63it/s] 28%|██▊ | 16275/57600 [2:51:01<7:00:36, 1.64it/s] 28%|██▊ | 16276/57600 [2:51:02<6:59:42, 1.64it/s] 28%|██▊ | 16277/57600 [2:51:03<7:05:06, 1.62it/s] 28%|██▊ | 16278/57600 [2:51:03<7:05:41, 1.62it/s] 28%|██▊ | 16279/57600 [2:51:04<7:02:54, 1.63it/s] 28%|██▊ | 16280/57600 [2:51:04<7:10:34, 1.60it/s] {'loss': 1.2723, 'learning_rate': 7.478745644599303e-07, 'epoch': 179.69} + 28%|██▊ | 16280/57600 [2:51:04<7:10:34, 1.60it/s] 28%|██▊ | 16281/57600 [2:51:05<7:01:43, 1.63it/s] 28%|██▊ | 16282/57600 [2:51:06<7:00:07, 1.64it/s] 28%|██▊ | 16283/57600 [2:51:06<6:52:12, 1.67it/s] 28%|██▊ | 16284/57600 [2:51:07<6:50:17, 1.68it/s] 28%|██▊ | 16285/57600 [2:51:07<6:53:03, 1.67it/s] 28%|██▊ | 16286/57600 [2:51:08<7:01:30, 1.63it/s] 28%|██▊ | 16287/57600 [2:51:09<6:53:53, 1.66it/s] 28%|██▊ | 16288/57600 [2:51:09<6:52:15, 1.67it/s] 28%|██▊ | 16289/57600 [2:51:10<6:50:37, 1.68it/s] 28%|██▊ | 16290/57600 [2:51:10<6:44:05, 1.70it/s] 28%|██▊ | 16291/57600 [2:51:11<7:03:35, 1.63it/s] 28%|██▊ | 16292/57600 [2:51:12<6:58:44, 1.64it/s] 28%|██▊ | 16293/57600 [2:51:12<7:13:29, 1.59it/s] 28%|██▊ | 16294/57600 [2:51:13<7:03:02, 1.63it/s] 28%|██▊ | 16295/57600 [2:51:14<7:02:40, 1.63it/s] 28%|██▊ | 16296/57600 [2:51:14<7:09:08, 1.60it/s] 28%|██▊ | 16297/57600 [2:51:15<6:55:31, 1.66it/s] 28%|██▊ | 16298/57600 [2:51:15<6:53:36, 1.66it/s] 28%|██▊ | 16299/57600 [2:51:16<7:05:05, 1.62it/s] 28%|██▊ | 16300/57600 [2:51:17<7:02:58, 1.63it/s] {'loss': 1.3271, 'learning_rate': 7.475609756097561e-07, 'epoch': 179.91} + 28%|██▊ | 16300/57600 [2:51:17<7:02:58, 1.63it/s] 28%|██▊ | 16301/57600 [2:51:17<6:58:43, 1.64it/s] 28%|██▊ | 16302/57600 [2:51:18<7:20:25, 1.56it/s] 28%|██▊ | 16303/57600 [2:51:19<7:15:32, 1.58it/s] 28%|██▊ | 16304/57600 [2:51:19<7:12:17, 1.59it/s] 28%|██▊ | 16305/57600 [2:51:20<7:09:51, 1.60it/s] 28%|██▊ | 16306/57600 [2:51:20<7:05:52, 1.62it/s] 28%|██▊ | 16307/57600 [2:51:21<7:13:19, 1.59it/s] 28%|██▊ | 16308/57600 [2:51:22<7:07:13, 1.61it/s] 28%|██▊ | 16309/57600 [2:51:22<7:25:07, 1.55it/s] 28%|██▊ | 16310/57600 [2:51:23<7:42:45, 1.49it/s] 28%|██▊ | 16311/57600 [2:51:24<7:27:03, 1.54it/s] 28%|██▊ | 16312/57600 [2:51:24<7:30:22, 1.53it/s] 28%|██▊ | 16313/57600 [2:51:25<7:15:43, 1.58it/s] 28%|██▊ | 16314/57600 [2:51:26<7:20:58, 1.56it/s] 28%|██▊ | 16315/57600 [2:51:26<7:33:55, 1.52it/s] 28%|██▊ | 16316/57600 [2:51:27<7:21:28, 1.56it/s] 28%|██▊ | 16317/57600 [2:51:27<7:17:05, 1.57it/s] 28%|██▊ | 16318/57600 [2:51:28<7:11:51, 1.59it/s] 28%|██▊ | 16319/57600 [2:51:29<7:23:10, 1.55it/s] 28%|██▊ | 16320/57600 [2:51:29<7:16:45, 1.58it/s] {'loss': 1.2475, 'learning_rate': 7.472473867595818e-07, 'epoch': 180.13} + 28%|██▊ | 16320/57600 [2:51:29<7:16:45, 1.58it/s] 28%|██▊ | 16321/57600 [2:51:30<7:01:02, 1.63it/s] 28%|██▊ | 16322/57600 [2:51:31<7:10:02, 1.60it/s] 28%|██▊ | 16323/57600 [2:51:31<6:59:34, 1.64it/s] 28%|██▊ | 16324/57600 [2:51:32<7:04:34, 1.62it/s] 28%|██▊ | 16325/57600 [2:51:32<7:05:37, 1.62it/s] 28%|██▊ | 16326/57600 [2:51:33<7:01:26, 1.63it/s] 28%|██▊ | 16327/57600 [2:51:34<7:02:55, 1.63it/s] 28%|██▊ | 16328/57600 [2:51:34<6:55:54, 1.65it/s] 28%|██▊ | 16329/57600 [2:51:35<7:02:40, 1.63it/s] 28%|██▊ | 16330/57600 [2:51:36<7:27:25, 1.54it/s] 28%|██▊ | 16331/57600 [2:51:36<7:21:21, 1.56it/s] 28%|██▊ | 16332/57600 [2:51:37<7:15:39, 1.58it/s] 28%|██▊ | 16333/57600 [2:51:37<6:59:38, 1.64it/s] 28%|██▊ | 16334/57600 [2:51:38<6:58:55, 1.64it/s] 28%|██▊ | 16335/57600 [2:51:39<6:51:55, 1.67it/s] 28%|██▊ | 16336/57600 [2:51:39<6:50:20, 1.68it/s] 28%|██▊ | 16337/57600 [2:51:40<6:56:29, 1.65it/s] 28%|██▊ | 16338/57600 [2:51:40<6:49:33, 1.68it/s] 28%|██▊ | 16339/57600 [2:51:41<6:59:55, 1.64it/s] 28%|██▊ | 16340/57600 [2:51:42<7:01:48, 1.63it/s] {'loss': 1.2938, 'learning_rate': 7.469337979094077e-07, 'epoch': 180.35} + 28%|██▊ | 16340/57600 [2:51:42<7:01:48, 1.63it/s] 28%|██▊ | 16341/57600 [2:51:42<6:58:36, 1.64it/s] 28%|██▊ | 16342/57600 [2:51:43<6:52:39, 1.67it/s] 28%|█��▊ | 16343/57600 [2:51:43<6:54:34, 1.66it/s] 28%|██▊ | 16344/57600 [2:51:44<6:53:31, 1.66it/s] 28%|██▊ | 16345/57600 [2:51:45<6:53:41, 1.66it/s] 28%|██▊ | 16346/57600 [2:51:45<6:51:36, 1.67it/s] 28%|██▊ | 16347/57600 [2:51:46<6:58:44, 1.64it/s] 28%|██▊ | 16348/57600 [2:51:46<6:59:43, 1.64it/s] 28%|██▊ | 16349/57600 [2:51:47<7:30:43, 1.53it/s] 28%|██▊ | 16350/57600 [2:51:48<7:19:49, 1.56it/s] 28%|██▊ | 16351/57600 [2:51:48<7:22:17, 1.55it/s] 28%|██▊ | 16352/57600 [2:51:49<7:25:27, 1.54it/s] 28%|██▊ | 16353/57600 [2:51:50<7:39:36, 1.50it/s] 28%|██▊ | 16354/57600 [2:51:51<7:34:21, 1.51it/s] 28%|██▊ | 16355/57600 [2:51:51<7:24:07, 1.55it/s] 28%|██▊ | 16356/57600 [2:51:52<7:20:00, 1.56it/s] 28%|██▊ | 16357/57600 [2:51:52<7:21:04, 1.56it/s] 28%|██▊ | 16358/57600 [2:51:53<7:11:26, 1.59it/s] 28%|██▊ | 16359/57600 [2:51:54<7:17:56, 1.57it/s] 28%|██▊ | 16360/57600 [2:51:54<7:21:23, 1.56it/s] {'loss': 1.28, 'learning_rate': 7.466202090592336e-07, 'epoch': 180.57} + 28%|██▊ | 16360/57600 [2:51:54<7:21:23, 1.56it/s] 28%|██▊ | 16361/57600 [2:51:55<7:17:30, 1.57it/s] 28%|██▊ | 16362/57600 [2:51:56<7:18:08, 1.57it/s] 28%|██▊ | 16363/57600 [2:51:56<7:27:00, 1.54it/s] 28%|██▊ | 16364/57600 [2:51:57<7:30:40, 1.52it/s] 28%|██▊ | 16365/57600 [2:51:58<7:31:50, 1.52it/s] 28%|██▊ | 16366/57600 [2:51:58<7:24:51, 1.54it/s] 28%|██▊ | 16367/57600 [2:51:59<7:09:20, 1.60it/s] 28%|██▊ | 16368/57600 [2:51:59<7:10:03, 1.60it/s] 28%|██▊ | 16369/57600 [2:52:00<7:01:27, 1.63it/s] 28%|██▊ | 16370/57600 [2:52:01<7:03:58, 1.62it/s] 28%|██▊ | 16371/57600 [2:52:01<7:05:42, 1.61it/s] 28%|██▊ | 16372/57600 [2:52:02<6:56:49, 1.65it/s] 28%|██▊ | 16373/57600 [2:52:02<6:54:53, 1.66it/s] 28%|██▊ | 16374/57600 [2:52:03<6:50:48, 1.67it/s] 28%|██▊ | 16375/57600 [2:52:04<6:52:29, 1.67it/s] 28%|██▊ | 16376/57600 [2:52:04<6:45:44, 1.69it/s] 28%|██▊ | 16377/57600 [2:52:05<6:50:31, 1.67it/s] 28%|██▊ | 16378/57600 [2:52:05<6:55:48, 1.65it/s] 28%|██▊ | 16379/57600 [2:52:06<7:03:12, 1.62it/s] 28%|██▊ | 16380/57600 [2:52:07<6:59:49, 1.64it/s] {'loss': 1.2954, 'learning_rate': 7.463066202090591e-07, 'epoch': 180.79} + 28%|██▊ | 16380/57600 [2:52:07<6:59:49, 1.64it/s] 28%|██▊ | 16381/57600 [2:52:07<6:57:05, 1.65it/s] 28%|██▊ | 16382/57600 [2:52:08<7:26:05, 1.54it/s] 28%|██▊ | 16383/57600 [2:52:09<7:16:02, 1.58it/s] 28%|██▊ | 16384/57600 [2:52:09<7:04:49, 1.62it/s] 28%|██▊ | 16385/57600 [2:52:10<6:57:13, 1.65it/s] 28%|██▊ | 16386/57600 [2:52:10<6:47:14, 1.69it/s] 28%|██▊ | 16387/57600 [2:52:11<6:43:26, 1.70it/s] 28%|██▊ | 16388/57600 [2:52:11<6:45:12, 1.70it/s] 28%|██▊ | 16389/57600 [2:52:12<6:45:58, 1.69it/s] 28%|██▊ | 16390/57600 [2:52:13<6:48:51, 1.68it/s] 28%|██▊ | 16391/57600 [2:52:13<7:10:55, 1.59it/s] 28%|██▊ | 16392/57600 [2:52:14<6:59:17, 1.64it/s] 28%|██▊ | 16393/57600 [2:52:15<7:07:11, 1.61it/s] 28%|██▊ | 16394/57600 [2:52:15<7:00:25, 1.63it/s] 28%|██▊ | 16395/57600 [2:52:16<7:03:58, 1.62it/s] 28%|██▊ | 16396/57600 [2:52:16<7:13:27, 1.58it/s] 28%|██▊ | 16397/57600 [2:52:17<7:03:59, 1.62it/s] 28%|██▊ | 16398/57600 [2:52:18<7:15:12, 1.58it/s] 28%|██▊ | 16399/57600 [2:52:18<7:29:01, 1.53it/s] 28%|██▊ | 16400/57600 [2:52:19<7:31:50, 1.52it/s] {'loss': 1.2661, 'learning_rate': 7.45993031358885e-07, 'epoch': 181.02} + 28%|██▊ | 16400/57600 [2:52:19<7:31:50, 1.52it/s] 28%|██▊ | 16401/57600 [2:52:20<7:13:16, 1.58it/s] 28%|██▊ | 16402/57600 [2:52:20<7:10:46, 1.59it/s] 28%|██▊ | 16403/57600 [2:52:21<7:02:49, 1.62it/s] 28%|██▊ | 16404/57600 [2:52:21<7:03:03, 1.62it/s] 28%|██▊ | 16405/57600 [2:52:22<6:59:26, 1.64it/s] 28%|██▊ | 16406/57600 [2:52:23<6:54:19, 1.66it/s] 28%|██▊ | 16407/57600 [2:52:23<6:44:24, 1.70it/s] 28%|██▊ | 16408/57600 [2:52:24<6:37:22, 1.73it/s] 28%|██▊ | 16409/57600 [2:52:24<6:31:29, 1.75it/s] 28%|██▊ | 16410/57600 [2:52:25<6:39:30, 1.72it/s] 28%|██▊ | 16411/57600 [2:52:26<6:40:53, 1.71it/s] 28%|██▊ | 16412/57600 [2:52:26<6:46:38, 1.69it/s] 28%|██▊ | 16413/57600 [2:52:27<7:01:54, 1.63it/s] 28%|██▊ | 16414/57600 [2:52:28<7:19:09, 1.56it/s] 28%|██▊ | 16415/57600 [2:52:28<7:17:38, 1.57it/s] 28%|██▊ | 16416/57600 [2:52:29<7:24:34, 1.54it/s] 29%|██▊ | 16417/57600 [2:52:29<7:24:41, 1.54it/s] 29%|██▊ | 16418/57600 [2:52:30<7:14:47, 1.58it/s] 29%|██▊ | 16419/57600 [2:52:31<7:12:28, 1.59it/s] 29%|██▊ | 16420/57600 [2:52:31<7:11:25, 1.59it/s] {'loss': 1.3225, 'learning_rate': 7.456794425087108e-07, 'epoch': 181.24} + 29%|██▊ | 16420/57600 [2:52:31<7:11:25, 1.59it/s] 29%|██▊ | 16421/57600 [2:52:32<7:01:23, 1.63it/s] 29%|██▊ | 16422/57600 [2:52:33<7:10:35, 1.59it/s] 29%|██▊ | 16423/57600 [2:52:33<7:17:16, 1.57it/s] 29%|██▊ | 16424/57600 [2:52:34<7:18:53, 1.56it/s] 29%|██▊ | 16425/57600 [2:52:34<7:16:52, 1.57it/s] 29%|██▊ | 16426/57600 [2:52:35<7:24:17, 1.54it/s] 29%|██▊ | 16427/57600 [2:52:36<7:38:41, 1.50it/s] 29%|██▊ | 16428/57600 [2:52:37<7:30:41, 1.52it/s] 29%|██▊ | 16429/57600 [2:52:37<7:31:13, 1.52it/s] 29%|██▊ | 16430/57600 [2:52:38<7:28:21, 1.53it/s] 29%|██▊ | 16431/57600 [2:52:38<7:20:13, 1.56it/s] 29%|██▊ | 16432/57600 [2:52:39<7:11:58, 1.59it/s] 29%|██▊ | 16433/57600 [2:52:40<7:03:11, 1.62it/s] 29%|██▊ | 16434/57600 [2:52:40<6:55:03, 1.65it/s] 29%|██▊ | 16435/57600 [2:52:41<7:01:02, 1.63it/s] 29%|██▊ | 16436/57600 [2:52:41<6:57:13, 1.64it/s] 29%|██▊ | 16437/57600 [2:52:42<7:05:04, 1.61it/s] 29%|██▊ | 16438/57600 [2:52:43<7:00:48, 1.63it/s] 29%|██▊ | 16439/57600 [2:52:43<7:02:46, 1.62it/s] 29%|██▊ | 16440/57600 [2:52:44<7:08:06, 1.60it/s] {'loss': 1.2522, 'learning_rate': 7.453658536585366e-07, 'epoch': 181.46} + 29%|██▊ | 16440/57600 [2:52:44<7:08:06, 1.60it/s] 29%|██▊ | 16441/57600 [2:52:45<7:06:44, 1.61it/s] 29%|██▊ | 16442/57600 [2:52:45<7:18:08, 1.57it/s] 29%|██▊ | 16443/57600 [2:52:46<7:11:34, 1.59it/s] 29%|██▊ | 16444/57600 [2:52:46<6:54:37, 1.65it/s] 29%|██▊ | 16445/57600 [2:52:47<7:00:40, 1.63it/s] 29%|██▊ | 16446/57600 [2:52:48<6:53:28, 1.66it/s] 29%|██▊ | 16447/57600 [2:52:48<6:45:57, 1.69it/s] 29%|██▊ | 16448/57600 [2:52:49<6:53:01, 1.66it/s] 29%|██▊ | 16449/57600 [2:52:49<6:46:10, 1.69it/s] 29%|██▊ | 16450/57600 [2:52:50<6:40:02, 1.71it/s] 29%|██▊ | 16451/57600 [2:52:50<6:28:31, 1.77it/s] 29%|██▊ | 16452/57600 [2:52:51<6:33:48, 1.74it/s] 29%|██▊ | 16453/57600 [2:52:52<6:40:09, 1.71it/s] 29%|██▊ | 16454/57600 [2:52:52<6:43:13, 1.70it/s] 29%|██▊ | 16455/57600 [2:52:53<6:51:16, 1.67it/s] 29%|██▊ | 16456/57600 [2:52:54<7:06:16, 1.61it/s] 29%|██▊ | 16457/57600 [2:52:54<7:00:02, 1.63it/s] 29%|██▊ | 16458/57600 [2:52:55<7:13:12, 1.58it/s] 29%|██▊ | 16459/57600 [2:52:55<7:16:08, 1.57it/s] 29%|██▊ | 16460/57600 [2:52:56<7:18:12, 1.56it/s] {'loss': 1.312, 'learning_rate': 7.450522648083623e-07, 'epoch': 181.68} + 29%|██▊ | 16460/57600 [2:52:56<7:18:12, 1.56it/s] 29%|██▊ | 16461/57600 [2:52:57<7:11:16, 1.59it/s] 29%|██▊ | 16462/57600 [2:52:57<7:18:52, 1.56it/s] 29%|██▊ | 16463/57600 [2:52:58<7:07:12, 1.60it/s] 29%|██▊ | 16464/57600 [2:52:59<6:57:32, 1.64it/s] 29%|██▊ | 16465/57600 [2:52:59<6:49:42, 1.67it/s] 29%|██▊ | 16466/57600 [2:53:00<6:58:54, 1.64it/s] 29%|██▊ | 16467/57600 [2:53:00<7:05:13, 1.61it/s] 29%|██▊ | 16468/57600 [2:53:01<7:41:09, 1.49it/s] 29%|██▊ | 16469/57600 [2:53:02<7:31:20, 1.52it/s] 29%|██▊ | 16470/57600 [2:53:02<7:26:34, 1.54it/s] 29%|██▊ | 16471/57600 [2:53:03<7:26:27, 1.54it/s] 29%|██▊ | 16472/57600 [2:53:04<7:14:28, 1.58it/s] 29%|██▊ | 16473/57600 [2:53:04<7:14:55, 1.58it/s] 29%|██▊ | 16474/57600 [2:53:05<7:00:11, 1.63it/s] 29%|██▊ | 16475/57600 [2:53:06<7:06:23, 1.61it/s] 29%|██▊ | 16476/57600 [2:53:06<7:14:17, 1.58it/s] 29%|██▊ | 16477/57600 [2:53:07<7:06:13, 1.61it/s] 29%|██▊ | 16478/57600 [2:53:07<7:03:07, 1.62it/s] 29%|██▊ | 16479/57600 [2:53:08<6:58:48, 1.64it/s] 29%|██▊ | 16480/57600 [2:53:09<7:07:04, 1.60it/s] {'loss': 1.2597, 'learning_rate': 7.447386759581882e-07, 'epoch': 181.9} + 29%|██▊ | 16480/57600 [2:53:09<7:07:04, 1.60it/s] 29%|██▊ | 16481/57600 [2:53:09<7:10:59, 1.59it/s] 29%|██▊ | 16482/57600 [2:53:10<7:21:13, 1.55it/s] 29%|██▊ | 16483/57600 [2:53:11<7:19:15, 1.56it/s] 29%|██▊ | 16484/57600 [2:53:11<7:20:59, 1.55it/s] 29%|██▊ | 16485/57600 [2:53:12<7:12:34, 1.58it/s] 29%|██▊ | 16486/57600 [2:53:13<7:20:56, 1.55it/s] 29%|██▊ | 16487/57600 [2:53:13<7:18:01, 1.56it/s] 29%|██▊ | 16488/57600 [2:53:14<7:22:36, 1.55it/s] 29%|██▊ | 16489/57600 [2:53:14<7:13:38, 1.58it/s] 29%|██▊ | 16490/57600 [2:53:15<7:18:49, 1.56it/s] 29%|██▊ | 16491/57600 [2:53:16<7:17:09, 1.57it/s] 29%|██▊ | 16492/57600 [2:53:16<7:17:01, 1.57it/s] 29%|██▊ | 16493/57600 [2:53:17<7:16:43, 1.57it/s] 29%|██▊ | 16494/57600 [2:53:18<7:22:06, 1.55it/s] 29%|██▊ | 16495/57600 [2:53:18<7:03:39, 1.62it/s] 29%|██▊ | 16496/57600 [2:53:19<7:08:21, 1.60it/s] 29%|██▊ | 16497/57600 [2:53:19<6:59:54, 1.63it/s] 29%|██▊ | 16498/57600 [2:53:20<7:06:10, 1.61it/s] 29%|██▊ | 16499/57600 [2:53:21<7:00:53, 1.63it/s] 29%|██▊ | 16500/57600 [2:53:21<7:04:54, 1.61it/s] {'loss': 1.3262, 'learning_rate': 7.444250871080139e-07, 'epoch': 182.12} + 29%|██▊ | 16500/57600 [2:53:21<7:04:54, 1.61it/s] 29%|██▊ | 16501/57600 [2:53:22<7:10:18, 1.59it/s] 29%|██▊ | 16502/57600 [2:53:23<7:23:31, 1.54it/s] 29%|██▊ | 16503/57600 [2:53:23<7:27:55, 1.53it/s] 29%|██▊ | 16504/57600 [2:53:24<7:06:44, 1.61it/s] 29%|██▊ | 16505/57600 [2:53:24<7:02:44, 1.62it/s] 29%|██▊ | 16506/57600 [2:53:25<7:05:19, 1.61it/s] 29%|██▊ | 16507/57600 [2:53:26<7:07:14, 1.60it/s] 29%|██▊ | 16508/57600 [2:53:26<7:08:14, 1.60it/s] 29%|██▊ | 16509/57600 [2:53:27<7:15:42, 1.57it/s] 29%|██▊ | 16510/57600 [2:53:28<7:00:50, 1.63it/s] 29%|██▊ | 16511/57600 [2:53:28<6:59:48, 1.63it/s] 29%|██▊ | 16512/57600 [2:53:29<6:53:07, 1.66it/s] 29%|██▊ | 16513/57600 [2:53:29<6:59:35, 1.63it/s] 29%|██▊ | 16514/57600 [2:53:30<6:56:21, 1.64it/s] 29%|██▊ | 16515/57600 [2:53:31<6:46:39, 1.68it/s] 29%|██▊ | 16516/57600 [2:53:31<6:44:54, 1.69it/s] 29%|██▊ | 16517/57600 [2:53:32<6:41:13, 1.71it/s] 29%|██▊ | 16518/57600 [2:53:32<6:36:30, 1.73it/s] 29%|██▊ | 16519/57600 [2:53:33<6:36:46, 1.73it/s] 29%|██▊ | 16520/57600 [2:53:33<6:34:39, 1.73it/s] {'loss': 1.3519, 'learning_rate': 7.441114982578397e-07, 'epoch': 182.34} + 29%|██▊ | 16520/57600 [2:53:33<6:34:39, 1.73it/s] 29%|██▊ | 16521/57600 [2:53:34<6:59:11, 1.63it/s] 29%|██▊ | 16522/57600 [2:53:35<7:06:22, 1.61it/s] 29%|██▊ | 16523/57600 [2:53:35<7:17:03, 1.57it/s] 29%|██▊ | 16524/57600 [2:53:36<7:19:51, 1.56it/s] 29%|██▊ | 16525/57600 [2:53:37<7:22:53, 1.55it/s] 29%|██▊ | 16526/57600 [2:53:37<7:40:36, 1.49it/s] 29%|██▊ | 16527/57600 [2:53:38<7:41:36, 1.48it/s] 29%|██▊ | 16528/57600 [2:53:39<7:14:29, 1.58it/s] 29%|██▊ | 16529/57600 [2:53:39<7:03:00, 1.62it/s] 29%|██▊ | 16530/57600 [2:53:40<7:00:18, 1.63it/s] 29%|██▊ | 16531/57600 [2:53:41<7:03:26, 1.62it/s] 29%|██▊ | 16532/57600 [2:53:41<7:17:06, 1.57it/s] 29%|██▊ | 16533/57600 [2:53:42<7:07:02, 1.60it/s] 29%|██▊ | 16534/57600 [2:53:42<7:13:58, 1.58it/s] 29%|██▊ | 16535/57600 [2:53:43<7:12:03, 1.58it/s] 29%|██▊ | 16536/57600 [2:53:44<7:01:12, 1.62it/s] 29%|██▊ | 16537/57600 [2:53:44<7:08:31, 1.60it/s] 29%|██▊ | 16538/57600 [2:53:45<7:06:30, 1.60it/s] 29%|██▊ | 16539/57600 [2:53:46<7:18:56, 1.56it/s] 29%|██▊ | 16540/57600 [2:53:46<7:20:32, 1.55it/s] {'loss': 1.2241, 'learning_rate': 7.437979094076654e-07, 'epoch': 182.56} + 29%|██▊ | 16540/57600 [2:53:46<7:20:32, 1.55it/s] 29%|██▊ | 16541/57600 [2:53:47<7:19:02, 1.56it/s] 29%|██▊ | 16542/57600 [2:53:47<7:06:28, 1.60it/s] 29%|██▊ | 16543/57600 [2:53:48<7:17:59, 1.56it/s] 29%|██▊ | 16544/57600 [2:53:49<7:04:11, 1.61it/s] 29%|██▊ | 16545/57600 [2:53:49<6:57:24, 1.64it/s] 29%|██▊ | 16546/57600 [2:53:50<6:53:44, 1.65it/s] 29%|██▊ | 16547/57600 [2:53:51<6:58:14, 1.64it/s] 29%|██▊ | 16548/57600 [2:53:51<6:48:46, 1.67it/s] 29%|██▊ | 16549/57600 [2:53:52<6:49:07, 1.67it/s] 29%|██▊ | 16550/57600 [2:53:52<7:00:40, 1.63it/s] 29%|██▊ | 16551/57600 [2:53:53<6:56:26, 1.64it/s] 29%|██▊ | 16552/57600 [2:53:54<7:02:05, 1.62it/s] 29%|██▊ | 16553/57600 [2:53:54<6:43:55, 1.69it/s] 29%|██▊ | 16554/57600 [2:53:55<7:01:30, 1.62it/s] 29%|██▊ | 16555/57600 [2:53:55<6:53:15, 1.66it/s] 29%|██▊ | 16556/57600 [2:53:56<7:08:19, 1.60it/s] 29%|██▊ | 16557/57600 [2:53:57<7:10:27, 1.59it/s] 29%|██▊ | 16558/57600 [2:53:57<7:04:04, 1.61it/s] 29%|██▊ | 16559/57600 [2:53:58<7:18:43, 1.56it/s] 29%|██▉ | 16560/57600 [2:53:59<7:17:30, 1.56it/s] {'loss': 1.3083, 'learning_rate': 7.434843205574913e-07, 'epoch': 182.78} + 29%|██▉ | 16560/57600 [2:53:59<7:17:30, 1.56it/s] 29%|██▉ | 16561/57600 [2:53:59<7:18:15, 1.56it/s] 29%|██▉ | 16562/57600 [2:54:00<7:14:42, 1.57it/s] 29%|██▉ | 16563/57600 [2:54:00<7:01:02, 1.62it/s] 29%|██▉ | 16564/57600 [2:54:01<7:10:07, 1.59it/s] 29%|██▉ | 16565/57600 [2:54:02<7:13:49, 1.58it/s] 29%|██▉ | 16566/57600 [2:54:02<7:28:03, 1.53it/s] 29%|██▉ | 16567/57600 [2:54:03<7:13:32, 1.58it/s] 29%|██▉ | 16568/57600 [2:54:04<7:02:58, 1.62it/s] 29%|██▉ | 16569/57600 [2:54:04<7:06:42, 1.60it/s] 29%|██▉ | 16570/57600 [2:54:05<6:57:45, 1.64it/s] 29%|██▉ | 16571/57600 [2:54:06<7:14:25, 1.57it/s] 29%|██▉ | 16572/57600 [2:54:06<7:24:16, 1.54it/s] 29%|██▉ | 16573/57600 [2:54:07<7:11:34, 1.58it/s] 29%|██▉ | 16574/57600 [2:54:07<7:07:41, 1.60it/s] 29%|██▉ | 16575/57600 [2:54:08<7:00:02, 1.63it/s] 29%|██▉ | 16576/57600 [2:54:09<6:54:31, 1.65it/s] 29%|██▉ | 16577/57600 [2:54:09<6:51:29, 1.66it/s] 29%|██▉ | 16578/57600 [2:54:10<6:57:24, 1.64it/s] 29%|██▉ | 16579/57600 [2:54:10<6:58:00, 1.64it/s] 29%|██▉ | 16580/57600 [2:54:11<7:07:36, 1.60it/s] {'loss': 1.2853, 'learning_rate': 7.43170731707317e-07, 'epoch': 183.0} + 29%|██▉ | 16580/57600 [2:54:11<7:07:36, 1.60it/s] 29%|██▉ | 16581/57600 [2:54:12<7:08:14, 1.60it/s] 29%|██▉ | 16582/57600 [2:54:12<7:15:42, 1.57it/s] 29%|██▉ | 16583/57600 [2:54:13<7:11:01, 1.59it/s] 29%|██▉ | 16584/57600 [2:54:14<7:01:07, 1.62it/s] 29%|██▉ | 16585/57600 [2:54:14<6:53:35, 1.65it/s] 29%|██▉ | 16586/57600 [2:54:15<7:02:02, 1.62it/s] 29%|██▉ | 16587/57600 [2:54:15<7:02:03, 1.62it/s] 29%|██▉ | 16588/57600 [2:54:16<6:53:12, 1.65it/s] 29%|██▉ | 16589/57600 [2:54:17<7:07:30, 1.60it/s] 29%|██▉ | 16590/57600 [2:54:17<6:57:55, 1.64it/s] 29%|██▉ | 16591/57600 [2:54:18<6:45:09, 1.69it/s] 29%|██▉ | 16592/57600 [2:54:18<6:49:41, 1.67it/s] 29%|██▉ | 16593/57600 [2:54:19<7:12:27, 1.58it/s] 29%|██▉ | 16594/57600 [2:54:20<7:04:05, 1.61it/s] 29%|██▉ | 16595/57600 [2:54:20<7:03:11, 1.61it/s] 29%|██▉ | 16596/57600 [2:54:21<6:56:54, 1.64it/s] 29%|██▉ | 16597/57600 [2:54:22<6:59:40, 1.63it/s] 29%|██▉ | 16598/57600 [2:54:22<6:57:27, 1.64it/s] 29%|██▉ | 16599/57600 [2:54:23<6:54:36, 1.65it/s] 29%|██▉ | 16600/57600 [2:54:23<6:57:12, 1.64it/s] {'loss': 1.3211, 'learning_rate': 7.428571428571427e-07, 'epoch': 183.22} + 29%|██▉ | 16600/57600 [2:54:23<6:57:12, 1.64it/s] 29%|██▉ | 16601/57600 [2:54:24<7:13:11, 1.58it/s] 29%|██▉ | 16602/57600 [2:54:25<7:23:26, 1.54it/s] 29%|██▉ | 16603/57600 [2:54:25<7:18:16, 1.56it/s] 29%|██▉ | 16604/57600 [2:54:26<7:30:28, 1.52it/s] 29%|██▉ | 16605/57600 [2:54:27<7:15:09, 1.57it/s] 29%|██▉ | 16606/57600 [2:54:27<7:08:40, 1.59it/s] 29%|██▉ | 16607/57600 [2:54:28<6:58:23, 1.63it/s] 29%|██▉ | 16608/57600 [2:54:28<6:46:44, 1.68it/s] 29%|██▉ | 16609/57600 [2:54:29<6:53:52, 1.65it/s] 29%|██▉ | 16610/57600 [2:54:30<7:01:49, 1.62it/s] 29%|██▉ | 16611/57600 [2:54:30<7:18:11, 1.56it/s] 29%|██▉ | 16612/57600 [2:54:31<7:23:38, 1.54it/s] 29%|██▉ | 16613/57600 [2:54:32<7:15:13, 1.57it/s] 29%|██▉ | 16614/57600 [2:54:32<7:18:57, 1.56it/s] 29%|██▉ | 16615/57600 [2:54:33<7:02:48, 1.62it/s] 29%|██▉ | 16616/57600 [2:54:33<7:07:55, 1.60it/s] 29%|██▉ | 16617/57600 [2:54:34<7:10:06, 1.59it/s] 29%|██▉ | 16618/57600 [2:54:35<7:15:43, 1.57it/s] 29%|██▉ | 16619/57600 [2:54:35<7:13:36, 1.58it/s] 29%|██▉ | 16620/57600 [2:54:36<7:19:39, 1.55it/s] {'loss': 1.2444, 'learning_rate': 7.425435540069687e-07, 'epoch': 183.44} + 29%|██▉ | 16620/57600 [2:54:36<7:19:39, 1.55it/s] 29%|██▉ | 16621/57600 [2:54:37<7:05:55, 1.60it/s] 29%|██▉ | 16622/57600 [2:54:37<6:53:05, 1.65it/s] 29%|██▉ | 16623/57600 [2:54:38<6:56:42, 1.64it/s] 29%|██▉ | 16624/57600 [2:54:38<6:51:08, 1.66it/s] 29%|██▉ | 16625/57600 [2:54:39<6:50:32, 1.66it/s] 29%|██▉ | 16626/57600 [2:54:40<7:11:19, 1.58it/s] 29%|██▉ | 16627/57600 [2:54:40<7:03:28, 1.61it/s] 29%|██▉ | 16628/57600 [2:54:41<7:10:29, 1.59it/s] 29%|██▉ | 16629/57600 [2:54:42<7:20:36, 1.55it/s] 29%|██▉ | 16630/57600 [2:54:42<7:19:25, 1.55it/s] 29%|██▉ | 16631/57600 [2:54:43<7:14:50, 1.57it/s] 29%|██▉ | 16632/57600 [2:54:43<7:03:35, 1.61it/s] 29%|██▉ | 16633/57600 [2:54:44<6:56:16, 1.64it/s] 29%|██▉ | 16634/57600 [2:54:45<6:52:19, 1.66it/s] 29%|██▉ | 16635/57600 [2:54:45<7:00:50, 1.62it/s] 29%|██▉ | 16636/57600 [2:54:46<6:58:56, 1.63it/s] 29%|██▉ | 16637/57600 [2:54:47<7:03:10, 1.61it/s] 29%|██▉ | 16638/57600 [2:54:47<7:13:37, 1.57it/s] 29%|██▉ | 16639/57600 [2:54:48<7:18:51, 1.56it/s] 29%|██▉ | 16640/57600 [2:54:48<6:58:17, 1.63it/s] {'loss': 1.2557, 'learning_rate': 7.422299651567944e-07, 'epoch': 183.66} + 29%|██▉ | 16640/57600 [2:54:48<6:58:17, 1.63it/s] 29%|██▉ | 16641/57600 [2:54:49<6:53:13, 1.65it/s] 29%|██▉ | 16642/57600 [2:54:50<6:59:30, 1.63it/s] 29%|██▉ | 16643/57600 [2:54:50<6:55:12, 1.64it/s] 29%|██▉ | 16644/57600 [2:54:51<6:57:04, 1.64it/s] 29%|██▉ | 16645/57600 [2:54:52<7:03:45, 1.61it/s] 29%|██▉ | 16646/57600 [2:54:52<7:00:02, 1.62it/s] 29%|██▉ | 16647/57600 [2:54:53<7:06:24, 1.60it/s] 29%|██▉ | 16648/57600 [2:54:53<7:00:30, 1.62it/s] 29%|██▉ | 16649/57600 [2:54:54<6:52:09, 1.66it/s] 29%|██▉ | 16650/57600 [2:54:55<6:53:46, 1.65it/s] 29%|██▉ | 16651/57600 [2:54:55<6:57:40, 1.63it/s] 29%|██▉ | 16652/57600 [2:54:56<6:55:27, 1.64it/s] 29%|██▉ | 16653/57600 [2:54:56<6:58:09, 1.63it/s] 29%|██▉ | 16654/57600 [2:54:57<6:46:16, 1.68it/s] 29%|██▉ | 16655/57600 [2:54:58<7:07:57, 1.59it/s] 29%|██▉ | 16656/57600 [2:54:58<6:59:59, 1.62it/s] 29%|██▉ | 16657/57600 [2:54:59<6:59:31, 1.63it/s] 29%|██▉ | 16658/57600 [2:54:59<6:57:44, 1.63it/s] 29%|██▉ | 16659/57600 [2:55:00<7:01:33, 1.62it/s] 29%|██▉ | 16660/57600 [2:55:01<7:08:28, 1.59it/s] {'loss': 1.249, 'learning_rate': 7.419163763066202e-07, 'epoch': 183.89} + 29%|██▉ | 16660/57600 [2:55:01<7:08:28, 1.59it/s] 29%|██▉ | 16661/57600 [2:55:01<7:11:10, 1.58it/s] 29%|██▉ | 16662/57600 [2:55:02<7:13:00, 1.58it/s] 29%|██▉ | 16663/57600 [2:55:03<7:27:01, 1.53it/s] 29%|██▉ | 16664/57600 [2:55:03<7:41:23, 1.48it/s] 29%|██▉ | 16665/57600 [2:55:04<7:36:30, 1.49it/s] 29%|██▉ | 16666/57600 [2:55:05<7:42:23, 1.48it/s] 29%|██▉ | 16667/57600 [2:55:05<7:30:31, 1.51it/s] 29%|██▉ | 16668/57600 [2:55:06<7:37:40, 1.49it/s] 29%|██▉ | 16669/57600 [2:55:07<7:28:25, 1.52it/s] 29%|██▉ | 16670/57600 [2:55:07<7:18:48, 1.55it/s] 29%|██▉ | 16671/57600 [2:55:08<7:30:20, 1.51it/s] 29%|██▉ | 16672/57600 [2:55:09<7:27:39, 1.52it/s] 29%|██▉ | 16673/57600 [2:55:09<7:22:10, 1.54it/s] 29%|██▉ | 16674/57600 [2:55:10<7:15:04, 1.57it/s] 29%|██▉ | 16675/57600 [2:55:11<7:06:31, 1.60it/s] 29%|██▉ | 16676/57600 [2:55:11<7:07:43, 1.59it/s] 29%|██▉ | 16677/57600 [2:55:12<7:01:43, 1.62it/s] 29%|██▉ | 16678/57600 [2:55:12<7:03:12, 1.61it/s] 29%|██▉ | 16679/57600 [2:55:13<7:04:43, 1.61it/s] 29%|██▉ | 16680/57600 [2:55:14<6:55:06, 1.64it/s] {'loss': 1.2379, 'learning_rate': 7.416027874564459e-07, 'epoch': 184.11} + 29%|██▉ | 16680/57600 [2:55:14<6:55:06, 1.64it/s] 29%|██▉ | 16681/57600 [2:55:14<7:21:04, 1.55it/s] 29%|██▉ | 16682/57600 [2:55:15<7:03:42, 1.61it/s] 29%|██▉ | 16683/57600 [2:55:15<6:59:31, 1.63it/s] 29%|██▉ | 16684/57600 [2:55:16<7:05:42, 1.60it/s] 29%|██▉ | 16685/57600 [2:55:17<6:55:00, 1.64it/s] 29%|██▉ | 16686/57600 [2:55:17<6:57:30, 1.63it/s] 29%|██▉ | 16687/57600 [2:55:18<7:08:10, 1.59it/s] 29%|██▉ | 16688/57600 [2:55:19<7:18:15, 1.56it/s] 29%|██▉ | 16689/57600 [2:55:19<7:04:48, 1.61it/s] 29%|██▉ | 16690/57600 [2:55:20<7:19:15, 1.55it/s] 29%|██▉ | 16691/57600 [2:55:21<7:29:02, 1.52it/s] 29%|██▉ | 16692/57600 [2:55:21<7:33:23, 1.50it/s] 29%|██▉ | 16693/57600 [2:55:22<7:25:33, 1.53it/s] 29%|██▉ | 16694/57600 [2:55:23<7:09:00, 1.59it/s] 29%|██▉ | 16695/57600 [2:55:23<7:11:25, 1.58it/s] 29%|██▉ | 16696/57600 [2:55:24<6:56:28, 1.64it/s] 29%|██▉ | 16697/57600 [2:55:24<7:02:52, 1.61it/s] 29%|██▉ | 16698/57600 [2:55:25<7:16:33, 1.56it/s] 29%|██▉ | 16699/57600 [2:55:26<7:02:07, 1.61it/s] 29%|██▉ | 16700/57600 [2:55:26<7:09:12, 1.59it/s] {'loss': 1.2756, 'learning_rate': 7.412891986062718e-07, 'epoch': 184.33} + 29%|██▉ | 16700/57600 [2:55:26<7:09:12, 1.59it/s] 29%|██▉ | 16701/57600 [2:55:27<6:56:21, 1.64it/s] 29%|██▉ | 16702/57600 [2:55:27<7:00:56, 1.62it/s] 29%|██▉ | 16703/57600 [2:55:28<6:59:05, 1.63it/s] 29%|██▉ | 16704/57600 [2:55:29<7:08:38, 1.59it/s] 29%|██▉ | 16705/57600 [2:55:29<7:07:55, 1.59it/s] 29%|██▉ | 16706/57600 [2:55:30<7:26:03, 1.53it/s] 29%|██▉ | 16707/57600 [2:55:31<7:13:13, 1.57it/s] 29%|██▉ | 16708/57600 [2:55:31<7:05:57, 1.60it/s] 29%|██▉ | 16709/57600 [2:55:32<7:07:07, 1.60it/s] 29%|██▉ | 16710/57600 [2:55:33<7:07:00, 1.60it/s] 29%|██▉ | 16711/57600 [2:55:33<7:11:44, 1.58it/s] 29%|██▉ | 16712/57600 [2:55:34<7:11:12, 1.58it/s] 29%|██▉ | 16713/57600 [2:55:34<7:04:52, 1.60it/s] 29%|██▉ | 16714/57600 [2:55:35<6:59:27, 1.62it/s] 29%|██▉ | 16715/57600 [2:55:36<6:59:17, 1.63it/s] 29%|██▉ | 16716/57600 [2:55:36<7:09:01, 1.59it/s] 29%|██▉ | 16717/57600 [2:55:37<7:21:08, 1.54it/s] 29%|██▉ | 16718/57600 [2:55:38<7:14:02, 1.57it/s] 29%|██▉ | 16719/57600 [2:55:38<7:20:12, 1.55it/s] 29%|██▉ | 16720/57600 [2:55:39<7:09:42, 1.59it/s] {'loss': 1.2827, 'learning_rate': 7.409756097560975e-07, 'epoch': 184.55} + 29%|██▉ | 16720/57600 [2:55:39<7:09:42, 1.59it/s] 29%|██▉ | 16721/57600 [2:55:39<7:03:09, 1.61it/s] 29%|██▉ | 16722/57600 [2:55:40<7:03:27, 1.61it/s] 29%|██▉ | 16723/57600 [2:55:41<6:56:19, 1.64it/s] 29%|██▉ | 16724/57600 [2:55:41<7:14:50, 1.57it/s] 29%|██▉ | 16725/57600 [2:55:42<7:07:21, 1.59it/s] 29%|██▉ | 16726/57600 [2:55:43<6:53:47, 1.65it/s] 29%|██▉ | 16727/57600 [2:55:43<7:00:09, 1.62it/s] 29%|██▉ | 16728/57600 [2:55:44<6:51:26, 1.66it/s] 29%|██▉ | 16729/57600 [2:55:44<6:59:43, 1.62it/s] 29%|██▉ | 16730/57600 [2:55:45<7:14:31, 1.57it/s] 29%|██▉ | 16731/57600 [2:55:46<7:02:22, 1.61it/s] 29%|██▉ | 16732/57600 [2:55:46<7:04:15, 1.61it/s] 29%|██▉ | 16733/57600 [2:55:47<7:06:52, 1.60it/s] 29%|██▉ | 16734/57600 [2:55:47<6:54:37, 1.64it/s] 29%|██▉ | 16735/57600 [2:55:48<7:04:08, 1.61it/s] 29%|██▉ | 16736/57600 [2:55:49<7:00:52, 1.62it/s] 29%|██▉ | 16737/57600 [2:55:49<7:19:50, 1.55it/s] 29%|██▉ | 16738/57600 [2:55:50<7:11:00, 1.58it/s] 29%|██▉ | 16739/57600 [2:55:51<7:00:19, 1.62it/s] 29%|██▉ | 16740/57600 [2:55:51<6:45:14, 1.68it/s] {'loss': 1.2726, 'learning_rate': 7.406620209059234e-07, 'epoch': 184.77} + 29%|██▉ | 16740/57600 [2:55:51<6:45:14, 1.68it/s] 29%|██▉ | 16741/57600 [2:55:52<6:47:18, 1.67it/s] 29%|██▉ | 16742/57600 [2:55:52<7:03:26, 1.61it/s] 29%|██▉ | 16743/57600 [2:55:53<6:59:34, 1.62it/s] 29%|██▉ | 16744/57600 [2:55:54<6:58:18, 1.63it/s] 29%|██▉ | 16745/57600 [2:55:54<6:52:03, 1.65it/s] 29%|██▉ | 16746/57600 [2:55:55<6:44:36, 1.68it/s] 29%|██▉ | 16747/57600 [2:55:55<7:01:07, 1.62it/s] 29%|██▉ | 16748/57600 [2:55:56<6:56:42, 1.63it/s] 29%|██▉ | 16749/57600 [2:55:57<7:06:04, 1.60it/s] 29%|██▉ | 16750/57600 [2:55:57<6:58:41, 1.63it/s] 29%|██▉ | 16751/57600 [2:55:58<7:03:16, 1.61it/s] 29%|██▉ | 16752/57600 [2:55:59<6:59:35, 1.62it/s] 29%|██▉ | 16753/57600 [2:55:59<6:51:04, 1.66it/s] 29%|██▉ | 16754/57600 [2:56:00<7:11:03, 1.58it/s] 29%|██▉ | 16755/57600 [2:56:00<7:03:26, 1.61it/s] 29%|██▉ | 16756/57600 [2:56:01<7:04:32, 1.60it/s] 29%|██▉ | 16757/57600 [2:56:02<7:15:11, 1.56it/s] 29%|██▉ | 16758/57600 [2:56:02<7:10:48, 1.58it/s] 29%|██▉ | 16759/57600 [2:56:03<7:07:33, 1.59it/s] 29%|██▉ | 16760/57600 [2:56:04<7:02:21, 1.61it/s] {'loss': 1.298, 'learning_rate': 7.403484320557491e-07, 'epoch': 184.99} + 29%|██▉ | 16760/57600 [2:56:04<7:02:21, 1.61it/s] 29%|██▉ | 16761/57600 [2:56:04<7:10:52, 1.58it/s] 29%|██▉ | 16762/57600 [2:56:05<7:13:56, 1.57it/s] 29%|██▉ | 16763/57600 [2:56:06<7:14:31, 1.57it/s] 29%|██▉ | 16764/57600 [2:56:06<7:25:42, 1.53it/s] 29%|██▉ | 16765/57600 [2:56:07<7:10:45, 1.58it/s] 29%|██▉ | 16766/57600 [2:56:08<7:31:07, 1.51it/s] 29%|██▉ | 16767/57600 [2:56:08<7:39:50, 1.48it/s] 29%|██▉ | 16768/57600 [2:56:09<7:21:58, 1.54it/s] 29%|██▉ | 16769/57600 [2:56:09<7:02:49, 1.61it/s] 29%|██▉ | 16770/57600 [2:56:10<6:56:04, 1.64it/s] 29%|██▉ | 16771/57600 [2:56:11<6:46:03, 1.68it/s] 29%|██▉ | 16772/57600 [2:56:11<6:49:06, 1.66it/s] 29%|██▉ | 16773/57600 [2:56:12<6:54:39, 1.64it/s] 29%|██▉ | 16774/57600 [2:56:12<7:13:08, 1.57it/s] 29%|██▉ | 16775/57600 [2:56:13<7:11:06, 1.58it/s] 29%|██▉ | 16776/57600 [2:56:14<7:06:40, 1.59it/s] 29%|██▉ | 16777/57600 [2:56:14<7:14:53, 1.56it/s] 29%|██▉ | 16778/57600 [2:56:15<7:14:12, 1.57it/s] 29%|██▉ | 16779/57600 [2:56:16<7:11:26, 1.58it/s] 29%|██▉ | 16780/57600 [2:56:16<7:11:27, 1.58it/s] {'loss': 1.2739, 'learning_rate': 7.400348432055749e-07, 'epoch': 185.21} + 29%|██▉ | 16780/57600 [2:56:16<7:11:27, 1.58it/s] 29%|██▉ | 16781/57600 [2:56:17<7:01:56, 1.61it/s] 29%|██▉ | 16782/57600 [2:56:17<6:57:03, 1.63it/s] 29%|██▉ | 16783/57600 [2:56:18<6:59:15, 1.62it/s] 29%|██▉ | 16784/57600 [2:56:19<7:05:16, 1.60it/s] 29%|██▉ | 16785/57600 [2:56:19<7:16:20, 1.56it/s] 29%|██▉ | 16786/57600 [2:56:20<7:13:09, 1.57it/s] 29%|██▉ | 16787/57600 [2:56:21<7:14:25, 1.57it/s] 29%|██▉ | 16788/57600 [2:56:21<7:01:45, 1.61it/s] 29%|██▉ | 16789/57600 [2:56:22<6:53:38, 1.64it/s] 29%|██▉ | 16790/57600 [2:56:22<6:49:15, 1.66it/s] 29%|██▉ | 16791/57600 [2:56:23<6:46:13, 1.67it/s] 29%|██▉ | 16792/57600 [2:56:24<6:57:28, 1.63it/s] 29%|██▉ | 16793/57600 [2:56:24<7:01:50, 1.61it/s] 29%|██▉ | 16794/57600 [2:56:25<6:54:07, 1.64it/s] 29%|██▉ | 16795/57600 [2:56:26<7:01:42, 1.61it/s] 29%|██▉ | 16796/57600 [2:56:26<6:53:26, 1.64it/s] 29%|██▉ | 16797/57600 [2:56:27<6:56:02, 1.63it/s] 29%|██▉ | 16798/57600 [2:56:27<7:06:55, 1.59it/s] 29%|██▉ | 16799/57600 [2:56:28<7:05:25, 1.60it/s] 29%|██▉ | 16800/57600 [2:56:29<6:56:20, 1.63it/s] {'loss': 1.2657, 'learning_rate': 7.397212543554006e-07, 'epoch': 185.43} + 29%|██▉ | 16800/57600 [2:56:29<6:56:20, 1.63it/s] 29%|██▉ | 16801/57600 [2:56:29<6:52:18, 1.65it/s] 29%|██▉ | 16802/57600 [2:56:30<6:59:56, 1.62it/s] 29%|██▉ | 16803/57600 [2:56:30<6:55:22, 1.64it/s] 29%|██▉ | 16804/57600 [2:56:31<6:55:06, 1.64it/s] 29%|██▉ | 16805/57600 [2:56:32<6:53:56, 1.64it/s] 29%|██▉ | 16806/57600 [2:56:32<7:04:37, 1.60it/s] 29%|██▉ | 16807/57600 [2:56:33<7:01:44, 1.61it/s] 29%|██▉ | 16808/57600 [2:56:34<7:06:42, 1.59it/s] 29%|██▉ | 16809/57600 [2:56:34<7:21:11, 1.54it/s] 29%|██▉ | 16810/57600 [2:56:35<7:14:31, 1.56it/s] 29%|██▉ | 16811/57600 [2:56:35<7:04:34, 1.60it/s] 29%|██▉ | 16812/57600 [2:56:36<7:09:26, 1.58it/s] 29%|██▉ | 16813/57600 [2:56:37<7:16:31, 1.56it/s] 29%|██▉ | 16814/57600 [2:56:37<7:02:08, 1.61it/s] 29%|██▉ | 16815/57600 [2:56:38<6:49:44, 1.66it/s] 29%|██▉ | 16816/57600 [2:56:39<6:51:30, 1.65it/s] 29%|██▉ | 16817/57600 [2:56:39<7:05:08, 1.60it/s] 29%|██▉ | 16818/57600 [2:56:40<6:53:49, 1.64it/s] 29%|██▉ | 16819/57600 [2:56:40<6:56:46, 1.63it/s] 29%|██▉ | 16820/57600 [2:56:41<7:07:19, 1.59it/s] {'loss': 1.2605, 'learning_rate': 7.394076655052264e-07, 'epoch': 185.65} + 29%|██▉ | 16820/57600 [2:56:41<7:07:19, 1.59it/s] 29%|██▉ | 16821/57600 [2:56:42<6:57:49, 1.63it/s] 29%|██▉ | 16822/57600 [2:56:42<6:56:43, 1.63it/s] 29%|██▉ | 16823/57600 [2:56:43<6:47:30, 1.67it/s] 29%|██▉ | 16824/57600 [2:56:43<6:41:32, 1.69it/s] 29%|██▉ | 16825/57600 [2:56:44<7:01:46, 1.61it/s] 29%|██▉ | 16826/57600 [2:56:45<7:00:30, 1.62it/s] 29%|██▉ | 16827/57600 [2:56:45<7:09:09, 1.58it/s] 29%|██▉ | 16828/57600 [2:56:46<7:03:46, 1.60it/s] 29%|██▉ | 16829/57600 [2:56:47<7:09:37, 1.58it/s] 29%|██▉ | 16830/57600 [2:56:47<7:01:04, 1.61it/s] 29%|██▉ | 16831/57600 [2:56:48<7:16:17, 1.56it/s] 29%|██▉ | 16832/57600 [2:56:49<7:19:16, 1.55it/s] 29%|██▉ | 16833/57600 [2:56:49<7:22:26, 1.54it/s] 29%|██▉ | 16834/57600 [2:56:50<7:05:55, 1.60it/s] 29%|██▉ | 16835/57600 [2:56:50<7:00:28, 1.62it/s] 29%|██▉ | 16836/57600 [2:56:51<7:02:42, 1.61it/s] 29%|██▉ | 16837/57600 [2:56:52<6:57:05, 1.63it/s] 29%|██▉ | 16838/57600 [2:56:52<7:09:48, 1.58it/s] 29%|██▉ | 16839/57600 [2:56:53<7:14:33, 1.56it/s] 29%|██▉ | 16840/57600 [2:56:54<7:05:17, 1.60it/s] {'loss': 1.2664, 'learning_rate': 7.390940766550522e-07, 'epoch': 185.87} + 29%|██▉ | 16840/57600 [2:56:54<7:05:17, 1.60it/s] 29%|██▉ | 16841/57600 [2:56:54<7:09:07, 1.58it/s] 29%|██▉ | 16842/57600 [2:56:55<7:04:31, 1.60it/s] 29%|██▉ | 16843/57600 [2:56:55<6:49:15, 1.66it/s] 29%|██▉ | 16844/57600 [2:56:56<7:04:15, 1.60it/s] 29%|██▉ | 16845/57600 [2:56:57<7:03:47, 1.60it/s] 29%|██▉ | 16846/57600 [2:56:57<7:00:04, 1.62it/s] 29%|██▉ | 16847/57600 [2:56:58<7:03:39, 1.60it/s] 29%|██▉ | 16848/57600 [2:56:58<6:49:47, 1.66it/s] 29%|██▉ | 16849/57600 [2:56:59<6:43:33, 1.68it/s] 29%|██▉ | 16850/57600 [2:57:00<6:35:27, 1.72it/s] 29%|██▉ | 16851/57600 [2:57:00<6:35:59, 1.72it/s] 29%|██▉ | 16852/57600 [2:57:01<7:05:24, 1.60it/s] 29%|██▉ | 16853/57600 [2:57:02<7:13:23, 1.57it/s] 29%|██▉ | 16854/57600 [2:57:02<7:06:37, 1.59it/s] 29%|██▉ | 16855/57600 [2:57:03<7:04:27, 1.60it/s] 29%|██▉ | 16856/57600 [2:57:03<7:09:46, 1.58it/s] 29%|██▉ | 16857/57600 [2:57:04<7:05:12, 1.60it/s] 29%|██▉ | 16858/57600 [2:57:05<7:04:23, 1.60it/s] 29%|██▉ | 16859/57600 [2:57:05<7:00:53, 1.61it/s] 29%|██▉ | 16860/57600 [2:57:06<7:02:30, 1.61it/s] {'loss': 1.278, 'learning_rate': 7.38780487804878e-07, 'epoch': 186.09} + 29%|██▉ | 16860/57600 [2:57:06<7:02:30, 1.61it/s] 29%|██▉ | 16861/57600 [2:57:07<7:03:37, 1.60it/s] 29%|██▉ | 16862/57600 [2:57:07<7:00:07, 1.62it/s] 29%|██▉ | 16863/57600 [2:57:08<7:05:42, 1.59it/s] 29%|██▉ | 16864/57600 [2:57:08<7:07:06, 1.59it/s] 29%|██▉ | 16865/57600 [2:57:09<7:25:04, 1.53it/s] 29%|██▉ | 16866/57600 [2:57:10<7:08:35, 1.58it/s] 29%|██▉ | 16867/57600 [2:57:10<7:18:21, 1.55it/s] 29%|██▉ | 16868/57600 [2:57:11<7:23:34, 1.53it/s] 29%|██▉ | 16869/57600 [2:57:12<7:22:18, 1.53it/s] 29%|██▉ | 16870/57600 [2:57:12<7:16:12, 1.56it/s] 29%|██▉ | 16871/57600 [2:57:13<7:12:13, 1.57it/s] 29%|██▉ | 16872/57600 [2:57:14<7:21:05, 1.54it/s] 29%|██▉ | 16873/57600 [2:57:14<7:07:42, 1.59it/s] 29%|██▉ | 16874/57600 [2:57:15<7:00:20, 1.61it/s] 29%|██▉ | 16875/57600 [2:57:15<6:50:32, 1.65it/s] 29%|██▉ | 16876/57600 [2:57:16<6:51:14, 1.65it/s] 29%|██▉ | 16877/57600 [2:57:17<6:50:05, 1.66it/s] 29%|██▉ | 16878/57600 [2:57:17<6:54:06, 1.64it/s] 29%|██▉ | 16879/57600 [2:57:18<6:58:44, 1.62it/s] 29%|██▉ | 16880/57600 [2:57:18<6:48:30, 1.66it/s] {'loss': 1.276, 'learning_rate': 7.384668989547038e-07, 'epoch': 186.31} + 29%|██▉ | 16880/57600 [2:57:18<6:48:30, 1.66it/s] 29%|██▉ | 16881/57600 [2:57:19<6:53:45, 1.64it/s] 29%|██▉ | 16882/57600 [2:57:20<6:52:54, 1.64it/s] 29%|██▉ | 16883/57600 [2:57:20<6:52:31, 1.65it/s] 29%|██▉ | 16884/57600 [2:57:21<6:56:01, 1.63it/s] 29%|██▉ | 16885/57600 [2:57:22<7:15:13, 1.56it/s] 29%|██▉ | 16886/57600 [2:57:22<7:10:53, 1.57it/s] 29%|██▉ | 16887/57600 [2:57:23<6:54:05, 1.64it/s] 29%|██▉ | 16888/57600 [2:57:23<7:01:55, 1.61it/s] 29%|██▉ | 16889/57600 [2:57:24<7:06:28, 1.59it/s] 29%|██▉ | 16890/57600 [2:57:25<7:10:27, 1.58it/s] 29%|██▉ | 16891/57600 [2:57:25<6:59:44, 1.62it/s] 29%|██▉ | 16892/57600 [2:57:26<6:56:54, 1.63it/s] 29%|██▉ | 16893/57600 [2:57:26<6:52:55, 1.64it/s] 29%|██▉ | 16894/57600 [2:57:27<7:02:24, 1.61it/s] 29%|██▉ | 16895/57600 [2:57:28<7:12:19, 1.57it/s] 29%|██▉ | 16896/57600 [2:57:28<6:57:04, 1.63it/s] 29%|██▉ | 16897/57600 [2:57:29<7:09:45, 1.58it/s] 29%|██▉ | 16898/57600 [2:57:30<7:12:15, 1.57it/s] 29%|██▉ | 16899/57600 [2:57:30<7:17:23, 1.55it/s] 29%|██▉ | 16900/57600 [2:57:31<6:54:35, 1.64it/s] {'loss': 1.2683, 'learning_rate': 7.381533101045296e-07, 'epoch': 186.53} + 29%|██▉ | 16900/57600 [2:57:31<6:54:35, 1.64it/s] 29%|██▉ | 16901/57600 [2:57:32<7:00:52, 1.61it/s] 29%|██▉ | 16902/57600 [2:57:32<7:12:22, 1.57it/s] 29%|██▉ | 16903/57600 [2:57:33<7:09:08, 1.58it/s] 29%|██▉ | 16904/57600 [2:57:33<7:16:03, 1.56it/s] 29%|██▉ | 16905/57600 [2:57:34<7:10:51, 1.57it/s] 29%|██▉ | 16906/57600 [2:57:35<7:12:46, 1.57it/s] 29%|██▉ | 16907/57600 [2:57:35<6:58:03, 1.62it/s] 29%|██▉ | 16908/57600 [2:57:36<7:00:43, 1.61it/s] 29%|██▉ | 16909/57600 [2:57:37<6:56:55, 1.63it/s] 29%|██▉ | 16910/57600 [2:57:37<7:05:16, 1.59it/s] 29%|██▉ | 16911/57600 [2:57:38<6:57:38, 1.62it/s] 29%|██▉ | 16912/57600 [2:57:39<7:16:53, 1.55it/s] 29%|██▉ | 16913/57600 [2:57:39<7:20:15, 1.54it/s] 29%|██▉ | 16914/57600 [2:57:40<7:14:50, 1.56it/s] 29%|██▉ | 16915/57600 [2:57:40<7:00:24, 1.61it/s] 29%|██▉ | 16916/57600 [2:57:41<6:56:47, 1.63it/s] 29%|██▉ | 16917/57600 [2:57:42<6:53:30, 1.64it/s] 29%|██▉ | 16918/57600 [2:57:42<6:58:09, 1.62it/s] 29%|██▉ | 16919/57600 [2:57:43<6:55:59, 1.63it/s] 29%|██▉ | 16920/57600 [2:57:43<6:57:46, 1.62it/s] {'loss': 1.265, 'learning_rate': 7.378397212543554e-07, 'epoch': 186.75} + 29%|██▉ | 16920/57600 [2:57:43<6:57:46, 1.62it/s] 29%|██▉ | 16921/57600 [2:57:44<7:01:12, 1.61it/s] 29%|██▉ | 16922/57600 [2:57:45<6:52:15, 1.64it/s] 29%|██▉ | 16923/57600 [2:57:45<6:48:06, 1.66it/s] 29%|██▉ | 16924/57600 [2:57:46<6:48:36, 1.66it/s] 29%|██▉ | 16925/57600 [2:57:46<6:48:38, 1.66it/s] 29%|██▉ | 16926/57600 [2:57:47<6:39:00, 1.70it/s] 29%|██▉ | 16927/57600 [2:57:48<6:41:09, 1.69it/s] 29%|██▉ | 16928/57600 [2:57:48<6:43:36, 1.68it/s] 29%|██▉ | 16929/57600 [2:57:49<6:50:36, 1.65it/s] 29%|██▉ | 16930/57600 [2:57:49<6:40:15, 1.69it/s] 29%|██▉ | 16931/57600 [2:57:50<6:56:07, 1.63it/s] 29%|██▉ | 16932/57600 [2:57:51<6:45:07, 1.67it/s] 29%|██▉ | 16933/57600 [2:57:51<6:39:31, 1.70it/s] 29%|██▉ | 16934/57600 [2:57:52<7:01:26, 1.61it/s] 29%|██▉ | 16935/57600 [2:57:52<6:59:41, 1.61it/s] 29%|██▉ | 16936/57600 [2:57:53<7:08:27, 1.58it/s] 29%|██▉ | 16937/57600 [2:57:54<7:05:03, 1.59it/s] 29%|██▉ | 16938/57600 [2:57:54<7:03:51, 1.60it/s] 29%|██▉ | 16939/57600 [2:57:55<7:23:43, 1.53it/s] 29%|██▉ | 16940/57600 [2:57:56<7:22:36, 1.53it/s] {'loss': 1.3122, 'learning_rate': 7.375261324041811e-07, 'epoch': 186.98} + 29%|██▉ | 16940/57600 [2:57:56<7:22:36, 1.53it/s] 29%|██▉ | 16941/57600 [2:57:56<7:23:07, 1.53it/s] 29%|██▉ | 16942/57600 [2:57:57<7:16:03, 1.55it/s] 29%|██▉ | 16943/57600 [2:57:58<7:24:58, 1.52it/s] 29%|██▉ | 16944/57600 [2:57:58<7:16:08, 1.55it/s] 29%|██▉ | 16945/57600 [2:57:59<7:13:25, 1.56it/s] 29%|██▉ | 16946/57600 [2:58:00<7:05:49, 1.59it/s] 29%|██▉ | 16947/57600 [2:58:00<7:21:39, 1.53it/s] 29%|██▉ | 16948/57600 [2:58:01<7:08:53, 1.58it/s] 29%|██▉ | 16949/57600 [2:58:01<7:06:33, 1.59it/s] 29%|██▉ | 16950/57600 [2:58:02<6:58:01, 1.62it/s] 29%|██▉ | 16951/57600 [2:58:03<6:44:24, 1.68it/s] 29%|██▉ | 16952/57600 [2:58:03<6:40:03, 1.69it/s] 29%|██▉ | 16953/57600 [2:58:04<6:52:32, 1.64it/s] 29%|██▉ | 16954/57600 [2:58:04<6:52:16, 1.64it/s] 29%|██▉ | 16955/57600 [2:58:05<6:45:00, 1.67it/s] 29%|██▉ | 16956/57600 [2:58:06<6:58:09, 1.62it/s] 29%|██▉ | 16957/57600 [2:58:06<7:06:38, 1.59it/s] 29%|██▉ | 16958/57600 [2:58:07<7:05:24, 1.59it/s] 29%|██▉ | 16959/57600 [2:58:08<7:00:19, 1.61it/s] 29%|██▉ | 16960/57600 [2:58:08<6:47:13, 1.66it/s] {'loss': 1.2669, 'learning_rate': 7.37212543554007e-07, 'epoch': 187.2} + 29%|██▉ | 16960/57600 [2:58:08<6:47:13, 1.66it/s] 29%|██▉ | 16961/57600 [2:58:09<6:38:43, 1.70it/s] 29%|██▉ | 16962/57600 [2:58:09<6:40:36, 1.69it/s] 29%|██▉ | 16963/57600 [2:58:10<6:43:04, 1.68it/s] 29%|██▉ | 16964/57600 [2:58:10<6:42:37, 1.68it/s] 29%|██▉ | 16965/57600 [2:58:11<6:41:10, 1.69it/s] 29%|██▉ | 16966/57600 [2:58:12<6:33:34, 1.72it/s] 29%|██▉ | 16967/57600 [2:58:12<6:44:19, 1.67it/s] 29%|██▉ | 16968/57600 [2:58:13<7:20:43, 1.54it/s] 29%|██▉ | 16969/57600 [2:58:14<7:33:17, 1.49it/s] 29%|██▉ | 16970/57600 [2:58:14<7:26:00, 1.52it/s] 29%|██▉ | 16971/57600 [2:58:15<7:11:39, 1.57it/s] 29%|██▉ | 16972/57600 [2:58:16<7:03:31, 1.60it/s] 29%|██▉ | 16973/57600 [2:58:16<7:09:03, 1.58it/s] 29%|██▉ | 16974/57600 [2:58:17<7:05:52, 1.59it/s] 29%|██▉ | 16975/57600 [2:58:17<6:58:42, 1.62it/s] 29%|██▉ | 16976/57600 [2:58:18<6:56:37, 1.63it/s] 29%|██▉ | 16977/57600 [2:58:19<6:54:29, 1.63it/s] 29%|██▉ | 16978/57600 [2:58:19<6:56:53, 1.62it/s] 29%|██▉ | 16979/57600 [2:58:20<6:54:20, 1.63it/s] 29%|██▉ | 16980/57600 [2:58:21<7:00:21, 1.61it/s] {'loss': 1.2789, 'learning_rate': 7.368989547038327e-07, 'epoch': 187.42} + 29%|██▉ | 16980/57600 [2:58:21<7:00:21, 1.61it/s] 29%|██▉ | 16981/57600 [2:58:21<7:03:46, 1.60it/s] 29%|██▉ | 16982/57600 [2:58:22<7:02:14, 1.60it/s] 29%|██▉ | 16983/57600 [2:58:22<7:05:15, 1.59it/s] 29%|██▉ | 16984/57600 [2:58:23<7:03:43, 1.60it/s] 29%|██▉ | 16985/57600 [2:58:24<6:55:37, 1.63it/s] 29%|██▉ | 16986/57600 [2:58:24<6:50:39, 1.65it/s] 29%|██▉ | 16987/57600 [2:58:25<6:52:02, 1.64it/s] 29%|██▉ | 16988/57600 [2:58:25<6:54:34, 1.63it/s] 29%|██▉ | 16989/57600 [2:58:26<6:58:07, 1.62it/s] 29%|██▉ | 16990/57600 [2:58:27<7:03:59, 1.60it/s] 29%|██▉ | 16991/57600 [2:58:27<6:57:03, 1.62it/s] 30%|██▉ | 16992/57600 [2:58:28<7:13:16, 1.56it/s] 30%|██▉ | 16993/57600 [2:58:29<7:00:51, 1.61it/s] 30%|██▉ | 16994/57600 [2:58:29<6:56:24, 1.63it/s] 30%|██▉ | 16995/57600 [2:58:30<7:14:27, 1.56it/s] 30%|██▉ | 16996/57600 [2:58:30<7:04:09, 1.60it/s] 30%|██▉ | 16997/57600 [2:58:31<7:23:31, 1.53it/s] 30%|██▉ | 16998/57600 [2:58:32<7:21:05, 1.53it/s] 30%|██▉ | 16999/57600 [2:58:32<7:21:02, 1.53it/s] 30%|██▉ | 17000/57600 [2:58:33<7:17:42, 1.55it/s] {'loss': 1.2457, 'learning_rate': 7.365853658536585e-07, 'epoch': 187.64} + 30%|██▉ | 17000/57600 [2:58:33<7:17:42, 1.55it/s] 30%|██▉ | 17001/57600 [2:58:34<7:03:58, 1.60it/s] 30%|██▉ | 17002/57600 [2:58:34<6:57:14, 1.62it/s] 30%|██▉ | 17003/57600 [2:58:35<7:06:29, 1.59it/s] 30%|██▉ | 17004/57600 [2:58:36<7:11:20, 1.57it/s] 30%|██▉ | 17005/57600 [2:58:36<7:15:18, 1.55it/s] 30%|██▉ | 17006/57600 [2:58:37<7:16:57, 1.55it/s] 30%|██▉ | 17007/57600 [2:58:38<7:06:14, 1.59it/s] 30%|██▉ | 17008/57600 [2:58:38<6:54:15, 1.63it/s] 30%|██▉ | 17009/57600 [2:58:39<7:02:13, 1.60it/s] 30%|██▉ | 17010/57600 [2:58:39<7:01:32, 1.60it/s] 30%|██▉ | 17011/57600 [2:58:40<6:51:58, 1.64it/s] 30%|██▉ | 17012/57600 [2:58:41<6:47:09, 1.66it/s] 30%|██▉ | 17013/57600 [2:58:41<6:46:40, 1.66it/s] 30%|██▉ | 17014/57600 [2:58:42<7:09:56, 1.57it/s] 30%|██▉ | 17015/57600 [2:58:42<6:58:13, 1.62it/s] 30%|██▉ | 17016/57600 [2:58:43<6:55:13, 1.63it/s] 30%|██▉ | 17017/57600 [2:58:44<6:50:14, 1.65it/s] 30%|██▉ | 17018/57600 [2:58:44<6:55:03, 1.63it/s] 30%|██▉ | 17019/57600 [2:58:45<7:01:38, 1.60it/s] 30%|██▉ | 17020/57600 [2:58:46<7:09:51, 1.57it/s] {'loss': 1.3173, 'learning_rate': 7.362717770034843e-07, 'epoch': 187.86} + 30%|██▉ | 17020/57600 [2:58:46<7:09:51, 1.57it/s] 30%|██▉ | 17021/57600 [2:58:46<7:18:38, 1.54it/s] 30%|██▉ | 17022/57600 [2:58:47<7:31:54, 1.50it/s] 30%|██▉ | 17023/57600 [2:58:48<7:34:35, 1.49it/s] 30%|██▉ | 17024/57600 [2:58:48<7:29:45, 1.50it/s] 30%|██▉ | 17025/57600 [2:58:49<7:17:43, 1.54it/s] 30%|██▉ | 17026/57600 [2:58:49<7:08:27, 1.58it/s] 30%|██▉ | 17027/57600 [2:58:50<7:07:12, 1.58it/s] 30%|██▉ | 17028/57600 [2:58:51<7:08:11, 1.58it/s] 30%|██▉ | 17029/57600 [2:58:51<6:54:41, 1.63it/s] 30%|██▉ | 17030/57600 [2:58:52<6:49:25, 1.65it/s] 30%|██▉ | 17031/57600 [2:58:53<6:50:16, 1.65it/s] 30%|██▉ | 17032/57600 [2:58:53<6:44:03, 1.67it/s] 30%|██▉ | 17033/57600 [2:58:54<7:03:03, 1.60it/s] 30%|██▉ | 17034/57600 [2:58:54<7:03:28, 1.60it/s] 30%|██▉ | 17035/57600 [2:58:55<6:59:04, 1.61it/s] 30%|██▉ | 17036/57600 [2:58:56<6:52:30, 1.64it/s] 30%|██▉ | 17037/57600 [2:58:56<7:04:06, 1.59it/s] 30%|██�� | 17038/57600 [2:58:57<6:58:58, 1.61it/s] 30%|██▉ | 17039/57600 [2:58:57<6:58:02, 1.62it/s] 30%|██▉ | 17040/57600 [2:58:58<6:46:57, 1.66it/s] {'loss': 1.2793, 'learning_rate': 7.359581881533101e-07, 'epoch': 188.08} + 30%|██▉ | 17040/57600 [2:58:58<6:46:57, 1.66it/s] 30%|██▉ | 17041/57600 [2:58:59<6:47:04, 1.66it/s] 30%|██▉ | 17042/57600 [2:58:59<6:59:02, 1.61it/s] 30%|██▉ | 17043/57600 [2:59:00<7:05:52, 1.59it/s] 30%|██▉ | 17044/57600 [2:59:01<6:50:45, 1.65it/s] 30%|██▉ | 17045/57600 [2:59:01<6:44:58, 1.67it/s] 30%|██▉ | 17046/57600 [2:59:02<6:41:03, 1.69it/s] 30%|██▉ | 17047/57600 [2:59:02<6:58:02, 1.62it/s] 30%|██▉ | 17048/57600 [2:59:03<7:02:21, 1.60it/s] 30%|██▉ | 17049/57600 [2:59:04<6:48:10, 1.66it/s] 30%|██▉ | 17050/57600 [2:59:04<6:56:34, 1.62it/s] 30%|██▉ | 17051/57600 [2:59:05<7:05:31, 1.59it/s] 30%|██▉ | 17052/57600 [2:59:05<7:01:29, 1.60it/s] 30%|██▉ | 17053/57600 [2:59:06<6:54:16, 1.63it/s] 30%|██▉ | 17054/57600 [2:59:07<6:50:46, 1.65it/s] 30%|██▉ | 17055/57600 [2:59:07<6:51:29, 1.64it/s] 30%|██▉ | 17056/57600 [2:59:08<7:00:10, 1.61it/s] 30%|██▉ | 17057/57600 [2:59:09<7:12:30, 1.56it/s] 30%|██▉ | 17058/57600 [2:59:09<7:03:42, 1.59it/s] 30%|██▉ | 17059/57600 [2:59:10<6:58:10, 1.62it/s] 30%|██▉ | 17060/57600 [2:59:10<7:06:38, 1.58it/s] {'loss': 1.2538, 'learning_rate': 7.356445993031358e-07, 'epoch': 188.3} + 30%|██▉ | 17060/57600 [2:59:10<7:06:38, 1.58it/s] 30%|██▉ | 17061/57600 [2:59:11<7:02:09, 1.60it/s] 30%|██▉ | 17062/57600 [2:59:12<7:02:24, 1.60it/s] 30%|██▉ | 17063/57600 [2:59:12<7:24:32, 1.52it/s] 30%|██▉ | 17064/57600 [2:59:13<7:30:15, 1.50it/s] 30%|██▉ | 17065/57600 [2:59:14<7:18:38, 1.54it/s] 30%|██▉ | 17066/57600 [2:59:14<7:19:36, 1.54it/s] 30%|██▉ | 17067/57600 [2:59:15<7:07:00, 1.58it/s] 30%|██▉ | 17068/57600 [2:59:16<7:24:54, 1.52it/s] 30%|██▉ | 17069/57600 [2:59:16<7:18:58, 1.54it/s] 30%|██▉ | 17070/57600 [2:59:17<7:04:25, 1.59it/s] 30%|██▉ | 17071/57600 [2:59:18<7:05:06, 1.59it/s] 30%|██▉ | 17072/57600 [2:59:18<7:08:04, 1.58it/s] 30%|██▉ | 17073/57600 [2:59:19<6:49:04, 1.65it/s] 30%|██▉ | 17074/57600 [2:59:19<6:50:15, 1.65it/s] 30%|██▉ | 17075/57600 [2:59:20<6:46:56, 1.66it/s] 30%|██▉ | 17076/57600 [2:59:21<6:45:38, 1.67it/s] 30%|██▉ | 17077/57600 [2:59:21<6:52:14, 1.64it/s] 30%|██▉ | 17078/57600 [2:59:22<7:12:32, 1.56it/s] 30%|██▉ | 17079/57600 [2:59:22<7:11:52, 1.56it/s] 30%|██▉ | 17080/57600 [2:59:23<7:07:34, 1.58it/s] {'loss': 1.2564, 'learning_rate': 7.353310104529616e-07, 'epoch': 188.52} + 30%|██▉ | 17080/57600 [2:59:23<7:07:34, 1.58it/s] 30%|██▉ | 17081/57600 [2:59:24<7:03:33, 1.59it/s] 30%|██▉ | 17082/57600 [2:59:24<6:59:32, 1.61it/s] 30%|██▉ | 17083/57600 [2:59:25<7:04:07, 1.59it/s] 30%|██▉ | 17084/57600 [2:59:26<7:07:47, 1.58it/s] 30%|██▉ | 17085/57600 [2:59:26<7:00:58, 1.60it/s] 30%|██▉ | 17086/57600 [2:59:27<6:47:50, 1.66it/s] 30%|██▉ | 17087/57600 [2:59:27<7:01:14, 1.60it/s] 30%|██▉ | 17088/57600 [2:59:28<7:01:51, 1.60it/s] 30%|██▉ | 17089/57600 [2:59:29<6:52:41, 1.64it/s] 30%|██▉ | 17090/57600 [2:59:29<6:48:09, 1.65it/s] 30%|██▉ | 17091/57600 [2:59:30<6:46:09, 1.66it/s] 30%|██▉ | 17092/57600 [2:59:30<6:39:28, 1.69it/s] 30%|██▉ | 17093/57600 [2:59:31<6:58:18, 1.61it/s] 30%|██▉ | 17094/57600 [2:59:32<7:11:39, 1.56it/s] 30%|██▉ | 17095/57600 [2:59:32<6:58:28, 1.61it/s] 30%|██▉ | 17096/57600 [2:59:33<7:05:38, 1.59it/s] 30%|██▉ | 17097/57600 [2:59:34<6:59:56, 1.61it/s] 30%|██▉ | 17098/57600 [2:59:34<7:16:53, 1.55it/s] 30%|██▉ | 17099/57600 [2:59:35<7:00:20, 1.61it/s] 30%|██▉ | 17100/57600 [2:59:35<6:59:13, 1.61it/s] {'loss': 1.2794, 'learning_rate': 7.350174216027874e-07, 'epoch': 188.74} + 30%|██▉ | 17100/57600 [2:59:35<6:59:13, 1.61it/s] 30%|██▉ | 17101/57600 [2:59:36<7:11:48, 1.56it/s] 30%|██▉ | 17102/57600 [2:59:37<7:00:11, 1.61it/s] 30%|██▉ | 17103/57600 [2:59:37<6:48:56, 1.65it/s] 30%|██▉ | 17104/57600 [2:59:38<6:45:56, 1.66it/s] 30%|██▉ | 17105/57600 [2:59:39<7:00:08, 1.61it/s] 30%|██▉ | 17106/57600 [2:59:39<6:58:38, 1.61it/s] 30%|██▉ | 17107/57600 [2:59:40<7:11:09, 1.57it/s] 30%|██▉ | 17108/57600 [2:59:41<7:28:30, 1.50it/s] 30%|██▉ | 17109/57600 [2:59:41<7:28:32, 1.50it/s] 30%|██▉ | 17110/57600 [2:59:42<7:25:34, 1.51it/s] 30%|██▉ | 17111/57600 [2:59:43<7:11:48, 1.56it/s] 30%|██▉ | 17112/57600 [2:59:43<7:04:16, 1.59it/s] 30%|██▉ | 17113/57600 [2:59:44<6:55:19, 1.62it/s] 30%|██▉ | 17114/57600 [2:59:44<6:49:57, 1.65it/s] 30%|██▉ | 17115/57600 [2:59:45<6:54:51, 1.63it/s] 30%|██▉ | 17116/57600 [2:59:46<6:49:34, 1.65it/s] 30%|██▉ | 17117/57600 [2:59:46<6:44:39, 1.67it/s] 30%|██▉ | 17118/57600 [2:59:47<7:05:02, 1.59it/s] 30%|██▉ | 17119/57600 [2:59:47<7:09:51, 1.57it/s] 30%|██▉ | 17120/57600 [2:59:48<7:15:43, 1.55it/s] {'loss': 1.2664, 'learning_rate': 7.347038327526132e-07, 'epoch': 188.96} + 30%|██▉ | 17120/57600 [2:59:48<7:15:43, 1.55it/s] 30%|██▉ | 17121/57600 [2:59:49<7:14:04, 1.55it/s] 30%|██▉ | 17122/57600 [2:59:49<6:59:21, 1.61it/s] 30%|██▉ | 17123/57600 [2:59:50<7:00:32, 1.60it/s] 30%|██▉ | 17124/57600 [2:59:51<7:15:05, 1.55it/s] 30%|██▉ | 17125/57600 [2:59:51<7:10:57, 1.57it/s] 30%|██▉ | 17126/57600 [2:59:52<7:12:06, 1.56it/s] 30%|██▉ | 17127/57600 [2:59:52<6:54:50, 1.63it/s] 30%|██▉ | 17128/57600 [2:59:53<7:06:58, 1.58it/s] 30%|██▉ | 17129/57600 [2:59:54<7:06:32, 1.58it/s] 30%|██▉ | 17130/57600 [2:59:54<7:13:57, 1.55it/s] 30%|██▉ | 17131/57600 [2:59:55<7:06:35, 1.58it/s] 30%|██▉ | 17132/57600 [2:59:56<7:05:04, 1.59it/s] 30%|██▉ | 17133/57600 [2:59:56<7:08:05, 1.58it/s] 30%|██▉ | 17134/57600 [2:59:57<7:02:24, 1.60it/s] 30%|██▉ | 17135/57600 [2:59:58<6:57:04, 1.62it/s] 30%|██▉ | 17136/57600 [2:59:58<7:02:25, 1.60it/s] 30%|██▉ | 17137/57600 [2:59:59<7:05:09, 1.59it/s] 30%|██▉ | 17138/57600 [3:00:00<7:17:50, 1.54it/s] 30%|██▉ | 17139/57600 [3:00:00<7:24:22, 1.52it/s] 30%|██▉ | 17140/57600 [3:00:01<7:23:22, 1.52it/s] {'loss': 1.2489, 'learning_rate': 7.343902439024389e-07, 'epoch': 189.18} + 30%|██▉ | 17140/57600 [3:00:01<7:23:22, 1.52it/s] 30%|██▉ | 17141/57600 [3:00:01<6:59:01, 1.61it/s] 30%|██▉ | 17142/57600 [3:00:02<6:57:26, 1.62it/s] 30%|██▉ | 17143/57600 [3:00:03<7:17:07, 1.54it/s] 30%|██▉ | 17144/57600 [3:00:03<7:11:03, 1.56it/s] 30%|██▉ | 17145/57600 [3:00:04<7:11:48, 1.56it/s] 30%|██▉ | 17146/57600 [3:00:05<7:01:40, 1.60it/s] 30%|██▉ | 17147/57600 [3:00:05<6:56:50, 1.62it/s] 30%|██▉ | 17148/57600 [3:00:06<7:06:24, 1.58it/s] 30%|██▉ | 17149/57600 [3:00:06<7:02:24, 1.60it/s] 30%|██▉ | 17150/57600 [3:00:07<6:49:29, 1.65it/s] 30%|██▉ | 17151/57600 [3:00:08<6:56:32, 1.62it/s] 30%|██▉ | 17152/57600 [3:00:08<7:11:58, 1.56it/s] 30%|██▉ | 17153/57600 [3:00:09<6:59:15, 1.61it/s] 30%|██▉ | 17154/57600 [3:00:10<7:11:23, 1.56it/s] 30%|██▉ | 17155/57600 [3:00:10<7:04:31, 1.59it/s] 30%|██▉ | 17156/57600 [3:00:11<6:55:12, 1.62it/s] 30%|██▉ | 17157/57600 [3:00:11<6:58:01, 1.61it/s] 30%|██▉ | 17158/57600 [3:00:12<6:46:57, 1.66it/s] 30%|██▉ | 17159/57600 [3:00:13<6:39:06, 1.69it/s] 30%|██▉ | 17160/57600 [3:00:13<6:39:29, 1.69it/s] {'loss': 1.29, 'learning_rate': 7.340766550522648e-07, 'epoch': 189.4} + 30%|██▉ | 17160/57600 [3:00:13<6:39:29, 1.69it/s] 30%|██▉ | 17161/57600 [3:00:14<6:46:20, 1.66it/s] 30%|██▉ | 17162/57600 [3:00:14<6:41:21, 1.68it/s] 30%|██▉ | 17163/57600 [3:00:15<6:44:44, 1.67it/s] 30%|██▉ | 17164/57600 [3:00:16<6:53:59, 1.63it/s] 30%|██▉ | 17165/57600 [3:00:16<6:59:38, 1.61it/s] 30%|██▉ | 17166/57600 [3:00:17<6:58:42, 1.61it/s] 30%|██▉ | 17167/57600 [3:00:18<7:12:04, 1.56it/s] 30%|██▉ | 17168/57600 [3:00:18<7:09:33, 1.57it/s] 30%|██▉ | 17169/57600 [3:00:19<7:16:42, 1.54it/s] 30%|██▉ | 17170/57600 [3:00:20<7:18:44, 1.54it/s] 30%|██▉ | 17171/57600 [3:00:20<7:12:52, 1.56it/s] 30%|██▉ | 17172/57600 [3:00:21<7:04:36, 1.59it/s] 30%|██▉ | 17173/57600 [3:00:21<6:57:47, 1.61it/s] 30%|██▉ | 17174/57600 [3:00:22<6:54:57, 1.62it/s] 30%|██▉ | 17175/57600 [3:00:23<7:08:59, 1.57it/s] 30%|██▉ | 17176/57600 [3:00:23<6:58:40, 1.61it/s] 30%|██▉ | 17177/57600 [3:00:24<7:02:26, 1.59it/s] 30%|██▉ | 17178/57600 [3:00:25<7:10:33, 1.56it/s] 30%|██▉ | 17179/57600 [3:00:25<7:05:36, 1.58it/s] 30%|██▉ | 17180/57600 [3:00:26<6:54:31, 1.63it/s] {'loss': 1.2308, 'learning_rate': 7.337630662020907e-07, 'epoch': 189.62} + 30%|██▉ | 17180/57600 [3:00:26<6:54:31, 1.63it/s] 30%|██▉ | 17181/57600 [3:00:26<6:59:39, 1.61it/s] 30%|██▉ | 17182/57600 [3:00:27<6:49:49, 1.64it/s] 30%|██▉ | 17183/57600 [3:00:28<6:48:41, 1.65it/s] 30%|██▉ | 17184/57600 [3:00:28<6:45:12, 1.66it/s] 30%|██▉ | 17185/57600 [3:00:29<6:52:38, 1.63it/s] 30%|██▉ | 17186/57600 [3:00:29<6:59:45, 1.60it/s] 30%|██▉ | 17187/57600 [3:00:30<6:54:08, 1.63it/s] 30%|██▉ | 17188/57600 [3:00:31<6:47:59, 1.65it/s] 30%|██▉ | 17189/57600 [3:00:31<6:41:20, 1.68it/s] 30%|██▉ | 17190/57600 [3:00:32<6:37:58, 1.69it/s] 30%|██▉ | 17191/57600 [3:00:32<6:37:55, 1.69it/s] 30%|██▉ | 17192/57600 [3:00:33<6:44:35, 1.66it/s] 30%|██▉ | 17193/57600 [3:00:34<6:49:25, 1.64it/s] 30%|██▉ | 17194/57600 [3:00:34<7:04:17, 1.59it/s] 30%|██▉ | 17195/57600 [3:00:35<7:03:03, 1.59it/s] 30%|██▉ | 17196/57600 [3:00:36<7:08:27, 1.57it/s] 30%|██▉ | 17197/57600 [3:00:36<6:55:29, 1.62it/s] 30%|██▉ | 17198/57600 [3:00:37<6:53:40, 1.63it/s] 30%|██▉ | 17199/57600 [3:00:37<6:49:33, 1.64it/s] 30%|██▉ | 17200/57600 [3:00:38<7:04:35, 1.59it/s] {'loss': 1.2809, 'learning_rate': 7.334494773519163e-07, 'epoch': 189.85} + 30%|██▉ | 17200/57600 [3:00:38<7:04:35, 1.59it/s] 30%|██▉ | 17201/57600 [3:00:39<6:56:31, 1.62it/s] 30%|██▉ | 17202/57600 [3:00:39<6:46:43, 1.66it/s] 30%|██▉ | 17203/57600 [3:00:40<6:45:51, 1.66it/s] 30%|██▉ | 17204/57600 [3:00:40<6:41:56, 1.68it/s] 30%|██▉ | 17205/57600 [3:00:41<6:45:36, 1.66it/s] 30%|██▉ | 17206/57600 [3:00:42<6:49:23, 1.64it/s] 30%|██▉ | 17207/57600 [3:00:42<6:43:40, 1.67it/s] 30%|██▉ | 17208/57600 [3:00:43<6:48:17, 1.65it/s] 30%|██▉ | 17209/57600 [3:00:43<6:58:08, 1.61it/s] 30%|██▉ | 17210/57600 [3:00:44<6:54:17, 1.62it/s] 30%|██▉ | 17211/57600 [3:00:45<6:58:40, 1.61it/s] 30%|██▉ | 17212/57600 [3:00:45<6:59:59, 1.60it/s] 30%|██▉ | 17213/57600 [3:00:46<7:00:36, 1.60it/s] 30%|██▉ | 17214/57600 [3:00:47<7:20:05, 1.53it/s] 30%|██▉ | 17215/57600 [3:00:47<7:24:20, 1.51it/s] 30%|██▉ | 17216/57600 [3:00:48<7:18:36, 1.53it/s] 30%|██▉ | 17217/57600 [3:00:49<7:06:51, 1.58it/s] 30%|██▉ | 17218/57600 [3:00:49<7:13:50, 1.55it/s] 30%|██▉ | 17219/57600 [3:00:50<7:08:31, 1.57it/s] 30%|██▉ | 17220/57600 [3:00:50<7:09:57, 1.57it/s] {'loss': 1.2614, 'learning_rate': 7.331358885017421e-07, 'epoch': 190.07} + 30%|██▉ | 17220/57600 [3:00:50<7:09:57, 1.57it/s] 30%|██▉ | 17221/57600 [3:00:51<7:16:49, 1.54it/s] 30%|██▉ | 17222/57600 [3:00:52<7:05:56, 1.58it/s] 30%|██▉ | 17223/57600 [3:00:52<7:16:51, 1.54it/s] 30%|██▉ | 17224/57600 [3:00:53<7:13:25, 1.55it/s] 30%|██▉ | 17225/57600 [3:00:54<7:09:13, 1.57it/s] 30%|██▉ | 17226/57600 [3:00:54<7:06:13, 1.58it/s] 30%|██▉ | 17227/57600 [3:00:55<7:00:04, 1.60it/s] 30%|██▉ | 17228/57600 [3:00:56<6:54:58, 1.62it/s] 30%|██▉ | 17229/57600 [3:00:56<7:14:49, 1.55it/s] 30%|██▉ | 17230/57600 [3:00:57<7:17:36, 1.54it/s] 30%|██▉ | 17231/57600 [3:00:57<6:54:46, 1.62it/s] 30%|██▉ | 17232/57600 [3:00:58<7:00:59, 1.60it/s] 30%|██▉ | 17233/57600 [3:00:59<7:09:40, 1.57it/s] 30%|██▉ | 17234/57600 [3:00:59<6:55:36, 1.62it/s] 30%|██▉ | 17235/57600 [3:01:00<6:50:56, 1.64it/s] 30%|██▉ | 17236/57600 [3:01:00<6:43:49, 1.67it/s] 30%|██▉ | 17237/57600 [3:01:01<6:53:08, 1.63it/s] 30%|██▉ | 17238/57600 [3:01:02<6:44:13, 1.66it/s] 30%|██▉ | 17239/57600 [3:01:02<6:35:32, 1.70it/s] 30%|██▉ | 17240/57600 [3:01:03<6:27:58, 1.73it/s] {'loss': 1.3223, 'learning_rate': 7.328222996515679e-07, 'epoch': 190.29} + 30%|██▉ | 17240/57600 [3:01:03<6:27:58, 1.73it/s] 30%|██▉ | 17241/57600 [3:01:03<6:27:29, 1.74it/s] 30%|██▉ | 17242/57600 [3:01:04<6:27:16, 1.74it/s] 30%|██▉ | 17243/57600 [3:01:05<6:21:15, 1.76it/s] 30%|██▉ | 17244/57600 [3:01:05<6:32:37, 1.71it/s] 30%|██▉ | 17245/57600 [3:01:06<6:41:08, 1.68it/s] 30%|██▉ | 17246/57600 [3:01:06<6:50:59, 1.64it/s] 30%|██▉ | 17247/57600 [3:01:07<6:52:07, 1.63it/s] 30%|██▉ | 17248/57600 [3:01:08<6:58:47, 1.61it/s] 30%|██▉ | 17249/57600 [3:01:08<7:02:53, 1.59it/s] 30%|██▉ | 17250/57600 [3:01:09<7:12:11, 1.56it/s] 30%|██▉ | 17251/57600 [3:01:10<7:04:38, 1.58it/s] 30%|██▉ | 17252/57600 [3:01:10<7:04:08, 1.59it/s] 30%|██▉ | 17253/57600 [3:01:11<6:53:23, 1.63it/s] 30%|██▉ | 17254/57600 [3:01:11<6:48:20, 1.65it/s] 30%|██▉ | 17255/57600 [3:01:12<6:36:14, 1.70it/s] 30%|██▉ | 17256/57600 [3:01:13<6:46:23, 1.65it/s] 30%|██▉ | 17257/57600 [3:01:13<6:45:46, 1.66it/s] 30%|██▉ | 17258/57600 [3:01:14<6:55:57, 1.62it/s] 30%|██▉ | 17259/57600 [3:01:14<6:47:33, 1.65it/s] 30%|██▉ | 17260/57600 [3:01:15<7:01:39, 1.59it/s] {'loss': 1.3043, 'learning_rate': 7.325087108013937e-07, 'epoch': 190.51} + 30%|██▉ | 17260/57600 [3:01:15<7:01:39, 1.59it/s] 30%|██▉ | 17261/57600 [3:01:16<7:11:26, 1.56it/s] 30%|██▉ | 17262/57600 [3:01:16<7:04:09, 1.59it/s] 30%|██▉ | 17263/57600 [3:01:17<7:02:16, 1.59it/s] 30%|██▉ | 17264/57600 [3:01:18<7:16:56, 1.54it/s] 30%|██▉ | 17265/57600 [3:01:18<7:19:35, 1.53it/s] 30%|██▉ | 17266/57600 [3:01:19<7:03:40, 1.59it/s] 30%|██▉ | 17267/57600 [3:01:19<6:51:55, 1.63it/s] 30%|██▉ | 17268/57600 [3:01:20<6:39:09, 1.68it/s] 30%|██▉ | 17269/57600 [3:01:21<6:38:48, 1.69it/s] 30%|██▉ | 17270/57600 [3:01:21<6:39:55, 1.68it/s] 30%|██▉ | 17271/57600 [3:01:22<6:44:29, 1.66it/s] 30%|██▉ | 17272/57600 [3:01:23<6:57:20, 1.61it/s] 30%|██▉ | 17273/57600 [3:01:23<7:07:18, 1.57it/s] 30%|██▉ | 17274/57600 [3:01:24<6:50:48, 1.64it/s] 30%|██▉ | 17275/57600 [3:01:24<6:58:53, 1.60it/s] 30%|██▉ | 17276/57600 [3:01:25<6:59:47, 1.60it/s] 30%|██▉ | 17277/57600 [3:01:26<7:04:32, 1.58it/s] 30%|██▉ | 17278/57600 [3:01:26<7:08:06, 1.57it/s] 30%|██▉ | 17279/57600 [3:01:27<7:02:45, 1.59it/s] 30%|███ | 17280/57600 [3:01:28<7:10:29, 1.56it/s] {'loss': 1.2612, 'learning_rate': 7.321951219512194e-07, 'epoch': 190.73} + 30%|███ | 17280/57600 [3:01:28<7:10:29, 1.56it/s] 30%|███ | 17281/57600 [3:01:28<7:12:54, 1.55it/s] 30%|███ | 17282/57600 [3:01:29<7:17:24, 1.54it/s] 30%|███ | 17283/57600 [3:01:30<7:22:54, 1.52it/s] 30%|███ | 17284/57600 [3:01:30<7:30:44, 1.49it/s] 30%|███ | 17285/57600 [3:01:31<7:23:36, 1.51it/s] 30%|███ | 17286/57600 [3:01:32<7:08:47, 1.57it/s] 30%|███ | 17287/57600 [3:01:32<6:57:03, 1.61it/s] 30%|███ | 17288/57600 [3:01:33<7:01:53, 1.59it/s] 30%|███ | 17289/57600 [3:01:33<7:10:53, 1.56it/s] 30%|███ | 17290/57600 [3:01:34<7:09:18, 1.56it/s] 30%|███ | 17291/57600 [3:01:35<6:54:55, 1.62it/s] 30%|███ | 17292/57600 [3:01:35<7:02:01, 1.59it/s] 30%|███ | 17293/57600 [3:01:36<7:03:15, 1.59it/s] 30%|███ | 17294/57600 [3:01:37<7:12:28, 1.55it/s] 30%|███ | 17295/57600 [3:01:37<7:05:11, 1.58it/s] 30%|███ | 17296/57600 [3:01:38<7:13:30, 1.55it/s] 30%|███ | 17297/57600 [3:01:38<7:11:53, 1.56it/s] 30%|███ | 17298/57600 [3:01:39<6:59:27, 1.60it/s] 30%|███ | 17299/57600 [3:01:40<6:55:41, 1.62it/s] 30%|███ | 17300/57600 [3:01:40<6:55:20, 1.62it/s] {'loss': 1.2209, 'learning_rate': 7.318815331010453e-07, 'epoch': 190.95} + 30%|███ | 17300/57600 [3:01:40<6:55:20, 1.62it/s] 30%|███ | 17301/57600 [3:01:41<6:51:31, 1.63it/s] 30%|███ | 17302/57600 [3:01:41<6:39:02, 1.68it/s] 30%|███ | 17303/57600 [3:01:42<6:41:23, 1.67it/s] 30%|███ | 17304/57600 [3:01:43<7:00:38, 1.60it/s] 30%|███ | 17305/57600 [3:01:43<7:15:45, 1.54it/s] 30%|███ | 17306/57600 [3:01:44<7:03:01, 1.59it/s] 30%|███ | 17307/57600 [3:01:45<7:01:50, 1.59it/s] 30%|███ | 17308/57600 [3:01:45<7:14:45, 1.54it/s] 30%|███ | 17309/57600 [3:01:46<7:06:42, 1.57it/s] 30%|███ | 17310/57600 [3:01:47<7:10:51, 1.56it/s] 30%|███ | 17311/57600 [3:01:47<7:19:38, 1.53it/s] 30%|███ | 17312/57600 [3:01:48<7:23:56, 1.51it/s] 30%|███ | 17313/57600 [3:01:49<7:20:38, 1.52it/s] 30%|███ | 17314/57600 [3:01:49<7:07:08, 1.57it/s] 30%|███ | 17315/57600 [3:01:50<6:57:34, 1.61it/s] 30%|███ | 17316/57600 [3:01:50<6:58:54, 1.60it/s] 30%|███ | 17317/57600 [3:01:51<6:57:46, 1.61it/s] 30%|███ | 17318/57600 [3:01:52<7:06:40, 1.57it/s] 30%|███ | 17319/57600 [3:01:52<7:15:23, 1.54it/s] 30%|███ | 17320/57600 [3:01:53<7:24:23, 1.51it/s] {'loss': 1.2478, 'learning_rate': 7.315679442508711e-07, 'epoch': 191.17} + 30%|███ | 17320/57600 [3:01:53<7:24:23, 1.51it/s] 30%|███ | 17321/57600 [3:01:54<7:07:08, 1.57it/s] 30%|███ | 17322/57600 [3:01:54<7:04:58, 1.58it/s] 30%|███ | 17323/57600 [3:01:55<7:05:40, 1.58it/s] 30%|███ | 17324/57600 [3:01:56<6:54:56, 1.62it/s] 30%|███ | 17325/57600 [3:01:56<7:03:02, 1.59it/s] 30%|███ | 17326/57600 [3:01:57<6:50:36, 1.63it/s] 30%|███ | 17327/57600 [3:01:57<6:48:08, 1.64it/s] 30%|███ | 17328/57600 [3:01:58<7:00:20, 1.60it/s] 30%|███ | 17329/57600 [3:01:59<6:59:57, 1.60it/s] 30%|███ | 17330/57600 [3:01:59<7:06:22, 1.57it/s] 30%|███ | 17331/57600 [3:02:00<7:01:45, 1.59it/s] 30%|███ | 17332/57600 [3:02:01<7:00:25, 1.60it/s] 30%|███ | 17333/57600 [3:02:01<6:56:06, 1.61it/s] 30%|███ | 17334/57600 [3:02:02<6:45:38, 1.65it/s] 30%|███ | 17335/57600 [3:02:02<7:08:00, 1.57it/s] 30%|███ | 17336/57600 [3:02:03<7:03:47, 1.58it/s] 30%|███ | 17337/57600 [3:02:04<7:03:58, 1.58it/s] 30%|███ | 17338/57600 [3:02:04<7:01:46, 1.59it/s] 30%|███ | 17339/57600 [3:02:05<7:03:27, 1.58it/s] 30%|███ | 17340/57600 [3:02:06<7:04:26, 1.58it/s] {'loss': 1.3277, 'learning_rate': 7.312543554006968e-07, 'epoch': 191.39} + 30%|███ | 17340/57600 [3:02:06<7:04:26, 1.58it/s] 30%|███ | 17341/57600 [3:02:06<6:54:27, 1.62it/s] 30%|███ | 17342/57600 [3:02:07<6:59:34, 1.60it/s] 30%|███ | 17343/57600 [3:02:07<6:47:24, 1.65it/s] 30%|███ | 17344/57600 [3:02:08<6:43:30, 1.66it/s] 30%|███ | 17345/57600 [3:02:09<6:42:47, 1.67it/s] 30%|███ | 17346/57600 [3:02:09<6:47:49, 1.65it/s] 30%|███ | 17347/57600 [3:02:10<6:51:58, 1.63it/s] 30%|███ | 17348/57600 [3:02:10<7:07:19, 1.57it/s] 30%|███ | 17349/57600 [3:02:11<6:58:26, 1.60it/s] 30%|███ | 17350/57600 [3:02:12<6:51:56, 1.63it/s] 30%|███ | 17351/57600 [3:02:12<6:53:06, 1.62it/s] 30%|███ | 17352/57600 [3:02:13<6:50:13, 1.64it/s] 30%|███ | 17353/57600 [3:02:14<6:59:08, 1.60it/s] 30%|███ | 17354/57600 [3:02:14<6:53:02, 1.62it/s] 30%|███ | 17355/57600 [3:02:15<6:53:57, 1.62it/s] 30%|███ | 17356/57600 [3:02:15<6:50:52, 1.63it/s] 30%|███ | 17357/57600 [3:02:16<6:52:24, 1.63it/s] 30%|███ | 17358/57600 [3:02:17<6:49:36, 1.64it/s] 30%|███ | 17359/57600 [3:02:17<7:00:58, 1.59it/s] 30%|███ | 17360/57600 [3:02:18<7:06:48, 1.57it/s] {'loss': 1.2715, 'learning_rate': 7.309407665505225e-07, 'epoch': 191.61} + 30%|███ | 17360/57600 [3:02:18<7:06:48, 1.57it/s] 30%|███ | 17361/57600 [3:02:19<7:16:41, 1.54it/s] 30%|███ | 17362/57600 [3:02:19<6:59:30, 1.60it/s] 30%|███ | 17363/57600 [3:02:20<6:59:02, 1.60it/s] 30%|███ | 17364/57600 [3:02:20<7:05:39, 1.58it/s] 30%|███ | 17365/57600 [3:02:21<6:53:04, 1.62it/s] 30%|███ | 17366/57600 [3:02:22<7:07:40, 1.57it/s] 30%|███ | 17367/57600 [3:02:22<7:01:47, 1.59it/s] 30%|███ | 17368/57600 [3:02:23<6:47:21, 1.65it/s] 30%|███ | 17369/57600 [3:02:23<6:46:58, 1.65it/s] 30%|███ | 17370/57600 [3:02:24<6:50:02, 1.64it/s] 30%|███ | 17371/57600 [3:02:25<7:14:55, 1.54it/s] 30%|███ | 17372/57600 [3:02:25<7:09:35, 1.56it/s] 30%|███ | 17373/57600 [3:02:26<7:11:21, 1.55it/s] 30%|███ | 17374/57600 [3:02:27<7:03:49, 1.58it/s] 30%|███ | 17375/57600 [3:02:27<7:10:40, 1.56it/s] 30%|███ | 17376/57600 [3:02:28<7:28:57, 1.49it/s] 30%|███ | 17377/57600 [3:02:29<7:23:52, 1.51it/s] 30%|███ | 17378/57600 [3:02:29<7:13:32, 1.55it/s] 30%|███ | 17379/57600 [3:02:30<6:59:18, 1.60it/s] 30%|███ | 17380/57600 [3:02:31<7:07:00, 1.57it/s] {'loss': 1.2433, 'learning_rate': 7.306271777003485e-07, 'epoch': 191.83} + 30%|███ | 17380/57600 [3:02:31<7:07:00, 1.57it/s] 30%|███ | 17381/57600 [3:02:31<6:59:43, 1.60it/s] 30%|███ | 17382/57600 [3:02:32<6:49:28, 1.64it/s] 30%|███ | 17383/57600 [3:02:32<6:53:15, 1.62it/s] 30%|███ | 17384/57600 [3:02:33<6:51:55, 1.63it/s] 30%|███ | 17385/57600 [3:02:34<7:05:28, 1.58it/s] 30%|███ | 17386/57600 [3:02:34<6:52:58, 1.62it/s] 30%|███ | 17387/57600 [3:02:35<6:49:10, 1.64it/s] 30%|███ | 17388/57600 [3:02:35<6:45:34, 1.65it/s] 30%|███ | 17389/57600 [3:02:36<6:59:32, 1.60it/s] 30%|███ | 17390/57600 [3:02:37<6:52:35, 1.62it/s] 30%|███ | 17391/57600 [3:02:37<6:45:37, 1.65it/s] 30%|███ | 17392/57600 [3:02:38<6:42:41, 1.66it/s] 30%|███ | 17393/57600 [3:02:39<6:51:11, 1.63it/s] 30%|███ | 17394/57600 [3:02:39<6:58:51, 1.60it/s] 30%|███ | 17395/57600 [3:02:40<6:54:04, 1.62it/s] 30%|███ | 17396/57600 [3:02:41<7:23:08, 1.51it/s] 30%|███ | 17397/57600 [3:02:41<7:18:56, 1.53it/s] 30%|███ | 17398/57600 [3:02:42<7:13:46, 1.54it/s] 30%|███ | 17399/57600 [3:02:42<7:02:06, 1.59it/s] 30%|███ | 17400/57600 [3:02:43<6:59:54, 1.60it/s] {'loss': 1.2975, 'learning_rate': 7.303135888501742e-07, 'epoch': 192.05} + 30%|███ | 17400/57600 [3:02:43<6:59:54, 1.60it/s] 30%|███ | 17401/57600 [3:02:44<7:00:14, 1.59it/s] 30%|███ | 17402/57600 [3:02:44<6:52:38, 1.62it/s] 30%|███ | 17403/57600 [3:02:45<6:49:10, 1.64it/s] 30%|███ | 17404/57600 [3:02:45<6:41:54, 1.67it/s] 30%|███ | 17405/57600 [3:02:46<6:45:12, 1.65it/s] 30%|███ | 17406/57600 [3:02:47<7:10:07, 1.56it/s] 30%|███ | 17407/57600 [3:02:47<7:15:00, 1.54it/s] 30%|███ | 17408/57600 [3:02:48<7:18:08, 1.53it/s] 30%|███ | 17409/57600 [3:02:49<7:15:05, 1.54it/s] 30%|███ | 17410/57600 [3:02:49<7:07:59, 1.57it/s] 30%|███ | 17411/57600 [3:02:50<6:57:55, 1.60it/s] 30%|███ | 17412/57600 [3:02:51<6:54:34, 1.62it/s] 30%|███ | 17413/57600 [3:02:51<6:59:04, 1.60it/s] 30%|███ | 17414/57600 [3:02:52<6:56:14, 1.61it/s] 30%|███ | 17415/57600 [3:02:52<6:55:34, 1.61it/s] 30%|███ | 17416/57600 [3:02:53<7:06:29, 1.57it/s] 30%|███ | 17417/57600 [3:02:54<7:04:06, 1.58it/s] 30%|███ | 17418/57600 [3:02:54<7:16:15, 1.54it/s] 30%|███ | 17419/57600 [3:02:55<7:05:39, 1.57it/s] 30%|███ | 17420/57600 [3:02:56<6:59:31, 1.60it/s] {'loss': 1.2325, 'learning_rate': 7.299999999999998e-07, 'epoch': 192.27} + 30%|███ | 17420/57600 [3:02:56<6:59:31, 1.60it/s] 30%|███ | 17421/57600 [3:02:56<7:10:43, 1.55it/s] 30%|███ | 17422/57600 [3:02:57<7:01:54, 1.59it/s] 30%|███ | 17423/57600 [3:02:58<7:00:30, 1.59it/s] 30%|███ | 17424/57600 [3:02:58<6:49:44, 1.63it/s] 30%|███ | 17425/57600 [3:02:59<6:54:56, 1.61it/s] 30%|███ | 17426/57600 [3:02:59<6:59:04, 1.60it/s] 30%|███ | 17427/57600 [3:03:00<6:57:06, 1.61it/s] 30%|███ | 17428/57600 [3:03:01<6:56:50, 1.61it/s] 30%|███ | 17429/57600 [3:03:01<6:55:03, 1.61it/s] 30%|███ | 17430/57600 [3:03:02<6:56:35, 1.61it/s] 30%|███ | 17431/57600 [3:03:02<6:55:37, 1.61it/s] 30%|███ | 17432/57600 [3:03:03<6:47:50, 1.64it/s] 30%|███ | 17433/57600 [3:03:04<7:13:31, 1.54it/s] 30%|███ | 17434/57600 [3:03:04<6:57:15, 1.60it/s] 30%|███ | 17435/57600 [3:03:05<6:48:19, 1.64it/s] 30%|███ | 17436/57600 [3:03:06<6:47:46, 1.64it/s] 30%|███ | 17437/57600 [3:03:06<6:51:21, 1.63it/s] 30%|███ | 17438/57600 [3:03:07<6:59:41, 1.59it/s] 30%|███ | 17439/57600 [3:03:07<7:04:29, 1.58it/s] 30%|███ | 17440/57600 [3:03:08<6:59:51, 1.59it/s] {'loss': 1.2685, 'learning_rate': 7.296864111498258e-07, 'epoch': 192.49} + 30%|███ | 17440/57600 [3:03:08<6:59:51, 1.59it/s] 30%|███ | 17441/57600 [3:03:09<7:07:24, 1.57it/s] 30%|███ | 17442/57600 [3:03:09<7:01:10, 1.59it/s] 30%|███ | 17443/57600 [3:03:10<7:16:29, 1.53it/s] 30%|███ | 17444/57600 [3:03:11<7:11:13, 1.55it/s] 30%|███ | 17445/57600 [3:03:11<7:06:44, 1.57it/s] 30%|███ | 17446/57600 [3:03:12<7:00:42, 1.59it/s] 30%|███ | 17447/57600 [3:03:12<6:49:04, 1.64it/s] 30%|███ | 17448/57600 [3:03:13<6:53:46, 1.62it/s] 30%|███ | 17449/57600 [3:03:14<6:47:05, 1.64it/s] 30%|███ | 17450/57600 [3:03:14<6:44:14, 1.66it/s] 30%|███ | 17451/57600 [3:03:15<6:48:58, 1.64it/s] 30%|███ | 17452/57600 [3:03:16<7:05:26, 1.57it/s] 30%|███ | 17453/57600 [3:03:16<6:51:38, 1.63it/s] 30%|███ | 17454/57600 [3:03:17<6:48:40, 1.64it/s] 30%|███ | 17455/57600 [3:03:17<6:46:12, 1.65it/s] 30%|███ | 17456/57600 [3:03:18<6:49:29, 1.63it/s] 30%|███ | 17457/57600 [3:03:19<6:57:27, 1.60it/s] 30%|███ | 17458/57600 [3:03:19<7:01:38, 1.59it/s] 30%|███ | 17459/57600 [3:03:20<6:56:21, 1.61it/s] 30%|███ | 17460/57600 [3:03:21<6:56:38, 1.61it/s] {'loss': 1.285, 'learning_rate': 7.293728222996516e-07, 'epoch': 192.72} + 30%|███ | 17460/57600 [3:03:21<6:56:38, 1.61it/s] 30%|███ | 17461/57600 [3:03:21<7:10:26, 1.55it/s] 30%|███ | 17462/57600 [3:03:22<7:10:42, 1.55it/s] 30%|███ | 17463/57600 [3:03:23<7:09:32, 1.56it/s] 30%|███ | 17464/57600 [3:03:23<6:58:33, 1.60it/s] 30%|███ | 17465/57600 [3:03:24<6:49:29, 1.63it/s] 30%|███ | 17466/57600 [3:03:24<6:58:33, 1.60it/s] 30%|███ | 17467/57600 [3:03:25<6:53:24, 1.62it/s] 30%|███ | 17468/57600 [3:03:26<6:54:57, 1.61it/s] 30%|███ | 17469/57600 [3:03:26<7:02:52, 1.58it/s] 30%|███ | 17470/57600 [3:03:27<7:03:45, 1.58it/s] 30%|███ | 17471/57600 [3:03:27<6:56:16, 1.61it/s] 30%|███ | 17472/57600 [3:03:28<7:05:01, 1.57it/s] 30%|███ | 17473/57600 [3:03:29<7:04:13, 1.58it/s] 30%|███ | 17474/57600 [3:03:29<6:50:08, 1.63it/s] 30%|███ | 17475/57600 [3:03:30<6:59:44, 1.59it/s] 30%|███ | 17476/57600 [3:03:31<6:50:50, 1.63it/s] 30%|███ | 17477/57600 [3:03:31<6:55:26, 1.61it/s] 30%|███ | 17478/57600 [3:03:32<6:59:23, 1.59it/s] 30%|███ | 17479/57600 [3:03:32<7:03:34, 1.58it/s] 30%|███ | 17480/57600 [3:03:33<7:01:28, 1.59it/s] {'loss': 1.2741, 'learning_rate': 7.290592334494773e-07, 'epoch': 192.94} + 30%|███ | 17480/57600 [3:03:33<7:01:28, 1.59it/s] 30%|███ | 17481/57600 [3:03:34<6:58:36, 1.60it/s] 30%|███ | 17482/57600 [3:03:34<6:52:34, 1.62it/s] 30%|███ | 17483/57600 [3:03:35<6:51:03, 1.63it/s] 30%|███ | 17484/57600 [3:03:36<6:52:47, 1.62it/s] 30%|███ | 17485/57600 [3:03:36<6:46:06, 1.65it/s] 30%|███ | 17486/57600 [3:03:37<7:09:57, 1.55it/s] 30%|███ | 17487/57600 [3:03:38<7:12:32, 1.55it/s] 30%|███ | 17488/57600 [3:03:38<7:01:41, 1.59it/s] 30%|███ | 17489/57600 [3:03:39<7:11:21, 1.55it/s] 30%|███ | 17490/57600 [3:03:39<7:10:11, 1.55it/s] 30%|███ | 17491/57600 [3:03:40<7:05:35, 1.57it/s] 30%|███ | 17492/57600 [3:03:41<7:23:44, 1.51it/s] 30%|███ | 17493/57600 [3:03:41<7:11:31, 1.55it/s] 30%|███ | 17494/57600 [3:03:42<7:13:19, 1.54it/s] 30%|███ | 17495/57600 [3:03:43<7:09:58, 1.55it/s] 30%|███ | 17496/57600 [3:03:43<7:10:49, 1.55it/s] 30%|███ | 17497/57600 [3:03:44<7:01:24, 1.59it/s] 30%|███ | 17498/57600 [3:03:44<6:51:24, 1.62it/s] 30%|███ | 17499/57600 [3:03:45<6:56:48, 1.60it/s] 30%|███ | 17500/57600 [3:03:46<7:20:37, 1.52it/s] {'loss': 1.275, 'learning_rate': 7.28745644599303e-07, 'epoch': 193.16} + 30%|███ | 17500/57600 [3:03:46<7:20:37, 1.52it/s] 30%|███ | 17501/57600 [3:03:46<7:08:24, 1.56it/s] 30%|███ | 17502/57600 [3:03:47<7:09:53, 1.55it/s] 30%|███ | 17503/57600 [3:03:48<6:55:14, 1.61it/s] 30%|███ | 17504/57600 [3:03:48<7:01:29, 1.59it/s] 30%|███ | 17505/57600 [3:03:49<6:59:16, 1.59it/s] 30%|███ | 17506/57600 [3:03:50<6:50:38, 1.63it/s] 30%|███ | 17507/57600 [3:03:50<6:59:23, 1.59it/s] 30%|███ | 17508/57600 [3:03:51<7:02:34, 1.58it/s] 30%|███ | 17509/57600 [3:03:51<6:54:43, 1.61it/s] 30%|███ | 17510/57600 [3:03:52<6:52:39, 1.62it/s] 30%|███ | 17511/57600 [3:03:53<6:46:33, 1.64it/s] 30%|███ | 17512/57600 [3:03:53<7:04:06, 1.58it/s] 30%|███ | 17513/57600 [3:03:54<7:06:33, 1.57it/s] 30%|███ | 17514/57600 [3:03:55<7:00:49, 1.59it/s] 30%|███ | 17515/57600 [3:03:55<6:56:00, 1.61it/s] 30%|███ | 17516/57600 [3:03:56<7:06:23, 1.57it/s] 30%|███ | 17517/57600 [3:03:56<6:55:42, 1.61it/s] 30%|███ | 17518/57600 [3:03:57<6:49:35, 1.63it/s] 30%|███ | 17519/57600 [3:03:58<6:42:24, 1.66it/s] 30%|███ | 17520/57600 [3:03:58<6:39:48, 1.67it/s] {'loss': 1.2641, 'learning_rate': 7.284320557491289e-07, 'epoch': 193.38} + 30%|███ | 17520/57600 [3:03:58<6:39:48, 1.67it/s] 30%|███ | 17521/57600 [3:03:59<6:42:12, 1.66it/s] 30%|███ | 17522/57600 [3:03:59<6:51:40, 1.62it/s] 30%|███ | 17523/57600 [3:04:00<7:05:58, 1.57it/s] 30%|███ | 17524/57600 [3:04:01<7:05:37, 1.57it/s] 30%|███ | 17525/57600 [3:04:01<7:05:51, 1.57it/s] 30%|███ | 17526/57600 [3:04:02<6:57:39, 1.60it/s] 30%|███ | 17527/57600 [3:04:03<6:55:21, 1.61it/s] 30%|███ | 17528/57600 [3:04:03<6:43:54, 1.65it/s] 30%|███ | 17529/57600 [3:04:04<6:50:34, 1.63it/s] 30%|███ | 17530/57600 [3:04:04<6:42:04, 1.66it/s] 30%|███ | 17531/57600 [3:04:05<6:36:54, 1.68it/s] 30%|███ | 17532/57600 [3:04:06<6:51:50, 1.62it/s] 30%|███ | 17533/57600 [3:04:06<6:43:34, 1.65it/s] 30%|███ | 17534/57600 [3:04:07<6:48:10, 1.64it/s] 30%|███ | 17535/57600 [3:04:07<6:42:19, 1.66it/s] 30%|███ | 17536/57600 [3:04:08<6:40:26, 1.67it/s] 30%|███ | 17537/57600 [3:04:09<6:41:14, 1.66it/s] 30%|███ | 17538/57600 [3:04:09<6:46:11, 1.64it/s] 30%|███ | 17539/57600 [3:04:10<6:32:00, 1.70it/s] 30%|███ | 17540/57600 [3:04:10<6:33:02, 1.70it/s] {'loss': 1.3078, 'learning_rate': 7.281184668989547e-07, 'epoch': 193.6} + 30%|███ | 17540/57600 [3:04:10<6:33:02, 1.70it/s] 30%|███ | 17541/57600 [3:04:11<6:40:38, 1.67it/s] 30%|███ | 17542/57600 [3:04:12<6:44:07, 1.65it/s] 30%|███ | 17543/57600 [3:04:12<6:52:36, 1.62it/s] 30%|███ | 17544/57600 [3:04:13<6:58:39, 1.59it/s] 30%|███ | 17545/57600 [3:04:14<6:53:12, 1.62it/s] 30%|███ | 17546/57600 [3:04:14<6:54:20, 1.61it/s] 30%|███ | 17547/57600 [3:04:15<6:48:27, 1.63it/s] 30%|███ | 17548/57600 [3:04:15<7:03:43, 1.58it/s] 30%|███ | 17549/57600 [3:04:16<7:01:19, 1.58it/s] 30%|███ | 17550/57600 [3:04:17<7:17:05, 1.53it/s] 30%|███ | 17551/57600 [3:04:17<7:16:06, 1.53it/s] 30%|███ | 17552/57600 [3:04:18<7:08:48, 1.56it/s] 30%|███ | 17553/57600 [3:04:19<7:04:31, 1.57it/s] 30%|███ | 17554/57600 [3:04:19<7:08:06, 1.56it/s] 30%|███ | 17555/57600 [3:04:20<6:58:43, 1.59it/s] 30%|███ | 17556/57600 [3:04:21<6:59:12, 1.59it/s] 30%|███ | 17557/57600 [3:04:21<7:10:08, 1.55it/s] 30%|███ | 17558/57600 [3:04:22<7:17:51, 1.52it/s] 30%|███ | 17559/57600 [3:04:23<7:12:36, 1.54it/s] 30%|███ | 17560/57600 [3:04:23<7:00:46, 1.59it/s] {'loss': 1.2516, 'learning_rate': 7.278048780487804e-07, 'epoch': 193.82} + 30%|███ | 17560/57600 [3:04:23<7:00:46, 1.59it/s] 30%|███ | 17561/57600 [3:04:24<6:53:58, 1.61it/s] 30%|███ | 17562/57600 [3:04:24<7:05:42, 1.57it/s] 30%|███ | 17563/57600 [3:04:25<7:05:47, 1.57it/s] 30%|███ | 17564/57600 [3:04:26<6:58:21, 1.59it/s] 30%|███ | 17565/57600 [3:04:26<6:57:42, 1.60it/s] 30%|███ | 17566/57600 [3:04:27<6:50:31, 1.63it/s] 30%|███ | 17567/57600 [3:04:28<7:00:11, 1.59it/s] 30%|███ | 17568/57600 [3:04:28<6:48:50, 1.63it/s] 31%|███ | 17569/57600 [3:04:29<6:52:13, 1.62it/s] 31%|███ | 17570/57600 [3:04:29<6:47:23, 1.64it/s] 31%|███ | 17571/57600 [3:04:30<6:48:11, 1.63it/s] 31%|███ | 17572/57600 [3:04:31<6:56:36, 1.60it/s] 31%|███ | 17573/57600 [3:04:31<6:56:51, 1.60it/s] 31%|███ | 17574/57600 [3:04:32<6:49:33, 1.63it/s] 31%|███ | 17575/57600 [3:04:32<6:45:42, 1.64it/s] 31%|███ | 17576/57600 [3:04:33<6:50:33, 1.62it/s] 31%|███ | 17577/57600 [3:04:34<7:03:02, 1.58it/s] 31%|███ | 17578/57600 [3:04:34<6:58:58, 1.59it/s] 31%|███ | 17579/57600 [3:04:35<6:59:11, 1.59it/s] 31%|███ | 17580/57600 [3:04:36<6:49:04, 1.63it/s] {'loss': 1.2708, 'learning_rate': 7.274912891986062e-07, 'epoch': 194.04} + 31%|███ | 17580/57600 [3:04:36<6:49:04, 1.63it/s] 31%|███ | 17581/57600 [3:04:36<6:57:41, 1.60it/s] 31%|███ | 17582/57600 [3:04:37<6:43:57, 1.65it/s] 31%|███ | 17583/57600 [3:04:37<6:36:18, 1.68it/s] 31%|███ | 17584/57600 [3:04:38<6:43:08, 1.65it/s] 31%|███ | 17585/57600 [3:04:39<6:43:31, 1.65it/s] 31%|███ | 17586/57600 [3:04:39<6:38:11, 1.67it/s] 31%|███ | 17587/57600 [3:04:40<6:44:35, 1.65it/s] 31%|███ | 17588/57600 [3:04:40<6:42:13, 1.66it/s] 31%|███ | 17589/57600 [3:04:41<6:42:25, 1.66it/s] 31%|███ | 17590/57600 [3:04:42<6:45:57, 1.64it/s] 31%|███ | 17591/57600 [3:04:42<6:59:06, 1.59it/s] 31%|███ | 17592/57600 [3:04:43<6:56:59, 1.60it/s] 31%|███ | 17593/57600 [3:04:44<7:12:06, 1.54it/s] 31%|███ | 17594/57600 [3:04:44<7:06:47, 1.56it/s] 31%|███ | 17595/57600 [3:04:45<6:52:02, 1.62it/s] 31%|███ | 17596/57600 [3:04:45<6:57:22, 1.60it/s] 31%|███ | 17597/57600 [3:04:46<7:12:55, 1.54it/s] 31%|███ | 17598/57600 [3:04:47<6:58:41, 1.59it/s] 31%|███ | 17599/57600 [3:04:47<6:50:25, 1.62it/s] 31%|███ | 17600/57600 [3:04:48<6:55:38, 1.60it/s] {'loss': 1.265, 'learning_rate': 7.271777003484321e-07, 'epoch': 194.26} + 31%|███ | 17600/57600 [3:04:48<6:55:38, 1.60it/s] 31%|███ | 17601/57600 [3:04:49<6:57:57, 1.59it/s] 31%|███ | 17602/57600 [3:04:49<7:01:14, 1.58it/s] 31%|███ | 17603/57600 [3:04:50<6:56:02, 1.60it/s] 31%|███ | 17604/57600 [3:04:50<6:48:46, 1.63it/s] 31%|███ | 17605/57600 [3:04:51<6:51:30, 1.62it/s] 31%|███ | 17606/57600 [3:04:52<6:42:40, 1.66it/s] 31%|███ | 17607/57600 [3:04:52<6:46:46, 1.64it/s] 31%|███ | 17608/57600 [3:04:53<6:39:48, 1.67it/s] 31%|███ | 17609/57600 [3:04:53<6:38:57, 1.67it/s] 31%|███ | 17610/57600 [3:04:54<6:33:41, 1.69it/s] 31%|███ | 17611/57600 [3:04:55<7:11:33, 1.54it/s] 31%|███ | 17612/57600 [3:04:55<7:01:41, 1.58it/s] 31%|███ | 17613/57600 [3:04:56<7:08:13, 1.56it/s] 31%|███ | 17614/57600 [3:04:57<6:52:53, 1.61it/s] 31%|███ | 17615/57600 [3:04:57<7:04:58, 1.57it/s] 31%|███ | 17616/57600 [3:04:58<7:15:41, 1.53it/s] 31%|███ | 17617/57600 [3:04:59<7:16:59, 1.52it/s] 31%|███ | 17618/57600 [3:04:59<7:17:30, 1.52it/s] 31%|███ | 17619/57600 [3:05:00<6:54:51, 1.61it/s] 31%|███ | 17620/57600 [3:05:00<6:56:18, 1.60it/s] {'loss': 1.2597, 'learning_rate': 7.268641114982578e-07, 'epoch': 194.48} + 31%|███ | 17620/57600 [3:05:00<6:56:18, 1.60it/s] 31%|███ | 17621/57600 [3:05:01<6:59:50, 1.59it/s] 31%|███ | 17622/57600 [3:05:02<7:00:11, 1.59it/s] 31%|███ | 17623/57600 [3:05:02<6:58:32, 1.59it/s] 31%|███ | 17624/57600 [3:05:03<6:59:05, 1.59it/s] 31%|███ | 17625/57600 [3:05:04<6:52:48, 1.61it/s] 31%|███ | 17626/57600 [3:05:04<7:15:10, 1.53it/s] 31%|███ | 17627/57600 [3:05:05<7:02:50, 1.58it/s] 31%|███ | 17628/57600 [3:05:06<7:01:09, 1.58it/s] 31%|███ | 17629/57600 [3:05:06<7:14:20, 1.53it/s] 31%|███ | 17630/57600 [3:05:07<7:01:36, 1.58it/s] 31%|███ | 17631/57600 [3:05:07<7:10:24, 1.55it/s] 31%|███ | 17632/57600 [3:05:08<7:07:14, 1.56it/s] 31%|███ | 17633/57600 [3:05:09<6:53:43, 1.61it/s] 31%|███ | 17634/57600 [3:05:09<6:47:50, 1.63it/s] 31%|███ | 17635/57600 [3:05:10<6:44:12, 1.65it/s] 31%|███ | 17636/57600 [3:05:11<6:49:52, 1.63it/s] 31%|███ | 17637/57600 [3:05:11<6:54:58, 1.61it/s] 31%|███ | 17638/57600 [3:05:12<6:50:27, 1.62it/s] 31%|███ | 17639/57600 [3:05:12<6:49:14, 1.63it/s] 31%|███ | 17640/57600 [3:05:13<6:53:52, 1.61it/s] {'loss': 1.2685, 'learning_rate': 7.265505226480835e-07, 'epoch': 194.7} + 31%|███ | 17640/57600 [3:05:13<6:53:52, 1.61it/s] 31%|███ | 17641/57600 [3:05:14<6:43:53, 1.65it/s] 31%|███ | 17642/57600 [3:05:14<6:41:32, 1.66it/s] 31%|███ | 17643/57600 [3:05:15<6:46:51, 1.64it/s] 31%|███ | 17644/57600 [3:05:15<6:45:52, 1.64it/s] 31%|███ | 17645/57600 [3:05:16<6:47:17, 1.63it/s] 31%|███ | 17646/57600 [3:05:17<6:40:30, 1.66it/s] 31%|███ | 17647/57600 [3:05:17<6:58:53, 1.59it/s] 31%|███ | 17648/57600 [3:05:18<6:54:37, 1.61it/s] 31%|███ | 17649/57600 [3:05:18<6:45:12, 1.64it/s] 31%|███ | 17650/57600 [3:05:19<6:51:35, 1.62it/s] 31%|███ | 17651/57600 [3:05:20<6:38:45, 1.67it/s] 31%|███ | 17652/57600 [3:05:20<6:41:50, 1.66it/s] 31%|███ | 17653/57600 [3:05:21<6:46:42, 1.64it/s] 31%|███ | 17654/57600 [3:05:21<6:39:02, 1.67it/s] 31%|███ | 17655/57600 [3:05:22<7:00:17, 1.58it/s] 31%|███ | 17656/57600 [3:05:23<6:54:15, 1.61it/s] 31%|███ | 17657/57600 [3:05:23<7:05:57, 1.56it/s] 31%|███ | 17658/57600 [3:05:24<6:55:28, 1.60it/s] 31%|███ | 17659/57600 [3:05:25<6:53:10, 1.61it/s] 31%|███ | 17660/57600 [3:05:25<6:48:29, 1.63it/s] {'loss': 1.2869, 'learning_rate': 7.262369337979094e-07, 'epoch': 194.92} + 31%|███ | 17660/57600 [3:05:25<6:48:29, 1.63it/s] 31%|███ | 17661/57600 [3:05:26<6:48:23, 1.63it/s] 31%|███ | 17662/57600 [3:05:27<6:50:51, 1.62it/s] 31%|███ | 17663/57600 [3:05:27<6:58:54, 1.59it/s] 31%|███ | 17664/57600 [3:05:28<6:59:03, 1.59it/s] 31%|███ | 17665/57600 [3:05:28<6:57:07, 1.60it/s] 31%|███ | 17666/57600 [3:05:29<6:48:41, 1.63it/s] 31%|███ | 17667/57600 [3:05:30<6:50:39, 1.62it/s] 31%|███ | 17668/57600 [3:05:30<6:58:51, 1.59it/s] 31%|███ | 17669/57600 [3:05:31<6:59:16, 1.59it/s] 31%|███ | 17670/57600 [3:05:32<7:13:27, 1.54it/s] 31%|███ | 17671/57600 [3:05:32<7:14:40, 1.53it/s] 31%|███ | 17672/57600 [3:05:33<7:13:06, 1.54it/s] 31%|███ | 17673/57600 [3:05:34<7:03:00, 1.57it/s] 31%|███ | 17674/57600 [3:05:34<7:12:40, 1.54it/s] 31%|███ | 17675/57600 [3:05:35<6:51:59, 1.62it/s] 31%|███ | 17676/57600 [3:05:35<6:51:20, 1.62it/s] 31%|███ | 17677/57600 [3:05:36<6:43:49, 1.65it/s] 31%|███ | 17678/57600 [3:05:37<6:52:26, 1.61it/s] 31%|███ | 17679/57600 [3:05:37<7:08:21, 1.55it/s] 31%|███ | 17680/57600 [3:05:38<7:05:45, 1.56it/s] {'loss': 1.2597, 'learning_rate': 7.259233449477352e-07, 'epoch': 195.14} + 31%|███ | 17680/57600 [3:05:38<7:05:45, 1.56it/s] 31%|███ | 17681/57600 [3:05:39<6:55:55, 1.60it/s] 31%|███ | 17682/57600 [3:05:39<6:47:07, 1.63it/s] 31%|███ | 17683/57600 [3:05:40<6:49:35, 1.62it/s] 31%|███ | 17684/57600 [3:05:40<6:54:26, 1.61it/s] 31%|███ | 17685/57600 [3:05:41<6:53:49, 1.61it/s] 31%|███ | 17686/57600 [3:05:42<6:50:41, 1.62it/s] 31%|███ | 17687/57600 [3:05:42<6:56:11, 1.60it/s] 31%|███ | 17688/57600 [3:05:43<6:39:17, 1.67it/s] 31%|███ | 17689/57600 [3:05:43<6:55:49, 1.60it/s] 31%|███ | 17690/57600 [3:05:44<7:04:12, 1.57it/s] 31%|███ | 17691/57600 [3:05:45<6:59:03, 1.59it/s] 31%|███ | 17692/57600 [3:05:45<6:50:32, 1.62it/s] 31%|███ | 17693/57600 [3:05:46<6:45:32, 1.64it/s] 31%|███ | 17694/57600 [3:05:47<6:51:54, 1.61it/s] 31%|███ | 17695/57600 [3:05:47<6:48:23, 1.63it/s] 31%|███ | 17696/57600 [3:05:48<6:53:13, 1.61it/s] 31%|███ | 17697/57600 [3:05:48<6:59:02, 1.59it/s] 31%|███ | 17698/57600 [3:05:49<6:49:39, 1.62it/s] 31%|███ | 17699/57600 [3:05:50<6:38:19, 1.67it/s] 31%|███ | 17700/57600 [3:05:50<6:56:36, 1.60it/s] {'loss': 1.2579, 'learning_rate': 7.25609756097561e-07, 'epoch': 195.36} + 31%|███ | 17700/57600 [3:05:50<6:56:36, 1.60it/s] 31%|███ | 17701/57600 [3:05:51<7:01:18, 1.58it/s] 31%|███ | 17702/57600 [3:05:52<7:00:21, 1.58it/s] 31%|███ | 17703/57600 [3:05:52<6:54:16, 1.61it/s] 31%|███ | 17704/57600 [3:05:53<6:52:56, 1.61it/s] 31%|███ | 17705/57600 [3:05:53<6:52:46, 1.61it/s] 31%|███ | 17706/57600 [3:05:54<7:02:30, 1.57it/s] 31%|███ | 17707/57600 [3:05:55<6:54:55, 1.60it/s] 31%|███ | 17708/57600 [3:05:55<6:52:37, 1.61it/s] 31%|███ | 17709/57600 [3:05:56<7:08:02, 1.55it/s] 31%|███ | 17710/57600 [3:05:57<7:14:31, 1.53it/s] 31%|███ | 17711/57600 [3:05:57<7:07:43, 1.55it/s] 31%|███ | 17712/57600 [3:05:58<7:00:29, 1.58it/s] 31%|███ | 17713/57600 [3:05:58<6:49:48, 1.62it/s] 31%|███ | 17714/57600 [3:05:59<6:44:03, 1.65it/s] 31%|███ | 17715/57600 [3:06:00<6:34:48, 1.68it/s] 31%|███ | 17716/57600 [3:06:00<6:45:16, 1.64it/s] 31%|███ | 17717/57600 [3:06:01<6:46:31, 1.64it/s] 31%|███ | 17718/57600 [3:06:02<6:56:41, 1.60it/s] 31%|███ | 17719/57600 [3:06:02<6:52:43, 1.61it/s] 31%|███ | 17720/57600 [3:06:03<7:02:03, 1.57it/s] {'loss': 1.2641, 'learning_rate': 7.252961672473867e-07, 'epoch': 195.58} + 31%|███ | 17720/57600 [3:06:03<7:02:03, 1.57it/s] 31%|███ | 17721/57600 [3:06:03<6:51:30, 1.62it/s] 31%|███ | 17722/57600 [3:06:04<6:58:12, 1.59it/s] 31%|███ | 17723/57600 [3:06:05<7:19:01, 1.51it/s] 31%|███ | 17724/57600 [3:06:05<7:18:36, 1.52it/s] 31%|███ | 17725/57600 [3:06:06<7:13:47, 1.53it/s] 31%|███ | 17726/57600 [3:06:07<7:09:00, 1.55it/s] 31%|███ | 17727/57600 [3:06:07<7:08:11, 1.55it/s] 31%|███ | 17728/57600 [3:06:08<7:07:49, 1.55it/s] 31%|███ | 17729/57600 [3:06:09<7:01:03, 1.58it/s] 31%|���██ | 17730/57600 [3:06:09<6:58:35, 1.59it/s] 31%|███ | 17731/57600 [3:06:10<7:02:31, 1.57it/s] 31%|███ | 17732/57600 [3:06:10<7:02:09, 1.57it/s] 31%|███ | 17733/57600 [3:06:11<6:54:40, 1.60it/s] 31%|███ | 17734/57600 [3:06:12<6:39:23, 1.66it/s] 31%|███ | 17735/57600 [3:06:12<6:44:11, 1.64it/s] 31%|███ | 17736/57600 [3:06:13<6:43:27, 1.65it/s] 31%|███ | 17737/57600 [3:06:14<6:49:10, 1.62it/s] 31%|███ | 17738/57600 [3:06:14<6:52:29, 1.61it/s] 31%|███ | 17739/57600 [3:06:15<6:57:02, 1.59it/s] 31%|███ | 17740/57600 [3:06:15<6:49:57, 1.62it/s] {'loss': 1.2782, 'learning_rate': 7.249825783972126e-07, 'epoch': 195.81} + 31%|███ | 17740/57600 [3:06:15<6:49:57, 1.62it/s] 31%|███ | 17741/57600 [3:06:16<6:52:02, 1.61it/s] 31%|███ | 17742/57600 [3:06:17<6:52:56, 1.61it/s] 31%|███ | 17743/57600 [3:06:17<6:42:23, 1.65it/s] 31%|███ | 17744/57600 [3:06:18<6:34:11, 1.69it/s] 31%|███ | 17745/57600 [3:06:18<6:24:32, 1.73it/s] 31%|███ | 17746/57600 [3:06:19<6:32:12, 1.69it/s] 31%|███ | 17747/57600 [3:06:19<6:22:18, 1.74it/s] 31%|███ | 17748/57600 [3:06:20<6:29:55, 1.70it/s] 31%|███ | 17749/57600 [3:06:21<6:39:42, 1.66it/s] 31%|███ | 17750/57600 [3:06:21<6:40:45, 1.66it/s] 31%|███ | 17751/57600 [3:06:22<6:44:54, 1.64it/s] 31%|███ | 17752/57600 [3:06:23<6:46:28, 1.63it/s] 31%|███ | 17753/57600 [3:06:23<6:56:10, 1.60it/s] 31%|███ | 17754/57600 [3:06:24<6:57:29, 1.59it/s] 31%|███ | 17755/57600 [3:06:25<7:09:42, 1.55it/s] 31%|███ | 17756/57600 [3:06:25<7:09:49, 1.54it/s] 31%|███ | 17757/57600 [3:06:26<7:05:30, 1.56it/s] 31%|███ | 17758/57600 [3:06:27<7:16:14, 1.52it/s] 31%|███ | 17759/57600 [3:06:27<6:55:16, 1.60it/s] 31%|███ | 17760/57600 [3:06:28<6:55:18, 1.60it/s] {'loss': 1.2705, 'learning_rate': 7.246689895470382e-07, 'epoch': 196.03} + 31%|███ | 17760/57600 [3:06:28<6:55:18, 1.60it/s] 31%|███ | 17761/57600 [3:06:28<6:56:44, 1.59it/s] 31%|███ | 17762/57600 [3:06:29<7:05:33, 1.56it/s] 31%|███ | 17763/57600 [3:06:30<6:52:26, 1.61it/s] 31%|███ | 17764/57600 [3:06:30<6:56:42, 1.59it/s] 31%|███ | 17765/57600 [3:06:31<7:02:36, 1.57it/s] 31%|███ | 17766/57600 [3:06:32<7:05:27, 1.56it/s] 31%|███ | 17767/57600 [3:06:32<7:00:16, 1.58it/s] 31%|███ | 17768/57600 [3:06:33<7:09:37, 1.55it/s] 31%|███ | 17769/57600 [3:06:33<6:56:24, 1.59it/s] 31%|███ | 17770/57600 [3:06:34<6:55:09, 1.60it/s] 31%|███ | 17771/57600 [3:06:35<6:54:04, 1.60it/s] 31%|███ | 17772/57600 [3:06:35<6:54:28, 1.60it/s] 31%|███ | 17773/57600 [3:06:36<7:01:13, 1.58it/s] 31%|███ | 17774/57600 [3:06:37<6:55:57, 1.60it/s] 31%|███ | 17775/57600 [3:06:37<6:50:35, 1.62it/s] 31%|███ | 17776/57600 [3:06:38<6:41:44, 1.65it/s] 31%|███ | 17777/57600 [3:06:38<6:59:45, 1.58it/s] 31%|███ | 17778/57600 [3:06:39<6:59:14, 1.58it/s] 31%|███ | 17779/57600 [3:06:40<7:00:47, 1.58it/s] 31%|███ | 17780/57600 [3:06:40<6:57:32, 1.59it/s] {'loss': 1.2878, 'learning_rate': 7.24355400696864e-07, 'epoch': 196.25} + 31%|███ | 17780/57600 [3:06:40<6:57:32, 1.59it/s] 31%|███ | 17781/57600 [3:06:41<6:48:48, 1.62it/s] 31%|███ | 17782/57600 [3:06:41<6:45:29, 1.64it/s] 31%|███ | 17783/57600 [3:06:42<6:50:37, 1.62it/s] 31%|███ | 17784/57600 [3:06:43<7:09:38, 1.54it/s] 31%|███ | 17785/57600 [3:06:43<6:57:11, 1.59it/s] 31%|███ | 17786/57600 [3:06:44<7:01:26, 1.57it/s] 31%|███ | 17787/57600 [3:06:45<6:49:07, 1.62it/s] 31%|███ | 17788/57600 [3:06:45<6:45:13, 1.64it/s] 31%|███ | 17789/57600 [3:06:46<6:40:05, 1.66it/s] 31%|███ | 17790/57600 [3:06:46<6:50:58, 1.61it/s] 31%|███ | 17791/57600 [3:06:47<7:00:51, 1.58it/s] 31%|███ | 17792/57600 [3:06:48<6:53:51, 1.60it/s] 31%|███ | 17793/57600 [3:06:48<6:54:10, 1.60it/s] 31%|███ | 17794/57600 [3:06:49<6:52:19, 1.61it/s] 31%|███ | 17795/57600 [3:06:50<6:51:34, 1.61it/s] 31%|███ | 17796/57600 [3:06:50<6:53:41, 1.60it/s] 31%|███ | 17797/57600 [3:06:51<6:42:45, 1.65it/s] 31%|███ | 17798/57600 [3:06:51<6:34:14, 1.68it/s] 31%|███ | 17799/57600 [3:06:52<6:33:12, 1.69it/s] 31%|███ | 17800/57600 [3:06:53<6:34:45, 1.68it/s] {'loss': 1.3158, 'learning_rate': 7.240418118466899e-07, 'epoch': 196.47} + 31%|███ | 17800/57600 [3:06:53<6:34:45, 1.68it/s] 31%|███ | 17801/57600 [3:06:53<6:31:46, 1.69it/s] 31%|███ | 17802/57600 [3:06:54<6:35:01, 1.68it/s] 31%|███ | 17803/57600 [3:06:54<6:43:23, 1.64it/s] 31%|███ | 17804/57600 [3:06:55<6:52:03, 1.61it/s] 31%|███ | 17805/57600 [3:06:56<6:57:40, 1.59it/s] 31%|███ | 17806/57600 [3:06:56<7:04:42, 1.56it/s] 31%|███ | 17807/57600 [3:06:57<7:05:35, 1.56it/s] 31%|███ | 17808/57600 [3:06:58<7:07:30, 1.55it/s] 31%|███ | 17809/57600 [3:06:58<6:57:55, 1.59it/s] 31%|███ | 17810/57600 [3:06:59<7:04:52, 1.56it/s] 31%|███ | 17811/57600 [3:07:00<7:09:35, 1.54it/s] 31%|███ | 17812/57600 [3:07:00<7:30:52, 1.47it/s] 31%|███ | 17813/57600 [3:07:01<7:09:08, 1.55it/s] 31%|███ | 17814/57600 [3:07:02<7:19:46, 1.51it/s] 31%|███ | 17815/57600 [3:07:02<7:13:39, 1.53it/s] 31%|███ | 17816/57600 [3:07:03<6:59:59, 1.58it/s] 31%|███ | 17817/57600 [3:07:03<6:52:15, 1.61it/s] 31%|███ | 17818/57600 [3:07:04<6:59:06, 1.58it/s] 31%|███ | 17819/57600 [3:07:05<6:53:29, 1.60it/s] 31%|███ | 17820/57600 [3:07:05<6:48:28, 1.62it/s] {'loss': 1.2371, 'learning_rate': 7.237282229965157e-07, 'epoch': 196.69} + 31%|███ | 17820/57600 [3:07:05<6:48:28, 1.62it/s] 31%|███ | 17821/57600 [3:07:06<6:42:36, 1.65it/s] 31%|███ | 17822/57600 [3:07:06<6:42:17, 1.65it/s] 31%|███ | 17823/57600 [3:07:07<6:51:11, 1.61it/s] 31%|███ | 17824/57600 [3:07:08<7:00:37, 1.58it/s] 31%|███ | 17825/57600 [3:07:08<6:58:55, 1.58it/s] 31%|███ | 17826/57600 [3:07:09<6:47:10, 1.63it/s] 31%|███ | 17827/57600 [3:07:10<6:40:06, 1.66it/s] 31%|███ | 17828/57600 [3:07:10<6:37:22, 1.67it/s] 31%|███ | 17829/57600 [3:07:11<6:43:27, 1.64it/s] 31%|███ | 17830/57600 [3:07:11<6:57:16, 1.59it/s] 31%|███ | 17831/57600 [3:07:12<7:00:02, 1.58it/s] 31%|███ | 17832/57600 [3:07:13<7:07:42, 1.55it/s] 31%|███ | 17833/57600 [3:07:13<6:57:10, 1.59it/s] 31%|███ | 17834/57600 [3:07:14<6:45:34, 1.63it/s] 31%|███ | 17835/57600 [3:07:15<6:49:29, 1.62it/s] 31%|███ | 17836/57600 [3:07:15<6:39:23, 1.66it/s] 31%|███ | 17837/57600 [3:07:16<6:41:11, 1.65it/s] 31%|███ | 17838/57600 [3:07:16<6:41:52, 1.65it/s] 31%|███ | 17839/57600 [3:07:17<6:39:17, 1.66it/s] 31%|███ | 17840/57600 [3:07:18<6:37:59, 1.66it/s] {'loss': 1.2553, 'learning_rate': 7.234146341463414e-07, 'epoch': 196.91} + 31%|███ | 17840/57600 [3:07:18<6:37:59, 1.66it/s] 31%|███ | 17841/57600 [3:07:18<6:35:10, 1.68it/s] 31%|███ | 17842/57600 [3:07:19<6:42:47, 1.65it/s] 31%|███ | 17843/57600 [3:07:19<6:49:53, 1.62it/s] 31%|███ | 17844/57600 [3:07:20<6:40:30, 1.65it/s] 31%|███ | 17845/57600 [3:07:21<6:55:15, 1.60it/s] 31%|███ | 17846/57600 [3:07:21<6:52:37, 1.61it/s] 31%|███ | 17847/57600 [3:07:22<6:37:19, 1.67it/s] 31%|███ | 17848/57600 [3:07:22<6:41:17, 1.65it/s] 31%|███ | 17849/57600 [3:07:23<7:03:18, 1.57it/s] 31%|███ | 17850/57600 [3:07:24<7:01:22, 1.57it/s] 31%|███ | 17851/57600 [3:07:24<6:46:28, 1.63it/s] 31%|███ | 17852/57600 [3:07:25<6:36:57, 1.67it/s] 31%|███ | 17853/57600 [3:07:26<6:49:15, 1.62it/s] 31%|███ | 17854/57600 [3:07:26<7:02:11, 1.57it/s] 31%|███ | 17855/57600 [3:07:27<6:58:23, 1.58it/s] 31%|███ | 17856/57600 [3:07:27<6:47:39, 1.62it/s] 31%|███ | 17857/57600 [3:07:28<6:48:15, 1.62it/s] 31%|███ | 17858/57600 [3:07:29<6:54:48, 1.60it/s] 31%|███ | 17859/57600 [3:07:29<6:49:43, 1.62it/s] 31%|███ | 17860/57600 [3:07:30<6:46:59, 1.63it/s] {'loss': 1.2847, 'learning_rate': 7.231010452961672e-07, 'epoch': 197.13} + 31%|███ | 17860/57600 [3:07:30<6:46:59, 1.63it/s] 31%|███ | 17861/57600 [3:07:31<6:41:23, 1.65it/s] 31%|███ | 17862/57600 [3:07:31<7:05:03, 1.56it/s] 31%|███ | 17863/57600 [3:07:32<7:09:47, 1.54it/s] 31%|███ | 17864/57600 [3:07:32<6:58:42, 1.58it/s] 31%|███ | 17865/57600 [3:07:33<6:50:26, 1.61it/s] 31%|███ | 17866/57600 [3:07:34<6:49:34, 1.62it/s] 31%|███ | 17867/57600 [3:07:34<6:43:04, 1.64it/s] 31%|███ | 17868/57600 [3:07:35<6:41:01, 1.65it/s] 31%|███ | 17869/57600 [3:07:35<6:32:58, 1.69it/s] 31%|███ | 17870/57600 [3:07:36<6:53:33, 1.60it/s] 31%|███ | 17871/57600 [3:07:37<7:07:50, 1.55it/s] 31%|███ | 17872/57600 [3:07:37<7:06:47, 1.55it/s] 31%|███ | 17873/57600 [3:07:38<6:57:53, 1.58it/s] 31%|███ | 17874/57600 [3:07:39<7:01:25, 1.57it/s] 31%|███ | 17875/57600 [3:07:39<6:56:31, 1.59it/s] 31%|███ | 17876/57600 [3:07:40<6:55:34, 1.59it/s] 31%|███ | 17877/57600 [3:07:41<6:49:18, 1.62it/s] 31%|███ | 17878/57600 [3:07:41<6:58:37, 1.58it/s] 31%|███ | 17879/57600 [3:07:42<7:17:10, 1.51it/s] 31%|███ | 17880/57600 [3:07:43<7:12:01, 1.53it/s] {'loss': 1.2516, 'learning_rate': 7.22787456445993e-07, 'epoch': 197.35} + 31%|███ | 17880/57600 [3:07:43<7:12:01, 1.53it/s] 31%|███ | 17881/57600 [3:07:43<6:52:05, 1.61it/s] 31%|███ | 17882/57600 [3:07:44<7:13:55, 1.53it/s] 31%|███ | 17883/57600 [3:07:44<7:07:37, 1.55it/s] 31%|███ | 17884/57600 [3:07:45<6:58:50, 1.58it/s] 31%|███ | 17885/57600 [3:07:46<6:47:45, 1.62it/s] 31%|███ | 17886/57600 [3:07:46<6:58:23, 1.58it/s] 31%|███ | 17887/57600 [3:07:47<6:41:34, 1.65it/s] 31%|███ | 17888/57600 [3:07:47<6:41:31, 1.65it/s] 31%|███ | 17889/57600 [3:07:48<6:46:27, 1.63it/s] 31%|███ | 17890/57600 [3:07:49<6:47:32, 1.62it/s] 31%|███ | 17891/57600 [3:07:49<6:48:10, 1.62it/s] 31%|███ | 17892/57600 [3:07:50<6:53:36, 1.60it/s] 31%|███ | 17893/57600 [3:07:51<6:56:07, 1.59it/s] 31%|███ | 17894/57600 [3:07:51<6:49:12, 1.62it/s] 31%|███ | 17895/57600 [3:07:52<6:54:22, 1.60it/s] 31%|███ | 17896/57600 [3:07:52<6:50:30, 1.61it/s] 31%|███ | 17897/57600 [3:07:53<7:00:18, 1.57it/s] 31%|███ | 17898/57600 [3:07:54<6:47:45, 1.62it/s] 31%|███ | 17899/57600 [3:07:54<6:57:28, 1.58it/s] 31%|███ | 17900/57600 [3:07:55<7:00:06, 1.57it/s] {'loss': 1.2802, 'learning_rate': 7.224738675958188e-07, 'epoch': 197.57} + 31%|███ | 17900/57600 [3:07:55<7:00:06, 1.57it/s] 31%|███ | 17901/57600 [3:07:56<7:03:53, 1.56it/s] 31%|███ | 17902/57600 [3:07:56<7:02:57, 1.56it/s] 31%|███ | 17903/57600 [3:07:57<6:58:41, 1.58it/s] 31%|███ | 17904/57600 [3:07:58<7:06:22, 1.55it/s] 31%|███ | 17905/57600 [3:07:58<6:59:21, 1.58it/s] 31%|███ | 17906/57600 [3:07:59<7:05:41, 1.55it/s] 31%|███ | 17907/57600 [3:07:59<6:55:15, 1.59it/s] 31%|███ | 17908/57600 [3:08:00<6:49:10, 1.62it/s] 31%|███ | 17909/57600 [3:08:01<6:51:43, 1.61it/s] 31%|███ | 17910/57600 [3:08:01<6:48:18, 1.62it/s] 31%|███ | 17911/57600 [3:08:02<6:40:24, 1.65it/s] 31%|███ | 17912/57600 [3:08:03<6:51:43, 1.61it/s] 31%|███ | 17913/57600 [3:08:03<6:47:25, 1.62it/s] 31%|███ | 17914/57600 [3:08:04<6:47:25, 1.62it/s] 31%|███ | 17915/57600 [3:08:04<7:05:23, 1.55it/s] 31%|███ | 17916/57600 [3:08:05<6:59:59, 1.57it/s] 31%|███ | 17917/57600 [3:08:06<6:58:43, 1.58it/s] 31%|███ | 17918/57600 [3:08:06<6:59:01, 1.58it/s] 31%|███ | 17919/57600 [3:08:07<6:42:45, 1.64it/s] 31%|███ | 17920/57600 [3:08:07<6:35:39, 1.67it/s] {'loss': 1.2839, 'learning_rate': 7.221602787456446e-07, 'epoch': 197.79} + 31%|███ | 17920/57600 [3:08:07<6:35:39, 1.67it/s] 31%|███ | 17921/57600 [3:08:08<6:37:36, 1.66it/s] 31%|███ | 17922/57600 [3:08:09<7:13:31, 1.53it/s] 31%|███ | 17923/57600 [3:08:09<7:05:01, 1.56it/s] 31%|███ | 17924/57600 [3:08:10<6:56:34, 1.59it/s] 31%|███ | 17925/57600 [3:08:11<6:45:43, 1.63it/s] 31%|███ | 17926/57600 [3:08:11<6:42:04, 1.64it/s] 31%|███ | 17927/57600 [3:08:12<6:47:21, 1.62it/s] 31%|███ | 17928/57600 [3:08:13<6:48:29, 1.62it/s] 31%|███ | 17929/57600 [3:08:13<6:37:43, 1.66it/s] 31%|███ | 17930/57600 [3:08:14<6:42:18, 1.64it/s] 31%|███ | 17931/57600 [3:08:14<6:42:51, 1.64it/s] 31%|███ | 17932/57600 [3:08:15<6:44:34, 1.63it/s] 31%|███ | 17933/57600 [3:08:16<6:38:24, 1.66it/s] 31%|███ | 17934/57600 [3:08:16<6:39:58, 1.65it/s] 31%|███ | 17935/57600 [3:08:17<6:39:47, 1.65it/s] 31%|███ | 17936/57600 [3:08:17<6:51:48, 1.61it/s] 31%|███ | 17937/57600 [3:08:18<7:02:10, 1.57it/s] 31%|███ | 17938/57600 [3:08:19<7:01:34, 1.57it/s] 31%|██��� | 17939/57600 [3:08:19<7:06:35, 1.55it/s] 31%|███ | 17940/57600 [3:08:20<7:15:52, 1.52it/s] {'loss': 1.2259, 'learning_rate': 7.218466898954704e-07, 'epoch': 198.01} + 31%|███ | 17940/57600 [3:08:20<7:15:52, 1.52it/s] 31%|███ | 17941/57600 [3:08:21<7:01:12, 1.57it/s] 31%|███ | 17942/57600 [3:08:21<7:08:38, 1.54it/s] 31%|███ | 17943/57600 [3:08:22<7:21:25, 1.50it/s] 31%|███ | 17944/57600 [3:08:23<7:05:43, 1.55it/s] 31%|███ | 17945/57600 [3:08:23<7:02:02, 1.57it/s] 31%|███ | 17946/57600 [3:08:24<7:04:58, 1.56it/s] 31%|███ | 17947/57600 [3:08:25<6:59:27, 1.58it/s] 31%|███ | 17948/57600 [3:08:25<6:57:58, 1.58it/s] 31%|███ | 17949/57600 [3:08:26<6:47:33, 1.62it/s] 31%|███ | 17950/57600 [3:08:26<7:01:48, 1.57it/s] 31%|███ | 17951/57600 [3:08:27<6:45:28, 1.63it/s] 31%|███ | 17952/57600 [3:08:28<6:43:48, 1.64it/s] 31%|███ | 17953/57600 [3:08:28<6:52:10, 1.60it/s] 31%|███ | 17954/57600 [3:08:29<6:44:50, 1.63it/s] 31%|███ | 17955/57600 [3:08:29<6:48:10, 1.62it/s] 31%|███ | 17956/57600 [3:08:30<6:42:47, 1.64it/s] 31%|███ | 17957/57600 [3:08:31<6:40:57, 1.65it/s] 31%|███ | 17958/57600 [3:08:31<6:38:41, 1.66it/s] 31%|███ | 17959/57600 [3:08:32<7:04:25, 1.56it/s] 31%|███ | 17960/57600 [3:08:33<7:03:00, 1.56it/s] {'loss': 1.2203, 'learning_rate': 7.215331010452961e-07, 'epoch': 198.23} + 31%|███ | 17960/57600 [3:08:33<7:03:00, 1.56it/s] 31%|███ | 17961/57600 [3:08:33<6:52:15, 1.60it/s] 31%|███ | 17962/57600 [3:08:34<6:48:22, 1.62it/s] 31%|███ | 17963/57600 [3:08:34<6:45:49, 1.63it/s] 31%|███ | 17964/57600 [3:08:35<6:36:21, 1.67it/s] 31%|███ | 17965/57600 [3:08:36<6:45:34, 1.63it/s] 31%|███ | 17966/57600 [3:08:36<6:54:37, 1.59it/s] 31%|███ | 17967/57600 [3:08:37<6:50:39, 1.61it/s] 31%|███ | 17968/57600 [3:08:37<6:49:19, 1.61it/s] 31%|███ | 17969/57600 [3:08:38<6:47:13, 1.62it/s] 31%|███ | 17970/57600 [3:08:39<7:00:51, 1.57it/s] 31%|███ | 17971/57600 [3:08:39<7:08:41, 1.54it/s] 31%|███ | 17972/57600 [3:08:40<7:04:25, 1.56it/s] 31%|███ | 17973/57600 [3:08:41<6:49:01, 1.61it/s] 31%|███ | 17974/57600 [3:08:41<6:59:24, 1.57it/s] 31%|███ | 17975/57600 [3:08:42<6:58:04, 1.58it/s] 31%|███ | 17976/57600 [3:08:43<6:51:43, 1.60it/s] 31%|███ | 17977/57600 [3:08:43<7:00:50, 1.57it/s] 31%|███ | 17978/57600 [3:08:44<6:59:01, 1.58it/s] 31%|███ | 17979/57600 [3:08:44<6:48:46, 1.62it/s] 31%|███ | 17980/57600 [3:08:45<6:56:49, 1.58it/s] {'loss': 1.2837, 'learning_rate': 7.212195121951219e-07, 'epoch': 198.45} + 31%|███ | 17980/57600 [3:08:45<6:56:49, 1.58it/s] 31%|███ | 17981/57600 [3:08:46<6:38:30, 1.66it/s] 31%|███ | 17982/57600 [3:08:46<6:37:22, 1.66it/s] 31%|███ | 17983/57600 [3:08:47<6:22:39, 1.73it/s] 31%|███ | 17984/57600 [3:08:47<6:31:26, 1.69it/s] 31%|███ | 17985/57600 [3:08:48<6:26:06, 1.71it/s] 31%|███ | 17986/57600 [3:08:49<6:41:59, 1.64it/s] 31%|███ | 17987/57600 [3:08:49<6:31:07, 1.69it/s] 31%|███ | 17988/57600 [3:08:50<6:24:32, 1.72it/s] 31%|███ | 17989/57600 [3:08:50<6:32:13, 1.68it/s] 31%|███ | 17990/57600 [3:08:51<6:29:49, 1.69it/s] 31%|███ | 17991/57600 [3:08:52<6:35:28, 1.67it/s] 31%|███ | 17992/57600 [3:08:52<6:28:31, 1.70it/s] 31%|███ | 17993/57600 [3:08:53<6:44:00, 1.63it/s] 31%|███ | 17994/57600 [3:08:53<6:41:53, 1.64it/s] 31%|███ | 17995/57600 [3:08:54<6:53:16, 1.60it/s] 31%|███ | 17996/57600 [3:08:55<7:05:27, 1.55it/s] 31%|███ | 17997/57600 [3:08:55<7:18:30, 1.51it/s] 31%|███ | 17998/57600 [3:08:56<7:05:20, 1.55it/s] 31%|███ | 17999/57600 [3:08:57<7:14:47, 1.52it/s] 31%|███▏ | 18000/57600 [3:08:57<7:04:51, 1.55it/s] {'loss': 1.2906, 'learning_rate': 7.209059233449475e-07, 'epoch': 198.68} + 31%|███▏ | 18000/57600 [3:08:57<7:04:51, 1.55it/s]Generation Kwargs: +{'max_length': 384, 'max_gen_length': 380, 'num_beams': 5} + + 0%| | 0/6 [00:00> Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co./docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41. +Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2} + 31%|███▏ | 18001/57600 [3:09:19<77:58:33, 7.09s/it] 31%|███▏ | 18002/57600 [3:09:20<56:48:56, 5.17s/it] 31%|███▏ | 18003/57600 [3:09:21<42:01:40, 3.82s/it] 31%|███▏ | 18004/57600 [3:09:22<31:42:41, 2.88s/it] 31%|███▏ | 18005/57600 [3:09:22<24:28:20, 2.23s/it] 31%|███▏ | 18006/57600 [3:09:23<19:12:19, 1.75s/it] 31%|███▏ | 18007/57600 [3:09:23<15:16:34, 1.39s/it] 31%|███▏ | 18008/57600 [3:09:24<12:51:51, 1.17s/it] 31%|███▏ | 18009/57600 [3:09:25<11:24:24, 1.04s/it] 31%|███▏ | 18010/57600 [3:09:25<10:10:26, 1.08it/s] 31%|███▏ | 18011/57600 [3:09:26<9:21:31, 1.18it/s] 31%|███▏ | 18012/57600 [3:09:27<8:45:11, 1.26it/s] 31%|███▏ | 18013/57600 [3:09:27<8:20:44, 1.32it/s] 31%|███▏ | 18014/57600 [3:09:28<7:57:43, 1.38it/s] 31%|███▏ | 18015/57600 [3:09:29<7:53:51, 1.39it/s] 31%|███▏ | 18016/57600 [3:09:29<7:37:21, 1.44it/s] 31%|███▏ | 18017/57600 [3:09:30<7:43:42, 1.42it/s] 31%|███▏ | 18018/57600 [3:09:31<7:34:06, 1.45it/s] 31%|███▏ | 18019/57600 [3:09:32<7:29:31, 1.47it/s] 31%|███▏ | 18020/57600 [3:09:32<7:18:42, 1.50it/s] {'loss': 1.2383, 'learning_rate': 7.205923344947735e-07, 'epoch': 198.9} + 31%|███▏ | 18020/57600 [3:09:32<7:18:42, 1.50it/s] 31%|███▏ | 18021/57600 [3:09:33<7:18:46, 1.50it/s] 31%|███▏ | 18022/57600 [3:09:33<7:22:52, 1.49it/s] 31%|███▏ | 18023/57600 [3:09:34<7:13:30, 1.52it/s] 31%|███▏ | 18024/57600 [3:09:35<7:21:33, 1.49it/s] 31%|███▏ | 18025/57600 [3:09:35<7:16:04, 1.51it/s] 31%|███▏ | 18026/57600 [3:09:36<7:17:04, 1.51it/s] 31%|███▏ | 18027/57600 [3:09:37<7:03:02, 1.56it/s] 31%|███▏ | 18028/57600 [3:09:37<7:01:17, 1.57it/s] 31%|███▏ | 18029/57600 [3:09:38<7:01:10, 1.57it/s]/opt/conda/lib/python3.10/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock. + self.pid = os.fork() + 31%|███▏ | 18030/57600 [3:09:39<7:22:51, 1.49it/s] 31%|███▏ | 18031/57600 [3:09:39<7:07:52, 1.54it/s] 31%|███▏ | 18032/57600 [3:09:40<7:03:41, 1.56it/s] 31%|███▏ | 18033/57600 [3:09:41<7:03:47, 1.56it/s] 31%|███▏ | 18034/57600 [3:09:41<7:04:25, 1.55it/s] 31%|███▏ | 18035/57600 [3:09:42<7:02:56, 1.56it/s] 31%|███▏ | 18036/57600 [3:09:43<7:10:01, 1.53it/s] 31%|███▏ | 18037/57600 [3:09:43<7:14:08, 1.52it/s] 31%|███▏ | 18038/57600 [3:09:44<7:32:11, 1.46it/s] 31%|███▏ | 18039/57600 [3:09:45<7:19:55, 1.50it/s] 31%|███▏ | 18040/57600 [3:09:45<7:23:48, 1.49it/s] {'loss': 1.2724, 'learning_rate': 7.202787456445993e-07, 'epoch': 199.12} + 31%|███▏ | 18040/57600 [3:09:45<7:23:48, 1.49it/s] 31%|███▏ | 18041/57600 [3:09:46<7:31:52, 1.46it/s] 31%|███▏ | 18042/57600 [3:09:47<7:23:39, 1.49it/s] 31%|███▏ | 18043/57600 [3:09:47<7:22:53, 1.49it/s] 31%|███▏ | 18044/57600 [3:09:48<7:09:41, 1.53it/s] 31%|███▏ | 18045/57600 [3:09:49<7:07:01, 1.54it/s] 31%|███▏ | 18046/57600 [3:09:49<7:03:55, 1.56it/s] 31%|███▏ | 18047/57600 [3:09:50<7:01:01, 1.57it/s] 31%|███▏ | 18048/57600 [3:09:50<6:48:23, 1.61it/s] 31%|███▏ | 18049/57600 [3:09:51<6:50:52, 1.60it/s] 31%|███▏ | 18050/57600 [3:09:52<6:34:12, 1.67it/s] 31%|███▏ | 18051/57600 [3:09:52<6:56:43, 1.58it/s] 31%|███▏ | 18052/57600 [3:09:53<6:56:17, 1.58it/s] 31%|███▏ | 18053/57600 [3:09:54<6:59:41, 1.57it/s] 31%|███▏ | 18054/57600 [3:09:54<7:00:36, 1.57it/s] 31%|███▏ | 18055/57600 [3:09:55<7:03:13, 1.56it/s] 31%|███▏ | 18056/57600 [3:09:55<6:57:36, 1.58it/s] 31%|███▏ | 18057/57600 [3:09:56<7:06:51, 1.54it/s] 31%|███▏ | 18058/57600 [3:09:57<7:11:51, 1.53it/s] 31%|███▏ | 18059/57600 [3:09:57<7:08:29, 1.54it/s] 31%|███▏ | 18060/57600 [3:09:58<7:07:10, 1.54it/s] {'loss': 1.2588, 'learning_rate': 7.199651567944251e-07, 'epoch': 199.34} + 31%|███▏ | 18060/57600 [3:09:58<7:07:10, 1.54it/s] 31%|███▏ | 18061/57600 [3:09:59<7:02:25, 1.56it/s] 31%|███▏ | 18062/57600 [3:09:59<7:02:43, 1.56it/s] 31%|███▏ | 18063/57600 [3:10:00<7:17:35, 1.51it/s] 31%|███▏ | 18064/57600 [3:10:01<7:10:06, 1.53it/s] 31%|███▏ | 18065/57600 [3:10:01<7:08:37, 1.54it/s] 31%|███▏ | 18066/57600 [3:10:02<7:05:13, 1.55it/s] 31%|███▏ | 18067/57600 [3:10:03<7:00:49, 1.57it/s] \ No newline at end of file