diff --git "a/run-2024-07-04T00:18:50+00:00.log" "b/run-2024-07-04T00:18:50+00:00.log" --- "a/run-2024-07-04T00:18:50+00:00.log" +++ "b/run-2024-07-04T00:18:50+00:00.log" @@ -1049,4 +1049,508 @@ Non-default generation parameters: {'max_length': 200, 'early_stopping': True, ' 31%|███ | 20008/65536 [3:26:04<13:34:02, 1.07s/it] 31%|███ | 20009/65536 [3:26:04<11:52:08, 1.07it/s] 31%|███ | 20010/65536 [3:26:05<10:39:27, 1.19it/s] 31%|███ | 20011/65536 [3:26:06<9:58:44, 1.27it/s] 31%|███ | 20012/65536 [3:26:06<9:28:21, 1.33it/s] 31%|███ | 20013/65536 [3:26:07<9:09:16, 1.38it/s] 31%|███ | 20014/65536 [3:26:08<8:48:20, 1.44it/s] 31%|███ | 20015/65536 [3:26:08<8:41:42, 1.45it/s] 31%|███ | 20016/65536 [3:26:09<8:37:20, 1.47it/s] 31%|███ | 20017/65536 [3:26:10<8:32:14, 1.48it/s] 31%|███ | 20018/65536 [3:26:10<8:21:03, 1.51it/s] 31%|███ | 20019/65536 [3:26:11<8:05:57, 1.56it/s] 31%|███ | 20020/65536 [3:26:11<8:01:26, 1.58it/s] {'loss': 3.1123, 'learning_rate': 7.269805314068814e-07, 'epoch': 1235.8} 31%|███ | 20020/65536 [3:26:11<8:01:26, 1.58it/s] 31%|███ | 20021/65536 [3:26:12<7:58:15, 1.59it/s] 31%|███ | 20022/65536 [3:26:13<7:57:30, 1.59it/s] 31%|███ | 20023/65536 [3:26:13<7:49:58, 1.61it/s] 31%|███ | 20024/65536 [3:26:14<8:10:13, 1.55it/s] 31%|███ | 20025/65536 [3:26:15<8:06:17, 1.56it/s] 31%|███ | 20026/65536 [3:26:15<8:04:43, 1.56it/s] 31%|███ | 20027/65536 [3:26:16<8:09:04, 1.55it/s] 31%|███ | 20028/65536 [3:26:17<8:05:34, 1.56it/s] 31%|███ | 20029/65536 [3:26:17<8:08:14, 1.55it/s] 31%|███ | 20030/65536 [3:26:18<8:16:05, 1.53it/s] 31%|███ | 20031/65536 [3:26:18<8:14:32, 1.53it/s] 31%|███ | 20032/65536 [3:26:19<8:17:26, 1.52it/s] 31%|███ | 20033/65536 [3:26:20<8:03:42, 1.57it/s] 31%|███ | 20034/65536 [3:26:20<8:09:50, 1.55it/s] 31%|███ | 20035/65536 [3:26:21<8:22:35, 1.51it/s] 31%|███ | 20036/65536 [3:26:22<8:18:50, 1.52it/s] 31%|███ | 20037/65536 [3:26:22<8:14:18, 1.53it/s] 31%|███ | 20038/65536 [3:26:23<8:09:25, 1.55it/s] 31%|███ | 20039/65536 [3:26:24<8:13:28, 1.54it/s] 31%|███ | 20040/65536 [3:26:24<8:45:33, 1.44it/s] {'loss': 3.113, 'learning_rate': 7.267050324476552e-07, 'epoch': 1237.04} 31%|███ | 20040/65536 [3:26:24<8:45:33, 1.44it/s] 31%|███ | 20041/65536 [3:26:25<8:38:12, 1.46it/s] 31%|███ | 20042/65536 [3:26:26<8:41:41, 1.45it/s] 31%|███ | 20043/65536 [3:26:27<8:35:21, 1.47it/s] 31%|███ | 20044/65536 [3:26:27<8:37:01, 1.47it/s] 31%|███ | 20045/65536 [3:26:28<8:24:02, 1.50it/s] 31%|███ | 20046/65536 [3:26:28<8:12:01, 1.54it/s] 31%|███ | 20047/65536 [3:26:29<7:55:00, 1.60it/s] 31%|███ | 20048/65536 [3:26:30<8:11:12, 1.54it/s] 31%|███ | 20049/65536 [3:26:30<8:08:27, 1.55it/s] 31%|███ | 20050/65536 [3:26:31<8:12:15, 1.54it/s] 31%|███ | 20051/65536 [3:26:32<8:13:13, 1.54it/s] 31%|███ | 20052/65536 [3:26:32<8:12:05, 1.54it/s] 31%|███ | 20053/65536 [3:26:33<8:22:16, 1.51it/s] 31%|███ | 20054/65536 [3:26:34<8:17:22, 1.52it/s] 31%|███ | 20055/65536 [3:26:34<8:18:32, 1.52it/s] 31%|███ | 20056/65536 [3:26:35<8:28:00, 1.49it/s] 31%|███ | 20057/65536 [3:26:36<8:21:35, 1.51it/s] 31%|███ | 20058/65536 [3:26:36<8:06:48, 1.56it/s] 31%|███ | 20059/65536 [3:26:37<7:58:16, 1.58it/s] 31%|███ | 20060/65536 [3:26:37<7:56:50, 1.59it/s] {'loss': 3.0655, 'learning_rate': 7.26429533488429e-07, 'epoch': 1238.27} - 31%|███ | 20060/65536 [3:26:37<7:56:50, 1.59it/s] 31%|███ | 20061/65536 [3:26:38<7:59:27, 1.58it/s] 31%|███ | 20062/65536 [3:26:39<8:04:22, 1.56it/s] 31%|███ | 20063/65536 [3:26:39<8:06:37, 1.56it/s] 31%|███ | 20064/65536 [3:26:40<8:00:34, 1.58it/s] \ No newline at end of file + 31%|███ | 20060/65536 [3:26:37<7:56:50, 1.59it/s] 31%|███ | 20061/65536 [3:26:38<7:59:27, 1.58it/s] 31%|███ | 20062/65536 [3:26:39<8:04:22, 1.56it/s] 31%|███ | 20063/65536 [3:26:39<8:06:37, 1.56it/s] 31%|███ | 20064/65536 [3:26:40<8:00:34, 1.58it/s] 31%|███ | 20065/65536 [3:26:41<7:57:42, 1.59it/s] 31%|███ | 20066/65536 [3:26:41<8:11:10, 1.54it/s] 31%|███ | 20067/65536 [3:26:42<7:58:04, 1.59it/s] 31%|███ | 20068/65536 [3:26:43<7:48:08, 1.62it/s] 31%|███ | 20069/65536 [3:26:43<7:55:21, 1.59it/s] 31%|███ | 20070/65536 [3:26:44<7:51:52, 1.61it/s] 31%|███ | 20071/65536 [3:26:44<8:02:18, 1.57it/s] 31%|███ | 20072/65536 [3:26:45<8:18:14, 1.52it/s] 31%|███ | 20073/65536 [3:26:46<8:21:50, 1.51it/s] 31%|███ | 20074/65536 [3:26:46<8:05:06, 1.56it/s] 31%|███ | 20075/65536 [3:26:47<7:56:16, 1.59it/s] 31%|███ | 20076/65536 [3:26:48<7:48:46, 1.62it/s] 31%|███ | 20077/65536 [3:26:48<7:45:45, 1.63it/s] 31%|███ | 20078/65536 [3:26:49<7:45:57, 1.63it/s] 31%|███ | 20079/65536 [3:26:49<7:49:00, 1.62it/s] 31%|███ | 20080/65536 [3:26:50<7:58:26, 1.58it/s] {'loss': 3.1083, 'learning_rate': 7.261540345292028e-07, 'epoch': 1239.51} + 31%|███ | 20080/65536 [3:26:50<7:58:26, 1.58it/s] 31%|███ | 20081/65536 [3:26:51<8:01:33, 1.57it/s] 31%|███ | 20082/65536 [3:26:51<7:56:08, 1.59it/s] 31%|███ | 20083/65536 [3:26:52<7:54:55, 1.60it/s] 31%|███ | 20084/65536 [3:26:53<7:50:45, 1.61it/s] 31%|███ | 20085/65536 [3:26:53<7:48:15, 1.62it/s] 31%|███ | 20086/65536 [3:26:54<8:08:15, 1.55it/s] 31%|███ | 20087/65536 [3:26:55<8:03:23, 1.57it/s] 31%|███ | 20088/65536 [3:26:55<8:03:59, 1.57it/s] 31%|███ | 20089/65536 [3:26:56<8:29:21, 1.49it/s] 31%|███ | 20090/65536 [3:26:57<8:31:47, 1.48it/s] 31%|███ | 20091/65536 [3:26:57<8:25:04, 1.50it/s] 31%|███ | 20092/65536 [3:26:58<8:28:40, 1.49it/s] 31%|███ | 20093/65536 [3:26:59<8:16:58, 1.52it/s] 31%|███ | 20094/65536 [3:26:59<8:16:59, 1.52it/s] 31%|███ | 20095/65536 [3:27:00<8:11:21, 1.54it/s] 31%|███ | 20096/65536 [3:27:00<8:02:26, 1.57it/s] 31%|███ | 20097/65536 [3:27:01<7:58:56, 1.58it/s] 31%|███ | 20098/65536 [3:27:02<7:54:23, 1.60it/s] 31%|███ | 20099/65536 [3:27:02<7:48:57, 1.61it/s] 31%|███ | 20100/65536 [3:27:03<8:01:26, 1.57it/s] {'loss': 3.0929, 'learning_rate': 7.258785355699767e-07, 'epoch': 1240.74} + 31%|███ | 20100/65536 [3:27:03<8:01:26, 1.57it/s] 31%|███ | 20101/65536 [3:27:04<8:03:00, 1.57it/s] 31%|███ | 20102/65536 [3:27:04<8:19:40, 1.52it/s] 31%|███ | 20103/65536 [3:27:05<8:17:16, 1.52it/s] 31%|███ | 20104/65536 [3:27:06<8:17:55, 1.52it/s] 31%|███ | 20105/65536 [3:27:06<8:34:42, 1.47it/s] 31%|███ | 20106/65536 [3:27:07<8:38:36, 1.46it/s] 31%|███ | 20107/65536 [3:27:08<8:35:20, 1.47it/s] 31%|███ | 20108/65536 [3:27:08<8:27:59, 1.49it/s] 31%|███ | 20109/65536 [3:27:09<8:09:56, 1.55it/s] 31%|███ | 20110/65536 [3:27:10<8:13:08, 1.54it/s] 31%|███ | 20111/65536 [3:27:10<8:10:42, 1.54it/s] 31%|███ | 20112/65536 [3:27:11<8:13:24, 1.53it/s] 31%|███ | 20113/65536 [3:27:12<7:59:54, 1.58it/s] 31%|███ | 20114/65536 [3:27:12<7:51:27, 1.61it/s] 31%|███ | 20115/65536 [3:27:13<7:47:43, 1.62it/s] 31%|███ | 20116/65536 [3:27:13<7:45:44, 1.63it/s] 31%|███ | 20117/65536 [3:27:14<7:42:45, 1.64it/s] 31%|███ | 20118/65536 [3:27:15<8:03:36, 1.57it/s] 31%|███ | 20119/65536 [3:27:15<8:26:31, 1.49it/s] 31%|███ | 20120/65536 [3:27:16<8:35:58, 1.47it/s] {'loss': 3.0803, 'learning_rate': 7.256030366107506e-07, 'epoch': 1241.98} + 31%|███ | 20120/65536 [3:27:16<8:35:58, 1.47it/s] 31%|███ | 20121/65536 [3:27:17<8:52:55, 1.42it/s] 31%|███ | 20122/65536 [3:27:18<8:42:23, 1.45it/s] 31%|███ | 20123/65536 [3:27:18<8:37:35, 1.46it/s] 31%|███ | 20124/65536 [3:27:19<8:32:45, 1.48it/s] 31%|███ | 20125/65536 [3:27:19<8:21:10, 1.51it/s] 31%|███ | 20126/65536 [3:27:20<8:03:28, 1.57it/s] 31%|███ | 20127/65536 [3:27:21<7:59:14, 1.58it/s] 31%|███ | 20128/65536 [3:27:21<7:50:45, 1.61it/s] 31%|███ | 20129/65536 [3:27:22<7:51:00, 1.61it/s] 31%|███ | 20130/65536 [3:27:23<7:46:14, 1.62it/s] 31%|███ | 20131/65536 [3:27:23<7:56:35, 1.59it/s] 31%|███ | 20132/65536 [3:27:24<8:05:22, 1.56it/s] 31%|███ | 20133/65536 [3:27:24<8:02:06, 1.57it/s] 31%|███ | 20134/65536 [3:27:25<8:04:46, 1.56it/s] 31%|███ | 20135/65536 [3:27:26<8:09:07, 1.55it/s] 31%|███ | 20136/65536 [3:27:26<8:06:50, 1.55it/s] 31%|███ | 20137/65536 [3:27:27<8:19:09, 1.52it/s] 31%|███ | 20138/65536 [3:27:28<8:11:05, 1.54it/s] 31%|███ | 20139/65536 [3:27:28<8:18:56, 1.52it/s] 31%|███ | 20140/65536 [3:27:29<8:13:55, 1.53it/s] {'loss': 3.1016, 'learning_rate': 7.253275376515244e-07, 'epoch': 1243.21} + 31%|███ | 20140/65536 [3:27:29<8:13:55, 1.53it/s] 31%|███ | 20141/65536 [3:27:30<8:05:09, 1.56it/s] 31%|███ | 20142/65536 [3:27:30<8:17:17, 1.52it/s] 31%|███ | 20143/65536 [3:27:31<8:18:38, 1.52it/s] 31%|███ | 20144/65536 [3:27:32<8:22:24, 1.51it/s] 31%|███ | 20145/65536 [3:27:32<8:09:04, 1.55it/s] 31%|███ | 20146/65536 [3:27:33<8:14:57, 1.53it/s] 31%|███ | 20147/65536 [3:27:34<8:10:43, 1.54it/s] 31%|███ | 20148/65536 [3:27:34<7:59:07, 1.58it/s] 31%|███ | 20149/65536 [3:27:35<7:57:43, 1.58it/s] 31%|███ | 20150/65536 [3:27:35<7:51:54, 1.60it/s] 31%|███ | 20151/65536 [3:27:36<7:55:27, 1.59it/s] 31%|███ | 20152/65536 [3:27:37<8:07:29, 1.55it/s] 31%|███ | 20153/65536 [3:27:38<8:38:49, 1.46it/s] 31%|███ | 20154/65536 [3:27:38<8:35:58, 1.47it/s] 31%|███ | 20155/65536 [3:27:39<8:28:59, 1.49it/s] 31%|███ | 20156/65536 [3:27:40<8:23:03, 1.50it/s] 31%|███ | 20157/65536 [3:27:40<8:16:55, 1.52it/s] 31%|███ | 20158/65536 [3:27:41<8:08:21, 1.55it/s] 31%|███ | 20159/65536 [3:27:41<8:05:31, 1.56it/s] 31%|███ | 20160/65536 [3:27:42<8:04:16, 1.56it/s] {'loss': 3.0868, 'learning_rate': 7.250520386922983e-07, 'epoch': 1244.44} + 31%|███ | 20160/65536 [3:27:42<8:04:16, 1.56it/s] 31%|███ | 20161/65536 [3:27:43<8:17:47, 1.52it/s] 31%|███ | 20162/65536 [3:27:43<8:14:14, 1.53it/s] 31%|███ | 20163/65536 [3:27:44<8:16:25, 1.52it/s] 31%|███ | 20164/65536 [3:27:45<8:07:25, 1.55it/s] 31%|███ | 20165/65536 [3:27:45<8:03:02, 1.57it/s] 31%|███ | 20166/65536 [3:27:46<8:14:43, 1.53it/s] 31%|███ | 20167/65536 [3:27:47<8:11:56, 1.54it/s] 31%|███ | 20168/65536 [3:27:47<8:27:39, 1.49it/s] 31%|███ | 20169/65536 [3:27:48<8:19:07, 1.51it/s] 31%|███ | 20170/65536 [3:27:49<8:34:24, 1.47it/s] 31%|███ | 20171/65536 [3:27:49<8:28:50, 1.49it/s] 31%|███ | 20172/65536 [3:27:50<8:38:25, 1.46it/s] 31%|███ | 20173/65536 [3:27:51<8:19:14, 1.51it/s] 31%|███ | 20174/65536 [3:27:51<8:24:44, 1.50it/s] 31%|███ | 20175/65536 [3:27:52<8:10:31, 1.54it/s] 31%|███ | 20176/65536 [3:27:53<8:16:12, 1.52it/s] 31%|███ | 20177/65536 [3:27:53<7:59:55, 1.58it/s] 31%|███ | 20178/65536 [3:27:54<8:04:41, 1.56it/s] 31%|███ | 20179/65536 [3:27:55<8:01:47, 1.57it/s] 31%|███ | 20180/65536 [3:27:55<8:04:04, 1.56it/s] {'loss': 3.053, 'learning_rate': 7.247765397330721e-07, 'epoch': 1245.68} + 31%|███ | 20180/65536 [3:27:55<8:04:04, 1.56it/s] 31%|███ | 20181/65536 [3:27:56<8:07:04, 1.55it/s] 31%|███ | 20182/65536 [3:27:56<8:12:12, 1.54it/s] 31%|███ | 20183/65536 [3:27:57<8:05:44, 1.56it/s] 31%|███ | 20184/65536 [3:27:58<8:08:46, 1.55it/s] 31%|███ | 20185/65536 [3:27:58<8:12:00, 1.54it/s] 31%|███ | 20186/65536 [3:27:59<8:37:13, 1.46it/s] 31%|███ | 20187/65536 [3:28:00<8:52:22, 1.42it/s] 31%|███ | 20188/65536 [3:28:01<8:39:36, 1.45it/s] 31%|███ | 20189/65536 [3:28:01<8:22:10, 1.51it/s] 31%|███ | 20190/65536 [3:28:02<8:07:49, 1.55it/s] 31%|███ | 20191/65536 [3:28:02<8:12:33, 1.53it/s] 31%|███ | 20192/65536 [3:28:03<8:15:54, 1.52it/s] 31%|███ | 20193/65536 [3:28:04<8:05:33, 1.56it/s] 31%|███ | 20194/65536 [3:28:04<8:01:13, 1.57it/s] 31%|███ | 20195/65536 [3:28:05<8:02:43, 1.57it/s] 31%|███ | 20196/65536 [3:28:06<8:18:46, 1.52it/s] 31%|███ | 20197/65536 [3:28:06<8:04:37, 1.56it/s] 31%|███ | 20198/65536 [3:28:07<8:00:21, 1.57it/s] 31%|███ | 20199/65536 [3:28:08<8:01:52, 1.57it/s] 31%|███ | 20200/65536 [3:28:08<8:01:38, 1.57it/s] {'loss': 3.0749, 'learning_rate': 7.24501040773846e-07, 'epoch': 1246.91} + 31%|███ | 20200/65536 [3:28:08<8:01:38, 1.57it/s] 31%|███ | 20201/65536 [3:28:09<8:02:57, 1.56it/s] 31%|███ | 20202/65536 [3:28:10<8:26:48, 1.49it/s] 31%|███ | 20203/65536 [3:28:10<8:26:24, 1.49it/s] 31%|███ | 20204/65536 [3:28:11<8:10:32, 1.54it/s] 31%|███ | 20205/65536 [3:28:11<7:56:49, 1.58it/s] 31%|███ | 20206/65536 [3:28:12<7:59:43, 1.57it/s] 31%|███ | 20207/65536 [3:28:13<7:46:57, 1.62it/s] 31%|███ | 20208/65536 [3:28:13<7:45:46, 1.62it/s] 31%|███ | 20209/65536 [3:28:14<7:44:50, 1.63it/s] 31%|███ | 20210/65536 [3:28:15<7:47:35, 1.62it/s] 31%|███ | 20211/65536 [3:28:15<7:42:54, 1.63it/s] 31%|███ | 20212/65536 [3:28:16<7:43:05, 1.63it/s] 31%|███ | 20213/65536 [3:28:16<7:42:27, 1.63it/s] 31%|███ | 20214/65536 [3:28:17<7:38:06, 1.65it/s] 31%|███ | 20215/65536 [3:28:18<7:55:30, 1.59it/s] 31%|███ | 20216/65536 [3:28:18<7:56:49, 1.58it/s] 31%|███ | 20217/65536 [3:28:19<7:58:46, 1.58it/s] 31%|███ | 20218/65536 [3:28:20<8:12:42, 1.53it/s] 31%|███ | 20219/65536 [3:28:20<8:00:27, 1.57it/s] 31%|███ | 20220/65536 [3:28:21<7:59:10, 1.58it/s] {'loss': 3.125, 'learning_rate': 7.242255418146198e-07, 'epoch': 1248.15} + 31%|███ | 20220/65536 [3:28:21<7:59:10, 1.58it/s] 31%|███ | 20221/65536 [3:28:21<7:53:38, 1.59it/s] 31%|███ | 20222/65536 [3:28:22<7:53:07, 1.60it/s] 31%|███ | 20223/65536 [3:28:23<7:50:56, 1.60it/s] 31%|███ | 20224/65536 [3:28:23<7:48:09, 1.61it/s] 31%|███ | 20225/65536 [3:28:24<7:42:42, 1.63it/s] 31%|███ | 20226/65536 [3:28:25<7:43:45, 1.63it/s] 31%|███ | 20227/65536 [3:28:25<7:40:26, 1.64it/s] 31%|███ | 20228/65536 [3:28:26<7:47:59, 1.61it/s] 31%|███ | 20229/65536 [3:28:26<7:46:05, 1.62it/s] 31%|███ | 20230/65536 [3:28:27<7:52:58, 1.60it/s] 31%|███ | 20231/65536 [3:28:28<7:48:38, 1.61it/s] 31%|███ | 20232/65536 [3:28:28<7:41:19, 1.64it/s] 31%|███ | 20233/65536 [3:28:29<7:36:52, 1.65it/s] 31%|███ | 20234/65536 [3:28:30<8:01:48, 1.57it/s] 31%|███ | 20235/65536 [3:28:30<7:59:04, 1.58it/s] 31%|███ | 20236/65536 [3:28:31<8:14:54, 1.53it/s] 31%|███ | 20237/65536 [3:28:31<8:05:04, 1.56it/s] 31%|███ | 20238/65536 [3:28:32<7:49:42, 1.61it/s] 31%|███ | 20239/65536 [3:28:33<7:47:39, 1.61it/s] 31%|███ | 20240/65536 [3:28:33<7:42:18, 1.63it/s] {'loss': 3.1073, 'learning_rate': 7.239500428553937e-07, 'epoch': 1249.38} + 31%|███ | 20240/65536 [3:28:33<7:42:18, 1.63it/s] 31%|███ | 20241/65536 [3:28:34<7:50:38, 1.60it/s] 31%|███ | 20242/65536 [3:28:35<7:50:02, 1.61it/s] 31%|███ | 20243/65536 [3:28:35<8:03:50, 1.56it/s] 31%|███ | 20244/65536 [3:28:36<7:51:35, 1.60it/s] 31%|███ | 20245/65536 [3:28:36<7:54:15, 1.59it/s] 31%|███ | 20246/65536 [3:28:37<7:49:27, 1.61it/s] 31%|███ | 20247/65536 [3:28:38<7:45:00, 1.62it/s] 31%|███ | 20248/65536 [3:28:38<7:47:43, 1.61it/s] 31%|███ | 20249/65536 [3:28:39<7:40:37, 1.64it/s] 31%|███ | 20250/65536 [3:28:39<7:30:53, 1.67it/s] 31%|███ | 20251/65536 [3:28:40<8:16:39, 1.52it/s] 31%|███ | 20252/65536 [3:28:41<8:12:31, 1.53it/s] 31%|███ | 20253/65536 [3:28:41<8:03:49, 1.56it/s] 31%|███ | 20254/65536 [3:28:42<8:09:40, 1.54it/s] 31%|███ | 20255/65536 [3:28:43<7:59:38, 1.57it/s] 31%|███ | 20256/65536 [3:28:43<7:50:11, 1.61it/s] 31%|███ | 20257/65536 [3:28:44<7:48:42, 1.61it/s] 31%|███ | 20258/65536 [3:28:45<7:48:26, 1.61it/s] 31%|███ | 20259/65536 [3:28:45<7:47:51, 1.61it/s] 31%|███ | 20260/65536 [3:28:46<7:47:56, 1.61it/s] {'loss': 3.0613, 'learning_rate': 7.236745438961675e-07, 'epoch': 1250.62} + 31%|███ | 20260/65536 [3:28:46<7:47:56, 1.61it/s] 31%|███ | 20261/65536 [3:28:46<8:03:44, 1.56it/s] 31%|███ | 20262/65536 [3:28:47<8:02:57, 1.56it/s] 31%|███ | 20263/65536 [3:28:48<7:57:20, 1.58it/s] 31%|███ | 20264/65536 [3:28:48<7:53:22, 1.59it/s] 31%|███ | 20265/65536 [3:28:49<7:45:24, 1.62it/s] 31%|███ | 20266/65536 [3:28:50<7:33:10, 1.66it/s] 31%|███ | 20267/65536 [3:28:50<7:53:10, 1.59it/s] 31%|███ | 20268/65536 [3:28:51<7:48:58, 1.61it/s] 31%|███ | 20269/65536 [3:28:51<7:33:45, 1.66it/s] 31%|███ | 20270/65536 [3:28:52<7:40:51, 1.64it/s] 31%|███ | 20271/65536 [3:28:53<7:45:27, 1.62it/s] 31%|███ | 20272/65536 [3:28:53<7:47:08, 1.61it/s] 31%|███ | 20273/65536 [3:28:54<7:49:00, 1.61it/s] 31%|███ | 20274/65536 [3:28:54<7:36:59, 1.65it/s] 31%|███ | 20275/65536 [3:28:55<7:45:15, 1.62it/s] 31%|███ | 20276/65536 [3:28:56<7:44:42, 1.62it/s] 31%|███ | 20277/65536 [3:28:56<7:46:51, 1.62it/s] 31%|███ | 20278/65536 [3:28:57<7:55:46, 1.59it/s] 31%|███ | 20279/65536 [3:28:58<8:17:52, 1.51it/s] 31%|███ | 20280/65536 [3:28:58<8:02:11, 1.56it/s] {'loss': 3.0474, 'learning_rate': 7.233990449369414e-07, 'epoch': 1251.85} + 31%|███ | 20280/65536 [3:28:58<8:02:11, 1.56it/s] 31%|███ | 20281/65536 [3:28:59<7:59:28, 1.57it/s] 31%|███ | 20282/65536 [3:29:00<7:48:27, 1.61it/s] 31%|███ | 20283/65536 [3:29:00<8:05:45, 1.55it/s] 31%|███ | 20284/65536 [3:29:01<8:15:00, 1.52it/s] 31%|███ | 20285/65536 [3:29:02<8:06:29, 1.55it/s] 31%|███ | 20286/65536 [3:29:02<8:02:35, 1.56it/s] 31%|███ | 20287/65536 [3:29:03<8:09:20, 1.54it/s] 31%|███ | 20288/65536 [3:29:03<7:57:27, 1.58it/s] 31%|███ | 20289/65536 [3:29:04<7:52:44, 1.60it/s] 31%|███ | 20290/65536 [3:29:05<7:54:32, 1.59it/s] 31%|███ | 20291/65536 [3:29:05<7:52:25, 1.60it/s] 31%|███ | 20292/65536 [3:29:06<7:56:04, 1.58it/s] 31%|███ | 20293/65536 [3:29:07<7:50:00, 1.60it/s] 31%|███ | 20294/65536 [3:29:07<7:39:30, 1.64it/s] 31%|███ | 20295/65536 [3:29:08<7:38:14, 1.65it/s] 31%|███ | 20296/65536 [3:29:08<7:38:32, 1.64it/s] 31%|███ | 20297/65536 [3:29:09<7:30:15, 1.67it/s] 31%|███ | 20298/65536 [3:29:10<7:35:47, 1.65it/s] 31%|███ | 20299/65536 [3:29:10<7:54:42, 1.59it/s] 31%|███ | 20300/65536 [3:29:11<7:49:08, 1.61it/s] {'loss': 3.0647, 'learning_rate': 7.231235459777152e-07, 'epoch': 1253.09} + 31%|███ | 20300/65536 [3:29:11<7:49:08, 1.61it/s] 31%|███ | 20301/65536 [3:29:11<7:42:40, 1.63it/s] 31%|███ | 20302/65536 [3:29:12<7:43:15, 1.63it/s] 31%|███ | 20303/65536 [3:29:13<7:53:24, 1.59it/s] 31%|███ | 20304/65536 [3:29:13<7:54:32, 1.59it/s] 31%|███ | 20305/65536 [3:29:14<8:11:23, 1.53it/s] 31%|███ | 20306/65536 [3:29:15<8:05:40, 1.55it/s] 31%|███ | 20307/65536 [3:29:15<8:00:23, 1.57it/s] 31%|███ | 20308/65536 [3:29:16<7:53:55, 1.59it/s] 31%|███ | 20309/65536 [3:29:16<7:49:31, 1.61it/s] 31%|███ | 20310/65536 [3:29:17<7:51:17, 1.60it/s] 31%|███ | 20311/65536 [3:29:18<7:49:29, 1.61it/s] 31%|███ | 20312/65536 [3:29:18<7:40:16, 1.64it/s] 31%|███ | 20313/65536 [3:29:19<7:43:32, 1.63it/s] 31%|███ | 20314/65536 [3:29:20<7:42:19, 1.63it/s] 31%|███ | 20315/65536 [3:29:20<7:58:49, 1.57it/s] 31%|███ | 20316/65536 [3:29:21<7:54:48, 1.59it/s] 31%|███ | 20317/65536 [3:29:21<7:47:01, 1.61it/s] 31%|███ | 20318/65536 [3:29:22<7:40:32, 1.64it/s] 31%|███ | 20319/65536 [3:29:23<7:57:35, 1.58it/s] 31%|███ | 20320/65536 [3:29:23<7:46:35, 1.62it/s] {'loss': 3.0641, 'learning_rate': 7.228480470184889e-07, 'epoch': 1254.32} + 31%|███ | 20320/65536 [3:29:23<7:46:35, 1.62it/s] 31%|███ | 20321/65536 [3:29:24<7:39:58, 1.64it/s] 31%|███ | 20322/65536 [3:29:24<7:32:16, 1.67it/s] 31%|███ | 20323/65536 [3:29:25<7:39:17, 1.64it/s] 31%|███ | 20324/65536 [3:29:26<7:50:34, 1.60it/s] 31%|███ | 20325/65536 [3:29:26<8:00:45, 1.57it/s] 31%|███ | 20326/65536 [3:29:27<7:57:07, 1.58it/s] 31%|███ | 20327/65536 [3:29:28<7:48:27, 1.61it/s] 31%|███ | 20328/65536 [3:29:28<7:44:56, 1.62it/s] 31%|███ | 20329/65536 [3:29:29<7:50:37, 1.60it/s] 31%|███ | 20330/65536 [3:29:30<7:50:53, 1.60it/s] 31%|███ | 20331/65536 [3:29:30<7:47:17, 1.61it/s] 31%|███ | 20332/65536 [3:29:31<7:55:35, 1.58it/s] 31%|███ | 20333/65536 [3:29:31<7:56:29, 1.58it/s] 31%|███ | 20334/65536 [3:29:32<7:37:09, 1.65it/s] 31%|███ | 20335/65536 [3:29:33<7:44:06, 1.62it/s] 31%|███ | 20336/65536 [3:29:33<7:49:05, 1.61it/s] 31%|███ | 20337/65536 [3:29:34<7:46:43, 1.61it/s] 31%|███ | 20338/65536 [3:29:34<7:44:47, 1.62it/s] 31%|███ | 20339/65536 [3:29:35<7:58:41, 1.57it/s] 31%|███ | 20340/65536 [3:29:36<7:49:49, 1.60it/s] {'loss': 3.0684, 'learning_rate': 7.225725480592628e-07, 'epoch': 1255.56} + 31%|███ | 20340/65536 [3:29:36<7:49:49, 1.60it/s] 31%|███ | 20341/65536 [3:29:36<7:46:01, 1.62it/s] 31%|███ | 20342/65536 [3:29:37<7:56:29, 1.58it/s] 31%|███ | 20343/65536 [3:29:38<7:50:41, 1.60it/s] 31%|███ | 20344/65536 [3:29:38<8:02:53, 1.56it/s] 31%|███ | 20345/65536 [3:29:39<7:46:25, 1.61it/s] 31%|███ | 20346/65536 [3:29:39<7:45:31, 1.62it/s] 31%|███ | 20347/65536 [3:29:40<7:38:29, 1.64it/s] 31%|███ | 20348/65536 [3:29:41<7:56:51, 1.58it/s] 31%|███ | 20349/65536 [3:29:41<7:43:44, 1.62it/s] 31%|███ | 20350/65536 [3:29:42<7:39:02, 1.64it/s] 31%|███ | 20351/65536 [3:29:43<7:44:14, 1.62it/s] 31%|███ | 20352/65536 [3:29:43<7:53:31, 1.59it/s] 31%|███ | 20353/65536 [3:29:44<7:56:56, 1.58it/s] 31%|███ | 20354/65536 [3:29:44<7:49:16, 1.60it/s] 31%|███ | 20355/65536 [3:29:45<7:49:29, 1.60it/s] 31%|███ | 20356/65536 [3:29:46<7:55:30, 1.58it/s] 31%|███ | 20357/65536 [3:29:46<7:47:05, 1.61it/s] 31%|███ | 20358/65536 [3:29:47<7:54:47, 1.59it/s] 31%|███ | 20359/65536 [3:29:48<8:11:21, 1.53it/s] 31%|███ | 20360/65536 [3:29:48<8:00:06, 1.57it/s] {'loss': 3.0527, 'learning_rate': 7.222970491000366e-07, 'epoch': 1256.79} + 31%|███ | 20360/65536 [3:29:48<8:00:06, 1.57it/s] 31%|███ | 20361/65536 [3:29:49<7:49:27, 1.60it/s] 31%|███ | 20362/65536 [3:29:50<7:47:55, 1.61it/s] 31%|███ | 20363/65536 [3:29:50<7:44:14, 1.62it/s] 31%|███ | 20364/65536 [3:29:51<7:56:54, 1.58it/s] 31%|███ | 20365/65536 [3:29:51<7:49:22, 1.60it/s] 31%|███ | 20366/65536 [3:29:52<7:54:25, 1.59it/s] 31%|███ | 20367/65536 [3:29:53<8:13:31, 1.53it/s] 31%|███ | 20368/65536 [3:29:53<8:07:47, 1.54it/s] 31%|███ | 20369/65536 [3:29:54<8:05:07, 1.55it/s] 31%|███ | 20370/65536 [3:29:55<7:53:32, 1.59it/s] 31%|███ | 20371/65536 [3:29:55<7:42:39, 1.63it/s] 31%|███ | 20372/65536 [3:29:56<7:33:16, 1.66it/s] 31%|███ | 20373/65536 [3:29:56<7:22:19, 1.70it/s] 31%|███ | 20374/65536 [3:29:57<7:27:31, 1.68it/s] 31%|███ | 20375/65536 [3:29:58<7:36:55, 1.65it/s] 31%|███ | 20376/65536 [3:29:58<7:40:02, 1.64it/s] 31%|███ | 20377/65536 [3:29:59<7:39:06, 1.64it/s] 31%|███ | 20378/65536 [3:29:59<7:31:18, 1.67it/s] 31%|███ | 20379/65536 [3:30:00<7:30:22, 1.67it/s] 31%|███ | 20380/65536 [3:30:01<7:47:24, 1.61it/s] {'loss': 3.1258, 'learning_rate': 7.220215501408105e-07, 'epoch': 1258.02} + 31%|███ | 20380/65536 [3:30:01<7:47:24, 1.61it/s] 31%|███ | 20381/65536 [3:30:01<7:49:39, 1.60it/s] 31%|███ | 20382/65536 [3:30:02<7:44:40, 1.62it/s] 31%|███ | 20383/65536 [3:30:02<7:46:34, 1.61it/s] 31%|███ | 20384/65536 [3:30:03<7:43:34, 1.62it/s] 31%|███ | 20385/65536 [3:30:04<8:09:26, 1.54it/s] 31%|███ | 20386/65536 [3:30:04<7:57:03, 1.58it/s] 31%|███ | 20387/65536 [3:30:05<7:46:00, 1.61it/s] 31%|███ | 20388/65536 [3:30:06<7:43:04, 1.62it/s] 31%|███ | 20389/65536 [3:30:06<7:35:01, 1.65it/s] 31%|███ | 20390/65536 [3:30:07<7:33:14, 1.66it/s] 31%|███ | 20391/65536 [3:30:07<7:40:10, 1.64it/s] 31%|███ | 20392/65536 [3:30:08<7:42:19, 1.63it/s] 31%|███ | 20393/65536 [3:30:09<7:41:48, 1.63it/s] 31%|███ | 20394/65536 [3:30:09<7:34:35, 1.66it/s] 31%|███ | 20395/65536 [3:30:10<7:32:50, 1.66it/s] 31%|███ | 20396/65536 [3:30:11<7:49:43, 1.60it/s] 31%|███ | 20397/65536 [3:30:11<7:31:11, 1.67it/s] 31%|███ | 20398/65536 [3:30:12<7:34:16, 1.66it/s] 31%|███ | 20399/65536 [3:30:12<7:23:42, 1.70it/s] 31%|███ | 20400/65536 [3:30:13<7:38:53, 1.64it/s] {'loss': 3.0887, 'learning_rate': 7.217460511815844e-07, 'epoch': 1259.26} + 31%|███ | 20400/65536 [3:30:13<7:38:53, 1.64it/s] 31%|███ | 20401/65536 [3:30:13<7:37:13, 1.65it/s] 31%|███ | 20402/65536 [3:30:14<7:27:53, 1.68it/s] 31%|███ | 20403/65536 [3:30:15<7:30:48, 1.67it/s] 31%|███ | 20404/65536 [3:30:15<7:30:39, 1.67it/s] 31%|███ | 20405/65536 [3:30:16<7:42:31, 1.63it/s] 31%|███ | 20406/65536 [3:30:17<7:39:16, 1.64it/s] 31%|███ | 20407/65536 [3:30:17<7:34:29, 1.65it/s] 31%|███ | 20408/65536 [3:30:18<7:43:19, 1.62it/s] 31%|███ | 20409/65536 [3:30:18<7:43:31, 1.62it/s] 31%|███ | 20410/65536 [3:30:19<7:45:16, 1.62it/s] 31%|███ | 20411/65536 [3:30:20<7:50:27, 1.60it/s] 31%|███ | 20412/65536 [3:30:20<7:40:43, 1.63it/s] 31%|███ | 20413/65536 [3:30:21<7:59:16, 1.57it/s] 31%|███ | 20414/65536 [3:30:22<7:51:44, 1.59it/s] 31%|███ | 20415/65536 [3:30:22<7:39:18, 1.64it/s] 31%|███ | 20416/65536 [3:30:23<7:31:12, 1.67it/s] 31%|███ | 20417/65536 [3:30:23<7:25:03, 1.69it/s] 31%|███ | 20418/65536 [3:30:24<7:36:19, 1.65it/s] 31%|███ | 20419/65536 [3:30:25<7:46:20, 1.61it/s] 31%|███ | 20420/65536 [3:30:25<7:39:09, 1.64it/s] {'loss': 3.1021, 'learning_rate': 7.214705522223582e-07, 'epoch': 1260.49} + 31%|███ | 20420/65536 [3:30:25<7:39:09, 1.64it/s] 31%|███ | 20421/65536 [3:30:26<7:34:43, 1.65it/s] 31%|███ | 20422/65536 [3:30:26<7:29:02, 1.67it/s] 31%|███ | 20423/65536 [3:30:27<7:40:59, 1.63it/s] 31%|███ | 20424/65536 [3:30:28<7:31:53, 1.66it/s] 31%|███ | 20425/65536 [3:30:28<7:40:11, 1.63it/s] 31%|███ | 20426/65536 [3:30:29<7:54:58, 1.58it/s] 31%|███ | 20427/65536 [3:30:29<7:46:57, 1.61it/s] 31%|███ | 20428/65536 [3:30:30<7:47:18, 1.61it/s] 31%|███ | 20429/65536 [3:30:31<7:52:14, 1.59it/s] 31%|███ | 20430/65536 [3:30:31<7:57:21, 1.57it/s] 31%|███ | 20431/65536 [3:30:32<7:49:53, 1.60it/s] 31%|███ | 20432/65536 [3:30:33<7:52:47, 1.59it/s] 31%|███ | 20433/65536 [3:30:33<7:54:07, 1.59it/s] 31%|███ | 20434/65536 [3:30:34<7:44:54, 1.62it/s] 31%|███ | 20435/65536 [3:30:34<7:39:33, 1.64it/s] 31%|███ | 20436/65536 [3:30:35<7:35:34, 1.65it/s] 31%|███ | 20437/65536 [3:30:36<7:28:34, 1.68it/s] 31%|███ | 20438/65536 [3:30:36<7:34:17, 1.65it/s] 31%|███ | 20439/65536 [3:30:37<7:34:18, 1.65it/s] 31%|███ | 20440/65536 [3:30:37<7:36:04, 1.65it/s] {'loss': 3.034, 'learning_rate': 7.211950532631321e-07, 'epoch': 1261.73} + 31%|███ | 20440/65536 [3:30:37<7:36:04, 1.65it/s] 31%|███ | 20441/65536 [3:30:38<7:39:29, 1.64it/s] 31%|███ | 20442/65536 [3:30:39<7:38:45, 1.64it/s] 31%|███ | 20443/65536 [3:30:39<7:31:24, 1.66it/s] 31%|███ | 20444/65536 [3:30:40<7:31:55, 1.66it/s] 31%|███ | 20445/65536 [3:30:40<7:47:13, 1.61it/s] 31%|███ | 20446/65536 [3:30:41<7:43:56, 1.62it/s] 31%|███ | 20447/65536 [3:30:42<7:38:25, 1.64it/s] 31%|███ | 20448/65536 [3:30:42<7:41:57, 1.63it/s] 31%|███ | 20449/65536 [3:30:43<7:36:59, 1.64it/s] 31%|███ | 20450/65536 [3:30:43<7:33:28, 1.66it/s] 31%|███ | 20451/65536 [3:30:44<7:47:03, 1.61it/s] 31%|███ | 20452/65536 [3:30:45<7:56:53, 1.58it/s] 31%|███ | 20453/65536 [3:30:45<7:54:15, 1.58it/s] 31%|███ | 20454/65536 [3:30:46<7:46:47, 1.61it/s] 31%|███ | 20455/65536 [3:30:47<7:35:53, 1.65it/s] 31%|███ | 20456/65536 [3:30:47<7:35:37, 1.65it/s] 31%|███ | 20457/65536 [3:30:48<7:43:13, 1.62it/s] 31%|███ | 20458/65536 [3:30:48<7:38:17, 1.64it/s] 31%|███ | 20459/65536 [3:30:49<7:26:58, 1.68it/s] 31%|███ | 20460/65536 [3:30:50<7:32:22, 1.66it/s] {'loss': 3.0488, 'learning_rate': 7.209195543039059e-07, 'epoch': 1262.96} + 31%|███ | 20460/65536 [3:30:50<7:32:22, 1.66it/s] 31%|███ | 20461/65536 [3:30:50<7:46:05, 1.61it/s] 31%|███ | 20462/65536 [3:30:51<7:50:06, 1.60it/s] 31%|███ | 20463/65536 [3:30:52<7:52:37, 1.59it/s] 31%|███ | 20464/65536 [3:30:52<7:39:55, 1.63it/s] 31%|███ | 20465/65536 [3:30:53<7:40:24, 1.63it/s] 31%|███ | 20466/65536 [3:30:53<7:37:43, 1.64it/s] 31%|███ | 20467/65536 [3:30:54<7:56:05, 1.58it/s] 31%|███ | 20468/65536 [3:30:55<7:56:16, 1.58it/s] 31%|███ | 20469/65536 [3:30:55<7:48:04, 1.60it/s] 31%|███ | 20470/65536 [3:30:56<7:42:21, 1.62it/s] 31%|███ | 20471/65536 [3:30:56<7:39:24, 1.63it/s] 31%|███ | 20472/65536 [3:30:57<7:36:08, 1.65it/s] 31%|███ | 20473/65536 [3:30:58<7:39:09, 1.64it/s] 31%|███ | 20474/65536 [3:30:58<7:27:33, 1.68it/s] 31%|███ | 20475/65536 [3:30:59<7:25:26, 1.69it/s] 31%|███ | 20476/65536 [3:30:59<7:29:21, 1.67it/s] 31%|███ | 20477/65536 [3:31:00<7:36:40, 1.64it/s] 31%|███ | 20478/65536 [3:31:01<7:39:58, 1.63it/s] 31%|███ | 20479/65536 [3:31:01<7:36:14, 1.65it/s] 31%|███▏ | 20480/65536 [3:31:02<7:36:34, 1.64it/s] {'loss': 3.1048, 'learning_rate': 7.206440553446798e-07, 'epoch': 1264.2} + 31%|███▏ | 20480/65536 [3:31:02<7:36:34, 1.64it/s] 31%|███▏ | 20481/65536 [3:31:03<7:42:50, 1.62it/s] 31%|███▏ | 20482/65536 [3:31:03<7:46:41, 1.61it/s] 31%|███▏ | 20483/65536 [3:31:04<7:33:21, 1.66it/s] 31%|███▏ | 20484/65536 [3:31:04<7:36:54, 1.64it/s] 31%|███▏ | 20485/65536 [3:31:05<7:40:35, 1.63it/s] 31%|███▏ | 20486/65536 [3:31:06<7:28:51, 1.67it/s] 31%|███▏ | 20487/65536 [3:31:06<7:30:28, 1.67it/s] 31%|███▏ | 20488/65536 [3:31:07<7:52:16, 1.59it/s] 31%|███▏ | 20489/65536 [3:31:07<7:40:25, 1.63it/s] 31%|███▏ | 20490/65536 [3:31:08<7:43:50, 1.62it/s] 31%|███▏ | 20491/65536 [3:31:09<7:39:54, 1.63it/s] 31%|███▏ | 20492/65536 [3:31:09<7:37:52, 1.64it/s] 31%|███▏ | 20493/65536 [3:31:10<7:29:36, 1.67it/s] 31%|███▏ | 20494/65536 [3:31:11<7:57:30, 1.57it/s] 31%|███▏ | 20495/65536 [3:31:11<7:44:04, 1.62it/s] 31%|███▏ | 20496/65536 [3:31:12<7:38:57, 1.64it/s] 31%|███▏ | 20497/65536 [3:31:12<7:36:57, 1.64it/s] 31%|███▏ | 20498/65536 [3:31:13<7:33:47, 1.65it/s] 31%|███▏ | 20499/65536 [3:31:14<7:33:05, 1.66it/s] 31%|███▏ | 20500/65536 [3:31:14<7:40:50, 1.63it/s] {'loss': 3.0026, 'learning_rate': 7.203685563854537e-07, 'epoch': 1265.43} + 31%|███▏ | 20500/65536 [3:31:14<7:40:50, 1.63it/s] 31%|███▏ | 20501/65536 [3:31:15<7:51:18, 1.59it/s] 31%|███▏ | 20502/65536 [3:31:15<7:45:34, 1.61it/s] 31%|███▏ | 20503/65536 [3:31:16<7:35:44, 1.65it/s] 31%|███▏ | 20504/65536 [3:31:17<7:24:47, 1.69it/s] 31%|███▏ | 20505/65536 [3:31:17<7:24:48, 1.69it/s] 31%|███▏ | 20506/65536 [3:31:18<7:29:14, 1.67it/s] 31%|███▏ | 20507/65536 [3:31:18<7:42:58, 1.62it/s] 31%|███▏ | 20508/65536 [3:31:19<7:38:26, 1.64it/s] 31%|███▏ | 20509/65536 [3:31:20<7:43:19, 1.62it/s] 31%|███▏ | 20510/65536 [3:31:20<7:49:17, 1.60it/s] 31%|███▏ | 20511/65536 [3:31:21<7:52:00, 1.59it/s] 31%|███▏ | 20512/65536 [3:31:22<7:50:21, 1.60it/s] 31%|███▏ | 20513/65536 [3:31:22<7:40:52, 1.63it/s] 31%|███▏ | 20514/65536 [3:31:23<7:35:45, 1.65it/s] 31%|███▏ | 20515/65536 [3:31:23<7:40:45, 1.63it/s] 31%|███▏ | 20516/65536 [3:31:24<7:45:08, 1.61it/s] 31%|███▏ | 20517/65536 [3:31:25<7:51:14, 1.59it/s] 31%|███▏ | 20518/65536 [3:31:25<7:45:17, 1.61it/s] 31%|███▏ | 20519/65536 [3:31:26<7:42:53, 1.62it/s] 31%|███▏ | 20520/65536 [3:31:26<7:41:52, 1.62it/s] {'loss': 2.9731, 'learning_rate': 7.200930574262274e-07, 'epoch': 1266.67} + 31%|███▏ | 20520/65536 [3:31:26<7:41:52, 1.62it/s] 31%|███▏ | 20521/65536 [3:31:27<7:38:15, 1.64it/s] 31%|███▏ | 20522/65536 [3:31:28<7:43:14, 1.62it/s] 31%|███▏ | 20523/65536 [3:31:28<7:42:06, 1.62it/s] 31%|███▏ | 20524/65536 [3:31:29<7:43:43, 1.62it/s] 31%|███▏ | 20525/65536 [3:31:30<7:34:04, 1.65it/s] 31%|███▏ | 20526/65536 [3:31:30<7:50:18, 1.60it/s] 31%|███▏ | 20527/65536 [3:31:31<7:43:31, 1.62it/s] 31%|███▏ | 20528/65536 [3:31:31<7:41:40, 1.62it/s] 31%|███▏ | 20529/65536 [3:31:32<7:32:42, 1.66it/s] 31%|███▏ | 20530/65536 [3:31:33<7:40:29, 1.63it/s] 31%|███▏ | 20531/65536 [3:31:33<7:41:34, 1.63it/s] 31%|███▏ | 20532/65536 [3:31:34<7:29:26, 1.67it/s] 31%|███▏ | 20533/65536 [3:31:34<7:29:22, 1.67it/s] 31%|███▏ | 20534/65536 [3:31:35<7:32:03, 1.66it/s] 31%|███▏ | 20535/65536 [3:31:36<7:38:32, 1.64it/s] 31%|███▏ | 20536/65536 [3:31:36<7:32:01, 1.66it/s] 31%|███▏ | 20537/65536 [3:31:37<7:34:49, 1.65it/s] 31%|███▏ | 20538/65536 [3:31:38<7:47:34, 1.60it/s] 31%|███▏ | 20539/65536 [3:31:38<7:47:54, 1.60it/s] 31%|███▏ | 20540/65536 [3:31:39<7:38:46, 1.63it/s] {'loss': 3.0784, 'learning_rate': 7.198175584670013e-07, 'epoch': 1267.9} + 31%|███▏ | 20540/65536 [3:31:39<7:38:46, 1.63it/s] 31%|███▏ | 20541/65536 [3:31:39<7:41:39, 1.62it/s] 31%|███▏ | 20542/65536 [3:31:40<7:52:59, 1.59it/s] 31%|███▏ | 20543/65536 [3:31:41<7:54:12, 1.58it/s] 31%|███▏ | 20544/65536 [3:31:41<7:42:05, 1.62it/s] 31%|███▏ | 20545/65536 [3:31:42<7:38:02, 1.64it/s] 31%|███▏ | 20546/65536 [3:31:42<7:28:20, 1.67it/s] 31%|███▏ | 20547/65536 [3:31:43<7:26:36, 1.68it/s] 31%|███▏ | 20548/65536 [3:31:44<7:21:19, 1.70it/s] 31%|███▏ | 20549/65536 [3:31:44<7:22:07, 1.70it/s] 31%|███▏ | 20550/65536 [3:31:45<7:25:13, 1.68it/s] 31%|███▏ | 20551/65536 [3:31:45<7:27:56, 1.67it/s] 31%|███▏ | 20552/65536 [3:31:46<7:39:01, 1.63it/s] 31%|███▏ | 20553/65536 [3:31:47<7:46:01, 1.61it/s] 31%|███▏ | 20554/65536 [3:31:47<7:36:46, 1.64it/s] 31%|███▏ | 20555/65536 [3:31:48<7:38:36, 1.63it/s] 31%|███▏ | 20556/65536 [3:31:48<7:29:11, 1.67it/s] 31%|███▏ | 20557/65536 [3:31:49<7:55:19, 1.58it/s] 31%|███▏ | 20558/65536 [3:31:50<7:58:17, 1.57it/s] 31%|███▏ | 20559/65536 [3:31:50<7:45:46, 1.61it/s] 31%|███▏ | 20560/65536 [3:31:51<7:39:11, 1.63it/s] {'loss': 3.0579, 'learning_rate': 7.195420595077751e-07, 'epoch': 1269.14} + 31%|███▏ | 20560/65536 [3:31:51<7:39:11, 1.63it/s] 31%|███▏ | 20561/65536 [3:31:52<7:45:26, 1.61it/s] 31%|███▏ | 20562/65536 [3:31:52<7:48:09, 1.60it/s] 31%|███▏ | 20563/65536 [3:31:53<7:36:31, 1.64it/s] 31%|███▏ | 20564/65536 [3:31:53<7:36:49, 1.64it/s] 31%|███▏ | 20565/65536 [3:31:54<7:33:05, 1.65it/s] 31%|███▏ | 20566/65536 [3:31:55<7:31:52, 1.66it/s] 31%|███▏ | 20567/65536 [3:31:55<7:31:05, 1.66it/s] 31%|███▏ | 20568/65536 [3:31:56<7:46:23, 1.61it/s] 31%|███▏ | 20569/65536 [3:31:56<7:38:39, 1.63it/s] 31%|███▏ | 20570/65536 [3:31:57<7:28:58, 1.67it/s] 31%|███▏ | 20571/65536 [3:31:58<7:42:24, 1.62it/s] 31%|███▏ | 20572/65536 [3:31:58<7:39:45, 1.63it/s] 31%|███▏ | 20573/65536 [3:31:59<7:32:03, 1.66it/s] 31%|███▏ | 20574/65536 [3:31:59<7:28:22, 1.67it/s] 31%|███▏ | 20575/65536 [3:32:00<7:48:53, 1.60it/s] 31%|███▏ | 20576/65536 [3:32:01<7:47:02, 1.60it/s] 31%|███▏ | 20577/65536 [3:32:01<7:48:50, 1.60it/s] 31%|███▏ | 20578/65536 [3:32:02<7:41:58, 1.62it/s] 31%|███▏ | 20579/65536 [3:32:03<7:24:56, 1.68it/s] 31%|███▏ | 20580/65536 [3:32:03<7:27:10, 1.68it/s] {'loss': 3.1232, 'learning_rate': 7.19266560548549e-07, 'epoch': 1270.37} + 31%|███▏ | 20580/65536 [3:32:03<7:27:10, 1.68it/s] 31%|███▏ | 20581/65536 [3:32:04<7:35:31, 1.64it/s] 31%|███▏ | 20582/65536 [3:32:04<7:37:50, 1.64it/s] 31%|███▏ | 20583/65536 [3:32:05<7:28:36, 1.67it/s] 31%|███▏ | 20584/65536 [3:32:06<7:26:36, 1.68it/s] 31%|███▏ | 20585/65536 [3:32:06<7:27:31, 1.67it/s] 31%|███▏ | 20586/65536 [3:32:07<7:29:39, 1.67it/s] 31%|███▏ | 20587/65536 [3:32:07<7:24:57, 1.68it/s] 31%|███▏ | 20588/65536 [3:32:08<7:24:31, 1.69it/s] 31%|███▏ | 20589/65536 [3:32:09<7:39:38, 1.63it/s] 31%|███▏ | 20590/65536 [3:32:09<7:43:54, 1.61it/s] 31%|███▏ | 20591/65536 [3:32:10<8:09:24, 1.53it/s] 31%|███▏ | 20592/65536 [3:32:11<7:57:57, 1.57it/s] 31%|███▏ | 20593/65536 [3:32:11<8:09:15, 1.53it/s] 31%|███▏ | 20594/65536 [3:32:12<7:56:14, 1.57it/s] 31%|███▏ | 20595/65536 [3:32:12<7:52:31, 1.59it/s] 31%|███▏ | 20596/65536 [3:32:13<7:39:53, 1.63it/s] 31%|███▏ | 20597/65536 [3:32:14<7:37:36, 1.64it/s] 31%|███▏ | 20598/65536 [3:32:14<7:31:34, 1.66it/s] 31%|███▏ | 20599/65536 [3:32:15<7:24:18, 1.69it/s] 31%|███▏ | 20600/65536 [3:32:15<7:23:27, 1.69it/s] {'loss': 3.0752, 'learning_rate': 7.189910615893227e-07, 'epoch': 1271.6} + 31%|███▏ | 20600/65536 [3:32:15<7:23:27, 1.69it/s] 31%|███▏ | 20601/65536 [3:32:16<7:36:52, 1.64it/s] 31%|███▏ | 20602/65536 [3:32:17<7:32:00, 1.66it/s] 31%|███▏ | 20603/65536 [3:32:17<7:26:47, 1.68it/s] 31%|███▏ | 20604/65536 [3:32:18<7:27:25, 1.67it/s] 31%|███▏ | 20605/65536 [3:32:18<7:28:19, 1.67it/s] 31%|███▏ | 20606/65536 [3:32:19<7:35:32, 1.64it/s] 31%|███▏ | 20607/65536 [3:32:20<7:58:22, 1.57it/s] 31%|███▏ | 20608/65536 [3:32:20<7:47:34, 1.60it/s] 31%|███▏ | 20609/65536 [3:32:21<7:47:22, 1.60it/s] 31%|███▏ | 20610/65536 [3:32:22<7:40:17, 1.63it/s] 31%|███▏ | 20611/65536 [3:32:22<7:57:26, 1.57it/s] 31%|███▏ | 20612/65536 [3:32:23<7:39:55, 1.63it/s] 31%|███▏ | 20613/65536 [3:32:23<7:36:19, 1.64it/s] 31%|███▏ | 20614/65536 [3:32:24<7:31:09, 1.66it/s] 31%|███▏ | 20615/65536 [3:32:25<7:43:32, 1.62it/s] 31%|███▏ | 20616/65536 [3:32:25<7:47:10, 1.60it/s] 31%|███▏ | 20617/65536 [3:32:26<7:42:25, 1.62it/s] 31%|███▏ | 20618/65536 [3:32:26<7:37:07, 1.64it/s] 31%|███▏ | 20619/65536 [3:32:27<7:41:29, 1.62it/s] 31%|███▏ | 20620/65536 [3:32:28<7:35:42, 1.64it/s] {'loss': 2.9859, 'learning_rate': 7.187155626300966e-07, 'epoch': 1272.84} + 31%|███▏ | 20620/65536 [3:32:28<7:35:42, 1.64it/s] 31%|███▏ | 20621/65536 [3:32:28<7:37:38, 1.64it/s] 31%|███▏ | 20622/65536 [3:32:29<7:37:24, 1.64it/s] 31%|███▏ | 20623/65536 [3:32:30<7:48:46, 1.60it/s] 31%|███▏ | 20624/65536 [3:32:30<7:43:46, 1.61it/s] 31%|███▏ | 20625/65536 [3:32:31<7:43:16, 1.62it/s] 31%|███▏ | 20626/65536 [3:32:31<7:45:42, 1.61it/s] 31%|███▏ | 20627/65536 [3:32:32<7:39:31, 1.63it/s] 31%|███▏ | 20628/65536 [3:32:33<7:44:17, 1.61it/s] 31%|███▏ | 20629/65536 [3:32:33<7:41:01, 1.62it/s] 31%|███▏ | 20630/65536 [3:32:34<7:37:26, 1.64it/s] 31%|███▏ | 20631/65536 [3:32:35<7:42:12, 1.62it/s] 31%|███▏ | 20632/65536 [3:32:35<7:37:12, 1.64it/s] 31%|███▏ | 20633/65536 [3:32:36<7:38:38, 1.63it/s] 31%|███▏ | 20634/65536 [3:32:36<7:36:12, 1.64it/s] 31%|███▏ | 20635/65536 [3:32:37<7:30:34, 1.66it/s] 31%|███▏ | 20636/65536 [3:32:37<7:22:26, 1.69it/s] 31%|███▏ | 20637/65536 [3:32:38<7:20:44, 1.70it/s] 31%|███▏ | 20638/65536 [3:32:39<7:31:03, 1.66it/s] 31%|███▏ | 20639/65536 [3:32:39<7:47:10, 1.60it/s] 31%|███▏ | 20640/65536 [3:32:40<7:42:57, 1.62it/s] {'loss': 3.0488, 'learning_rate': 7.184400636708705e-07, 'epoch': 1274.07} + 31%|███▏ | 20640/65536 [3:32:40<7:42:57, 1.62it/s] 31%|███▏ | 20641/65536 [3:32:41<7:47:48, 1.60it/s] 31%|███▏ | 20642/65536 [3:32:41<7:39:31, 1.63it/s] 31%|███▏ | 20643/65536 [3:32:42<7:32:13, 1.65it/s] 32%|███▏ | 20644/65536 [3:32:42<7:39:04, 1.63it/s] 32%|███▏ | 20645/65536 [3:32:43<7:47:21, 1.60it/s] 32%|███▏ | 20646/65536 [3:32:44<7:44:34, 1.61it/s] 32%|███▏ | 20647/65536 [3:32:44<7:34:45, 1.65it/s] 32%|███▏ | 20648/65536 [3:32:45<7:46:00, 1.61it/s] 32%|███▏ | 20649/65536 [3:32:46<7:41:17, 1.62it/s] 32%|███▏ | 20650/65536 [3:32:46<7:50:55, 1.59it/s] 32%|███▏ | 20651/65536 [3:32:47<7:36:10, 1.64it/s] 32%|███▏ | 20652/65536 [3:32:47<7:27:54, 1.67it/s] 32%|███▏ | 20653/65536 [3:32:48<7:32:27, 1.65it/s] 32%|███▏ | 20654/65536 [3:32:49<7:30:27, 1.66it/s] 32%|███▏ | 20655/65536 [3:32:49<7:27:27, 1.67it/s] 32%|███▏ | 20656/65536 [3:32:50<7:50:07, 1.59it/s] 32%|███▏ | 20657/65536 [3:32:50<7:52:43, 1.58it/s] 32%|███▏ | 20658/65536 [3:32:51<7:53:30, 1.58it/s] 32%|███▏ | 20659/65536 [3:32:52<7:52:12, 1.58it/s] 32%|███▏ | 20660/65536 [3:32:52<7:40:02, 1.63it/s] {'loss': 3.1021, 'learning_rate': 7.181645647116444e-07, 'epoch': 1275.31} + 32%|███▏ | 20660/65536 [3:32:52<7:40:02, 1.63it/s] 32%|███▏ | 20661/65536 [3:32:53<7:41:14, 1.62it/s] 32%|███▏ | 20662/65536 [3:32:53<7:34:50, 1.64it/s] 32%|███▏ | 20663/65536 [3:32:54<7:37:59, 1.63it/s] 32%|███▏ | 20664/65536 [3:32:55<7:39:09, 1.63it/s] 32%|███▏ | 20665/65536 [3:32:55<7:29:44, 1.66it/s] 32%|███▏ | 20666/65536 [3:32:56<7:32:51, 1.65it/s] 32%|███▏ | 20667/65536 [3:32:56<7:22:17, 1.69it/s] 32%|███▏ | 20668/65536 [3:32:57<7:24:05, 1.68it/s] 32%|███▏ | 20669/65536 [3:32:58<7:23:47, 1.69it/s] 32%|███▏ | 20670/65536 [3:32:58<7:21:20, 1.69it/s] 32%|███▏ | 20671/65536 [3:32:59<7:23:21, 1.69it/s] 32%|███▏ | 20672/65536 [3:33:00<7:43:22, 1.61it/s] 32%|███▏ | 20673/65536 [3:33:00<7:41:30, 1.62it/s] 32%|███▏ | 20674/65536 [3:33:01<7:39:03, 1.63it/s] 32%|███▏ | 20675/65536 [3:33:01<7:32:47, 1.65it/s] 32%|███▏ | 20676/65536 [3:33:02<7:34:41, 1.64it/s] 32%|███▏ | 20677/65536 [3:33:03<7:25:14, 1.68it/s] 32%|███▏ | 20678/65536 [3:33:03<7:32:10, 1.65it/s] 32%|███▏ | 20679/65536 [3:33:04<7:36:29, 1.64it/s] 32%|███▏ | 20680/65536 [3:33:04<7:34:21, 1.65it/s] {'loss': 3.0604, 'learning_rate': 7.178890657524183e-07, 'epoch': 1276.54} + 32%|███▏ | 20680/65536 [3:33:04<7:34:21, 1.65it/s] 32%|███▏ | 20681/65536 [3:33:05<7:23:07, 1.69it/s] 32%|███▏ | 20682/65536 [3:33:06<7:40:31, 1.62it/s] 32%|███▏ | 20683/65536 [3:33:06<7:41:55, 1.62it/s] 32%|███▏ | 20684/65536 [3:33:07<7:34:29, 1.64it/s] 32%|███▏ | 20685/65536 [3:33:07<7:35:07, 1.64it/s] 32%|███▏ | 20686/65536 [3:33:08<7:46:58, 1.60it/s] 32%|███▏ | 20687/65536 [3:33:09<7:38:24, 1.63it/s] 32%|███▏ | 20688/65536 [3:33:09<7:54:54, 1.57it/s] 32%|███▏ | 20689/65536 [3:33:10<7:42:00, 1.62it/s] 32%|███▏ | 20690/65536 [3:33:11<7:42:56, 1.61it/s] 32%|███▏ | 20691/65536 [3:33:11<7:44:48, 1.61it/s] 32%|███▏ | 20692/65536 [3:33:12<7:50:22, 1.59it/s] 32%|███▏ | 20693/65536 [3:33:12<7:37:32, 1.63it/s] 32%|███▏ | 20694/65536 [3:33:13<7:23:07, 1.69it/s] 32%|███▏ | 20695/65536 [3:33:14<7:24:35, 1.68it/s] 32%|███▏ | 20696/65536 [3:33:14<7:36:50, 1.64it/s] 32%|███▏ | 20697/65536 [3:33:15<7:23:58, 1.68it/s] 32%|███▏ | 20698/65536 [3:33:15<7:19:09, 1.70it/s] 32%|███▏ | 20699/65536 [3:33:16<7:22:11, 1.69it/s] 32%|███▏ | 20700/65536 [3:33:17<7:28:03, 1.67it/s] {'loss': 3.0458, 'learning_rate': 7.176135667931921e-07, 'epoch': 1277.78} + 32%|███▏ | 20700/65536 [3:33:17<7:28:03, 1.67it/s] 32%|███▏ | 20701/65536 [3:33:17<7:38:03, 1.63it/s] 32%|███▏ | 20702/65536 [3:33:18<7:49:59, 1.59it/s] 32%|███▏ | 20703/65536 [3:33:18<7:44:38, 1.61it/s] 32%|███▏ | 20704/65536 [3:33:19<7:51:54, 1.58it/s] 32%|███▏ | 20705/65536 [3:33:20<7:43:05, 1.61it/s] 32%|███▏ | 20706/65536 [3:33:20<7:37:33, 1.63it/s] 32%|███▏ | 20707/65536 [3:33:21<7:37:40, 1.63it/s] 32%|███▏ | 20708/65536 [3:33:22<7:38:39, 1.63it/s] 32%|███▏ | 20709/65536 [3:33:22<7:37:08, 1.63it/s] 32%|███▏ | 20710/65536 [3:33:23<7:36:16, 1.64it/s] 32%|███▏ | 20711/65536 [3:33:23<7:29:23, 1.66it/s] 32%|███▏ | 20712/65536 [3:33:24<7:41:32, 1.62it/s] 32%|███▏ | 20713/65536 [3:33:25<7:33:23, 1.65it/s] 32%|███▏ | 20714/65536 [3:33:25<7:39:42, 1.63it/s] 32%|███▏ | 20715/65536 [3:33:26<7:34:13, 1.64it/s] 32%|███▏ | 20716/65536 [3:33:26<7:38:30, 1.63it/s] 32%|███▏ | 20717/65536 [3:33:27<7:39:06, 1.63it/s] 32%|███▏ | 20718/65536 [3:33:28<7:35:20, 1.64it/s] 32%|███▏ | 20719/65536 [3:33:28<7:30:45, 1.66it/s] 32%|███▏ | 20720/65536 [3:33:29<7:45:05, 1.61it/s] {'loss': 3.0895, 'learning_rate': 7.17338067833966e-07, 'epoch': 1279.01} + 32%|███▏ | 20720/65536 [3:33:29<7:45:05, 1.61it/s] 32%|███▏ | 20721/65536 [3:33:29<7:37:33, 1.63it/s] 32%|███▏ | 20722/65536 [3:33:30<7:39:39, 1.62it/s] 32%|███▏ | 20723/65536 [3:33:31<8:04:17, 1.54it/s] 32%|███▏ | 20724/65536 [3:33:31<7:51:35, 1.58it/s] 32%|███▏ | 20725/65536 [3:33:32<7:43:29, 1.61it/s] 32%|███▏ | 20726/65536 [3:33:33<7:35:59, 1.64it/s] 32%|███▏ | 20727/65536 [3:33:33<7:29:00, 1.66it/s] 32%|███▏ | 20728/65536 [3:33:34<7:35:04, 1.64it/s] 32%|███▏ | 20729/65536 [3:33:34<7:38:04, 1.63it/s] 32%|███▏ | 20730/65536 [3:33:35<7:30:29, 1.66it/s] 32%|███▏ | 20731/65536 [3:33:36<7:39:58, 1.62it/s] 32%|███▏ | 20732/65536 [3:33:36<7:31:47, 1.65it/s] 32%|███▏ | 20733/65536 [3:33:37<7:35:22, 1.64it/s] 32%|███▏ | 20734/65536 [3:33:37<7:42:07, 1.62it/s] 32%|███▏ | 20735/65536 [3:33:38<7:36:31, 1.64it/s] 32%|███▏ | 20736/65536 [3:33:39<7:35:11, 1.64it/s] 32%|███▏ | 20737/65536 [3:33:39<7:49:31, 1.59it/s] 32%|███▏ | 20738/65536 [3:33:40<7:53:28, 1.58it/s] 32%|███▏ | 20739/65536 [3:33:41<7:45:53, 1.60it/s] 32%|███▏ | 20740/65536 [3:33:41<7:31:56, 1.65it/s] {'loss': 2.9886, 'learning_rate': 7.170625688747399e-07, 'epoch': 1280.25} + 32%|███▏ | 20740/65536 [3:33:41<7:31:56, 1.65it/s] 32%|███▏ | 20741/65536 [3:33:42<7:35:14, 1.64it/s] 32%|███▏ | 20742/65536 [3:33:42<7:48:46, 1.59it/s] 32%|███▏ | 20743/65536 [3:33:43<7:46:47, 1.60it/s] 32%|███▏ | 20744/65536 [3:33:44<7:40:24, 1.62it/s] 32%|███▏ | 20745/65536 [3:33:44<7:32:19, 1.65it/s] 32%|███▏ | 20746/65536 [3:33:45<7:32:11, 1.65it/s] 32%|███▏ | 20747/65536 [3:33:45<7:35:08, 1.64it/s] 32%|███▏ | 20748/65536 [3:33:46<7:36:27, 1.64it/s] 32%|███▏ | 20749/65536 [3:33:47<7:33:18, 1.65it/s] 32%|███▏ | 20750/65536 [3:33:47<7:22:52, 1.69it/s] 32%|███▏ | 20751/65536 [3:33:48<7:32:03, 1.65it/s] 32%|███▏ | 20752/65536 [3:33:49<7:33:04, 1.65it/s] 32%|███▏ | 20753/65536 [3:33:49<7:44:57, 1.61it/s] 32%|███▏ | 20754/65536 [3:33:50<7:47:34, 1.60it/s] 32%|███▏ | 20755/65536 [3:33:50<7:37:59, 1.63it/s] 32%|███▏ | 20756/65536 [3:33:51<7:38:54, 1.63it/s] 32%|███▏ | 20757/65536 [3:33:52<7:37:35, 1.63it/s] 32%|███▏ | 20758/65536 [3:33:52<7:47:54, 1.59it/s] 32%|███▏ | 20759/65536 [3:33:53<7:46:18, 1.60it/s] 32%|███▏ | 20760/65536 [3:33:54<7:53:06, 1.58it/s] {'loss': 3.086, 'learning_rate': 7.167870699155137e-07, 'epoch': 1281.48} + 32%|███▏ | 20760/65536 [3:33:54<7:53:06, 1.58it/s] 32%|███▏ | 20761/65536 [3:33:54<7:43:20, 1.61it/s] 32%|███▏ | 20762/65536 [3:33:55<7:29:51, 1.66it/s] 32%|███▏ | 20763/65536 [3:33:55<7:25:04, 1.68it/s] 32%|███▏ | 20764/65536 [3:33:56<7:31:02, 1.65it/s] 32%|███▏ | 20765/65536 [3:33:57<7:30:06, 1.66it/s] 32%|███▏ | 20766/65536 [3:33:57<7:37:06, 1.63it/s] 32%|███▏ | 20767/65536 [3:33:58<7:34:18, 1.64it/s] 32%|███▏ | 20768/65536 [3:33:58<7:31:15, 1.65it/s] 32%|███▏ | 20769/65536 [3:33:59<7:42:48, 1.61it/s] 32%|███▏ | 20770/65536 [3:34:00<7:57:18, 1.56it/s] 32%|███▏ | 20771/65536 [3:34:00<7:46:57, 1.60it/s] 32%|███▏ | 20772/65536 [3:34:01<7:40:46, 1.62it/s] 32%|███▏ | 20773/65536 [3:34:01<7:35:18, 1.64it/s] 32%|███▏ | 20774/65536 [3:34:02<7:33:42, 1.64it/s] 32%|███▏ | 20775/65536 [3:34:03<7:35:24, 1.64it/s] 32%|███▏ | 20776/65536 [3:34:03<7:38:39, 1.63it/s] 32%|███▏ | 20777/65536 [3:34:04<7:29:39, 1.66it/s] 32%|███▏ | 20778/65536 [3:34:04<7:19:09, 1.70it/s] 32%|███▏ | 20779/65536 [3:34:05<7:22:46, 1.68it/s] 32%|███▏ | 20780/65536 [3:34:06<7:27:32, 1.67it/s] {'loss': 3.006, 'learning_rate': 7.165115709562876e-07, 'epoch': 1282.72} + 32%|███▏ | 20780/65536 [3:34:06<7:27:32, 1.67it/s] 32%|███▏ | 20781/65536 [3:34:06<7:24:46, 1.68it/s] 32%|███▏ | 20782/65536 [3:34:07<7:30:07, 1.66it/s] 32%|███▏ | 20783/65536 [3:34:07<7:24:55, 1.68it/s] 32%|███▏ | 20784/65536 [3:34:08<7:40:41, 1.62it/s] 32%|███▏ | 20785/65536 [3:34:09<7:48:23, 1.59it/s] 32%|███▏ | 20786/65536 [3:34:09<7:37:25, 1.63it/s] 32%|███▏ | 20787/65536 [3:34:10<7:39:15, 1.62it/s] 32%|███▏ | 20788/65536 [3:34:11<7:33:06, 1.65it/s] 32%|███▏ | 20789/65536 [3:34:11<7:28:03, 1.66it/s] 32%|███▏ | 20790/65536 [3:34:12<7:46:02, 1.60it/s] 32%|███▏ | 20791/65536 [3:34:12<7:40:56, 1.62it/s] 32%|███▏ | 20792/65536 [3:34:13<7:42:24, 1.61it/s] 32%|███▏ | 20793/65536 [3:34:14<7:35:50, 1.64it/s] 32%|███▏ | 20794/65536 [3:34:14<7:34:40, 1.64it/s] 32%|███▏ | 20795/65536 [3:34:15<7:34:19, 1.64it/s] 32%|███▏ | 20796/65536 [3:34:15<7:33:42, 1.64it/s] 32%|███▏ | 20797/65536 [3:34:16<7:34:35, 1.64it/s] 32%|███▏ | 20798/65536 [3:34:17<7:24:37, 1.68it/s] 32%|███▏ | 20799/65536 [3:34:17<7:23:36, 1.68it/s] 32%|███▏ | 20800/65536 [3:34:18<7:24:40, 1.68it/s] {'loss': 3.0638, 'learning_rate': 7.162360719970613e-07, 'epoch': 1283.95} + 32%|███▏ | 20800/65536 [3:34:18<7:24:40, 1.68it/s] 32%|███▏ | 20801/65536 [3:34:19<7:53:51, 1.57it/s] 32%|███▏ | 20802/65536 [3:34:19<7:38:10, 1.63it/s] 32%|███▏ | 20803/65536 [3:34:20<7:38:01, 1.63it/s] 32%|███▏ | 20804/65536 [3:34:20<7:33:03, 1.65it/s] 32%|███▏ | 20805/65536 [3:34:21<7:39:16, 1.62it/s] 32%|███▏ | 20806/65536 [3:34:22<7:47:18, 1.60it/s] 32%|███▏ | 20807/65536 [3:34:22<7:47:03, 1.60it/s] 32%|███▏ | 20808/65536 [3:34:23<7:42:19, 1.61it/s] 32%|███▏ | 20809/65536 [3:34:23<7:41:29, 1.62it/s] 32%|███▏ | 20810/65536 [3:34:24<8:02:38, 1.54it/s] 32%|███▏ | 20811/65536 [3:34:25<7:50:09, 1.59it/s] 32%|███▏ | 20812/65536 [3:34:25<7:43:02, 1.61it/s] 32%|███▏ | 20813/65536 [3:34:26<7:40:39, 1.62it/s] 32%|███▏ | 20814/65536 [3:34:27<7:33:43, 1.64it/s] 32%|███▏ | 20815/65536 [3:34:27<7:28:51, 1.66it/s] 32%|███▏ | 20816/65536 [3:34:28<7:32:48, 1.65it/s] 32%|███▏ | 20817/65536 [3:34:28<7:24:06, 1.68it/s] 32%|███▏ | 20818/65536 [3:34:29<7:41:44, 1.61it/s] 32%|███▏ | 20819/65536 [3:34:30<7:39:55, 1.62it/s] 32%|███▏ | 20820/65536 [3:34:30<7:51:16, 1.58it/s] {'loss': 3.0063, 'learning_rate': 7.159605730378352e-07, 'epoch': 1285.19} + 32%|███▏ | 20820/65536 [3:34:30<7:51:16, 1.58it/s] 32%|███▏ | 20821/65536 [3:34:31<7:37:14, 1.63it/s] 32%|███▏ | 20822/65536 [3:34:32<7:43:30, 1.61it/s] 32%|███▏ | 20823/65536 [3:34:32<7:39:59, 1.62it/s] 32%|███▏ | 20824/65536 [3:34:33<7:40:02, 1.62it/s] 32%|███▏ | 20825/65536 [3:34:33<7:28:51, 1.66it/s] 32%|███▏ | 20826/65536 [3:34:34<7:43:01, 1.61it/s] 32%|███▏ | 20827/65536 [3:34:35<7:45:48, 1.60it/s] 32%|███▏ | 20828/65536 [3:34:35<7:43:06, 1.61it/s] 32%|███▏ | 20829/65536 [3:34:36<7:38:23, 1.63it/s] 32%|███▏ | 20830/65536 [3:34:36<7:43:43, 1.61it/s] 32%|███▏ | 20831/65536 [3:34:37<7:43:51, 1.61it/s] 32%|███▏ | 20832/65536 [3:34:38<7:35:38, 1.64it/s] 32%|███▏ | 20833/65536 [3:34:38<7:24:14, 1.68it/s] 32%|███▏ | 20834/65536 [3:34:39<7:37:37, 1.63it/s] 32%|███▏ | 20835/65536 [3:34:40<7:46:23, 1.60it/s] 32%|███▏ | 20836/65536 [3:34:40<7:49:02, 1.59it/s] 32%|███▏ | 20837/65536 [3:34:41<7:40:44, 1.62it/s] 32%|███▏ | 20838/65536 [3:34:41<7:32:49, 1.65it/s] 32%|███▏ | 20839/65536 [3:34:42<7:41:53, 1.61it/s] 32%|███▏ | 20840/65536 [3:34:43<7:38:37, 1.62it/s] {'loss': 3.0392, 'learning_rate': 7.15685074078609e-07, 'epoch': 1286.42} + 32%|███▏ | 20840/65536 [3:34:43<7:38:37, 1.62it/s] 32%|███▏ | 20841/65536 [3:34:43<7:31:56, 1.65it/s] 32%|███▏ | 20842/65536 [3:34:44<7:30:22, 1.65it/s] 32%|███▏ | 20843/65536 [3:34:44<7:28:05, 1.66it/s] 32%|███▏ | 20844/65536 [3:34:45<7:39:57, 1.62it/s] 32%|███▏ | 20845/65536 [3:34:46<7:38:51, 1.62it/s] 32%|███▏ | 20846/65536 [3:34:46<7:30:07, 1.65it/s] 32%|███▏ | 20847/65536 [3:34:47<7:24:09, 1.68it/s] 32%|███▏ | 20848/65536 [3:34:47<7:30:44, 1.65it/s] 32%|███▏ | 20849/65536 [3:34:48<7:35:03, 1.64it/s] 32%|███▏ | 20850/65536 [3:34:49<7:52:43, 1.58it/s] 32%|███▏ | 20851/65536 [3:34:49<8:03:55, 1.54it/s] 32%|███▏ | 20852/65536 [3:34:50<7:44:30, 1.60it/s] 32%|███▏ | 20853/65536 [3:34:51<7:29:17, 1.66it/s] 32%|███▏ | 20854/65536 [3:34:51<7:23:52, 1.68it/s] 32%|███▏ | 20855/65536 [3:34:52<7:35:50, 1.63it/s] 32%|███▏ | 20856/65536 [3:34:52<7:28:54, 1.66it/s] 32%|███▏ | 20857/65536 [3:34:53<7:25:11, 1.67it/s] 32%|███▏ | 20858/65536 [3:34:54<7:25:34, 1.67it/s] 32%|███▏ | 20859/65536 [3:34:54<7:35:58, 1.63it/s] 32%|███▏ | 20860/65536 [3:34:55<7:45:31, 1.60it/s] {'loss': 3.0259, 'learning_rate': 7.154095751193828e-07, 'epoch': 1287.65} + 32%|███▏ | 20860/65536 [3:34:55<7:45:31, 1.60it/s] 32%|███▏ | 20861/65536 [3:34:55<7:35:41, 1.63it/s] 32%|███▏ | 20862/65536 [3:34:56<7:37:02, 1.63it/s] 32%|███▏ | 20863/65536 [3:34:57<7:37:39, 1.63it/s] 32%|███▏ | 20864/65536 [3:34:57<7:37:48, 1.63it/s] 32%|███▏ | 20865/65536 [3:34:58<7:37:17, 1.63it/s] 32%|███▏ | 20866/65536 [3:34:59<7:51:27, 1.58it/s] 32%|███▏ | 20867/65536 [3:34:59<7:39:40, 1.62it/s] 32%|███▏ | 20868/65536 [3:35:00<7:41:54, 1.61it/s] 32%|███▏ | 20869/65536 [3:35:00<7:33:42, 1.64it/s] 32%|███▏ | 20870/65536 [3:35:01<7:28:51, 1.66it/s] 32%|███▏ | 20871/65536 [3:35:02<7:48:09, 1.59it/s] 32%|███▏ | 20872/65536 [3:35:02<7:38:31, 1.62it/s] 32%|███▏ | 20873/65536 [3:35:03<7:38:53, 1.62it/s] 32%|███▏ | 20874/65536 [3:35:03<7:29:18, 1.66it/s] 32%|███▏ | 20875/65536 [3:35:04<7:19:40, 1.69it/s] 32%|███▏ | 20876/65536 [3:35:05<7:16:26, 1.71it/s] 32%|███▏ | 20877/65536 [3:35:05<7:12:27, 1.72it/s] 32%|███▏ | 20878/65536 [3:35:06<7:16:26, 1.71it/s] 32%|███▏ | 20879/65536 [3:35:06<7:48:27, 1.59it/s] 32%|███▏ | 20880/65536 [3:35:07<7:47:52, 1.59it/s] {'loss': 3.0477, 'learning_rate': 7.151340761601567e-07, 'epoch': 1288.89} + 32%|███▏ | 20880/65536 [3:35:07<7:47:52, 1.59it/s] 32%|███▏ | 20881/65536 [3:35:08<7:47:44, 1.59it/s] 32%|███▏ | 20882/65536 [3:35:08<8:01:15, 1.55it/s] 32%|███▏ | 20883/65536 [3:35:09<7:56:35, 1.56it/s] 32%|███▏ | 20884/65536 [3:35:10<7:49:47, 1.58it/s] 32%|███▏ | 20885/65536 [3:35:10<7:37:43, 1.63it/s] 32%|███▏ | 20886/65536 [3:35:11<7:25:08, 1.67it/s] 32%|███▏ | 20887/65536 [3:35:11<7:20:25, 1.69it/s] 32%|███▏ | 20888/65536 [3:35:12<7:33:03, 1.64it/s] 32%|███▏ | 20889/65536 [3:35:13<7:32:07, 1.65it/s] 32%|███▏ | 20890/65536 [3:35:13<7:35:43, 1.63it/s] 32%|███▏ | 20891/65536 [3:35:14<7:32:53, 1.64it/s] 32%|███▏ | 20892/65536 [3:35:14<7:27:04, 1.66it/s] 32%|███▏ | 20893/65536 [3:35:15<7:55:50, 1.56it/s] 32%|███▏ | 20894/65536 [3:35:16<8:02:35, 1.54it/s] 32%|███▏ | 20895/65536 [3:35:16<7:57:58, 1.56it/s] 32%|███▏ | 20896/65536 [3:35:17<7:52:00, 1.58it/s] 32%|███▏ | 20897/65536 [3:35:18<7:38:36, 1.62it/s] 32%|███▏ | 20898/65536 [3:35:18<7:38:26, 1.62it/s] 32%|███▏ | 20899/65536 [3:35:19<7:56:23, 1.56it/s] 32%|███▏ | 20900/65536 [3:35:19<7:40:20, 1.62it/s] {'loss': 3.0454, 'learning_rate': 7.148585772009305e-07, 'epoch': 1290.12} + 32%|███▏ | 20900/65536 [3:35:19<7:40:20, 1.62it/s] 32%|███▏ | 20901/65536 [3:35:20<7:43:27, 1.61it/s] 32%|███▏ | 20902/65536 [3:35:21<7:35:28, 1.63it/s] 32%|███▏ | 20903/65536 [3:35:21<7:33:43, 1.64it/s] 32%|███▏ | 20904/65536 [3:35:22<7:29:35, 1.65it/s] 32%|███▏ | 20905/65536 [3:35:23<7:39:12, 1.62it/s] 32%|███▏ | 20906/65536 [3:35:23<7:40:03, 1.62it/s] 32%|███▏ | 20907/65536 [3:35:24<7:30:53, 1.65it/s] 32%|███▏ | 20908/65536 [3:35:24<7:49:54, 1.58it/s] 32%|███▏ | 20909/65536 [3:35:25<7:34:15, 1.64it/s] 32%|███▏ | 20910/65536 [3:35:26<7:22:30, 1.68it/s] 32%|███▏ | 20911/65536 [3:35:26<7:35:49, 1.63it/s] 32%|███▏ | 20912/65536 [3:35:27<7:26:59, 1.66it/s] 32%|███▏ | 20913/65536 [3:35:27<7:31:42, 1.65it/s] 32%|███▏ | 20914/65536 [3:35:28<7:24:12, 1.67it/s] 32%|███▏ | 20915/65536 [3:35:29<7:52:30, 1.57it/s] 32%|███▏ | 20916/65536 [3:35:29<7:43:31, 1.60it/s] 32%|███▏ | 20917/65536 [3:35:30<7:45:46, 1.60it/s] 32%|███▏ | 20918/65536 [3:35:31<7:37:47, 1.62it/s] 32%|███▏ | 20919/65536 [3:35:31<7:29:51, 1.65it/s] 32%|███▏ | 20920/65536 [3:35:32<7:34:58, 1.63it/s] {'loss': 3.009, 'learning_rate': 7.145830782417044e-07, 'epoch': 1291.36} + 32%|███▏ | 20920/65536 [3:35:32<7:34:58, 1.63it/s] 32%|███▏ | 20921/65536 [3:35:32<7:42:30, 1.61it/s] 32%|███▏ | 20922/65536 [3:35:33<7:36:48, 1.63it/s] 32%|███▏ | 20923/65536 [3:35:34<7:35:24, 1.63it/s] 32%|███▏ | 20924/65536 [3:35:34<7:43:49, 1.60it/s] 32%|███▏ | 20925/65536 [3:35:35<7:31:28, 1.65it/s] 32%|███▏ | 20926/65536 [3:35:35<7:34:43, 1.64it/s] 32%|███▏ | 20927/65536 [3:35:36<7:42:45, 1.61it/s] 32%|███▏ | 20928/65536 [3:35:37<7:35:46, 1.63it/s] 32%|███▏ | 20929/65536 [3:35:37<7:33:21, 1.64it/s] 32%|███▏ | 20930/65536 [3:35:38<7:21:06, 1.69it/s] 32%|███▏ | 20931/65536 [3:35:39<7:48:50, 1.59it/s] 32%|███▏ | 20932/65536 [3:35:39<7:48:05, 1.59it/s] 32%|███▏ | 20933/65536 [3:35:40<7:37:25, 1.63it/s] 32%|███▏ | 20934/65536 [3:35:40<7:32:57, 1.64it/s] 32%|███▏ | 20935/65536 [3:35:41<7:29:10, 1.65it/s] 32%|███▏ | 20936/65536 [3:35:42<7:29:28, 1.65it/s] 32%|███▏ | 20937/65536 [3:35:42<7:25:12, 1.67it/s] 32%|███▏ | 20938/65536 [3:35:43<7:23:49, 1.67it/s] 32%|███▏ | 20939/65536 [3:35:43<7:20:21, 1.69it/s] 32%|███▏ | 20940/65536 [3:35:44<7:17:23, 1.70it/s] {'loss': 3.0454, 'learning_rate': 7.143075792824782e-07, 'epoch': 1292.59} + 32%|███▏ | 20940/65536 [3:35:44<7:17:23, 1.70it/s] 32%|███▏ | 20941/65536 [3:35:45<7:30:05, 1.65it/s] 32%|███▏ | 20942/65536 [3:35:45<7:35:19, 1.63it/s] 32%|███▏ | 20943/65536 [3:35:46<7:37:59, 1.62it/s] 32%|███▏ | 20944/65536 [3:35:46<7:31:50, 1.64it/s] 32%|███▏ | 20945/65536 [3:35:47<7:29:22, 1.65it/s] 32%|███▏ | 20946/65536 [3:35:48<7:35:45, 1.63it/s] 32%|███▏ | 20947/65536 [3:35:48<7:57:56, 1.55it/s] 32%|███▏ | 20948/65536 [3:35:49<7:52:35, 1.57it/s] 32%|███▏ | 20949/65536 [3:35:50<7:50:51, 1.58it/s] 32%|███▏ | 20950/65536 [3:35:50<7:38:10, 1.62it/s] 32%|███▏ | 20951/65536 [3:35:51<7:33:45, 1.64it/s] 32%|███▏ | 20952/65536 [3:35:51<7:32:36, 1.64it/s] 32%|███▏ | 20953/65536 [3:35:52<7:26:48, 1.66it/s] 32%|███▏ | 20954/65536 [3:35:52<7:19:08, 1.69it/s] 32%|███▏ | 20955/65536 [3:35:53<7:28:56, 1.66it/s] 32%|███▏ | 20956/65536 [3:35:54<7:43:00, 1.60it/s] 32%|███▏ | 20957/65536 [3:35:54<7:34:09, 1.64it/s] 32%|███▏ | 20958/65536 [3:35:55<7:31:05, 1.65it/s] 32%|██���▏ | 20959/65536 [3:35:56<7:34:24, 1.63it/s] 32%|███▏ | 20960/65536 [3:35:56<7:30:07, 1.65it/s] {'loss': 3.0392, 'learning_rate': 7.140320803232521e-07, 'epoch': 1293.83} + 32%|███▏ | 20960/65536 [3:35:56<7:30:07, 1.65it/s] 32%|███▏ | 20961/65536 [3:35:57<7:23:23, 1.68it/s] 32%|███▏ | 20962/65536 [3:35:57<7:41:51, 1.61it/s] 32%|███▏ | 20963/65536 [3:35:58<7:54:26, 1.57it/s] 32%|███▏ | 20964/65536 [3:35:59<7:45:28, 1.60it/s] 32%|███▏ | 20965/65536 [3:35:59<7:52:30, 1.57it/s] 32%|███▏ | 20966/65536 [3:36:00<7:42:57, 1.60it/s] 32%|███▏ | 20967/65536 [3:36:01<7:33:20, 1.64it/s] 32%|███▏ | 20968/65536 [3:36:01<7:28:06, 1.66it/s] 32%|███▏ | 20969/65536 [3:36:02<7:31:12, 1.65it/s] 32%|███▏ | 20970/65536 [3:36:02<7:27:06, 1.66it/s] 32%|███▏ | 20971/65536 [3:36:03<7:23:33, 1.67it/s] 32%|███▏ | 20972/65536 [3:36:04<7:40:07, 1.61it/s] 32%|███▏ | 20973/65536 [3:36:04<7:27:54, 1.66it/s] 32%|███▏ | 20974/65536 [3:36:05<7:29:39, 1.65it/s] 32%|███▏ | 20975/65536 [3:36:05<7:26:30, 1.66it/s] 32%|███▏ | 20976/65536 [3:36:06<7:27:05, 1.66it/s] 32%|███▏ | 20977/65536 [3:36:07<7:25:55, 1.67it/s] 32%|███▏ | 20978/65536 [3:36:07<7:23:28, 1.67it/s] 32%|███▏ | 20979/65536 [3:36:08<7:36:19, 1.63it/s] 32%|███▏ | 20980/65536 [3:36:09<7:57:19, 1.56it/s] {'loss': 3.046, 'learning_rate': 7.13756581364026e-07, 'epoch': 1295.06} + 32%|███▏ | 20980/65536 [3:36:09<7:57:19, 1.56it/s] 32%|███▏ | 20981/65536 [3:36:09<7:55:18, 1.56it/s] 32%|███▏ | 20982/65536 [3:36:10<8:01:16, 1.54it/s] 32%|███▏ | 20983/65536 [3:36:10<8:03:50, 1.53it/s] 32%|███▏ | 20984/65536 [3:36:11<7:58:42, 1.55it/s] 32%|███▏ | 20985/65536 [3:36:12<7:50:38, 1.58it/s] 32%|███▏ | 20986/65536 [3:36:12<7:45:33, 1.59it/s] 32%|███▏ | 20987/65536 [3:36:13<7:44:22, 1.60it/s] 32%|███▏ | 20988/65536 [3:36:14<7:39:27, 1.62it/s] 32%|███▏ | 20989/65536 [3:36:14<7:29:27, 1.65it/s] 32%|███▏ | 20990/65536 [3:36:15<7:20:59, 1.68it/s] 32%|███▏ | 20991/65536 [3:36:15<7:23:58, 1.67it/s] 32%|███▏ | 20992/65536 [3:36:16<7:21:47, 1.68it/s] 32%|███▏ | 20993/65536 [3:36:16<7:18:23, 1.69it/s] 32%|███▏ | 20994/65536 [3:36:17<7:17:25, 1.70it/s] 32%|███▏ | 20995/65536 [3:36:18<7:16:37, 1.70it/s] 32%|███▏ | 20996/65536 [3:36:18<7:36:31, 1.63it/s] 32%|███▏ | 20997/65536 [3:36:19<7:30:41, 1.65it/s] 32%|███▏ | 20998/65536 [3:36:20<7:28:04, 1.66it/s] 32%|███▏ | 20999/65536 [3:36:20<7:29:02, 1.65it/s] 32%|███▏ | 21000/65536 [3:36:21<7:30:52, 1.65it/s] {'loss': 3.0165, 'learning_rate': 7.134810824047998e-07, 'epoch': 1296.3} + 32%|███▏ | 21000/65536 [3:36:21<7:30:52, 1.65it/s] 32%|███▏ | 21001/65536 [3:36:21<7:19:05, 1.69it/s] 32%|███▏ | 21002/65536 [3:36:22<7:16:21, 1.70it/s] 32%|███▏ | 21003/65536 [3:36:22<7:19:22, 1.69it/s] 32%|███▏ | 21004/65536 [3:36:23<7:37:12, 1.62it/s] 32%|███▏ | 21005/65536 [3:36:24<7:39:43, 1.61it/s] 32%|███▏ | 21006/65536 [3:36:24<7:37:45, 1.62it/s] 32%|███▏ | 21007/65536 [3:36:25<7:30:06, 1.65it/s] 32%|███▏ | 21008/65536 [3:36:26<7:40:28, 1.61it/s] 32%|███▏ | 21009/65536 [3:36:26<7:46:15, 1.59it/s] 32%|███▏ | 21010/65536 [3:36:27<7:41:33, 1.61it/s] 32%|███▏ | 21011/65536 [3:36:27<7:38:44, 1.62it/s] 32%|███▏ | 21012/65536 [3:36:28<7:42:44, 1.60it/s] 32%|███▏ | 21013/65536 [3:36:29<7:46:13, 1.59it/s] 32%|███▏ | 21014/65536 [3:36:29<7:32:41, 1.64it/s] 32%|███▏ | 21015/65536 [3:36:30<7:31:14, 1.64it/s] 32%|███▏ | 21016/65536 [3:36:31<7:47:26, 1.59it/s] 32%|███▏ | 21017/65536 [3:36:31<7:39:24, 1.62it/s] 32%|███▏ | 21018/65536 [3:36:32<7:32:08, 1.64it/s] 32%|███▏ | 21019/65536 [3:36:32<7:29:28, 1.65it/s] 32%|███▏ | 21020/65536 [3:36:33<7:33:32, 1.64it/s] {'loss': 3.031, 'learning_rate': 7.132055834455737e-07, 'epoch': 1297.53} + 32%|███▏ | 21020/65536 [3:36:33<7:33:32, 1.64it/s] 32%|███▏ | 21021/65536 [3:36:34<7:27:41, 1.66it/s] 32%|███▏ | 21022/65536 [3:36:34<7:25:26, 1.67it/s] 32%|███▏ | 21023/65536 [3:36:35<7:18:37, 1.69it/s] 32%|███▏ | 21024/65536 [3:36:35<7:35:26, 1.63it/s] 32%|███▏ | 21025/65536 [3:36:36<7:37:24, 1.62it/s] 32%|███▏ | 21026/65536 [3:36:37<7:24:09, 1.67it/s] 32%|███▏ | 21027/65536 [3:36:37<7:20:46, 1.68it/s] 32%|███▏ | 21028/65536 [3:36:38<7:37:50, 1.62it/s] 32%|███▏ | 21029/65536 [3:36:38<7:37:30, 1.62it/s] 32%|███▏ | 21030/65536 [3:36:39<7:22:35, 1.68it/s] 32%|███▏ | 21031/65536 [3:36:40<7:27:16, 1.66it/s] 32%|███▏ | 21032/65536 [3:36:40<7:30:30, 1.65it/s] 32%|███▏ | 21033/65536 [3:36:41<7:30:52, 1.65it/s] 32%|███▏ | 21034/65536 [3:36:42<7:39:17, 1.61it/s] 32%|███▏ | 21035/65536 [3:36:42<7:29:21, 1.65it/s] 32%|███▏ | 21036/65536 [3:36:43<7:43:11, 1.60it/s] 32%|███▏ | 21037/65536 [3:36:43<7:34:42, 1.63it/s] 32%|███▏ | 21038/65536 [3:36:44<7:35:59, 1.63it/s] 32%|███▏ | 21039/65536 [3:36:45<7:28:39, 1.65it/s] 32%|███▏ | 21040/65536 [3:36:45<7:23:17, 1.67it/s] {'loss': 3.0662, 'learning_rate': 7.129300844863475e-07, 'epoch': 1298.77} + 32%|███▏ | 21040/65536 [3:36:45<7:23:17, 1.67it/s] 32%|███▏ | 21041/65536 [3:36:46<7:24:55, 1.67it/s] 32%|███▏ | 21042/65536 [3:36:46<7:31:53, 1.64it/s] 32%|███▏ | 21043/65536 [3:36:47<7:30:49, 1.64it/s] 32%|███▏ | 21044/65536 [3:36:48<7:51:36, 1.57it/s] 32%|███▏ | 21045/65536 [3:36:48<7:45:51, 1.59it/s] 32%|███▏ | 21046/65536 [3:36:49<7:37:48, 1.62it/s] 32%|███▏ | 21047/65536 [3:36:49<7:38:03, 1.62it/s] 32%|███▏ | 21048/65536 [3:36:50<7:34:55, 1.63it/s] 32%|███▏ | 21049/65536 [3:36:51<7:30:04, 1.65it/s] 32%|███▏ | 21050/65536 [3:36:51<7:27:00, 1.66it/s] 32%|███▏ | 21051/65536 [3:36:52<7:27:38, 1.66it/s] 32%|███▏ | 21052/65536 [3:36:53<7:33:57, 1.63it/s] 32%|███▏ | 21053/65536 [3:36:53<7:41:19, 1.61it/s] 32%|███▏ | 21054/65536 [3:36:54<7:49:59, 1.58it/s] 32%|███▏ | 21055/65536 [3:36:54<7:45:44, 1.59it/s] 32%|███▏ | 21056/65536 [3:36:55<7:47:43, 1.58it/s] 32%|███▏ | 21057/65536 [3:36:56<7:34:50, 1.63it/s] 32%|███▏ | 21058/65536 [3:36:56<7:33:01, 1.64it/s] 32%|███▏ | 21059/65536 [3:36:57<7:27:24, 1.66it/s] 32%|███▏ | 21060/65536 [3:36:57<7:19:20, 1.69it/s] {'loss': 3.0765, 'learning_rate': 7.126545855271213e-07, 'epoch': 1300.0} + 32%|███▏ | 21060/65536 [3:36:57<7:19:20, 1.69it/s] 32%|███▏ | 21061/65536 [3:36:58<7:39:34, 1.61it/s] 32%|███▏ | 21062/65536 [3:36:59<7:26:30, 1.66it/s] 32%|███▏ | 21063/65536 [3:36:59<7:33:40, 1.63it/s] 32%|███▏ | 21064/65536 [3:37:00<7:35:27, 1.63it/s] 32%|███▏ | 21065/65536 [3:37:00<7:26:18, 1.66it/s] 32%|███▏ | 21066/65536 [3:37:01<7:16:19, 1.70it/s] 32%|███▏ | 21067/65536 [3:37:02<7:14:50, 1.70it/s] 32%|███▏ | 21068/65536 [3:37:02<7:17:13, 1.70it/s] 32%|███▏ | 21069/65536 [3:37:03<7:37:43, 1.62it/s] 32%|███▏ | 21070/65536 [3:37:03<7:33:27, 1.63it/s] 32%|███▏ | 21071/65536 [3:37:04<7:33:17, 1.63it/s] 32%|███▏ | 21072/65536 [3:37:05<7:37:20, 1.62it/s] 32%|███▏ | 21073/65536 [3:37:05<7:33:27, 1.63it/s] 32%|███▏ | 21074/65536 [3:37:06<7:30:32, 1.64it/s] 32%|███▏ | 21075/65536 [3:37:07<7:40:10, 1.61it/s] 32%|███▏ | 21076/65536 [3:37:07<7:36:06, 1.62it/s] 32%|███▏ | 21077/65536 [3:37:08<7:46:01, 1.59it/s] 32%|███▏ | 21078/65536 [3:37:08<7:35:12, 1.63it/s] 32%|███▏ | 21079/65536 [3:37:09<7:25:25, 1.66it/s] 32%|███▏ | 21080/65536 [3:37:10<7:22:49, 1.67it/s] {'loss': 3.014, 'learning_rate': 7.123790865678951e-07, 'epoch': 1301.23} + 32%|███▏ | 21080/65536 [3:37:10<7:22:49, 1.67it/s] 32%|███▏ | 21081/65536 [3:37:10<7:22:26, 1.67it/s] 32%|███▏ | 21082/65536 [3:37:11<7:15:01, 1.70it/s] 32%|███▏ | 21083/65536 [3:37:11<7:19:40, 1.69it/s] 32%|███▏ | 21084/65536 [3:37:12<7:30:22, 1.65it/s] 32%|███▏ | 21085/65536 [3:37:13<7:35:34, 1.63it/s] 32%|███▏ | 21086/65536 [3:37:13<7:35:50, 1.63it/s] 32%|███▏ | 21087/65536 [3:37:14<7:31:21, 1.64it/s] 32%|███▏ | 21088/65536 [3:37:14<7:28:23, 1.65it/s] 32%|███▏ | 21089/65536 [3:37:15<7:34:48, 1.63it/s] 32%|███▏ | 21090/65536 [3:37:16<7:43:00, 1.60it/s] 32%|███▏ | 21091/65536 [3:37:16<7:33:54, 1.63it/s] 32%|███▏ | 21092/65536 [3:37:17<7:39:38, 1.61it/s] 32%|��██▏ | 21093/65536 [3:37:18<8:07:08, 1.52it/s] 32%|███▏ | 21094/65536 [3:37:18<7:47:24, 1.58it/s] 32%|███▏ | 21095/65536 [3:37:19<8:00:12, 1.54it/s] 32%|███▏ | 21096/65536 [3:37:20<7:43:08, 1.60it/s] 32%|███▏ | 21097/65536 [3:37:20<7:42:49, 1.60it/s] 32%|███▏ | 21098/65536 [3:37:21<7:35:03, 1.63it/s] 32%|███▏ | 21099/65536 [3:37:21<7:46:52, 1.59it/s] 32%|███▏ | 21100/65536 [3:37:22<7:35:09, 1.63it/s] {'loss': 2.9775, 'learning_rate': 7.12103587608669e-07, 'epoch': 1302.47} + 32%|███▏ | 21100/65536 [3:37:22<7:35:09, 1.63it/s] 32%|███▏ | 21101/65536 [3:37:23<7:39:11, 1.61it/s] 32%|███▏ | 21102/65536 [3:37:23<7:33:46, 1.63it/s] 32%|███▏ | 21103/65536 [3:37:24<7:25:46, 1.66it/s] 32%|███▏ | 21104/65536 [3:37:24<7:21:17, 1.68it/s] 32%|███▏ | 21105/65536 [3:37:25<7:19:48, 1.68it/s] 32%|███▏ | 21106/65536 [3:37:26<7:33:34, 1.63it/s] 32%|███▏ | 21107/65536 [3:37:26<7:42:10, 1.60it/s] 32%|███▏ | 21108/65536 [3:37:27<7:33:54, 1.63it/s] 32%|███▏ | 21109/65536 [3:37:27<7:42:14, 1.60it/s] 32%|███▏ | 21110/65536 [3:37:28<7:28:33, 1.65it/s] 32%|███▏ | 21111/65536 [3:37:29<7:36:42, 1.62it/s] 32%|███▏ | 21112/65536 [3:37:29<7:32:21, 1.64it/s] 32%|███▏ | 21113/65536 [3:37:30<7:23:24, 1.67it/s] 32%|███▏ | 21114/65536 [3:37:30<7:23:27, 1.67it/s] 32%|███▏ | 21115/65536 [3:37:31<7:20:49, 1.68it/s] 32%|███▏ | 21116/65536 [3:37:32<7:19:28, 1.68it/s] 32%|███▏ | 21117/65536 [3:37:32<7:20:18, 1.68it/s] 32%|███▏ | 21118/65536 [3:37:33<7:26:55, 1.66it/s] 32%|███▏ | 21119/65536 [3:37:33<7:25:03, 1.66it/s] 32%|███▏ | 21120/65536 [3:37:34<7:37:06, 1.62it/s] {'loss': 3.0938, 'learning_rate': 7.118280886494429e-07, 'epoch': 1303.7} + 32%|███▏ | 21120/65536 [3:37:34<7:37:06, 1.62it/s] 32%|███▏ | 21121/65536 [3:37:35<7:36:03, 1.62it/s] 32%|███▏ | 21122/65536 [3:37:35<7:32:11, 1.64it/s] 32%|███▏ | 21123/65536 [3:37:36<7:26:03, 1.66it/s] 32%|███▏ | 21124/65536 [3:37:37<7:40:06, 1.61it/s] 32%|███▏ | 21125/65536 [3:37:37<7:47:22, 1.58it/s] 32%|███▏ | 21126/65536 [3:37:38<7:39:08, 1.61it/s] 32%|███▏ | 21127/65536 [3:37:38<7:48:04, 1.58it/s] 32%|███▏ | 21128/65536 [3:37:39<7:32:48, 1.63it/s] 32%|███▏ | 21129/65536 [3:37:40<7:26:18, 1.66it/s] 32%|███▏ | 21130/65536 [3:37:40<7:30:41, 1.64it/s] 32%|███▏ | 21131/65536 [3:37:41<7:29:58, 1.64it/s] 32%|███▏ | 21132/65536 [3:37:41<7:29:02, 1.65it/s] 32%|███▏ | 21133/65536 [3:37:42<7:25:44, 1.66it/s] 32%|███▏ | 21134/65536 [3:37:43<7:26:42, 1.66it/s] 32%|███▏ | 21135/65536 [3:37:43<7:16:17, 1.70it/s] 32%|███▏ | 21136/65536 [3:37:44<7:14:15, 1.70it/s] 32%|███▏ | 21137/65536 [3:37:44<7:22:11, 1.67it/s] 32%|███▏ | 21138/65536 [3:37:45<7:19:23, 1.68it/s] 32%|███▏ | 21139/65536 [3:37:46<7:39:22, 1.61it/s] 32%|███▏ | 21140/65536 [3:37:46<7:32:48, 1.63it/s] {'loss': 3.0424, 'learning_rate': 7.115525896902166e-07, 'epoch': 1304.94} + 32%|███▏ | 21140/65536 [3:37:46<7:32:48, 1.63it/s] 32%|███▏ | 21141/65536 [3:37:47<7:35:01, 1.63it/s] 32%|███▏ | 21142/65536 [3:37:48<7:49:42, 1.58it/s] 32%|███▏ | 21143/65536 [3:37:48<7:41:46, 1.60it/s] 32%|███▏ | 21144/65536 [3:37:49<7:46:08, 1.59it/s] 32%|███▏ | 21145/65536 [3:37:49<7:40:27, 1.61it/s] 32%|███▏ | 21146/65536 [3:37:50<7:37:53, 1.62it/s] 32%|███▏ | 21147/65536 [3:37:51<7:34:38, 1.63it/s] 32%|███▏ | 21148/65536 [3:37:51<7:41:09, 1.60it/s] 32%|███▏ | 21149/65536 [3:37:52<7:43:18, 1.60it/s] 32%|███▏ | 21150/65536 [3:37:53<7:40:58, 1.60it/s] 32%|███▏ | 21151/65536 [3:37:53<7:45:14, 1.59it/s] 32%|███▏ | 21152/65536 [3:37:54<7:33:43, 1.63it/s] 32%|███▏ | 21153/65536 [3:37:54<7:41:42, 1.60it/s] 32%|███▏ | 21154/65536 [3:37:55<7:31:12, 1.64it/s] 32%|███▏ | 21155/65536 [3:37:56<7:31:31, 1.64it/s] 32%|███▏ | 21156/65536 [3:37:56<7:29:16, 1.65it/s] 32%|███▏ | 21157/65536 [3:37:57<7:26:08, 1.66it/s] 32%|███▏ | 21158/65536 [3:37:57<7:42:38, 1.60it/s] 32%|███▏ | 21159/65536 [3:37:58<7:39:49, 1.61it/s] 32%|███▏ | 21160/65536 [3:37:59<7:30:41, 1.64it/s] {'loss': 3.053, 'learning_rate': 7.112770907309905e-07, 'epoch': 1306.17} + 32%|███▏ | 21160/65536 [3:37:59<7:30:41, 1.64it/s] 32%|███▏ | 21161/65536 [3:37:59<7:38:04, 1.61it/s] 32%|███▏ | 21162/65536 [3:38:00<7:33:41, 1.63it/s] 32%|███▏ | 21163/65536 [3:38:01<7:29:41, 1.64it/s] 32%|███▏ | 21164/65536 [3:38:01<7:39:14, 1.61it/s] 32%|███▏ | 21165/65536 [3:38:02<7:35:31, 1.62it/s] 32%|███▏ | 21166/65536 [3:38:02<7:39:13, 1.61it/s] 32%|███▏ | 21167/65536 [3:38:03<7:36:29, 1.62it/s] 32%|███▏ | 21168/65536 [3:38:04<7:24:55, 1.66it/s] 32%|███▏ | 21169/65536 [3:38:04<7:25:08, 1.66it/s] 32%|███▏ | 21170/65536 [3:38:05<7:21:22, 1.68it/s] 32%|███▏ | 21171/65536 [3:38:05<7:31:31, 1.64it/s] 32%|███▏ | 21172/65536 [3:38:06<7:33:27, 1.63it/s] 32%|███▏ | 21173/65536 [3:38:07<7:32:33, 1.63it/s] 32%|███▏ | 21174/65536 [3:38:07<7:42:30, 1.60it/s] 32%|███▏ | 21175/65536 [3:38:08<7:45:37, 1.59it/s] 32%|███▏ | 21176/65536 [3:38:09<7:34:47, 1.63it/s] 32%|███▏ | 21177/65536 [3:38:09<7:33:19, 1.63it/s] 32%|███▏ | 21178/65536 [3:38:10<7:35:29, 1.62it/s] 32%|███▏ | 21179/65536 [3:38:10<7:26:47, 1.65it/s] 32%|███▏ | 21180/65536 [3:38:11<7:31:45, 1.64it/s] {'loss': 3.0423, 'learning_rate': 7.110015917717643e-07, 'epoch': 1307.41} + 32%|███▏ | 21180/65536 [3:38:11<7:31:45, 1.64it/s] 32%|███▏ | 21181/65536 [3:38:12<7:47:46, 1.58it/s] 32%|███▏ | 21182/65536 [3:38:12<7:35:19, 1.62it/s] 32%|███▏ | 21183/65536 [3:38:13<7:24:21, 1.66it/s] 32%|███▏ | 21184/65536 [3:38:13<7:23:14, 1.67it/s] 32%|███▏ | 21185/65536 [3:38:14<7:28:03, 1.65it/s] 32%|███▏ | 21186/65536 [3:38:15<7:39:11, 1.61it/s] 32%|███▏ | 21187/65536 [3:38:15<7:24:47, 1.66it/s] 32%|███▏ | 21188/65536 [3:38:16<7:23:10, 1.67it/s] 32%|███▏ | 21189/65536 [3:38:16<7:25:54, 1.66it/s] 32%|███▏ | 21190/65536 [3:38:17<7:33:41, 1.63it/s] 32%|███▏ | 21191/65536 [3:38:18<7:28:37, 1.65it/s] 32%|███▏ | 21192/65536 [3:38:18<7:37:46, 1.61it/s] 32%|███▏ | 21193/65536 [3:38:19<7:26:05, 1.66it/s] 32%|███▏ | 21194/65536 [3:38:19<7:24:57, 1.66it/s] 32%|███▏ | 21195/65536 [3:38:20<7:23:57, 1.66it/s] 32%|███▏ | 21196/65536 [3:38:21<7:20:02, 1.68it/s] 32%|███▏ | 21197/65536 [3:38:21<7:29:28, 1.64it/s] 32%|███▏ | 21198/65536 [3:38:22<7:36:05, 1.62it/s] 32%|███▏ | 21199/65536 [3:38:23<7:40:40, 1.60it/s] 32%|███▏ | 21200/65536 [3:38:23<7:34:58, 1.62it/s] {'loss': 3.003, 'learning_rate': 7.107260928125382e-07, 'epoch': 1308.64} + 32%|███▏ | 21200/65536 [3:38:23<7:34:58, 1.62it/s] 32%|███▏ | 21201/65536 [3:38:24<7:27:48, 1.65it/s] 32%|███▏ | 21202/65536 [3:38:24<7:29:32, 1.64it/s] 32%|███▏ | 21203/65536 [3:38:25<7:38:20, 1.61it/s] 32%|███▏ | 21204/65536 [3:38:26<7:41:19, 1.60it/s] 32%|███▏ | 21205/65536 [3:38:26<7:36:41, 1.62it/s] 32%|███▏ | 21206/65536 [3:38:27<7:45:28, 1.59it/s] 32%|███▏ | 21207/65536 [3:38:27<7:43:09, 1.60it/s] 32%|███▏ | 21208/65536 [3:38:28<7:47:40, 1.58it/s] 32%|███▏ | 21209/65536 [3:38:29<7:45:54, 1.59it/s] 32%|███▏ | 21210/65536 [3:38:29<7:38:50, 1.61it/s] 32%|███▏ | 21211/65536 [3:38:30<7:37:08, 1.62it/s] 32%|███▏ | 21212/65536 [3:38:31<7:30:05, 1.64it/s] 32%|███▏ | 21213/65536 [3:38:31<7:23:50, 1.66it/s] 32%|███▏ | 21214/65536 [3:38:32<7:27:57, 1.65it/s] 32%|███▏ | 21215/65536 [3:38:32<7:33:00, 1.63it/s] 32%|███▏ | 21216/65536 [3:38:33<7:35:14, 1.62it/s] 32%|███▏ | 21217/65536 [3:38:34<7:23:05, 1.67it/s] 32%|███▏ | 21218/65536 [3:38:34<7:25:49, 1.66it/s] 32%|███▏ | 21219/65536 [3:38:35<7:36:24, 1.62it/s] 32%|███▏ | 21220/65536 [3:38:35<7:35:10, 1.62it/s] {'loss': 3.0905, 'learning_rate': 7.10450593853312e-07, 'epoch': 1309.88} + 32%|███▏ | 21220/65536 [3:38:35<7:35:10, 1.62it/s] 32%|███▏ | 21221/65536 [3:38:36<7:27:18, 1.65it/s] 32%|███▏ | 21222/65536 [3:38:37<7:19:51, 1.68it/s] 32%|███▏ | 21223/65536 [3:38:37<7:38:48, 1.61it/s] 32%|███▏ | 21224/65536 [3:38:38<7:26:03, 1.66it/s] 32%|███▏ | 21225/65536 [3:38:38<7:22:50, 1.67it/s] 32%|███▏ | 21226/65536 [3:38:39<7:24:47, 1.66it/s] 32%|███▏ | 21227/65536 [3:38:40<7:18:57, 1.68it/s] 32%|███▏ | 21228/65536 [3:38:40<7:24:35, 1.66it/s] 32%|███▏ | 21229/65536 [3:38:41<7:34:55, 1.62it/s] 32%|███▏ | 21230/65536 [3:38:41<7:29:12, 1.64it/s] 32%|███▏ | 21231/65536 [3:38:42<7:35:58, 1.62it/s] 32%|███▏ | 21232/65536 [3:38:43<7:38:07, 1.61it/s] 32%|███▏ | 21233/65536 [3:38:43<7:33:39, 1.63it/s] 32%|███▏ | 21234/65536 [3:38:44<7:27:24, 1.65it/s] 32%|███▏ | 21235/65536 [3:38:45<7:22:31, 1.67it/s] 32%|███▏ | 21236/65536 [3:38:45<7:45:17, 1.59it/s] 32%|███▏ | 21237/65536 [3:38:46<7:38:45, 1.61it/s] 32%|███▏ | 21238/65536 [3:38:46<7:29:29, 1.64it/s] 32%|███▏ | 21239/65536 [3:38:47<7:43:07, 1.59it/s] 32%|███▏ | 21240/65536 [3:38:48<7:36:08, 1.62it/s] {'loss': 3.0676, 'learning_rate': 7.101750948940859e-07, 'epoch': 1311.11} + 32%|███▏ | 21240/65536 [3:38:48<7:36:08, 1.62it/s] 32%|███▏ | 21241/65536 [3:38:48<7:42:14, 1.60it/s] 32%|███▏ | 21242/65536 [3:38:49<7:39:57, 1.61it/s] 32%|███▏ | 21243/65536 [3:38:50<7:29:02, 1.64it/s] 32%|███▏ | 21244/65536 [3:38:50<7:43:06, 1.59it/s] 32%|███▏ | 21245/65536 [3:38:51<7:54:30, 1.56it/s] 32%|███▏ | 21246/65536 [3:38:51<7:43:28, 1.59it/s] 32%|███▏ | 21247/65536 [3:38:52<7:37:51, 1.61it/s] 32%|███▏ | 21248/65536 [3:38:53<7:26:57, 1.65it/s] 32%|███▏ | 21249/65536 [3:38:53<7:24:37, 1.66it/s] 32%|███▏ | 21250/65536 [3:38:54<7:41:23, 1.60it/s] 32%|███▏ | 21251/65536 [3:38:55<7:41:05, 1.60it/s] 32%|███▏ | 21252/65536 [3:38:55<7:37:08, 1.61it/s] 32%|███▏ | 21253/65536 [3:38:56<7:31:14, 1.64it/s] 32%|███▏ | 21254/65536 [3:38:56<7:30:03, 1.64it/s] 32%|███▏ | 21255/65536 [3:38:57<7:41:28, 1.60it/s] 32%|███▏ | 21256/65536 [3:38:58<7:44:31, 1.59it/s] 32%|███▏ | 21257/65536 [3:38:58<7:38:09, 1.61it/s] 32%|███▏ | 21258/65536 [3:38:59<7:33:12, 1.63it/s] 32%|███▏ | 21259/65536 [3:38:59<7:30:13, 1.64it/s] 32%|███▏ | 21260/65536 [3:39:00<7:35:36, 1.62it/s] {'loss': 2.9869, 'learning_rate': 7.098995959348598e-07, 'epoch': 1312.35} + 32%|███▏ | 21260/65536 [3:39:00<7:35:36, 1.62it/s] 32%|███▏ | 21261/65536 [3:39:01<7:33:04, 1.63it/s] 32%|███▏ | 21262/65536 [3:39:01<7:24:38, 1.66it/s] 32%|███▏ | 21263/65536 [3:39:02<7:32:07, 1.63it/s] 32%|███▏ | 21264/65536 [3:39:02<7:27:23, 1.65it/s] 32%|███▏ | 21265/65536 [3:39:03<7:28:01, 1.65it/s] 32%|███▏ | 21266/65536 [3:39:04<7:40:04, 1.60it/s] 32%|███▏ | 21267/65536 [3:39:04<7:36:35, 1.62it/s] 32%|███▏ | 21268/65536 [3:39:05<7:23:09, 1.66it/s] 32%|███▏ | 21269/65536 [3:39:06<7:21:41, 1.67it/s] 32%|███▏ | 21270/65536 [3:39:06<7:19:09, 1.68it/s] 32%|███▏ | 21271/65536 [3:39:07<7:33:10, 1.63it/s] 32%|███▏ | 21272/65536 [3:39:07<7:27:30, 1.65it/s] 32%|███▏ | 21273/65536 [3:39:08<7:26:59, 1.65it/s] 32%|███▏ | 21274/65536 [3:39:09<7:29:51, 1.64it/s] 32%|███▏ | 21275/65536 [3:39:09<7:23:18, 1.66it/s] 32%|███▏ | 21276/65536 [3:39:10<7:28:01, 1.65it/s] 32%|███▏ | 21277/65536 [3:39:10<7:21:35, 1.67it/s] 32%|███▏ | 21278/65536 [3:39:11<7:23:34, 1.66it/s] 32%|███▏ | 21279/65536 [3:39:12<7:23:31, 1.66it/s] 32%|███▏ | 21280/65536 [3:39:12<7:26:03, 1.65it/s] {'loss': 3.082, 'learning_rate': 7.096240969756336e-07, 'epoch': 1313.58} + 32%|███▏ | 21280/65536 [3:39:12<7:26:03, 1.65it/s] 32%|███▏ | 21281/65536 [3:39:13<7:28:57, 1.64it/s] 32%|███▏ | 21282/65536 [3:39:13<7:44:27, 1.59it/s] 32%|███▏ | 21283/65536 [3:39:14<7:57:24, 1.54it/s] 32%|███▏ | 21284/65536 [3:39:15<7:48:50, 1.57it/s] 32%|███▏ | 21285/65536 [3:39:15<7:25:17, 1.66it/s] 32%|███▏ | 21286/65536 [3:39:16<7:16:34, 1.69it/s] 32%|███▏ | 21287/65536 [3:39:17<7:39:36, 1.60it/s] 32%|███▏ | 21288/65536 [3:39:17<7:47:53, 1.58it/s] 32%|███▏ | 21289/65536 [3:39:18<7:46:04, 1.58it/s] 32%|███▏ | 21290/65536 [3:39:19<7:53:27, 1.56it/s] 32%|███▏ | 21291/65536 [3:39:19<7:48:55, 1.57it/s] 32%|███▏ | 21292/65536 [3:39:20<7:36:43, 1.61it/s] 32%|███▏ | 21293/65536 [3:39:20<7:35:18, 1.62it/s] 32%|███▏ | 21294/65536 [3:39:21<7:35:47, 1.62it/s] 32%|███▏ | 21295/65536 [3:39:22<7:32:09, 1.63it/s] 32%|███▏ | 21296/65536 [3:39:22<7:34:16, 1.62it/s] 32%|███▏ | 21297/65536 [3:39:23<7:25:22, 1.66it/s] 32%|███▏ | 21298/65536 [3:39:23<7:25:13, 1.66it/s] 32%|███▏ | 21299/65536 [3:39:24<7:18:02, 1.68it/s] 33%|███▎ | 21300/65536 [3:39:24<7:13:31, 1.70it/s] {'loss': 2.9242, 'learning_rate': 7.093485980164075e-07, 'epoch': 1314.81} + 33%|███▎ | 21300/65536 [3:39:24<7:13:31, 1.70it/s] 33%|███▎ | 21301/65536 [3:39:25<7:31:57, 1.63it/s] 33%|███▎ | 21302/65536 [3:39:26<7:38:30, 1.61it/s] 33%|███▎ | 21303/65536 [3:39:26<7:33:07, 1.63it/s] 33%|███▎ | 21304/65536 [3:39:27<7:59:44, 1.54it/s] 33%|███▎ | 21305/65536 [3:39:28<7:52:47, 1.56it/s] 33%|███▎ | 21306/65536 [3:39:28<7:35:57, 1.62it/s] 33%|███▎ | 21307/65536 [3:39:29<7:24:44, 1.66it/s] 33%|███▎ | 21308/65536 [3:39:29<7:16:44, 1.69it/s] 33%|███▎ | 21309/65536 [3:39:30<7:21:14, 1.67it/s] 33%|███▎ | 21310/65536 [3:39:31<7:31:05, 1.63it/s] 33%|███▎ | 21311/65536 [3:39:31<7:28:32, 1.64it/s] 33%|███▎ | 21312/65536 [3:39:32<7:21:26, 1.67it/s] 33%|███▎ | 21313/65536 [3:39:32<7:23:22, 1.66it/s] 33%|███▎ | 21314/65536 [3:39:33<7:16:03, 1.69it/s] 33%|███▎ | 21315/65536 [3:39:34<7:13:55, 1.70it/s] 33%|███▎ | 21316/65536 [3:39:34<7:33:33, 1.62it/s] 33%|███▎ | 21317/65536 [3:39:35<7:27:39, 1.65it/s] 33%|███▎ | 21318/65536 [3:39:36<7:30:23, 1.64it/s] 33%|███▎ | 21319/65536 [3:39:36<7:36:17, 1.62it/s] 33%|███▎ | 21320/65536 [3:39:37<7:42:46, 1.59it/s] {'loss': 3.0285, 'learning_rate': 7.090730990571813e-07, 'epoch': 1316.05} + 33%|███▎ | 21320/65536 [3:39:37<7:42:46, 1.59it/s] 33%|███▎ | 21321/65536 [3:39:37<7:29:44, 1.64it/s] 33%|███▎ | 21322/65536 [3:39:38<7:17:35, 1.68it/s] 33%|███▎ | 21323/65536 [3:39:39<7:29:45, 1.64it/s] 33%|███▎ | 21324/65536 [3:39:39<7:32:32, 1.63it/s] 33%|███▎ | 21325/65536 [3:39:40<7:27:12, 1.65it/s] 33%|███▎ | 21326/65536 [3:39:40<7:16:22, 1.69it/s] 33%|███▎ | 21327/65536 [3:39:41<7:14:18, 1.70it/s] 33%|███▎ | 21328/65536 [3:39:42<7:20:14, 1.67it/s] 33%|███▎ | 21329/65536 [3:39:42<7:25:53, 1.65it/s] 33%|███▎ | 21330/65536 [3:39:43<7:32:27, 1.63it/s] 33%|███▎ | 21331/65536 [3:39:43<7:27:13, 1.65it/s] 33%|███▎ | 21332/65536 [3:39:44<7:35:05, 1.62it/s] 33%|███▎ | 21333/65536 [3:39:45<7:47:32, 1.58it/s] 33%|███▎ | 21334/65536 [3:39:45<7:38:11, 1.61it/s] 33%|███▎ | 21335/65536 [3:39:46<7:42:00, 1.59it/s] 33%|███▎ | 21336/65536 [3:39:47<7:52:27, 1.56it/s] 33%|███▎ | 21337/65536 [3:39:47<7:41:10, 1.60it/s] 33%|███▎ | 21338/65536 [3:39:48<7:44:23, 1.59it/s] 33%|███▎ | 21339/65536 [3:39:48<7:32:44, 1.63it/s] 33%|███▎ | 21340/65536 [3:39:49<7:26:14, 1.65it/s] {'loss': 3.0237, 'learning_rate': 7.087976000979551e-07, 'epoch': 1317.28} + 33%|███▎ | 21340/65536 [3:39:49<7:26:14, 1.65it/s] 33%|███▎ | 21341/65536 [3:39:50<7:29:01, 1.64it/s] 33%|███▎ | 21342/65536 [3:39:50<7:38:15, 1.61it/s] 33%|███▎ | 21343/65536 [3:39:51<7:26:23, 1.65it/s] 33%|███▎ | 21344/65536 [3:39:51<7:26:13, 1.65it/s] 33%|███▎ | 21345/65536 [3:39:52<7:33:45, 1.62it/s] 33%|███▎ | 21346/65536 [3:39:53<7:34:56, 1.62it/s] 33%|███▎ | 21347/65536 [3:39:53<7:38:21, 1.61it/s] 33%|███▎ | 21348/65536 [3:39:54<7:29:53, 1.64it/s] 33%|███▎ | 21349/65536 [3:39:55<7:23:31, 1.66it/s] 33%|███▎ | 21350/65536 [3:39:55<7:24:18, 1.66it/s] 33%|███▎ | 21351/65536 [3:39:56<7:37:46, 1.61it/s] 33%|███▎ | 21352/65536 [3:39:56<7:51:43, 1.56it/s] 33%|███▎ | 21353/65536 [3:39:57<7:57:10, 1.54it/s] 33%|███▎ | 21354/65536 [3:39:58<7:59:27, 1.54it/s] 33%|███▎ | 21355/65536 [3:39:58<7:51:48, 1.56it/s] 33%|███▎ | 21356/65536 [3:39:59<7:49:27, 1.57it/s] 33%|███▎ | 21357/65536 [3:40:00<7:40:29, 1.60it/s] 33%|███▎ | 21358/65536 [3:40:00<7:40:26, 1.60it/s] 33%|███▎ | 21359/65536 [3:40:01<7:54:25, 1.55it/s] 33%|███▎ | 21360/65536 [3:40:02<7:52:03, 1.56it/s] {'loss': 2.9958, 'learning_rate': 7.085221011387288e-07, 'epoch': 1318.52} + 33%|███▎ | 21360/65536 [3:40:02<7:52:03, 1.56it/s] 33%|███▎ | 21361/65536 [3:40:02<7:34:30, 1.62it/s] 33%|███▎ | 21362/65536 [3:40:03<7:32:03, 1.63it/s] 33%|███▎ | 21363/65536 [3:40:03<7:37:44, 1.61it/s] 33%|███▎ | 21364/65536 [3:40:04<7:31:09, 1.63it/s] 33%|███▎ | 21365/65536 [3:40:05<7:26:56, 1.65it/s] 33%|███▎ | 21366/65536 [3:40:05<7:33:32, 1.62it/s] 33%|███▎ | 21367/65536 [3:40:06<7:28:21, 1.64it/s] 33%|███▎ | 21368/65536 [3:40:06<7:37:07, 1.61it/s] 33%|███▎ | 21369/65536 [3:40:07<7:37:01, 1.61it/s] 33%|███▎ | 21370/65536 [3:40:08<7:26:20, 1.65it/s] 33%|███▎ | 21371/65536 [3:40:08<7:27:28, 1.64it/s] 33%|███▎ | 21372/65536 [3:40:09<7:23:10, 1.66it/s] 33%|███▎ | 21373/65536 [3:40:09<7:24:18, 1.66it/s] 33%|███▎ | 21374/65536 [3:40:10<7:17:59, 1.68it/s] 33%|███▎ | 21375/65536 [3:40:11<7:17:38, 1.68it/s] 33%|███▎ | 21376/65536 [3:40:11<7:23:39, 1.66it/s] 33%|███▎ | 21377/65536 [3:40:12<7:23:20, 1.66it/s] 33%|███▎ | 21378/65536 [3:40:12<7:23:30, 1.66it/s] 33%|███▎ | 21379/65536 [3:40:13<7:24:43, 1.65it/s] 33%|███▎ | 21380/65536 [3:40:14<7:41:28, 1.59it/s] {'loss': 2.9874, 'learning_rate': 7.082466021795028e-07, 'epoch': 1319.75} + 33%|███▎ | 21380/65536 [3:40:14<7:41:28, 1.59it/s] 33%|███▎ | 21381/65536 [3:40:14<7:36:45, 1.61it/s] 33%|███▎ | 21382/65536 [3:40:15<7:37:04, 1.61it/s] 33%|███▎ | 21383/65536 [3:40:16<7:38:08, 1.61it/s] 33%|███▎ | 21384/65536 [3:40:16<7:42:17, 1.59it/s] 33%|███▎ | 21385/65536 [3:40:17<7:45:02, 1.58it/s] 33%|███▎ | 21386/65536 [3:40:18<7:45:15, 1.58it/s] 33%|███▎ | 21387/65536 [3:40:18<7:32:36, 1.63it/s] 33%|███▎ | 21388/65536 [3:40:19<7:26:05, 1.65it/s] 33%|███▎ | 21389/65536 [3:40:19<7:24:32, 1.66it/s] 33%|███▎ | 21390/65536 [3:40:20<7:24:40, 1.65it/s] 33%|███▎ | 21391/65536 [3:40:20<7:18:06, 1.68it/s] 33%|███▎ | 21392/65536 [3:40:21<7:11:03, 1.71it/s] 33%|███▎ | 21393/65536 [3:40:22<7:22:14, 1.66it/s] 33%|███▎ | 21394/65536 [3:40:22<7:25:55, 1.65it/s] 33%|███▎ | 21395/65536 [3:40:23<7:22:32, 1.66it/s] 33%|███▎ | 21396/65536 [3:40:24<7:31:04, 1.63it/s] 33%|███▎ | 21397/65536 [3:40:24<7:34:19, 1.62it/s] 33%|███▎ | 21398/65536 [3:40:25<7:30:31, 1.63it/s] 33%|███▎ | 21399/65536 [3:40:25<7:39:30, 1.60it/s] 33%|███▎ | 21400/65536 [3:40:26<7:36:11, 1.61it/s] {'loss': 3.0377, 'learning_rate': 7.079711032202767e-07, 'epoch': 1320.99} + 33%|███▎ | 21400/65536 [3:40:26<7:36:11, 1.61it/s] 33%|███▎ | 21401/65536 [3:40:27<7:50:03, 1.56it/s] 33%|███▎ | 21402/65536 [3:40:27<7:50:15, 1.56it/s] 33%|███▎ | 21403/65536 [3:40:28<7:42:42, 1.59it/s] 33%|███▎ | 21404/65536 [3:40:29<7:39:51, 1.60it/s] 33%|███▎ | 21405/65536 [3:40:29<7:38:51, 1.60it/s] 33%|███▎ | 21406/65536 [3:40:30<7:31:55, 1.63it/s] 33%|███▎ | 21407/65536 [3:40:30<7:45:03, 1.58it/s] 33%|███▎ | 21408/65536 [3:40:31<7:49:35, 1.57it/s] 33%|███▎ | 21409/65536 [3:40:32<7:42:24, 1.59it/s] 33%|███▎ | 21410/65536 [3:40:32<7:34:52, 1.62it/s] 33%|███▎ | 21411/65536 [3:40:33<7:26:36, 1.65it/s] 33%|███▎ | 21412/65536 [3:40:33<7:26:43, 1.65it/s] 33%|███▎ | 21413/65536 [3:40:34<7:27:35, 1.64it/s] 33%|███▎ | 21414/65536 [3:40:35<7:42:02, 1.59it/s] 33%|███▎ | 21415/65536 [3:40:35<7:30:07, 1.63it/s] 33%|███▎ | 21416/65536 [3:40:36<7:27:33, 1.64it/s] 33%|███▎ | 21417/65536 [3:40:37<7:40:17, 1.60it/s] 33%|███▎ | 21418/65536 [3:40:37<7:21:04, 1.67it/s] 33%|███▎ | 21419/65536 [3:40:38<7:25:21, 1.65it/s] 33%|███▎ | 21420/65536 [3:40:38<7:25:05, 1.65it/s] {'loss': 3.0517, 'learning_rate': 7.076956042610505e-07, 'epoch': 1322.22} + 33%|███▎ | 21420/65536 [3:40:38<7:25:05, 1.65it/s] 33%|███▎ | 21421/65536 [3:40:39<7:17:58, 1.68it/s] 33%|███▎ | 21422/65536 [3:40:40<7:25:52, 1.65it/s] 33%|███▎ | 21423/65536 [3:40:40<7:32:27, 1.62it/s] 33%|███▎ | 21424/65536 [3:40:41<7:27:53, 1.64it/s] 33%|███▎ | 21425/65536 [3:40:41<7:22:15, 1.66it/s] 33%|███▎ | 21426/65536 [3:40:42<7:25:25, 1.65it/s] 33%|███▎ | 21427/65536 [3:40:43<7:28:58, 1.64it/s] 33%|███▎ | 21428/65536 [3:40:43<7:22:35, 1.66it/s] 33%|███▎ | 21429/65536 [3:40:44<7:10:14, 1.71it/s] 33%|███▎ | 21430/65536 [3:40:44<7:06:46, 1.72it/s] 33%|███▎ | 21431/65536 [3:40:45<7:34:24, 1.62it/s] 33%|███▎ | 21432/65536 [3:40:46<7:37:56, 1.61it/s] 33%|███▎ | 21433/65536 [3:40:46<7:47:02, 1.57it/s] 33%|███▎ | 21434/65536 [3:40:47<7:37:08, 1.61it/s] 33%|███▎ | 21435/65536 [3:40:48<7:51:31, 1.56it/s] 33%|███▎ | 21436/65536 [3:40:48<7:41:16, 1.59it/s] 33%|███▎ | 21437/65536 [3:40:49<7:24:14, 1.65it/s] 33%|███▎ | 21438/65536 [3:40:49<7:19:39, 1.67it/s] 33%|███▎ | 21439/65536 [3:40:50<7:16:23, 1.68it/s] 33%|███▎ | 21440/65536 [3:40:51<7:27:11, 1.64it/s] {'loss': 2.9989, 'learning_rate': 7.074201053018244e-07, 'epoch': 1323.46} + 33%|███▎ | 21440/65536 [3:40:51<7:27:11, 1.64it/s] 33%|███▎ | 21441/65536 [3:40:51<7:32:38, 1.62it/s] 33%|███▎ | 21442/65536 [3:40:52<7:35:41, 1.61it/s] 33%|███▎ | 21443/65536 [3:40:52<7:33:58, 1.62it/s] 33%|███▎ | 21444/65536 [3:40:53<7:32:16, 1.62it/s] 33%|███▎ | 21445/65536 [3:40:54<7:20:14, 1.67it/s] 33%|███▎ | 21446/65536 [3:40:54<7:17:36, 1.68it/s] 33%|███▎ | 21447/65536 [3:40:55<7:17:14, 1.68it/s] 33%|███▎ | 21448/65536 [3:40:55<7:15:01, 1.69it/s] 33%|███▎ | 21449/65536 [3:40:56<7:32:59, 1.62it/s] 33%|███▎ | 21450/65536 [3:40:57<7:26:14, 1.65it/s] 33%|███▎ | 21451/65536 [3:40:57<7:25:02, 1.65it/s] 33%|███▎ | 21452/65536 [3:40:58<7:22:48, 1.66it/s] 33%|███▎ | 21453/65536 [3:40:59<7:41:32, 1.59it/s] 33%|███▎ | 21454/65536 [3:40:59<7:29:41, 1.63it/s] 33%|███▎ | 21455/65536 [3:41:00<7:32:19, 1.62it/s] 33%|███▎ | 21456/65536 [3:41:00<7:42:55, 1.59it/s] 33%|███▎ | 21457/65536 [3:41:01<7:38:30, 1.60it/s] 33%|███▎ | 21458/65536 [3:41:02<7:37:28, 1.61it/s] 33%|███▎ | 21459/65536 [3:41:02<7:30:50, 1.63it/s] 33%|███▎ | 21460/65536 [3:41:03<7:24:19, 1.65it/s] {'loss': 3.0555, 'learning_rate': 7.071446063425982e-07, 'epoch': 1324.69} + 33%|███▎ | 21460/65536 [3:41:03<7:24:19, 1.65it/s] 33%|███▎ | 21461/65536 [3:41:03<7:25:28, 1.65it/s] 33%|███▎ | 21462/65536 [3:41:04<7:22:42, 1.66it/s] 33%|███▎ | 21463/65536 [3:41:05<7:23:29, 1.66it/s] 33%|███▎ | 21464/65536 [3:41:05<7:20:15, 1.67it/s] 33%|███▎ | 21465/65536 [3:41:06<7:32:29, 1.62it/s] 33%|███▎ | 21466/65536 [3:41:06<7:40:10, 1.60it/s] 33%|███▎ | 21467/65536 [3:41:07<7:27:05, 1.64it/s] 33%|███▎ | 21468/65536 [3:41:08<7:30:01, 1.63it/s] 33%|███▎ | 21469/65536 [3:41:08<7:27:21, 1.64it/s] 33%|███▎ | 21470/65536 [3:41:09<7:31:26, 1.63it/s] 33%|███▎ | 21471/65536 [3:41:09<7:20:55, 1.67it/s] 33%|███▎ | 21472/65536 [3:41:10<7:19:00, 1.67it/s] 33%|███▎ | 21473/65536 [3:41:11<7:12:51, 1.70it/s] 33%|███▎ | 21474/65536 [3:41:11<7:12:28, 1.70it/s] 33%|███▎ | 21475/65536 [3:41:12<7:20:26, 1.67it/s] 33%|███▎ | 21476/65536 [3:41:12<7:18:08, 1.68it/s] 33%|███▎ | 21477/65536 [3:41:13<7:24:03, 1.65it/s] 33%|███▎ | 21478/65536 [3:41:14<7:27:20, 1.64it/s] 33%|███▎ | 21479/65536 [3:41:14<7:34:19, 1.62it/s] 33%|███▎ | 21480/65536 [3:41:15<7:26:24, 1.64it/s] {'loss': 3.0958, 'learning_rate': 7.068691073833721e-07, 'epoch': 1325.93} + 33%|███▎ | 21480/65536 [3:41:15<7:26:24, 1.64it/s] 33%|███▎ | 21481/65536 [3:41:16<7:32:58, 1.62it/s] 33%|███▎ | 21482/65536 [3:41:16<7:45:55, 1.58it/s] 33%|███▎ | 21483/65536 [3:41:17<7:40:36, 1.59it/s] 33%|███▎ | 21484/65536 [3:41:17<7:36:47, 1.61it/s] 33%|███▎ | 21485/65536 [3:41:18<7:22:27, 1.66it/s] 33%|███▎ | 21486/65536 [3:41:19<7:27:14, 1.64it/s] 33%|███▎ | 21487/65536 [3:41:19<7:26:36, 1.64it/s] 33%|███▎ | 21488/65536 [3:41:20<7:25:08, 1.65it/s] 33%|███▎ | 21489/65536 [3:41:20<7:22:50, 1.66it/s] 33%|███▎ | 21490/65536 [3:41:21<7:13:17, 1.69it/s] 33%|███▎ | 21491/65536 [3:41:22<7:14:03, 1.69it/s] 33%|███▎ | 21492/65536 [3:41:22<7:08:21, 1.71it/s] 33%|███▎ | 21493/65536 [3:41:23<7:16:57, 1.68it/s] 33%|███▎ | 21494/65536 [3:41:23<7:33:19, 1.62it/s] 33%|███▎ | 21495/65536 [3:41:24<7:30:50, 1.63it/s] 33%|███▎ | 21496/65536 [3:41:25<7:26:54, 1.64it/s] 33%|███▎ | 21497/65536 [3:41:25<7:31:38, 1.63it/s] 33%|███▎ | 21498/65536 [3:41:26<7:46:57, 1.57it/s] 33%|███▎ | 21499/65536 [3:41:27<7:43:01, 1.59it/s] 33%|███▎ | 21500/65536 [3:41:27<7:40:47, 1.59it/s] {'loss': 3.0202, 'learning_rate': 7.06593608424146e-07, 'epoch': 1327.16} + 33%|███▎ | 21500/65536 [3:41:27<7:40:47, 1.59it/s] 33%|███▎ | 21501/65536 [3:41:28<7:34:56, 1.61it/s] 33%|███▎ | 21502/65536 [3:41:28<7:27:24, 1.64it/s] 33%|███▎ | 21503/65536 [3:41:29<7:21:22, 1.66it/s] 33%|███▎ | 21504/65536 [3:41:30<7:23:03, 1.66it/s] 33%|███▎ | 21505/65536 [3:41:30<7:33:32, 1.62it/s] 33%|███▎ | 21506/65536 [3:41:31<7:33:18, 1.62it/s] 33%|███▎ | 21507/65536 [3:41:31<7:34:15, 1.62it/s] 33%|███▎ | 21508/65536 [3:41:32<7:24:34, 1.65it/s] 33%|███▎ | 21509/65536 [3:41:33<7:22:53, 1.66it/s] 33%|███▎ | 21510/65536 [3:41:33<7:28:46, 1.64it/s] 33%|███▎ | 21511/65536 [3:41:34<7:24:39, 1.65it/s] 33%|███▎ | 21512/65536 [3:41:35<7:36:25, 1.61it/s] 33%|███▎ | 21513/65536 [3:41:35<7:30:42, 1.63it/s] 33%|███▎ | 21514/65536 [3:41:36<7:48:30, 1.57it/s] 33%|███▎ | 21515/65536 [3:41:36<7:47:27, 1.57it/s] 33%|███▎ | 21516/65536 [3:41:37<7:43:29, 1.58it/s] 33%|███▎ | 21517/65536 [3:41:38<7:32:31, 1.62it/s] 33%|███▎ | 21518/65536 [3:41:38<7:28:29, 1.64it/s] 33%|███▎ | 21519/65536 [3:41:39<7:25:09, 1.65it/s] 33%|███▎ | 21520/65536 [3:41:39<7:27:52, 1.64it/s] {'loss': 3.0313, 'learning_rate': 7.063181094649198e-07, 'epoch': 1328.4} + 33%|███▎ | 21520/65536 [3:41:39<7:27:52, 1.64it/s] 33%|███▎ | 21521/65536 [3:41:40<7:20:30, 1.67it/s] 33%|███▎ | 21522/65536 [3:41:41<7:16:42, 1.68it/s] 33%|███▎ | 21523/65536 [3:41:41<7:34:52, 1.61it/s] 33%|███▎ | 21524/65536 [3:41:42<7:32:39, 1.62it/s] 33%|███▎ | 21525/65536 [3:41:43<7:32:28, 1.62it/s] 33%|███▎ | 21526/65536 [3:41:43<7:23:31, 1.65it/s] 33%|███▎ | 21527/65536 [3:41:44<7:19:13, 1.67it/s] 33%|███▎ | 21528/65536 [3:41:44<7:29:14, 1.63it/s] 33%|███▎ | 21529/65536 [3:41:45<7:23:55, 1.65it/s] 33%|███▎ | 21530/65536 [3:41:46<7:59:28, 1.53it/s] 33%|███▎ | 21531/65536 [3:41:46<7:49:39, 1.56it/s] 33%|███▎ | 21532/65536 [3:41:47<7:35:07, 1.61it/s] 33%|███▎ | 21533/65536 [3:41:47<7:33:03, 1.62it/s] 33%|███▎ | 21534/65536 [3:41:48<7:33:28, 1.62it/s] 33%|███▎ | 21535/65536 [3:41:49<7:28:37, 1.63it/s] 33%|███▎ | 21536/65536 [3:41:49<7:28:57, 1.63it/s] 33%|███▎ | 21537/65536 [3:41:50<7:30:33, 1.63it/s] 33%|███▎ | 21538/65536 [3:41:50<7:18:03, 1.67it/s] 33%|███▎ | 21539/65536 [3:41:51<7:23:30, 1.65it/s] 33%|███▎ | 21540/65536 [3:41:52<7:24:34, 1.65it/s] {'loss': 3.0543, 'learning_rate': 7.060426105056937e-07, 'epoch': 1329.63} + 33%|███▎ | 21540/65536 [3:41:52<7:24:34, 1.65it/s] 33%|███▎ | 21541/65536 [3:41:52<7:23:08, 1.65it/s] 33%|███▎ | 21542/65536 [3:41:53<7:18:13, 1.67it/s] 33%|███▎ | 21543/65536 [3:41:53<7:08:17, 1.71it/s] 33%|███▎ | 21544/65536 [3:41:54<7:30:52, 1.63it/s] 33%|███▎ | 21545/65536 [3:41:55<7:33:49, 1.62it/s] 33%|███▎ | 21546/65536 [3:41:55<7:36:08, 1.61it/s] 33%|███▎ | 21547/65536 [3:41:56<7:51:09, 1.56it/s] 33%|███▎ | 21548/65536 [3:41:57<7:43:53, 1.58it/s] 33%|███▎ | 21549/65536 [3:41:57<7:35:54, 1.61it/s] 33%|███▎ | 21550/65536 [3:41:58<7:40:13, 1.59it/s] 33%|███▎ | 21551/65536 [3:41:59<7:30:12, 1.63it/s] 33%|███▎ | 21552/65536 [3:41:59<7:28:10, 1.64it/s] 33%|███▎ | 21553/65536 [3:42:00<7:22:06, 1.66it/s] 33%|███▎ | 21554/65536 [3:42:00<7:17:30, 1.68it/s] 33%|███▎ | 21555/65536 [3:42:01<7:22:19, 1.66it/s] 33%|███▎ | 21556/65536 [3:42:02<7:22:55, 1.65it/s] 33%|███▎ | 21557/65536 [3:42:02<7:39:24, 1.60it/s] 33%|███▎ | 21558/65536 [3:42:03<7:33:03, 1.62it/s] 33%|███▎ | 21559/65536 [3:42:03<7:47:30, 1.57it/s] 33%|███▎ | 21560/65536 [3:42:04<7:39:47, 1.59it/s] {'loss': 3.0223, 'learning_rate': 7.057671115464675e-07, 'epoch': 1330.86} + 33%|███▎ | 21560/65536 [3:42:04<7:39:47, 1.59it/s] 33%|███▎ | 21561/65536 [3:42:05<7:40:31, 1.59it/s] 33%|███▎ | 21562/65536 [3:42:05<7:31:25, 1.62it/s] 33%|███▎ | 21563/65536 [3:42:06<7:57:48, 1.53it/s] 33%|███▎ | 21564/65536 [3:42:07<7:55:28, 1.54it/s] 33%|███▎ | 21565/65536 [3:42:07<7:42:27, 1.58it/s] 33%|███▎ | 21566/65536 [3:42:08<7:38:00, 1.60it/s] 33%|███▎ | 21567/65536 [3:42:09<7:37:36, 1.60it/s] 33%|███▎ | 21568/65536 [3:42:09<7:28:16, 1.63it/s] 33%|███▎ | 21569/65536 [3:42:10<7:27:27, 1.64it/s] 33%|███▎ | 21570/65536 [3:42:10<7:23:36, 1.65it/s] 33%|███▎ | 21571/65536 [3:42:11<7:19:48, 1.67it/s] 33%|███▎ | 21572/65536 [3:42:11<7:22:10, 1.66it/s] 33%|███▎ | 21573/65536 [3:42:12<7:23:21, 1.65it/s] 33%|███▎ | 21574/65536 [3:42:13<7:27:46, 1.64it/s] 33%|███▎ | 21575/65536 [3:42:13<7:32:44, 1.62it/s] 33%|███▎ | 21576/65536 [3:42:14<7:30:00, 1.63it/s] 33%|███▎ | 21577/65536 [3:42:15<7:26:13, 1.64it/s] 33%|███▎ | 21578/65536 [3:42:15<7:34:06, 1.61it/s] 33%|███▎ | 21579/65536 [3:42:16<7:55:19, 1.54it/s] 33%|███▎ | 21580/65536 [3:42:17<7:43:43, 1.58it/s] {'loss': 2.9771, 'learning_rate': 7.054916125872414e-07, 'epoch': 1332.1} + 33%|███▎ | 21580/65536 [3:42:17<7:43:43, 1.58it/s] 33%|███▎ | 21581/65536 [3:42:17<7:43:03, 1.58it/s] 33%|███▎ | 21582/65536 [3:42:18<7:32:49, 1.62it/s] 33%|███▎ | 21583/65536 [3:42:18<7:25:41, 1.64it/s] 33%|███▎ | 21584/65536 [3:42:19<7:19:41, 1.67it/s] 33%|███▎ | 21585/65536 [3:42:20<7:21:12, 1.66it/s] 33%|███▎ | 21586/65536 [3:42:20<7:24:38, 1.65it/s] 33%|███▎ | 21587/65536 [3:42:21<7:41:00, 1.59it/s] 33%|███▎ | 21588/65536 [3:42:21<7:26:33, 1.64it/s] 33%|███▎ | 21589/65536 [3:42:22<7:36:40, 1.60it/s] 33%|███▎ | 21590/65536 [3:42:23<7:32:21, 1.62it/s] 33%|███▎ | 21591/65536 [3:42:23<7:21:30, 1.66it/s] 33%|███▎ | 21592/65536 [3:42:24<7:30:19, 1.63it/s] 33%|███▎ | 21593/65536 [3:42:24<7:21:01, 1.66it/s] 33%|███▎ | 21594/65536 [3:42:25<7:27:04, 1.64it/s] 33%|███▎ | 21595/65536 [3:42:26<7:41:00, 1.59it/s] 33%|███▎ | 21596/65536 [3:42:26<7:39:46, 1.59it/s] 33%|███▎ | 21597/65536 [3:42:27<7:43:16, 1.58it/s] 33%|███▎ | 21598/65536 [3:42:28<7:34:42, 1.61it/s] 33%|███▎ | 21599/65536 [3:42:28<7:30:06, 1.63it/s] 33%|███▎ | 21600/65536 [3:42:29<7:27:16, 1.64it/s] {'loss': 3.0458, 'learning_rate': 7.052161136280152e-07, 'epoch': 1333.33} + 33%|███▎ | 21600/65536 [3:42:29<7:27:16, 1.64it/s] 33%|███▎ | 21601/65536 [3:42:29<7:19:24, 1.67it/s] 33%|███▎ | 21602/65536 [3:42:30<7:16:59, 1.68it/s] 33%|███▎ | 21603/65536 [3:42:31<7:11:29, 1.70it/s] 33%|███▎ | 21604/65536 [3:42:31<7:09:27, 1.70it/s] 33%|███▎ | 21605/65536 [3:42:32<7:21:05, 1.66it/s] 33%|███▎ | 21606/65536 [3:42:32<7:24:49, 1.65it/s] 33%|███▎ | 21607/65536 [3:42:33<7:34:14, 1.61it/s] 33%|███▎ | 21608/65536 [3:42:34<7:33:28, 1.61it/s] 33%|███▎ | 21609/65536 [3:42:34<7:30:50, 1.62it/s] 33%|███▎ | 21610/65536 [3:42:35<7:30:09, 1.63it/s] 33%|███▎ | 21611/65536 [3:42:36<7:45:45, 1.57it/s] 33%|███▎ | 21612/65536 [3:42:36<7:47:36, 1.57it/s] 33%|███▎ | 21613/65536 [3:42:37<7:45:02, 1.57it/s] 33%|███▎ | 21614/65536 [3:42:37<7:39:44, 1.59it/s] 33%|███▎ | 21615/65536 [3:42:38<7:38:06, 1.60it/s] 33%|███▎ | 21616/65536 [3:42:39<7:30:23, 1.63it/s] 33%|███▎ | 21617/65536 [3:42:39<7:38:05, 1.60it/s] 33%|███▎ | 21618/65536 [3:42:40<7:31:43, 1.62it/s] 33%|███▎ | 21619/65536 [3:42:40<7:29:15, 1.63it/s] 33%|███▎ | 21620/65536 [3:42:41<7:23:02, 1.65it/s] {'loss': 3.0683, 'learning_rate': 7.049406146687891e-07, 'epoch': 1334.57} + 33%|███▎ | 21620/65536 [3:42:41<7:23:02, 1.65it/s] 33%|███▎ | 21621/65536 [3:42:42<7:34:34, 1.61it/s] 33%|███▎ | 21622/65536 [3:42:42<7:21:12, 1.66it/s] 33%|███▎ | 21623/65536 [3:42:43<7:20:01, 1.66it/s] 33%|███▎ | 21624/65536 [3:42:43<7:25:15, 1.64it/s] 33%|███▎ | 21625/65536 [3:42:44<7:22:22, 1.65it/s] 33%|███▎ | 21626/65536 [3:42:45<7:31:34, 1.62it/s] 33%|███▎ | 21627/65536 [3:42:45<7:11:58, 1.69it/s] 33%|███▎ | 21628/65536 [3:42:46<7:31:39, 1.62it/s] 33%|███▎ | 21629/65536 [3:42:47<7:24:21, 1.65it/s] 33%|███▎ | 21630/65536 [3:42:47<7:34:29, 1.61it/s] 33%|███▎ | 21631/65536 [3:42:48<7:24:30, 1.65it/s] 33%|███▎ | 21632/65536 [3:42:48<7:35:17, 1.61it/s] 33%|███▎ | 21633/65536 [3:42:49<7:36:38, 1.60it/s] 33%|███▎ | 21634/65536 [3:42:50<7:20:28, 1.66it/s] 33%|███▎ | 21635/65536 [3:42:50<7:14:25, 1.68it/s] 33%|███▎ | 21636/65536 [3:42:51<7:17:08, 1.67it/s] 33%|███▎ | 21637/65536 [3:42:51<7:26:37, 1.64it/s] 33%|███▎ | 21638/65536 [3:42:52<7:16:10, 1.68it/s] 33%|███▎ | 21639/65536 [3:42:53<7:28:17, 1.63it/s] 33%|███▎ | 21640/65536 [3:42:53<7:31:10, 1.62it/s] {'loss': 3.0293, 'learning_rate': 7.046651157095629e-07, 'epoch': 1335.8} + 33%|███▎ | 21640/65536 [3:42:53<7:31:10, 1.62it/s] 33%|███▎ | 21641/65536 [3:42:54<7:30:50, 1.62it/s] 33%|███▎ | 21642/65536 [3:42:54<7:24:27, 1.65it/s] 33%|███▎ | 21643/65536 [3:42:55<7:16:24, 1.68it/s] 33%|███▎ | 21644/65536 [3:42:56<7:45:57, 1.57it/s] 33%|███▎ | 21645/65536 [3:42:56<7:37:52, 1.60it/s] 33%|███▎ | 21646/65536 [3:42:57<7:27:54, 1.63it/s] 33%|███▎ | 21647/65536 [3:42:58<7:26:15, 1.64it/s] 33%|███▎ | 21648/65536 [3:42:58<7:26:51, 1.64it/s] 33%|███▎ | 21649/65536 [3:42:59<7:20:58, 1.66it/s] 33%|███▎ | 21650/65536 [3:42:59<7:17:23, 1.67it/s] 33%|███▎ | 21651/65536 [3:43:00<7:21:49, 1.66it/s] 33%|███▎ | 21652/65536 [3:43:01<7:25:09, 1.64it/s] 33%|███▎ | 21653/65536 [3:43:01<7:18:36, 1.67it/s] 33%|███▎ | 21654/65536 [3:43:02<7:26:47, 1.64it/s] 33%|███▎ | 21655/65536 [3:43:02<7:19:32, 1.66it/s] 33%|███▎ | 21656/65536 [3:43:03<7:35:48, 1.60it/s] 33%|███▎ | 21657/65536 [3:43:04<7:23:59, 1.65it/s] 33%|███▎ | 21658/65536 [3:43:04<7:17:08, 1.67it/s] 33%|███▎ | 21659/65536 [3:43:05<7:34:27, 1.61it/s] 33%|███▎ | 21660/65536 [3:43:06<7:47:35, 1.56it/s] {'loss': 3.0793, 'learning_rate': 7.043896167503367e-07, 'epoch': 1337.04} + 33%|███▎ | 21660/65536 [3:43:06<7:47:35, 1.56it/s] 33%|███▎ | 21661/65536 [3:43:06<7:39:40, 1.59it/s] 33%|███▎ | 21662/65536 [3:43:07<7:38:06, 1.60it/s] 33%|███▎ | 21663/65536 [3:43:07<7:17:26, 1.67it/s] 33%|███▎ | 21664/65536 [3:43:08<7:31:32, 1.62it/s] 33%|███▎ | 21665/65536 [3:43:09<7:23:44, 1.65it/s] 33%|███▎ | 21666/65536 [3:43:09<7:35:25, 1.61it/s] 33%|███▎ | 21667/65536 [3:43:10<7:35:41, 1.60it/s] 33%|███▎ | 21668/65536 [3:43:10<7:26:12, 1.64it/s] 33%|███▎ | 21669/65536 [3:43:11<7:35:39, 1.60it/s] 33%|███▎ | 21670/65536 [3:43:12<7:28:03, 1.63it/s] 33%|███▎ | 21671/65536 [3:43:12<7:19:31, 1.66it/s] 33%|███▎ | 21672/65536 [3:43:13<7:17:42, 1.67it/s] 33%|███▎ | 21673/65536 [3:43:13<7:13:34, 1.69it/s] 33%|███▎ | 21674/65536 [3:43:14<7:16:11, 1.68it/s] 33%|███▎ | 21675/65536 [3:43:15<7:11:50, 1.69it/s] 33%|███▎ | 21676/65536 [3:43:15<7:37:31, 1.60it/s] 33%|███▎ | 21677/65536 [3:43:16<7:31:31, 1.62it/s] 33%|███▎ | 21678/65536 [3:43:16<7:29:58, 1.62it/s] 33%|███▎ | 21679/65536 [3:43:17<7:30:33, 1.62it/s] 33%|███▎ | 21680/65536 [3:43:18<7:27:39, 1.63it/s] {'loss': 3.0518, 'learning_rate': 7.041141177911105e-07, 'epoch': 1338.27} + 33%|███▎ | 21680/65536 [3:43:18<7:27:39, 1.63it/s] 33%|███▎ | 21681/65536 [3:43:18<7:25:54, 1.64it/s] 33%|███▎ | 21682/65536 [3:43:19<7:26:34, 1.64it/s] 33%|███▎ | 21683/65536 [3:43:20<7:19:53, 1.66it/s] 33%|███▎ | 21684/65536 [3:43:20<7:19:34, 1.66it/s] 33%|███▎ | 21685/65536 [3:43:21<7:23:45, 1.65it/s] 33%|███▎ | 21686/65536 [3:43:21<7:28:38, 1.63it/s] 33%|███▎ | 21687/65536 [3:43:22<7:35:32, 1.60it/s] 33%|███▎ | 21688/65536 [3:43:23<7:35:58, 1.60it/s] 33%|███▎ | 21689/65536 [3:43:23<7:40:31, 1.59it/s] 33%|███▎ | 21690/65536 [3:43:24<7:24:23, 1.64it/s] 33%|███▎ | 21691/65536 [3:43:24<7:24:06, 1.65it/s] 33%|███▎ | 21692/65536 [3:43:25<7:49:25, 1.56it/s] 33%|███▎ | 21693/65536 [3:43:26<7:44:08, 1.57it/s] 33%|███▎ | 21694/65536 [3:43:26<7:28:56, 1.63it/s] 33%|███▎ | 21695/65536 [3:43:27<7:32:00, 1.62it/s] 33%|███▎ | 21696/65536 [3:43:28<7:23:01, 1.65it/s] 33%|███▎ | 21697/65536 [3:43:28<7:24:29, 1.64it/s] 33%|███▎ | 21698/65536 [3:43:29<7:22:48, 1.65it/s] 33%|███▎ | 21699/65536 [3:43:29<7:13:04, 1.69it/s] 33%|███▎ | 21700/65536 [3:43:30<7:26:05, 1.64it/s] {'loss': 2.9899, 'learning_rate': 7.038386188318843e-07, 'epoch': 1339.51} + 33%|███▎ | 21700/65536 [3:43:30<7:26:05, 1.64it/s] 33%|███▎ | 21701/65536 [3:43:31<7:17:07, 1.67it/s] 33%|███▎ | 21702/65536 [3:43:31<7:13:59, 1.68it/s] 33%|███▎ | 21703/65536 [3:43:32<7:16:20, 1.67it/s] 33%|███▎ | 21704/65536 [3:43:32<7:11:50, 1.69it/s] 33%|███▎ | 21705/65536 [3:43:33<7:23:26, 1.65it/s] 33%|███▎ | 21706/65536 [3:43:34<7:29:39, 1.62it/s] 33%|███▎ | 21707/65536 [3:43:34<7:25:05, 1.64it/s] 33%|███▎ | 21708/65536 [3:43:35<7:34:32, 1.61it/s] 33%|███▎ | 21709/65536 [3:43:36<7:57:55, 1.53it/s] 33%|███▎ | 21710/65536 [3:43:36<7:48:40, 1.56it/s] 33%|███▎ | 21711/65536 [3:43:37<7:44:27, 1.57it/s] 33%|███▎ | 21712/65536 [3:43:37<7:38:51, 1.59it/s] 33%|███▎ | 21713/65536 [3:43:38<7:28:34, 1.63it/s] 33%|███▎ | 21714/65536 [3:43:39<7:21:25, 1.65it/s] 33%|███▎ | 21715/65536 [3:43:39<7:14:57, 1.68it/s] 33%|███▎ | 21716/65536 [3:43:40<7:24:10, 1.64it/s] 33%|███▎ | 21717/65536 [3:43:40<7:18:35, 1.67it/s] 33%|███▎ | 21718/65536 [3:43:41<7:24:18, 1.64it/s] 33%|███▎ | 21719/65536 [3:43:42<7:25:39, 1.64it/s] 33%|███▎ | 21720/65536 [3:43:42<7:27:07, 1.63it/s] {'loss': 3.0019, 'learning_rate': 7.035631198726582e-07, 'epoch': 1340.74} + 33%|███▎ | 21720/65536 [3:43:42<7:27:07, 1.63it/s] 33%|███▎ | 21721/65536 [3:43:43<7:16:58, 1.67it/s] 33%|███▎ | 21722/65536 [3:43:43<7:27:19, 1.63it/s] 33%|███▎ | 21723/65536 [3:43:44<7:36:29, 1.60it/s] 33%|███▎ | 21724/65536 [3:43:45<7:24:32, 1.64it/s] 33%|███▎ | 21725/65536 [3:43:45<7:45:47, 1.57it/s] 33%|███▎ | 21726/65536 [3:43:46<7:37:32, 1.60it/s] 33%|███▎ | 21727/65536 [3:43:47<7:33:12, 1.61it/s] 33%|███▎ | 21728/65536 [3:43:47<7:40:17, 1.59it/s] 33%|███▎ | 21729/65536 [3:43:48<7:37:59, 1.59it/s] 33%|███▎ | 21730/65536 [3:43:48<7:33:32, 1.61it/s] 33%|███▎ | 21731/65536 [3:43:49<7:30:51, 1.62it/s] 33%|███▎ | 21732/65536 [3:43:50<7:24:28, 1.64it/s] 33%|███▎ | 21733/65536 [3:43:50<7:22:41, 1.65it/s] 33%|███▎ | 21734/65536 [3:43:51<7:21:41, 1.65it/s] 33%|███▎ | 21735/65536 [3:43:51<7:14:15, 1.68it/s] 33%|███▎ | 21736/65536 [3:43:52<7:25:09, 1.64it/s] 33%|███▎ | 21737/65536 [3:43:53<7:21:57, 1.65it/s] 33%|███▎ | 21738/65536 [3:43:53<7:18:53, 1.66it/s] 33%|███▎ | 21739/65536 [3:43:54<7:24:06, 1.64it/s] 33%|███▎ | 21740/65536 [3:43:54<7:18:40, 1.66it/s] {'loss': 2.99, 'learning_rate': 7.032876209134321e-07, 'epoch': 1341.98} + 33%|███▎ | 21740/65536 [3:43:54<7:18:40, 1.66it/s] 33%|███▎ | 21741/65536 [3:43:55<7:37:18, 1.60it/s] 33%|███▎ | 21742/65536 [3:43:56<7:41:11, 1.58it/s] 33%|███▎ | 21743/65536 [3:43:56<7:36:13, 1.60it/s] 33%|███▎ | 21744/65536 [3:43:57<7:21:45, 1.65it/s] 33%|███▎ | 21745/65536 [3:43:58<7:20:18, 1.66it/s] 33%|███▎ | 21746/65536 [3:43:58<7:32:25, 1.61it/s] 33%|███▎ | 21747/65536 [3:43:59<7:27:16, 1.63it/s] 33%|███▎ | 21748/65536 [3:43:59<7:28:06, 1.63it/s] 33%|███▎ | 21749/65536 [3:44:00<7:16:42, 1.67it/s] 33%|███▎ | 21750/65536 [3:44:01<7:22:38, 1.65it/s] 33%|███▎ | 21751/65536 [3:44:01<7:19:41, 1.66it/s] 33%|███▎ | 21752/65536 [3:44:02<7:18:56, 1.66it/s] 33%|███▎ | 21753/65536 [3:44:02<7:18:51, 1.66it/s] 33%|███▎ | 21754/65536 [3:44:03<7:21:33, 1.65it/s] 33%|███▎ | 21755/65536 [3:44:04<7:18:22, 1.66it/s] 33%|███▎ | 21756/65536 [3:44:04<7:13:40, 1.68it/s] 33%|███▎ | 21757/65536 [3:44:05<7:37:02, 1.60it/s] 33%|███▎ | 21758/65536 [3:44:06<7:31:19, 1.62it/s] 33%|███▎ | 21759/65536 [3:44:06<7:28:15, 1.63it/s] 33%|███▎ | 21760/65536 [3:44:07<7:24:49, 1.64it/s] {'loss': 3.0385, 'learning_rate': 7.030121219542059e-07, 'epoch': 1343.21} + 33%|███▎ | 21760/65536 [3:44:07<7:24:49, 1.64it/s] 33%|███▎ | 21761/65536 [3:44:07<7:26:50, 1.63it/s] 33%|███▎ | 21762/65536 [3:44:08<7:23:51, 1.64it/s] 33%|███▎ | 21763/65536 [3:44:09<7:25:11, 1.64it/s] 33%|███▎ | 21764/65536 [3:44:09<7:33:40, 1.61it/s] 33%|███▎ | 21765/65536 [3:44:10<7:22:06, 1.65it/s] 33%|███▎ | 21766/65536 [3:44:10<7:19:54, 1.66it/s] 33%|███▎ | 21767/65536 [3:44:11<7:22:00, 1.65it/s] 33%|███▎ | 21768/65536 [3:44:12<7:14:43, 1.68it/s] 33%|███▎ | 21769/65536 [3:44:12<7:08:00, 1.70it/s] 33%|███▎ | 21770/65536 [3:44:13<7:13:22, 1.68it/s] 33%|███▎ | 21771/65536 [3:44:13<7:10:47, 1.69it/s] 33%|███▎ | 21772/65536 [3:44:14<7:36:41, 1.60it/s] 33%|███▎ | 21773/65536 [3:44:15<7:43:08, 1.57it/s] 33%|███▎ | 21774/65536 [3:44:15<7:34:34, 1.60it/s] 33%|███▎ | 21775/65536 [3:44:16<7:36:59, 1.60it/s] 33%|███▎ | 21776/65536 [3:44:17<7:36:12, 1.60it/s] 33%|███▎ | 21777/65536 [3:44:17<7:32:55, 1.61it/s] 33%|███▎ | 21778/65536 [3:44:18<7:32:02, 1.61it/s] 33%|███▎ | 21779/65536 [3:44:18<7:33:20, 1.61it/s] 33%|███▎ | 21780/65536 [3:44:19<7:38:32, 1.59it/s] {'loss': 3.0483, 'learning_rate': 7.027366229949798e-07, 'epoch': 1344.44} + 33%|███▎ | 21780/65536 [3:44:19<7:38:32, 1.59it/s] 33%|███▎ | 21781/65536 [3:44:20<7:28:48, 1.62it/s] 33%|███▎ | 21782/65536 [3:44:20<7:36:48, 1.60it/s] 33%|███▎ | 21783/65536 [3:44:21<7:37:41, 1.59it/s] 33%|███▎ | 21784/65536 [3:44:21<7:29:23, 1.62it/s] 33%|███▎ | 21785/65536 [3:44:22<7:25:58, 1.64it/s] 33%|███▎ | 21786/65536 [3:44:23<7:27:50, 1.63it/s] 33%|███▎ | 21787/65536 [3:44:23<7:13:34, 1.68it/s] 33%|███▎ | 21788/65536 [3:44:24<7:13:46, 1.68it/s] 33%|███▎ | 21789/65536 [3:44:24<7:06:29, 1.71it/s] 33%|███▎ | 21790/65536 [3:44:25<7:33:08, 1.61it/s] 33%|███▎ | 21791/65536 [3:44:26<7:33:41, 1.61it/s] 33%|███▎ | 21792/65536 [3:44:26<7:28:41, 1.62it/s] 33%|███▎ | 21793/65536 [3:44:27<7:23:31, 1.64it/s] 33%|███▎ | 21794/65536 [3:44:28<7:30:54, 1.62it/s] 33%|███▎ | 21795/65536 [3:44:28<7:25:13, 1.64it/s] 33%|███▎ | 21796/65536 [3:44:29<7:27:45, 1.63it/s] 33%|███▎ | 21797/65536 [3:44:29<7:36:31, 1.60it/s] 33%|███▎ | 21798/65536 [3:44:30<7:42:33, 1.58it/s] 33%|███▎ | 21799/65536 [3:44:31<7:28:35, 1.62it/s] 33%|███▎ | 21800/65536 [3:44:31<7:19:27, 1.66it/s] {'loss': 3.0463, 'learning_rate': 7.024611240357536e-07, 'epoch': 1345.68} + 33%|███▎ | 21800/65536 [3:44:31<7:19:27, 1.66it/s] 33%|███▎ | 21801/65536 [3:44:32<7:09:44, 1.70it/s] 33%|███▎ | 21802/65536 [3:44:32<7:19:29, 1.66it/s] 33%|███▎ | 21803/65536 [3:44:33<7:15:34, 1.67it/s] 33%|███▎ | 21804/65536 [3:44:34<7:15:40, 1.67it/s] 33%|███▎ | 21805/65536 [3:44:34<7:16:45, 1.67it/s] 33%|███▎ | 21806/65536 [3:44:35<7:35:54, 1.60it/s] 33%|███▎ | 21807/65536 [3:44:35<7:27:06, 1.63it/s] 33%|███▎ | 21808/65536 [3:44:36<7:17:02, 1.67it/s] 33%|███▎ | 21809/65536 [3:44:37<7:10:34, 1.69it/s] 33%|███▎ | 21810/65536 [3:44:37<7:08:21, 1.70it/s] 33%|███▎ | 21811/65536 [3:44:38<7:18:07, 1.66it/s] 33%|███▎ | 21812/65536 [3:44:38<7:25:32, 1.64it/s] 33%|███▎ | 21813/65536 [3:44:39<7:17:14, 1.67it/s] 33%|███▎ | 21814/65536 [3:44:40<7:19:07, 1.66it/s] 33%|███▎ | 21815/65536 [3:44:40<7:25:20, 1.64it/s] 33%|███▎ | 21816/65536 [3:44:41<7:26:19, 1.63it/s] 33%|███▎ | 21817/65536 [3:44:42<7:35:10, 1.60it/s] 33%|███▎ | 21818/65536 [3:44:42<7:34:39, 1.60it/s] 33%|███▎ | 21819/65536 [3:44:43<7:38:28, 1.59it/s] 33%|███▎ | 21820/65536 [3:44:43<7:32:34, 1.61it/s] {'loss': 3.0503, 'learning_rate': 7.021856250765275e-07, 'epoch': 1346.91} + 33%|███▎ | 21820/65536 [3:44:43<7:32:34, 1.61it/s] 33%|███▎ | 21821/65536 [3:44:44<7:35:35, 1.60it/s] 33%|███▎ | 21822/65536 [3:44:45<7:47:35, 1.56it/s] 33%|███▎ | 21823/65536 [3:44:45<7:52:46, 1.54it/s] 33%|███▎ | 21824/65536 [3:44:46<7:45:45, 1.56it/s] 33%|███▎ | 21825/65536 [3:44:47<7:36:22, 1.60it/s] 33%|███▎ | 21826/65536 [3:44:47<7:42:43, 1.57it/s] 33%|███▎ | 21827/65536 [3:44:48<7:37:30, 1.59it/s] 33%|███▎ | 21828/65536 [3:44:49<7:34:20, 1.60it/s] 33%|███▎ | 21829/65536 [3:44:49<7:28:31, 1.62it/s] 33%|███▎ | 21830/65536 [3:44:50<7:32:07, 1.61it/s] 33%|███▎ | 21831/65536 [3:44:50<7:25:35, 1.63it/s] 33%|███▎ | 21832/65536 [3:44:51<7:15:55, 1.67it/s] 33%|███▎ | 21833/65536 [3:44:52<7:19:54, 1.66it/s] 33%|███▎ | 21834/65536 [3:44:52<7:10:21, 1.69it/s] 33%|███▎ | 21835/65536 [3:44:53<7:22:35, 1.65it/s] 33%|███▎ | 21836/65536 [3:44:53<7:17:29, 1.66it/s] 33%|███▎ | 21837/65536 [3:44:54<7:25:23, 1.64it/s] 33%|███▎ | 21838/65536 [3:44:55<7:39:26, 1.59it/s] 33%|███▎ | 21839/65536 [3:44:55<7:33:49, 1.60it/s] 33%|███▎ | 21840/65536 [3:44:56<7:43:07, 1.57it/s] {'loss': 2.9832, 'learning_rate': 7.019101261173014e-07, 'epoch': 1348.15} + 33%|███▎ | 21840/65536 [3:44:56<7:43:07, 1.57it/s] 33%|███▎ | 21841/65536 [3:44:56<7:37:56, 1.59it/s] 33%|███▎ | 21842/65536 [3:44:57<7:34:35, 1.60it/s] 33%|███▎ | 21843/65536 [3:44:58<7:17:47, 1.66it/s] 33%|███▎ | 21844/65536 [3:44:58<7:26:56, 1.63it/s] 33%|███▎ | 21845/65536 [3:44:59<7:21:17, 1.65it/s] 33%|███▎ | 21846/65536 [3:44:59<7:16:56, 1.67it/s] 33%|███▎ | 21847/65536 [3:45:00<7:27:12, 1.63it/s] 33%|███▎ | 21848/65536 [3:45:01<7:10:45, 1.69it/s] 33%|███▎ | 21849/65536 [3:45:01<7:10:54, 1.69it/s] 33%|███▎ | 21850/65536 [3:45:02<7:26:10, 1.63it/s] 33%|███▎ | 21851/65536 [3:45:03<7:28:30, 1.62it/s] 33%|███▎ | 21852/65536 [3:45:03<7:28:47, 1.62it/s] 33%|███▎ | 21853/65536 [3:45:04<7:18:06, 1.66it/s] 33%|███▎ | 21854/65536 [3:45:04<7:36:09, 1.60it/s] 33%|███▎ | 21855/65536 [3:45:05<7:32:20, 1.61it/s] 33%|███▎ | 21856/65536 [3:45:06<7:24:17, 1.64it/s] 33%|███▎ | 21857/65536 [3:45:06<7:17:00, 1.67it/s] 33%|███▎ | 21858/65536 [3:45:07<7:17:29, 1.66it/s] 33%|███▎ | 21859/65536 [3:45:07<7:19:11, 1.66it/s] 33%|███▎ | 21860/65536 [3:45:08<7:12:31, 1.68it/s] {'loss': 2.9928, 'learning_rate': 7.016346271580752e-07, 'epoch': 1349.38} + 33%|███▎ | 21860/65536 [3:45:08<7:12:31, 1.68it/s] 33%|███▎ | 21861/65536 [3:45:09<7:22:28, 1.65it/s] 33%|███▎ | 21862/65536 [3:45:09<7:37:17, 1.59it/s] 33%|███▎ | 21863/65536 [3:45:10<7:33:44, 1.60it/s] 33%|███▎ | 21864/65536 [3:45:10<7:23:37, 1.64it/s] 33%|███▎ | 21865/65536 [3:45:11<7:21:45, 1.65it/s] 33%|███▎ | 21866/65536 [3:45:12<7:20:37, 1.65it/s] 33%|███▎ | 21867/65536 [3:45:12<7:14:43, 1.67it/s] 33%|███▎ | 21868/65536 [3:45:13<7:12:19, 1.68it/s] 33%|███▎ | 21869/65536 [3:45:13<7:19:25, 1.66it/s] 33%|███▎ | 21870/65536 [3:45:14<7:33:35, 1.60it/s] 33%|███▎ | 21871/65536 [3:45:15<7:57:25, 1.52it/s] 33%|███▎ | 21872/65536 [3:45:15<7:38:38, 1.59it/s] 33%|███▎ | 21873/65536 [3:45:16<7:37:52, 1.59it/s] 33%|███▎ | 21874/65536 [3:45:17<7:34:53, 1.60it/s] 33%|███▎ | 21875/65536 [3:45:17<7:43:01, 1.57it/s] 33%|███▎ | 21876/65536 [3:45:18<7:33:46, 1.60it/s] 33%|███▎ | 21877/65536 [3:45:19<7:34:32, 1.60it/s] 33%|███▎ | 21878/65536 [3:45:19<7:23:43, 1.64it/s] 33%|███▎ | 21879/65536 [3:45:20<7:21:28, 1.65it/s] 33%|███▎ | 21880/65536 [3:45:20<7:23:44, 1.64it/s] {'loss': 3.0193, 'learning_rate': 7.01359128198849e-07, 'epoch': 1350.62} + 33%|███▎ | 21880/65536 [3:45:20<7:23:44, 1.64it/s] 33%|███▎ | 21881/65536 [3:45:21<7:13:37, 1.68it/s] 33%|███▎ | 21882/65536 [3:45:22<7:24:03, 1.64it/s] 33%|███▎ | 21883/65536 [3:45:22<7:31:39, 1.61it/s] 33%|███▎ | 21884/65536 [3:45:23<7:25:03, 1.63it/s] 33%|███▎ | 21885/65536 [3:45:23<7:21:05, 1.65it/s] 33%|███▎ | 21886/65536 [3:45:24<7:17:13, 1.66it/s] 33%|███▎ | 21887/65536 [3:45:25<7:24:25, 1.64it/s] 33%|███▎ | 21888/65536 [3:45:25<7:20:56, 1.65it/s] 33%|███▎ | 21889/65536 [3:45:26<7:23:52, 1.64it/s] 33%|███▎ | 21890/65536 [3:45:26<7:30:44, 1.61it/s] 33%|███▎ | 21891/65536 [3:45:27<7:15:18, 1.67it/s] 33%|███▎ | 21892/65536 [3:45:28<7:08:54, 1.70it/s] 33%|███▎ | 21893/65536 [3:45:28<7:19:53, 1.65it/s] 33%|███▎ | 21894/65536 [3:45:29<7:26:22, 1.63it/s] 33%|███▎ | 21895/65536 [3:45:30<7:43:12, 1.57it/s] 33%|███▎ | 21896/65536 [3:45:30<7:44:40, 1.57it/s] 33%|███▎ | 21897/65536 [3:45:31<7:35:31, 1.60it/s] 33%|███▎ | 21898/65536 [3:45:31<7:32:22, 1.61it/s] 33%|███▎ | 21899/65536 [3:45:32<7:29:16, 1.62it/s] 33%|███▎ | 21900/65536 [3:45:33<7:33:59, 1.60it/s] {'loss': 3.0242, 'learning_rate': 7.010836292396228e-07, 'epoch': 1351.85} + 33%|███▎ | 21900/65536 [3:45:33<7:33:59, 1.60it/s] 33%|███▎ | 21901/65536 [3:45:33<7:22:41, 1.64it/s] 33%|███▎ | 21902/65536 [3:45:34<7:06:48, 1.70it/s] 33%|███▎ | 21903/65536 [3:45:34<7:18:18, 1.66it/s] 33%|███▎ | 21904/65536 [3:45:35<7:20:49, 1.65it/s] 33%|███▎ | 21905/65536 [3:45:36<7:21:39, 1.65it/s] 33%|███▎ | 21906/65536 [3:45:36<7:34:09, 1.60it/s] 33%|███▎ | 21907/65536 [3:45:37<7:20:37, 1.65it/s] 33%|███▎ | 21908/65536 [3:45:37<7:23:38, 1.64it/s] 33%|███▎ | 21909/65536 [3:45:38<7:24:08, 1.64it/s] 33%|███▎ | 21910/65536 [3:45:39<7:23:44, 1.64it/s] 33%|███▎ | 21911/65536 [3:45:39<7:34:34, 1.60it/s] 33%|███▎ | 21912/65536 [3:45:40<7:31:06, 1.61it/s] 33%|███▎ | 21913/65536 [3:45:41<7:24:40, 1.64it/s] 33%|███▎ | 21914/65536 [3:45:41<7:15:29, 1.67it/s] 33%|███▎ | 21915/65536 [3:45:42<7:11:16, 1.69it/s] 33%|███▎ | 21916/65536 [3:45:42<7:15:30, 1.67it/s] 33%|███▎ | 21917/65536 [3:45:43<7:26:17, 1.63it/s] 33%|███▎ | 21918/65536 [3:45:44<7:21:51, 1.65it/s] 33%|███▎ | 21919/65536 [3:45:44<7:29:02, 1.62it/s] 33%|███▎ | 21920/65536 [3:45:45<7:23:58, 1.64it/s] {'loss': 3.0383, 'learning_rate': 7.008081302803967e-07, 'epoch': 1353.09} + 33%|███▎ | 21920/65536 [3:45:45<7:23:58, 1.64it/s] 33%|███▎ | 21921/65536 [3:45:45<7:39:53, 1.58it/s] 33%|███▎ | 21922/65536 [3:45:46<7:24:36, 1.63it/s] 33%|███▎ | 21923/65536 [3:45:47<7:19:09, 1.66it/s] 33%|███▎ | 21924/65536 [3:45:47<7:25:45, 1.63it/s] 33%|███▎ | 21925/65536 [3:45:48<7:27:07, 1.63it/s] 33%|███▎ | 21926/65536 [3:45:48<7:23:07, 1.64it/s] 33%|███▎ | 21927/65536 [3:45:49<7:18:31, 1.66it/s] 33%|███▎ | 21928/65536 [3:45:50<7:23:46, 1.64it/s] 33%|███▎ | 21929/65536 [3:45:50<7:29:47, 1.62it/s] 33%|███▎ | 21930/65536 [3:45:51<7:32:25, 1.61it/s] 33%|███▎ | 21931/65536 [3:45:52<7:35:17, 1.60it/s] 33%|███▎ | 21932/65536 [3:45:52<7:28:04, 1.62it/s] 33%|███▎ | 21933/65536 [3:45:53<7:26:29, 1.63it/s] 33%|███▎ | 21934/65536 [3:45:53<7:21:07, 1.65it/s] 33%|███▎ | 21935/65536 [3:45:54<7:30:19, 1.61it/s] 33%|███▎ | 21936/65536 [3:45:55<7:23:50, 1.64it/s] 33%|███▎ | 21937/65536 [3:45:55<7:19:59, 1.65it/s] 33%|███▎ | 21938/65536 [3:45:56<7:26:38, 1.63it/s] 33%|███▎ | 21939/65536 [3:45:56<7:32:43, 1.60it/s] 33%|███▎ | 21940/65536 [3:45:57<7:25:13, 1.63it/s] {'loss': 3.0641, 'learning_rate': 7.005326313211704e-07, 'epoch': 1354.32} + 33%|███▎ | 21940/65536 [3:45:57<7:25:13, 1.63it/s] 33%|███▎ | 21941/65536 [3:45:58<7:31:13, 1.61it/s] 33%|███▎ | 21942/65536 [3:45:58<7:23:37, 1.64it/s] 33%|███▎ | 21943/65536 [3:45:59<7:16:28, 1.66it/s] 33%|███▎ | 21944/65536 [3:45:59<7:15:14, 1.67it/s] 33%|███▎ | 21945/65536 [3:46:00<7:20:08, 1.65it/s] 33%|███▎ | 21946/65536 [3:46:01<7:23:56, 1.64it/s] 33%|███▎ | 21947/65536 [3:46:01<7:24:18, 1.64it/s] 33%|███▎ | 21948/65536 [3:46:02<7:30:38, 1.61it/s] 33%|███▎ | 21949/65536 [3:46:03<7:25:28, 1.63it/s] 33%|███▎ | 21950/65536 [3:46:03<7:24:56, 1.63it/s] 33%|███▎ | 21951/65536 [3:46:04<7:17:51, 1.66it/s] 33%|███▎ | 21952/65536 [3:46:04<7:29:31, 1.62it/s] 33%|███▎ | 21953/65536 [3:46:05<7:25:19, 1.63it/s] 33%|███▎ | 21954/65536 [3:46:06<7:26:26, 1.63it/s] 34%|███▎ | 21955/65536 [3:46:06<7:11:07, 1.68it/s] 34%|███▎ | 21956/65536 [3:46:07<7:18:33, 1.66it/s] 34%|███▎ | 21957/65536 [3:46:07<7:23:47, 1.64it/s] 34%|███▎ | 21958/65536 [3:46:08<7:34:54, 1.60it/s] 34%|███▎ | 21959/65536 [3:46:09<7:35:46, 1.59it/s] 34%|███▎ | 21960/65536 [3:46:09<7:16:23, 1.66it/s] {'loss': 2.9852, 'learning_rate': 7.002571323619443e-07, 'epoch': 1355.56} + 34%|███▎ | 21960/65536 [3:46:09<7:16:23, 1.66it/s] 34%|███▎ | 21961/65536 [3:46:10<7:10:49, 1.69it/s] 34%|███▎ | 21962/65536 [3:46:11<7:28:33, 1.62it/s] 34%|███▎ | 21963/65536 [3:46:11<7:39:42, 1.58it/s] 34%|███▎ | 21964/65536 [3:46:12<7:34:27, 1.60it/s] 34%|███▎ | 21965/65536 [3:46:12<7:26:36, 1.63it/s] 34%|███▎ | 21966/65536 [3:46:13<7:17:43, 1.66it/s] 34%|███▎ | 21967/65536 [3:46:14<7:10:22, 1.69it/s] 34%|███▎ | 21968/65536 [3:46:14<7:25:29, 1.63it/s] 34%|███▎ | 21969/65536 [3:46:15<7:17:12, 1.66it/s] 34%|███▎ | 21970/65536 [3:46:15<7:17:26, 1.66it/s] 34%|███▎ | 21971/65536 [3:46:16<7:25:22, 1.63it/s] 34%|███▎ | 21972/65536 [3:46:17<7:23:15, 1.64it/s] 34%|███▎ | 21973/65536 [3:46:17<7:24:44, 1.63it/s] 34%|███▎ | 21974/65536 [3:46:18<7:22:54, 1.64it/s] 34%|███▎ | 21975/65536 [3:46:18<7:15:34, 1.67it/s] 34%|███▎ | 21976/65536 [3:46:19<7:07:30, 1.70it/s] 34%|███▎ | 21977/65536 [3:46:20<7:12:42, 1.68it/s] 34%|███▎ | 21978/65536 [3:46:20<7:12:57, 1.68it/s] 34%|███▎ | 21979/65536 [3:46:21<7:23:19, 1.64it/s] 34%|███▎ | 21980/65536 [3:46:21<7:16:21, 1.66it/s] {'loss': 3.0579, 'learning_rate': 6.999816334027183e-07, 'epoch': 1356.79} + 34%|███▎ | 21980/65536 [3:46:21<7:16:21, 1.66it/s] 34%|███▎ | 21981/65536 [3:46:22<7:37:01, 1.59it/s] 34%|███▎ | 21982/65536 [3:46:23<7:30:08, 1.61it/s] 34%|███▎ | 21983/65536 [3:46:23<7:21:06, 1.65it/s] 34%|███▎ | 21984/65536 [3:46:24<7:31:34, 1.61it/s] 34%|███▎ | 21985/65536 [3:46:25<7:26:39, 1.63it/s] 34%|███▎ | 21986/65536 [3:46:25<7:26:53, 1.62it/s] 34%|███▎ | 21987/65536 [3:46:26<7:30:43, 1.61it/s] 34%|███▎ | 21988/65536 [3:46:26<7:26:32, 1.63it/s] 34%|███▎ | 21989/65536 [3:46:27<7:27:20, 1.62it/s] 34%|███▎ | 21990/65536 [3:46:28<7:31:50, 1.61it/s] 34%|███▎ | 21991/65536 [3:46:28<7:23:50, 1.64it/s] 34%|███▎ | 21992/65536 [3:46:29<7:23:53, 1.63it/s] 34%|███▎ | 21993/65536 [3:46:30<7:34:47, 1.60it/s] 34%|███▎ | 21994/65536 [3:46:30<7:33:47, 1.60it/s] 34%|███▎ | 21995/65536 [3:46:31<7:28:15, 1.62it/s] 34%|███▎ | 21996/65536 [3:46:31<7:22:02, 1.64it/s] 34%|███▎ | 21997/65536 [3:46:32<7:12:02, 1.68it/s] 34%|███▎ | 21998/65536 [3:46:32<7:06:12, 1.70it/s] 34%|███▎ | 21999/65536 [3:46:33<7:07:03, 1.70it/s] 34%|███▎ | 22000/65536 [3:46:34<7:25:30, 1.63it/s] {'loss': 3.1143, 'learning_rate': 6.99706134443492e-07, 'epoch': 1358.02} + 34%|███▎ | 22000/65536 [3:46:34<7:25:30, 1.63it/s] 34%|███▎ | 22001/65536 [3:46:34<7:30:49, 1.61it/s] 34%|███▎ | 22002/65536 [3:46:35<7:26:17, 1.63it/s] 34%|███▎ | 22003/65536 [3:46:36<7:20:24, 1.65it/s] 34%|███▎ | 22004/65536 [3:46:36<7:17:20, 1.66it/s] 34%|███▎ | 22005/65536 [3:46:37<7:18:25, 1.65it/s] 34%|███▎ | 22006/65536 [3:46:37<7:11:54, 1.68it/s] 34%|███▎ | 22007/65536 [3:46:38<7:09:18, 1.69it/s] 34%|███▎ | 22008/65536 [3:46:38<7:10:34, 1.68it/s] 34%|███▎ | 22009/65536 [3:46:39<7:04:46, 1.71it/s] 34%|███▎ | 22010/65536 [3:46:40<7:01:14, 1.72it/s] 34%|███▎ | 22011/65536 [3:46:40<7:19:10, 1.65it/s] 34%|███▎ | 22012/65536 [3:46:41<7:25:49, 1.63it/s] 34%|███▎ | 22013/65536 [3:46:41<7:12:17, 1.68it/s] 34%|███▎ | 22014/65536 [3:46:42<7:19:19, 1.65it/s] 34%|███▎ | 22015/65536 [3:46:43<7:20:59, 1.64it/s] 34%|███▎ | 22016/65536 [3:46:43<7:40:52, 1.57it/s] 34%|███▎ | 22017/65536 [3:46:44<7:38:18, 1.58it/s] 34%|███▎ | 22018/65536 [3:46:45<7:38:03, 1.58it/s] 34%|███▎ | 22019/65536 [3:46:45<7:36:08, 1.59it/s] 34%|███▎ | 22020/65536 [3:46:46<7:30:29, 1.61it/s] {'loss': 2.9811, 'learning_rate': 6.994306354842659e-07, 'epoch': 1359.26} + 34%|███▎ | 22020/65536 [3:46:46<7:30:29, 1.61it/s] 34%|███▎ | 22021/65536 [3:46:46<7:24:04, 1.63it/s] 34%|███▎ | 22022/65536 [3:46:47<7:31:10, 1.61it/s] 34%|███▎ | 22023/65536 [3:46:48<7:27:07, 1.62it/s] 34%|███▎ | 22024/65536 [3:46:48<7:23:59, 1.63it/s] 34%|███▎ | 22025/65536 [3:46:49<7:23:00, 1.64it/s] 34%|███▎ | 22026/65536 [3:46:50<7:20:00, 1.65it/s] 34%|███▎ | 22027/65536 [3:46:50<7:20:09, 1.65it/s] 34%|███▎ | 22028/65536 [3:46:51<7:22:21, 1.64it/s] 34%|███▎ | 22029/65536 [3:46:51<7:22:10, 1.64it/s] 34%|███▎ | 22030/65536 [3:46:52<7:35:53, 1.59it/s] 34%|███▎ | 22031/65536 [3:46:53<7:25:16, 1.63it/s] 34%|███▎ | 22032/65536 [3:46:53<7:16:34, 1.66it/s] 34%|███▎ | 22033/65536 [3:46:54<7:37:00, 1.59it/s] 34%|███▎ | 22034/65536 [3:46:55<7:39:14, 1.58it/s] 34%|███▎ | 22035/65536 [3:46:55<7:32:38, 1.60it/s] 34%|███�� | 22036/65536 [3:46:56<7:27:33, 1.62it/s] 34%|███▎ | 22037/65536 [3:46:56<7:26:28, 1.62it/s] 34%|███▎ | 22038/65536 [3:46:57<7:25:32, 1.63it/s] 34%|███▎ | 22039/65536 [3:46:58<7:29:38, 1.61it/s] 34%|███▎ | 22040/65536 [3:46:58<7:24:56, 1.63it/s] {'loss': 2.9968, 'learning_rate': 6.991551365250397e-07, 'epoch': 1360.49} + 34%|███▎ | 22040/65536 [3:46:58<7:24:56, 1.63it/s] 34%|███▎ | 22041/65536 [3:46:59<7:35:49, 1.59it/s] 34%|███▎ | 22042/65536 [3:46:59<7:29:32, 1.61it/s] 34%|███▎ | 22043/65536 [3:47:00<7:29:00, 1.61it/s] 34%|███▎ | 22044/65536 [3:47:01<7:24:46, 1.63it/s] 34%|███▎ | 22045/65536 [3:47:01<7:25:41, 1.63it/s] 34%|███▎ | 22046/65536 [3:47:02<7:34:25, 1.60it/s] 34%|███▎ | 22047/65536 [3:47:03<7:31:55, 1.60it/s] 34%|███▎ | 22048/65536 [3:47:03<7:25:11, 1.63it/s] 34%|███▎ | 22049/65536 [3:47:04<7:46:54, 1.55it/s] 34%|███▎ | 22050/65536 [3:47:04<7:28:05, 1.62it/s] 34%|███▎ | 22051/65536 [3:47:05<7:40:38, 1.57it/s] 34%|███▎ | 22052/65536 [3:47:06<7:40:46, 1.57it/s] 34%|███▎ | 22053/65536 [3:47:06<7:32:31, 1.60it/s] 34%|███▎ | 22054/65536 [3:47:07<7:21:17, 1.64it/s] 34%|███▎ | 22055/65536 [3:47:08<7:23:57, 1.63it/s] 34%|███▎ | 22056/65536 [3:47:08<7:26:15, 1.62it/s] 34%|███▎ | 22057/65536 [3:47:09<7:18:04, 1.65it/s] 34%|███▎ | 22058/65536 [3:47:09<7:19:00, 1.65it/s] 34%|███▎ | 22059/65536 [3:47:10<7:27:11, 1.62it/s] 34%|███▎ | 22060/65536 [3:47:11<7:25:20, 1.63it/s] {'loss': 2.9376, 'learning_rate': 6.988796375658136e-07, 'epoch': 1361.73} + 34%|███▎ | 22060/65536 [3:47:11<7:25:20, 1.63it/s] 34%|███▎ | 22061/65536 [3:47:11<7:21:13, 1.64it/s] 34%|███▎ | 22062/65536 [3:47:12<7:25:31, 1.63it/s] 34%|███▎ | 22063/65536 [3:47:12<7:21:52, 1.64it/s] 34%|███▎ | 22064/65536 [3:47:13<7:22:45, 1.64it/s] 34%|███▎ | 22065/65536 [3:47:14<7:40:39, 1.57it/s] 34%|███▎ | 22066/65536 [3:47:14<7:30:46, 1.61it/s] 34%|███▎ | 22067/65536 [3:47:15<7:31:15, 1.61it/s] 34%|███▎ | 22068/65536 [3:47:16<7:26:14, 1.62it/s] 34%|███▎ | 22069/65536 [3:47:16<7:26:11, 1.62it/s] 34%|███▎ | 22070/65536 [3:47:17<7:35:37, 1.59it/s] 34%|███▎ | 22071/65536 [3:47:17<7:36:50, 1.59it/s] 34%|███▎ | 22072/65536 [3:47:18<7:23:11, 1.63it/s] 34%|███▎ | 22073/65536 [3:47:19<7:19:17, 1.65it/s] 34%|███▎ | 22074/65536 [3:47:19<7:23:38, 1.63it/s] 34%|███▎ | 22075/65536 [3:47:20<7:30:19, 1.61it/s] 34%|███▎ | 22076/65536 [3:47:20<7:19:17, 1.65it/s] 34%|███▎ | 22077/65536 [3:47:21<7:13:01, 1.67it/s] 34%|███▎ | 22078/65536 [3:47:22<7:20:39, 1.64it/s] 34%|███▎ | 22079/65536 [3:47:22<7:18:12, 1.65it/s] 34%|███▎ | 22080/65536 [3:47:23<7:18:38, 1.65it/s] {'loss': 2.7708, 'learning_rate': 6.986041386065874e-07, 'epoch': 1362.96} + 34%|███▎ | 22080/65536 [3:47:23<7:18:38, 1.65it/s] 34%|███▎ | 22081/65536 [3:47:23<7:18:04, 1.65it/s] 34%|███▎ | 22082/65536 [3:47:24<7:14:39, 1.67it/s] 34%|███▎ | 22083/65536 [3:47:25<7:12:45, 1.67it/s] 34%|███▎ | 22084/65536 [3:47:25<7:13:18, 1.67it/s] 34%|███▎ | 22085/65536 [3:47:26<7:18:46, 1.65it/s] 34%|███▎ | 22086/65536 [3:47:27<7:35:41, 1.59it/s] 34%|███▎ | 22087/65536 [3:47:27<7:26:28, 1.62it/s] 34%|███▎ | 22088/65536 [3:47:28<7:15:19, 1.66it/s] 34%|███▎ | 22089/65536 [3:47:28<7:31:45, 1.60it/s] 34%|███▎ | 22090/65536 [3:47:29<7:38:31, 1.58it/s] 34%|███▎ | 22091/65536 [3:47:30<7:25:47, 1.62it/s] 34%|███▎ | 22092/65536 [3:47:30<7:20:16, 1.64it/s] 34%|███▎ | 22093/65536 [3:47:31<7:28:24, 1.61it/s] 34%|███▎ | 22094/65536 [3:47:31<7:28:38, 1.61it/s] 34%|███▎ | 22095/65536 [3:47:32<7:25:01, 1.63it/s] 34%|███▎ | 22096/65536 [3:47:33<7:21:22, 1.64it/s] 34%|███▎ | 22097/65536 [3:47:33<7:39:21, 1.58it/s] 34%|███▎ | 22098/65536 [3:47:34<7:31:01, 1.61it/s] 34%|███▎ | 22099/65536 [3:47:35<7:31:44, 1.60it/s] 34%|███▎ | 22100/65536 [3:47:35<7:31:35, 1.60it/s] {'loss': 2.7872, 'learning_rate': 6.983286396473613e-07, 'epoch': 1364.2} + 34%|███▎ | 22100/65536 [3:47:35<7:31:35, 1.60it/s] 34%|███▎ | 22101/65536 [3:47:36<7:24:10, 1.63it/s] 34%|███▎ | 22102/65536 [3:47:36<7:33:02, 1.60it/s] 34%|███▎ | 22103/65536 [3:47:37<7:23:17, 1.63it/s] 34%|███▎ | 22104/65536 [3:47:38<7:31:09, 1.60it/s] 34%|███▎ | 22105/65536 [3:47:38<7:21:31, 1.64it/s] 34%|███▎ | 22106/65536 [3:47:39<7:23:44, 1.63it/s] 34%|███▎ | 22107/65536 [3:47:40<7:28:33, 1.61it/s] 34%|███▎ | 22108/65536 [3:47:40<7:23:19, 1.63it/s] 34%|███▎ | 22109/65536 [3:47:41<7:23:17, 1.63it/s] 34%|███▎ | 22110/65536 [3:47:41<7:23:16, 1.63it/s] 34%|███▎ | 22111/65536 [3:47:42<7:15:50, 1.66it/s] 34%|███▎ | 22112/65536 [3:47:42<7:06:33, 1.70it/s] 34%|███▎ | 22113/65536 [3:47:43<7:15:01, 1.66it/s] 34%|███▎ | 22114/65536 [3:47:44<7:44:19, 1.56it/s] 34%|███▎ | 22115/65536 [3:47:45<7:51:28, 1.53it/s] 34%|███▎ | 22116/65536 [3:47:45<7:48:06, 1.55it/s] 34%|███▎ | 22117/65536 [3:47:46<7:40:46, 1.57it/s] 34%|███▎ | 22118/65536 [3:47:46<7:23:01, 1.63it/s] 34%|███▍ | 22119/65536 [3:47:47<7:16:47, 1.66it/s] 34%|███▍ | 22120/65536 [3:47:48<7:11:19, 1.68it/s] {'loss': 2.9491, 'learning_rate': 6.980531406881352e-07, 'epoch': 1365.43} + 34%|███▍ | 22120/65536 [3:47:48<7:11:19, 1.68it/s] 34%|███▍ | 22121/65536 [3:47:48<7:17:03, 1.66it/s] 34%|███▍ | 22122/65536 [3:47:49<7:16:22, 1.66it/s] 34%|███▍ | 22123/65536 [3:47:49<7:17:04, 1.66it/s] 34%|███▍ | 22124/65536 [3:47:50<7:12:13, 1.67it/s] 34%|███▍ | 22125/65536 [3:47:51<7:11:18, 1.68it/s] 34%|███▍ | 22126/65536 [3:47:51<7:12:33, 1.67it/s] 34%|███▍ | 22127/65536 [3:47:52<7:15:03, 1.66it/s] 34%|███▍ | 22128/65536 [3:47:52<7:06:52, 1.69it/s] 34%|███▍ | 22129/65536 [3:47:53<7:10:46, 1.68it/s] 34%|███▍ | 22130/65536 [3:47:54<7:21:28, 1.64it/s] 34%|███▍ | 22131/65536 [3:47:54<7:19:07, 1.65it/s] 34%|███▍ | 22132/65536 [3:47:55<7:31:05, 1.60it/s] 34%|███▍ | 22133/65536 [3:47:55<7:23:37, 1.63it/s] 34%|███▍ | 22134/65536 [3:47:56<7:21:12, 1.64it/s] 34%|███▍ | 22135/65536 [3:47:57<7:15:43, 1.66it/s] 34%|███▍ | 22136/65536 [3:47:57<7:16:22, 1.66it/s] 34%|███▍ | 22137/65536 [3:47:58<7:26:14, 1.62it/s] 34%|███▍ | 22138/65536 [3:47:58<7:23:02, 1.63it/s] 34%|███▍ | 22139/65536 [3:47:59<7:24:29, 1.63it/s] 34%|███▍ | 22140/65536 [3:48:00<7:19:39, 1.65it/s] {'loss': 2.9494, 'learning_rate': 6.977776417289089e-07, 'epoch': 1366.67} + 34%|███▍ | 22140/65536 [3:48:00<7:19:39, 1.65it/s] 34%|███▍ | 22141/65536 [3:48:00<7:20:12, 1.64it/s] 34%|███▍ | 22142/65536 [3:48:01<7:18:40, 1.65it/s] 34%|███▍ | 22143/65536 [3:48:01<7:27:05, 1.62it/s] 34%|███▍ | 22144/65536 [3:48:02<7:20:21, 1.64it/s] 34%|███▍ | 22145/65536 [3:48:03<7:23:14, 1.63it/s] 34%|███▍ | 22146/65536 [3:48:03<7:34:42, 1.59it/s] 34%|███▍ | 22147/65536 [3:48:04<7:27:20, 1.62it/s] 34%|███▍ | 22148/65536 [3:48:05<7:20:41, 1.64it/s] 34%|███▍ | 22149/65536 [3:48:05<7:27:43, 1.62it/s] 34%|███▍ | 22150/65536 [3:48:06<7:21:52, 1.64it/s] 34%|███▍ | 22151/65536 [3:48:06<7:11:25, 1.68it/s] 34%|███▍ | 22152/65536 [3:48:07<7:17:41, 1.65it/s] 34%|███▍ | 22153/65536 [3:48:08<7:07:40, 1.69it/s] 34%|███▍ | 22154/65536 [3:48:08<7:11:15, 1.68it/s] 34%|███▍ | 22155/65536 [3:48:09<7:07:05, 1.69it/s] 34%|███▍ | 22156/65536 [3:48:09<7:11:03, 1.68it/s] 34%|███▍ | 22157/65536 [3:48:10<7:06:31, 1.70it/s] 34%|███▍ | 22158/65536 [3:48:11<7:11:56, 1.67it/s] 34%|███▍ | 22159/65536 [3:48:11<7:34:28, 1.59it/s] 34%|███▍ | 22160/65536 [3:48:12<7:45:10, 1.55it/s] {'loss': 2.952, 'learning_rate': 6.975021427696829e-07, 'epoch': 1367.9} + 34%|███▍ | 22160/65536 [3:48:12<7:45:10, 1.55it/s] 34%|███▍ | 22161/65536 [3:48:12<7:33:09, 1.60it/s] 34%|███▍ | 22162/65536 [3:48:13<7:43:28, 1.56it/s] 34%|███▍ | 22163/65536 [3:48:14<7:34:51, 1.59it/s] 34%|███▍ | 22164/65536 [3:48:14<7:24:55, 1.62it/s] 34%|███▍ | 22165/65536 [3:48:15<7:18:19, 1.65it/s] 34%|███▍ | 22166/65536 [3:48:16<7:19:14, 1.65it/s] 34%|███▍ | 22167/65536 [3:48:16<7:18:31, 1.65it/s] 34%|███▍ | 22168/65536 [3:48:17<7:32:53, 1.60it/s] 34%|███▍ | 22169/65536 [3:48:17<7:26:58, 1.62it/s] 34%|███▍ | 22170/65536 [3:48:18<7:26:15, 1.62it/s] 34%|███▍ | 22171/65536 [3:48:19<7:20:36, 1.64it/s] 34%|███▍ | 22172/65536 [3:48:19<7:27:56, 1.61it/s] 34%|███▍ | 22173/65536 [3:48:20<7:17:47, 1.65it/s] 34%|███▍ | 22174/65536 [3:48:20<7:17:48, 1.65it/s] 34%|███▍ | 22175/65536 [3:48:21<7:27:41, 1.61it/s] 34%|███▍ | 22176/65536 [3:48:22<7:26:46, 1.62it/s] 34%|███▍ | 22177/65536 [3:48:22<7:24:07, 1.63it/s] 34%|███▍ | 22178/65536 [3:48:23<7:35:52, 1.59it/s] 34%|███▍ | 22179/65536 [3:48:24<7:24:05, 1.63it/s] 34%|███▍ | 22180/65536 [3:48:24<7:21:31, 1.64it/s] {'loss': 2.8546, 'learning_rate': 6.972266438104567e-07, 'epoch': 1369.14} + 34%|███▍ | 22180/65536 [3:48:24<7:21:31, 1.64it/s] 34%|███▍ | 22181/65536 [3:48:25<7:27:42, 1.61it/s] 34%|███▍ | 22182/65536 [3:48:25<7:27:45, 1.61it/s] 34%|███▍ | 22183/65536 [3:48:26<7:22:08, 1.63it/s] 34%|███▍ | 22184/65536 [3:48:27<7:28:55, 1.61it/s] 34%|███▍ | 22185/65536 [3:48:27<7:29:05, 1.61it/s] 34%|███▍ | 22186/65536 [3:48:28<7:21:45, 1.64it/s] 34%|███▍ | 22187/65536 [3:48:28<7:22:42, 1.63it/s] 34%|███▍ | 22188/65536 [3:48:29<7:16:28, 1.66it/s] 34%|███▍ | 22189/65536 [3:48:30<7:18:22, 1.65it/s] 34%|███▍ | 22190/65536 [3:48:30<7:21:23, 1.64it/s] 34%|███▍ | 22191/65536 [3:48:31<7:12:16, 1.67it/s] 34%|███▍ | 22192/65536 [3:48:32<7:27:29, 1.61it/s] 34%|███▍ | 22193/65536 [3:48:32<7:25:51, 1.62it/s] 34%|███▍ | 22194/65536 [3:48:33<7:16:40, 1.65it/s] 34%|███▍ | 22195/65536 [3:48:33<7:30:04, 1.60it/s] 34%|███▍ | 22196/65536 [3:48:34<7:32:02, 1.60it/s] 34%|███▍ | 22197/65536 [3:48:35<7:23:16, 1.63it/s] 34%|███▍ | 22198/65536 [3:48:35<7:24:19, 1.63it/s] 34%|███▍ | 22199/65536 [3:48:36<7:27:39, 1.61it/s] 34%|███▍ | 22200/65536 [3:48:37<7:29:38, 1.61it/s] {'loss': 2.9859, 'learning_rate': 6.969511448512306e-07, 'epoch': 1370.37} + 34%|███▍ | 22200/65536 [3:48:37<7:29:38, 1.61it/s] 34%|███▍ | 22201/65536 [3:48:37<7:21:21, 1.64it/s] 34%|███▍ | 22202/65536 [3:48:38<7:29:23, 1.61it/s] 34%|███▍ | 22203/65536 [3:48:38<7:32:11, 1.60it/s] 34%|███▍ | 22204/65536 [3:48:39<7:22:32, 1.63it/s] 34%|███▍ | 22205/65536 [3:48:40<7:29:06, 1.61it/s] 34%|███▍ | 22206/65536 [3:48:40<7:27:21, 1.61it/s] 34%|███▍ | 22207/65536 [3:48:41<7:21:50, 1.63it/s] 34%|███▍ | 22208/65536 [3:48:41<7:16:12, 1.66it/s] 34%|███▍ | 22209/65536 [3:48:42<7:22:21, 1.63it/s] 34%|███▍ | 22210/65536 [3:48:43<7:20:40, 1.64it/s] 34%|███▍ | 22211/65536 [3:48:43<7:33:29, 1.59it/s] 34%|███▍ | 22212/65536 [3:48:44<7:41:45, 1.56it/s] 34%|███▍ | 22213/65536 [3:48:45<7:33:40, 1.59it/s] 34%|███▍ | 22214/65536 [3:48:45<7:34:05, 1.59it/s] 34%|███▍ | 22215/65536 [3:48:46<7:30:48, 1.60it/s] 34%|███▍ | 22216/65536 [3:48:46<7:16:59, 1.65it/s] 34%|███▍ | 22217/65536 [3:48:47<7:24:54, 1.62it/s] 34%|███▍ | 22218/65536 [3:48:48<7:15:43, 1.66it/s] 34%|███▍ | 22219/65536 [3:48:48<7:15:48, 1.66it/s] 34%|███▍ | 22220/65536 [3:48:49<7:13:02, 1.67it/s] {'loss': 3.0157, 'learning_rate': 6.966756458920044e-07, 'epoch': 1371.6} + 34%|███▍ | 22220/65536 [3:48:49<7:13:02, 1.67it/s] 34%|███▍ | 22221/65536 [3:48:49<7:15:24, 1.66it/s] 34%|███▍ | 22222/65536 [3:48:50<7:13:27, 1.67it/s] 34%|███▍ | 22223/65536 [3:48:51<7:09:31, 1.68it/s] 34%|███▍ | 22224/65536 [3:48:51<7:20:14, 1.64it/s] 34%|███▍ | 22225/65536 [3:48:52<7:17:38, 1.65it/s] 34%|███▍ | 22226/65536 [3:48:52<7:24:01, 1.63it/s] 34%|███▍ | 22227/65536 [3:48:53<7:32:00, 1.60it/s] 34%|███▍ | 22228/65536 [3:48:54<7:23:27, 1.63it/s] 34%|███▍ | 22229/65536 [3:48:54<7:18:45, 1.65it/s] 34%|███▍ | 22230/65536 [3:48:55<7:12:12, 1.67it/s] 34%|███▍ | 22231/65536 [3:48:55<7:11:46, 1.67it/s] 34%|███▍ | 22232/65536 [3:48:56<7:20:16, 1.64it/s] 34%|███▍ | 22233/65536 [3:48:57<7:20:28, 1.64it/s] 34%|███▍ | 22234/65536 [3:48:57<7:22:31, 1.63it/s] 34%|███▍ | 22235/65536 [3:48:58<7:32:01, 1.60it/s] 34%|███▍ | 22236/65536 [3:48:59<7:27:36, 1.61it/s] 34%|███▍ | 22237/65536 [3:48:59<7:21:25, 1.63it/s] 34%|███▍ | 22238/65536 [3:49:00<7:15:27, 1.66it/s] 34%|███▍ | 22239/65536 [3:49:00<7:29:37, 1.60it/s] 34%|███▍ | 22240/65536 [3:49:01<7:17:18, 1.65it/s] {'loss': 3.0706, 'learning_rate': 6.964001469327782e-07, 'epoch': 1372.84} + 34%|███▍ | 22240/65536 [3:49:01<7:17:18, 1.65it/s] 34%|███▍ | 22241/65536 [3:49:02<7:30:39, 1.60it/s] 34%|███▍ | 22242/65536 [3:49:02<7:23:03, 1.63it/s] 34%|███▍ | 22243/65536 [3:49:03<7:31:23, 1.60it/s] 34%|███▍ | 22244/65536 [3:49:04<7:45:16, 1.55it/s] 34%|███▍ | 22245/65536 [3:49:04<7:37:54, 1.58it/s] 34%|███▍ | 22246/65536 [3:49:05<7:31:37, 1.60it/s] 34%|███▍ | 22247/65536 [3:49:05<7:20:48, 1.64it/s] 34%|███▍ | 22248/65536 [3:49:06<7:19:52, 1.64it/s] 34%|███▍ | 22249/65536 [3:49:07<7:16:18, 1.65it/s] 34%|███▍ | 22250/65536 [3:49:07<7:21:32, 1.63it/s] 34%|███▍ | 22251/65536 [3:49:08<7:20:55, 1.64it/s] 34%|███▍ | 22252/65536 [3:49:08<7:17:20, 1.65it/s] 34%|███▍ | 22253/65536 [3:49:09<7:09:03, 1.68it/s] 34%|███▍ | 22254/65536 [3:49:10<7:23:54, 1.63it/s] 34%|███▍ | 22255/65536 [3:49:10<7:19:23, 1.64it/s] 34%|███▍ | 22256/65536 [3:49:11<7:14:43, 1.66it/s] 34%|███▍ | 22257/65536 [3:49:11<7:28:51, 1.61it/s] 34%|███▍ | 22258/65536 [3:49:12<7:12:54, 1.67it/s] 34%|███▍ | 22259/65536 [3:49:13<7:22:33, 1.63it/s] 34%|███▍ | 22260/65536 [3:49:13<7:12:35, 1.67it/s] {'loss': 3.0858, 'learning_rate': 6.961246479735521e-07, 'epoch': 1374.07} + 34%|███▍ | 22260/65536 [3:49:13<7:12:35, 1.67it/s] 34%|███▍ | 22261/65536 [3:49:14<7:09:48, 1.68it/s] 34%|███▍ | 22262/65536 [3:49:14<7:06:50, 1.69it/s] 34%|███▍ | 22263/65536 [3:49:15<7:07:20, 1.69it/s] 34%|███▍ | 22264/65536 [3:49:16<7:17:49, 1.65it/s] 34%|███▍ | 22265/65536 [3:49:16<7:23:32, 1.63it/s] 34%|███▍ | 22266/65536 [3:49:17<7:20:21, 1.64it/s] 34%|███▍ | 22267/65536 [3:49:18<7:21:46, 1.63it/s] 34%|███▍ | 22268/65536 [3:49:18<7:27:25, 1.61it/s] 34%|███▍ | 22269/65536 [3:49:19<7:24:53, 1.62it/s] 34%|███▍ | 22270/65536 [3:49:19<7:23:52, 1.62it/s] 34%|███▍ | 22271/65536 [3:49:20<7:22:56, 1.63it/s] 34%|███▍ | 22272/65536 [3:49:21<7:29:25, 1.60it/s] 34%|███▍ | 22273/65536 [3:49:21<7:38:11, 1.57it/s] 34%|███▍ | 22274/65536 [3:49:22<7:27:43, 1.61it/s] 34%|███▍ | 22275/65536 [3:49:22<7:22:51, 1.63it/s] 34%|███▍ | 22276/65536 [3:49:23<7:31:04, 1.60it/s] 34%|███▍ | 22277/65536 [3:49:24<7:36:36, 1.58it/s] 34%|███▍ | 22278/65536 [3:49:24<7:24:05, 1.62it/s] 34%|███▍ | 22279/65536 [3:49:25<7:28:47, 1.61it/s] 34%|███▍ | 22280/65536 [3:49:26<7:19:12, 1.64it/s] {'loss': 2.9814, 'learning_rate': 6.958491490143259e-07, 'epoch': 1375.31} + 34%|███▍ | 22280/65536 [3:49:26<7:19:12, 1.64it/s] 34%|███▍ | 22281/65536 [3:49:26<7:08:58, 1.68it/s] 34%|███▍ | 22282/65536 [3:49:27<7:09:55, 1.68it/s] 34%|███▍ | 22283/65536 [3:49:27<7:15:12, 1.66it/s] 34%|███▍ | 22284/65536 [3:49:28<7:24:37, 1.62it/s] 34%|███▍ | 22285/65536 [3:49:29<7:25:07, 1.62it/s] 34%|███▍ | 22286/65536 [3:49:29<7:19:06, 1.64it/s] 34%|███▍ | 22287/65536 [3:49:30<7:32:11, 1.59it/s] 34%|███▍ | 22288/65536 [3:49:30<7:18:22, 1.64it/s] 34%|███▍ | 22289/65536 [3:49:31<7:11:24, 1.67it/s] 34%|███▍ | 22290/65536 [3:49:32<7:21:39, 1.63it/s] 34%|███▍ | 22291/65536 [3:49:32<7:09:13, 1.68it/s] 34%|███▍ | 22292/65536 [3:49:33<7:42:02, 1.56it/s] 34%|███▍ | 22293/65536 [3:49:34<7:41:47, 1.56it/s] 34%|███▍ | 22294/65536 [3:49:34<7:42:10, 1.56it/s] 34%|███▍ | 22295/65536 [3:49:35<7:33:11, 1.59it/s] 34%|███▍ | 22296/65536 [3:49:36<7:42:38, 1.56it/s] 34%|███▍ | 22297/65536 [3:49:36<7:33:36, 1.59it/s] 34%|███▍ | 22298/65536 [3:49:37<7:19:35, 1.64it/s] 34%|███▍ | 22299/65536 [3:49:37<7:17:08, 1.65it/s] 34%|███▍ | 22300/65536 [3:49:38<7:08:12, 1.68it/s] {'loss': 3.0067, 'learning_rate': 6.955736500550998e-07, 'epoch': 1376.54} + 34%|███▍ | 22300/65536 [3:49:38<7:08:12, 1.68it/s] 34%|███▍ | 22301/65536 [3:49:38<7:11:34, 1.67it/s] 34%|███▍ | 22302/65536 [3:49:39<7:21:46, 1.63it/s] 34%|███▍ | 22303/65536 [3:49:40<7:14:12, 1.66it/s] 34%|███▍ | 22304/65536 [3:49:40<7:26:24, 1.61it/s] 34%|███▍ | 22305/65536 [3:49:41<7:21:18, 1.63it/s] 34%|███▍ | 22306/65536 [3:49:42<7:16:42, 1.65it/s] 34%|███▍ | 22307/65536 [3:49:42<7:10:52, 1.67it/s] 34%|███▍ | 22308/65536 [3:49:43<7:28:44, 1.61it/s] 34%|███▍ | 22309/65536 [3:49:43<7:25:07, 1.62it/s] 34%|███▍ | 22310/65536 [3:49:44<7:32:17, 1.59it/s] 34%|███▍ | 22311/65536 [3:49:45<7:22:31, 1.63it/s] 34%|███▍ | 22312/65536 [3:49:45<7:17:35, 1.65it/s] 34%|███▍ | 22313/65536 [3:49:46<7:20:27, 1.64it/s] 34%|███▍ | 22314/65536 [3:49:46<7:21:44, 1.63it/s] 34%|███▍ | 22315/65536 [3:49:47<7:27:09, 1.61it/s] 34%|███▍ | 22316/65536 [3:49:48<7:22:45, 1.63it/s] 34%|███▍ | 22317/65536 [3:49:48<7:16:39, 1.65it/s] 34%|███▍ | 22318/65536 [3:49:49<7:27:41, 1.61it/s] 34%|███▍ | 22319/65536 [3:49:50<7:33:53, 1.59it/s] 34%|███▍ | 22320/65536 [3:49:50<7:24:09, 1.62it/s] {'loss': 2.993, 'learning_rate': 6.952981510958736e-07, 'epoch': 1377.78} + 34%|███▍ | 22320/65536 [3:49:50<7:24:09, 1.62it/s] 34%|███▍ | 22321/65536 [3:49:51<7:32:47, 1.59it/s] 34%|███▍ | 22322/65536 [3:49:51<7:22:46, 1.63it/s] 34%|███▍ | 22323/65536 [3:49:52<7:21:20, 1.63it/s] 34%|███▍ | 22324/65536 [3:49:53<7:37:45, 1.57it/s] 34%|███▍ | 22325/65536 [3:49:53<7:29:54, 1.60it/s] 34%|███▍ | 22326/65536 [3:49:54<7:35:02, 1.58it/s] 34%|███▍ | 22327/65536 [3:49:55<7:16:38, 1.65it/s] 34%|███▍ | 22328/65536 [3:49:55<7:14:30, 1.66it/s] 34%|███▍ | 22329/65536 [3:49:56<7:14:45, 1.66it/s] 34%|███▍ | 22330/65536 [3:49:56<7:09:54, 1.68it/s] 34%|███▍ | 22331/65536 [3:49:57<7:11:38, 1.67it/s] 34%|███▍ | 22332/65536 [3:49:58<7:12:31, 1.66it/s] 34%|███▍ | 22333/65536 [3:49:58<7:06:11, 1.69it/s] 34%|███▍ | 22334/65536 [3:49:59<7:08:11, 1.68it/s] 34%|███▍ | 22335/65536 [3:49:59<7:09:44, 1.68it/s] 34%|███▍ | 22336/65536 [3:50:00<7:15:16, 1.65it/s] 34%|███▍ | 22337/65536 [3:50:01<7:18:29, 1.64it/s] 34%|███▍ | 22338/65536 [3:50:01<7:22:24, 1.63it/s] 34%|███▍ | 22339/65536 [3:50:02<7:15:53, 1.65it/s] 34%|███▍ | 22340/65536 [3:50:02<7:35:01, 1.58it/s] {'loss': 3.0182, 'learning_rate': 6.950226521366475e-07, 'epoch': 1379.01} + 34%|███▍ | 22340/65536 [3:50:02<7:35:01, 1.58it/s] 34%|███▍ | 22341/65536 [3:50:03<7:34:52, 1.58it/s] 34%|███▍ | 22342/65536 [3:50:04<7:28:50, 1.60it/s] 34%|███▍ | 22343/65536 [3:50:04<7:26:13, 1.61it/s] 34%|███▍ | 22344/65536 [3:50:05<7:28:05, 1.61it/s] 34%|███▍ | 22345/65536 [3:50:05<7:18:42, 1.64it/s] 34%|███▍ | 22346/65536 [3:50:06<7:21:02, 1.63it/s] 34%|███▍ | 22347/65536 [3:50:07<7:20:36, 1.63it/s] 34%|███▍ | 22348/65536 [3:50:07<7:09:53, 1.67it/s] 34%|███▍ | 22349/65536 [3:50:08<7:12:23, 1.66it/s] 34%|███▍ | 22350/65536 [3:50:08<7:15:36, 1.65it/s] 34%|███▍ | 22351/65536 [3:50:09<7:35:11, 1.58it/s] 34%|███▍ | 22352/65536 [3:50:10<7:26:55, 1.61it/s] 34%|███▍ | 22353/65536 [3:50:10<7:17:25, 1.65it/s] 34%|███▍ | 22354/65536 [3:50:11<7:17:38, 1.64it/s] 34%|███▍ | 22355/65536 [3:50:12<7:14:32, 1.66it/s] 34%|███▍ | 22356/65536 [3:50:12<7:24:02, 1.62it/s] 34%|███▍ | 22357/65536 [3:50:13<7:41:08, 1.56it/s] 34%|███▍ | 22358/65536 [3:50:14<7:31:51, 1.59it/s] 34%|███▍ | 22359/65536 [3:50:14<7:33:34, 1.59it/s] 34%|███▍ | 22360/65536 [3:50:15<7:31:55, 1.59it/s] {'loss': 2.9773, 'learning_rate': 6.947471531774214e-07, 'epoch': 1380.25} + 34%|███▍ | 22360/65536 [3:50:15<7:31:55, 1.59it/s] 34%|███▍ | 22361/65536 [3:50:15<7:34:47, 1.58it/s] 34%|███▍ | 22362/65536 [3:50:16<7:27:05, 1.61it/s] 34%|███▍ | 22363/65536 [3:50:17<7:23:46, 1.62it/s] 34%|███▍ | 22364/65536 [3:50:17<7:19:57, 1.64it/s] 34%|███▍ | 22365/65536 [3:50:18<7:21:20, 1.63it/s] 34%|███▍ | 22366/65536 [3:50:18<7:13:04, 1.66it/s] 34%|███▍ | 22367/65536 [3:50:19<7:13:33, 1.66it/s] 34%|███▍ | 22368/65536 [3:50:20<7:06:53, 1.69it/s] 34%|███▍ | 22369/65536 [3:50:20<7:23:45, 1.62it/s] 34%|███▍ | 22370/65536 [3:50:21<7:25:34, 1.61it/s] 34%|███▍ | 22371/65536 [3:50:22<7:27:01, 1.61it/s] 34%|███▍ | 22372/65536 [3:50:22<7:18:36, 1.64it/s] 34%|███▍ | 22373/65536 [3:50:23<7:38:14, 1.57it/s] 34%|███▍ | 22374/65536 [3:50:23<7:48:06, 1.54it/s] 34%|███▍ | 22375/65536 [3:50:24<7:36:59, 1.57it/s] 34%|███▍ | 22376/65536 [3:50:25<7:28:05, 1.61it/s] 34%|███▍ | 22377/65536 [3:50:25<7:17:29, 1.64it/s] 34%|███▍ | 22378/65536 [3:50:26<7:12:51, 1.66it/s] 34%|███▍ | 22379/65536 [3:50:26<7:09:44, 1.67it/s] 34%|███▍ | 22380/65536 [3:50:27<7:09:29, 1.67it/s] {'loss': 2.9697, 'learning_rate': 6.944716542181952e-07, 'epoch': 1381.48} + 34%|███▍ | 22380/65536 [3:50:27<7:09:29, 1.67it/s] 34%|███▍ | 22381/65536 [3:50:28<7:20:42, 1.63it/s] 34%|███▍ | 22382/65536 [3:50:28<7:09:13, 1.68it/s] 34%|███▍ | 22383/65536 [3:50:29<7:08:13, 1.68it/s] 34%|███▍ | 22384/65536 [3:50:29<7:12:19, 1.66it/s] 34%|███▍ | 22385/65536 [3:50:30<7:11:19, 1.67it/s] 34%|███▍ | 22386/65536 [3:50:31<7:22:16, 1.63it/s] 34%|███▍ | 22387/65536 [3:50:31<7:27:00, 1.61it/s] 34%|███▍ | 22388/65536 [3:50:32<7:20:36, 1.63it/s] 34%|███▍ | 22389/65536 [3:50:33<7:25:05, 1.62it/s] 34%|███▍ | 22390/65536 [3:50:33<7:15:18, 1.65it/s] 34%|███▍ | 22391/65536 [3:50:34<7:12:06, 1.66it/s] 34%|███▍ | 22392/65536 [3:50:34<7:07:53, 1.68it/s] 34%|███▍ | 22393/65536 [3:50:35<7:09:40, 1.67it/s] 34%|███▍ | 22394/65536 [3:50:35<7:12:12, 1.66it/s] 34%|███▍ | 22395/65536 [3:50:36<7:15:58, 1.65it/s] 34%|███▍ | 22396/65536 [3:50:37<7:10:14, 1.67it/s] 34%|███▍ | 22397/65536 [3:50:37<7:32:50, 1.59it/s] 34%|███▍ | 22398/65536 [3:50:38<7:22:59, 1.62it/s] 34%|███▍ | 22399/65536 [3:50:39<7:18:15, 1.64it/s] 34%|███▍ | 22400/65536 [3:50:39<7:19:14, 1.64it/s] {'loss': 3.0496, 'learning_rate': 6.941961552589691e-07, 'epoch': 1382.72} + 34%|███▍ | 22400/65536 [3:50:39<7:19:14, 1.64it/s] 34%|███▍ | 22401/65536 [3:50:40<7:24:22, 1.62it/s] 34%|███▍ | 22402/65536 [3:50:40<7:19:07, 1.64it/s] 34%|███▍ | 22403/65536 [3:50:41<7:19:02, 1.64it/s] 34%|███▍ | 22404/65536 [3:50:42<7:29:57, 1.60it/s] 34%|███▍ | 22405/65536 [3:50:42<7:39:02, 1.57it/s] 34%|███▍ | 22406/65536 [3:50:43<7:43:11, 1.55it/s] 34%|███▍ | 22407/65536 [3:50:44<7:44:42, 1.55it/s] 34%|███▍ | 22408/65536 [3:50:44<7:42:26, 1.55it/s] 34%|███▍ | 22409/65536 [3:50:45<7:36:40, 1.57it/s] 34%|███▍ | 22410/65536 [3:50:46<7:30:52, 1.59it/s] 34%|███▍ | 22411/65536 [3:50:46<7:24:08, 1.62it/s] 34%|███▍ | 22412/65536 [3:50:47<7:28:04, 1.60it/s] 34%|███▍ | 22413/65536 [3:50:47<7:24:21, 1.62it/s] 34%|███▍ | 22414/65536 [3:50:48<7:10:37, 1.67it/s] 34%|███▍ | 22415/65536 [3:50:49<7:08:44, 1.68it/s] 34%|███▍ | 22416/65536 [3:50:49<7:02:41, 1.70it/s] 34%|███▍ | 22417/65536 [3:50:50<7:05:24, 1.69it/s] 34%|███▍ | 22418/65536 [3:50:50<7:20:03, 1.63it/s] 34%|███▍ | 22419/65536 [3:50:51<7:23:57, 1.62it/s] 34%|███▍ | 22420/65536 [3:50:52<7:23:59, 1.62it/s] {'loss': 3.0031, 'learning_rate': 6.939206562997428e-07, 'epoch': 1383.95} + 34%|███▍ | 22420/65536 [3:50:52<7:23:59, 1.62it/s] 34%|███▍ | 22421/65536 [3:50:52<7:38:02, 1.57it/s] 34%|███▍ | 22422/65536 [3:50:53<7:26:16, 1.61it/s] 34%|███▍ | 22423/65536 [3:50:53<7:10:50, 1.67it/s] 34%|███▍ | 22424/65536 [3:50:54<7:09:51, 1.67it/s] 34%|███▍ | 22425/65536 [3:50:55<7:11:01, 1.67it/s] 34%|███▍ | 22426/65536 [3:50:55<7:21:15, 1.63it/s] 34%|███▍ | 22427/65536 [3:50:56<7:38:18, 1.57it/s] 34%|███▍ | 22428/65536 [3:50:57<7:41:08, 1.56it/s] 34%|███▍ | 22429/65536 [3:50:57<7:34:01, 1.58it/s] 34%|███▍ | 22430/65536 [3:50:58<7:26:46, 1.61it/s] 34%|███▍ | 22431/65536 [3:50:58<7:21:26, 1.63it/s] 34%|███▍ | 22432/65536 [3:50:59<7:10:10, 1.67it/s] 34%|███▍ | 22433/65536 [3:51:00<7:32:46, 1.59it/s] 34%|███▍ | 22434/65536 [3:51:00<7:28:34, 1.60it/s] 34%|███▍ | 22435/65536 [3:51:01<7:14:39, 1.65it/s] 34%|███▍ | 22436/65536 [3:51:01<7:11:46, 1.66it/s] 34%|███▍ | 22437/65536 [3:51:02<7:12:56, 1.66it/s] 34%|███▍ | 22438/65536 [3:51:03<7:27:09, 1.61it/s] 34%|███▍ | 22439/65536 [3:51:03<7:49:31, 1.53it/s] 34%|███▍ | 22440/65536 [3:51:04<7:37:00, 1.57it/s] {'loss': 3.0418, 'learning_rate': 6.936451573405167e-07, 'epoch': 1385.19} + 34%|███▍ | 22440/65536 [3:51:04<7:37:00, 1.57it/s] 34%|███▍ | 22441/65536 [3:51:05<7:28:24, 1.60it/s] 34%|███▍ | 22442/65536 [3:51:05<7:16:57, 1.64it/s] 34%|███▍ | 22443/65536 [3:51:06<7:15:00, 1.65it/s] 34%|███▍ | 22444/65536 [3:51:06<7:19:18, 1.63it/s] 34%|███▍ | 22445/65536 [3:51:07<7:15:15, 1.65it/s] 34%|███▍ | 22446/65536 [3:51:08<7:19:50, 1.63it/s] 34%|███▍ | 22447/65536 [3:51:08<7:14:23, 1.65it/s] 34%|███▍ | 22448/65536 [3:51:09<7:14:54, 1.65it/s] 34%|███▍ | 22449/65536 [3:51:09<7:19:59, 1.63it/s] 34%|███▍ | 22450/65536 [3:51:10<7:19:48, 1.63it/s] 34%|███▍ | 22451/65536 [3:51:11<7:11:14, 1.67it/s] 34%|███▍ | 22452/65536 [3:51:11<7:17:22, 1.64it/s] 34%|███▍ | 22453/65536 [3:51:12<7:19:09, 1.64it/s] 34%|███▍ | 22454/65536 [3:51:13<7:35:44, 1.58it/s] 34%|███▍ | 22455/65536 [3:51:13<7:31:05, 1.59it/s] 34%|███▍ | 22456/65536 [3:51:14<7:20:09, 1.63it/s] 34%|███▍ | 22457/65536 [3:51:14<7:21:40, 1.63it/s] 34%|███▍ | 22458/65536 [3:51:15<7:20:03, 1.63it/s] 34%|███▍ | 22459/65536 [3:51:16<7:23:39, 1.62it/s] 34%|███▍ | 22460/65536 [3:51:16<7:39:47, 1.56it/s] {'loss': 3.0117, 'learning_rate': 6.933696583812906e-07, 'epoch': 1386.42} + 34%|███▍ | 22460/65536 [3:51:16<7:39:47, 1.56it/s] 34%|███▍ | 22461/65536 [3:51:17<7:27:09, 1.61it/s] 34%|███▍ | 22462/65536 [3:51:17<7:19:14, 1.63it/s] 34%|███▍ | 22463/65536 [3:51:18<7:12:00, 1.66it/s] 34%|███▍ | 22464/65536 [3:51:19<7:37:47, 1.57it/s] 34%|███▍ | 22465/65536 [3:51:19<7:32:42, 1.59it/s] 34%|███▍ | 22466/65536 [3:51:20<7:24:35, 1.61it/s] 34%|███▍ | 22467/65536 [3:51:21<7:16:09, 1.65it/s] 34%|███▍ | 22468/65536 [3:51:21<7:12:08, 1.66it/s] 34%|███▍ | 22469/65536 [3:51:22<7:09:39, 1.67it/s] 34%|███▍ | 22470/65536 [3:51:22<7:28:27, 1.60it/s] 34%|███▍ | 22471/65536 [3:51:23<7:33:27, 1.58it/s] 34%|███▍ | 22472/65536 [3:51:24<7:28:46, 1.60it/s] 34%|███▍ | 22473/65536 [3:51:24<7:25:36, 1.61it/s] 34%|███▍ | 22474/65536 [3:51:25<7:18:29, 1.64it/s] 34%|███▍ | 22475/65536 [3:51:26<7:17:33, 1.64it/s] 34%|███▍ | 22476/65536 [3:51:26<7:08:41, 1.67it/s] 34%|███▍ | 22477/65536 [3:51:27<7:11:28, 1.66it/s] 34%|███▍ | 22478/65536 [3:51:27<7:12:22, 1.66it/s] 34%|███▍ | 22479/65536 [3:51:28<7:14:07, 1.65it/s] 34%|███▍ | 22480/65536 [3:51:29<7:16:47, 1.64it/s] {'loss': 2.9879, 'learning_rate': 6.930941594220643e-07, 'epoch': 1387.65} + 34%|███▍ | 22480/65536 [3:51:29<7:16:47, 1.64it/s] 34%|███▍ | 22481/65536 [3:51:29<7:13:48, 1.65it/s] 34%|███▍ | 22482/65536 [3:51:30<7:10:09, 1.67it/s] 34%|███▍ | 22483/65536 [3:51:30<7:16:41, 1.64it/s] 34%|███▍ | 22484/65536 [3:51:31<7:30:43, 1.59it/s] 34%|███▍ | 22485/65536 [3:51:32<7:22:28, 1.62it/s] 34%|███▍ | 22486/65536 [3:51:32<7:35:13, 1.58it/s] 34%|███▍ | 22487/65536 [3:51:33<7:24:33, 1.61it/s] 34%|███▍ | 22488/65536 [3:51:33<7:22:12, 1.62it/s] 34%|███▍ | 22489/65536 [3:51:34<7:25:03, 1.61it/s] 34%|███▍ | 22490/65536 [3:51:35<7:16:32, 1.64it/s] 34%|███▍ | 22491/65536 [3:51:35<7:16:48, 1.64it/s] 34%|███▍ | 22492/65536 [3:51:36<7:16:50, 1.64it/s] 34%|███▍ | 22493/65536 [3:51:37<7:20:19, 1.63it/s] 34%|███▍ | 22494/65536 [3:51:37<7:32:26, 1.59it/s] 34%|███▍ | 22495/65536 [3:51:38<7:19:29, 1.63it/s] 34%|███▍ | 22496/65536 [3:51:38<7:19:01, 1.63it/s] 34%|███▍ | 22497/65536 [3:51:39<7:10:50, 1.66it/s] 34%|███▍ | 22498/65536 [3:51:40<7:10:32, 1.67it/s] 34%|███▍ | 22499/65536 [3:51:40<7:04:08, 1.69it/s] 34%|███▍ | 22500/65536 [3:51:41<7:15:29, 1.65it/s] {'loss': 3.0156, 'learning_rate': 6.928186604628382e-07, 'epoch': 1388.89} + 34%|███▍ | 22500/65536 [3:51:41<7:15:29, 1.65it/s] 34%|███▍ | 22501/65536 [3:51:41<7:23:06, 1.62it/s] 34%|███▍ | 22502/65536 [3:51:42<7:36:53, 1.57it/s] 34%|███▍ | 22503/65536 [3:51:43<7:32:46, 1.58it/s] 34%|███▍ | 22504/65536 [3:51:43<7:27:38, 1.60it/s] 34%|███▍ | 22505/65536 [3:51:44<7:17:13, 1.64it/s] 34%|███▍ | 22506/65536 [3:51:44<7:13:56, 1.65it/s] 34%|███▍ | 22507/65536 [3:51:45<7:25:44, 1.61it/s] 34%|███▍ | 22508/65536 [3:51:46<7:21:10, 1.63it/s] 34%|███▍ | 22509/65536 [3:51:46<7:28:54, 1.60it/s] 34%|███▍ | 22510/65536 [3:51:47<7:21:56, 1.62it/s] 34%|███▍ | 22511/65536 [3:51:48<7:40:00, 1.56it/s] 34%|███▍ | 22512/65536 [3:51:48<7:39:02, 1.56it/s] 34%|███▍ | 22513/65536 [3:51:49<7:32:03, 1.59it/s] 34%|███▍ | 22514/65536 [3:51:50<7:19:52, 1.63it/s] 34%|███▍ | 22515/65536 [3:51:50<7:20:13, 1.63it/s] 34%|███▍ | 22516/65536 [3:51:51<7:17:55, 1.64it/s] 34%|███▍ | 22517/65536 [3:51:51<7:18:24, 1.64it/s] 34%|███▍ | 22518/65536 [3:51:52<7:07:25, 1.68it/s] 34%|███▍ | 22519/65536 [3:51:53<7:19:08, 1.63it/s] 34%|███▍ | 22520/65536 [3:51:53<7:23:42, 1.62it/s] {'loss': 2.9637, 'learning_rate': 6.92543161503612e-07, 'epoch': 1390.12} + 34%|███▍ | 22520/65536 [3:51:53<7:23:42, 1.62it/s] 34%|███▍ | 22521/65536 [3:51:54<7:29:58, 1.59it/s] 34%|███▍ | 22522/65536 [3:51:54<7:32:19, 1.58it/s] 34%|███▍ | 22523/65536 [3:51:55<7:25:58, 1.61it/s] 34%|███▍ | 22524/65536 [3:51:56<7:22:44, 1.62it/s] 34%|███▍ | 22525/65536 [3:51:56<7:23:27, 1.62it/s] 34%|███▍ | 22526/65536 [3:51:57<7:13:36, 1.65it/s] 34%|███▍ | 22527/65536 [3:51:57<7:08:31, 1.67it/s] 34%|███▍ | 22528/65536 [3:51:58<6:59:57, 1.71it/s] 34%|███▍ | 22529/65536 [3:51:59<7:19:23, 1.63it/s] 34%|███▍ | 22530/65536 [3:51:59<7:32:59, 1.58it/s] 34%|███▍ | 22531/65536 [3:52:00<7:29:55, 1.59it/s] 34%|███▍ | 22532/65536 [3:52:01<7:19:44, 1.63it/s] 34%|███▍ | 22533/65536 [3:52:01<7:12:38, 1.66it/s] 34%|███▍ | 22534/65536 [3:52:02<7:16:44, 1.64it/s] 34%|███▍ | 22535/65536 [3:52:02<7:23:58, 1.61it/s] 34%|███▍ | 22536/65536 [3:52:03<7:25:14, 1.61it/s] 34%|███▍ | 22537/65536 [3:52:04<7:31:24, 1.59it/s] 34%|███▍ | 22538/65536 [3:52:04<7:22:47, 1.62it/s] 34%|███▍ | 22539/65536 [3:52:05<7:17:15, 1.64it/s] 34%|███▍ | 22540/65536 [3:52:05<7:06:42, 1.68it/s] {'loss': 3.0093, 'learning_rate': 6.922676625443859e-07, 'epoch': 1391.36} + 34%|███▍ | 22540/65536 [3:52:05<7:06:42, 1.68it/s] 34%|███▍ | 22541/65536 [3:52:06<7:20:27, 1.63it/s] 34%|███▍ | 22542/65536 [3:52:07<7:09:11, 1.67it/s] 34%|███▍ | 22543/65536 [3:52:07<7:08:20, 1.67it/s] 34%|███▍ | 22544/65536 [3:52:08<7:07:04, 1.68it/s] 34%|███▍ | 22545/65536 [3:52:08<7:14:57, 1.65it/s] 34%|███▍ | 22546/65536 [3:52:09<7:15:43, 1.64it/s] 34%|███▍ | 22547/65536 [3:52:10<7:06:22, 1.68it/s] 34%|███▍ | 22548/65536 [3:52:10<7:19:43, 1.63it/s] 34%|███▍ | 22549/65536 [3:52:11<7:27:25, 1.60it/s] 34%|███▍ | 22550/65536 [3:52:12<7:20:17, 1.63it/s] 34%|███▍ | 22551/65536 [3:52:12<7:33:20, 1.58it/s] 34%|███▍ | 22552/65536 [3:52:13<7:40:27, 1.56it/s] 34%|███▍ | 22553/65536 [3:52:13<7:30:47, 1.59it/s] 34%|███▍ | 22554/65536 [3:52:14<7:18:46, 1.63it/s] 34%|███▍ | 22555/65536 [3:52:15<7:01:20, 1.70it/s] 34%|███▍ | 22556/65536 [3:52:15<7:14:36, 1.65it/s] 34%|███▍ | 22557/65536 [3:52:16<7:18:34, 1.63it/s] 34%|███▍ | 22558/65536 [3:52:16<7:22:00, 1.62it/s] 34%|███▍ | 22559/65536 [3:52:17<7:22:11, 1.62it/s] 34%|███▍ | 22560/65536 [3:52:18<7:13:07, 1.65it/s] {'loss': 3.0227, 'learning_rate': 6.919921635851597e-07, 'epoch': 1392.59} + 34%|███▍ | 22560/65536 [3:52:18<7:13:07, 1.65it/s] 34%|███▍ | 22561/65536 [3:52:18<7:15:06, 1.65it/s] 34%|███▍ | 22562/65536 [3:52:19<7:23:51, 1.61it/s] 34%|███▍ | 22563/65536 [3:52:20<7:16:16, 1.64it/s] 34%|███▍ | 22564/65536 [3:52:20<7:07:29, 1.68it/s] 34%|███▍ | 22565/65536 [3:52:21<7:10:47, 1.66it/s] 34%|███▍ | 22566/65536 [3:52:21<7:07:55, 1.67it/s] 34%|███▍ | 22567/65536 [3:52:22<7:34:40, 1.58it/s] 34%|███▍ | 22568/65536 [3:52:23<7:26:19, 1.60it/s] 34%|███▍ | 22569/65536 [3:52:23<7:24:21, 1.61it/s] 34%|███▍ | 22570/65536 [3:52:24<7:18:28, 1.63it/s] 34%|███▍ | 22571/65536 [3:52:24<7:17:28, 1.64it/s] 34%|███▍ | 22572/65536 [3:52:25<7:16:05, 1.64it/s] 34%|███▍ | 22573/65536 [3:52:26<7:25:35, 1.61it/s] 34%|███▍ | 22574/65536 [3:52:26<7:16:45, 1.64it/s] 34%|███▍ | 22575/65536 [3:52:27<7:17:30, 1.64it/s] 34%|███▍ | 22576/65536 [3:52:27<7:17:12, 1.64it/s] 34%|███▍ | 22577/65536 [3:52:28<7:12:17, 1.66it/s] 34%|███▍ | 22578/65536 [3:52:29<7:21:14, 1.62it/s] 34%|███▍ | 22579/65536 [3:52:29<7:13:51, 1.65it/s] 34%|███▍ | 22580/65536 [3:52:30<7:12:14, 1.66it/s] {'loss': 3.0063, 'learning_rate': 6.917166646259336e-07, 'epoch': 1393.83} + 34%|███▍ | 22580/65536 [3:52:30<7:12:14, 1.66it/s] 34%|███▍ | 22581/65536 [3:52:31<7:12:27, 1.66it/s] 34%|███▍ | 22582/65536 [3:52:31<7:14:36, 1.65it/s] 34%|███▍ | 22583/65536 [3:52:32<7:24:34, 1.61it/s] 34%|███▍ | 22584/65536 [3:52:32<7:23:39, 1.61it/s] 34%|███▍ | 22585/65536 [3:52:33<7:18:11, 1.63it/s] 34%|███▍ | 22586/65536 [3:52:34<7:16:16, 1.64it/s] 34%|███▍ | 22587/65536 [3:52:34<7:05:30, 1.68it/s] 34%|███▍ | 22588/65536 [3:52:35<7:03:04, 1.69it/s] 34%|███▍ | 22589/65536 [3:52:35<7:08:16, 1.67it/s] 34%|███▍ | 22590/65536 [3:52:36<7:07:53, 1.67it/s] 34%|███▍ | 22591/65536 [3:52:37<7:21:16, 1.62it/s] 34%|███▍ | 22592/65536 [3:52:37<7:22:06, 1.62it/s] 34%|███▍ | 22593/65536 [3:52:38<7:14:34, 1.65it/s] 34%|███▍ | 22594/65536 [3:52:38<7:12:17, 1.66it/s] 34%|███▍ | 22595/65536 [3:52:39<7:25:07, 1.61it/s] 34%|███▍ | 22596/65536 [3:52:40<7:22:05, 1.62it/s] 34%|███▍ | 22597/65536 [3:52:40<7:21:43, 1.62it/s] 34%|███▍ | 22598/65536 [3:52:41<7:27:25, 1.60it/s] 34%|███▍ | 22599/65536 [3:52:41<7:12:20, 1.66it/s] 34%|███▍ | 22600/65536 [3:52:42<7:25:14, 1.61it/s] {'loss': 3.0236, 'learning_rate': 6.914411656667076e-07, 'epoch': 1395.06} + 34%|███▍ | 22600/65536 [3:52:42<7:25:14, 1.61it/s] 34%|███▍ | 22601/65536 [3:52:43<7:20:28, 1.62it/s] 34%|███▍ | 22602/65536 [3:52:43<7:10:53, 1.66it/s] 34%|███▍ | 22603/65536 [3:52:44<7:20:52, 1.62it/s] 34%|███▍ | 22604/65536 [3:52:45<7:19:11, 1.63it/s] 34%|███▍ | 22605/65536 [3:52:45<7:18:19, 1.63it/s] 34%|███▍ | 22606/65536 [3:52:46<7:20:31, 1.62it/s] 34%|███▍ | 22607/65536 [3:52:46<7:18:43, 1.63it/s] 34%|███▍ | 22608/65536 [3:52:47<7:12:10, 1.66it/s] 34%|███▍ | 22609/65536 [3:52:48<7:15:02, 1.64it/s] 35%|███▍ | 22610/65536 [3:52:48<7:19:55, 1.63it/s] 35%|███▍ | 22611/65536 [3:52:49<7:35:09, 1.57it/s] 35%|███▍ | 22612/65536 [3:52:50<7:29:47, 1.59it/s] 35%|███▍ | 22613/65536 [3:52:50<7:19:05, 1.63it/s] 35%|███▍ | 22614/65536 [3:52:51<7:20:23, 1.62it/s] 35%|███▍ | 22615/65536 [3:52:51<7:14:35, 1.65it/s] 35%|███▍ | 22616/65536 [3:52:52<7:33:24, 1.58it/s] 35%|███▍ | 22617/65536 [3:52:53<7:38:08, 1.56it/s] 35%|███▍ | 22618/65536 [3:52:53<7:27:12, 1.60it/s] 35%|███▍ | 22619/65536 [3:52:54<7:18:34, 1.63it/s] 35%|███▍ | 22620/65536 [3:52:54<7:21:23, 1.62it/s] {'loss': 2.9828, 'learning_rate': 6.911656667074813e-07, 'epoch': 1396.3} + 35%|███▍ | 22620/65536 [3:52:54<7:21:23, 1.62it/s] 35%|███▍ | 22621/65536 [3:52:55<7:29:33, 1.59it/s] 35%|███▍ | 22622/65536 [3:52:56<7:24:32, 1.61it/s] 35%|███▍ | 22623/65536 [3:52:56<7:36:40, 1.57it/s] 35%|███▍ | 22624/65536 [3:52:57<7:37:26, 1.56it/s] 35%|███▍ | 22625/65536 [3:52:58<7:21:45, 1.62it/s] 35%|███▍ | 22626/65536 [3:52:58<7:13:45, 1.65it/s] 35%|███▍ | 22627/65536 [3:52:59<7:15:11, 1.64it/s] 35%|███▍ | 22628/65536 [3:52:59<7:19:43, 1.63it/s] 35%|███▍ | 22629/65536 [3:53:00<7:21:07, 1.62it/s] 35%|███▍ | 22630/65536 [3:53:01<7:17:23, 1.63it/s] 35%|███▍ | 22631/65536 [3:53:01<7:15:53, 1.64it/s] 35%|███▍ | 22632/65536 [3:53:02<7:28:54, 1.59it/s] 35%|███▍ | 22633/65536 [3:53:03<7:28:53, 1.59it/s] 35%|███▍ | 22634/65536 [3:53:03<7:19:00, 1.63it/s] 35%|███▍ | 22635/65536 [3:53:04<7:18:41, 1.63it/s] 35%|███▍ | 22636/65536 [3:53:04<7:24:56, 1.61it/s] 35%|███▍ | 22637/65536 [3:53:05<7:20:25, 1.62it/s] 35%|███▍ | 22638/65536 [3:53:06<7:14:55, 1.64it/s] 35%|███▍ | 22639/65536 [3:53:06<7:24:34, 1.61it/s] 35%|███▍ | 22640/65536 [3:53:07<7:17:50, 1.63it/s] {'loss': 3.0113, 'learning_rate': 6.908901677482552e-07, 'epoch': 1397.53} + 35%|███▍ | 22640/65536 [3:53:07<7:17:50, 1.63it/s] 35%|███▍ | 22641/65536 [3:53:07<7:10:31, 1.66it/s] 35%|███▍ | 22642/65536 [3:53:08<7:08:19, 1.67it/s] 35%|███▍ | 22643/65536 [3:53:09<7:06:09, 1.68it/s] 35%|███▍ | 22644/65536 [3:53:09<7:06:12, 1.68it/s] 35%|███▍ | 22645/65536 [3:53:10<7:17:57, 1.63it/s] 35%|███▍ | 22646/65536 [3:53:10<7:18:55, 1.63it/s] 35%|███▍ | 22647/65536 [3:53:11<7:26:45, 1.60it/s] 35%|███▍ | 22648/65536 [3:53:12<7:31:10, 1.58it/s] 35%|███▍ | 22649/65536 [3:53:12<7:34:28, 1.57it/s] 35%|███▍ | 22650/65536 [3:53:13<7:25:17, 1.61it/s] 35%|███▍ | 22651/65536 [3:53:14<7:24:45, 1.61it/s] 35%|███▍ | 22652/65536 [3:53:14<7:18:33, 1.63it/s] 35%|███▍ | 22653/65536 [3:53:15<7:28:04, 1.60it/s] 35%|███▍ | 22654/65536 [3:53:15<7:14:18, 1.65it/s] 35%|███▍ | 22655/65536 [3:53:16<7:15:15, 1.64it/s] 35%|███▍ | 22656/65536 [3:53:17<7:18:34, 1.63it/s] 35%|███▍ | 22657/65536 [3:53:17<7:12:00, 1.65it/s] 35%|███▍ | 22658/65536 [3:53:18<7:29:30, 1.59it/s] 35%|███▍ | 22659/65536 [3:53:19<7:28:43, 1.59it/s] 35%|███▍ | 22660/65536 [3:53:19<7:25:13, 1.61it/s] {'loss': 3.0021, 'learning_rate': 6.90614668789029e-07, 'epoch': 1398.77} + 35%|███▍ | 22660/65536 [3:53:19<7:25:13, 1.61it/s] 35%|███▍ | 22661/65536 [3:53:20<7:18:18, 1.63it/s] 35%|███▍ | 22662/65536 [3:53:20<7:05:08, 1.68it/s] 35%|███▍ | 22663/65536 [3:53:21<7:11:50, 1.65it/s] 35%|███▍ | 22664/65536 [3:53:22<7:22:37, 1.61it/s] 35%|███▍ | 22665/65536 [3:53:22<7:16:41, 1.64it/s] 35%|███▍ | 22666/65536 [3:53:23<7:10:07, 1.66it/s] 35%|███▍ | 22667/65536 [3:53:23<7:27:14, 1.60it/s] 35%|███▍ | 22668/65536 [3:53:24<7:20:20, 1.62it/s] 35%|███▍ | 22669/65536 [3:53:25<7:25:19, 1.60it/s] 35%|███▍ | 22670/65536 [3:53:25<7:23:39, 1.61it/s] 35%|███▍ | 22671/65536 [3:53:26<7:21:17, 1.62it/s] 35%|███▍ | 22672/65536 [3:53:27<7:20:01, 1.62it/s] 35%|███▍ | 22673/65536 [3:53:27<7:23:31, 1.61it/s] 35%|███▍ | 22674/65536 [3:53:28<7:47:44, 1.53it/s] 35%|███▍ | 22675/65536 [3:53:29<7:55:13, 1.50it/s] 35%|███▍ | 22676/65536 [3:53:29<7:42:49, 1.54it/s] 35%|███▍ | 22677/65536 [3:53:30<7:34:03, 1.57it/s] 35%|███▍ | 22678/65536 [3:53:30<7:23:23, 1.61it/s] 35%|███▍ | 22679/65536 [3:53:31<7:17:01, 1.63it/s] 35%|███▍ | 22680/65536 [3:53:32<7:22:17, 1.61it/s] {'loss': 2.9998, 'learning_rate': 6.903391698298028e-07, 'epoch': 1400.0} + 35%|███▍ | 22680/65536 [3:53:32<7:22:17, 1.61it/s] 35%|███▍ | 22681/65536 [3:53:32<7:29:14, 1.59it/s] 35%|███▍ | 22682/65536 [3:53:33<7:24:53, 1.61it/s] 35%|███▍ | 22683/65536 [3:53:34<7:22:59, 1.61it/s] 35%|███▍ | 22684/65536 [3:53:34<7:26:20, 1.60it/s] 35%|███▍ | 22685/65536 [3:53:35<7:21:06, 1.62it/s] 35%|███▍ | 22686/65536 [3:53:35<7:28:17, 1.59it/s] 35%|███▍ | 22687/65536 [3:53:36<7:21:01, 1.62it/s] 35%|███▍ | 22688/65536 [3:53:37<7:13:38, 1.65it/s] 35%|███▍ | 22689/65536 [3:53:37<7:12:30, 1.65it/s] 35%|███▍ | 22690/65536 [3:53:38<7:16:33, 1.64it/s] 35%|███▍ | 22691/65536 [3:53:38<7:16:32, 1.64it/s] 35%|███▍ | 22692/65536 [3:53:39<7:02:42, 1.69it/s] 35%|███▍ | 22693/65536 [3:53:40<6:59:35, 1.70it/s] 35%|███▍ | 22694/65536 [3:53:40<7:13:45, 1.65it/s] 35%|███▍ | 22695/65536 [3:53:41<7:14:05, 1.64it/s] 35%|███▍ | 22696/65536 [3:53:41<7:15:46, 1.64it/s] 35%|███▍ | 22697/65536 [3:53:42<7:38:39, 1.56it/s] 35%|███▍ | 22698/65536 [3:53:43<7:23:53, 1.61it/s] 35%|███▍ | 22699/65536 [3:53:43<7:21:37, 1.62it/s] 35%|███▍ | 22700/65536 [3:53:44<7:24:55, 1.60it/s] {'loss': 3.0051, 'learning_rate': 6.900636708705766e-07, 'epoch': 1401.23} + 35%|███▍ | 22700/65536 [3:53:44<7:24:55, 1.60it/s] 35%|███▍ | 22701/65536 [3:53:45<7:21:58, 1.62it/s] 35%|███▍ | 22702/65536 [3:53:45<7:13:57, 1.65it/s] 35%|███▍ | 22703/65536 [3:53:46<7:09:51, 1.66it/s] 35%|███▍ | 22704/65536 [3:53:46<7:07:55, 1.67it/s] 35%|███▍ | 22705/65536 [3:53:47<7:07:34, 1.67it/s] 35%|███▍ | 22706/65536 [3:53:48<7:11:31, 1.65it/s] 35%|███▍ | 22707/65536 [3:53:48<7:15:36, 1.64it/s] 35%|███▍ | 22708/65536 [3:53:49<7:11:28, 1.65it/s] 35%|███▍ | 22709/65536 [3:53:49<7:08:16, 1.67it/s] 35%|███▍ | 22710/65536 [3:53:50<6:59:41, 1.70it/s] 35%|███▍ | 22711/65536 [3:53:51<7:10:29, 1.66it/s] 35%|███▍ | 22712/65536 [3:53:51<7:27:12, 1.60it/s] 35%|███▍ | 22713/65536 [3:53:52<7:35:02, 1.57it/s] 35%|███▍ | 22714/65536 [3:53:52<7:22:19, 1.61it/s] 35%|███▍ | 22715/65536 [3:53:53<7:14:51, 1.64it/s] 35%|███▍ | 22716/65536 [3:53:54<7:34:43, 1.57it/s] 35%|███▍ | 22717/65536 [3:53:54<7:17:30, 1.63it/s] 35%|███▍ | 22718/65536 [3:53:55<7:07:11, 1.67it/s] 35%|███▍ | 22719/65536 [3:53:55<7:14:06, 1.64it/s] 35%|███▍ | 22720/65536 [3:53:56<7:12:25, 1.65it/s] {'loss': 3.0361, 'learning_rate': 6.897881719113505e-07, 'epoch': 1402.47} + 35%|███▍ | 22720/65536 [3:53:56<7:12:25, 1.65it/s] 35%|███▍ | 22721/65536 [3:53:57<7:12:48, 1.65it/s] 35%|███▍ | 22722/65536 [3:53:57<7:11:47, 1.65it/s] 35%|███▍ | 22723/65536 [3:53:58<7:28:15, 1.59it/s] 35%|███▍ | 22724/65536 [3:53:59<7:26:50, 1.60it/s] 35%|███▍ | 22725/65536 [3:53:59<7:30:18, 1.58it/s] 35%|███▍ | 22726/65536 [3:54:00<7:29:56, 1.59it/s] 35%|███▍ | 22727/65536 [3:54:01<7:32:51, 1.58it/s] 35%|███▍ | 22728/65536 [3:54:01<7:24:53, 1.60it/s] 35%|███▍ | 22729/65536 [3:54:02<7:30:34, 1.58it/s] 35%|███▍ | 22730/65536 [3:54:02<7:31:40, 1.58it/s] 35%|███▍ | 22731/65536 [3:54:03<7:18:28, 1.63it/s] 35%|███▍ | 22732/65536 [3:54:04<7:19:11, 1.62it/s] 35%|███▍ | 22733/65536 [3:54:04<7:23:29, 1.61it/s] 35%|███▍ | 22734/65536 [3:54:05<7:09:15, 1.66it/s] 35%|███▍ | 22735/65536 [3:54:05<7:14:08, 1.64it/s] 35%|███▍ | 22736/65536 [3:54:06<7:17:30, 1.63it/s] 35%|███▍ | 22737/65536 [3:54:07<7:17:03, 1.63it/s] 35%|███▍ | 22738/65536 [3:54:07<7:13:13, 1.65it/s] 35%|███▍ | 22739/65536 [3:54:08<7:04:14, 1.68it/s] 35%|███▍ | 22740/65536 [3:54:08<7:11:39, 1.65it/s] {'loss': 3.0125, 'learning_rate': 6.895126729521244e-07, 'epoch': 1403.7} + 35%|███▍ | 22740/65536 [3:54:08<7:11:39, 1.65it/s] 35%|███▍ | 22741/65536 [3:54:09<7:18:52, 1.63it/s] 35%|███▍ | 22742/65536 [3:54:10<7:17:52, 1.63it/s] 35%|███▍ | 22743/65536 [3:54:10<7:29:24, 1.59it/s] 35%|███▍ | 22744/65536 [3:54:11<7:25:41, 1.60it/s] 35%|███▍ | 22745/65536 [3:54:12<7:35:56, 1.56it/s] 35%|███▍ | 22746/65536 [3:54:12<7:31:48, 1.58it/s] 35%|███▍ | 22747/65536 [3:54:13<7:17:10, 1.63it/s] 35%|███▍ | 22748/65536 [3:54:13<7:21:01, 1.62it/s] 35%|███▍ | 22749/65536 [3:54:14<7:21:48, 1.61it/s] 35%|███▍ | 22750/65536 [3:54:15<7:13:48, 1.64it/s] 35%|███▍ | 22751/65536 [3:54:15<7:27:45, 1.59it/s] 35%|███▍ | 22752/65536 [3:54:16<7:24:03, 1.61it/s] 35%|███▍ | 22753/65536 [3:54:17<7:20:03, 1.62it/s] 35%|███▍ | 22754/65536 [3:54:17<7:11:51, 1.65it/s] 35%|███▍ | 22755/65536 [3:54:18<7:11:20, 1.65it/s] 35%|███▍ | 22756/65536 [3:54:18<7:14:24, 1.64it/s] 35%|███▍ | 22757/65536 [3:54:19<7:12:44, 1.65it/s] 35%|███▍ | 22758/65536 [3:54:20<7:17:55, 1.63it/s] 35%|███▍ | 22759/65536 [3:54:20<7:08:30, 1.66it/s] 35%|███▍ | 22760/65536 [3:54:21<7:16:56, 1.63it/s] {'loss': 2.9742, 'learning_rate': 6.892371739928981e-07, 'epoch': 1404.94} + 35%|███▍ | 22760/65536 [3:54:21<7:16:56, 1.63it/s] 35%|███▍ | 22761/65536 [3:54:21<7:12:00, 1.65it/s] 35%|███▍ | 22762/65536 [3:54:22<7:24:08, 1.61it/s] 35%|███▍ | 22763/65536 [3:54:23<7:18:30, 1.63it/s] 35%|███▍ | 22764/65536 [3:54:23<7:22:51, 1.61it/s] 35%|███▍ | 22765/65536 [3:54:24<7:15:07, 1.64it/s] 35%|███▍ | 22766/65536 [3:54:24<7:13:39, 1.64it/s] 35%|███▍ | 22767/65536 [3:54:25<7:22:32, 1.61it/s] 35%|███▍ | 22768/65536 [3:54:26<7:26:40, 1.60it/s] 35%|███▍ | 22769/65536 [3:54:26<7:18:29, 1.63it/s] 35%|███▍ | 22770/65536 [3:54:27<7:18:06, 1.63it/s] 35%|███▍ | 22771/65536 [3:54:28<7:12:47, 1.65it/s] 35%|███▍ | 22772/65536 [3:54:28<7:25:40, 1.60it/s] 35%|███▍ | 22773/65536 [3:54:29<7:14:17, 1.64it/s] 35%|███▍ | 22774/65536 [3:54:29<7:21:44, 1.61it/s] 35%|███▍ | 22775/65536 [3:54:30<7:23:30, 1.61it/s] 35%|███▍ | 22776/65536 [3:54:31<7:32:03, 1.58it/s] 35%|███▍ | 22777/65536 [3:54:31<7:14:10, 1.64it/s] 35%|███▍ | 22778/65536 [3:54:32<7:29:20, 1.59it/s] 35%|███▍ | 22779/65536 [3:54:33<7:35:05, 1.57it/s] 35%|███▍ | 22780/65536 [3:54:33<7:26:36, 1.60it/s] {'loss': 2.9415, 'learning_rate': 6.88961675033672e-07, 'epoch': 1406.17} + 35%|███▍ | 22780/65536 [3:54:33<7:26:36, 1.60it/s] 35%|███▍ | 22781/65536 [3:54:34<7:23:41, 1.61it/s] 35%|███▍ | 22782/65536 [3:54:34<7:31:07, 1.58it/s] 35%|███▍ | 22783/65536 [3:54:35<7:30:31, 1.58it/s] 35%|███▍ | 22784/65536 [3:54:36<7:20:57, 1.62it/s] 35%|███▍ | 22785/65536 [3:54:36<7:30:03, 1.58it/s] 35%|███▍ | 22786/65536 [3:54:37<7:29:17, 1.59it/s] 35%|███▍ | 22787/65536 [3:54:38<7:16:48, 1.63it/s] 35%|███▍ | 22788/65536 [3:54:38<7:06:17, 1.67it/s] 35%|███▍ | 22789/65536 [3:54:39<7:02:01, 1.69it/s] 35%|███▍ | 22790/65536 [3:54:39<7:02:47, 1.69it/s] 35%|███▍ | 22791/65536 [3:54:40<7:05:03, 1.68it/s] 35%|███▍ | 22792/65536 [3:54:41<7:11:49, 1.65it/s] 35%|███▍ | 22793/65536 [3:54:41<7:13:11, 1.64it/s] 35%|███▍ | 22794/65536 [3:54:42<7:25:50, 1.60it/s] 35%|███▍ | 22795/65536 [3:54:42<7:16:24, 1.63it/s] 35%|███▍ | 22796/65536 [3:54:43<7:09:24, 1.66it/s] 35%|███▍ | 22797/65536 [3:54:44<7:10:43, 1.65it/s] 35%|███▍ | 22798/65536 [3:54:44<7:07:14, 1.67it/s] 35%|███▍ | 22799/65536 [3:54:45<7:15:39, 1.63it/s] 35%|███▍ | 22800/65536 [3:54:45<7:27:49, 1.59it/s] {'loss': 2.9931, 'learning_rate': 6.886861760744458e-07, 'epoch': 1407.41} + 35%|███▍ | 22800/65536 [3:54:45<7:27:49, 1.59it/s] 35%|███▍ | 22801/65536 [3:54:46<7:23:46, 1.60it/s] 35%|███▍ | 22802/65536 [3:54:47<7:18:30, 1.62it/s] 35%|███▍ | 22803/65536 [3:54:47<7:23:37, 1.61it/s] 35%|███▍ | 22804/65536 [3:54:48<7:26:53, 1.59it/s] 35%|███▍ | 22805/65536 [3:54:49<7:22:58, 1.61it/s] 35%|███▍ | 22806/65536 [3:54:49<7:24:52, 1.60it/s] 35%|███▍ | 22807/65536 [3:54:50<7:28:42, 1.59it/s] 35%|███▍ | 22808/65536 [3:54:50<7:23:09, 1.61it/s] 35%|███▍ | 22809/65536 [3:54:51<7:20:35, 1.62it/s] 35%|███▍ | 22810/65536 [3:54:52<7:35:18, 1.56it/s] 35%|███▍ | 22811/65536 [3:54:52<7:27:20, 1.59it/s] 35%|███▍ | 22812/65536 [3:54:53<7:24:23, 1.60it/s] 35%|███▍ | 22813/65536 [3:54:54<7:17:57, 1.63it/s] 35%|███▍ | 22814/65536 [3:54:54<7:27:44, 1.59it/s] 35%|███▍ | 22815/65536 [3:54:55<7:19:38, 1.62it/s] 35%|███▍ | 22816/65536 [3:54:55<7:12:39, 1.65it/s] 35%|███▍ | 22817/65536 [3:54:56<7:13:40, 1.64it/s] 35%|███▍ | 22818/65536 [3:54:57<7:17:50, 1.63it/s] 35%|███▍ | 22819/65536 [3:54:57<7:05:05, 1.67it/s] 35%|███▍ | 22820/65536 [3:54:58<7:06:54, 1.67it/s] {'loss': 2.9967, 'learning_rate': 6.884106771152197e-07, 'epoch': 1408.64} + 35%|███▍ | 22820/65536 [3:54:58<7:06:54, 1.67it/s] 35%|███▍ | 22821/65536 [3:54:58<7:09:15, 1.66it/s] 35%|███▍ | 22822/65536 [3:54:59<7:03:54, 1.68it/s] 35%|███▍ | 22823/65536 [3:55:00<7:03:33, 1.68it/s] 35%|███▍ | 22824/65536 [3:55:00<7:15:03, 1.64it/s] 35%|███▍ | 22825/65536 [3:55:01<7:24:45, 1.60it/s] 35%|███▍ | 22826/65536 [3:55:02<7:49:12, 1.52it/s] 35%|███▍ | 22827/65536 [3:55:02<7:36:20, 1.56it/s] 35%|███▍ | 22828/65536 [3:55:03<7:27:20, 1.59it/s] 35%|███▍ | 22829/65536 [3:55:03<7:16:09, 1.63it/s] 35%|███▍ | 22830/65536 [3:55:04<7:14:53, 1.64it/s] 35%|███▍ | 22831/65536 [3:55:05<7:19:40, 1.62it/s] 35%|███▍ | 22832/65536 [3:55:05<7:29:15, 1.58it/s] 35%|███▍ | 22833/65536 [3:55:06<7:27:52, 1.59it/s] 35%|███▍ | 22834/65536 [3:55:07<7:34:02, 1.57it/s] 35%|███▍ | 22835/65536 [3:55:07<7:27:53, 1.59it/s] 35%|███▍ | 22836/65536 [3:55:08<7:19:34, 1.62it/s] 35%|███▍ | 22837/65536 [3:55:08<7:18:11, 1.62it/s] 35%|███▍ | 22838/65536 [3:55:09<7:17:54, 1.63it/s] 35%|███▍ | 22839/65536 [3:55:10<7:14:43, 1.64it/s] 35%|███▍ | 22840/65536 [3:55:10<7:22:49, 1.61it/s] {'loss': 2.971, 'learning_rate': 6.881351781559936e-07, 'epoch': 1409.88} + 35%|███▍ | 22840/65536 [3:55:10<7:22:49, 1.61it/s] 35%|███▍ | 22841/65536 [3:55:11<7:08:48, 1.66it/s] 35%|███▍ | 22842/65536 [3:55:11<7:05:26, 1.67it/s] 35%|███▍ | 22843/65536 [3:55:12<7:16:46, 1.63it/s] 35%|███▍ | 22844/65536 [3:55:13<7:18:24, 1.62it/s] 35%|███▍ | 22845/65536 [3:55:13<7:12:10, 1.65it/s] 35%|███▍ | 22846/65536 [3:55:14<7:02:56, 1.68it/s] 35%|███▍ | 22847/65536 [3:55:14<7:15:20, 1.63it/s] 35%|███▍ | 22848/65536 [3:55:15<7:12:20, 1.65it/s] 35%|███▍ | 22849/65536 [3:55:16<7:14:26, 1.64it/s] 35%|███▍ | 22850/65536 [3:55:16<7:09:48, 1.66it/s] 35%|███▍ | 22851/65536 [3:55:17<7:13:28, 1.64it/s] 35%|███▍ | 22852/65536 [3:55:17<7:06:51, 1.67it/s] 35%|███▍ | 22853/65536 [3:55:18<7:20:07, 1.62it/s] 35%|███▍ | 22854/65536 [3:55:19<7:16:34, 1.63it/s] 35%|███▍ | 22855/65536 [3:55:19<7:12:55, 1.64it/s] 35%|███▍ | 22856/65536 [3:55:20<7:19:38, 1.62it/s] 35%|███▍ | 22857/65536 [3:55:21<7:12:02, 1.65it/s] 35%|███▍ | 22858/65536 [3:55:21<7:21:21, 1.61it/s] 35%|███▍ | 22859/65536 [3:55:22<7:44:17, 1.53it/s] 35%|███▍ | 22860/65536 [3:55:23<7:31:08, 1.58it/s] {'loss': 2.9922, 'learning_rate': 6.878596791967674e-07, 'epoch': 1411.11} + 35%|███▍ | 22860/65536 [3:55:23<7:31:08, 1.58it/s] 35%|███▍ | 22861/65536 [3:55:23<7:28:11, 1.59it/s] 35%|███▍ | 22862/65536 [3:55:24<7:28:34, 1.59it/s] 35%|███▍ | 22863/65536 [3:55:24<7:20:40, 1.61it/s] 35%|███▍ | 22864/65536 [3:55:25<7:14:31, 1.64it/s] 35%|███▍ | 22865/65536 [3:55:26<7:12:12, 1.65it/s] 35%|███▍ | 22866/65536 [3:55:26<7:13:05, 1.64it/s] 35%|███▍ | 22867/65536 [3:55:27<7:08:12, 1.66it/s] 35%|███▍ | 22868/65536 [3:55:27<7:10:57, 1.65it/s] 35%|███▍ | 22869/65536 [3:55:28<7:04:48, 1.67it/s] 35%|███▍ | 22870/65536 [3:55:29<7:09:17, 1.66it/s] 35%|███▍ | 22871/65536 [3:55:29<6:59:26, 1.70it/s] 35%|███▍ | 22872/65536 [3:55:30<7:02:31, 1.68it/s] 35%|███▍ | 22873/65536 [3:55:30<7:02:38, 1.68it/s] 35%|███▍ | 22874/65536 [3:55:31<7:02:02, 1.68it/s] 35%|███▍ | 22875/65536 [3:55:32<7:24:21, 1.60it/s] 35%|███▍ | 22876/65536 [3:55:32<7:26:18, 1.59it/s] 35%|███▍ | 22877/65536 [3:55:33<7:21:32, 1.61it/s] 35%|███▍ | 22878/65536 [3:55:33<7:18:34, 1.62it/s] 35%|███▍ | 22879/65536 [3:55:34<7:05:01, 1.67it/s] 35%|███▍ | 22880/65536 [3:55:35<7:18:37, 1.62it/s] {'loss': 3.0975, 'learning_rate': 6.875841802375413e-07, 'epoch': 1412.35} + 35%|███▍ | 22880/65536 [3:55:35<7:18:37, 1.62it/s] 35%|███▍ | 22881/65536 [3:55:35<7:05:29, 1.67it/s] 35%|███▍ | 22882/65536 [3:55:36<7:17:27, 1.63it/s] 35%|███▍ | 22883/65536 [3:55:37<7:12:55, 1.64it/s] 35%|███▍ | 22884/65536 [3:55:37<7:06:09, 1.67it/s] 35%|███▍ | 22885/65536 [3:55:38<7:01:16, 1.69it/s] 35%|███▍ | 22886/65536 [3:55:38<7:06:41, 1.67it/s] 35%|███▍ | 22887/65536 [3:55:39<7:13:23, 1.64it/s] 35%|███▍ | 22888/65536 [3:55:40<7:15:06, 1.63it/s] 35%|███▍ | 22889/65536 [3:55:40<7:22:13, 1.61it/s] 35%|███▍ | 22890/65536 [3:55:41<7:18:48, 1.62it/s] 35%|███▍ | 22891/65536 [3:55:41<7:39:18, 1.55it/s] 35%|███▍ | 22892/65536 [3:55:42<7:38:15, 1.55it/s] 35%|███▍ | 22893/65536 [3:55:43<7:44:53, 1.53it/s] 35%|███▍ | 22894/65536 [3:55:43<7:33:41, 1.57it/s] 35%|███▍ | 22895/65536 [3:55:44<7:20:49, 1.61it/s] 35%|███▍ | 22896/65536 [3:55:45<7:10:13, 1.65it/s] 35%|███▍ | 22897/65536 [3:55:45<7:16:10, 1.63it/s] 35%|███▍ | 22898/65536 [3:55:46<7:23:51, 1.60it/s] 35%|███▍ | 22899/65536 [3:55:46<7:14:04, 1.64it/s] 35%|███▍ | 22900/65536 [3:55:47<7:09:50, 1.65it/s] {'loss': 2.9825, 'learning_rate': 6.873086812783151e-07, 'epoch': 1413.58} + 35%|███▍ | 22900/65536 [3:55:47<7:09:50, 1.65it/s] 35%|███▍ | 22901/65536 [3:55:48<7:01:58, 1.68it/s] 35%|███▍ | 22902/65536 [3:55:48<6:59:38, 1.69it/s] 35%|███▍ | 22903/65536 [3:55:49<7:16:42, 1.63it/s] 35%|███▍ | 22904/65536 [3:55:49<7:16:39, 1.63it/s] 35%|███▍ | 22905/65536 [3:55:50<7:16:22, 1.63it/s] 35%|███▍ | 22906/65536 [3:55:51<7:07:38, 1.66it/s] 35%|███▍ | 22907/65536 [3:55:51<7:29:20, 1.58it/s] 35%|███▍ | 22908/65536 [3:55:52<7:26:55, 1.59it/s] 35%|███▍ | 22909/65536 [3:55:53<7:24:27, 1.60it/s] 35%|███▍ | 22910/65536 [3:55:53<7:13:58, 1.64it/s] 35%|███▍ | 22911/65536 [3:55:54<7:10:17, 1.65it/s] 35%|███▍ | 22912/65536 [3:55:54<7:08:46, 1.66it/s] 35%|███▍ | 22913/65536 [3:55:55<7:05:12, 1.67it/s] 35%|███▍ | 22914/65536 [3:55:56<7:12:41, 1.64it/s] 35%|███▍ | 22915/65536 [3:55:56<7:06:44, 1.66it/s] 35%|███▍ | 22916/65536 [3:55:57<7:12:50, 1.64it/s] 35%|███▍ | 22917/65536 [3:55:57<7:23:03, 1.60it/s] 35%|███▍ | 22918/65536 [3:55:58<7:13:35, 1.64it/s] 35%|███▍ | 22919/65536 [3:55:59<7:21:44, 1.61it/s] 35%|███▍ | 22920/65536 [3:55:59<7:09:36, 1.65it/s] {'loss': 3.023, 'learning_rate': 6.87033182319089e-07, 'epoch': 1414.81} + 35%|███▍ | 22920/65536 [3:55:59<7:09:36, 1.65it/s] 35%|███▍ | 22921/65536 [3:56:00<7:06:41, 1.66it/s] 35%|███▍ | 22922/65536 [3:56:00<7:00:00, 1.69it/s] 35%|███▍ | 22923/65536 [3:56:01<7:00:12, 1.69it/s] 35%|███▍ | 22924/65536 [3:56:02<7:14:33, 1.63it/s] 35%|███▍ | 22925/65536 [3:56:02<7:12:39, 1.64it/s] 35%|███▍ | 22926/65536 [3:56:03<7:10:48, 1.65it/s] 35%|███▍ | 22927/65536 [3:56:03<7:11:01, 1.65it/s] 35%|███▍ | 22928/65536 [3:56:04<7:23:18, 1.60it/s] 35%|███▍ | 22929/65536 [3:56:05<7:33:18, 1.57it/s] 35%|███▍ | 22930/65536 [3:56:05<7:14:37, 1.63it/s] 35%|███▍ | 22931/65536 [3:56:06<7:16:45, 1.63it/s] 35%|███▍ | 22932/65536 [3:56:07<7:16:09, 1.63it/s] 35%|███▍ | 22933/65536 [3:56:07<7:15:37, 1.63it/s] 35%|███▍ | 22934/65536 [3:56:08<7:05:38, 1.67it/s] 35%|███▍ | 22935/65536 [3:56:08<7:08:46, 1.66it/s] 35%|███▍ | 22936/65536 [3:56:09<7:14:27, 1.63it/s] 35%|███▍ | 22937/65536 [3:56:10<7:16:29, 1.63it/s] 35%|███▌ | 22938/65536 [3:56:10<7:22:07, 1.61it/s] 35%|███▌ | 22939/65536 [3:56:11<7:24:23, 1.60it/s] 35%|███▌ | 22940/65536 [3:56:12<7:39:12, 1.55it/s] {'loss': 3.0236, 'learning_rate': 6.86757683359863e-07, 'epoch': 1416.05} + 35%|███▌ | 22940/65536 [3:56:12<7:39:12, 1.55it/s] 35%|███▌ | 22941/65536 [3:56:12<7:27:56, 1.58it/s] 35%|███▌ | 22942/65536 [3:56:13<7:16:04, 1.63it/s] 35%|███▌ | 22943/65536 [3:56:13<7:06:21, 1.66it/s] 35%|███▌ | 22944/65536 [3:56:14<7:01:26, 1.68it/s] 35%|███▌ | 22945/65536 [3:56:14<7:01:39, 1.68it/s] 35%|███▌ | 22946/65536 [3:56:15<7:02:21, 1.68it/s] 35%|███▌ | 22947/65536 [3:56:16<7:10:23, 1.65it/s] 35%|███▌ | 22948/65536 [3:56:16<7:15:59, 1.63it/s] 35%|███▌ | 22949/65536 [3:56:17<7:21:31, 1.61it/s] 35%|███▌ | 22950/65536 [3:56:18<7:15:21, 1.63it/s] 35%|███▌ | 22951/65536 [3:56:18<7:22:49, 1.60it/s] 35%|███▌ | 22952/65536 [3:56:19<7:29:06, 1.58it/s] 35%|███▌ | 22953/65536 [3:56:19<7:20:15, 1.61it/s] 35%|███▌ | 22954/65536 [3:56:20<7:21:50, 1.61it/s] 35%|███▌ | 22955/65536 [3:56:21<7:24:51, 1.60it/s] 35%|███▌ | 22956/65536 [3:56:21<7:38:30, 1.55it/s] 35%|███▌ | 22957/65536 [3:56:22<7:33:43, 1.56it/s] 35%|███▌ | 22958/65536 [3:56:23<7:25:03, 1.59it/s] 35%|███▌ | 22959/65536 [3:56:23<7:12:08, 1.64it/s] 35%|███▌ | 22960/65536 [3:56:24<7:16:01, 1.63it/s] {'loss': 3.0156, 'learning_rate': 6.864821844006366e-07, 'epoch': 1417.28} + 35%|███▌ | 22960/65536 [3:56:24<7:16:01, 1.63it/s] 35%|███▌ | 22961/65536 [3:56:24<7:08:02, 1.66it/s] 35%|███▌ | 22962/65536 [3:56:25<7:12:31, 1.64it/s] 35%|███▌ | 22963/65536 [3:56:26<7:06:22, 1.66it/s] 35%|███▌ | 22964/65536 [3:56:26<7:03:11, 1.68it/s] 35%|███▌ | 22965/65536 [3:56:27<7:06:04, 1.67it/s] 35%|███▌ | 22966/65536 [3:56:27<7:02:11, 1.68it/s] 35%|███▌ | 22967/65536 [3:56:28<7:14:06, 1.63it/s] 35%|███▌ | 22968/65536 [3:56:29<7:05:18, 1.67it/s] 35%|███▌ | 22969/65536 [3:56:29<7:11:29, 1.64it/s] 35%|███▌ | 22970/65536 [3:56:30<7:05:12, 1.67it/s] 35%|███▌ | 22971/65536 [3:56:30<7:13:56, 1.63it/s] 35%|███▌ | 22972/65536 [3:56:31<7:26:55, 1.59it/s] 35%|███▌ | 22973/65536 [3:56:32<7:30:25, 1.57it/s] 35%|███▌ | 22974/65536 [3:56:32<7:23:51, 1.60it/s] 35%|███▌ | 22975/65536 [3:56:33<7:15:56, 1.63it/s] 35%|███▌ | 22976/65536 [3:56:34<7:16:12, 1.63it/s] 35%|███▌ | 22977/65536 [3:56:34<7:15:49, 1.63it/s] 35%|███▌ | 22978/65536 [3:56:35<7:16:50, 1.62it/s] 35%|███▌ | 22979/65536 [3:56:35<7:20:20, 1.61it/s] 35%|███▌ | 22980/65536 [3:56:36<7:17:39, 1.62it/s] {'loss': 2.9708, 'learning_rate': 6.862066854414106e-07, 'epoch': 1418.52} + 35%|███▌ | 22980/65536 [3:56:36<7:17:39, 1.62it/s] 35%|███▌ | 22981/65536 [3:56:37<7:12:26, 1.64it/s] 35%|███▌ | 22982/65536 [3:56:37<7:14:57, 1.63it/s] 35%|███▌ | 22983/65536 [3:56:38<7:14:02, 1.63it/s] 35%|███▌ | 22984/65536 [3:56:38<7:07:34, 1.66it/s] 35%|███▌ | 22985/65536 [3:56:39<7:07:41, 1.66it/s] 35%|███▌ | 22986/65536 [3:56:40<7:13:59, 1.63it/s] 35%|███▌ | 22987/65536 [3:56:40<7:11:27, 1.64it/s] 35%|███▌ | 22988/65536 [3:56:41<7:17:21, 1.62it/s] 35%|███▌ | 22989/65536 [3:56:42<7:16:22, 1.63it/s] 35%|███▌ | 22990/65536 [3:56:42<7:15:22, 1.63it/s] 35%|███▌ | 22991/65536 [3:56:43<7:17:13, 1.62it/s] 35%|███▌ | 22992/65536 [3:56:43<7:13:57, 1.63it/s] 35%|███▌ | 22993/65536 [3:56:44<7:11:23, 1.64it/s] 35%|███▌ | 22994/65536 [3:56:45<7:02:46, 1.68it/s] 35%|███▌ | 22995/65536 [3:56:45<7:09:10, 1.65it/s] 35%|███▌ | 22996/65536 [3:56:46<7:24:41, 1.59it/s] 35%|███▌ | 22997/65536 [3:56:46<7:13:53, 1.63it/s] 35%|███▌ | 22998/65536 [3:56:47<7:11:02, 1.64it/s] 35%|███▌ | 22999/65536 [3:56:48<7:08:21, 1.66it/s] 35%|███▌ | 23000/65536 [3:56:48<7:00:50, 1.68it/s] {'loss': 3.0535, 'learning_rate': 6.859311864821844e-07, 'epoch': 1419.75} + 35%|███▌ | 23000/65536 [3:56:48<7:00:50, 1.68it/s] 35%|███▌ | 23001/65536 [3:56:49<7:18:53, 1.62it/s] 35%|███▌ | 23002/65536 [3:56:50<7:17:34, 1.62it/s] 35%|███▌ | 23003/65536 [3:56:50<7:26:24, 1.59it/s] 35%|███▌ | 23004/65536 [3:56:51<7:16:22, 1.62it/s] 35%|███▌ | 23005/65536 [3:56:51<7:18:39, 1.62it/s] 35%|███▌ | 23006/65536 [3:56:52<7:15:20, 1.63it/s] 35%|███▌ | 23007/65536 [3:56:53<7:07:36, 1.66it/s] 35%|███▌ | 23008/65536 [3:56:53<7:05:36, 1.67it/s] 35%|███▌ | 23009/65536 [3:56:54<7:06:37, 1.66it/s] 35%|███▌ | 23010/65536 [3:56:54<7:20:52, 1.61it/s] 35%|███▌ | 23011/65536 [3:56:55<7:26:39, 1.59it/s] 35%|███▌ | 23012/65536 [3:56:56<7:26:48, 1.59it/s] 35%|███▌ | 23013/65536 [3:56:56<7:19:15, 1.61it/s] 35%|███▌ | 23014/65536 [3:56:57<7:24:28, 1.59it/s] 35%|███▌ | 23015/65536 [3:56:58<7:20:30, 1.61it/s] 35%|███▌ | 23016/65536 [3:56:58<7:18:49, 1.61it/s] 35%|███▌ | 23017/65536 [3:56:59<7:09:32, 1.65it/s] 35%|███▌ | 23018/65536 [3:56:59<7:10:32, 1.65it/s] 35%|███▌ | 23019/65536 [3:57:00<7:05:45, 1.66it/s] 35%|███▌ | 23020/65536 [3:57:01<7:00:54, 1.68it/s] {'loss': 2.9992, 'learning_rate': 6.856556875229582e-07, 'epoch': 1420.99} + 35%|███▌ | 23020/65536 [3:57:01<7:00:54, 1.68it/s] 35%|███▌ | 23021/65536 [3:57:01<7:28:26, 1.58it/s] 35%|███▌ | 23022/65536 [3:57:02<7:27:54, 1.58it/s] 35%|███▌ | 23023/65536 [3:57:02<7:13:11, 1.64it/s] 35%|███▌ | 23024/65536 [3:57:03<7:18:41, 1.62it/s] 35%|███▌ | 23025/65536 [3:57:04<7:18:04, 1.62it/s] 35%|███▌ | 23026/65536 [3:57:04<7:17:50, 1.62it/s] 35%|███▌ | 23027/65536 [3:57:05<7:06:06, 1.66it/s] 35%|███▌ | 23028/65536 [3:57:06<7:27:37, 1.58it/s] 35%|███▌ | 23029/65536 [3:57:06<7:31:23, 1.57it/s] 35%|███▌ | 23030/65536 [3:57:07<7:25:11, 1.59it/s] 35%|███▌ | 23031/65536 [3:57:07<7:17:03, 1.62it/s] 35%|███▌ | 23032/65536 [3:57:08<7:05:48, 1.66it/s] 35%|███▌ | 23033/65536 [3:57:09<7:06:37, 1.66it/s] 35%|███▌ | 23034/65536 [3:57:09<7:17:30, 1.62it/s] 35%|███▌ | 23035/65536 [3:57:10<7:20:30, 1.61it/s] 35%|███▌ | 23036/65536 [3:57:10<7:09:01, 1.65it/s] 35%|███▌ | 23037/65536 [3:57:11<7:16:21, 1.62it/s] 35%|███▌ | 23038/65536 [3:57:12<7:21:39, 1.60it/s] 35%|███▌ | 23039/65536 [3:57:12<7:30:25, 1.57it/s] 35%|███▌ | 23040/65536 [3:57:13<7:25:43, 1.59it/s] {'loss': 2.9647, 'learning_rate': 6.85380188563732e-07, 'epoch': 1422.22} + 35%|███▌ | 23040/65536 [3:57:13<7:25:43, 1.59it/s] 35%|███▌ | 23041/65536 [3:57:14<7:28:35, 1.58it/s] 35%|███▌ | 23042/65536 [3:57:14<7:17:09, 1.62it/s] 35%|███▌ | 23043/65536 [3:57:15<7:19:00, 1.61it/s] 35%|███▌ | 23044/65536 [3:57:15<7:19:19, 1.61it/s] 35%|███▌ | 23045/65536 [3:57:16<7:18:38, 1.61it/s] 35%|███▌ | 23046/65536 [3:57:17<7:14:08, 1.63it/s] 35%|███▌ | 23047/65536 [3:57:17<7:17:24, 1.62it/s] 35%|███▌ | 23048/65536 [3:57:18<7:09:22, 1.65it/s] 35%|███▌ | 23049/65536 [3:57:18<7:04:44, 1.67it/s] 35%|███▌ | 23050/65536 [3:57:19<7:15:42, 1.63it/s] 35%|███▌ | 23051/65536 [3:57:20<7:06:56, 1.66it/s] 35%|███▌ | 23052/65536 [3:57:20<7:11:18, 1.64it/s] 35%|███▌ | 23053/65536 [3:57:21<7:22:06, 1.60it/s] 35%|███▌ | 23054/65536 [3:57:22<7:18:47, 1.61it/s] 35%|███▌ | 23055/65536 [3:57:22<7:10:25, 1.64it/s] 35%|███▌ | 23056/65536 [3:57:23<7:19:55, 1.61it/s] 35%|███▌ | 23057/65536 [3:57:23<7:21:37, 1.60it/s] 35%|███▌ | 23058/65536 [3:57:24<7:14:45, 1.63it/s] 35%|███▌ | 23059/65536 [3:57:25<7:07:30, 1.66it/s] 35%|███▌ | 23060/65536 [3:57:25<7:26:18, 1.59it/s] {'loss': 2.9628, 'learning_rate': 6.851046896045059e-07, 'epoch': 1423.46} + 35%|███▌ | 23060/65536 [3:57:25<7:26:18, 1.59it/s] 35%|███▌ | 23061/65536 [3:57:26<7:30:29, 1.57it/s] 35%|███▌ | 23062/65536 [3:57:27<7:24:27, 1.59it/s] 35%|███▌ | 23063/65536 [3:57:27<7:17:17, 1.62it/s] 35%|███▌ | 23064/65536 [3:57:28<7:11:14, 1.64it/s] 35%|███▌ | 23065/65536 [3:57:28<7:01:51, 1.68it/s] 35%|███▌ | 23066/65536 [3:57:29<7:14:15, 1.63it/s] 35%|███▌ | 23067/65536 [3:57:30<7:12:12, 1.64it/s] 35%|███▌ | 23068/65536 [3:57:30<6:59:20, 1.69it/s] 35%|███▌ | 23069/65536 [3:57:31<7:21:32, 1.60it/s] 35%|███▌ | 23070/65536 [3:57:31<7:12:47, 1.64it/s] 35%|███▌ | 23071/65536 [3:57:32<7:08:51, 1.65it/s] 35%|███▌ | 23072/65536 [3:57:33<7:16:17, 1.62it/s] 35%|███▌ | 23073/65536 [3:57:33<7:18:41, 1.61it/s] 35%|███▌ | 23074/65536 [3:57:34<7:09:07, 1.65it/s] 35%|███▌ | 23075/65536 [3:57:34<7:11:35, 1.64it/s] 35%|███▌ | 23076/65536 [3:57:35<7:16:31, 1.62it/s] 35%|███▌ | 23077/65536 [3:57:36<7:10:47, 1.64it/s] 35%|███▌ | 23078/65536 [3:57:36<7:10:49, 1.64it/s] 35%|███▌ | 23079/65536 [3:57:37<7:11:17, 1.64it/s] 35%|███▌ | 23080/65536 [3:57:38<7:10:17, 1.64it/s] {'loss': 3.0062, 'learning_rate': 6.848291906452798e-07, 'epoch': 1424.69} + 35%|███▌ | 23080/65536 [3:57:38<7:10:17, 1.64it/s] 35%|███▌ | 23081/65536 [3:57:38<7:16:30, 1.62it/s] 35%|███▌ | 23082/65536 [3:57:39<7:13:07, 1.63it/s] 35%|███▌ | 23083/65536 [3:57:39<7:09:06, 1.65it/s] 35%|███▌ | 23084/65536 [3:57:40<7:03:14, 1.67it/s] 35%|███▌ | 23085/65536 [3:57:41<7:06:44, 1.66it/s] 35%|███▌ | 23086/65536 [3:57:41<7:12:08, 1.64it/s] 35%|███▌ | 23087/65536 [3:57:42<7:17:47, 1.62it/s] 35%|███▌ | 23088/65536 [3:57:42<7:15:12, 1.63it/s] 35%|███▌ | 23089/65536 [3:57:43<7:09:45, 1.65it/s] 35%|███▌ | 23090/65536 [3:57:44<7:03:01, 1.67it/s] 35%|███▌ | 23091/65536 [3:57:44<7:21:23, 1.60it/s] 35%|███▌ | 23092/65536 [3:57:45<7:30:26, 1.57it/s] 35%|███▌ | 23093/65536 [3:57:46<7:21:47, 1.60it/s] 35%|███▌ | 23094/65536 [3:57:46<7:21:53, 1.60it/s] 35%|███▌ | 23095/65536 [3:57:47<7:16:34, 1.62it/s] 35%|███▌ | 23096/65536 [3:57:47<7:11:53, 1.64it/s] 35%|███▌ | 23097/65536 [3:57:48<7:12:31, 1.64it/s] 35%|███▌ | 23098/65536 [3:57:49<7:14:11, 1.63it/s] 35%|███▌ | 23099/65536 [3:57:49<7:12:05, 1.64it/s] 35%|███▌ | 23100/65536 [3:57:50<7:10:28, 1.64it/s] {'loss': 2.943, 'learning_rate': 6.845536916860536e-07, 'epoch': 1425.93} + 35%|███▌ | 23100/65536 [3:57:50<7:10:28, 1.64it/s] 35%|███▌ | 23101/65536 [3:57:50<7:13:37, 1.63it/s] 35%|███▌ | 23102/65536 [3:57:51<7:33:03, 1.56it/s] 35%|███▌ | 23103/65536 [3:57:52<7:26:59, 1.58it/s] 35%|███▌ | 23104/65536 [3:57:52<7:18:29, 1.61it/s] 35%|███▌ | 23105/65536 [3:57:53<7:16:38, 1.62it/s] 35%|███▌ | 23106/65536 [3:57:54<7:08:01, 1.65it/s] 35%|███▌ | 23107/65536 [3:57:54<7:16:24, 1.62it/s] 35%|███▌ | 23108/65536 [3:57:55<7:10:14, 1.64it/s] 35%|███▌ | 23109/65536 [3:57:55<7:13:14, 1.63it/s] 35%|███▌ | 23110/65536 [3:57:56<7:04:34, 1.67it/s] 35%|███▌ | 23111/65536 [3:57:57<7:14:39, 1.63it/s] 35%|███▌ | 23112/65536 [3:57:57<7:27:49, 1.58it/s] 35%|███▌ | 23113/65536 [3:57:58<7:13:02, 1.63it/s] 35%|███▌ | 23114/65536 [3:57:59<7:26:40, 1.58it/s] 35%|███▌ | 23115/65536 [3:57:59<7:14:51, 1.63it/s] 35%|███▌ | 23116/65536 [3:58:00<7:05:59, 1.66it/s] 35%|███▌ | 23117/65536 [3:58:00<7:00:53, 1.68it/s] 35%|███▌ | 23118/65536 [3:58:01<7:13:04, 1.63it/s] 35%|███▌ | 23119/65536 [3:58:01<7:09:44, 1.65it/s] 35%|███▌ | 23120/65536 [3:58:02<7:05:22, 1.66it/s] {'loss': 3.0331, 'learning_rate': 6.842781927268275e-07, 'epoch': 1427.16} + 35%|███▌ | 23120/65536 [3:58:02<7:05:22, 1.66it/s] 35%|███▌ | 23121/65536 [3:58:03<7:05:29, 1.66it/s] 35%|███▌ | 23122/65536 [3:58:03<7:14:32, 1.63it/s] 35%|███▌ | 23123/65536 [3:58:04<7:18:57, 1.61it/s] 35%|███▌ | 23124/65536 [3:58:05<7:17:13, 1.62it/s] 35%|███▌ | 23125/65536 [3:58:05<7:04:47, 1.66it/s] 35%|███▌ | 23126/65536 [3:58:06<7:09:06, 1.65it/s] 35%|███▌ | 23127/65536 [3:58:06<7:08:45, 1.65it/s] 35%|███▌ | 23128/65536 [3:58:07<7:06:11, 1.66it/s] 35%|███▌ | 23129/65536 [3:58:08<7:01:53, 1.68it/s] 35%|███▌ | 23130/65536 [3:58:08<7:15:54, 1.62it/s] 35%|███▌ | 23131/65536 [3:58:09<7:15:46, 1.62it/s] 35%|███▌ | 23132/65536 [3:58:09<7:21:51, 1.60it/s] 35%|███▌ | 23133/65536 [3:58:10<7:12:32, 1.63it/s] 35%|███▌ | 23134/65536 [3:58:11<7:23:13, 1.59it/s] 35%|███▌ | 23135/65536 [3:58:11<7:24:34, 1.59it/s] 35%|███▌ | 23136/65536 [3:58:12<7:13:24, 1.63it/s] 35%|███▌ | 23137/65536 [3:58:12<7:06:26, 1.66it/s] 35%|███▌ | 23138/65536 [3:58:13<7:24:25, 1.59it/s] 35%|███▌ | 23139/65536 [3:58:14<7:33:15, 1.56it/s] 35%|███▌ | 23140/65536 [3:58:14<7:23:26, 1.59it/s] {'loss': 3.0069, 'learning_rate': 6.840026937676013e-07, 'epoch': 1428.4} + 35%|███▌ | 23140/65536 [3:58:14<7:23:26, 1.59it/s] 35%|███▌ | 23141/65536 [3:58:15<7:20:21, 1.60it/s] 35%|███▌ | 23142/65536 [3:58:16<7:09:28, 1.65it/s] 35%|███▌ | 23143/65536 [3:58:16<7:00:09, 1.68it/s] 35%|███▌ | 23144/65536 [3:58:17<6:53:01, 1.71it/s] 35%|███▌ | 23145/65536 [3:58:17<6:58:38, 1.69it/s] 35%|███▌ | 23146/65536 [3:58:18<7:16:11, 1.62it/s] 35%|███▌ | 23147/65536 [3:58:19<7:11:51, 1.64it/s] 35%|███▌ | 23148/65536 [3:58:19<7:00:10, 1.68it/s] 35%|███▌ | 23149/65536 [3:58:20<7:06:54, 1.65it/s] 35%|███▌ | 23150/65536 [3:58:20<7:19:04, 1.61it/s] 35%|███▌ | 23151/65536 [3:58:21<7:13:25, 1.63it/s] 35%|███▌ | 23152/65536 [3:58:22<7:10:37, 1.64it/s] 35%|███▌ | 23153/65536 [3:58:22<7:02:24, 1.67it/s] 35%|███▌ | 23154/65536 [3:58:23<6:54:46, 1.70it/s] 35%|███▌ | 23155/65536 [3:58:24<7:17:57, 1.61it/s] 35%|███▌ | 23156/65536 [3:58:24<7:29:00, 1.57it/s] 35%|███▌ | 23157/65536 [3:58:25<7:19:23, 1.61it/s] 35%|███▌ | 23158/65536 [3:58:25<7:13:17, 1.63it/s] 35%|███▌ | 23159/65536 [3:58:26<7:11:17, 1.64it/s] 35%|███▌ | 23160/65536 [3:58:27<7:15:04, 1.62it/s] {'loss': 3.1004, 'learning_rate': 6.837271948083752e-07, 'epoch': 1429.63} + 35%|███▌ | 23160/65536 [3:58:27<7:15:04, 1.62it/s] 35%|███▌ | 23161/65536 [3:58:27<7:18:57, 1.61it/s] 35%|███▌ | 23162/65536 [3:58:28<7:12:15, 1.63it/s] 35%|███▌ | 23163/65536 [3:58:28<7:13:40, 1.63it/s] 35%|███▌ | 23164/65536 [3:58:29<7:13:04, 1.63it/s] 35%|███▌ | 23165/65536 [3:58:30<7:19:45, 1.61it/s] 35%|███▌ | 23166/65536 [3:58:30<7:09:20, 1.64it/s] 35%|███▌ | 23167/65536 [3:58:31<7:23:00, 1.59it/s] 35%|███▌ | 23168/65536 [3:58:32<7:12:30, 1.63it/s] 35%|███▌ | 23169/65536 [3:58:32<7:12:56, 1.63it/s] 35%|███▌ | 23170/65536 [3:58:33<7:19:21, 1.61it/s] 35%|███▌ | 23171/65536 [3:58:33<7:17:59, 1.61it/s] 35%|███▌ | 23172/65536 [3:58:34<7:14:15, 1.63it/s] 35%|███▌ | 23173/65536 [3:58:35<7:11:08, 1.64it/s] 35%|███▌ | 23174/65536 [3:58:35<7:18:43, 1.61it/s] 35%|███▌ | 23175/65536 [3:58:36<7:12:12, 1.63it/s] 35%|███▌ | 23176/65536 [3:58:36<7:08:32, 1.65it/s] 35%|███▌ | 23177/65536 [3:58:37<6:58:47, 1.69it/s] 35%|███▌ | 23178/65536 [3:58:38<7:08:47, 1.65it/s] 35%|███▌ | 23179/65536 [3:58:38<7:15:17, 1.62it/s] 35%|███▌ | 23180/65536 [3:58:39<7:27:45, 1.58it/s] {'loss': 2.9466, 'learning_rate': 6.83451695849149e-07, 'epoch': 1430.86} + 35%|███▌ | 23180/65536 [3:58:39<7:27:45, 1.58it/s] 35%|███▌ | 23181/65536 [3:58:40<7:23:12, 1.59it/s] 35%|███▌ | 23182/65536 [3:58:40<7:14:23, 1.63it/s] 35%|███▌ | 23183/65536 [3:58:41<7:28:25, 1.57it/s] 35%|███▌ | 23184/65536 [3:58:41<7:24:48, 1.59it/s] 35%|███▌ | 23185/65536 [3:58:42<7:25:32, 1.58it/s] 35%|███▌ | 23186/65536 [3:58:43<7:18:39, 1.61it/s] 35%|███▌ | 23187/65536 [3:58:43<7:07:50, 1.65it/s] 35%|███▌ | 23188/65536 [3:58:44<7:14:06, 1.63it/s] 35%|███▌ | 23189/65536 [3:58:45<7:15:37, 1.62it/s] 35%|███▌ | 23190/65536 [3:58:45<7:15:29, 1.62it/s] 35%|███▌ | 23191/65536 [3:58:46<7:16:51, 1.62it/s] 35%|███▌ | 23192/65536 [3:58:46<7:19:17, 1.61it/s] 35%|███▌ | 23193/65536 [3:58:47<7:10:18, 1.64it/s] 35%|███▌ | 23194/65536 [3:58:48<7:03:56, 1.66it/s] 35%|███▌ | 23195/65536 [3:58:48<7:02:38, 1.67it/s] 35%|███▌ | 23196/65536 [3:58:49<7:16:31, 1.62it/s] 35%|███▌ | 23197/65536 [3:58:49<7:05:28, 1.66it/s] 35%|███▌ | 23198/65536 [3:58:50<7:12:40, 1.63it/s] 35%|███▌ | 23199/65536 [3:58:51<7:19:38, 1.60it/s] 35%|███▌ | 23200/65536 [3:58:51<7:20:57, 1.60it/s] {'loss': 3.0308, 'learning_rate': 6.831761968899229e-07, 'epoch': 1432.1} + 35%|███▌ | 23200/65536 [3:58:51<7:20:57, 1.60it/s] 35%|███▌ | 23201/65536 [3:58:52<7:14:48, 1.62it/s] 35%|███▌ | 23202/65536 [3:58:52<7:15:51, 1.62it/s] 35%|███▌ | 23203/65536 [3:58:53<7:29:49, 1.57it/s] 35%|███▌ | 23204/65536 [3:58:54<7:28:44, 1.57it/s] 35%|███▌ | 23205/65536 [3:58:54<7:25:07, 1.58it/s] 35%|███▌ | 23206/65536 [3:58:55<7:09:09, 1.64it/s] 35%|███▌ | 23207/65536 [3:58:56<7:13:57, 1.63it/s] 35%|███▌ | 23208/65536 [3:58:56<7:09:59, 1.64it/s] 35%|███▌ | 23209/65536 [3:58:57<7:12:39, 1.63it/s] 35%|███▌ | 23210/65536 [3:58:57<7:09:26, 1.64it/s] 35%|███▌ | 23211/65536 [3:58:58<7:14:20, 1.62it/s] 35%|███▌ | 23212/65536 [3:58:59<7:05:31, 1.66it/s] 35%|███▌ | 23213/65536 [3:58:59<6:55:47, 1.70it/s] 35%|███▌ | 23214/65536 [3:59:00<6:55:56, 1.70it/s] 35%|███▌ | 23215/65536 [3:59:00<7:13:34, 1.63it/s] 35%|███▌ | 23216/65536 [3:59:01<7:09:19, 1.64it/s] 35%|███▌ | 23217/65536 [3:59:02<7:12:39, 1.63it/s] 35%|███▌ | 23218/65536 [3:59:02<7:06:01, 1.66it/s] 35%|███▌ | 23219/65536 [3:59:03<7:01:47, 1.67it/s] 35%|███▌ | 23220/65536 [3:59:03<7:03:38, 1.66it/s] {'loss': 3.0082, 'learning_rate': 6.829006979306967e-07, 'epoch': 1433.33} + 35%|███▌ | 23220/65536 [3:59:03<7:03:38, 1.66it/s] 35%|███▌ | 23221/65536 [3:59:04<6:58:37, 1.68it/s] 35%|███▌ | 23222/65536 [3:59:05<6:58:52, 1.68it/s] 35%|███▌ | 23223/65536 [3:59:05<7:09:54, 1.64it/s] 35%|███▌ | 23224/65536 [3:59:06<7:13:53, 1.63it/s] 35%|███▌ | 23225/65536 [3:59:06<7:09:06, 1.64it/s] 35%|███▌ | 23226/65536 [3:59:07<7:14:21, 1.62it/s] 35%|███▌ | 23227/65536 [3:59:08<7:12:59, 1.63it/s] 35%|███▌ | 23228/65536 [3:59:08<7:15:24, 1.62it/s] 35%|███▌ | 23229/65536 [3:59:09<7:07:48, 1.65it/s] 35%|███▌ | 23230/65536 [3:59:10<7:11:43, 1.63it/s] 35%|███▌ | 23231/65536 [3:59:10<7:34:00, 1.55it/s] 35%|███▌ | 23232/65536 [3:59:11<7:23:14, 1.59it/s] 35%|███▌ | 23233/65536 [3:59:11<7:12:09, 1.63it/s] 35%|███▌ | 23234/65536 [3:59:12<6:59:16, 1.68it/s] 35%|███▌ | 23235/65536 [3:59:13<7:05:51, 1.66it/s] 35%|███▌ | 23236/65536 [3:59:13<7:10:03, 1.64it/s] 35%|███▌ | 23237/65536 [3:59:14<7:07:57, 1.65it/s] 35%|███▌ | 23238/65536 [3:59:14<7:15:26, 1.62it/s] 35%|███▌ | 23239/65536 [3:59:15<7:11:53, 1.63it/s] 35%|███▌ | 23240/65536 [3:59:16<7:02:24, 1.67it/s] {'loss': 2.9983, 'learning_rate': 6.826251989714705e-07, 'epoch': 1434.57} + 35%|███▌ | 23240/65536 [3:59:16<7:02:24, 1.67it/s] 35%|███▌ | 23241/65536 [3:59:16<7:03:54, 1.66it/s] 35%|███▌ | 23242/65536 [3:59:17<7:01:22, 1.67it/s] 35%|███▌ | 23243/65536 [3:59:17<7:03:55, 1.66it/s] 35%|███▌ | 23244/65536 [3:59:18<7:16:47, 1.61it/s] 35%|███▌ | 23245/65536 [3:59:19<7:34:10, 1.55it/s] 35%|███▌ | 23246/65536 [3:59:19<7:30:46, 1.56it/s] 35%|█���█▌ | 23247/65536 [3:59:20<7:15:27, 1.62it/s] 35%|███▌ | 23248/65536 [3:59:21<7:31:20, 1.56it/s] 35%|███▌ | 23249/65536 [3:59:21<7:19:20, 1.60it/s] 35%|███▌ | 23250/65536 [3:59:22<7:15:05, 1.62it/s] 35%|███▌ | 23251/65536 [3:59:22<7:07:00, 1.65it/s] 35%|███▌ | 23252/65536 [3:59:23<7:13:12, 1.63it/s] 35%|███▌ | 23253/65536 [3:59:24<7:14:30, 1.62it/s] 35%|███▌ | 23254/65536 [3:59:24<7:16:49, 1.61it/s] 35%|███▌ | 23255/65536 [3:59:25<7:30:29, 1.56it/s] 35%|███▌ | 23256/65536 [3:59:26<7:34:15, 1.55it/s] 35%|███▌ | 23257/65536 [3:59:26<7:30:01, 1.57it/s] 35%|███▌ | 23258/65536 [3:59:27<7:20:10, 1.60it/s] 35%|███▌ | 23259/65536 [3:59:28<7:16:42, 1.61it/s] 35%|███▌ | 23260/65536 [3:59:28<7:01:25, 1.67it/s] {'loss': 2.9632, 'learning_rate': 6.823497000122444e-07, 'epoch': 1435.8} + 35%|███▌ | 23260/65536 [3:59:28<7:01:25, 1.67it/s] 35%|███▌ | 23261/65536 [3:59:29<7:06:49, 1.65it/s] 35%|███▌ | 23262/65536 [3:59:29<7:03:54, 1.66it/s] 35%|███▌ | 23263/65536 [3:59:30<6:58:10, 1.68it/s] 35%|███▌ | 23264/65536 [3:59:31<7:16:44, 1.61it/s] 35%|███▌ | 23265/65536 [3:59:31<7:02:31, 1.67it/s] 36%|███▌ | 23266/65536 [3:59:32<6:56:04, 1.69it/s] 36%|███▌ | 23267/65536 [3:59:32<7:05:07, 1.66it/s] 36%|███▌ | 23268/65536 [3:59:33<7:02:45, 1.67it/s] 36%|███▌ | 23269/65536 [3:59:34<7:08:03, 1.65it/s] 36%|███▌ | 23270/65536 [3:59:34<7:18:12, 1.61it/s] 36%|███▌ | 23271/65536 [3:59:35<7:15:29, 1.62it/s] 36%|███▌ | 23272/65536 [3:59:35<7:23:13, 1.59it/s] 36%|███▌ | 23273/65536 [3:59:36<7:14:17, 1.62it/s] 36%|███▌ | 23274/65536 [3:59:37<7:08:22, 1.64it/s] 36%|███▌ | 23275/65536 [3:59:37<7:11:37, 1.63it/s] 36%|███▌ | 23276/65536 [3:59:38<7:04:19, 1.66it/s] 36%|███▌ | 23277/65536 [3:59:39<7:25:32, 1.58it/s] 36%|███▌ | 23278/65536 [3:59:39<7:18:35, 1.61it/s] 36%|███▌ | 23279/65536 [3:59:40<7:11:01, 1.63it/s] 36%|███▌ | 23280/65536 [3:59:40<7:25:49, 1.58it/s] {'loss': 2.986, 'learning_rate': 6.820742010530181e-07, 'epoch': 1437.04} + 36%|███▌ | 23280/65536 [3:59:40<7:25:49, 1.58it/s] 36%|███▌ | 23281/65536 [3:59:41<7:17:31, 1.61it/s] 36%|███▌ | 23282/65536 [3:59:42<7:19:41, 1.60it/s] 36%|███▌ | 23283/65536 [3:59:42<7:14:53, 1.62it/s] 36%|███▌ | 23284/65536 [3:59:43<7:08:26, 1.64it/s] 36%|███▌ | 23285/65536 [3:59:43<7:06:51, 1.65it/s] 36%|███▌ | 23286/65536 [3:59:44<7:11:46, 1.63it/s] 36%|███▌ | 23287/65536 [3:59:45<7:12:59, 1.63it/s] 36%|███▌ | 23288/65536 [3:59:45<7:19:43, 1.60it/s] 36%|███▌ | 23289/65536 [3:59:46<7:25:03, 1.58it/s] 36%|███▌ | 23290/65536 [3:59:47<7:25:13, 1.58it/s] 36%|███▌ | 23291/65536 [3:59:47<7:10:21, 1.64it/s] 36%|███▌ | 23292/65536 [3:59:48<7:19:46, 1.60it/s] 36%|███▌ | 23293/65536 [3:59:48<7:20:10, 1.60it/s] 36%|███▌ | 23294/65536 [3:59:49<7:06:15, 1.65it/s] 36%|███▌ | 23295/65536 [3:59:50<7:00:16, 1.68it/s] 36%|███▌ | 23296/65536 [3:59:50<7:08:13, 1.64it/s] 36%|███▌ | 23297/65536 [3:59:51<7:06:04, 1.65it/s] 36%|███▌ | 23298/65536 [3:59:51<7:12:28, 1.63it/s] 36%|███▌ | 23299/65536 [3:59:52<7:10:41, 1.63it/s] 36%|███▌ | 23300/65536 [3:59:53<7:01:10, 1.67it/s] {'loss': 3.0113, 'learning_rate': 6.81798702093792e-07, 'epoch': 1438.27} + 36%|███▌ | 23300/65536 [3:59:53<7:01:10, 1.67it/s] 36%|███▌ | 23301/65536 [3:59:53<7:00:07, 1.68it/s] 36%|███▌ | 23302/65536 [3:59:54<7:00:02, 1.68it/s] 36%|███▌ | 23303/65536 [3:59:54<7:00:05, 1.68it/s] 36%|███▌ | 23304/65536 [3:59:55<6:53:24, 1.70it/s] 36%|███▌ | 23305/65536 [3:59:56<7:06:49, 1.65it/s] 36%|███▌ | 23306/65536 [3:59:56<7:08:54, 1.64it/s] 36%|███▌ | 23307/65536 [3:59:57<7:06:03, 1.65it/s] 36%|███▌ | 23308/65536 [3:59:57<7:17:13, 1.61it/s] 36%|███▌ | 23309/65536 [3:59:58<7:19:27, 1.60it/s] 36%|███▌ | 23310/65536 [3:59:59<7:32:51, 1.55it/s] 36%|███▌ | 23311/65536 [3:59:59<7:29:43, 1.56it/s] 36%|███▌ | 23312/65536 [4:00:00<7:31:13, 1.56it/s] 36%|███▌ | 23313/65536 [4:00:01<7:18:43, 1.60it/s] 36%|███▌ | 23314/65536 [4:00:01<7:10:36, 1.63it/s] 36%|███▌ | 23315/65536 [4:00:02<7:18:31, 1.60it/s] 36%|███▌ | 23316/65536 [4:00:02<7:10:24, 1.63it/s] 36%|███▌ | 23317/65536 [4:00:03<7:10:37, 1.63it/s] 36%|███▌ | 23318/65536 [4:00:04<7:02:23, 1.67it/s] 36%|███▌ | 23319/65536 [4:00:04<7:01:06, 1.67it/s] 36%|███▌ | 23320/65536 [4:00:05<7:03:18, 1.66it/s] {'loss': 2.9418, 'learning_rate': 6.81523203134566e-07, 'epoch': 1439.51} + 36%|███▌ | 23320/65536 [4:00:05<7:03:18, 1.66it/s] 36%|███▌ | 23321/65536 [4:00:05<7:07:04, 1.65it/s] 36%|███▌ | 23322/65536 [4:00:06<7:29:02, 1.57it/s] 36%|███▌ | 23323/65536 [4:00:07<7:28:43, 1.57it/s] 36%|███▌ | 23324/65536 [4:00:07<7:08:38, 1.64it/s] 36%|███▌ | 23325/65536 [4:00:08<7:05:44, 1.65it/s] 36%|███▌ | 23326/65536 [4:00:09<7:15:42, 1.61it/s] 36%|███▌ | 23327/65536 [4:00:09<7:17:30, 1.61it/s] 36%|███▌ | 23328/65536 [4:00:10<7:17:46, 1.61it/s] 36%|███▌ | 23329/65536 [4:00:11<7:39:59, 1.53it/s] 36%|███▌ | 23330/65536 [4:00:11<7:33:46, 1.55it/s] 36%|███▌ | 23331/65536 [4:00:12<7:24:59, 1.58it/s] 36%|███▌ | 23332/65536 [4:00:12<7:14:39, 1.62it/s] 36%|███▌ | 23333/65536 [4:00:13<7:25:55, 1.58it/s] 36%|███▌ | 23334/65536 [4:00:14<7:18:10, 1.61it/s] 36%|███▌ | 23335/65536 [4:00:14<7:09:55, 1.64it/s] 36%|███▌ | 23336/65536 [4:00:15<7:13:10, 1.62it/s] 36%|███▌ | 23337/65536 [4:00:15<7:07:26, 1.65it/s] 36%|███▌ | 23338/65536 [4:00:16<7:08:45, 1.64it/s] 36%|███▌ | 23339/65536 [4:00:17<7:19:47, 1.60it/s] 36%|███▌ | 23340/65536 [4:00:17<7:11:40, 1.63it/s] {'loss': 2.9326, 'learning_rate': 6.812477041753397e-07, 'epoch': 1440.74} + 36%|███▌ | 23340/65536 [4:00:17<7:11:40, 1.63it/s] 36%|███▌ | 23341/65536 [4:00:18<7:03:35, 1.66it/s] 36%|███▌ | 23342/65536 [4:00:19<7:14:43, 1.62it/s] 36%|███▌ | 23343/65536 [4:00:19<7:05:32, 1.65it/s] 36%|███▌ | 23344/65536 [4:00:20<7:07:01, 1.65it/s] 36%|███▌ | 23345/65536 [4:00:20<7:18:28, 1.60it/s] 36%|███▌ | 23346/65536 [4:00:21<7:06:03, 1.65it/s] 36%|███▌ | 23347/65536 [4:00:22<7:11:08, 1.63it/s] 36%|███▌ | 23348/65536 [4:00:22<7:02:01, 1.67it/s] 36%|███▌ | 23349/65536 [4:00:23<7:07:14, 1.65it/s] 36%|███▌ | 23350/65536 [4:00:23<7:09:23, 1.64it/s] 36%|███▌ | 23351/65536 [4:00:24<7:10:28, 1.63it/s] 36%|███▌ | 23352/65536 [4:00:25<7:11:49, 1.63it/s] 36%|███▌ | 23353/65536 [4:00:25<7:08:34, 1.64it/s] 36%|███▌ | 23354/65536 [4:00:26<7:21:45, 1.59it/s] 36%|███▌ | 23355/65536 [4:00:27<7:10:44, 1.63it/s] 36%|███▌ | 23356/65536 [4:00:27<7:27:02, 1.57it/s] 36%|███▌ | 23357/65536 [4:00:28<7:24:51, 1.58it/s] 36%|███▌ | 23358/65536 [4:00:28<7:21:26, 1.59it/s] 36%|███▌ | 23359/65536 [4:00:29<7:06:00, 1.65it/s] 36%|███▌ | 23360/65536 [4:00:30<7:03:37, 1.66it/s] {'loss': 2.9788, 'learning_rate': 6.809722052161136e-07, 'epoch': 1441.98} + 36%|███▌ | 23360/65536 [4:00:30<7:03:37, 1.66it/s] 36%|███▌ | 23361/65536 [4:00:30<7:19:43, 1.60it/s] 36%|███▌ | 23362/65536 [4:00:31<7:05:18, 1.65it/s] 36%|███▌ | 23363/65536 [4:00:31<7:08:59, 1.64it/s] 36%|███▌ | 23364/65536 [4:00:32<7:03:53, 1.66it/s] 36%|███▌ | 23365/65536 [4:00:33<7:07:15, 1.64it/s] 36%|███▌ | 23366/65536 [4:00:33<7:24:29, 1.58it/s] 36%|███▌ | 23367/65536 [4:00:34<7:22:55, 1.59it/s] 36%|███▌ | 23368/65536 [4:00:35<7:15:34, 1.61it/s] 36%|███▌ | 23369/65536 [4:00:35<7:17:58, 1.60it/s] 36%|███▌ | 23370/65536 [4:00:36<7:23:36, 1.58it/s] 36%|███▌ | 23371/65536 [4:00:37<7:32:21, 1.55it/s] 36%|███▌ | 23372/65536 [4:00:37<7:13:16, 1.62it/s] 36%|███▌ | 23373/65536 [4:00:38<7:05:55, 1.65it/s] 36%|███▌ | 23374/65536 [4:00:38<6:57:02, 1.68it/s] 36%|███▌ | 23375/65536 [4:00:39<6:58:47, 1.68it/s] 36%|███▌ | 23376/65536 [4:00:39<6:56:28, 1.69it/s] 36%|███▌ | 23377/65536 [4:00:40<7:09:26, 1.64it/s] 36%|███▌ | 23378/65536 [4:00:41<7:11:48, 1.63it/s] 36%|███▌ | 23379/65536 [4:00:41<7:14:27, 1.62it/s] 36%|███▌ | 23380/65536 [4:00:42<7:23:55, 1.58it/s] {'loss': 3.0468, 'learning_rate': 6.806967062568874e-07, 'epoch': 1443.21} + 36%|███▌ | 23380/65536 [4:00:42<7:23:55, 1.58it/s] 36%|███▌ | 23381/65536 [4:00:43<7:13:37, 1.62it/s] 36%|███▌ | 23382/65536 [4:00:43<7:07:19, 1.64it/s] 36%|███▌ | 23383/65536 [4:00:44<7:05:53, 1.65it/s] 36%|███▌ | 23384/65536 [4:00:44<7:05:39, 1.65it/s] 36%|███▌ | 23385/65536 [4:00:45<7:08:55, 1.64it/s] 36%|███▌ | 23386/65536 [4:00:46<7:21:21, 1.59it/s] 36%|███▌ | 23387/65536 [4:00:46<7:13:36, 1.62it/s] 36%|███▌ | 23388/65536 [4:00:47<7:12:59, 1.62it/s] 36%|███▌ | 23389/65536 [4:00:47<7:06:57, 1.65it/s] 36%|███▌ | 23390/65536 [4:00:48<7:13:02, 1.62it/s] 36%|███▌ | 23391/65536 [4:00:49<7:05:48, 1.65it/s] 36%|███▌ | 23392/65536 [4:00:49<7:12:45, 1.62it/s] 36%|███▌ | 23393/65536 [4:00:50<7:21:13, 1.59it/s] 36%|███▌ | 23394/65536 [4:00:51<7:11:45, 1.63it/s] 36%|███▌ | 23395/65536 [4:00:51<7:13:52, 1.62it/s] 36%|███▌ | 23396/65536 [4:00:52<7:11:24, 1.63it/s] 36%|███▌ | 23397/65536 [4:00:52<7:05:27, 1.65it/s] 36%|███▌ | 23398/65536 [4:00:53<7:10:38, 1.63it/s] 36%|███▌ | 23399/65536 [4:00:54<6:59:46, 1.67it/s] 36%|███▌ | 23400/65536 [4:00:54<6:59:37, 1.67it/s] {'loss': 3.0129, 'learning_rate': 6.804212072976613e-07, 'epoch': 1444.44} + 36%|███▌ | 23400/65536 [4:00:54<6:59:37, 1.67it/s] 36%|███▌ | 23401/65536 [4:00:55<7:02:20, 1.66it/s] 36%|███▌ | 23402/65536 [4:00:55<7:06:27, 1.65it/s] 36%|███▌ | 23403/65536 [4:00:56<7:07:43, 1.64it/s] 36%|███▌ | 23404/65536 [4:00:57<7:14:17, 1.62it/s] 36%|███▌ | 23405/65536 [4:00:57<7:10:59, 1.63it/s] 36%|███▌ | 23406/65536 [4:00:58<7:19:36, 1.60it/s] 36%|███▌ | 23407/65536 [4:00:58<7:05:48, 1.65it/s] 36%|███▌ | 23408/65536 [4:00:59<7:17:24, 1.61it/s] 36%|███▌ | 23409/65536 [4:01:00<7:19:56, 1.60it/s] 36%|███▌ | 23410/65536 [4:01:00<7:20:48, 1.59it/s] 36%|███▌ | 23411/65536 [4:01:01<7:10:57, 1.63it/s] 36%|███▌ | 23412/65536 [4:01:02<7:10:19, 1.63it/s] 36%|███▌ | 23413/65536 [4:01:02<7:03:00, 1.66it/s] 36%|███▌ | 23414/65536 [4:01:03<6:58:17, 1.68it/s] 36%|███▌ | 23415/65536 [4:01:03<6:55:19, 1.69it/s] 36%|███▌ | 23416/65536 [4:01:04<6:59:28, 1.67it/s] 36%|███▌ | 23417/65536 [4:01:05<7:01:27, 1.67it/s] 36%|███▌ | 23418/65536 [4:01:05<7:14:01, 1.62it/s] 36%|███▌ | 23419/65536 [4:01:06<7:15:20, 1.61it/s] 36%|███▌ | 23420/65536 [4:01:06<7:11:36, 1.63it/s] {'loss': 2.9964, 'learning_rate': 6.801457083384351e-07, 'epoch': 1445.68} + 36%|███▌ | 23420/65536 [4:01:06<7:11:36, 1.63it/s] 36%|███▌ | 23421/65536 [4:01:07<7:20:22, 1.59it/s] 36%|███▌ | 23422/65536 [4:01:08<7:22:22, 1.59it/s] 36%|███▌ | 23423/65536 [4:01:08<7:20:00, 1.60it/s] 36%|███▌ | 23424/65536 [4:01:09<7:21:28, 1.59it/s] 36%|███▌ | 23425/65536 [4:01:10<7:14:07, 1.62it/s] 36%|███▌ | 23426/65536 [4:01:10<7:23:21, 1.58it/s] 36%|███▌ | 23427/65536 [4:01:11<7:15:56, 1.61it/s] 36%|███▌ | 23428/65536 [4:01:11<7:12:33, 1.62it/s] 36%|███▌ | 23429/65536 [4:01:12<7:15:11, 1.61it/s] 36%|███▌ | 23430/65536 [4:01:13<7:15:14, 1.61it/s] 36%|███▌ | 23431/65536 [4:01:13<7:18:34, 1.60it/s] 36%|███▌ | 23432/65536 [4:01:14<7:24:16, 1.58it/s] 36%|███▌ | 23433/65536 [4:01:15<7:23:23, 1.58it/s] 36%|███▌ | 23434/65536 [4:01:15<7:21:33, 1.59it/s] 36%|███▌ | 23435/65536 [4:01:16<7:19:14, 1.60it/s] 36%|███▌ | 23436/65536 [4:01:16<7:16:41, 1.61it/s] 36%|███▌ | 23437/65536 [4:01:17<7:13:54, 1.62it/s] 36%|███▌ | 23438/65536 [4:01:18<7:04:46, 1.65it/s] 36%|███▌ | 23439/65536 [4:01:18<7:09:36, 1.63it/s] 36%|███▌ | 23440/65536 [4:01:19<7:01:04, 1.67it/s] {'loss': 3.0094, 'learning_rate': 6.79870209379209e-07, 'epoch': 1446.91} + 36%|███▌ | 23440/65536 [4:01:19<7:01:04, 1.67it/s] 36%|███▌ | 23441/65536 [4:01:19<6:54:25, 1.69it/s] 36%|███▌ | 23442/65536 [4:01:20<7:18:50, 1.60it/s] 36%|███▌ | 23443/65536 [4:01:21<7:09:59, 1.63it/s] 36%|███▌ | 23444/65536 [4:01:21<7:06:36, 1.64it/s] 36%|███▌ | 23445/65536 [4:01:22<7:01:08, 1.67it/s] 36%|███▌ | 23446/65536 [4:01:22<7:03:30, 1.66it/s] 36%|███▌ | 23447/65536 [4:01:23<7:04:31, 1.65it/s] 36%|███▌ | 23448/65536 [4:01:24<6:57:55, 1.68it/s] 36%|███▌ | 23449/65536 [4:01:24<7:01:14, 1.67it/s] 36%|███▌ | 23450/65536 [4:01:25<6:56:56, 1.68it/s] 36%|███▌ | 23451/65536 [4:01:25<6:58:39, 1.68it/s] 36%|███▌ | 23452/65536 [4:01:26<7:13:14, 1.62it/s] 36%|███▌ | 23453/65536 [4:01:27<7:16:43, 1.61it/s] 36%|███▌ | 23454/65536 [4:01:27<7:10:57, 1.63it/s] 36%|███▌ | 23455/65536 [4:01:28<7:13:29, 1.62it/s] 36%|███▌ | 23456/65536 [4:01:29<7:20:37, 1.59it/s] 36%|███▌ | 23457/65536 [4:01:29<7:09:53, 1.63it/s] 36%|███▌ | 23458/65536 [4:01:30<7:41:40, 1.52it/s] 36%|███▌ | 23459/65536 [4:01:31<7:28:54, 1.56it/s] 36%|███▌ | 23460/65536 [4:01:31<7:19:45, 1.59it/s] {'loss': 2.9579, 'learning_rate': 6.795947104199829e-07, 'epoch': 1448.15} + 36%|███▌ | 23460/65536 [4:01:31<7:19:45, 1.59it/s] 36%|███▌ | 23461/65536 [4:01:32<7:18:32, 1.60it/s] 36%|███▌ | 23462/65536 [4:01:32<7:23:03, 1.58it/s] 36%|███▌ | 23463/65536 [4:01:33<7:13:11, 1.62it/s] 36%|███▌ | 23464/65536 [4:01:34<7:13:56, 1.62it/s] 36%|███▌ | 23465/65536 [4:01:34<7:17:47, 1.60it/s] 36%|███▌ | 23466/65536 [4:01:35<7:09:19, 1.63it/s] 36%|███▌ | 23467/65536 [4:01:36<7:21:00, 1.59it/s] 36%|███▌ | 23468/65536 [4:01:36<7:11:02, 1.63it/s] 36%|███▌ | 23469/65536 [4:01:37<7:06:52, 1.64it/s] 36%|███▌ | 23470/65536 [4:01:37<6:56:51, 1.68it/s] 36%|███▌ | 23471/65536 [4:01:38<6:48:16, 1.72it/s] 36%|███▌ | 23472/65536 [4:01:38<7:03:28, 1.66it/s] 36%|███▌ | 23473/65536 [4:01:39<6:55:33, 1.69it/s] 36%|███▌ | 23474/65536 [4:01:40<7:16:46, 1.61it/s] 36%|███▌ | 23475/65536 [4:01:40<7:14:13, 1.61it/s] 36%|███▌ | 23476/65536 [4:01:41<7:11:55, 1.62it/s] 36%|███▌ | 23477/65536 [4:01:42<7:19:20, 1.60it/s] 36%|███▌ | 23478/65536 [4:01:42<7:14:19, 1.61it/s] 36%|███▌ | 23479/65536 [4:01:43<7:18:41, 1.60it/s] 36%|███▌ | 23480/65536 [4:01:43<7:12:23, 1.62it/s] {'loss': 3.0166, 'learning_rate': 6.793192114607567e-07, 'epoch': 1449.38} + 36%|███▌ | 23480/65536 [4:01:43<7:12:23, 1.62it/s] 36%|███▌ | 23481/65536 [4:01:44<6:59:35, 1.67it/s] 36%|███▌ | 23482/65536 [4:01:45<7:11:05, 1.63it/s] 36%|███▌ | 23483/65536 [4:01:45<7:09:35, 1.63it/s] 36%|███▌ | 23484/65536 [4:01:46<7:12:00, 1.62it/s] 36%|███▌ | 23485/65536 [4:01:47<7:12:39, 1.62it/s] 36%|███▌ | 23486/65536 [4:01:47<7:04:05, 1.65it/s] 36%|███▌ | 23487/65536 [4:01:48<6:49:46, 1.71it/s] 36%|███▌ | 23488/65536 [4:01:48<6:51:48, 1.70it/s] 36%|███▌ | 23489/65536 [4:01:49<6:57:07, 1.68it/s] 36%|███▌ | 23490/65536 [4:01:49<7:09:03, 1.63it/s] 36%|███▌ | 23491/65536 [4:01:50<7:19:55, 1.59it/s] 36%|███▌ | 23492/65536 [4:01:51<7:26:28, 1.57it/s] 36%|███▌ | 23493/65536 [4:01:51<7:17:08, 1.60it/s] 36%|███▌ | 23494/65536 [4:01:52<7:07:39, 1.64it/s] 36%|███▌ | 23495/65536 [4:01:53<7:12:00, 1.62it/s] 36%|███▌ | 23496/65536 [4:01:53<7:10:38, 1.63it/s] 36%|███▌ | 23497/65536 [4:01:54<7:05:00, 1.65it/s] 36%|███▌ | 23498/65536 [4:01:54<7:04:23, 1.65it/s] 36%|███▌ | 23499/65536 [4:01:55<7:05:24, 1.65it/s] 36%|███▌ | 23500/65536 [4:01:56<7:01:57, 1.66it/s] {'loss': 3.0441, 'learning_rate': 6.790437125015305e-07, 'epoch': 1450.62} + 36%|███▌ | 23500/65536 [4:01:56<7:01:57, 1.66it/s] 36%|███▌ | 23501/65536 [4:01:56<6:55:04, 1.69it/s] 36%|███▌ | 23502/65536 [4:01:57<7:06:14, 1.64it/s] 36%|███▌ | 23503/65536 [4:01:58<7:23:37, 1.58it/s] 36%|███▌ | 23504/65536 [4:01:58<7:20:26, 1.59it/s] 36%|███▌ | 23505/65536 [4:01:59<7:15:57, 1.61it/s] 36%|███▌ | 23506/65536 [4:01:59<7:11:56, 1.62it/s] 36%|███▌ | 23507/65536 [4:02:00<7:19:18, 1.59it/s] 36%|███▌ | 23508/65536 [4:02:01<7:27:58, 1.56it/s] 36%|███▌ | 23509/65536 [4:02:01<7:18:22, 1.60it/s] 36%|███▌ | 23510/65536 [4:02:02<7:11:41, 1.62it/s] 36%|███▌ | 23511/65536 [4:02:02<7:13:20, 1.62it/s] 36%|███▌ | 23512/65536 [4:02:03<7:09:00, 1.63it/s] 36%|███▌ | 23513/65536 [4:02:04<7:02:58, 1.66it/s] 36%|███▌ | 23514/65536 [4:02:04<7:01:05, 1.66it/s] 36%|███▌ | 23515/65536 [4:02:05<7:12:57, 1.62it/s] 36%|███▌ | 23516/65536 [4:02:05<6:56:28, 1.68it/s] 36%|███▌ | 23517/65536 [4:02:06<7:05:28, 1.65it/s] 36%|███▌ | 23518/65536 [4:02:07<7:10:21, 1.63it/s] 36%|███▌ | 23519/65536 [4:02:07<7:19:49, 1.59it/s] 36%|███▌ | 23520/65536 [4:02:08<7:09:55, 1.63it/s] {'loss': 2.9766, 'learning_rate': 6.787682135423043e-07, 'epoch': 1451.85} + 36%|███▌ | 23520/65536 [4:02:08<7:09:55, 1.63it/s] 36%|███▌ | 23521/65536 [4:02:09<7:13:15, 1.62it/s] 36%|███▌ | 23522/65536 [4:02:09<7:13:41, 1.61it/s] 36%|███▌ | 23523/65536 [4:02:10<7:24:40, 1.57it/s] 36%|███▌ | 23524/65536 [4:02:11<7:23:08, 1.58it/s] 36%|███▌ | 23525/65536 [4:02:11<7:16:26, 1.60it/s] 36%|███▌ | 23526/65536 [4:02:12<7:20:12, 1.59it/s] 36%|███▌ | 23527/65536 [4:02:12<7:10:19, 1.63it/s] 36%|███▌ | 23528/65536 [4:02:13<6:54:27, 1.69it/s] 36%|███▌ | 23529/65536 [4:02:14<7:00:12, 1.67it/s] 36%|███▌ | 23530/65536 [4:02:14<6:58:59, 1.67it/s] 36%|███▌ | 23531/65536 [4:02:15<6:55:56, 1.68it/s] 36%|███▌ | 23532/65536 [4:02:15<6:59:06, 1.67it/s] 36%|███▌ | 23533/65536 [4:02:16<7:06:56, 1.64it/s] 36%|███▌ | 23534/65536 [4:02:17<7:04:26, 1.65it/s] 36%|███▌ | 23535/65536 [4:02:17<7:04:12, 1.65it/s] 36%|███▌ | 23536/65536 [4:02:18<7:08:30, 1.63it/s] 36%|███▌ | 23537/65536 [4:02:18<7:10:13, 1.63it/s] 36%|███▌ | 23538/65536 [4:02:19<7:04:40, 1.65it/s] 36%|███▌ | 23539/65536 [4:02:20<7:16:34, 1.60it/s] 36%|███▌ | 23540/65536 [4:02:20<7:11:24, 1.62it/s] {'loss': 3.0122, 'learning_rate': 6.784927145830782e-07, 'epoch': 1453.09} + 36%|███▌ | 23540/65536 [4:02:20<7:11:24, 1.62it/s] 36%|███▌ | 23541/65536 [4:02:21<7:12:05, 1.62it/s] 36%|███▌ | 23542/65536 [4:02:21<7:07:25, 1.64it/s] 36%|███▌ | 23543/65536 [4:02:22<7:03:58, 1.65it/s] 36%|███▌ | 23544/65536 [4:02:23<7:05:44, 1.64it/s] 36%|███▌ | 23545/65536 [4:02:23<6:58:57, 1.67it/s] 36%|███▌ | 23546/65536 [4:02:24<6:57:27, 1.68it/s] 36%|███▌ | 23547/65536 [4:02:24<6:48:12, 1.71it/s] 36%|███▌ | 23548/65536 [4:02:25<6:51:00, 1.70it/s] 36%|███▌ | 23549/65536 [4:02:26<7:04:14, 1.65it/s] 36%|███▌ | 23550/65536 [4:02:26<7:16:54, 1.60it/s] 36%|███▌ | 23551/65536 [4:02:27<7:08:08, 1.63it/s] 36%|███▌ | 23552/65536 [4:02:27<6:54:02, 1.69it/s] 36%|███▌ | 23553/65536 [4:02:28<7:05:24, 1.64it/s] 36%|███▌ | 23554/65536 [4:02:29<7:09:39, 1.63it/s] 36%|███▌ | 23555/65536 [4:02:29<7:18:42, 1.59it/s] 36%|███▌ | 23556/65536 [4:02:30<7:11:06, 1.62it/s] 36%|███▌ | 23557/65536 [4:02:31<7:07:03, 1.64it/s] 36%|███▌ | 23558/65536 [4:02:31<7:04:20, 1.65it/s] 36%|███▌ | 23559/65536 [4:02:32<7:04:39, 1.65it/s] 36%|███▌ | 23560/65536 [4:02:32<6:55:24, 1.68it/s] {'loss': 3.0115, 'learning_rate': 6.782172156238519e-07, 'epoch': 1454.32} + 36%|███▌ | 23560/65536 [4:02:32<6:55:24, 1.68it/s] 36%|███▌ | 23561/65536 [4:02:33<7:03:38, 1.65it/s] 36%|███▌ | 23562/65536 [4:02:34<6:57:49, 1.67it/s] 36%|███▌ | 23563/65536 [4:02:34<6:54:43, 1.69it/s] 36%|███▌ | 23564/65536 [4:02:35<6:47:50, 1.72it/s] 36%|███▌ | 23565/65536 [4:02:35<6:49:44, 1.71it/s] 36%|███▌ | 23566/65536 [4:02:36<7:20:23, 1.59it/s] 36%|███▌ | 23567/65536 [4:02:37<7:14:46, 1.61it/s] 36%|███▌ | 23568/65536 [4:02:37<7:11:30, 1.62it/s] 36%|███▌ | 23569/65536 [4:02:38<7:06:46, 1.64it/s] 36%|███▌ | 23570/65536 [4:02:38<7:21:07, 1.59it/s] 36%|███▌ | 23571/65536 [4:02:39<7:18:25, 1.60it/s] 36%|███▌ | 23572/65536 [4:02:40<7:37:49, 1.53it/s] 36%|███▌ | 23573/65536 [4:02:40<7:25:37, 1.57it/s] 36%|███▌ | 23574/65536 [4:02:41<7:18:02, 1.60it/s] 36%|███▌ | 23575/65536 [4:02:42<7:14:26, 1.61it/s] 36%|███▌ | 23576/65536 [4:02:42<7:05:37, 1.64it/s] 36%|███▌ | 23577/65536 [4:02:43<7:00:37, 1.66it/s] 36%|███▌ | 23578/65536 [4:02:43<7:10:30, 1.62it/s] 36%|███▌ | 23579/65536 [4:02:44<7:08:07, 1.63it/s] 36%|███▌ | 23580/65536 [4:02:45<7:17:33, 1.60it/s] {'loss': 3.0546, 'learning_rate': 6.779417166646258e-07, 'epoch': 1455.56} + 36%|███▌ | 23580/65536 [4:02:45<7:17:33, 1.60it/s] 36%|███▌ | 23581/65536 [4:02:45<7:06:53, 1.64it/s] 36%|███▌ | 23582/65536 [4:02:46<7:08:10, 1.63it/s] 36%|███▌ | 23583/65536 [4:02:46<7:02:02, 1.66it/s] 36%|███▌ | 23584/65536 [4:02:47<7:00:34, 1.66it/s] 36%|███▌ | 23585/65536 [4:02:48<7:09:59, 1.63it/s] 36%|███▌ | 23586/65536 [4:02:48<7:21:55, 1.58it/s] 36%|███▌ | 23587/65536 [4:02:49<7:15:39, 1.60it/s] 36%|███▌ | 23588/65536 [4:02:50<7:28:37, 1.56it/s] 36%|███▌ | 23589/65536 [4:02:50<7:24:01, 1.57it/s] 36%|███▌ | 23590/65536 [4:02:51<7:11:24, 1.62it/s] 36%|███▌ | 23591/65536 [4:02:51<7:04:59, 1.64it/s] 36%|███▌ | 23592/65536 [4:02:52<7:06:20, 1.64it/s] 36%|███▌ | 23593/65536 [4:02:53<6:56:04, 1.68it/s] 36%|███▌ | 23594/65536 [4:02:53<6:55:15, 1.68it/s] 36%|███▌ | 23595/65536 [4:02:54<7:04:20, 1.65it/s] 36%|███▌ | 23596/65536 [4:02:54<6:55:50, 1.68it/s] 36%|███▌ | 23597/65536 [4:02:55<6:53:13, 1.69it/s] 36%|███▌ | 23598/65536 [4:02:56<7:15:51, 1.60it/s] 36%|███▌ | 23599/65536 [4:02:56<7:13:29, 1.61it/s] 36%|███▌ | 23600/65536 [4:02:57<7:04:06, 1.65it/s] {'loss': 2.9454, 'learning_rate': 6.776662177053997e-07, 'epoch': 1456.79} + 36%|███▌ | 23600/65536 [4:02:57<7:04:06, 1.65it/s] 36%|███▌ | 23601/65536 [4:02:57<7:04:32, 1.65it/s] 36%|███▌ | 23602/65536 [4:02:58<6:57:36, 1.67it/s] 36%|███▌ | 23603/65536 [4:02:59<7:11:32, 1.62it/s] 36%|███▌ | 23604/65536 [4:02:59<7:28:16, 1.56it/s] 36%|███▌ | 23605/65536 [4:03:00<7:21:18, 1.58it/s] 36%|███▌ | 23606/65536 [4:03:01<7:16:12, 1.60it/s] 36%|███▌ | 23607/65536 [4:03:01<7:11:14, 1.62it/s] 36%|███▌ | 23608/65536 [4:03:02<6:58:34, 1.67it/s] 36%|███▌ | 23609/65536 [4:03:02<6:59:37, 1.67it/s] 36%|███▌ | 23610/65536 [4:03:03<7:01:48, 1.66it/s] 36%|███▌ | 23611/65536 [4:03:04<7:01:33, 1.66it/s] 36%|███▌ | 23612/65536 [4:03:04<6:55:34, 1.68it/s] 36%|███▌ | 23613/65536 [4:03:05<7:03:19, 1.65it/s] 36%|███▌ | 23614/65536 [4:03:05<6:58:55, 1.67it/s] 36%|███▌ | 23615/65536 [4:03:06<7:14:05, 1.61it/s] 36%|███▌ | 23616/65536 [4:03:07<7:20:29, 1.59it/s] 36%|███▌ | 23617/65536 [4:03:07<7:14:46, 1.61it/s] 36%|███▌ | 23618/65536 [4:03:08<7:22:33, 1.58it/s] 36%|███▌ | 23619/65536 [4:03:09<7:16:52, 1.60it/s] 36%|███▌ | 23620/65536 [4:03:09<7:24:11, 1.57it/s] {'loss': 3.0118, 'learning_rate': 6.773907187461735e-07, 'epoch': 1458.02} + 36%|███▌ | 23620/65536 [4:03:09<7:24:11, 1.57it/s] 36%|███▌ | 23621/65536 [4:03:10<7:12:29, 1.62it/s] 36%|███▌ | 23622/65536 [4:03:10<7:04:04, 1.65it/s] 36%|███▌ | 23623/65536 [4:03:11<6:56:57, 1.68it/s] 36%|███▌ | 23624/65536 [4:03:12<6:57:08, 1.67it/s] 36%|███▌ | 23625/65536 [4:03:12<7:03:26, 1.65it/s] 36%|███▌ | 23626/65536 [4:03:13<7:03:23, 1.65it/s] 36%|███▌ | 23627/65536 [4:03:13<7:09:24, 1.63it/s] 36%|███▌ | 23628/65536 [4:03:14<7:02:25, 1.65it/s] 36%|███▌ | 23629/65536 [4:03:15<7:03:57, 1.65it/s] 36%|███▌ | 23630/65536 [4:03:15<7:04:51, 1.64it/s] 36%|███▌ | 23631/65536 [4:03:16<7:14:23, 1.61it/s] 36%|███▌ | 23632/65536 [4:03:17<7:10:56, 1.62it/s] 36%|███▌ | 23633/65536 [4:03:17<7:07:26, 1.63it/s] 36%|███▌ | 23634/65536 [4:03:18<7:07:12, 1.63it/s] 36%|███▌ | 23635/65536 [4:03:18<7:13:55, 1.61it/s] 36%|███▌ | 23636/65536 [4:03:19<7:24:55, 1.57it/s] 36%|███▌ | 23637/65536 [4:03:20<7:29:35, 1.55it/s] 36%|███▌ | 23638/65536 [4:03:20<7:27:40, 1.56it/s] 36%|███▌ | 23639/65536 [4:03:21<7:18:38, 1.59it/s] 36%|███▌ | 23640/65536 [4:03:22<7:16:34, 1.60it/s] {'loss': 3.0519, 'learning_rate': 6.771152197869475e-07, 'epoch': 1459.26} + 36%|███▌ | 23640/65536 [4:03:22<7:16:34, 1.60it/s] 36%|███▌ | 23641/65536 [4:03:22<7:14:49, 1.61it/s] 36%|███▌ | 23642/65536 [4:03:23<7:13:02, 1.61it/s] 36%|███▌ | 23643/65536 [4:03:23<7:04:46, 1.64it/s] 36%|███▌ | 23644/65536 [4:03:24<7:11:00, 1.62it/s] 36%|███▌ | 23645/65536 [4:03:25<7:16:11, 1.60it/s] 36%|███▌ | 23646/65536 [4:03:25<7:11:55, 1.62it/s] 36%|███▌ | 23647/65536 [4:03:26<7:14:54, 1.61it/s] 36%|███▌ | 23648/65536 [4:03:27<7:13:18, 1.61it/s] 36%|███▌ | 23649/65536 [4:03:27<7:03:09, 1.65it/s] 36%|███▌ | 23650/65536 [4:03:28<7:06:07, 1.64it/s] 36%|███▌ | 23651/65536 [4:03:28<7:09:49, 1.62it/s] 36%|███▌ | 23652/65536 [4:03:29<7:05:03, 1.64it/s] 36%|███▌ | 23653/65536 [4:03:30<7:33:01, 1.54it/s] 36%|███▌ | 23654/65536 [4:03:30<7:14:12, 1.61it/s] 36%|███▌ | 23655/65536 [4:03:31<7:10:47, 1.62it/s] 36%|███▌ | 23656/65536 [4:03:31<7:10:29, 1.62it/s] 36%|███▌ | 23657/65536 [4:03:32<7:12:53, 1.61it/s] 36%|███▌ | 23658/65536 [4:03:33<7:10:50, 1.62it/s] 36%|███▌ | 23659/65536 [4:03:33<7:04:39, 1.64it/s] 36%|███▌ | 23660/65536 [4:03:34<7:06:41, 1.64it/s] {'loss': 2.9304, 'learning_rate': 6.768397208277213e-07, 'epoch': 1460.49} + 36%|███▌ | 23660/65536 [4:03:34<7:06:41, 1.64it/s] 36%|███▌ | 23661/65536 [4:03:34<7:04:10, 1.65it/s] 36%|███▌ | 23662/65536 [4:03:35<7:04:33, 1.64it/s] 36%|███▌ | 23663/65536 [4:03:36<7:10:56, 1.62it/s] 36%|███▌ | 23664/65536 [4:03:36<7:15:30, 1.60it/s] 36%|███▌ | 23665/65536 [4:03:37<7:04:59, 1.64it/s] 36%|███▌ | 23666/65536 [4:03:38<7:13:35, 1.61it/s] 36%|███▌ | 23667/65536 [4:03:38<7:03:51, 1.65it/s] 36%|███▌ | 23668/65536 [4:03:39<6:52:44, 1.69it/s] 36%|███▌ | 23669/65536 [4:03:39<7:01:02, 1.66it/s] 36%|███▌ | 23670/65536 [4:03:40<7:06:56, 1.63it/s] 36%|███▌ | 23671/65536 [4:03:41<7:04:03, 1.65it/s] 36%|███▌ | 23672/65536 [4:03:41<7:00:22, 1.66it/s] 36%|███▌ | 23673/65536 [4:03:42<7:01:46, 1.65it/s] 36%|███▌ | 23674/65536 [4:03:42<7:02:16, 1.65it/s] 36%|███▌ | 23675/65536 [4:03:43<6:59:43, 1.66it/s] 36%|███▌ | 23676/65536 [4:03:44<7:00:00, 1.66it/s] 36%|███▌ | 23677/65536 [4:03:44<6:59:19, 1.66it/s] 36%|███▌ | 23678/65536 [4:03:45<6:58:13, 1.67it/s] 36%|███▌ | 23679/65536 [4:03:45<7:02:52, 1.65it/s] 36%|███▌ | 23680/65536 [4:03:46<7:24:59, 1.57it/s] {'loss': 2.9727, 'learning_rate': 6.765642218684952e-07, 'epoch': 1461.73} + 36%|███▌ | 23680/65536 [4:03:46<7:24:59, 1.57it/s] 36%|███▌ | 23681/65536 [4:03:47<7:19:16, 1.59it/s] 36%|███▌ | 23682/65536 [4:03:47<7:20:44, 1.58it/s] 36%|███▌ | 23683/65536 [4:03:48<7:10:54, 1.62it/s] 36%|███▌ | 23684/65536 [4:03:49<7:08:54, 1.63it/s] 36%|███▌ | 23685/65536 [4:03:49<7:29:50, 1.55it/s] 36%|███▌ | 23686/65536 [4:03:50<7:20:18, 1.58it/s] 36%|███▌ | 23687/65536 [4:03:50<7:15:41, 1.60it/s] 36%|███▌ | 23688/65536 [4:03:51<7:09:44, 1.62it/s] 36%|███▌ | 23689/65536 [4:03:52<7:01:44, 1.65it/s] 36%|███▌ | 23690/65536 [4:03:52<7:07:50, 1.63it/s] 36%|███▌ | 23691/65536 [4:03:53<7:05:24, 1.64it/s] 36%|███▌ | 23692/65536 [4:03:53<6:59:32, 1.66it/s] 36%|███▌ | 23693/65536 [4:03:54<7:01:52, 1.65it/s] 36%|███▌ | 23694/65536 [4:03:55<6:58:06, 1.67it/s] 36%|███▌ | 23695/65536 [4:03:55<6:52:02, 1.69it/s] 36%|███▌ | 23696/65536 [4:03:56<6:49:57, 1.70it/s] 36%|███▌ | 23697/65536 [4:03:56<6:52:58, 1.69it/s] 36%|███▌ | 23698/65536 [4:03:57<6:58:02, 1.67it/s] 36%|███▌ | 23699/65536 [4:03:58<7:17:55, 1.59it/s] 36%|███▌ | 23700/65536 [4:03:58<7:28:57, 1.55it/s] {'loss': 2.9836, 'learning_rate': 6.762887229092691e-07, 'epoch': 1462.96} + 36%|███▌ | 23700/65536 [4:03:58<7:28:57, 1.55it/s] 36%|███▌ | 23701/65536 [4:03:59<7:36:36, 1.53it/s] 36%|███▌ | 23702/65536 [4:04:00<7:25:15, 1.57it/s] 36%|███▌ | 23703/65536 [4:04:00<7:20:59, 1.58it/s] 36%|███▌ | 23704/65536 [4:04:01<7:18:10, 1.59it/s] 36%|███▌ | 23705/65536 [4:04:02<7:17:43, 1.59it/s] 36%|███▌ | 23706/65536 [4:04:02<7:12:43, 1.61it/s] 36%|███▌ | 23707/65536 [4:04:03<7:06:46, 1.63it/s] 36%|███▌ | 23708/65536 [4:04:03<7:14:48, 1.60it/s] 36%|███▌ | 23709/65536 [4:04:04<7:15:17, 1.60it/s] 36%|███▌ | 23710/65536 [4:04:05<7:16:43, 1.60it/s] 36%|███▌ | 23711/65536 [4:04:05<7:08:53, 1.63it/s] 36%|███▌ | 23712/65536 [4:04:06<6:59:09, 1.66it/s] 36%|███▌ | 23713/65536 [4:04:06<6:54:46, 1.68it/s] 36%|███▌ | 23714/65536 [4:04:07<6:59:21, 1.66it/s] 36%|███▌ | 23715/65536 [4:04:08<7:00:32, 1.66it/s] 36%|███▌ | 23716/65536 [4:04:08<7:03:01, 1.65it/s] 36%|███▌ | 23717/65536 [4:04:09<7:18:11, 1.59it/s] 36%|███▌ | 23718/65536 [4:04:10<7:17:54, 1.59it/s] 36%|███▌ | 23719/65536 [4:04:10<7:12:08, 1.61it/s] 36%|██��▌ | 23720/65536 [4:04:11<7:09:52, 1.62it/s] {'loss': 3.0426, 'learning_rate': 6.760132239500429e-07, 'epoch': 1464.2} + 36%|███▌ | 23720/65536 [4:04:11<7:09:52, 1.62it/s] 36%|███▌ | 23721/65536 [4:04:11<7:06:34, 1.63it/s] 36%|███▌ | 23722/65536 [4:04:12<7:08:44, 1.63it/s] 36%|███▌ | 23723/65536 [4:04:13<7:16:18, 1.60it/s] 36%|███▌ | 23724/65536 [4:04:13<7:24:59, 1.57it/s] 36%|███▌ | 23725/65536 [4:04:14<7:16:29, 1.60it/s] 36%|███▌ | 23726/65536 [4:04:15<7:17:31, 1.59it/s] 36%|███▌ | 23727/65536 [4:04:15<7:11:16, 1.62it/s] 36%|███▌ | 23728/65536 [4:04:16<7:02:01, 1.65it/s] 36%|███▌ | 23729/65536 [4:04:16<6:57:55, 1.67it/s] 36%|███▌ | 23730/65536 [4:04:17<6:56:17, 1.67it/s] 36%|███▌ | 23731/65536 [4:04:18<7:00:38, 1.66it/s] 36%|███▌ | 23732/65536 [4:04:18<6:57:13, 1.67it/s] 36%|███▌ | 23733/65536 [4:04:19<7:00:55, 1.66it/s] 36%|███▌ | 23734/65536 [4:04:19<7:16:34, 1.60it/s] 36%|███▌ | 23735/65536 [4:04:20<7:07:44, 1.63it/s] 36%|███▌ | 23736/65536 [4:04:21<6:57:56, 1.67it/s] 36%|███▌ | 23737/65536 [4:04:21<6:51:24, 1.69it/s] 36%|███▌ | 23738/65536 [4:04:22<6:57:38, 1.67it/s] 36%|███▌ | 23739/65536 [4:04:22<7:02:34, 1.65it/s] 36%|███▌ | 23740/65536 [4:04:23<6:59:00, 1.66it/s] {'loss': 3.0034, 'learning_rate': 6.757377249908168e-07, 'epoch': 1465.43} + 36%|███▌ | 23740/65536 [4:04:23<6:59:00, 1.66it/s] 36%|███▌ | 23741/65536 [4:04:24<7:05:25, 1.64it/s] 36%|███▌ | 23742/65536 [4:04:24<7:06:04, 1.63it/s] 36%|███▌ | 23743/65536 [4:04:25<6:59:45, 1.66it/s] 36%|███▌ | 23744/65536 [4:04:25<7:06:54, 1.63it/s] 36%|███▌ | 23745/65536 [4:04:26<7:15:29, 1.60it/s] 36%|███▌ | 23746/65536 [4:04:27<7:09:42, 1.62it/s] 36%|███▌ | 23747/65536 [4:04:27<7:09:51, 1.62it/s] 36%|███▌ | 23748/65536 [4:04:28<7:21:27, 1.58it/s] 36%|███▌ | 23749/65536 [4:04:29<7:08:39, 1.62it/s] 36%|███▌ | 23750/65536 [4:04:29<7:19:14, 1.59it/s] 36%|███▌ | 23751/65536 [4:04:30<7:13:31, 1.61it/s] 36%|███▌ | 23752/65536 [4:04:30<7:17:38, 1.59it/s] 36%|███▌ | 23753/65536 [4:04:31<7:11:41, 1.61it/s] 36%|███▌ | 23754/65536 [4:04:32<7:11:18, 1.61it/s] 36%|███▌ | 23755/65536 [4:04:32<7:07:21, 1.63it/s] 36%|███▌ | 23756/65536 [4:04:33<7:15:37, 1.60it/s] 36%|███▋ | 23757/65536 [4:04:34<7:24:49, 1.57it/s] 36%|███▋ | 23758/65536 [4:04:34<7:13:03, 1.61it/s] 36%|███▋ | 23759/65536 [4:04:35<7:01:08, 1.65it/s] 36%|███▋ | 23760/65536 [4:04:35<7:06:19, 1.63it/s] {'loss': 2.9911, 'learning_rate': 6.754622260315905e-07, 'epoch': 1466.67} + 36%|███▋ | 23760/65536 [4:04:35<7:06:19, 1.63it/s] 36%|███▋ | 23761/65536 [4:04:36<7:17:25, 1.59it/s] 36%|███▋ | 23762/65536 [4:04:37<7:05:49, 1.63it/s] 36%|███▋ | 23763/65536 [4:04:37<6:58:08, 1.67it/s] 36%|███▋ | 23764/65536 [4:04:38<7:01:46, 1.65it/s] 36%|███▋ | 23765/65536 [4:04:38<6:55:24, 1.68it/s] 36%|███▋ | 23766/65536 [4:04:39<7:08:50, 1.62it/s] 36%|███▋ | 23767/65536 [4:04:40<7:12:20, 1.61it/s] 36%|███▋ | 23768/65536 [4:04:40<6:58:11, 1.66it/s] 36%|███▋ | 23769/65536 [4:04:41<6:57:40, 1.67it/s] 36%|███▋ | 23770/65536 [4:04:41<7:06:37, 1.63it/s] 36%|███▋ | 23771/65536 [4:04:42<7:04:10, 1.64it/s] 36%|███▋ | 23772/65536 [4:04:43<7:09:00, 1.62it/s] 36%|███▋ | 23773/65536 [4:04:43<7:04:00, 1.64it/s] 36%|███▋ | 23774/65536 [4:04:44<7:05:29, 1.64it/s] 36%|███▋ | 23775/65536 [4:04:45<7:19:20, 1.58it/s] 36%|███▋ | 23776/65536 [4:04:45<7:02:14, 1.65it/s] 36%|███▋ | 23777/65536 [4:04:46<7:04:43, 1.64it/s] 36%|███▋ | 23778/65536 [4:04:46<7:00:32, 1.65it/s] 36%|███▋ | 23779/65536 [4:04:47<6:58:09, 1.66it/s] 36%|███▋ | 23780/65536 [4:04:48<6:58:57, 1.66it/s] {'loss': 2.9945, 'learning_rate': 6.751867270723644e-07, 'epoch': 1467.9} + 36%|███▋ | 23780/65536 [4:04:48<6:58:57, 1.66it/s] 36%|███▋ | 23781/65536 [4:04:48<7:12:12, 1.61it/s] 36%|███▋ | 23782/65536 [4:04:49<7:23:17, 1.57it/s] 36%|███▋ | 23783/65536 [4:04:50<7:28:16, 1.55it/s] 36%|███▋ | 23784/65536 [4:04:50<7:12:52, 1.61it/s] 36%|███▋ | 23785/65536 [4:04:51<7:08:25, 1.62it/s] 36%|███▋ | 23786/65536 [4:04:51<7:08:45, 1.62it/s] 36%|███▋ | 23787/65536 [4:04:52<7:03:51, 1.64it/s] 36%|███▋ | 23788/65536 [4:04:53<7:02:54, 1.65it/s] 36%|███▋ | 23789/65536 [4:04:53<7:12:31, 1.61it/s] 36%|███▋ | 23790/65536 [4:04:54<7:06:52, 1.63it/s] 36%|███▋ | 23791/65536 [4:04:54<6:58:54, 1.66it/s] 36%|███▋ | 23792/65536 [4:04:55<7:00:04, 1.66it/s] 36%|███▋ | 23793/65536 [4:04:56<7:12:52, 1.61it/s] 36%|███▋ | 23794/65536 [4:04:56<7:05:21, 1.64it/s] 36%|███▋ | 23795/65536 [4:04:57<6:58:49, 1.66it/s] 36%|███▋ | 23796/65536 [4:04:57<6:49:57, 1.70it/s] 36%|███▋ | 23797/65536 [4:04:58<7:04:00, 1.64it/s] 36%|███▋ | 23798/65536 [4:04:59<7:27:46, 1.55it/s] 36%|███▋ | 23799/65536 [4:04:59<7:25:49, 1.56it/s] 36%|███▋ | 23800/65536 [4:05:00<7:20:46, 1.58it/s] {'loss': 2.9749, 'learning_rate': 6.749112281131383e-07, 'epoch': 1469.14} + 36%|███▋ | 23800/65536 [4:05:00<7:20:46, 1.58it/s] 36%|███▋ | 23801/65536 [4:05:01<7:22:32, 1.57it/s] 36%|███▋ | 23802/65536 [4:05:01<7:13:40, 1.60it/s] 36%|███▋ | 23803/65536 [4:05:02<7:00:43, 1.65it/s] 36%|███▋ | 23804/65536 [4:05:02<7:19:14, 1.58it/s] 36%|███▋ | 23805/65536 [4:05:03<7:18:39, 1.59it/s] 36%|███▋ | 23806/65536 [4:05:04<7:12:04, 1.61it/s] 36%|███▋ | 23807/65536 [4:05:04<7:07:47, 1.63it/s] 36%|███▋ | 23808/65536 [4:05:05<7:04:54, 1.64it/s] 36%|███▋ | 23809/65536 [4:05:06<7:06:49, 1.63it/s] 36%|███▋ | 23810/65536 [4:05:06<7:02:29, 1.65it/s] 36%|███▋ | 23811/65536 [4:05:07<6:57:31, 1.67it/s] 36%|███▋ | 23812/65536 [4:05:07<6:58:19, 1.66it/s] 36%|███▋ | 23813/65536 [4:05:08<6:55:03, 1.68it/s] 36%|███▋ | 23814/65536 [4:05:08<6:49:26, 1.70it/s] 36%|███▋ | 23815/65536 [4:05:09<7:08:03, 1.62it/s] 36%|███▋ | 23816/65536 [4:05:10<7:01:32, 1.65it/s] 36%|███▋ | 23817/65536 [4:05:10<6:58:32, 1.66it/s] 36%|███▋ | 23818/65536 [4:05:11<7:14:45, 1.60it/s] 36%|███▋ | 23819/65536 [4:05:12<7:13:40, 1.60it/s] 36%|███▋ | 23820/65536 [4:05:12<7:06:19, 1.63it/s] {'loss': 2.9992, 'learning_rate': 6.74635729153912e-07, 'epoch': 1470.37} + 36%|███▋ | 23820/65536 [4:05:12<7:06:19, 1.63it/s] 36%|███▋ | 23821/65536 [4:05:13<7:11:05, 1.61it/s] 36%|███▋ | 23822/65536 [4:05:13<7:02:07, 1.65it/s] 36%|███▋ | 23823/65536 [4:05:14<6:59:12, 1.66it/s] 36%|███▋ | 23824/65536 [4:05:15<6:53:32, 1.68it/s] 36%|███▋ | 23825/65536 [4:05:15<6:44:30, 1.72it/s] 36%|███▋ | 23826/65536 [4:05:16<6:58:43, 1.66it/s] 36%|███▋ | 23827/65536 [4:05:16<6:53:24, 1.68it/s] 36%|███▋ | 23828/65536 [4:05:17<7:09:55, 1.62it/s] 36%|███▋ | 23829/65536 [4:05:18<6:59:39, 1.66it/s] 36%|███▋ | 23830/65536 [4:05:18<7:03:01, 1.64it/s] 36%|███▋ | 23831/65536 [4:05:19<7:16:47, 1.59it/s] 36%|███▋ | 23832/65536 [4:05:19<7:04:43, 1.64it/s] 36%|███▋ | 23833/65536 [4:05:20<7:01:06, 1.65it/s] 36%|███▋ | 23834/65536 [4:05:21<7:01:52, 1.65it/s] 36%|███▋ | 23835/65536 [4:05:21<6:55:37, 1.67it/s] 36%|███▋ | 23836/65536 [4:05:22<6:53:47, 1.68it/s] 36%|███▋ | 23837/65536 [4:05:23<7:18:47, 1.58it/s] 36%|███▋ | 23838/65536 [4:05:23<7:10:00, 1.62it/s] 36%|███▋ | 23839/65536 [4:05:24<7:01:41, 1.65it/s] 36%|███▋ | 23840/65536 [4:05:24<7:05:16, 1.63it/s] {'loss': 2.9742, 'learning_rate': 6.743602301946859e-07, 'epoch': 1471.6} + 36%|███▋ | 23840/65536 [4:05:24<7:05:16, 1.63it/s] 36%|███▋ | 23841/65536 [4:05:25<6:56:27, 1.67it/s] 36%|███▋ | 23842/65536 [4:05:26<7:11:06, 1.61it/s] 36%|███▋ | 23843/65536 [4:05:26<7:10:22, 1.61it/s] 36%|███▋ | 23844/65536 [4:05:27<7:04:42, 1.64it/s] 36%|███▋ | 23845/65536 [4:05:27<7:04:41, 1.64it/s] 36%|███▋ | 23846/65536 [4:05:28<7:07:54, 1.62it/s] 36%|███▋ | 23847/65536 [4:05:29<7:43:21, 1.50it/s] 36%|███▋ | 23848/65536 [4:05:29<7:27:06, 1.55it/s] 36%|███▋ | 23849/65536 [4:05:30<7:10:35, 1.61it/s] 36%|███▋ | 23850/65536 [4:05:31<7:01:49, 1.65it/s] 36%|███▋ | 23851/65536 [4:05:31<7:02:58, 1.64it/s] 36%|███▋ | 23852/65536 [4:05:32<7:07:47, 1.62it/s] 36%|███▋ | 23853/65536 [4:05:32<6:56:14, 1.67it/s] 36%|���██▋ | 23854/65536 [4:05:33<7:01:17, 1.65it/s] 36%|███▋ | 23855/65536 [4:05:34<6:57:51, 1.66it/s] 36%|███▋ | 23856/65536 [4:05:34<6:47:36, 1.70it/s] 36%|███▋ | 23857/65536 [4:05:35<6:57:35, 1.66it/s] 36%|███▋ | 23858/65536 [4:05:35<7:03:59, 1.64it/s] 36%|███▋ | 23859/65536 [4:05:36<7:13:47, 1.60it/s] 36%|███▋ | 23860/65536 [4:05:37<7:06:49, 1.63it/s] {'loss': 3.0011, 'learning_rate': 6.740847312354597e-07, 'epoch': 1472.84} + 36%|███▋ | 23860/65536 [4:05:37<7:06:49, 1.63it/s] 36%|███▋ | 23861/65536 [4:05:37<7:08:14, 1.62it/s] 36%|███▋ | 23862/65536 [4:05:38<7:03:22, 1.64it/s] 36%|███▋ | 23863/65536 [4:05:39<7:17:03, 1.59it/s] 36%|███▋ | 23864/65536 [4:05:39<7:22:09, 1.57it/s] 36%|███▋ | 23865/65536 [4:05:40<7:27:10, 1.55it/s] 36%|███▋ | 23866/65536 [4:05:40<7:14:16, 1.60it/s] 36%|███▋ | 23867/65536 [4:05:41<7:08:11, 1.62it/s] 36%|███▋ | 23868/65536 [4:05:42<6:59:37, 1.65it/s] 36%|███▋ | 23869/65536 [4:05:42<6:50:58, 1.69it/s] 36%|███▋ | 23870/65536 [4:05:43<7:04:08, 1.64it/s] 36%|███▋ | 23871/65536 [4:05:43<7:01:36, 1.65it/s] 36%|███▋ | 23872/65536 [4:05:44<6:53:19, 1.68it/s] 36%|███▋ | 23873/65536 [4:05:45<6:50:15, 1.69it/s] 36%|███▋ | 23874/65536 [4:05:45<6:58:42, 1.66it/s] 36%|███▋ | 23875/65536 [4:05:46<7:03:44, 1.64it/s] 36%|███▋ | 23876/65536 [4:05:46<6:59:45, 1.65it/s] 36%|███▋ | 23877/65536 [4:05:47<7:04:21, 1.64it/s] 36%|███▋ | 23878/65536 [4:05:48<7:03:10, 1.64it/s] 36%|███▋ | 23879/65536 [4:05:48<7:28:25, 1.55it/s] 36%|███▋ | 23880/65536 [4:05:49<7:23:54, 1.56it/s] {'loss': 2.9287, 'learning_rate': 6.738092322762336e-07, 'epoch': 1474.07} + 36%|███▋ | 23880/65536 [4:05:49<7:23:54, 1.56it/s] 36%|███▋ | 23881/65536 [4:05:50<7:11:34, 1.61it/s] 36%|███▋ | 23882/65536 [4:05:50<7:06:07, 1.63it/s] 36%|███▋ | 23883/65536 [4:05:51<7:10:06, 1.61it/s] 36%|███▋ | 23884/65536 [4:05:51<7:05:27, 1.63it/s] 36%|███▋ | 23885/65536 [4:05:52<6:56:58, 1.66it/s] 36%|███▋ | 23886/65536 [4:05:53<7:08:09, 1.62it/s] 36%|███▋ | 23887/65536 [4:05:53<7:03:27, 1.64it/s] 36%|███▋ | 23888/65536 [4:05:54<7:04:10, 1.64it/s] 36%|███▋ | 23889/65536 [4:05:54<7:02:14, 1.64it/s] 36%|███▋ | 23890/65536 [4:05:55<7:04:06, 1.64it/s] 36%|███▋ | 23891/65536 [4:05:56<7:25:05, 1.56it/s] 36%|███▋ | 23892/65536 [4:05:56<7:14:13, 1.60it/s] 36%|███▋ | 23893/65536 [4:05:57<7:14:52, 1.60it/s] 36%|███▋ | 23894/65536 [4:05:58<7:07:07, 1.62it/s] 36%|███▋ | 23895/65536 [4:05:58<7:16:27, 1.59it/s] 36%|███▋ | 23896/65536 [4:05:59<7:37:00, 1.52it/s] 36%|███▋ | 23897/65536 [4:06:00<7:26:23, 1.55it/s] 36%|███▋ | 23898/65536 [4:06:00<7:22:06, 1.57it/s] 36%|███▋ | 23899/65536 [4:06:01<7:10:57, 1.61it/s] 36%|███▋ | 23900/65536 [4:06:01<6:59:42, 1.65it/s] {'loss': 2.9665, 'learning_rate': 6.735337333170074e-07, 'epoch': 1475.31} + 36%|███▋ | 23900/65536 [4:06:01<6:59:42, 1.65it/s] 36%|███▋ | 23901/65536 [4:06:02<7:03:40, 1.64it/s] 36%|███▋ | 23902/65536 [4:06:03<7:00:23, 1.65it/s] 36%|███▋ | 23903/65536 [4:06:03<6:53:26, 1.68it/s] 36%|███▋ | 23904/65536 [4:06:04<6:54:12, 1.68it/s] 36%|███▋ | 23905/65536 [4:06:04<7:00:06, 1.65it/s] 36%|███▋ | 23906/65536 [4:06:05<7:02:00, 1.64it/s] 36%|███▋ | 23907/65536 [4:06:06<7:03:08, 1.64it/s] 36%|███▋ | 23908/65536 [4:06:06<6:56:25, 1.67it/s] 36%|███▋ | 23909/65536 [4:06:07<6:59:55, 1.65it/s] 36%|███▋ | 23910/65536 [4:06:07<7:05:45, 1.63it/s] 36%|███▋ | 23911/65536 [4:06:08<6:55:12, 1.67it/s] 36%|███▋ | 23912/65536 [4:06:09<7:11:24, 1.61it/s] 36%|███▋ | 23913/65536 [4:06:09<7:05:57, 1.63it/s] 36%|███▋ | 23914/65536 [4:06:10<7:01:16, 1.65it/s] 36%|███▋ | 23915/65536 [4:06:10<7:00:13, 1.65it/s] 36%|███▋ | 23916/65536 [4:06:11<7:15:25, 1.59it/s] 36%|███▋ | 23917/65536 [4:06:12<7:03:29, 1.64it/s] 36%|███▋ | 23918/65536 [4:06:12<6:56:07, 1.67it/s] 36%|███▋ | 23919/65536 [4:06:13<6:54:31, 1.67it/s] 36%|███▋ | 23920/65536 [4:06:13<6:56:15, 1.67it/s] {'loss': 2.973, 'learning_rate': 6.732582343577813e-07, 'epoch': 1476.54} + 36%|███▋ | 23920/65536 [4:06:13<6:56:15, 1.67it/s] 37%|███▋ | 23921/65536 [4:06:14<7:00:35, 1.65it/s] 37%|███▋ | 23922/65536 [4:06:15<7:03:29, 1.64it/s] 37%|███▋ | 23923/65536 [4:06:15<7:13:32, 1.60it/s] 37%|███▋ | 23924/65536 [4:06:16<7:05:56, 1.63it/s] 37%|███▋ | 23925/65536 [4:06:17<6:57:53, 1.66it/s] 37%|███▋ | 23926/65536 [4:06:17<7:10:45, 1.61it/s] 37%|███▋ | 23927/65536 [4:06:18<7:01:10, 1.65it/s] 37%|███▋ | 23928/65536 [4:06:18<7:22:08, 1.57it/s] 37%|███▋ | 23929/65536 [4:06:19<7:14:32, 1.60it/s] 37%|███▋ | 23930/65536 [4:06:20<7:12:09, 1.60it/s] 37%|███▋ | 23931/65536 [4:06:20<7:08:57, 1.62it/s] 37%|███▋ | 23932/65536 [4:06:21<7:03:04, 1.64it/s] 37%|███▋ | 23933/65536 [4:06:22<7:05:56, 1.63it/s] 37%|███▋ | 23934/65536 [4:06:22<6:57:09, 1.66it/s] 37%|███▋ | 23935/65536 [4:06:23<6:54:50, 1.67it/s] 37%|███▋ | 23936/65536 [4:06:23<7:00:10, 1.65it/s] 37%|███▋ | 23937/65536 [4:06:24<7:10:10, 1.61it/s] 37%|███▋ | 23938/65536 [4:06:25<7:11:06, 1.61it/s] 37%|███▋ | 23939/65536 [4:06:25<6:59:10, 1.65it/s] 37%|███▋ | 23940/65536 [4:06:26<6:58:22, 1.66it/s] {'loss': 2.9416, 'learning_rate': 6.729827353985553e-07, 'epoch': 1477.78} + 37%|███▋ | 23940/65536 [4:06:26<6:58:22, 1.66it/s] 37%|███▋ | 23941/65536 [4:06:26<7:16:20, 1.59it/s] 37%|███▋ | 23942/65536 [4:06:27<7:13:13, 1.60it/s] 37%|███▋ | 23943/65536 [4:06:28<7:30:57, 1.54it/s] 37%|███▋ | 23944/65536 [4:06:28<7:50:34, 1.47it/s] 37%|███▋ | 23945/65536 [4:06:29<7:44:12, 1.49it/s] 37%|███▋ | 23946/65536 [4:06:30<7:27:25, 1.55it/s] 37%|███▋ | 23947/65536 [4:06:30<7:23:23, 1.56it/s] 37%|███▋ | 23948/65536 [4:06:31<7:23:06, 1.56it/s] 37%|███▋ | 23949/65536 [4:06:32<7:10:25, 1.61it/s] 37%|███▋ | 23950/65536 [4:06:32<7:09:11, 1.61it/s] 37%|███▋ | 23951/65536 [4:06:33<7:15:30, 1.59it/s] 37%|███▋ | 23952/65536 [4:06:33<6:54:28, 1.67it/s] 37%|███▋ | 23953/65536 [4:06:34<6:45:56, 1.71it/s] 37%|███▋ | 23954/65536 [4:06:34<6:42:18, 1.72it/s] 37%|███▋ | 23955/65536 [4:06:35<6:48:16, 1.70it/s] 37%|███▋ | 23956/65536 [4:06:36<6:48:14, 1.70it/s] 37%|███▋ | 23957/65536 [4:06:36<6:50:54, 1.69it/s] 37%|███▋ | 23958/65536 [4:06:37<6:46:05, 1.71it/s] 37%|███▋ | 23959/65536 [4:06:38<7:01:25, 1.64it/s] 37%|███▋ | 23960/65536 [4:06:38<7:15:30, 1.59it/s] {'loss': 3.0906, 'learning_rate': 6.72707236439329e-07, 'epoch': 1479.01} + 37%|███▋ | 23960/65536 [4:06:38<7:15:30, 1.59it/s] 37%|███▋ | 23961/65536 [4:06:39<7:11:58, 1.60it/s] 37%|███▋ | 23962/65536 [4:06:39<6:59:26, 1.65it/s] 37%|███▋ | 23963/65536 [4:06:40<7:03:55, 1.63it/s] 37%|███▋ | 23964/65536 [4:06:41<7:01:49, 1.64it/s] 37%|███▋ | 23965/65536 [4:06:41<7:08:29, 1.62it/s] 37%|███▋ | 23966/65536 [4:06:42<7:15:20, 1.59it/s] 37%|███▋ | 23967/65536 [4:06:42<7:07:25, 1.62it/s] 37%|███▋ | 23968/65536 [4:06:43<6:59:10, 1.65it/s] 37%|███▋ | 23969/65536 [4:06:44<7:00:43, 1.65it/s] 37%|███▋ | 23970/65536 [4:06:44<7:02:14, 1.64it/s] 37%|███▋ | 23971/65536 [4:06:45<6:59:00, 1.65it/s] 37%|███▋ | 23972/65536 [4:06:46<7:04:09, 1.63it/s] 37%|███▋ | 23973/65536 [4:06:46<6:58:04, 1.66it/s] 37%|███▋ | 23974/65536 [4:06:47<7:06:45, 1.62it/s] 37%|███▋ | 23975/65536 [4:06:47<7:07:56, 1.62it/s] 37%|███▋ | 23976/65536 [4:06:48<7:07:14, 1.62it/s] 37%|███▋ | 23977/65536 [4:06:49<7:17:34, 1.58it/s] 37%|███▋ | 23978/65536 [4:06:49<7:10:26, 1.61it/s] 37%|███▋ | 23979/65536 [4:06:50<7:16:53, 1.59it/s] 37%|███▋ | 23980/65536 [4:06:50<7:08:09, 1.62it/s] {'loss': 2.9303, 'learning_rate': 6.724317374801029e-07, 'epoch': 1480.25} + 37%|███▋ | 23980/65536 [4:06:50<7:08:09, 1.62it/s] 37%|███▋ | 23981/65536 [4:06:51<7:00:47, 1.65it/s] 37%|███▋ | 23982/65536 [4:06:52<7:00:23, 1.65it/s] 37%|███▋ | 23983/65536 [4:06:52<7:02:20, 1.64it/s] 37%|███▋ | 23984/65536 [4:06:53<7:02:11, 1.64it/s] 37%|███▋ | 23985/65536 [4:06:54<7:18:29, 1.58it/s] 37%|███▋ | 23986/65536 [4:06:54<7:04:08, 1.63it/s] 37%|███▋ | 23987/65536 [4:06:55<7:10:29, 1.61it/s] 37%|███▋ | 23988/65536 [4:06:55<7:09:28, 1.61it/s] 37%|███▋ | 23989/65536 [4:06:56<7:08:03, 1.62it/s] 37%|███▋ | 23990/65536 [4:06:57<7:01:58, 1.64it/s] 37%|███▋ | 23991/65536 [4:06:57<7:00:01, 1.65it/s] 37%|███▋ | 23992/65536 [4:06:58<6:51:47, 1.68it/s] 37%|███▋ | 23993/65536 [4:06:58<7:12:20, 1.60it/s] 37%|███▋ | 23994/65536 [4:06:59<7:31:00, 1.54it/s] 37%|███▋ | 23995/65536 [4:07:00<7:36:42, 1.52it/s] 37%|███▋ | 23996/65536 [4:07:00<7:23:30, 1.56it/s] 37%|███▋ | 23997/65536 [4:07:01<7:19:01, 1.58it/s] 37%|███▋ | 23998/65536 [4:07:02<7:17:28, 1.58it/s] 37%|███▋ | 23999/65536 [4:07:02<7:07:56, 1.62it/s] 37%|███▋ | 24000/65536 [4:07:03<6:57:09, 1.66it/s] {'loss': 2.9875, 'learning_rate': 6.721562385208767e-07, 'epoch': 1481.48} + 37%|███▋ | 24000/65536 [4:07:03<6:57:09, 1.66it/s] 37%|███▋ | 24001/65536 [4:07:03<6:51:15, 1.68it/s] 37%|███▋ | 24002/65536 [4:07:04<6:50:17, 1.69it/s] 37%|███▋ | 24003/65536 [4:07:05<6:48:53, 1.69it/s] 37%|███▋ | 24004/65536 [4:07:05<6:49:57, 1.69it/s] 37%|███▋ | 24005/65536 [4:07:06<6:55:32, 1.67it/s] 37%|███▋ | 24006/65536 [4:07:06<6:59:23, 1.65it/s] 37%|███▋ | 24007/65536 [4:07:07<6:55:30, 1.67it/s] 37%|███▋ | 24008/65536 [4:07:08<7:10:05, 1.61it/s] 37%|███▋ | 24009/65536 [4:07:08<7:15:04, 1.59it/s] 37%|███▋ | 24010/65536 [4:07:09<7:13:10, 1.60it/s] 37%|███▋ | 24011/65536 [4:07:10<7:09:35, 1.61it/s] 37%|███▋ | 24012/65536 [4:07:10<7:02:41, 1.64it/s] 37%|███▋ | 24013/65536 [4:07:11<7:03:25, 1.63it/s] 37%|███▋ | 24014/65536 [4:07:11<7:07:41, 1.62it/s] 37%|███▋ | 24015/65536 [4:07:12<7:07:46, 1.62it/s] 37%|███▋ | 24016/65536 [4:07:13<7:12:38, 1.60it/s] 37%|███▋ | 24017/65536 [4:07:13<7:05:38, 1.63it/s] 37%|███▋ | 24018/65536 [4:07:14<7:03:20, 1.63it/s] 37%|███▋ | 24019/65536 [4:07:14<7:03:13, 1.63it/s] 37%|███▋ | 24020/65536 [4:07:15<6:53:36, 1.67it/s] {'loss': 2.9577, 'learning_rate': 6.718807395616506e-07, 'epoch': 1482.72} + 37%|███▋ | 24020/65536 [4:07:15<6:53:36, 1.67it/s] 37%|███▋ | 24021/65536 [4:07:16<6:56:09, 1.66it/s] 37%|███▋ | 24022/65536 [4:07:16<6:50:24, 1.69it/s] 37%|███▋ | 24023/65536 [4:07:17<6:47:47, 1.70it/s] 37%|███▋ | 24024/65536 [4:07:17<7:01:17, 1.64it/s] 37%|███▋ | 24025/65536 [4:07:18<7:19:14, 1.58it/s] 37%|███▋ | 24026/65536 [4:07:19<7:02:04, 1.64it/s] 37%|███▋ | 24027/65536 [4:07:19<6:55:50, 1.66it/s] 37%|███▋ | 24028/65536 [4:07:20<7:00:57, 1.64it/s] 37%|███▋ | 24029/65536 [4:07:21<7:11:51, 1.60it/s] 37%|███▋ | 24030/65536 [4:07:21<7:12:03, 1.60it/s] 37%|███▋ | 24031/65536 [4:07:22<7:01:31, 1.64it/s] 37%|███▋ | 24032/65536 [4:07:22<7:04:41, 1.63it/s] 37%|███▋ | 24033/65536 [4:07:23<6:57:17, 1.66it/s] 37%|███▋ | 24034/65536 [4:07:24<6:54:38, 1.67it/s] 37%|███▋ | 24035/65536 [4:07:24<7:10:58, 1.60it/s] 37%|███▋ | 24036/65536 [4:07:25<7:00:51, 1.64it/s] 37%|███▋ | 24037/65536 [4:07:25<6:54:37, 1.67it/s] 37%|███▋ | 24038/65536 [4:07:26<6:50:55, 1.68it/s] 37%|███▋ | 24039/65536 [4:07:27<7:08:14, 1.61it/s] 37%|███▋ | 24040/65536 [4:07:27<6:57:03, 1.66it/s] {'loss': 3.0099, 'learning_rate': 6.716052406024243e-07, 'epoch': 1483.95} + 37%|███▋ | 24040/65536 [4:07:27<6:57:03, 1.66it/s] 37%|███▋ | 24041/65536 [4:07:28<7:12:39, 1.60it/s] 37%|███▋ | 24042/65536 [4:07:28<7:03:45, 1.63it/s] 37%|███▋ | 24043/65536 [4:07:29<6:56:20, 1.66it/s] 37%|███▋ | 24044/65536 [4:07:30<7:01:52, 1.64it/s] 37%|███▋ | 24045/65536 [4:07:30<7:06:25, 1.62it/s] 37%|███▋ | 24046/65536 [4:07:31<7:07:28, 1.62it/s] 37%|███▋ | 24047/65536 [4:07:32<7:10:17, 1.61it/s] 37%|███▋ | 24048/65536 [4:07:32<7:09:42, 1.61it/s] 37%|███▋ | 24049/65536 [4:07:33<7:07:47, 1.62it/s] 37%|███▋ | 24050/65536 [4:07:33<6:58:50, 1.65it/s] 37%|███▋ | 24051/65536 [4:07:34<7:16:28, 1.58it/s] 37%|███▋ | 24052/65536 [4:07:35<7:10:47, 1.60it/s] 37%|███▋ | 24053/65536 [4:07:35<7:01:23, 1.64it/s] 37%|███▋ | 24054/65536 [4:07:36<7:03:34, 1.63it/s] 37%|███▋ | 24055/65536 [4:07:36<6:53:34, 1.67it/s] 37%|███▋ | 24056/65536 [4:07:37<7:07:59, 1.62it/s] 37%|███▋ | 24057/65536 [4:07:38<7:16:07, 1.59it/s] 37%|███▋ | 24058/65536 [4:07:38<7:35:11, 1.52it/s] 37%|███▋ | 24059/65536 [4:07:39<7:27:00, 1.55it/s] 37%|███▋ | 24060/65536 [4:07:40<7:10:19, 1.61it/s] {'loss': 2.9635, 'learning_rate': 6.713297416431982e-07, 'epoch': 1485.19} + 37%|███▋ | 24060/65536 [4:07:40<7:10:19, 1.61it/s] 37%|███▋ | 24061/65536 [4:07:40<7:10:17, 1.61it/s] 37%|███▋ | 24062/65536 [4:07:41<7:17:34, 1.58it/s] 37%|███▋ | 24063/65536 [4:07:42<7:10:24, 1.61it/s] 37%|███▋ | 24064/65536 [4:07:42<7:04:11, 1.63it/s] 37%|███▋ | 24065/65536 [4:07:43<6:56:15, 1.66it/s] 37%|███▋ | 24066/65536 [4:07:43<6:57:44, 1.65it/s] 37%|███▋ | 24067/65536 [4:07:44<7:05:02, 1.63it/s] 37%|███▋ | 24068/65536 [4:07:45<6:55:58, 1.66it/s] 37%|███▋ | 24069/65536 [4:07:45<6:54:56, 1.67it/s] 37%|███▋ | 24070/65536 [4:07:46<6:53:10, 1.67it/s] 37%|███▋ | 24071/65536 [4:07:46<6:47:01, 1.70it/s] 37%|███▋ | 24072/65536 [4:07:47<6:49:27, 1.69it/s] 37%|███▋ | 24073/65536 [4:07:48<6:57:51, 1.65it/s] 37%|███▋ | 24074/65536 [4:07:48<7:14:30, 1.59it/s] 37%|███▋ | 24075/65536 [4:07:49<7:20:30, 1.57it/s] 37%|███▋ | 24076/65536 [4:07:49<7:16:53, 1.58it/s] 37%|███▋ | 24077/65536 [4:07:50<7:22:25, 1.56it/s] 37%|███▋ | 24078/65536 [4:07:51<7:09:01, 1.61it/s] 37%|███▋ | 24079/65536 [4:07:51<7:03:28, 1.63it/s] 37%|███▋ | 24080/65536 [4:07:52<7:05:33, 1.62it/s] {'loss': 3.0368, 'learning_rate': 6.710542426839721e-07, 'epoch': 1486.42} + 37%|███▋ | 24080/65536 [4:07:52<7:05:33, 1.62it/s] 37%|███▋ | 24081/65536 [4:07:53<7:00:01, 1.64it/s] 37%|███▋ | 24082/65536 [4:07:53<6:55:34, 1.66it/s] 37%|███▋ | 24083/65536 [4:07:54<6:51:27, 1.68it/s] 37%|███▋ | 24084/65536 [4:07:54<6:51:37, 1.68it/s] 37%|███▋ | 24085/65536 [4:07:55<6:51:17, 1.68it/s] 37%|███▋ | 24086/65536 [4:07:56<7:14:33, 1.59it/s] 37%|███▋ | 24087/65536 [4:07:56<7:14:23, 1.59it/s] 37%|███▋ | 24088/65536 [4:07:57<7:04:57, 1.63it/s] 37%|███▋ | 24089/65536 [4:07:57<7:07:28, 1.62it/s] 37%|███▋ | 24090/65536 [4:07:58<7:16:20, 1.58it/s] 37%|███▋ | 24091/65536 [4:07:59<7:02:33, 1.63it/s] 37%|███▋ | 24092/65536 [4:07:59<7:03:14, 1.63it/s] 37%|███▋ | 24093/65536 [4:08:00<7:15:24, 1.59it/s] 37%|███▋ | 24094/65536 [4:08:01<7:11:37, 1.60it/s] 37%|███▋ | 24095/65536 [4:08:01<7:16:11, 1.58it/s] 37%|███▋ | 24096/65536 [4:08:02<7:14:54, 1.59it/s] 37%|███▋ | 24097/65536 [4:08:02<7:11:39, 1.60it/s] 37%|███▋ | 24098/65536 [4:08:03<7:06:07, 1.62it/s] 37%|███▋ | 24099/65536 [4:08:04<7:02:12, 1.64it/s] 37%|███▋ | 24100/65536 [4:08:04<6:53:36, 1.67it/s] {'loss': 2.8998, 'learning_rate': 6.707787437247458e-07, 'epoch': 1487.65} + 37%|███▋ | 24100/65536 [4:08:04<6:53:36, 1.67it/s] 37%|███▋ | 24101/65536 [4:08:05<6:56:23, 1.66it/s] 37%|███▋ | 24102/65536 [4:08:05<6:59:55, 1.64it/s] 37%|███▋ | 24103/65536 [4:08:06<7:08:30, 1.61it/s] 37%|███▋ | 24104/65536 [4:08:07<7:16:51, 1.58it/s] 37%|███▋ | 24105/65536 [4:08:07<7:06:21, 1.62it/s] 37%|███▋ | 24106/65536 [4:08:08<7:25:50, 1.55it/s] 37%|███▋ | 24107/65536 [4:08:09<7:31:09, 1.53it/s] 37%|███▋ | 24108/65536 [4:08:09<7:22:59, 1.56it/s] 37%|███▋ | 24109/65536 [4:08:10<7:13:30, 1.59it/s] 37%|███▋ | 24110/65536 [4:08:11<7:10:02, 1.61it/s] 37%|███▋ | 24111/65536 [4:08:11<7:13:11, 1.59it/s] 37%|███▋ | 24112/65536 [4:08:12<7:06:55, 1.62it/s] 37%|███▋ | 24113/65536 [4:08:12<6:57:10, 1.65it/s] 37%|███▋ | 24114/65536 [4:08:13<6:57:30, 1.65it/s] 37%|███▋ | 24115/65536 [4:08:14<6:52:06, 1.68it/s] 37%|███▋ | 24116/65536 [4:08:14<7:07:29, 1.61it/s] 37%|███▋ | 24117/65536 [4:08:15<7:07:23, 1.62it/s] 37%|███▋ | 24118/65536 [4:08:15<6:55:31, 1.66it/s] 37%|███▋ | 24119/65536 [4:08:16<6:50:50, 1.68it/s] 37%|███▋ | 24120/65536 [4:08:17<6:55:46, 1.66it/s] {'loss': 2.9467, 'learning_rate': 6.705032447655197e-07, 'epoch': 1488.89} + 37%|███▋ | 24120/65536 [4:08:17<6:55:46, 1.66it/s] 37%|███▋ | 24121/65536 [4:08:17<7:00:41, 1.64it/s] 37%|███▋ | 24122/65536 [4:08:18<7:09:22, 1.61it/s] 37%|███▋ | 24123/65536 [4:08:18<7:04:00, 1.63it/s] 37%|███▋ | 24124/65536 [4:08:19<6:54:36, 1.66it/s] 37%|███▋ | 24125/65536 [4:08:20<6:53:28, 1.67it/s] 37%|███▋ | 24126/65536 [4:08:20<6:46:50, 1.70it/s] 37%|███▋ | 24127/65536 [4:08:21<7:01:56, 1.64it/s] 37%|███▋ | 24128/65536 [4:08:22<7:09:32, 1.61it/s] 37%|███▋ | 24129/65536 [4:08:22<7:01:27, 1.64it/s] 37%|███▋ | 24130/65536 [4:08:23<6:57:54, 1.65it/s] 37%|███▋ | 24131/65536 [4:08:23<6:52:25, 1.67it/s] 37%|███▋ | 24132/65536 [4:08:24<6:52:51, 1.67it/s] 37%|███▋ | 24133/65536 [4:08:24<6:52:55, 1.67it/s] 37%|███▋ | 24134/65536 [4:08:25<6:48:28, 1.69it/s] 37%|███▋ | 24135/65536 [4:08:26<6:56:34, 1.66it/s] 37%|███▋ | 24136/65536 [4:08:26<6:55:06, 1.66it/s] 37%|███▋ | 24137/65536 [4:08:27<6:48:32, 1.69it/s] 37%|███▋ | 24138/65536 [4:08:28<7:12:34, 1.60it/s] 37%|███▋ | 24139/65536 [4:08:28<7:36:02, 1.51it/s] 37%|███▋ | 24140/65536 [4:08:29<7:22:35, 1.56it/s] {'loss': 3.0443, 'learning_rate': 6.702277458062935e-07, 'epoch': 1490.12} + 37%|███▋ | 24140/65536 [4:08:29<7:22:35, 1.56it/s] 37%|███▋ | 24141/65536 [4:08:30<7:21:32, 1.56it/s] 37%|███▋ | 24142/65536 [4:08:30<7:11:16, 1.60it/s] 37%|███▋ | 24143/65536 [4:08:31<7:04:19, 1.63it/s] 37%|███▋ | 24144/65536 [4:08:31<7:03:30, 1.63it/s] 37%|███▋ | 24145/65536 [4:08:32<7:12:43, 1.59it/s] 37%|███▋ | 24146/65536 [4:08:33<7:10:43, 1.60it/s] 37%|███▋ | 24147/65536 [4:08:33<7:07:22, 1.61it/s] 37%|███▋ | 24148/65536 [4:08:34<6:53:23, 1.67it/s] 37%|███▋ | 24149/65536 [4:08:34<7:06:30, 1.62it/s] 37%|███▋ | 24150/65536 [4:08:35<6:58:54, 1.65it/s] 37%|███▋ | 24151/65536 [4:08:36<7:09:06, 1.61it/s] 37%|███▋ | 24152/65536 [4:08:36<7:00:27, 1.64it/s] 37%|███▋ | 24153/65536 [4:08:37<7:03:21, 1.63it/s] 37%|███▋ | 24154/65536 [4:08:37<7:05:38, 1.62it/s] 37%|███▋ | 24155/65536 [4:08:38<7:29:46, 1.53it/s] 37%|███▋ | 24156/65536 [4:08:39<7:19:42, 1.57it/s] 37%|███▋ | 24157/65536 [4:08:39<7:27:37, 1.54it/s] 37%|███▋ | 24158/65536 [4:08:40<7:28:03, 1.54it/s] 37%|███▋ | 24159/65536 [4:08:41<7:16:29, 1.58it/s] 37%|███▋ | 24160/65536 [4:08:41<7:04:08, 1.63it/s] {'loss': 2.9395, 'learning_rate': 6.699522468470674e-07, 'epoch': 1491.36} + 37%|███▋ | 24160/65536 [4:08:41<7:04:08, 1.63it/s] 37%|███▋ | 24161/65536 [4:08:42<6:54:35, 1.66it/s] 37%|███▋ | 24162/65536 [4:08:42<6:45:35, 1.70it/s] 37%|███▋ | 24163/65536 [4:08:43<6:44:52, 1.70it/s] 37%|███▋ | 24164/65536 [4:08:44<6:42:56, 1.71it/s] 37%|███▋ | 24165/65536 [4:08:44<6:54:52, 1.66it/s] 37%|███▋ | 24166/65536 [4:08:45<7:04:09, 1.63it/s] 37%|███▋ | 24167/65536 [4:08:45<7:01:38, 1.64it/s] 37%|███▋ | 24168/65536 [4:08:46<6:58:37, 1.65it/s] 37%|███▋ | 24169/65536 [4:08:47<7:04:43, 1.62it/s] 37%|███▋ | 24170/65536 [4:08:47<6:51:47, 1.67it/s] 37%|███▋ | 24171/65536 [4:08:48<7:08:47, 1.61it/s] 37%|███▋ | 24172/65536 [4:08:49<7:13:47, 1.59it/s] 37%|███▋ | 24173/65536 [4:08:49<7:07:41, 1.61it/s] 37%|███▋ | 24174/65536 [4:08:50<6:53:52, 1.67it/s] 37%|███▋ | 24175/65536 [4:08:50<6:57:19, 1.65it/s] 37%|███▋ | 24176/65536 [4:08:51<6:46:35, 1.70it/s] 37%|███▋ | 24177/65536 [4:08:52<6:43:55, 1.71it/s] 37%|███▋ | 24178/65536 [4:08:52<6:40:19, 1.72it/s] 37%|███▋ | 24179/65536 [4:08:53<6:53:37, 1.67it/s] 37%|███▋ | 24180/65536 [4:08:53<6:59:36, 1.64it/s] {'loss': 3.0041, 'learning_rate': 6.696767478878413e-07, 'epoch': 1492.59} + 37%|███▋ | 24180/65536 [4:08:53<6:59:36, 1.64it/s] 37%|███▋ | 24181/65536 [4:08:54<7:14:32, 1.59it/s] 37%|███▋ | 24182/65536 [4:08:55<7:09:06, 1.61it/s] 37%|███▋ | 24183/65536 [4:08:55<7:03:30, 1.63it/s] 37%|███▋ | 24184/65536 [4:08:56<6:54:20, 1.66it/s] 37%|███▋ | 24185/65536 [4:08:56<6:59:55, 1.64it/s] 37%|███▋ | 24186/65536 [4:08:57<6:59:08, 1.64it/s] 37%|███▋ | 24187/65536 [4:08:58<7:14:36, 1.59it/s] 37%|███▋ | 24188/65536 [4:08:58<7:10:44, 1.60it/s] 37%|███▋ | 24189/65536 [4:08:59<7:04:01, 1.63it/s] 37%|███��� | 24190/65536 [4:09:00<7:12:25, 1.59it/s] 37%|███▋ | 24191/65536 [4:09:00<7:07:38, 1.61it/s] 37%|███▋ | 24192/65536 [4:09:01<7:02:12, 1.63it/s] 37%|███▋ | 24193/65536 [4:09:01<7:02:24, 1.63it/s] 37%|███▋ | 24194/65536 [4:09:02<7:01:20, 1.64it/s] 37%|███▋ | 24195/65536 [4:09:03<6:54:57, 1.66it/s] 37%|███▋ | 24196/65536 [4:09:03<6:55:37, 1.66it/s] 37%|███▋ | 24197/65536 [4:09:04<6:58:56, 1.64it/s] 37%|███▋ | 24198/65536 [4:09:04<7:05:54, 1.62it/s] 37%|███▋ | 24199/65536 [4:09:05<6:59:54, 1.64it/s] 37%|███▋ | 24200/65536 [4:09:06<7:15:06, 1.58it/s] {'loss': 2.9951, 'learning_rate': 6.694012489286151e-07, 'epoch': 1493.83} + 37%|███▋ | 24200/65536 [4:09:06<7:15:06, 1.58it/s] 37%|███▋ | 24201/65536 [4:09:06<7:03:57, 1.62it/s] 37%|███▋ | 24202/65536 [4:09:07<7:01:40, 1.63it/s] 37%|███▋ | 24203/65536 [4:09:08<7:14:25, 1.59it/s] 37%|███▋ | 24204/65536 [4:09:08<6:58:16, 1.65it/s] 37%|███▋ | 24205/65536 [4:09:09<6:52:35, 1.67it/s] 37%|███▋ | 24206/65536 [4:09:09<6:52:06, 1.67it/s] 37%|███▋ | 24207/65536 [4:09:10<6:54:18, 1.66it/s] 37%|███▋ | 24208/65536 [4:09:10<6:52:20, 1.67it/s] 37%|███▋ | 24209/65536 [4:09:11<7:11:04, 1.60it/s] 37%|███▋ | 24210/65536 [4:09:12<7:18:25, 1.57it/s] 37%|███▋ | 24211/65536 [4:09:12<7:14:14, 1.59it/s] 37%|███▋ | 24212/65536 [4:09:13<7:02:37, 1.63it/s] 37%|███▋ | 24213/65536 [4:09:14<6:51:08, 1.68it/s] 37%|███▋ | 24214/65536 [4:09:14<6:57:22, 1.65it/s] 37%|███▋ | 24215/65536 [4:09:15<6:56:26, 1.65it/s] 37%|███▋ | 24216/65536 [4:09:15<7:07:30, 1.61it/s] 37%|███▋ | 24217/65536 [4:09:16<7:04:49, 1.62it/s] 37%|███▋ | 24218/65536 [4:09:17<7:00:09, 1.64it/s] 37%|███▋ | 24219/65536 [4:09:17<7:00:26, 1.64it/s] 37%|███▋ | 24220/65536 [4:09:18<7:08:32, 1.61it/s] {'loss': 3.0352, 'learning_rate': 6.69125749969389e-07, 'epoch': 1495.06} + 37%|███▋ | 24220/65536 [4:09:18<7:08:32, 1.61it/s] 37%|███▋ | 24221/65536 [4:09:19<7:05:53, 1.62it/s] 37%|███▋ | 24222/65536 [4:09:19<7:04:47, 1.62it/s] 37%|███▋ | 24223/65536 [4:09:20<7:17:56, 1.57it/s] 37%|███▋ | 24224/65536 [4:09:20<7:10:46, 1.60it/s] 37%|███▋ | 24225/65536 [4:09:21<7:01:07, 1.63it/s] 37%|███▋ | 24226/65536 [4:09:22<6:53:40, 1.66it/s] 37%|███▋ | 24227/65536 [4:09:22<6:50:00, 1.68it/s] 37%|███▋ | 24228/65536 [4:09:23<6:42:41, 1.71it/s] 37%|███▋ | 24229/65536 [4:09:23<6:51:09, 1.67it/s] 37%|███▋ | 24230/65536 [4:09:24<6:57:21, 1.65it/s] 37%|███▋ | 24231/65536 [4:09:25<7:02:56, 1.63it/s] 37%|███▋ | 24232/65536 [4:09:25<7:13:36, 1.59it/s] 37%|███▋ | 24233/65536 [4:09:26<7:18:44, 1.57it/s] 37%|███▋ | 24234/65536 [4:09:27<7:12:55, 1.59it/s] 37%|███▋ | 24235/65536 [4:09:27<6:54:46, 1.66it/s] 37%|███▋ | 24236/65536 [4:09:28<7:02:19, 1.63it/s] 37%|███▋ | 24237/65536 [4:09:28<7:21:41, 1.56it/s] 37%|███▋ | 24238/65536 [4:09:29<7:07:10, 1.61it/s] 37%|███▋ | 24239/65536 [4:09:30<6:54:23, 1.66it/s] 37%|███▋ | 24240/65536 [4:09:30<7:06:56, 1.61it/s] {'loss': 2.9081, 'learning_rate': 6.688502510101628e-07, 'epoch': 1496.3} + 37%|███▋ | 24240/65536 [4:09:30<7:06:56, 1.61it/s] 37%|███▋ | 24241/65536 [4:09:31<7:05:57, 1.62it/s] 37%|███▋ | 24242/65536 [4:09:31<6:56:22, 1.65it/s] 37%|███▋ | 24243/65536 [4:09:32<7:03:51, 1.62it/s] 37%|███▋ | 24244/65536 [4:09:33<7:04:26, 1.62it/s] 37%|███▋ | 24245/65536 [4:09:33<7:00:36, 1.64it/s] 37%|███▋ | 24246/65536 [4:09:34<6:59:06, 1.64it/s] 37%|███▋ | 24247/65536 [4:09:35<7:02:26, 1.63it/s] 37%|███▋ | 24248/65536 [4:09:35<7:01:39, 1.63it/s] 37%|███▋ | 24249/65536 [4:09:36<7:03:59, 1.62it/s] 37%|███▋ | 24250/65536 [4:09:36<6:58:02, 1.65it/s] 37%|███▋ | 24251/65536 [4:09:37<6:57:46, 1.65it/s] 37%|███▋ | 24252/65536 [4:09:38<7:13:06, 1.59it/s] 37%|███▋ | 24253/65536 [4:09:38<7:06:07, 1.61it/s] 37%|███▋ | 24254/65536 [4:09:39<6:59:50, 1.64it/s] 37%|███▋ | 24255/65536 [4:09:39<6:58:24, 1.64it/s] 37%|███▋ | 24256/65536 [4:09:40<6:54:42, 1.66it/s] 37%|███▋ | 24257/65536 [4:09:41<6:51:17, 1.67it/s] 37%|███▋ | 24258/65536 [4:09:41<6:59:36, 1.64it/s] 37%|███▋ | 24259/65536 [4:09:42<6:56:57, 1.65it/s] 37%|███▋ | 24260/65536 [4:09:42<7:06:08, 1.61it/s] {'loss': 3.0379, 'learning_rate': 6.685747520509367e-07, 'epoch': 1497.53} + 37%|███▋ | 24260/65536 [4:09:42<7:06:08, 1.61it/s] 37%|███▋ | 24261/65536 [4:09:43<7:12:54, 1.59it/s] 37%|███▋ | 24262/65536 [4:09:44<7:10:14, 1.60it/s] 37%|███▋ | 24263/65536 [4:09:44<7:07:51, 1.61it/s] 37%|███▋ | 24264/65536 [4:09:45<7:12:48, 1.59it/s] 37%|███▋ | 24265/65536 [4:09:46<7:09:36, 1.60it/s] 37%|███▋ | 24266/65536 [4:09:46<7:02:14, 1.63it/s] 37%|███▋ | 24267/65536 [4:09:47<7:02:14, 1.63it/s] 37%|███▋ | 24268/65536 [4:09:48<7:22:02, 1.56it/s] 37%|███▋ | 24269/65536 [4:09:48<7:12:38, 1.59it/s] 37%|███▋ | 24270/65536 [4:09:49<7:12:54, 1.59it/s] 37%|███▋ | 24271/65536 [4:09:49<6:59:22, 1.64it/s] 37%|███▋ | 24272/65536 [4:09:50<7:04:35, 1.62it/s] 37%|███▋ | 24273/65536 [4:09:51<6:55:52, 1.65it/s] 37%|███▋ | 24274/65536 [4:09:51<6:50:52, 1.67it/s] 37%|███▋ | 24275/65536 [4:09:52<6:55:46, 1.65it/s] 37%|███▋ | 24276/65536 [4:09:52<6:53:48, 1.66it/s] 37%|███▋ | 24277/65536 [4:09:53<6:56:48, 1.65it/s] 37%|███▋ | 24278/65536 [4:09:54<6:50:40, 1.67it/s] 37%|███▋ | 24279/65536 [4:09:54<7:00:00, 1.64it/s] 37%|███▋ | 24280/65536 [4:09:55<6:57:39, 1.65it/s] {'loss': 2.9549, 'learning_rate': 6.682992530917105e-07, 'epoch': 1498.77} + 37%|███▋ | 24280/65536 [4:09:55<6:57:39, 1.65it/s] 37%|███▋ | 24281/65536 [4:09:55<7:14:28, 1.58it/s] 37%|███▋ | 24282/65536 [4:09:56<7:02:00, 1.63it/s] 37%|███▋ | 24283/65536 [4:09:57<6:55:40, 1.65it/s] 37%|███▋ | 24284/65536 [4:09:57<7:02:38, 1.63it/s] 37%|███▋ | 24285/65536 [4:09:58<7:05:00, 1.62it/s] 37%|███▋ | 24286/65536 [4:09:58<7:04:03, 1.62it/s] 37%|███▋ | 24287/65536 [4:09:59<6:53:46, 1.66it/s] 37%|███▋ | 24288/65536 [4:10:00<6:45:09, 1.70it/s] 37%|███▋ | 24289/65536 [4:10:00<6:50:48, 1.67it/s] 37%|███▋ | 24290/65536 [4:10:01<6:46:07, 1.69it/s] 37%|███▋ | 24291/65536 [4:10:01<6:57:16, 1.65it/s] 37%|███▋ | 24292/65536 [4:10:02<6:48:07, 1.68it/s] 37%|███▋ | 24293/65536 [4:10:03<7:04:07, 1.62it/s] 37%|███▋ | 24294/65536 [4:10:03<7:04:29, 1.62it/s] 37%|███▋ | 24295/65536 [4:10:04<6:54:57, 1.66it/s] 37%|███▋ | 24296/65536 [4:10:04<6:52:28, 1.67it/s] 37%|███▋ | 24297/65536 [4:10:05<6:54:42, 1.66it/s] 37%|███▋ | 24298/65536 [4:10:06<7:10:25, 1.60it/s] 37%|███▋ | 24299/65536 [4:10:06<7:01:07, 1.63it/s] 37%|███▋ | 24300/65536 [4:10:07<7:00:01, 1.64it/s] {'loss': 3.0366, 'learning_rate': 6.680237541324844e-07, 'epoch': 1500.0} + 37%|███▋ | 24300/65536 [4:10:07<7:00:01, 1.64it/s] 37%|███▋ | 24301/65536 [4:10:08<7:11:03, 1.59it/s] 37%|███▋ | 24302/65536 [4:10:08<7:13:11, 1.59it/s] 37%|███▋ | 24303/65536 [4:10:09<7:08:17, 1.60it/s] 37%|███▋ | 24304/65536 [4:10:09<7:07:52, 1.61it/s] 37%|███▋ | 24305/65536 [4:10:10<7:00:57, 1.63it/s] 37%|███▋ | 24306/65536 [4:10:11<6:55:10, 1.66it/s] 37%|███▋ | 24307/65536 [4:10:11<7:00:41, 1.63it/s] 37%|███▋ | 24308/65536 [4:10:12<7:14:11, 1.58it/s] 37%|███▋ | 24309/65536 [4:10:13<7:05:51, 1.61it/s] 37%|███▋ | 24310/65536 [4:10:13<7:02:37, 1.63it/s] 37%|███▋ | 24311/65536 [4:10:14<7:08:58, 1.60it/s] 37%|███▋ | 24312/65536 [4:10:14<7:05:31, 1.61it/s] 37%|███▋ | 24313/65536 [4:10:15<7:03:09, 1.62it/s] 37%|███▋ | 24314/65536 [4:10:16<7:00:37, 1.63it/s] 37%|███▋ | 24315/65536 [4:10:16<7:01:09, 1.63it/s] 37%|███▋ | 24316/65536 [4:10:17<6:56:03, 1.65it/s] 37%|███▋ | 24317/65536 [4:10:18<7:12:30, 1.59it/s] 37%|███▋ | 24318/65536 [4:10:18<7:20:02, 1.56it/s] 37%|███▋ | 24319/65536 [4:10:19<7:09:50, 1.60it/s] 37%|███▋ | 24320/65536 [4:10:19<7:08:07, 1.60it/s] {'loss': 2.9446, 'learning_rate': 6.677482551732582e-07, 'epoch': 1501.23} + 37%|███▋ | 24320/65536 [4:10:19<7:08:07, 1.60it/s] 37%|███▋ | 24321/65536 [4:10:20<7:02:08, 1.63it/s] 37%|███▋ | 24322/65536 [4:10:21<7:04:09, 1.62it/s] 37%|███▋ | 24323/65536 [4:10:21<6:54:05, 1.66it/s] 37%|��██▋ | 24324/65536 [4:10:22<6:53:11, 1.66it/s] 37%|███▋ | 24325/65536 [4:10:22<6:55:02, 1.65it/s] 37%|███▋ | 24326/65536 [4:10:23<6:56:04, 1.65it/s] 37%|███▋ | 24327/65536 [4:10:24<6:50:45, 1.67it/s] 37%|███▋ | 24328/65536 [4:10:24<6:51:14, 1.67it/s] 37%|███▋ | 24329/65536 [4:10:25<6:51:50, 1.67it/s] 37%|███▋ | 24330/65536 [4:10:25<6:52:12, 1.67it/s] 37%|███▋ | 24331/65536 [4:10:26<6:56:42, 1.65it/s] 37%|███▋ | 24332/65536 [4:10:27<6:57:42, 1.64it/s] 37%|███▋ | 24333/65536 [4:10:27<7:20:02, 1.56it/s] 37%|███▋ | 24334/65536 [4:10:28<7:17:09, 1.57it/s] 37%|███▋ | 24335/65536 [4:10:29<7:16:10, 1.57it/s] 37%|███▋ | 24336/65536 [4:10:29<7:06:15, 1.61it/s] 37%|███▋ | 24337/65536 [4:10:30<6:49:11, 1.68it/s] 37%|███▋ | 24338/65536 [4:10:30<6:55:58, 1.65it/s] 37%|███▋ | 24339/65536 [4:10:31<7:07:31, 1.61it/s] 37%|███▋ | 24340/65536 [4:10:32<7:03:37, 1.62it/s] {'loss': 2.9547, 'learning_rate': 6.67472756214032e-07, 'epoch': 1502.47} + 37%|███▋ | 24340/65536 [4:10:32<7:03:37, 1.62it/s] 37%|███▋ | 24341/65536 [4:10:32<7:12:52, 1.59it/s] 37%|███▋ | 24342/65536 [4:10:33<7:07:18, 1.61it/s] 37%|███▋ | 24343/65536 [4:10:33<6:56:46, 1.65it/s] 37%|███▋ | 24344/65536 [4:10:34<7:03:28, 1.62it/s] 37%|███▋ | 24345/65536 [4:10:35<7:00:07, 1.63it/s] 37%|███▋ | 24346/65536 [4:10:35<6:58:34, 1.64it/s] 37%|███▋ | 24347/65536 [4:10:36<6:49:18, 1.68it/s] 37%|███▋ | 24348/65536 [4:10:36<6:52:29, 1.66it/s] 37%|███▋ | 24349/65536 [4:10:37<7:15:18, 1.58it/s] 37%|███▋ | 24350/65536 [4:10:38<7:15:16, 1.58it/s] 37%|███▋ | 24351/65536 [4:10:38<7:07:12, 1.61it/s] 37%|███▋ | 24352/65536 [4:10:39<6:50:16, 1.67it/s] 37%|███▋ | 24353/65536 [4:10:40<6:51:38, 1.67it/s] 37%|███▋ | 24354/65536 [4:10:40<6:51:57, 1.67it/s] 37%|███▋ | 24355/65536 [4:10:41<6:54:26, 1.66it/s] 37%|███▋ | 24356/65536 [4:10:41<7:00:43, 1.63it/s] 37%|███▋ | 24357/65536 [4:10:42<7:01:11, 1.63it/s] 37%|███▋ | 24358/65536 [4:10:43<7:01:00, 1.63it/s] 37%|███▋ | 24359/65536 [4:10:43<7:00:57, 1.63it/s] 37%|███▋ | 24360/65536 [4:10:44<6:54:43, 1.65it/s] {'loss': 2.9982, 'learning_rate': 6.671972572548059e-07, 'epoch': 1503.7} + 37%|███▋ | 24360/65536 [4:10:44<6:54:43, 1.65it/s] 37%|███▋ | 24361/65536 [4:10:44<6:53:49, 1.66it/s] 37%|███▋ | 24362/65536 [4:10:45<7:00:21, 1.63it/s] 37%|███▋ | 24363/65536 [4:10:46<7:06:24, 1.61it/s] 37%|███▋ | 24364/65536 [4:10:46<7:06:47, 1.61it/s] 37%|███▋ | 24365/65536 [4:10:47<7:09:10, 1.60it/s] 37%|███▋ | 24366/65536 [4:10:48<7:11:16, 1.59it/s] 37%|███▋ | 24367/65536 [4:10:48<7:04:23, 1.62it/s] 37%|███▋ | 24368/65536 [4:10:49<7:06:01, 1.61it/s] 37%|███▋ | 24369/65536 [4:10:49<7:02:04, 1.63it/s] 37%|███▋ | 24370/65536 [4:10:50<6:54:22, 1.66it/s] 37%|███▋ | 24371/65536 [4:10:51<6:50:54, 1.67it/s] 37%|███▋ | 24372/65536 [4:10:51<6:49:40, 1.67it/s] 37%|███▋ | 24373/65536 [4:10:52<6:46:05, 1.69it/s] 37%|███▋ | 24374/65536 [4:10:52<6:40:18, 1.71it/s] 37%|███▋ | 24375/65536 [4:10:53<6:39:58, 1.72it/s] 37%|███▋ | 24376/65536 [4:10:54<7:01:47, 1.63it/s] 37%|███▋ | 24377/65536 [4:10:54<7:13:54, 1.58it/s] 37%|███▋ | 24378/65536 [4:10:55<7:10:15, 1.59it/s] 37%|███▋ | 24379/65536 [4:10:56<7:13:00, 1.58it/s] 37%|███▋ | 24380/65536 [4:10:56<7:16:45, 1.57it/s] {'loss': 3.0066, 'learning_rate': 6.669217582955797e-07, 'epoch': 1504.94} + 37%|███▋ | 24380/65536 [4:10:56<7:16:45, 1.57it/s] 37%|███▋ | 24381/65536 [4:10:57<7:03:31, 1.62it/s] 37%|███▋ | 24382/65536 [4:10:57<7:12:15, 1.59it/s] 37%|███▋ | 24383/65536 [4:10:58<7:03:57, 1.62it/s] 37%|███▋ | 24384/65536 [4:10:59<7:15:32, 1.57it/s] 37%|███▋ | 24385/65536 [4:10:59<7:14:30, 1.58it/s] 37%|███▋ | 24386/65536 [4:11:00<7:10:27, 1.59it/s] 37%|███▋ | 24387/65536 [4:11:01<7:08:02, 1.60it/s] 37%|███▋ | 24388/65536 [4:11:01<6:58:31, 1.64it/s] 37%|███▋ | 24389/65536 [4:11:02<6:54:07, 1.66it/s] 37%|███▋ | 24390/65536 [4:11:02<6:53:37, 1.66it/s] 37%|███▋ | 24391/65536 [4:11:03<6:49:34, 1.67it/s] 37%|███▋ | 24392/65536 [4:11:03<6:53:38, 1.66it/s] 37%|███▋ | 24393/65536 [4:11:04<6:54:50, 1.65it/s] 37%|███▋ | 24394/65536 [4:11:05<6:50:42, 1.67it/s] 37%|███▋ | 24395/65536 [4:11:05<6:52:00, 1.66it/s] 37%|███▋ | 24396/65536 [4:11:06<7:00:59, 1.63it/s] 37%|███▋ | 24397/65536 [4:11:07<7:05:23, 1.61it/s] 37%|███▋ | 24398/65536 [4:11:07<7:08:59, 1.60it/s] 37%|███▋ | 24399/65536 [4:11:08<7:15:45, 1.57it/s] 37%|███▋ | 24400/65536 [4:11:08<7:13:33, 1.58it/s] {'loss': 3.0048, 'learning_rate': 6.666462593363536e-07, 'epoch': 1506.17} + 37%|███▋ | 24400/65536 [4:11:08<7:13:33, 1.58it/s] 37%|███▋ | 24401/65536 [4:11:09<7:04:47, 1.61it/s] 37%|███▋ | 24402/65536 [4:11:10<6:58:41, 1.64it/s] 37%|███▋ | 24403/65536 [4:11:10<7:04:47, 1.61it/s] 37%|███▋ | 24404/65536 [4:11:11<6:54:46, 1.65it/s] 37%|███▋ | 24405/65536 [4:11:11<6:50:13, 1.67it/s] 37%|███▋ | 24406/65536 [4:11:12<6:52:37, 1.66it/s] 37%|███▋ | 24407/65536 [4:11:13<7:02:22, 1.62it/s] 37%|███▋ | 24408/65536 [4:11:13<7:02:39, 1.62it/s] 37%|███▋ | 24409/65536 [4:11:14<6:54:44, 1.65it/s] 37%|███▋ | 24410/65536 [4:11:15<6:50:59, 1.67it/s] 37%|███▋ | 24411/65536 [4:11:15<7:02:50, 1.62it/s] 37%|███▋ | 24412/65536 [4:11:16<7:05:14, 1.61it/s] 37%|███▋ | 24413/65536 [4:11:16<7:09:37, 1.60it/s] 37%|███▋ | 24414/65536 [4:11:17<7:17:10, 1.57it/s] 37%|███▋ | 24415/65536 [4:11:18<7:09:25, 1.60it/s] 37%|███▋ | 24416/65536 [4:11:18<7:01:56, 1.62it/s] 37%|███▋ | 24417/65536 [4:11:19<6:57:32, 1.64it/s] 37%|███▋ | 24418/65536 [4:11:19<6:54:23, 1.65it/s] 37%|███▋ | 24419/65536 [4:11:20<6:53:01, 1.66it/s] 37%|███▋ | 24420/65536 [4:11:21<6:55:56, 1.65it/s] {'loss': 3.0346, 'learning_rate': 6.663707603771275e-07, 'epoch': 1507.41} + 37%|███▋ | 24420/65536 [4:11:21<6:55:56, 1.65it/s] 37%|███▋ | 24421/65536 [4:11:21<6:51:34, 1.66it/s] 37%|███▋ | 24422/65536 [4:11:22<6:59:58, 1.63it/s] 37%|███▋ | 24423/65536 [4:11:23<6:53:45, 1.66it/s] 37%|███▋ | 24424/65536 [4:11:23<7:00:26, 1.63it/s] 37%|███▋ | 24425/65536 [4:11:24<6:58:50, 1.64it/s] 37%|███▋ | 24426/65536 [4:11:24<7:13:53, 1.58it/s] 37%|███▋ | 24427/65536 [4:11:25<7:08:51, 1.60it/s] 37%|███▋ | 24428/65536 [4:11:26<7:05:52, 1.61it/s] 37%|███▋ | 24429/65536 [4:11:26<7:01:41, 1.62it/s] 37%|███▋ | 24430/65536 [4:11:27<7:11:25, 1.59it/s] 37%|███▋ | 24431/65536 [4:11:28<7:04:37, 1.61it/s] 37%|███▋ | 24432/65536 [4:11:28<6:55:38, 1.65it/s] 37%|███▋ | 24433/65536 [4:11:29<6:48:37, 1.68it/s] 37%|███▋ | 24434/65536 [4:11:29<6:57:27, 1.64it/s] 37%|███▋ | 24435/65536 [4:11:30<6:49:34, 1.67it/s] 37%|███▋ | 24436/65536 [4:11:30<6:45:17, 1.69it/s] 37%|███▋ | 24437/65536 [4:11:31<6:39:42, 1.71it/s] 37%|███▋ | 24438/65536 [4:11:32<6:47:56, 1.68it/s] 37%|███▋ | 24439/65536 [4:11:32<6:52:50, 1.66it/s] 37%|███▋ | 24440/65536 [4:11:33<6:59:02, 1.63it/s] {'loss': 2.9453, 'learning_rate': 6.660952614179013e-07, 'epoch': 1508.64} + 37%|███▋ | 24440/65536 [4:11:33<6:59:02, 1.63it/s] 37%|███▋ | 24441/65536 [4:11:34<7:04:34, 1.61it/s] 37%|███▋ | 24442/65536 [4:11:34<7:13:06, 1.58it/s] 37%|███▋ | 24443/65536 [4:11:35<7:11:14, 1.59it/s] 37%|███▋ | 24444/65536 [4:11:36<7:24:05, 1.54it/s] 37%|███▋ | 24445/65536 [4:11:36<7:15:21, 1.57it/s] 37%|███▋ | 24446/65536 [4:11:37<7:16:29, 1.57it/s] 37%|███▋ | 24447/65536 [4:11:37<7:10:23, 1.59it/s] 37%|███▋ | 24448/65536 [4:11:38<7:07:32, 1.60it/s] 37%|███▋ | 24449/65536 [4:11:39<6:57:22, 1.64it/s] 37%|███▋ | 24450/65536 [4:11:39<7:02:00, 1.62it/s] 37%|███▋ | 24451/65536 [4:11:40<7:01:02, 1.63it/s] 37%|███▋ | 24452/65536 [4:11:40<7:03:28, 1.62it/s] 37%|███▋ | 24453/65536 [4:11:41<7:06:08, 1.61it/s] 37%|███▋ | 24454/65536 [4:11:42<6:52:29, 1.66it/s] 37%|███▋ | 24455/65536 [4:11:42<6:45:15, 1.69it/s] 37%|███▋ | 24456/65536 [4:11:43<6:52:19, 1.66it/s] 37%|███▋ | 24457/65536 [4:11:43<6:59:03, 1.63it/s] 37%|███▋ | 24458/65536 [4:11:44<6:59:56, 1.63it/s] 37%|███▋ | 24459/65536 [4:11:45<7:08:47, 1.60it/s] 37%|███▋ | 24460/65536 [4:11:45<7:03:24, 1.62it/s] {'loss': 2.9708, 'learning_rate': 6.658197624586752e-07, 'epoch': 1509.88} + 37%|███▋ | 24460/65536 [4:11:45<7:03:24, 1.62it/s] 37%|███▋ | 24461/65536 [4:11:46<7:05:43, 1.61it/s] 37%|███▋ | 24462/65536 [4:11:47<6:59:23, 1.63it/s] 37%|███▋ | 24463/65536 [4:11:47<7:11:48, 1.59it/s] 37%|███▋ | 24464/65536 [4:11:48<7:04:44, 1.61it/s] 37%|███▋ | 24465/65536 [4:11:48<6:54:10, 1.65it/s] 37%|███▋ | 24466/65536 [4:11:49<6:51:27, 1.66it/s] 37%|███▋ | 24467/65536 [4:11:50<6:56:15, 1.64it/s] 37%|███▋ | 24468/65536 [4:11:50<7:06:14, 1.61it/s] 37%|███▋ | 24469/65536 [4:11:51<7:01:27, 1.62it/s] 37%|███▋ | 24470/65536 [4:11:51<7:00:04, 1.63it/s] 37%|███▋ | 24471/65536 [4:11:52<6:58:10, 1.64it/s] 37%|███▋ | 24472/65536 [4:11:53<7:02:35, 1.62it/s] 37%|███▋ | 24473/65536 [4:11:53<7:04:18, 1.61it/s] 37%|███▋ | 24474/65536 [4:11:54<7:06:50, 1.60it/s] 37%|███▋ | 24475/65536 [4:11:55<7:05:26, 1.61it/s] 37%|███▋ | 24476/65536 [4:11:55<7:05:16, 1.61it/s] 37%|███▋ | 24477/65536 [4:11:56<7:10:09, 1.59it/s] 37%|███▋ | 24478/65536 [4:11:56<6:59:45, 1.63it/s] 37%|███▋ | 24479/65536 [4:11:57<7:20:06, 1.55it/s] 37%|███▋ | 24480/65536 [4:11:58<7:16:46, 1.57it/s] {'loss': 2.9684, 'learning_rate': 6.65544263499449e-07, 'epoch': 1511.11} + 37%|███▋ | 24480/65536 [4:11:58<7:16:46, 1.57it/s] 37%|███▋ | 24481/65536 [4:11:58<7:21:28, 1.55it/s] 37%|███▋ | 24482/65536 [4:11:59<7:06:23, 1.60it/s] 37%|███▋ | 24483/65536 [4:12:00<7:03:34, 1.62it/s] 37%|███▋ | 24484/65536 [4:12:00<6:59:30, 1.63it/s] 37%|███▋ | 24485/65536 [4:12:01<6:48:09, 1.68it/s] 37%|███▋ | 24486/65536 [4:12:01<6:49:09, 1.67it/s] 37%|███▋ | 24487/65536 [4:12:02<7:00:20, 1.63it/s] 37%|███▋ | 24488/65536 [4:12:03<6:48:56, 1.67it/s] 37%|███▋ | 24489/65536 [4:12:03<6:44:22, 1.69it/s] 37%|███▋ | 24490/65536 [4:12:04<6:50:25, 1.67it/s] 37%|███▋ | 24491/65536 [4:12:04<6:52:23, 1.66it/s] 37%|███▋ | 24492/65536 [4:12:05<6:59:44, 1.63it/s] 37%|███▋ | 24493/65536 [4:12:06<7:07:15, 1.60it/s] 37%|███▋ | 24494/65536 [4:12:06<7:03:07, 1.62it/s] 37%|███▋ | 24495/65536 [4:12:07<7:12:37, 1.58it/s] 37%|███▋ | 24496/65536 [4:12:08<7:04:06, 1.61it/s] 37%|███▋ | 24497/65536 [4:12:08<7:10:29, 1.59it/s] 37%|███▋ | 24498/65536 [4:12:09<6:55:32, 1.65it/s] 37%|███▋ | 24499/65536 [4:12:09<6:47:39, 1.68it/s] 37%|███▋ | 24500/65536 [4:12:10<6:54:14, 1.65it/s] {'loss': 2.9751, 'learning_rate': 6.652687645402229e-07, 'epoch': 1512.35} + 37%|███▋ | 24500/65536 [4:12:10<6:54:14, 1.65it/s] 37%|███▋ | 24501/65536 [4:12:11<6:52:43, 1.66it/s] 37%|███▋ | 24502/65536 [4:12:11<6:58:01, 1.64it/s] 37%|███▋ | 24503/65536 [4:12:12<7:04:45, 1.61it/s] 37%|███▋ | 24504/65536 [4:12:12<7:06:20, 1.60it/s] 37%|███▋ | 24505/65536 [4:12:13<7:06:45, 1.60it/s] 37%|███▋ | 24506/65536 [4:12:14<6:58:08, 1.64it/s] 37%|███▋ | 24507/65536 [4:12:14<6:51:18, 1.66it/s] 37%|███▋ | 24508/65536 [4:12:15<6:53:14, 1.65it/s] 37%|███▋ | 24509/65536 [4:12:15<6:51:04, 1.66it/s] 37%|███▋ | 24510/65536 [4:12:16<7:19:56, 1.55it/s] 37%|███▋ | 24511/65536 [4:12:17<7:22:06, 1.55it/s] 37%|███▋ | 24512/65536 [4:12:17<7:17:12, 1.56it/s] 37%|███▋ | 24513/65536 [4:12:18<7:17:13, 1.56it/s] 37%|███▋ | 24514/65536 [4:12:19<7:09:28, 1.59it/s] 37%|███▋ | 24515/65536 [4:12:19<7:07:17, 1.60it/s] 37%|███▋ | 24516/65536 [4:12:20<7:05:41, 1.61it/s] 37%|███▋ | 24517/65536 [4:12:20<6:54:28, 1.65it/s] 37%|███▋ | 24518/65536 [4:12:21<6:44:39, 1.69it/s] 37%|███▋ | 24519/65536 [4:12:22<6:45:58, 1.68it/s] 37%|███▋ | 24520/65536 [4:12:22<7:02:23, 1.62it/s] {'loss': 2.9727, 'learning_rate': 6.649932655809967e-07, 'epoch': 1513.58} + 37%|███▋ | 24520/65536 [4:12:22<7:02:23, 1.62it/s] 37%|███▋ | 24521/65536 [4:12:23<7:01:20, 1.62it/s] 37%|███▋ | 24522/65536 [4:12:24<6:55:42, 1.64it/s] 37%|███▋ | 24523/65536 [4:12:24<6:57:04, 1.64it/s] 37%|███▋ | 24524/65536 [4:12:25<7:05:16, 1.61it/s] 37%|███▋ | 24525/65536 [4:12:25<7:02:41, 1.62it/s] 37%|███▋ | 24526/65536 [4:12:26<7:03:29, 1.61it/s] 37%|███▋ | 24527/65536 [4:12:27<7:12:28, 1.58it/s] 37%|███▋ | 24528/65536 [4:12:27<7:11:47, 1.58it/s] 37%|███▋ | 24529/65536 [4:12:28<7:10:45, 1.59it/s] 37%|███▋ | 24530/65536 [4:12:29<7:05:53, 1.60it/s] 37%|███▋ | 24531/65536 [4:12:29<7:09:27, 1.59it/s] 37%|███▋ | 24532/65536 [4:12:30<7:04:37, 1.61it/s] 37%|███▋ | 24533/65536 [4:12:30<7:02:13, 1.62it/s] 37%|███▋ | 24534/65536 [4:12:31<7:14:50, 1.57it/s] 37%|███▋ | 24535/65536 [4:12:32<7:10:52, 1.59it/s] 37%|███▋ | 24536/65536 [4:12:32<7:04:11, 1.61it/s] 37%|███▋ | 24537/65536 [4:12:33<6:53:42, 1.65it/s] 37%|███▋ | 24538/65536 [4:12:33<6:58:25, 1.63it/s] 37%|███▋ | 24539/65536 [4:12:34<7:04:26, 1.61it/s] 37%|███▋ | 24540/65536 [4:12:35<6:59:34, 1.63it/s] {'loss': 2.9699, 'learning_rate': 6.647177666217706e-07, 'epoch': 1514.81} + 37%|███▋ | 24540/65536 [4:12:35<6:59:34, 1.63it/s] 37%|███▋ | 24541/65536 [4:12:35<6:56:02, 1.64it/s] 37%|███▋ | 24542/65536 [4:12:36<6:56:16, 1.64it/s] 37%|███▋ | 24543/65536 [4:12:37<7:01:13, 1.62it/s] 37%|███▋ | 24544/65536 [4:12:37<7:25:59, 1.53it/s] 37%|███▋ | 24545/65536 [4:12:38<7:06:27, 1.60it/s] 37%|███▋ | 24546/65536 [4:12:38<7:07:10, 1.60it/s] 37%|███▋ | 24547/65536 [4:12:39<6:59:44, 1.63it/s] 37%|███▋ | 24548/65536 [4:12:40<6:53:31, 1.65it/s] 37%|███▋ | 24549/65536 [4:12:40<7:01:13, 1.62it/s] 37%|███▋ | 24550/65536 [4:12:41<6:53:32, 1.65it/s] 37%|███▋ | 24551/65536 [4:12:41<6:49:30, 1.67it/s] 37%|███▋ | 24552/65536 [4:12:42<7:12:16, 1.58it/s] 37%|███▋ | 24553/65536 [4:12:43<7:26:30, 1.53it/s] 37%|███▋ | 24554/65536 [4:12:43<7:13:07, 1.58it/s] 37%|███▋ | 24555/65536 [4:12:44<7:08:12, 1.60it/s] 37%|███▋ | 24556/65536 [4:12:45<7:03:10, 1.61it/s] 37%|███▋ | 24557/65536 [4:12:45<7:12:18, 1.58it/s] 37%|███▋ | 24558/65536 [4:12:46<7:11:09, 1.58it/s] 37%|███▋ | 24559/65536 [4:12:47<7:08:16, 1.59it/s] 37%|███▋ | 24560/65536 [4:12:47<7:20:21, 1.55it/s] {'loss': 2.9626, 'learning_rate': 6.644422676625444e-07, 'epoch': 1516.05} + 37%|███▋ | 24560/65536 [4:12:47<7:20:21, 1.55it/s] 37%|███▋ | 24561/65536 [4:12:48<7:11:34, 1.58it/s] 37%|███▋ | 24562/65536 [4:12:49<7:13:38, 1.57it/s] 37%|███▋ | 24563/65536 [4:12:49<7:08:28, 1.59it/s] 37%|███▋ | 24564/65536 [4:12:50<6:53:22, 1.65it/s] 37%|███▋ | 24565/65536 [4:12:50<6:46:24, 1.68it/s] 37%|███▋ | 24566/65536 [4:12:51<6:54:56, 1.65it/s] 37%|███▋ | 24567/65536 [4:12:51<6:54:48, 1.65it/s] 37%|███▋ | 24568/65536 [4:12:52<6:52:41, 1.65it/s] 37%|███▋ | 24569/65536 [4:12:53<6:47:37, 1.68it/s] 37%|███▋ | 24570/65536 [4:12:53<6:52:12, 1.66it/s] 37%|███▋ | 24571/65536 [4:12:54<6:59:16, 1.63it/s] 37%|███▋ | 24572/65536 [4:12:55<6:54:21, 1.65it/s] 37%|███▋ | 24573/65536 [4:12:55<7:08:29, 1.59it/s] 37%|███▋ | 24574/65536 [4:12:56<7:11:43, 1.58it/s] 37%|███▋ | 24575/65536 [4:12:56<7:01:37, 1.62it/s] 38%|███▊ | 24576/65536 [4:12:57<7:19:25, 1.55it/s] 38%|███▊ | 24577/65536 [4:12:58<7:25:01, 1.53it/s] 38%|███▊ | 24578/65536 [4:12:58<7:11:27, 1.58it/s] 38%|███▊ | 24579/65536 [4:12:59<7:08:14, 1.59it/s] 38%|███▊ | 24580/65536 [4:13:00<7:13:15, 1.58it/s] {'loss': 3.0035, 'learning_rate': 6.641667687033182e-07, 'epoch': 1517.28} + 38%|███▊ | 24580/65536 [4:13:00<7:13:15, 1.58it/s] 38%|███▊ | 24581/65536 [4:13:00<7:07:14, 1.60it/s] 38%|███▊ | 24582/65536 [4:13:01<7:00:03, 1.62it/s] 38%|███▊ | 24583/65536 [4:13:01<6:57:48, 1.63it/s] 38%|███▊ | 24584/65536 [4:13:02<6:58:51, 1.63it/s] 38%|███▊ | 24585/65536 [4:13:03<6:58:36, 1.63it/s] 38%|███▊ | 24586/65536 [4:13:03<6:50:26, 1.66it/s] 38%|███▊ | 24587/65536 [4:13:04<7:01:12, 1.62it/s] 38%|███▊ | 24588/65536 [4:13:05<7:05:41, 1.60it/s] 38%|███▊ | 24589/65536 [4:13:05<6:59:46, 1.63it/s] 38%|███▊ | 24590/65536 [4:13:06<7:01:59, 1.62it/s] 38%|███▊ | 24591/65536 [4:13:06<6:57:53, 1.63it/s] 38%|███▊ | 24592/65536 [4:13:07<7:06:33, 1.60it/s] 38%|███▊ | 24593/65536 [4:13:08<7:10:51, 1.58it/s] 38%|███▊ | 24594/65536 [4:13:08<7:14:18, 1.57it/s] 38%|███▊ | 24595/65536 [4:13:09<7:03:04, 1.61it/s] 38%|███▊ | 24596/65536 [4:13:09<6:57:15, 1.64it/s] 38%|███▊ | 24597/65536 [4:13:10<7:03:58, 1.61it/s] 38%|███▊ | 24598/65536 [4:13:11<7:05:24, 1.60it/s] 38%|███▊ | 24599/65536 [4:13:11<6:58:51, 1.63it/s] 38%|███▊ | 24600/65536 [4:13:12<7:08:28, 1.59it/s] {'loss': 2.9803, 'learning_rate': 6.638912697440921e-07, 'epoch': 1518.52} + 38%|███▊ | 24600/65536 [4:13:12<7:08:28, 1.59it/s] 38%|███▊ | 24601/65536 [4:13:13<6:57:27, 1.63it/s] 38%|███▊ | 24602/65536 [4:13:13<7:02:18, 1.62it/s] 38%|███▊ | 24603/65536 [4:13:14<6:51:14, 1.66it/s] 38%|███▊ | 24604/65536 [4:13:14<6:52:51, 1.65it/s] 38%|███▊ | 24605/65536 [4:13:15<6:46:52, 1.68it/s] 38%|███▊ | 24606/65536 [4:13:16<6:46:59, 1.68it/s] 38%|███▊ | 24607/65536 [4:13:16<6:40:19, 1.70it/s] 38%|███▊ | 24608/65536 [4:13:17<6:57:22, 1.63it/s] 38%|███▊ | 24609/65536 [4:13:17<7:00:12, 1.62it/s] 38%|███▊ | 24610/65536 [4:13:18<6:54:02, 1.65it/s] 38%|███▊ | 24611/65536 [4:13:19<6:52:35, 1.65it/s] 38%|███▊ | 24612/65536 [4:13:19<6:55:48, 1.64it/s] 38%|███▊ | 24613/65536 [4:13:20<7:01:12, 1.62it/s] 38%|███▊ | 24614/65536 [4:13:21<7:07:52, 1.59it/s] 38%|███▊ | 24615/65536 [4:13:21<7:04:31, 1.61it/s] 38%|███▊ | 24616/65536 [4:13:22<6:59:23, 1.63it/s] 38%|███▊ | 24617/65536 [4:13:22<7:00:41, 1.62it/s] 38%|███▊ | 24618/65536 [4:13:23<6:59:52, 1.62it/s] 38%|███▊ | 24619/65536 [4:13:24<6:45:01, 1.68it/s] 38%|███▊ | 24620/65536 [4:13:24<6:50:49, 1.66it/s] {'loss': 2.9758, 'learning_rate': 6.636157707848659e-07, 'epoch': 1519.75} + 38%|███▊ | 24620/65536 [4:13:24<6:50:49, 1.66it/s] 38%|███▊ | 24621/65536 [4:13:25<6:53:19, 1.65it/s] 38%|███▊ | 24622/65536 [4:13:25<7:06:14, 1.60it/s] 38%|███▊ | 24623/65536 [4:13:26<7:13:02, 1.57it/s] 38%|███▊ | 24624/65536 [4:13:27<7:02:55, 1.61it/s] 38%|███▊ | 24625/65536 [4:13:27<7:16:33, 1.56it/s] 38%|███▊ | 24626/65536 [4:13:28<7:13:45, 1.57it/s] 38%|███▊ | 24627/65536 [4:13:29<7:04:39, 1.61it/s] 38%|███▊ | 24628/65536 [4:13:29<6:57:15, 1.63it/s] 38%|███▊ | 24629/65536 [4:13:30<6:52:51, 1.65it/s] 38%|███▊ | 24630/65536 [4:13:30<7:10:14, 1.58it/s] 38%|███▊ | 24631/65536 [4:13:31<6:56:49, 1.64it/s] 38%|███▊ | 24632/65536 [4:13:32<6:55:49, 1.64it/s] 38%|███▊ | 24633/65536 [4:13:32<6:55:17, 1.64it/s] 38%|███▊ | 24634/65536 [4:13:33<6:51:11, 1.66it/s] 38%|███▊ | 24635/65536 [4:13:33<6:55:48, 1.64it/s] 38%|███▊ | 24636/65536 [4:13:34<7:04:09, 1.61it/s] 38%|███▊ | 24637/65536 [4:13:35<6:58:51, 1.63it/s] 38%|███▊ | 24638/65536 [4:13:35<7:05:00, 1.60it/s] 38%|███▊ | 24639/65536 [4:13:36<7:00:27, 1.62it/s] 38%|███▊ | 24640/65536 [4:13:37<6:55:12, 1.64it/s] {'loss': 2.9671, 'learning_rate': 6.633402718256397e-07, 'epoch': 1520.99} + 38%|███▊ | 24640/65536 [4:13:37<6:55:12, 1.64it/s] 38%|███▊ | 24641/65536 [4:13:37<7:17:12, 1.56it/s] 38%|███▊ | 24642/65536 [4:13:38<7:14:10, 1.57it/s] 38%|███▊ | 24643/65536 [4:13:38<7:02:44, 1.61it/s] 38%|███▊ | 24644/65536 [4:13:39<6:57:33, 1.63it/s] 38%|███▊ | 24645/65536 [4:13:40<7:10:35, 1.58it/s] 38%|███▊ | 24646/65536 [4:13:40<7:23:15, 1.54it/s] 38%|███▊ | 24647/65536 [4:13:41<7:16:20, 1.56it/s] 38%|███▊ | 24648/65536 [4:13:42<7:07:19, 1.59it/s] 38%|███▊ | 24649/65536 [4:13:42<7:00:34, 1.62it/s] 38%|███▊ | 24650/65536 [4:13:43<6:55:27, 1.64it/s] 38%|███▊ | 24651/65536 [4:13:43<6:54:18, 1.64it/s] 38%|███▊ | 24652/65536 [4:13:44<6:54:25, 1.64it/s] 38%|███▊ | 24653/65536 [4:13:45<6:55:08, 1.64it/s] 38%|███▊ | 24654/65536 [4:13:45<6:54:38, 1.64it/s] 38%|███▊ | 24655/65536 [4:13:46<6:48:25, 1.67it/s] 38%|███▊ | 24656/65536 [4:13:46<6:41:56, 1.70it/s] 38%|███▊ | 24657/65536 [4:13:47<6:56:38, 1.64it/s] 38%|███▊ | 24658/65536 [4:13:48<7:00:42, 1.62it/s] 38%|███▊ | 24659/65536 [4:13:48<7:01:01, 1.62it/s] 38%|███▊ | 24660/65536 [4:13:49<7:07:54, 1.59it/s] {'loss': 3.0017, 'learning_rate': 6.630647728664134e-07, 'epoch': 1522.22} + 38%|███▊ | 24660/65536 [4:13:49<7:07:54, 1.59it/s] 38%|███▊ | 24661/65536 [4:13:50<7:03:45, 1.61it/s] 38%|███▊ | 24662/65536 [4:13:50<6:56:07, 1.64it/s] 38%|███▊ | 24663/65536 [4:13:51<6:51:15, 1.66it/s] 38%|███▊ | 24664/65536 [4:13:51<6:49:29, 1.66it/s] 38%|███▊ | 24665/65536 [4:13:52<6:38:43, 1.71it/s] 38%|███▊ | 24666/65536 [4:13:53<7:01:09, 1.62it/s] 38%|███▊ | 24667/65536 [4:13:53<6:52:07, 1.65it/s] 38%|███▊ | 24668/65536 [4:13:54<6:54:02, 1.65it/s] 38%|███▊ | 24669/65536 [4:13:54<6:48:29, 1.67it/s] 38%|███▊ | 24670/65536 [4:13:55<7:02:24, 1.61it/s] 38%|███▊ | 24671/65536 [4:13:56<7:01:45, 1.61it/s] 38%|███▊ | 24672/65536 [4:13:56<6:57:57, 1.63it/s] 38%|███▊ | 24673/65536 [4:13:57<7:06:43, 1.60it/s] 38%|███▊ | 24674/65536 [4:13:57<6:59:27, 1.62it/s] 38%|███▊ | 24675/65536 [4:13:58<7:17:02, 1.56it/s] 38%|███▊ | 24676/65536 [4:13:59<7:15:17, 1.56it/s] 38%|███▊ | 24677/65536 [4:13:59<7:13:23, 1.57it/s] 38%|███▊ | 24678/65536 [4:14:00<6:59:48, 1.62it/s] 38%|███▊ | 24679/65536 [4:14:01<6:51:57, 1.65it/s] 38%|███▊ | 24680/65536 [4:14:01<6:50:19, 1.66it/s] {'loss': 2.9997, 'learning_rate': 6.627892739071874e-07, 'epoch': 1523.46} + 38%|███▊ | 24680/65536 [4:14:01<6:50:19, 1.66it/s] 38%|███▊ | 24681/65536 [4:14:02<6:50:45, 1.66it/s] 38%|███▊ | 24682/65536 [4:14:02<6:57:12, 1.63it/s] 38%|███▊ | 24683/65536 [4:14:03<7:09:44, 1.58it/s] 38%|███▊ | 24684/65536 [4:14:04<6:59:48, 1.62it/s] 38%|███▊ | 24685/65536 [4:14:04<7:05:00, 1.60it/s] 38%|███▊ | 24686/65536 [4:14:05<7:00:25, 1.62it/s] 38%|███▊ | 24687/65536 [4:14:06<6:55:33, 1.64it/s] 38%|███▊ | 24688/65536 [4:14:06<6:59:11, 1.62it/s] 38%|███▊ | 24689/65536 [4:14:07<7:12:38, 1.57it/s] 38%|███▊ | 24690/65536 [4:14:07<7:10:06, 1.58it/s] 38%|███▊ | 24691/65536 [4:14:08<6:59:24, 1.62it/s] 38%|███▊ | 24692/65536 [4:14:09<7:04:39, 1.60it/s] 38%|███▊ | 24693/65536 [4:14:09<7:01:54, 1.61it/s] 38%|███▊ | 24694/65536 [4:14:10<7:05:41, 1.60it/s] 38%|███▊ | 24695/65536 [4:14:11<6:58:47, 1.63it/s] 38%|███▊ | 24696/65536 [4:14:11<6:57:06, 1.63it/s] 38%|███▊ | 24697/65536 [4:14:12<6:58:44, 1.63it/s] 38%|███▊ | 24698/65536 [4:14:12<6:52:52, 1.65it/s] 38%|███▊ | 24699/65536 [4:14:13<7:00:20, 1.62it/s] 38%|███▊ | 24700/65536 [4:14:14<7:04:12, 1.60it/s] {'loss': 2.9376, 'learning_rate': 6.625137749479613e-07, 'epoch': 1524.69} + 38%|███▊ | 24700/65536 [4:14:14<7:04:12, 1.60it/s] 38%|███▊ | 24701/65536 [4:14:14<6:57:48, 1.63it/s] 38%|███▊ | 24702/65536 [4:14:15<6:56:05, 1.64it/s] 38%|███▊ | 24703/65536 [4:14:15<7:01:47, 1.61it/s] 38%|███▊ | 24704/65536 [4:14:16<7:01:36, 1.61it/s] 38%|███▊ | 24705/65536 [4:14:17<7:03:20, 1.61it/s] 38%|███▊ | 24706/65536 [4:14:17<7:22:12, 1.54it/s] 38%|███▊ | 24707/65536 [4:14:18<7:11:38, 1.58it/s] 38%|███▊ | 24708/65536 [4:14:19<7:03:18, 1.61it/s] 38%|███▊ | 24709/65536 [4:14:19<6:55:14, 1.64it/s] 38%|███▊ | 24710/65536 [4:14:20<6:53:57, 1.64it/s] 38%|███▊ | 24711/65536 [4:14:20<6:53:13, 1.65it/s] 38%|███▊ | 24712/65536 [4:14:21<7:02:10, 1.61it/s] 38%|███▊ | 24713/65536 [4:14:22<7:03:06, 1.61it/s] 38%|███▊ | 24714/65536 [4:14:22<7:01:54, 1.61it/s] 38%|███▊ | 24715/65536 [4:14:23<7:05:37, 1.60it/s] 38%|███▊ | 24716/65536 [4:14:24<7:10:45, 1.58it/s] 38%|███▊ | 24717/65536 [4:14:24<7:05:42, 1.60it/s] 38%|███▊ | 24718/65536 [4:14:25<7:02:23, 1.61it/s] 38%|███▊ | 24719/65536 [4:14:25<7:01:43, 1.61it/s] 38%|███▊ | 24720/65536 [4:14:26<7:00:41, 1.62it/s] {'loss': 2.9338, 'learning_rate': 6.622382759887351e-07, 'epoch': 1525.93} + 38%|███▊ | 24720/65536 [4:14:26<7:00:41, 1.62it/s] 38%|███▊ | 24721/65536 [4:14:27<6:53:42, 1.64it/s] 38%|███▊ | 24722/65536 [4:14:27<7:05:19, 1.60it/s] 38%|███▊ | 24723/65536 [4:14:28<7:12:48, 1.57it/s] 38%|███▊ | 24724/65536 [4:14:29<7:04:40, 1.60it/s] 38%|███▊ | 24725/65536 [4:14:29<6:57:02, 1.63it/s] 38%|███▊ | 24726/65536 [4:14:30<7:04:42, 1.60it/s] 38%|███▊ | 24727/65536 [4:14:30<7:05:43, 1.60it/s] 38%|███▊ | 24728/65536 [4:14:31<7:05:09, 1.60it/s] 38%|███▊ | 24729/65536 [4:14:32<7:05:40, 1.60it/s] 38%|███▊ | 24730/65536 [4:14:32<7:01:08, 1.61it/s] 38%|███▊ | 24731/65536 [4:14:33<6:51:41, 1.65it/s] 38%|███▊ | 24732/65536 [4:14:33<6:39:27, 1.70it/s] 38%|███▊ | 24733/65536 [4:14:34<6:52:03, 1.65it/s] 38%|███▊ | 24734/65536 [4:14:35<6:47:57, 1.67it/s] 38%|███▊ | 24735/65536 [4:14:35<6:47:10, 1.67it/s] 38%|███▊ | 24736/65536 [4:14:36<6:46:21, 1.67it/s] 38%|███▊ | 24737/65536 [4:14:36<6:40:47, 1.70it/s] 38%|███▊ | 24738/65536 [4:14:37<6:59:29, 1.62it/s] 38%|███▊ | 24739/65536 [4:14:38<7:04:22, 1.60it/s] 38%|███▊ | 24740/65536 [4:14:38<6:54:32, 1.64it/s] {'loss': 3.0424, 'learning_rate': 6.61962777029509e-07, 'epoch': 1527.16} + 38%|███▊ | 24740/65536 [4:14:38<6:54:32, 1.64it/s] 38%|███▊ | 24741/65536 [4:14:39<6:56:49, 1.63it/s] 38%|███▊ | 24742/65536 [4:14:40<7:03:58, 1.60it/s] 38%|███▊ | 24743/65536 [4:14:40<7:12:20, 1.57it/s] 38%|███▊ | 24744/65536 [4:14:41<7:16:00, 1.56it/s] 38%|███▊ | 24745/65536 [4:14:41<7:10:52, 1.58it/s] 38%|███▊ | 24746/65536 [4:14:42<7:04:30, 1.60it/s] 38%|███▊ | 24747/65536 [4:14:43<6:57:38, 1.63it/s] 38%|███▊ | 24748/65536 [4:14:43<6:54:58, 1.64it/s] 38%|███▊ | 24749/65536 [4:14:44<6:52:58, 1.65it/s] 38%|███▊ | 24750/65536 [4:14:44<6:44:00, 1.68it/s] 38%|███▊ | 24751/65536 [4:14:45<6:38:05, 1.71it/s] 38%|███▊ | 24752/65536 [4:14:46<6:47:21, 1.67it/s] 38%|███▊ | 24753/65536 [4:14:46<6:53:18, 1.64it/s] 38%|███▊ | 24754/65536 [4:14:47<7:18:00, 1.55it/s] 38%|███▊ | 24755/65536 [4:14:48<7:09:09, 1.58it/s] 38%|███▊ | 24756/65536 [4:14:48<7:04:11, 1.60it/s] 38%|███▊ | 24757/65536 [4:14:49<6:55:53, 1.63it/s] 38%|███▊ | 24758/65536 [4:14:49<6:45:12, 1.68it/s] 38%|███▊ | 24759/65536 [4:14:50<6:47:25, 1.67it/s] 38%|███▊ | 24760/65536 [4:14:51<6:56:45, 1.63it/s] {'loss': 3.0168, 'learning_rate': 6.616872780702828e-07, 'epoch': 1528.4} + 38%|███▊ | 24760/65536 [4:14:51<6:56:45, 1.63it/s] 38%|███▊ | 24761/65536 [4:14:51<6:48:10, 1.66it/s] 38%|███▊ | 24762/65536 [4:14:52<7:04:18, 1.60it/s] 38%|███▊ | 24763/65536 [4:14:52<6:52:58, 1.65it/s] 38%|███▊ | 24764/65536 [4:14:53<6:51:37, 1.65it/s] 38%|███▊ | 24765/65536 [4:14:54<6:43:54, 1.68it/s] 38%|███▊ | 24766/65536 [4:14:54<6:52:12, 1.65it/s] 38%|███▊ | 24767/65536 [4:14:55<6:50:05, 1.66it/s] 38%|███▊ | 24768/65536 [4:14:55<7:02:30, 1.61it/s] 38%|███▊ | 24769/65536 [4:14:56<7:07:01, 1.59it/s] 38%|███▊ | 24770/65536 [4:14:57<7:10:22, 1.58it/s] 38%|███▊ | 24771/65536 [4:14:57<7:03:27, 1.60it/s] 38%|███▊ | 24772/65536 [4:14:58<6:59:36, 1.62it/s] 38%|███▊ | 24773/65536 [4:14:59<6:57:11, 1.63it/s] 38%|███▊ | 24774/65536 [4:14:59<6:51:57, 1.65it/s] 38%|███▊ | 24775/65536 [4:15:00<6:46:39, 1.67it/s] 38%|███▊ | 24776/65536 [4:15:00<6:48:20, 1.66it/s] 38%|███▊ | 24777/65536 [4:15:01<6:40:21, 1.70it/s] 38%|███▊ | 24778/65536 [4:15:02<6:59:17, 1.62it/s] 38%|███▊ | 24779/65536 [4:15:02<6:49:42, 1.66it/s] 38%|███▊ | 24780/65536 [4:15:03<6:45:47, 1.67it/s] {'loss': 2.9788, 'learning_rate': 6.614117791110567e-07, 'epoch': 1529.63} + 38%|███▊ | 24780/65536 [4:15:03<6:45:47, 1.67it/s] 38%|███▊ | 24781/65536 [4:15:03<6:54:37, 1.64it/s] 38%|███▊ | 24782/65536 [4:15:04<7:00:36, 1.61it/s] 38%|███▊ | 24783/65536 [4:15:05<6:59:58, 1.62it/s] 38%|███▊ | 24784/65536 [4:15:05<6:50:34, 1.65it/s] 38%|███▊ | 24785/65536 [4:15:06<7:03:57, 1.60it/s] 38%|███▊ | 24786/65536 [4:15:07<7:06:52, 1.59it/s] 38%|███▊ | 24787/65536 [4:15:07<7:12:42, 1.57it/s] 38%|███▊ | 24788/65536 [4:15:08<7:09:38, 1.58it/s] 38%|███▊ | 24789/65536 [4:15:08<7:07:23, 1.59it/s] 38%|███▊ | 24790/65536 [4:15:09<6:52:38, 1.65it/s] 38%|███▊ | 24791/65536 [4:15:10<6:53:39, 1.64it/s] 38%|███▊ | 24792/65536 [4:15:10<6:55:16, 1.64it/s] 38%|███▊ | 24793/65536 [4:15:11<7:07:22, 1.59it/s] 38%|███▊ | 24794/65536 [4:15:12<7:09:07, 1.58it/s] 38%|███▊ | 24795/65536 [4:15:12<7:08:19, 1.59it/s] 38%|███▊ | 24796/65536 [4:15:13<7:00:26, 1.61it/s] 38%|███▊ | 24797/65536 [4:15:13<6:57:18, 1.63it/s] 38%|███▊ | 24798/65536 [4:15:14<7:02:44, 1.61it/s] 38%|███▊ | 24799/65536 [4:15:15<7:03:58, 1.60it/s] 38%|███▊ | 24800/65536 [4:15:15<6:56:35, 1.63it/s] {'loss': 2.968, 'learning_rate': 6.611362801518306e-07, 'epoch': 1530.86} + 38%|███▊ | 24800/65536 [4:15:15<6:56:35, 1.63it/s] 38%|███▊ | 24801/65536 [4:15:16<6:50:13, 1.65it/s] 38%|███▊ | 24802/65536 [4:15:16<6:40:26, 1.70it/s] 38%|███▊ | 24803/65536 [4:15:17<6:53:54, 1.64it/s] 38%|███▊ | 24804/65536 [4:15:18<7:02:06, 1.61it/s] 38%|███▊ | 24805/65536 [4:15:18<7:02:16, 1.61it/s] 38%|███▊ | 24806/65536 [4:15:19<6:52:11, 1.65it/s] 38%|███▊ | 24807/65536 [4:15:19<6:50:10, 1.65it/s] 38%|███▊ | 24808/65536 [4:15:20<6:52:59, 1.64it/s] 38%|███▊ | 24809/65536 [4:15:21<6:54:03, 1.64it/s] 38%|███▊ | 24810/65536 [4:15:21<7:08:17, 1.58it/s] 38%|███▊ | 24811/65536 [4:15:22<6:58:07, 1.62it/s] 38%|███▊ | 24812/65536 [4:15:23<6:54:06, 1.64it/s] 38%|███▊ | 24813/65536 [4:15:23<6:59:11, 1.62it/s] 38%|███▊ | 24814/65536 [4:15:24<7:00:26, 1.61it/s] 38%|███▊ | 24815/65536 [4:15:24<6:51:41, 1.65it/s] 38%|███▊ | 24816/65536 [4:15:25<7:00:57, 1.61it/s] 38%|███▊ | 24817/65536 [4:15:26<6:59:40, 1.62it/s] 38%|███▊ | 24818/65536 [4:15:26<6:50:47, 1.65it/s] 38%|███▊ | 24819/65536 [4:15:27<6:59:11, 1.62it/s] 38%|███▊ | 24820/65536 [4:15:27<6:59:17, 1.62it/s] {'loss': 3.0261, 'learning_rate': 6.608607811926044e-07, 'epoch': 1532.1} + 38%|███▊ | 24820/65536 [4:15:27<6:59:17, 1.62it/s] 38%|███▊ | 24821/65536 [4:15:28<6:59:58, 1.62it/s] 38%|███▊ | 24822/65536 [4:15:29<6:51:10, 1.65it/s] 38%|███▊ | 24823/65536 [4:15:29<6:48:34, 1.66it/s] 38%|███▊ | 24824/65536 [4:15:30<6:48:42, 1.66it/s] 38%|███▊ | 24825/65536 [4:15:30<6:48:15, 1.66it/s] 38%|███▊ | 24826/65536 [4:15:31<6:46:23, 1.67it/s] 38%|███▊ | 24827/65536 [4:15:32<6:52:01, 1.65it/s] 38%|███▊ | 24828/65536 [4:15:32<6:52:58, 1.64it/s] 38%|███▊ | 24829/65536 [4:15:33<6:49:34, 1.66it/s] 38%|███▊ | 24830/65536 [4:15:34<6:59:07, 1.62it/s] 38%|███▊ | 24831/65536 [4:15:34<6:52:00, 1.65it/s] 38%|███▊ | 24832/65536 [4:15:35<6:51:35, 1.65it/s] 38%|███▊ | 24833/65536 [4:15:35<6:59:40, 1.62it/s] 38%|███▊ | 24834/65536 [4:15:36<6:55:35, 1.63it/s] 38%|███▊ | 24835/65536 [4:15:37<7:07:43, 1.59it/s] 38%|███▊ | 24836/65536 [4:15:37<6:57:25, 1.63it/s] 38%|███▊ | 24837/65536 [4:15:38<6:52:56, 1.64it/s] 38%|███▊ | 24838/65536 [4:15:38<6:46:24, 1.67it/s] 38%|███▊ | 24839/65536 [4:15:39<6:47:07, 1.67it/s] 38%|███▊ | 24840/65536 [4:15:40<6:38:22, 1.70it/s] {'loss': 3.0065, 'learning_rate': 6.605852822333782e-07, 'epoch': 1533.33} + 38%|███▊ | 24840/65536 [4:15:40<6:38:22, 1.70it/s] 38%|███▊ | 24841/65536 [4:15:40<6:44:36, 1.68it/s] 38%|███▊ | 24842/65536 [4:15:41<6:53:29, 1.64it/s] 38%|███▊ | 24843/65536 [4:15:41<6:53:30, 1.64it/s] 38%|███▊ | 24844/65536 [4:15:42<6:48:39, 1.66it/s] 38%|███▊ | 24845/65536 [4:15:43<6:46:58, 1.67it/s] 38%|███▊ | 24846/65536 [4:15:43<6:51:09, 1.65it/s] 38%|███▊ | 24847/65536 [4:15:44<6:58:50, 1.62it/s] 38%|███▊ | 24848/65536 [4:15:45<7:05:03, 1.60it/s] 38%|███▊ | 24849/65536 [4:15:45<6:55:34, 1.63it/s] 38%|███▊ | 24850/65536 [4:15:46<7:08:19, 1.58it/s] 38%|███▊ | 24851/65536 [4:15:46<7:29:19, 1.51it/s] 38%|███▊ | 24852/65536 [4:15:47<7:22:49, 1.53it/s] 38%|███▊ | 24853/65536 [4:15:48<7:17:21, 1.55it/s] 38%|███▊ | 24854/65536 [4:15:48<7:06:10, 1.59it/s] 38%|███▊ | 24855/65536 [4:15:49<7:05:05, 1.59it/s] 38%|███▊ | 24856/65536 [4:15:50<6:59:36, 1.62it/s] 38%|███▊ | 24857/65536 [4:15:50<7:04:01, 1.60it/s] 38%|███▊ | 24858/65536 [4:15:51<7:03:29, 1.60it/s] 38%|███▊ | 24859/65536 [4:15:51<6:55:55, 1.63it/s] 38%|███▊ | 24860/65536 [4:15:52<6:54:51, 1.63it/s] {'loss': 2.9672, 'learning_rate': 6.60309783274152e-07, 'epoch': 1534.57} + 38%|███▊ | 24860/65536 [4:15:52<6:54:51, 1.63it/s] 38%|███▊ | 24861/65536 [4:15:53<7:00:07, 1.61it/s] 38%|███▊ | 24862/65536 [4:15:53<7:00:07, 1.61it/s] 38%|███▊ | 24863/65536 [4:15:54<6:55:26, 1.63it/s] 38%|███▊ | 24864/65536 [4:15:54<6:55:41, 1.63it/s] 38%|███▊ | 24865/65536 [4:15:55<6:55:38, 1.63it/s] 38%|███▊ | 24866/65536 [4:15:56<6:57:46, 1.62it/s] 38%|███▊ | 24867/65536 [4:15:56<6:49:33, 1.65it/s] 38%|███▊ | 24868/65536 [4:15:57<7:01:31, 1.61it/s] 38%|███▊ | 24869/65536 [4:15:58<6:59:53, 1.61it/s] 38%|███▊ | 24870/65536 [4:15:58<6:54:45, 1.63it/s] 38%|███▊ | 24871/65536 [4:15:59<6:52:49, 1.64it/s] 38%|███▊ | 24872/65536 [4:15:59<6:55:06, 1.63it/s] 38%|███▊ | 24873/65536 [4:16:00<6:58:50, 1.62it/s] 38%|███▊ | 24874/65536 [4:16:01<6:56:38, 1.63it/s] 38%|███▊ | 24875/65536 [4:16:01<6:57:11, 1.62it/s] 38%|███▊ | 24876/65536 [4:16:02<7:08:17, 1.58it/s] 38%|███▊ | 24877/65536 [4:16:03<7:00:56, 1.61it/s] 38%|███▊ | 24878/65536 [4:16:03<6:50:46, 1.65it/s] 38%|███▊ | 24879/65536 [4:16:04<6:53:28, 1.64it/s] 38%|███▊ | 24880/65536 [4:16:04<6:43:11, 1.68it/s] {'loss': 2.9691, 'learning_rate': 6.600342843149259e-07, 'epoch': 1535.8} + 38%|███▊ | 24880/65536 [4:16:04<6:43:11, 1.68it/s] 38%|███▊ | 24881/65536 [4:16:05<6:42:56, 1.68it/s] 38%|███▊ | 24882/65536 [4:16:06<6:54:10, 1.64it/s] 38%|███▊ | 24883/65536 [4:16:06<7:00:51, 1.61it/s] 38%|███▊ | 24884/65536 [4:16:07<7:10:48, 1.57it/s] 38%|███▊ | 24885/65536 [4:16:07<7:00:38, 1.61it/s] 38%|███▊ | 24886/65536 [4:16:08<6:54:17, 1.64it/s] 38%|███▊ | 24887/65536 [4:16:09<6:49:19, 1.66it/s] 38%|███▊ | 24888/65536 [4:16:09<6:47:49, 1.66it/s] 38%|███▊ | 24889/65536 [4:16:10<6:50:32, 1.65it/s] 38%|███▊ | 24890/65536 [4:16:10<6:54:07, 1.64it/s] 38%|███▊ | 24891/65536 [4:16:11<6:50:12, 1.65it/s] 38%|███▊ | 24892/65536 [4:16:12<6:58:04, 1.62it/s] 38%|███▊ | 24893/65536 [4:16:12<6:44:40, 1.67it/s] 38%|███▊ | 24894/65536 [4:16:13<6:43:57, 1.68it/s] 38%|███▊ | 24895/65536 [4:16:13<6:41:29, 1.69it/s] 38%|███▊ | 24896/65536 [4:16:14<6:35:39, 1.71it/s] 38%|███▊ | 24897/65536 [4:16:15<6:52:35, 1.64it/s] 38%|███▊ | 24898/65536 [4:16:15<7:11:54, 1.57it/s] 38%|███▊ | 24899/65536 [4:16:16<7:10:19, 1.57it/s] 38%|███▊ | 24900/65536 [4:16:17<7:25:29, 1.52it/s] {'loss': 2.9562, 'learning_rate': 6.597587853556996e-07, 'epoch': 1537.04} + 38%|███▊ | 24900/65536 [4:16:17<7:25:29, 1.52it/s] 38%|███▊ | 24901/65536 [4:16:17<7:10:00, 1.57it/s] 38%|███▊ | 24902/65536 [4:16:18<6:57:43, 1.62it/s] 38%|███▊ | 24903/65536 [4:16:18<6:57:32, 1.62it/s] 38%|███▊ | 24904/65536 [4:16:19<6:53:52, 1.64it/s] 38%|███▊ | 24905/65536 [4:16:20<6:48:50, 1.66it/s] 38%|███▊ | 24906/65536 [4:16:20<6:54:21, 1.63it/s] 38%|███▊ | 24907/65536 [4:16:21<6:47:08, 1.66it/s] 38%|███▊ | 24908/65536 [4:16:21<6:46:31, 1.67it/s] 38%|███▊ | 24909/65536 [4:16:22<6:46:05, 1.67it/s] 38%|███▊ | 24910/65536 [4:16:23<6:50:09, 1.65it/s] 38%|███▊ | 24911/65536 [4:16:23<6:47:51, 1.66it/s] 38%|███▊ | 24912/65536 [4:16:24<6:46:38, 1.66it/s] 38%|███▊ | 24913/65536 [4:16:25<7:03:18, 1.60it/s] 38%|███▊ | 24914/65536 [4:16:25<7:09:54, 1.57it/s] 38%|███▊ | 24915/65536 [4:16:26<7:04:15, 1.60it/s] 38%|███▊ | 24916/65536 [4:16:26<7:07:33, 1.58it/s] 38%|███▊ | 24917/65536 [4:16:27<7:13:47, 1.56it/s] 38%|███▊ | 24918/65536 [4:16:28<7:11:19, 1.57it/s] 38%|███▊ | 24919/65536 [4:16:28<6:57:19, 1.62it/s] 38%|███▊ | 24920/65536 [4:16:29<6:54:54, 1.63it/s] {'loss': 2.9616, 'learning_rate': 6.594832863964735e-07, 'epoch': 1538.27} + 38%|███▊ | 24920/65536 [4:16:29<6:54:54, 1.63it/s] 38%|███▊ | 24921/65536 [4:16:30<6:54:22, 1.63it/s] 38%|███▊ | 24922/65536 [4:16:30<6:53:48, 1.64it/s] 38%|███▊ | 24923/65536 [4:16:31<6:48:36, 1.66it/s] 38%|███▊ | 24924/65536 [4:16:31<7:01:11, 1.61it/s] 38%|███▊ | 24925/65536 [4:16:32<6:52:41, 1.64it/s] 38%|███▊ | 24926/65536 [4:16:33<6:56:49, 1.62it/s] 38%|███▊ | 24927/65536 [4:16:33<6:54:48, 1.63it/s] 38%|███▊ | 24928/65536 [4:16:34<6:52:43, 1.64it/s] 38%|███▊ | 24929/65536 [4:16:34<6:52:46, 1.64it/s] 38%|███▊ | 24930/65536 [4:16:35<6:46:26, 1.67it/s] 38%|███▊ | 24931/65536 [4:16:36<6:49:07, 1.65it/s] 38%|███▊ | 24932/65536 [4:16:36<7:03:59, 1.60it/s] 38%|███▊ | 24933/65536 [4:16:37<7:10:27, 1.57it/s] 38%|███▊ | 24934/65536 [4:16:38<7:17:47, 1.55it/s] 38%|███▊ | 24935/65536 [4:16:38<7:02:26, 1.60it/s] 38%|███▊ | 24936/65536 [4:16:39<6:48:25, 1.66it/s] 38%|███▊ | 24937/65536 [4:16:39<6:59:35, 1.61it/s] 38%|███▊ | 24938/65536 [4:16:40<6:48:51, 1.65it/s] 38%|███▊ | 24939/65536 [4:16:41<6:48:33, 1.66it/s] 38%|███▊ | 24940/65536 [4:16:41<6:42:31, 1.68it/s] {'loss': 3.0074, 'learning_rate': 6.592077874372474e-07, 'epoch': 1539.51} + 38%|███▊ | 24940/65536 [4:16:41<6:42:31, 1.68it/s] 38%|███▊ | 24941/65536 [4:16:42<6:39:40, 1.69it/s] 38%|███▊ | 24942/65536 [4:16:42<6:47:52, 1.66it/s] 38%|███▊ | 24943/65536 [4:16:43<6:46:00, 1.67it/s] 38%|███▊ | 24944/65536 [4:16:44<6:56:27, 1.62it/s] 38%|███▊ | 24945/65536 [4:16:44<6:48:33, 1.66it/s] 38%|███▊ | 24946/65536 [4:16:45<6:50:52, 1.65it/s] 38%|███▊ | 24947/65536 [4:16:45<6:50:10, 1.65it/s] 38%|███▊ | 24948/65536 [4:16:46<6:51:17, 1.64it/s] 38%|███▊ | 24949/65536 [4:16:47<7:09:18, 1.58it/s] 38%|███▊ | 24950/65536 [4:16:47<7:07:46, 1.58it/s] 38%|███▊ | 24951/65536 [4:16:48<7:06:47, 1.58it/s] 38%|███▊ | 24952/65536 [4:16:49<6:55:41, 1.63it/s] 38%|███▊ | 24953/65536 [4:16:49<6:58:58, 1.61it/s] 38%|███▊ | 24954/65536 [4:16:50<6:48:20, 1.66it/s] 38%|███▊ | 24955/65536 [4:16:50<6:50:41, 1.65it/s] 38%|███▊ | 24956/65536 [4:16:51<6:50:44, 1.65it/s] 38%|███▊ | 24957/65536 [4:16:52<6:50:33, 1.65it/s] 38%|███▊ | 24958/65536 [4:16:52<7:00:54, 1.61it/s] 38%|███▊ | 24959/65536 [4:16:53<6:53:56, 1.63it/s] 38%|███▊ | 24960/65536 [4:16:53<6:45:57, 1.67it/s] {'loss': 2.9765, 'learning_rate': 6.589322884780212e-07, 'epoch': 1540.74} + 38%|███▊ | 24960/65536 [4:16:53<6:45:57, 1.67it/s] 38%|███▊ | 24961/65536 [4:16:54<7:00:46, 1.61it/s] 38%|███▊ | 24962/65536 [4:16:55<6:53:51, 1.63it/s] 38%|███▊ | 24963/65536 [4:16:55<6:47:45, 1.66it/s] 38%|███▊ | 24964/65536 [4:16:56<6:55:55, 1.63it/s] 38%|███▊ | 24965/65536 [4:16:57<7:03:14, 1.60it/s] 38%|███▊ | 24966/65536 [4:16:57<6:55:45, 1.63it/s] 38%|███▊ | 24967/65536 [4:16:58<7:03:40, 1.60it/s] 38%|███▊ | 24968/65536 [4:16:58<6:54:51, 1.63it/s] 38%|███▊ | 24969/65536 [4:16:59<7:05:41, 1.59it/s] 38%|███▊ | 24970/65536 [4:17:00<7:16:11, 1.55it/s] 38%|███▊ | 24971/65536 [4:17:00<7:09:44, 1.57it/s] 38%|███▊ | 24972/65536 [4:17:01<6:57:33, 1.62it/s] 38%|███▊ | 24973/65536 [4:17:01<6:49:45, 1.65it/s] 38%|███▊ | 24974/65536 [4:17:02<6:41:02, 1.69it/s] 38%|███▊ | 24975/65536 [4:17:03<6:54:52, 1.63it/s] 38%|███▊ | 24976/65536 [4:17:03<6:53:36, 1.63it/s] 38%|███▊ | 24977/65536 [4:17:04<6:54:36, 1.63it/s] 38%|███▊ | 24978/65536 [4:17:04<6:51:02, 1.64it/s] 38%|███▊ | 24979/65536 [4:17:05<6:53:56, 1.63it/s] 38%|███▊ | 24980/65536 [4:17:06<6:52:33, 1.64it/s] {'loss': 2.9252, 'learning_rate': 6.586567895187951e-07, 'epoch': 1541.98} + 38%|███▊ | 24980/65536 [4:17:06<6:52:33, 1.64it/s] 38%|███▊ | 24981/65536 [4:17:06<7:09:25, 1.57it/s] 38%|███▊ | 24982/65536 [4:17:07<6:59:07, 1.61it/s] 38%|███▊ | 24983/65536 [4:17:08<7:08:00, 1.58it/s] 38%|███▊ | 24984/65536 [4:17:08<7:04:18, 1.59it/s] 38%|███▊ | 24985/65536 [4:17:09<7:08:20, 1.58it/s] 38%|███▊ | 24986/65536 [4:17:10<7:19:40, 1.54it/s] 38%|███▊ | 24987/65536 [4:17:10<7:07:37, 1.58it/s] 38%|███▊ | 24988/65536 [4:17:11<7:02:29, 1.60it/s] 38%|███▊ | 24989/65536 [4:17:11<6:51:11, 1.64it/s] 38%|███▊ | 24990/65536 [4:17:12<6:42:34, 1.68it/s] 38%|███▊ | 24991/65536 [4:17:13<6:48:02, 1.66it/s] 38%|███▊ | 24992/65536 [4:17:13<6:52:28, 1.64it/s] 38%|███▊ | 24993/65536 [4:17:14<6:46:13, 1.66it/s] 38%|███▊ | 24994/65536 [4:17:14<6:38:41, 1.69it/s] 38%|███▊ | 24995/65536 [4:17:15<6:39:18, 1.69it/s] 38%|███▊ | 24996/65536 [4:17:16<6:45:28, 1.67it/s] 38%|███▊ | 24997/65536 [4:17:16<6:53:42, 1.63it/s] 38%|███▊ | 24998/65536 [4:17:17<7:01:35, 1.60it/s] 38%|███▊ | 24999/65536 [4:17:17<6:51:10, 1.64it/s] 38%|███▊ | 25000/65536 [4:17:18<6:50:54, 1.64it/s] {'loss': 2.9334, 'learning_rate': 6.583812905595689e-07, 'epoch': 1543.21} + 38%|███▊ | 25000/65536 [4:17:18<6:50:54, 1.64it/s] 38%|███▊ | 25001/65536 [4:17:19<6:49:15, 1.65it/s] 38%|███▊ | 25002/65536 [4:17:19<6:46:06, 1.66it/s] 38%|███▊ | 25003/65536 [4:17:20<6:46:36, 1.66it/s] 38%|███▊ | 25004/65536 [4:17:20<6:50:18, 1.65it/s] 38%|███▊ | 25005/65536 [4:17:21<6:56:35, 1.62it/s] 38%|███▊ | 25006/65536 [4:17:22<6:45:42, 1.66it/s] 38%|███▊ | 25007/65536 [4:17:22<6:57:27, 1.62it/s] 38%|███▊ | 25008/65536 [4:17:23<6:55:52, 1.62it/s] 38%|███▊ | 25009/65536 [4:17:23<6:47:33, 1.66it/s] 38%|███▊ | 25010/65536 [4:17:24<6:52:16, 1.64it/s] 38%|███▊ | 25011/65536 [4:17:25<6:53:10, 1.63it/s] 38%|███▊ | 25012/65536 [4:17:25<6:55:54, 1.62it/s] 38%|███▊ | 25013/65536 [4:17:26<7:20:38, 1.53it/s] 38%|███▊ | 25014/65536 [4:17:27<7:06:17, 1.58it/s] 38%|███▊ | 25015/65536 [4:17:27<6:58:09, 1.62it/s] 38%|███▊ | 25016/65536 [4:17:28<6:52:55, 1.64it/s] 38%|███▊ | 25017/65536 [4:17:28<6:55:12, 1.63it/s] 38%|███▊ | 25018/65536 [4:17:29<6:48:32, 1.65it/s] 38%|███▊ | 25019/65536 [4:17:30<6:53:56, 1.63it/s] 38%|███▊ | 25020/65536 [4:17:30<6:57:57, 1.62it/s] {'loss': 2.9412, 'learning_rate': 6.581057916003428e-07, 'epoch': 1544.44} + 38%|███▊ | 25020/65536 [4:17:30<6:57:57, 1.62it/s] 38%|███▊ | 25021/65536 [4:17:31<6:54:39, 1.63it/s] 38%|███▊ | 25022/65536 [4:17:32<7:01:01, 1.60it/s] 38%|███▊ | 25023/65536 [4:17:32<6:57:24, 1.62it/s] 38%|███▊ | 25024/65536 [4:17:33<6:57:50, 1.62it/s] 38%|███▊ | 25025/65536 [4:17:33<6:49:47, 1.65it/s] 38%|███▊ | 25026/65536 [4:17:34<6:50:58, 1.64it/s] 38%|███▊ | 25027/65536 [4:17:35<6:56:19, 1.62it/s] 38%|███▊ | 25028/65536 [4:17:35<6:50:29, 1.64it/s] 38%|███▊ | 25029/65536 [4:17:36<6:49:25, 1.65it/s] 38%|███▊ | 25030/65536 [4:17:37<7:07:29, 1.58it/s] 38%|███▊ | 25031/65536 [4:17:37<6:54:57, 1.63it/s] 38%|███▊ | 25032/65536 [4:17:38<6:53:06, 1.63it/s] 38%|███▊ | 25033/65536 [4:17:38<6:52:33, 1.64it/s] 38%|███▊ | 25034/65536 [4:17:39<7:05:46, 1.59it/s] 38%|███▊ | 25035/65536 [4:17:40<6:54:34, 1.63it/s] 38%|███▊ | 25036/65536 [4:17:40<6:54:53, 1.63it/s] 38%|███▊ | 25037/65536 [4:17:41<6:44:49, 1.67it/s] 38%|███▊ | 25038/65536 [4:17:41<6:55:08, 1.63it/s] 38%|███▊ | 25039/65536 [4:17:42<6:49:35, 1.65it/s] 38%|███▊ | 25040/65536 [4:17:43<6:46:39, 1.66it/s] {'loss': 2.9712, 'learning_rate': 6.578302926411167e-07, 'epoch': 1545.68} + 38%|███▊ | 25040/65536 [4:17:43<6:46:39, 1.66it/s] 38%|███▊ | 25041/65536 [4:17:43<6:51:11, 1.64it/s] 38%|███▊ | 25042/65536 [4:17:44<6:56:22, 1.62it/s] 38%|███▊ | 25043/65536 [4:17:44<6:53:35, 1.63it/s] 38%|███▊ | 25044/65536 [4:17:45<6:53:35, 1.63it/s] 38%|███▊ | 25045/65536 [4:17:46<6:46:52, 1.66it/s] 38%|███▊ | 25046/65536 [4:17:46<6:53:55, 1.63it/s] 38%|███▊ | 25047/65536 [4:17:47<6:45:34, 1.66it/s] 38%|███▊ | 25048/65536 [4:17:47<6:47:10, 1.66it/s] 38%|███▊ | 25049/65536 [4:17:48<6:48:44, 1.65it/s] 38%|███▊ | 25050/65536 [4:17:49<6:45:49, 1.66it/s] 38%|███▊ | 25051/65536 [4:17:49<6:46:41, 1.66it/s] 38%|███▊ | 25052/65536 [4:17:50<7:05:50, 1.58it/s] 38%|███▊ | 25053/65536 [4:17:51<6:55:56, 1.62it/s] 38%|███▊ | 25054/65536 [4:17:51<6:49:01, 1.65it/s] 38%|███▊ | 25055/65536 [4:17:52<6:51:37, 1.64it/s] 38%|███▊ | 25056/65536 [4:17:52<7:11:53, 1.56it/s] 38%|███▊ | 25057/65536 [4:17:53<7:13:04, 1.56it/s] 38%|███▊ | 25058/65536 [4:17:54<7:13:57, 1.55it/s] 38%|███▊ | 25059/65536 [4:17:54<7:06:38, 1.58it/s] 38%|███▊ | 25060/65536 [4:17:55<7:01:07, 1.60it/s] {'loss': 3.0192, 'learning_rate': 6.575547936818905e-07, 'epoch': 1546.91} + 38%|███▊ | 25060/65536 [4:17:55<7:01:07, 1.60it/s] 38%|███▊ | 25061/65536 [4:17:56<6:53:06, 1.63it/s] 38%|███▊ | 25062/65536 [4:17:56<7:07:34, 1.58it/s] 38%|███▊ | 25063/65536 [4:17:57<7:01:18, 1.60it/s] 38%|███▊ | 25064/65536 [4:17:57<6:58:21, 1.61it/s] 38%|███▊ | 25065/65536 [4:17:58<6:52:23, 1.64it/s] 38%|███▊ | 25066/65536 [4:17:59<6:40:57, 1.68it/s] 38%|███▊ | 25067/65536 [4:17:59<6:41:22, 1.68it/s] 38%|███▊ | 25068/65536 [4:18:00<6:46:28, 1.66it/s] 38%|███▊ | 25069/65536 [4:18:00<6:55:11, 1.62it/s] 38%|███▊ | 25070/65536 [4:18:01<6:53:48, 1.63it/s] 38%|███▊ | 25071/65536 [4:18:02<7:10:21, 1.57it/s] 38%|███▊ | 25072/65536 [4:18:02<6:52:42, 1.63it/s] 38%|███▊ | 25073/65536 [4:18:03<6:45:45, 1.66it/s] 38%|███▊ | 25074/65536 [4:18:04<6:54:35, 1.63it/s] 38%|███▊ | 25075/65536 [4:18:04<6:52:50, 1.63it/s] 38%|███▊ | 25076/65536 [4:18:05<6:45:37, 1.66it/s] 38%|███▊ | 25077/65536 [4:18:05<6:43:25, 1.67it/s] 38%|███▊ | 25078/65536 [4:18:06<7:05:49, 1.58it/s] 38%|███▊ | 25079/65536 [4:18:07<7:03:46, 1.59it/s] 38%|███▊ | 25080/65536 [4:18:07<6:50:44, 1.64it/s] {'loss': 3.0106, 'learning_rate': 6.572792947226644e-07, 'epoch': 1548.15} + 38%|███▊ | 25080/65536 [4:18:07<6:50:44, 1.64it/s] 38%|███▊ | 25081/65536 [4:18:08<6:48:48, 1.65it/s] 38%|███▊ | 25082/65536 [4:18:08<6:42:17, 1.68it/s] 38%|███▊ | 25083/65536 [4:18:09<6:49:37, 1.65it/s] 38%|███▊ | 25084/65536 [4:18:10<7:05:16, 1.59it/s] 38%|███▊ | 25085/65536 [4:18:10<6:54:31, 1.63it/s] 38%|███▊ | 25086/65536 [4:18:11<6:48:36, 1.65it/s] 38%|███▊ | 25087/65536 [4:18:11<6:43:20, 1.67it/s] 38%|███▊ | 25088/65536 [4:18:12<6:37:47, 1.69it/s] 38%|███▊ | 25089/65536 [4:18:13<6:45:12, 1.66it/s] 38%|███▊ | 25090/65536 [4:18:13<6:42:18, 1.68it/s] 38%|███▊ | 25091/65536 [4:18:14<6:34:43, 1.71it/s] 38%|███▊ | 25092/65536 [4:18:14<6:40:52, 1.68it/s] 38%|███▊ | 25093/65536 [4:18:15<6:54:11, 1.63it/s] 38%|███▊ | 25094/65536 [4:18:16<7:21:11, 1.53it/s] 38%|███▊ | 25095/65536 [4:18:16<7:10:27, 1.57it/s] 38%|███▊ | 25096/65536 [4:18:17<7:09:34, 1.57it/s] 38%|███▊ | 25097/65536 [4:18:18<7:14:30, 1.55it/s] 38%|███▊ | 25098/65536 [4:18:18<7:02:24, 1.60it/s] 38%|███▊ | 25099/65536 [4:18:19<6:51:30, 1.64it/s] 38%|███▊ | 25100/65536 [4:18:19<6:50:55, 1.64it/s] {'loss': 2.9342, 'learning_rate': 6.570037957634381e-07, 'epoch': 1549.38} + 38%|███▊ | 25100/65536 [4:18:19<6:50:55, 1.64it/s] 38%|███▊ | 25101/65536 [4:18:20<6:41:34, 1.68it/s] 38%|███▊ | 25102/65536 [4:18:21<6:29:13, 1.73it/s] 38%|███▊ | 25103/65536 [4:18:21<6:28:30, 1.73it/s] 38%|███▊ | 25104/65536 [4:18:22<6:33:43, 1.71it/s] 38%|███▊ | 25105/65536 [4:18:22<6:44:33, 1.67it/s] 38%|███▊ | 25106/65536 [4:18:23<6:42:33, 1.67it/s] 38%|███▊ | 25107/65536 [4:18:24<6:47:01, 1.66it/s] 38%|███▊ | 25108/65536 [4:18:24<6:56:17, 1.62it/s] 38%|███▊ | 25109/65536 [4:18:25<7:29:54, 1.50it/s] 38%|███▊ | 25110/65536 [4:18:26<7:32:08, 1.49it/s] 38%|███▊ | 25111/65536 [4:18:26<7:32:18, 1.49it/s] 38%|███▊ | 25112/65536 [4:18:27<7:12:47, 1.56it/s] 38%|███▊ | 25113/65536 [4:18:28<7:06:12, 1.58it/s] 38%|███▊ | 25114/65536 [4:18:28<6:58:27, 1.61it/s] 38%|███▊ | 25115/65536 [4:18:29<7:15:02, 1.55it/s] 38%|███▊ | 25116/65536 [4:18:29<7:05:34, 1.58it/s] 38%|███▊ | 25117/65536 [4:18:30<6:52:05, 1.63it/s] 38%|███▊ | 25118/65536 [4:18:31<6:48:38, 1.65it/s] 38%|███▊ | 25119/65536 [4:18:31<6:44:27, 1.67it/s] 38%|███▊ | 25120/65536 [4:18:32<6:48:31, 1.65it/s] {'loss': 2.9943, 'learning_rate': 6.56728296804212e-07, 'epoch': 1550.62} + 38%|███▊ | 25120/65536 [4:18:32<6:48:31, 1.65it/s] 38%|███▊ | 25121/65536 [4:18:32<6:47:28, 1.65it/s] 38%|███▊ | 25122/65536 [4:18:33<6:47:33, 1.65it/s] 38%|███▊ | 25123/65536 [4:18:34<6:49:47, 1.64it/s] 38%|███▊ | 25124/65536 [4:18:34<6:48:44, 1.65it/s] 38%|███▊ | 25125/65536 [4:18:35<6:58:52, 1.61it/s] 38%|███▊ | 25126/65536 [4:18:36<6:59:02, 1.61it/s] 38%|███▊ | 25127/65536 [4:18:36<7:09:59, 1.57it/s] 38%|███▊ | 25128/65536 [4:18:37<7:05:39, 1.58it/s] 38%|███▊ | 25129/65536 [4:18:38<7:20:34, 1.53it/s] 38%|███▊ | 25130/65536 [4:18:38<7:04:00, 1.59it/s] 38%|███▊ | 25131/65536 [4:18:39<6:51:13, 1.64it/s] 38%|███▊ | 25132/65536 [4:18:39<6:43:50, 1.67it/s] 38%|███▊ | 25133/65536 [4:18:40<6:40:14, 1.68it/s] 38%|███▊ | 25134/65536 [4:18:40<6:49:59, 1.64it/s] 38%|███▊ | 25135/65536 [4:18:41<6:44:15, 1.67it/s] 38%|███▊ | 25136/65536 [4:18:42<6:45:55, 1.66it/s] 38%|███▊ | 25137/65536 [4:18:42<6:57:45, 1.61it/s] 38%|███▊ | 25138/65536 [4:18:43<6:44:01, 1.67it/s] 38%|███▊ | 25139/65536 [4:18:43<6:47:41, 1.65it/s] 38%|███▊ | 25140/65536 [4:18:44<6:49:47, 1.64it/s] {'loss': 2.8885, 'learning_rate': 6.564527978449859e-07, 'epoch': 1551.85} + 38%|███▊ | 25140/65536 [4:18:44<6:49:47, 1.64it/s] 38%|███▊ | 25141/65536 [4:18:45<6:45:13, 1.66it/s] 38%|███▊ | 25142/65536 [4:18:45<6:54:33, 1.62it/s] 38%|███▊ | 25143/65536 [4:18:46<7:11:26, 1.56it/s] 38%|███▊ | 25144/65536 [4:18:47<7:01:57, 1.60it/s] 38%|███▊ | 25145/65536 [4:18:47<6:45:34, 1.66it/s] 38%|███▊ | 25146/65536 [4:18:48<6:44:35, 1.66it/s] 38%|███▊ | 25147/65536 [4:18:48<6:48:50, 1.65it/s] 38%|███▊ | 25148/65536 [4:18:49<6:56:05, 1.62it/s] 38%|███▊ | 25149/65536 [4:18:50<6:53:53, 1.63it/s] 38%|███▊ | 25150/65536 [4:18:50<7:02:48, 1.59it/s] 38%|███▊ | 25151/65536 [4:18:51<6:53:57, 1.63it/s] 38%|███▊ | 25152/65536 [4:18:52<6:56:16, 1.62it/s] 38%|███▊ | 25153/65536 [4:18:52<6:51:11, 1.64it/s] 38%|███▊ | 25154/65536 [4:18:53<6:42:10, 1.67it/s] 38%|███▊ | 25155/65536 [4:18:53<6:38:13, 1.69it/s] 38%|███▊ | 25156/65536 [4:18:54<6:46:57, 1.65it/s] 38%|███▊ | 25157/65536 [4:18:55<6:55:13, 1.62it/s] 38%|███▊ | 25158/65536 [4:18:55<6:48:02, 1.65it/s] 38%|███▊ | 25159/65536 [4:18:56<7:04:20, 1.59it/s] 38%|███▊ | 25160/65536 [4:18:56<6:57:40, 1.61it/s] {'loss': 2.9782, 'learning_rate': 6.561772988857598e-07, 'epoch': 1553.09} + 38%|███▊ | 25160/65536 [4:18:56<6:57:40, 1.61it/s] 38%|███▊ | 25161/65536 [4:18:57<7:05:50, 1.58it/s] 38%|███▊ | 25162/65536 [4:18:58<7:12:59, 1.55it/s] 38%|███▊ | 25163/65536 [4:18:58<7:14:33, 1.55it/s] 38%|███▊ | 25164/65536 [4:18:59<7:05:16, 1.58it/s] 38%|███▊ | 25165/65536 [4:19:00<6:51:40, 1.63it/s] 38%|███▊ | 25166/65536 [4:19:00<6:55:15, 1.62it/s] 38%|███▊ | 25167/65536 [4:19:01<7:00:08, 1.60it/s] 38%|███▊ | 25168/65536 [4:19:01<6:51:56, 1.63it/s] 38%|███▊ | 25169/65536 [4:19:02<6:59:17, 1.60it/s] 38%|███▊ | 25170/65536 [4:19:03<6:58:36, 1.61it/s] 38%|███▊ | 25171/65536 [4:19:03<6:51:14, 1.64it/s] 38%|███▊ | 25172/65536 [4:19:04<6:44:26, 1.66it/s] 38%|███▊ | 25173/65536 [4:19:04<6:42:30, 1.67it/s] 38%|███▊ | 25174/65536 [4:19:05<6:53:12, 1.63it/s] 38%|███▊ | 25175/65536 [4:19:06<6:58:23, 1.61it/s] 38%|███▊ | 25176/65536 [4:19:06<6:50:32, 1.64it/s] 38%|███▊ | 25177/65536 [4:19:07<6:32:19, 1.71it/s] 38%|███▊ | 25178/65536 [4:19:07<6:34:48, 1.70it/s] 38%|███▊ | 25179/65536 [4:19:08<6:57:57, 1.61it/s] 38%|███▊ | 25180/65536 [4:19:09<7:03:40, 1.59it/s] {'loss': 2.9691, 'learning_rate': 6.559017999265336e-07, 'epoch': 1554.32} + 38%|███▊ | 25180/65536 [4:19:09<7:03:40, 1.59it/s] 38%|███▊ | 25181/65536 [4:19:09<7:00:47, 1.60it/s] 38%|███▊ | 25182/65536 [4:19:10<7:07:31, 1.57it/s] 38%|███▊ | 25183/65536 [4:19:11<6:58:01, 1.61it/s] 38%|███▊ | 25184/65536 [4:19:11<6:58:25, 1.61it/s] 38%|███▊ | 25185/65536 [4:19:12<6:53:05, 1.63it/s] 38%|███▊ | 25186/65536 [4:19:12<6:55:17, 1.62it/s] 38%|███▊ | 25187/65536 [4:19:13<6:50:28, 1.64it/s] 38%|███▊ | 25188/65536 [4:19:14<6:41:09, 1.68it/s] 38%|███▊ | 25189/65536 [4:19:14<6:42:54, 1.67it/s] 38%|███▊ | 25190/65536 [4:19:15<6:44:34, 1.66it/s] 38%|███▊ | 25191/65536 [4:19:15<6:49:04, 1.64it/s] 38%|███▊ | 25192/65536 [4:19:16<7:08:24, 1.57it/s] 38%|███▊ | 25193/65536 [4:19:17<7:12:13, 1.56it/s] 38%|███▊ | 25194/65536 [4:19:17<6:59:00, 1.60it/s] 38%|███▊ | 25195/65536 [4:19:18<6:56:24, 1.61it/s] 38%|███▊ | 25196/65536 [4:19:19<6:53:24, 1.63it/s] 38%|███▊ | 25197/65536 [4:19:19<6:53:36, 1.63it/s] 38%|███▊ | 25198/65536 [4:19:20<6:48:37, 1.65it/s] 38%|███▊ | 25199/65536 [4:19:20<6:53:14, 1.63it/s] 38%|███▊ | 25200/65536 [4:19:21<6:52:29, 1.63it/s] {'loss': 2.9399, 'learning_rate': 6.556263009673074e-07, 'epoch': 1555.56} + 38%|███▊ | 25200/65536 [4:19:21<6:52:29, 1.63it/s] 38%|███▊ | 25201/65536 [4:19:22<6:38:29, 1.69it/s] 38%|███▊ | 25202/65536 [4:19:22<6:50:12, 1.64it/s] 38%|███▊ | 25203/65536 [4:19:23<6:45:08, 1.66it/s] 38%|███▊ | 25204/65536 [4:19:23<6:44:54, 1.66it/s] 38%|███▊ | 25205/65536 [4:19:24<6:52:30, 1.63it/s] 38%|███▊ | 25206/65536 [4:19:25<6:37:30, 1.69it/s] 38%|███▊ | 25207/65536 [4:19:25<6:39:25, 1.68it/s] 38%|███▊ | 25208/65536 [4:19:26<7:01:32, 1.59it/s] 38%|███▊ | 25209/65536 [4:19:27<6:56:00, 1.62it/s] 38%|███▊ | 25210/65536 [4:19:27<6:48:05, 1.65it/s] 38%|███▊ | 25211/65536 [4:19:28<6:50:26, 1.64it/s] 38%|███▊ | 25212/65536 [4:19:28<6:58:53, 1.60it/s] 38%|███▊ | 25213/65536 [4:19:29<7:02:46, 1.59it/s] 38%|███▊ | 25214/65536 [4:19:30<7:06:43, 1.57it/s] 38%|███▊ | 25215/65536 [4:19:30<6:56:51, 1.61it/s] 38%|███▊ | 25216/65536 [4:19:31<7:00:22, 1.60it/s] 38%|███▊ | 25217/65536 [4:19:31<6:55:28, 1.62it/s] 38%|███▊ | 25218/65536 [4:19:32<6:53:16, 1.63it/s] 38%|███▊ | 25219/65536 [4:19:33<6:54:50, 1.62it/s] 38%|███▊ | 25220/65536 [4:19:33<6:53:47, 1.62it/s] {'loss': 2.9602, 'learning_rate': 6.553508020080813e-07, 'epoch': 1556.79} + 38%|███▊ | 25220/65536 [4:19:33<6:53:47, 1.62it/s] 38%|███▊ | 25221/65536 [4:19:34<6:54:43, 1.62it/s] 38%|███▊ | 25222/65536 [4:19:35<6:48:30, 1.64it/s] 38%|███▊ | 25223/65536 [4:19:35<6:47:32, 1.65it/s] 38%|███▊ | 25224/65536 [4:19:36<7:03:31, 1.59it/s] 38%|███▊ | 25225/65536 [4:19:36<6:58:53, 1.60it/s] 38%|███▊ | 25226/65536 [4:19:37<6:53:46, 1.62it/s] 38%|███▊ | 25227/65536 [4:19:38<7:05:45, 1.58it/s] 38%|███▊ | 25228/65536 [4:19:38<6:51:55, 1.63it/s] 38%|███▊ | 25229/65536 [4:19:39<6:44:37, 1.66it/s] 38%|███▊ | 25230/65536 [4:19:39<6:43:16, 1.67it/s] 38%|███▊ | 25231/65536 [4:19:40<6:42:54, 1.67it/s] 39%|███▊ | 25232/65536 [4:19:41<6:45:07, 1.66it/s] 39%|███▊ | 25233/65536 [4:19:41<6:50:17, 1.64it/s] 39%|███▊ | 25234/65536 [4:19:42<6:49:18, 1.64it/s] 39%|███▊ | 25235/65536 [4:19:42<6:45:22, 1.66it/s] 39%|███▊ | 25236/65536 [4:19:43<6:50:51, 1.63it/s] 39%|███▊ | 25237/65536 [4:19:44<6:44:19, 1.66it/s] 39%|███▊ | 25238/65536 [4:19:44<6:51:53, 1.63it/s] 39%|███▊ | 25239/65536 [4:19:45<6:52:37, 1.63it/s] 39%|███▊ | 25240/65536 [4:19:46<7:11:15, 1.56it/s] {'loss': 2.9456, 'learning_rate': 6.550753030488551e-07, 'epoch': 1558.02} + 39%|███▊ | 25240/65536 [4:19:46<7:11:15, 1.56it/s] 39%|███▊ | 25241/65536 [4:19:46<7:06:07, 1.58it/s] 39%|███▊ | 25242/65536 [4:19:47<6:55:21, 1.62it/s] 39%|███▊ | 25243/65536 [4:19:47<6:51:32, 1.63it/s] 39%|███▊ | 25244/65536 [4:19:48<6:50:52, 1.63it/s] 39%|███▊ | 25245/65536 [4:19:49<6:52:37, 1.63it/s] 39%|███▊ | 25246/65536 [4:19:49<6:56:47, 1.61it/s] 39%|███▊ | 25247/65536 [4:19:50<6:45:04, 1.66it/s] 39%|███▊ | 25248/65536 [4:19:50<6:40:03, 1.68it/s] 39%|███▊ | 25249/65536 [4:19:51<6:42:37, 1.67it/s] 39%|███▊ | 25250/65536 [4:19:52<6:42:54, 1.67it/s] 39%|███▊ | 25251/65536 [4:19:52<6:38:59, 1.68it/s] 39%|███▊ | 25252/65536 [4:19:53<6:50:51, 1.63it/s] 39%|███▊ | 25253/65536 [4:19:54<6:58:15, 1.61it/s] 39%|███▊ | 25254/65536 [4:19:54<6:46:52, 1.65it/s] 39%|███▊ | 25255/65536 [4:19:55<6:44:45, 1.66it/s] 39%|███▊ | 25256/65536 [4:19:55<6:59:51, 1.60it/s] 39%|███▊ | 25257/65536 [4:19:56<6:55:14, 1.62it/s] 39%|███▊ | 25258/65536 [4:19:57<6:47:47, 1.65it/s] 39%|███▊ | 25259/65536 [4:19:57<6:59:41, 1.60it/s] 39%|███▊ | 25260/65536 [4:19:58<7:02:58, 1.59it/s] {'loss': 3.0105, 'learning_rate': 6.54799804089629e-07, 'epoch': 1559.26} + 39%|███▊ | 25260/65536 [4:19:58<7:02:58, 1.59it/s] 39%|███▊ | 25261/65536 [4:19:59<7:04:49, 1.58it/s] 39%|███▊ | 25262/65536 [4:19:59<7:01:26, 1.59it/s] 39%|███▊ | 25263/65536 [4:20:00<6:58:56, 1.60it/s] 39%|███▊ | 25264/65536 [4:20:00<6:50:18, 1.64it/s] 39%|███▊ | 25265/65536 [4:20:01<6:47:11, 1.65it/s] 39%|███▊ | 25266/65536 [4:20:02<6:43:08, 1.66it/s] 39%|██��▊ | 25267/65536 [4:20:02<6:40:11, 1.68it/s] 39%|███▊ | 25268/65536 [4:20:03<6:42:24, 1.67it/s] 39%|███▊ | 25269/65536 [4:20:03<6:44:28, 1.66it/s] 39%|███▊ | 25270/65536 [4:20:04<7:03:19, 1.59it/s] 39%|███▊ | 25271/65536 [4:20:05<6:53:09, 1.62it/s] 39%|███▊ | 25272/65536 [4:20:05<6:47:26, 1.65it/s] 39%|███▊ | 25273/65536 [4:20:06<7:17:28, 1.53it/s] 39%|███▊ | 25274/65536 [4:20:07<7:12:32, 1.55it/s] 39%|███▊ | 25275/65536 [4:20:07<6:57:03, 1.61it/s] 39%|███▊ | 25276/65536 [4:20:08<7:03:04, 1.59it/s] 39%|███▊ | 25277/65536 [4:20:08<7:02:07, 1.59it/s] 39%|███▊ | 25278/65536 [4:20:09<7:01:02, 1.59it/s] 39%|███▊ | 25279/65536 [4:20:10<6:55:53, 1.61it/s] 39%|███▊ | 25280/65536 [4:20:10<6:55:26, 1.61it/s] {'loss': 2.9561, 'learning_rate': 6.545243051304027e-07, 'epoch': 1560.49} + 39%|███▊ | 25280/65536 [4:20:10<6:55:26, 1.61it/s] 39%|███▊ | 25281/65536 [4:20:11<6:51:45, 1.63it/s] 39%|███▊ | 25282/65536 [4:20:11<6:53:59, 1.62it/s] 39%|███▊ | 25283/65536 [4:20:12<6:56:04, 1.61it/s] 39%|███▊ | 25284/65536 [4:20:13<6:52:45, 1.63it/s] 39%|███▊ | 25285/65536 [4:20:13<6:42:48, 1.67it/s] 39%|███▊ | 25286/65536 [4:20:14<6:48:08, 1.64it/s] 39%|███▊ | 25287/65536 [4:20:14<6:39:13, 1.68it/s] 39%|███▊ | 25288/65536 [4:20:15<6:28:34, 1.73it/s] 39%|███▊ | 25289/65536 [4:20:16<7:03:35, 1.58it/s] 39%|███▊ | 25290/65536 [4:20:16<6:58:27, 1.60it/s] 39%|███▊ | 25291/65536 [4:20:17<6:53:07, 1.62it/s] 39%|███▊ | 25292/65536 [4:20:18<6:59:46, 1.60it/s] 39%|███▊ | 25293/65536 [4:20:18<6:51:01, 1.63it/s] 39%|███▊ | 25294/65536 [4:20:19<6:49:21, 1.64it/s] 39%|███▊ | 25295/65536 [4:20:19<6:58:31, 1.60it/s] 39%|███▊ | 25296/65536 [4:20:20<6:53:34, 1.62it/s] 39%|███▊ | 25297/65536 [4:20:21<6:41:09, 1.67it/s] 39%|███▊ | 25298/65536 [4:20:21<6:48:23, 1.64it/s] 39%|███▊ | 25299/65536 [4:20:22<6:45:31, 1.65it/s] 39%|███▊ | 25300/65536 [4:20:22<6:47:46, 1.64it/s] {'loss': 2.9641, 'learning_rate': 6.542488061711767e-07, 'epoch': 1561.73} + 39%|███▊ | 25300/65536 [4:20:22<6:47:46, 1.64it/s] 39%|███▊ | 25301/65536 [4:20:23<6:43:40, 1.66it/s] 39%|███▊ | 25302/65536 [4:20:24<6:38:24, 1.68it/s] 39%|███▊ | 25303/65536 [4:20:24<6:41:50, 1.67it/s] 39%|███▊ | 25304/65536 [4:20:25<6:42:06, 1.67it/s] 39%|███▊ | 25305/65536 [4:20:26<7:05:41, 1.58it/s] 39%|███▊ | 25306/65536 [4:20:26<6:52:33, 1.63it/s] 39%|███▊ | 25307/65536 [4:20:27<6:48:15, 1.64it/s] 39%|███▊ | 25308/65536 [4:20:27<6:42:03, 1.67it/s] 39%|███▊ | 25309/65536 [4:20:28<6:32:46, 1.71it/s] 39%|███▊ | 25310/65536 [4:20:28<6:42:01, 1.67it/s] 39%|███▊ | 25311/65536 [4:20:29<6:38:11, 1.68it/s] 39%|███▊ | 25312/65536 [4:20:30<6:40:46, 1.67it/s] 39%|███▊ | 25313/65536 [4:20:30<7:02:34, 1.59it/s] 39%|███▊ | 25314/65536 [4:20:31<6:53:45, 1.62it/s] 39%|███▊ | 25315/65536 [4:20:32<6:59:49, 1.60it/s] 39%|███▊ | 25316/65536 [4:20:32<7:02:00, 1.59it/s] 39%|███▊ | 25317/65536 [4:20:33<6:47:35, 1.64it/s] 39%|███▊ | 25318/65536 [4:20:33<6:46:18, 1.65it/s] 39%|███▊ | 25319/65536 [4:20:34<6:41:45, 1.67it/s] 39%|███▊ | 25320/65536 [4:20:35<6:43:04, 1.66it/s] {'loss': 3.0078, 'learning_rate': 6.539733072119506e-07, 'epoch': 1562.96} + 39%|███▊ | 25320/65536 [4:20:35<6:43:04, 1.66it/s] 39%|███▊ | 25321/65536 [4:20:35<6:59:04, 1.60it/s] 39%|███▊ | 25322/65536 [4:20:36<7:04:08, 1.58it/s] 39%|███▊ | 25323/65536 [4:20:36<6:50:48, 1.63it/s] 39%|███▊ | 25324/65536 [4:20:37<6:49:11, 1.64it/s] 39%|███▊ | 25325/65536 [4:20:38<6:50:17, 1.63it/s] 39%|███▊ | 25326/65536 [4:20:38<6:56:25, 1.61it/s] 39%|███▊ | 25327/65536 [4:20:39<6:49:00, 1.64it/s] 39%|███▊ | 25328/65536 [4:20:40<6:54:30, 1.62it/s] 39%|███▊ | 25329/65536 [4:20:40<6:51:55, 1.63it/s] 39%|███▊ | 25330/65536 [4:20:41<6:49:52, 1.63it/s] 39%|███▊ | 25331/65536 [4:20:41<6:41:58, 1.67it/s] 39%|███▊ | 25332/65536 [4:20:42<6:49:09, 1.64it/s] 39%|███▊ | 25333/65536 [4:20:43<6:49:32, 1.64it/s] 39%|███▊ | 25334/65536 [4:20:43<6:48:13, 1.64it/s] 39%|███▊ | 25335/65536 [4:20:44<6:43:38, 1.66it/s] 39%|███▊ | 25336/65536 [4:20:44<6:38:47, 1.68it/s] 39%|███▊ | 25337/65536 [4:20:45<6:50:22, 1.63it/s] 39%|███▊ | 25338/65536 [4:20:46<6:47:53, 1.64it/s] 39%|███▊ | 25339/65536 [4:20:46<6:44:20, 1.66it/s] 39%|███▊ | 25340/65536 [4:20:47<6:55:09, 1.61it/s] {'loss': 2.9818, 'learning_rate': 6.536978082527244e-07, 'epoch': 1564.2} + 39%|███▊ | 25340/65536 [4:20:47<6:55:09, 1.61it/s] 39%|███▊ | 25341/65536 [4:20:48<6:59:52, 1.60it/s] 39%|███▊ | 25342/65536 [4:20:48<6:50:25, 1.63it/s] 39%|███▊ | 25343/65536 [4:20:49<6:46:16, 1.65it/s] 39%|███▊ | 25344/65536 [4:20:49<6:44:34, 1.66it/s] 39%|███▊ | 25345/65536 [4:20:50<6:39:52, 1.68it/s] 39%|███▊ | 25346/65536 [4:20:50<6:44:29, 1.66it/s] 39%|███▊ | 25347/65536 [4:20:51<6:43:07, 1.66it/s] 39%|███▊ | 25348/65536 [4:20:52<6:35:00, 1.70it/s] 39%|███▊ | 25349/65536 [4:20:52<6:41:33, 1.67it/s] 39%|███▊ | 25350/65536 [4:20:53<6:43:13, 1.66it/s] 39%|███▊ | 25351/65536 [4:20:53<6:45:09, 1.65it/s] 39%|███▊ | 25352/65536 [4:20:54<6:55:27, 1.61it/s] 39%|███▊ | 25353/65536 [4:20:55<6:58:29, 1.60it/s] 39%|███▊ | 25354/65536 [4:20:55<7:13:50, 1.54it/s] 39%|███▊ | 25355/65536 [4:20:56<7:06:59, 1.57it/s] 39%|███▊ | 25356/65536 [4:20:57<7:08:28, 1.56it/s] 39%|███▊ | 25357/65536 [4:20:57<7:00:08, 1.59it/s] 39%|███▊ | 25358/65536 [4:20:58<7:02:28, 1.59it/s] 39%|███▊ | 25359/65536 [4:20:59<6:47:22, 1.64it/s] 39%|███▊ | 25360/65536 [4:20:59<6:48:06, 1.64it/s] {'loss': 3.0106, 'learning_rate': 6.534223092934983e-07, 'epoch': 1565.43} + 39%|███▊ | 25360/65536 [4:20:59<6:48:06, 1.64it/s] 39%|███▊ | 25361/65536 [4:21:00<6:43:16, 1.66it/s] 39%|███▊ | 25362/65536 [4:21:00<6:42:13, 1.66it/s] 39%|███▊ | 25363/65536 [4:21:01<6:49:17, 1.64it/s] 39%|███▊ | 25364/65536 [4:21:02<6:41:25, 1.67it/s] 39%|███▊ | 25365/65536 [4:21:02<6:39:54, 1.67it/s] 39%|███▊ | 25366/65536 [4:21:03<7:02:18, 1.59it/s] 39%|███▊ | 25367/65536 [4:21:03<6:50:09, 1.63it/s] 39%|███▊ | 25368/65536 [4:21:04<6:57:03, 1.61it/s] 39%|███▊ | 25369/65536 [4:21:05<6:51:43, 1.63it/s] 39%|███▊ | 25370/65536 [4:21:05<7:07:12, 1.57it/s] 39%|███▊ | 25371/65536 [4:21:06<6:59:06, 1.60it/s] 39%|███▊ | 25372/65536 [4:21:07<7:00:44, 1.59it/s] 39%|███▊ | 25373/65536 [4:21:07<7:04:22, 1.58it/s] 39%|███▊ | 25374/65536 [4:21:08<7:04:47, 1.58it/s] 39%|███▊ | 25375/65536 [4:21:09<7:22:00, 1.51it/s] 39%|███▊ | 25376/65536 [4:21:09<7:17:20, 1.53it/s] 39%|███▊ | 25377/65536 [4:21:10<7:02:33, 1.58it/s] 39%|███▊ | 25378/65536 [4:21:10<6:48:23, 1.64it/s] 39%|███▊ | 25379/65536 [4:21:11<6:54:27, 1.61it/s] 39%|███▊ | 25380/65536 [4:21:12<6:49:29, 1.63it/s] {'loss': 2.9646, 'learning_rate': 6.53146810334272e-07, 'epoch': 1566.67} + 39%|███▊ | 25380/65536 [4:21:12<6:49:29, 1.63it/s] 39%|███▊ | 25381/65536 [4:21:12<6:43:29, 1.66it/s] 39%|███▊ | 25382/65536 [4:21:13<6:43:44, 1.66it/s] 39%|███▊ | 25383/65536 [4:21:13<7:02:06, 1.59it/s] 39%|███▊ | 25384/65536 [4:21:14<6:51:55, 1.62it/s] 39%|███▊ | 25385/65536 [4:21:15<6:51:45, 1.63it/s] 39%|███▊ | 25386/65536 [4:21:15<7:03:09, 1.58it/s] 39%|███▊ | 25387/65536 [4:21:16<7:00:28, 1.59it/s] 39%|███▊ | 25388/65536 [4:21:17<6:50:38, 1.63it/s] 39%|███▊ | 25389/65536 [4:21:17<6:53:23, 1.62it/s] 39%|███▊ | 25390/65536 [4:21:18<7:01:40, 1.59it/s] 39%|███▊ | 25391/65536 [4:21:18<6:56:34, 1.61it/s] 39%|███▊ | 25392/65536 [4:21:19<7:01:52, 1.59it/s] 39%|███▊ | 25393/65536 [4:21:20<7:13:36, 1.54it/s] 39%|███▊ | 25394/65536 [4:21:20<7:04:09, 1.58it/s] 39%|███▊ | 25395/65536 [4:21:21<6:53:48, 1.62it/s] 39%|███▉ | 25396/65536 [4:21:22<6:50:53, 1.63it/s] 39%|███▉ | 25397/65536 [4:21:22<6:45:11, 1.65it/s] 39%|███▉ | 25398/65536 [4:21:23<6:42:21, 1.66it/s] 39%|███▉ | 25399/65536 [4:21:23<6:42:33, 1.66it/s] 39%|███▉ | 25400/65536 [4:21:24<6:46:06, 1.65it/s] {'loss': 2.9095, 'learning_rate': 6.528713113750459e-07, 'epoch': 1567.9} + 39%|███▉ | 25400/65536 [4:21:24<6:46:06, 1.65it/s] 39%|███▉ | 25401/65536 [4:21:25<6:57:15, 1.60it/s] 39%|███▉ | 25402/65536 [4:21:25<7:09:06, 1.56it/s] 39%|███▉ | 25403/65536 [4:21:26<6:57:45, 1.60it/s] 39%|███▉ | 25404/65536 [4:21:27<6:57:25, 1.60it/s] 39%|███▉ | 25405/65536 [4:21:27<6:53:49, 1.62it/s] 39%|███▉ | 25406/65536 [4:21:28<6:51:45, 1.62it/s] 39%|███▉ | 25407/65536 [4:21:28<6:50:59, 1.63it/s] 39%|███▉ | 25408/65536 [4:21:29<7:00:55, 1.59it/s] 39%|███▉ | 25409/65536 [4:21:30<6:46:28, 1.65it/s] 39%|███▉ | 25410/65536 [4:21:30<6:51:02, 1.63it/s] 39%|███▉ | 25411/65536 [4:21:31<6:58:55, 1.60it/s] 39%|███▉ | 25412/65536 [4:21:31<6:59:08, 1.60it/s] 39%|███▉ | 25413/65536 [4:21:32<7:02:51, 1.58it/s] 39%|███▉ | 25414/65536 [4:21:33<7:03:28, 1.58it/s] 39%|███▉ | 25415/65536 [4:21:33<6:52:52, 1.62it/s] 39%|███▉ | 25416/65536 [4:21:34<6:46:17, 1.65it/s] 39%|███▉ | 25417/65536 [4:21:35<6:48:52, 1.64it/s] 39%|███▉ | 25418/65536 [4:21:35<7:09:23, 1.56it/s] 39%|███▉ | 25419/65536 [4:21:36<6:59:24, 1.59it/s] 39%|███▉ | 25420/65536 [4:21:36<7:02:46, 1.58it/s] {'loss': 2.9554, 'learning_rate': 6.525958124158198e-07, 'epoch': 1569.14} + 39%|███▉ | 25420/65536 [4:21:36<7:02:46, 1.58it/s] 39%|███▉ | 25421/65536 [4:21:37<6:56:13, 1.61it/s] 39%|███▉ | 25422/65536 [4:21:38<6:53:28, 1.62it/s] 39%|███▉ | 25423/65536 [4:21:38<6:59:01, 1.60it/s] 39%|███▉ | 25424/65536 [4:21:39<6:49:39, 1.63it/s] 39%|███▉ | 25425/65536 [4:21:40<6:50:46, 1.63it/s] 39%|███▉ | 25426/65536 [4:21:40<6:51:51, 1.62it/s] 39%|███▉ | 25427/65536 [4:21:41<6:55:31, 1.61it/s] 39%|███▉ | 25428/65536 [4:21:41<6:56:54, 1.60it/s] 39%|███▉ | 25429/65536 [4:21:42<6:52:16, 1.62it/s] 39%|███▉ | 25430/65536 [4:21:43<6:45:34, 1.65it/s] 39%|███▉ | 25431/65536 [4:21:43<6:48:25, 1.64it/s] 39%|███▉ | 25432/65536 [4:21:44<6:57:10, 1.60it/s] 39%|███▉ | 25433/65536 [4:21:44<6:52:45, 1.62it/s] 39%|███▉ | 25434/65536 [4:21:45<6:46:12, 1.65it/s] 39%|███▉ | 25435/65536 [4:21:46<7:05:55, 1.57it/s] 39%|███▉ | 25436/65536 [4:21:46<6:56:52, 1.60it/s] 39%|███▉ | 25437/65536 [4:21:47<6:54:31, 1.61it/s] 39%|███▉ | 25438/65536 [4:21:48<6:53:22, 1.62it/s] 39%|███▉ | 25439/65536 [4:21:48<6:57:00, 1.60it/s] 39%|███▉ | 25440/65536 [4:21:49<7:05:10, 1.57it/s] {'loss': 2.8864, 'learning_rate': 6.523203134565935e-07, 'epoch': 1570.37} + 39%|███▉ | 25440/65536 [4:21:49<7:05:10, 1.57it/s] 39%|███▉ | 25441/65536 [4:21:50<6:57:52, 1.60it/s] 39%|███▉ | 25442/65536 [4:21:50<6:53:12, 1.62it/s] 39%|███▉ | 25443/65536 [4:21:51<6:47:56, 1.64it/s] 39%|███▉ | 25444/65536 [4:21:51<6:48:22, 1.64it/s] 39%|███▉ | 25445/65536 [4:21:52<6:45:23, 1.65it/s] 39%|███▉ | 25446/65536 [4:21:52<6:42:39, 1.66it/s] 39%|███▉ | 25447/65536 [4:21:53<6:51:29, 1.62it/s] 39%|███▉ | 25448/65536 [4:21:54<6:43:59, 1.65it/s] 39%|███▉ | 25449/65536 [4:21:54<6:41:43, 1.66it/s] 39%|███▉ | 25450/65536 [4:21:55<6:42:02, 1.66it/s] 39%|███▉ | 25451/65536 [4:21:56<6:59:55, 1.59it/s] 39%|███▉ | 25452/65536 [4:21:56<6:47:28, 1.64it/s] 39%|███▉ | 25453/65536 [4:21:57<6:56:34, 1.60it/s] 39%|███▉ | 25454/65536 [4:21:57<6:46:26, 1.64it/s] 39%|███▉ | 25455/65536 [4:21:58<6:54:37, 1.61it/s] 39%|███▉ | 25456/65536 [4:21:59<6:48:14, 1.64it/s] 39%|███▉ | 25457/65536 [4:21:59<6:41:20, 1.66it/s] 39%|███▉ | 25458/65536 [4:22:00<6:35:01, 1.69it/s] 39%|███▉ | 25459/65536 [4:22:00<6:32:26, 1.70it/s] 39%|███▉ | 25460/65536 [4:22:01<6:44:10, 1.65it/s] {'loss': 2.9818, 'learning_rate': 6.520448144973674e-07, 'epoch': 1571.6} + 39%|███▉ | 25460/65536 [4:22:01<6:44:10, 1.65it/s] 39%|███▉ | 25461/65536 [4:22:02<6:42:22, 1.66it/s] 39%|███▉ | 25462/65536 [4:22:02<6:39:00, 1.67it/s] 39%|███▉ | 25463/65536 [4:22:03<6:49:42, 1.63it/s] 39%|███▉ | 25464/65536 [4:22:04<7:07:27, 1.56it/s] 39%|███▉ | 25465/65536 [4:22:04<7:03:54, 1.58it/s] 39%|███▉ | 25466/65536 [4:22:05<6:57:50, 1.60it/s] 39%|███▉ | 25467/65536 [4:22:05<7:02:17, 1.58it/s] 39%|███▉ | 25468/65536 [4:22:06<7:01:06, 1.59it/s] 39%|███▉ | 25469/65536 [4:22:07<6:53:36, 1.61it/s] 39%|███▉ | 25470/65536 [4:22:07<6:41:25, 1.66it/s] 39%|███▉ | 25471/65536 [4:22:08<6:45:14, 1.65it/s] 39%|███▉ | 25472/65536 [4:22:08<6:53:38, 1.61it/s] 39%|███▉ | 25473/65536 [4:22:09<6:46:54, 1.64it/s] 39%|███▉ | 25474/65536 [4:22:10<6:40:40, 1.67it/s] 39%|███▉ | 25475/65536 [4:22:10<6:54:49, 1.61it/s] 39%|███▉ | 25476/65536 [4:22:11<6:48:48, 1.63it/s] 39%|███▉ | 25477/65536 [4:22:12<6:52:14, 1.62it/s] 39%|███▉ | 25478/65536 [4:22:12<6:59:34, 1.59it/s] 39%|███▉ | 25479/65536 [4:22:13<7:03:02, 1.58it/s] 39%|███▉ | 25480/65536 [4:22:13<7:03:30, 1.58it/s] {'loss': 2.9394, 'learning_rate': 6.517693155381412e-07, 'epoch': 1572.84} + 39%|███▉ | 25480/65536 [4:22:13<7:03:30, 1.58it/s] 39%|███▉ | 25481/65536 [4:22:14<6:50:05, 1.63it/s] 39%|███▉ | 25482/65536 [4:22:15<6:46:21, 1.64it/s] 39%|███▉ | 25483/65536 [4:22:15<6:54:06, 1.61it/s] 39%|███▉ | 25484/65536 [4:22:16<6:51:39, 1.62it/s] 39%|███▉ | 25485/65536 [4:22:16<6:40:58, 1.66it/s] 39%|███▉ | 25486/65536 [4:22:17<6:49:51, 1.63it/s] 39%|███▉ | 25487/65536 [4:22:18<6:52:48, 1.62it/s] 39%|███▉ | 25488/65536 [4:22:18<7:06:08, 1.57it/s] 39%|███▉ | 25489/65536 [4:22:19<6:59:35, 1.59it/s] 39%|███▉ | 25490/65536 [4:22:20<6:55:46, 1.61it/s] 39%|███▉ | 25491/65536 [4:22:20<6:43:27, 1.65it/s] 39%|███▉ | 25492/65536 [4:22:21<6:40:49, 1.67it/s] 39%|███▉ | 25493/65536 [4:22:21<6:40:07, 1.67it/s] 39%|███▉ | 25494/65536 [4:22:22<6:42:53, 1.66it/s] 39%|███▉ | 25495/65536 [4:22:23<6:40:04, 1.67it/s] 39%|███▉ | 25496/65536 [4:22:23<6:42:47, 1.66it/s] 39%|███▉ | 25497/65536 [4:22:24<6:51:52, 1.62it/s] 39%|███▉ | 25498/65536 [4:22:24<6:43:53, 1.65it/s] 39%|███▉ | 25499/65536 [4:22:25<6:56:36, 1.60it/s] 39%|███▉ | 25500/65536 [4:22:26<6:46:50, 1.64it/s] {'loss': 2.9811, 'learning_rate': 6.514938165789151e-07, 'epoch': 1574.07} + 39%|███▉ | 25500/65536 [4:22:26<6:46:50, 1.64it/s] 39%|███▉ | 25501/65536 [4:22:26<6:43:53, 1.65it/s] 39%|███▉ | 25502/65536 [4:22:27<6:45:55, 1.64it/s] 39%|███▉ | 25503/65536 [4:22:27<6:44:57, 1.65it/s] 39%|███▉ | 25504/65536 [4:22:28<6:46:14, 1.64it/s] 39%|███▉ | 25505/65536 [4:22:29<6:43:02, 1.66it/s] 39%|███▉ | 25506/65536 [4:22:29<6:52:47, 1.62it/s] 39%|███▉ | 25507/65536 [4:22:30<6:52:30, 1.62it/s] 39%|███▉ | 25508/65536 [4:22:31<6:46:28, 1.64it/s] 39%|███▉ | 25509/65536 [4:22:31<6:38:50, 1.67it/s] 39%|███▉ | 25510/65536 [4:22:32<6:36:01, 1.68it/s] 39%|███▉ | 25511/65536 [4:22:32<6:47:51, 1.64it/s] 39%|███▉ | 25512/65536 [4:22:33<6:54:58, 1.61it/s] 39%|███▉ | 25513/65536 [4:22:34<6:57:01, 1.60it/s] 39%|███▉ | 25514/65536 [4:22:34<6:58:35, 1.59it/s] 39%|███▉ | 25515/65536 [4:22:35<6:59:27, 1.59it/s] 39%|███▉ | 25516/65536 [4:22:36<7:21:44, 1.51it/s] 39%|███▉ | 25517/65536 [4:22:36<7:11:50, 1.54it/s] 39%|███▉ | 25518/65536 [4:22:37<7:00:23, 1.59it/s] 39%|███▉ | 25519/65536 [4:22:37<6:52:24, 1.62it/s] 39%|███▉ | 25520/65536 [4:22:38<6:54:22, 1.61it/s] {'loss': 2.963, 'learning_rate': 6.51218317619689e-07, 'epoch': 1575.31} + 39%|███▉ | 25520/65536 [4:22:38<6:54:22, 1.61it/s] 39%|███▉ | 25521/65536 [4:22:39<7:08:07, 1.56it/s] 39%|███▉ | 25522/65536 [4:22:39<7:00:10, 1.59it/s] 39%|███▉ | 25523/65536 [4:22:40<7:00:58, 1.58it/s] 39%|███▉ | 25524/65536 [4:22:41<6:47:50, 1.64it/s] 39%|███▉ | 25525/65536 [4:22:41<6:49:51, 1.63it/s] 39%|███▉ | 25526/65536 [4:22:42<6:52:02, 1.62it/s] 39%|███▉ | 25527/65536 [4:22:42<6:42:45, 1.66it/s] 39%|███▉ | 25528/65536 [4:22:43<6:39:56, 1.67it/s] 39%|███▉ | 25529/65536 [4:22:44<6:36:42, 1.68it/s] 39%|███▉ | 25530/65536 [4:22:44<6:36:54, 1.68it/s] 39%|███▉ | 25531/65536 [4:22:45<6:52:03, 1.62it/s] 39%|███▉ | 25532/65536 [4:22:46<7:08:57, 1.55it/s] 39%|███▉ | 25533/65536 [4:22:46<7:06:56, 1.56it/s] 39%|███▉ | 25534/65536 [4:22:47<6:59:37, 1.59it/s] 39%|███▉ | 25535/65536 [4:22:47<6:51:37, 1.62it/s] 39%|███▉ | 25536/65536 [4:22:48<6:48:01, 1.63it/s] 39%|███▉ | 25537/65536 [4:22:49<6:46:30, 1.64it/s] 39%|███▉ | 25538/65536 [4:22:49<6:45:29, 1.64it/s] 39%|███▉ | 25539/65536 [4:22:50<6:37:01, 1.68it/s] 39%|███▉ | 25540/65536 [4:22:50<6:46:56, 1.64it/s] {'loss': 2.9593, 'learning_rate': 6.509428186604628e-07, 'epoch': 1576.54} + 39%|███▉ | 25540/65536 [4:22:50<6:46:56, 1.64it/s] 39%|███▉ | 25541/65536 [4:22:51<6:47:23, 1.64it/s] 39%|███▉ | 25542/65536 [4:22:52<6:58:51, 1.59it/s] 39%|███▉ | 25543/65536 [4:22:52<6:51:03, 1.62it/s] 39%|███▉ | 25544/65536 [4:22:53<7:00:46, 1.58it/s] 39%|███▉ | 25545/65536 [4:22:53<6:44:40, 1.65it/s] 39%|███▉ | 25546/65536 [4:22:54<6:42:46, 1.65it/s] 39%|███▉ | 25547/65536 [4:22:55<6:38:00, 1.67it/s] 39%|███▉ | 25548/65536 [4:22:55<6:48:21, 1.63it/s] 39%|███▉ | 25549/65536 [4:22:56<6:46:33, 1.64it/s] 39%|███▉ | 25550/65536 [4:22:56<6:43:50, 1.65it/s] 39%|███▉ | 25551/65536 [4:22:57<6:35:17, 1.69it/s] 39%|███▉ | 25552/65536 [4:22:58<6:31:29, 1.70it/s] 39%|███▉ | 25553/65536 [4:22:58<6:34:14, 1.69it/s] 39%|███▉ | 25554/65536 [4:22:59<6:28:30, 1.72it/s] 39%|███▉ | 25555/65536 [4:22:59<6:33:37, 1.69it/s] 39%|███▉ | 25556/65536 [4:23:00<6:41:28, 1.66it/s] 39%|███▉ | 25557/65536 [4:23:01<6:51:54, 1.62it/s] 39%|███▉ | 25558/65536 [4:23:01<6:52:20, 1.62it/s] 39%|███▉ | 25559/65536 [4:23:02<6:50:58, 1.62it/s] 39%|███▉ | 25560/65536 [4:23:03<6:57:55, 1.59it/s] {'loss': 2.9009, 'learning_rate': 6.506673197012367e-07, 'epoch': 1577.78} + 39%|███▉ | 25560/65536 [4:23:03<6:57:55, 1.59it/s] 39%|███▉ | 25561/65536 [4:23:03<6:51:14, 1.62it/s] 39%|███▉ | 25562/65536 [4:23:04<7:05:03, 1.57it/s] 39%|███▉ | 25563/65536 [4:23:04<6:52:05, 1.62it/s] 39%|███▉ | 25564/65536 [4:23:05<7:01:20, 1.58it/s] 39%|███▉ | 25565/65536 [4:23:06<7:08:17, 1.56it/s] 39%|███▉ | 25566/65536 [4:23:06<7:04:49, 1.57it/s] 39%|███▉ | 25567/65536 [4:23:07<6:51:34, 1.62it/s] 39%|███▉ | 25568/65536 [4:23:07<6:41:29, 1.66it/s] 39%|███▉ | 25569/65536 [4:23:08<6:46:32, 1.64it/s] 39%|███▉ | 25570/65536 [4:23:09<6:52:59, 1.61it/s] 39%|███▉ | 25571/65536 [4:23:09<6:58:19, 1.59it/s] 39%|███▉ | 25572/65536 [4:23:10<6:52:39, 1.61it/s] 39%|███▉ | 25573/65536 [4:23:11<6:42:29, 1.65it/s] 39%|███▉ | 25574/65536 [4:23:11<6:42:52, 1.65it/s] 39%|███▉ | 25575/65536 [4:23:12<6:37:36, 1.68it/s] 39%|███▉ | 25576/65536 [4:23:12<6:40:53, 1.66it/s] 39%|███▉ | 25577/65536 [4:23:13<6:43:15, 1.65it/s] 39%|███▉ | 25578/65536 [4:23:14<6:49:40, 1.63it/s] 39%|███▉ | 25579/65536 [4:23:14<6:44:29, 1.65it/s] 39%|███▉ | 25580/65536 [4:23:15<6:52:03, 1.62it/s] {'loss': 2.9137, 'learning_rate': 6.503918207420105e-07, 'epoch': 1579.01} + 39%|███▉ | 25580/65536 [4:23:15<6:52:03, 1.62it/s] 39%|███▉ | 25581/65536 [4:23:15<6:47:41, 1.63it/s] 39%|███▉ | 25582/65536 [4:23:16<6:43:45, 1.65it/s] 39%|███▉ | 25583/65536 [4:23:17<6:42:52, 1.65it/s] 39%|███▉ | 25584/65536 [4:23:17<6:42:45, 1.65it/s] 39%|███▉ | 25585/65536 [4:23:18<6:49:27, 1.63it/s] 39%|███▉ | 25586/65536 [4:23:19<6:51:15, 1.62it/s] 39%|███▉ | 25587/65536 [4:23:19<6:52:56, 1.61it/s] 39%|███▉ | 25588/65536 [4:23:20<6:40:35, 1.66it/s] 39%|███▉ | 25589/65536 [4:23:20<6:43:58, 1.65it/s] 39%|███▉ | 25590/65536 [4:23:21<6:45:30, 1.64it/s] 39%|███▉ | 25591/65536 [4:23:22<6:48:05, 1.63it/s] 39%|███▉ | 25592/65536 [4:23:22<6:49:52, 1.62it/s] 39%|███▉ | 25593/65536 [4:23:23<6:58:36, 1.59it/s] 39%|███▉ | 25594/65536 [4:23:23<6:54:14, 1.61it/s] 39%|███▉ | 25595/65536 [4:23:24<6:49:56, 1.62it/s] 39%|███▉ | 25596/65536 [4:23:25<6:49:36, 1.63it/s] 39%|███▉ | 25597/65536 [4:23:25<7:05:29, 1.56it/s] 39%|███▉ | 25598/65536 [4:23:26<7:01:18, 1.58it/s] 39%|███▉ | 25599/65536 [4:23:27<6:59:56, 1.59it/s] 39%|███▉ | 25600/65536 [4:23:27<6:53:34, 1.61it/s] {'loss': 2.8327, 'learning_rate': 6.501163217827844e-07, 'epoch': 1580.25} + 39%|███▉ | 25600/65536 [4:23:27<6:53:34, 1.61it/s] 39%|███▉ | 25601/65536 [4:23:28<6:49:11, 1.63it/s] 39%|███▉ | 25602/65536 [4:23:28<6:47:00, 1.64it/s] 39%|███▉ | 25603/65536 [4:23:29<6:47:22, 1.63it/s] 39%|███▉ | 25604/65536 [4:23:30<6:52:45, 1.61it/s] 39%|███▉ | 25605/65536 [4:23:30<6:57:19, 1.59it/s] 39%|███▉ | 25606/65536 [4:23:31<6:56:25, 1.60it/s] 39%|███▉ | 25607/65536 [4:23:32<6:53:50, 1.61it/s] 39%|███▉ | 25608/65536 [4:23:32<6:52:37, 1.61it/s] 39%|███▉ | 25609/65536 [4:23:33<6:45:38, 1.64it/s] 39%|███▉ | 25610/65536 [4:23:33<6:45:17, 1.64it/s] 39%|███▉ | 25611/65536 [4:23:34<6:36:05, 1.68it/s] 39%|███▉ | 25612/65536 [4:23:35<6:36:28, 1.68it/s] 39%|███▉ | 25613/65536 [4:23:35<6:47:03, 1.63it/s] 39%|███▉ | 25614/65536 [4:23:36<6:42:32, 1.65it/s] 39%|███▉ | 25615/65536 [4:23:36<6:54:08, 1.61it/s] 39%|███▉ | 25616/65536 [4:23:37<6:56:09, 1.60it/s] 39%|███▉ | 25617/65536 [4:23:38<6:53:02, 1.61it/s] 39%|███▉ | 25618/65536 [4:23:38<6:42:58, 1.65it/s] 39%|███▉ | 25619/65536 [4:23:39<6:55:10, 1.60it/s] 39%|███▉ | 25620/65536 [4:23:39<6:46:29, 1.64it/s] {'loss': 2.9212, 'learning_rate': 6.498408228235582e-07, 'epoch': 1581.48} + 39%|███▉ | 25620/65536 [4:23:39<6:46:29, 1.64it/s] 39%|███▉ | 25621/65536 [4:23:40<6:54:09, 1.61it/s] 39%|███▉ | 25622/65536 [4:23:41<6:58:40, 1.59it/s] 39%|███▉ | 25623/65536 [4:23:41<6:59:51, 1.58it/s] 39%|███▉ | 25624/65536 [4:23:42<6:56:23, 1.60it/s] 39%|███▉ | 25625/65536 [4:23:43<6:46:27, 1.64it/s] 39%|███▉ | 25626/65536 [4:23:43<6:40:14, 1.66it/s] 39%|███▉ | 25627/65536 [4:23:44<6:39:59, 1.66it/s] 39%|███▉ | 25628/65536 [4:23:44<6:37:13, 1.67it/s] 39%|███▉ | 25629/65536 [4:23:45<6:58:10, 1.59it/s] 39%|███▉ | 25630/65536 [4:23:46<6:59:40, 1.58it/s] 39%|███▉ | 25631/65536 [4:23:46<6:55:58, 1.60it/s] 39%|███▉ | 25632/65536 [4:23:47<6:51:14, 1.62it/s] 39%|███▉ | 25633/65536 [4:23:47<6:44:01, 1.65it/s] 39%|███▉ | 25634/65536 [4:23:48<6:40:40, 1.66it/s] 39%|███▉ | 25635/65536 [4:23:49<6:55:32, 1.60it/s] 39%|███▉ | 25636/65536 [4:23:49<6:59:50, 1.58it/s] 39%|███▉ | 25637/65536 [4:23:50<6:44:33, 1.64it/s] 39%|███▉ | 25638/65536 [4:23:51<6:47:35, 1.63it/s] 39%|███▉ | 25639/65536 [4:23:51<6:48:28, 1.63it/s] 39%|███▉ | 25640/65536 [4:23:52<6:46:20, 1.64it/s] {'loss': 2.8549, 'learning_rate': 6.495653238643321e-07, 'epoch': 1582.72} + 39%|███▉ | 25640/65536 [4:23:52<6:46:20, 1.64it/s] 39%|███▉ | 25641/65536 [4:23:52<6:40:45, 1.66it/s] 39%|███▉ | 25642/65536 [4:23:53<6:38:57, 1.67it/s] 39%|███▉ | 25643/65536 [4:23:54<6:46:19, 1.64it/s] 39%|███▉ | 25644/65536 [4:23:54<6:42:20, 1.65it/s] 39%|███▉ | 25645/65536 [4:23:55<6:46:56, 1.63it/s] 39%|███▉ | 25646/65536 [4:23:55<6:47:40, 1.63it/s] 39%|███▉ | 25647/65536 [4:23:56<6:59:14, 1.59it/s] 39%|███▉ | 25648/65536 [4:23:57<6:45:00, 1.64it/s] 39%|███▉ | 25649/65536 [4:23:57<6:44:00, 1.65it/s] 39%|███▉ | 25650/65536 [4:23:58<6:41:24, 1.66it/s] 39%|███▉ | 25651/65536 [4:23:58<6:34:34, 1.68it/s] 39%|███▉ | 25652/65536 [4:23:59<6:46:08, 1.64it/s] 39%|███▉ | 25653/65536 [4:24:00<6:52:10, 1.61it/s] 39%|███▉ | 25654/65536 [4:24:00<6:51:30, 1.62it/s] 39%|███▉ | 25655/65536 [4:24:01<6:46:53, 1.63it/s] 39%|███▉ | 25656/65536 [4:24:02<6:37:51, 1.67it/s] 39%|███▉ | 25657/65536 [4:24:02<6:33:59, 1.69it/s] 39%|███▉ | 25658/65536 [4:24:03<6:41:54, 1.65it/s] 39%|███▉ | 25659/65536 [4:24:03<6:43:20, 1.65it/s] 39%|███▉ | 25660/65536 [4:24:04<6:42:15, 1.65it/s] {'loss': 2.889, 'learning_rate': 6.492898249051059e-07, 'epoch': 1583.95} + 39%|███▉ | 25660/65536 [4:24:04<6:42:15, 1.65it/s] 39%|███▉ | 25661/65536 [4:24:05<6:58:42, 1.59it/s] 39%|███▉ | 25662/65536 [4:24:05<6:51:58, 1.61it/s] 39%|███▉ | 25663/65536 [4:24:06<7:09:33, 1.55it/s] 39%|███▉ | 25664/65536 [4:24:07<6:57:46, 1.59it/s] 39%|███▉ | 25665/65536 [4:24:07<6:54:25, 1.60it/s] 39%|███▉ | 25666/65536 [4:24:08<6:45:27, 1.64it/s] 39%|███▉ | 25667/65536 [4:24:08<6:48:55, 1.62it/s] 39%|███▉ | 25668/65536 [4:24:09<6:47:29, 1.63it/s] 39%|███▉ | 25669/65536 [4:24:10<6:54:11, 1.60it/s] 39%|███▉ | 25670/65536 [4:24:10<6:54:08, 1.60it/s] 39%|███▉ | 25671/65536 [4:24:11<6:45:12, 1.64it/s] 39%|███▉ | 25672/65536 [4:24:11<6:38:50, 1.67it/s] 39%|███▉ | 25673/65536 [4:24:12<6:42:15, 1.65it/s] 39%|███▉ | 25674/65536 [4:24:13<6:40:48, 1.66it/s] 39%|███▉ | 25675/65536 [4:24:13<6:41:33, 1.65it/s] 39%|███▉ | 25676/65536 [4:24:14<6:43:58, 1.64it/s] 39%|███▉ | 25677/65536 [4:24:14<6:38:19, 1.67it/s] 39%|███▉ | 25678/65536 [4:24:15<6:49:29, 1.62it/s] 39%|███▉ | 25679/65536 [4:24:16<6:48:29, 1.63it/s] 39%|███▉ | 25680/65536 [4:24:16<6:59:12, 1.58it/s] {'loss': 2.8057, 'learning_rate': 6.490143259458797e-07, 'epoch': 1585.19} + 39%|███▉ | 25680/65536 [4:24:16<6:59:12, 1.58it/s] 39%|███▉ | 25681/65536 [4:24:17<6:45:59, 1.64it/s] 39%|███▉ | 25682/65536 [4:24:18<6:44:00, 1.64it/s] 39%|███▉ | 25683/65536 [4:24:18<6:50:49, 1.62it/s] 39%|███▉ | 25684/65536 [4:24:19<6:38:07, 1.67it/s] 39%|███▉ | 25685/65536 [4:24:19<6:48:55, 1.62it/s] 39%|███▉ | 25686/65536 [4:24:20<6:52:41, 1.61it/s] 39%|███▉ | 25687/65536 [4:24:21<6:54:39, 1.60it/s] 39%|███▉ | 25688/65536 [4:24:21<6:53:19, 1.61it/s] 39%|███▉ | 25689/65536 [4:24:22<7:04:36, 1.56it/s] 39%|███▉ | 25690/65536 [4:24:23<6:53:48, 1.60it/s] 39%|███▉ | 25691/65536 [4:24:23<6:45:49, 1.64it/s] 39%|███▉ | 25692/65536 [4:24:24<6:43:54, 1.64it/s] 39%|███▉ | 25693/65536 [4:24:24<6:38:39, 1.67it/s] 39%|███▉ | 25694/65536 [4:24:25<7:04:50, 1.56it/s] 39%|███▉ | 25695/65536 [4:24:26<6:54:37, 1.60it/s] 39%|███▉ | 25696/65536 [4:24:26<7:02:02, 1.57it/s] 39%|███▉ | 25697/65536 [4:24:27<6:51:00, 1.62it/s] 39%|███▉ | 25698/65536 [4:24:27<6:41:32, 1.65it/s] 39%|███▉ | 25699/65536 [4:24:28<7:00:59, 1.58it/s] 39%|███▉ | 25700/65536 [4:24:29<7:01:35, 1.57it/s] {'loss': 2.7912, 'learning_rate': 6.487388269866536e-07, 'epoch': 1586.42} + 39%|███▉ | 25700/65536 [4:24:29<7:01:35, 1.57it/s] 39%|███▉ | 25701/65536 [4:24:29<6:44:36, 1.64it/s] 39%|███▉ | 25702/65536 [4:24:30<6:44:52, 1.64it/s] 39%|███▉ | 25703/65536 [4:24:31<6:44:48, 1.64it/s] 39%|███▉ | 25704/65536 [4:24:31<6:35:10, 1.68it/s] 39%|███▉ | 25705/65536 [4:24:32<6:30:38, 1.70it/s] 39%|███▉ | 25706/65536 [4:24:32<6:30:15, 1.70it/s] 39%|███▉ | 25707/65536 [4:24:33<6:37:25, 1.67it/s] 39%|███▉ | 25708/65536 [4:24:33<6:37:34, 1.67it/s] 39%|███▉ | 25709/65536 [4:24:34<6:31:50, 1.69it/s] 39%|███▉ | 25710/65536 [4:24:35<6:46:05, 1.63it/s] 39%|███▉ | 25711/65536 [4:24:35<6:45:41, 1.64it/s] 39%|███▉ | 25712/65536 [4:24:36<6:55:26, 1.60it/s] 39%|███▉ | 25713/65536 [4:24:37<7:02:03, 1.57it/s] 39%|███▉ | 25714/65536 [4:24:37<6:59:24, 1.58it/s] 39%|███▉ | 25715/65536 [4:24:38<6:47:22, 1.63it/s] 39%|███▉ | 25716/65536 [4:24:38<6:55:09, 1.60it/s] 39%|███▉ | 25717/65536 [4:24:39<7:06:57, 1.55it/s] 39%|███▉ | 25718/65536 [4:24:40<6:52:16, 1.61it/s] 39%|███▉ | 25719/65536 [4:24:40<6:48:46, 1.62it/s] 39%|███▉ | 25720/65536 [4:24:41<6:44:39, 1.64it/s] {'loss': 2.8488, 'learning_rate': 6.484633280274273e-07, 'epoch': 1587.65} + 39%|███▉ | 25720/65536 [4:24:41<6:44:39, 1.64it/s] 39%|███▉ | 25721/65536 [4:24:42<6:40:15, 1.66it/s] 39%|███▉ | 25722/65536 [4:24:42<6:28:29, 1.71it/s] 39%|███▉ | 25723/65536 [4:24:43<6:26:14, 1.72it/s] 39%|███▉ | 25724/65536 [4:24:43<6:26:17, 1.72it/s] 39%|███▉ | 25725/65536 [4:24:44<6:35:43, 1.68it/s] 39%|███▉ | 25726/65536 [4:24:45<6:50:14, 1.62it/s] 39%|███▉ | 25727/65536 [4:24:45<6:47:56, 1.63it/s] 39%|███▉ | 25728/65536 [4:24:46<6:57:17, 1.59it/s] 39%|███▉ | 25729/65536 [4:24:46<6:59:12, 1.58it/s] 39%|███▉ | 25730/65536 [4:24:47<6:46:22, 1.63it/s] 39%|███▉ | 25731/65536 [4:24:48<6:36:08, 1.67it/s] 39%|███▉ | 25732/65536 [4:24:48<6:32:24, 1.69it/s] 39%|███▉ | 25733/65536 [4:24:49<6:51:33, 1.61it/s] 39%|███▉ | 25734/65536 [4:24:49<6:39:31, 1.66it/s] 39%|███▉ | 25735/65536 [4:24:50<6:38:28, 1.66it/s] 39%|███▉ | 25736/65536 [4:24:51<6:43:08, 1.65it/s] 39%|███▉ | 25737/65536 [4:24:51<6:47:24, 1.63it/s] 39%|███▉ | 25738/65536 [4:24:52<6:35:17, 1.68it/s] 39%|███▉ | 25739/65536 [4:24:52<6:37:21, 1.67it/s] 39%|███▉ | 25740/65536 [4:24:53<6:35:22, 1.68it/s] {'loss': 2.8975, 'learning_rate': 6.481878290682012e-07, 'epoch': 1588.89} + 39%|███▉ | 25740/65536 [4:24:53<6:35:22, 1.68it/s] 39%|███▉ | 25741/65536 [4:24:54<6:33:09, 1.69it/s] 39%|███▉ | 25742/65536 [4:24:54<6:52:35, 1.61it/s] 39%|███▉ | 25743/65536 [4:24:55<6:52:11, 1.61it/s] 39%|███▉ | 25744/65536 [4:24:55<6:50:56, 1.61it/s] 39%|███▉ | 25745/65536 [4:24:56<7:03:47, 1.56it/s] 39%|███▉ | 25746/65536 [4:24:57<6:58:28, 1.58it/s] 39%|███▉ | 25747/65536 [4:24:57<6:51:21, 1.61it/s] 39%|███▉ | 25748/65536 [4:24:58<6:55:20, 1.60it/s] 39%|███▉ | 25749/65536 [4:24:59<6:50:46, 1.61it/s] 39%|███▉ | 25750/65536 [4:24:59<6:47:06, 1.63it/s] 39%|███▉ | 25751/65536 [4:25:00<6:46:44, 1.63it/s] 39%|███▉ | 25752/65536 [4:25:00<6:42:27, 1.65it/s] 39%|███▉ | 25753/65536 [4:25:01<6:47:10, 1.63it/s] 39%|███▉ | 25754/65536 [4:25:02<6:33:33, 1.68it/s] 39%|███▉ | 25755/65536 [4:25:02<6:38:53, 1.66it/s] 39%|███▉ | 25756/65536 [4:25:03<6:32:30, 1.69it/s] 39%|███▉ | 25757/65536 [4:25:03<6:40:42, 1.65it/s] 39%|███▉ | 25758/65536 [4:25:04<6:41:23, 1.65it/s] 39%|███▉ | 25759/65536 [4:25:05<6:52:28, 1.61it/s] 39%|███▉ | 25760/65536 [4:25:05<6:51:29, 1.61it/s] {'loss': 2.7681, 'learning_rate': 6.47912330108975e-07, 'epoch': 1590.12} + 39%|███▉ | 25760/65536 [4:25:05<6:51:29, 1.61it/s] 39%|███▉ | 25761/65536 [4:25:06<6:54:55, 1.60it/s] 39%|███▉ | 25762/65536 [4:25:07<6:50:20, 1.62it/s] 39%|███▉ | 25763/65536 [4:25:07<6:48:58, 1.62it/s] 39%|███▉ | 25764/65536 [4:25:08<6:51:34, 1.61it/s] 39%|███▉ | 25765/65536 [4:25:08<6:45:59, 1.63it/s] 39%|███▉ | 25766/65536 [4:25:09<6:36:39, 1.67it/s] 39%|███▉ | 25767/65536 [4:25:10<6:34:52, 1.68it/s] 39%|███▉ | 25768/65536 [4:25:10<6:36:40, 1.67it/s] 39%|███▉ | 25769/65536 [4:25:11<6:44:40, 1.64it/s] 39%|███▉ | 25770/65536 [4:25:11<6:54:08, 1.60it/s] 39%|███▉ | 25771/65536 [4:25:12<6:54:53, 1.60it/s] 39%|███▉ | 25772/65536 [4:25:13<7:01:39, 1.57it/s] 39%|███▉ | 25773/65536 [4:25:13<7:00:44, 1.58it/s] 39%|███▉ | 25774/65536 [4:25:14<6:52:58, 1.60it/s] 39%|███▉ | 25775/65536 [4:25:15<7:11:28, 1.54it/s] 39%|███▉ | 25776/65536 [4:25:15<7:12:01, 1.53it/s] 39%|███▉ | 25777/65536 [4:25:16<7:16:22, 1.52it/s] 39%|███▉ | 25778/65536 [4:25:17<7:05:51, 1.56it/s] 39%|███▉ | 25779/65536 [4:25:17<7:02:41, 1.57it/s] 39%|███▉ | 25780/65536 [4:25:18<7:11:29, 1.54it/s] {'loss': 2.3341, 'learning_rate': 6.476368311497489e-07, 'epoch': 1591.36} + 39%|███▉ | 25780/65536 [4:25:18<7:11:29, 1.54it/s] 39%|███▉ | 25781/65536 [4:25:18<6:56:40, 1.59it/s] 39%|███▉ | 25782/65536 [4:25:19<6:41:07, 1.65it/s] 39%|███▉ | 25783/65536 [4:25:20<6:38:38, 1.66it/s] 39%|███▉ | 25784/65536 [4:25:20<6:31:47, 1.69it/s] 39%|███▉ | 25785/65536 [4:25:21<6:37:31, 1.67it/s] 39%|███▉ | 25786/65536 [4:25:21<6:33:09, 1.69it/s] 39%|███▉ | 25787/65536 [4:25:22<6:33:57, 1.68it/s] 39%|███▉ | 25788/65536 [4:25:23<6:27:08, 1.71it/s] 39%|███▉ | 25789/65536 [4:25:23<6:31:28, 1.69it/s] 39%|███▉ | 25790/65536 [4:25:24<6:27:24, 1.71it/s] 39%|███▉ | 25791/65536 [4:25:24<6:59:31, 1.58it/s] 39%|███▉ | 25792/65536 [4:25:25<6:50:08, 1.62it/s] 39%|███▉ | 25793/65536 [4:25:26<6:44:36, 1.64it/s] 39%|███▉ | 25794/65536 [4:25:26<6:44:20, 1.64it/s] 39%|███▉ | 25795/65536 [4:25:27<6:41:09, 1.65it/s] 39%|███▉ | 25796/65536 [4:25:27<6:42:02, 1.65it/s] 39%|███▉ | 25797/65536 [4:25:28<6:54:09, 1.60it/s] 39%|███▉ | 25798/65536 [4:25:29<6:50:08, 1.61it/s] 39%|███▉ | 25799/65536 [4:25:29<6:41:43, 1.65it/s] 39%|███▉ | 25800/65536 [4:25:30<6:52:40, 1.60it/s] {'loss': 2.1036, 'learning_rate': 6.473613321905228e-07, 'epoch': 1592.59} + 39%|███▉ | 25800/65536 [4:25:30<6:52:40, 1.60it/s] 39%|███▉ | 25801/65536 [4:25:31<6:53:04, 1.60it/s] 39%|███▉ | 25802/65536 [4:25:31<6:57:23, 1.59it/s] 39%|███▉ | 25803/65536 [4:25:32<6:51:57, 1.61it/s] 39%|███▉ | 25804/65536 [4:25:32<6:45:29, 1.63it/s] 39%|███▉ | 25805/65536 [4:25:33<6:32:08, 1.69it/s] 39%|███▉ | 25806/65536 [4:25:34<6:30:56, 1.69it/s] 39%|███▉ | 25807/65536 [4:25:34<6:47:46, 1.62it/s] 39%|███▉ | 25808/65536 [4:25:35<6:51:28, 1.61it/s] 39%|███▉ | 25809/65536 [4:25:36<6:49:03, 1.62it/s] 39%|███▉ | 25810/65536 [4:25:36<6:49:01, 1.62it/s] 39%|███▉ | 25811/65536 [4:25:37<6:42:25, 1.65it/s] 39%|███▉ | 25812/65536 [4:25:37<6:36:58, 1.67it/s] 39%|███▉ | 25813/65536 [4:25:38<6:48:19, 1.62it/s] 39%|███▉ | 25814/65536 [4:25:39<6:40:29, 1.65it/s] 39%|███▉ | 25815/65536 [4:25:39<6:48:08, 1.62it/s] 39%|███▉ | 25816/65536 [4:25:40<6:51:45, 1.61it/s] 39%|███▉ | 25817/65536 [4:25:40<6:43:51, 1.64it/s] 39%|███▉ | 25818/65536 [4:25:41<6:44:26, 1.64it/s] 39%|███▉ | 25819/65536 [4:25:42<6:46:39, 1.63it/s] 39%|███▉ | 25820/65536 [4:25:42<6:42:20, 1.65it/s] {'loss': 2.0129, 'learning_rate': 6.470858332312966e-07, 'epoch': 1593.83} + 39%|███▉ | 25820/65536 [4:25:42<6:42:20, 1.65it/s] 39%|███▉ | 25821/65536 [4:25:43<6:48:41, 1.62it/s] 39%|███▉ | 25822/65536 [4:25:43<6:38:28, 1.66it/s] 39%|███▉ | 25823/65536 [4:25:44<6:51:22, 1.61it/s] 39%|███▉ | 25824/65536 [4:25:45<6:40:19, 1.65it/s] 39%|███▉ | 25825/65536 [4:25:45<6:36:07, 1.67it/s] 39%|███▉ | 25826/65536 [4:25:46<6:41:54, 1.65it/s] 39%|███▉ | 25827/65536 [4:25:46<6:40:44, 1.65it/s] 39%|███▉ | 25828/65536 [4:25:47<6:58:19, 1.58it/s] 39%|███▉ | 25829/65536 [4:25:48<6:52:52, 1.60it/s] 39%|███▉ | 25830/65536 [4:25:48<6:50:25, 1.61it/s] 39%|███▉ | 25831/65536 [4:25:49<6:46:14, 1.63it/s] 39%|███▉ | 25832/65536 [4:25:50<6:52:26, 1.60it/s] 39%|███▉ | 25833/65536 [4:25:50<6:51:38, 1.61it/s] 39%|███▉ | 25834/65536 [4:25:51<6:47:24, 1.62it/s] 39%|███▉ | 25835/65536 [4:25:51<6:52:32, 1.60it/s] 39%|███▉ | 25836/65536 [4:25:52<6:47:32, 1.62it/s] 39%|███▉ | 25837/65536 [4:25:53<6:44:04, 1.64it/s] 39%|███▉ | 25838/65536 [4:25:53<6:49:53, 1.61it/s] 39%|███▉ | 25839/65536 [4:25:54<6:37:29, 1.66it/s] 39%|███▉ | 25840/65536 [4:25:55<7:08:16, 1.54it/s] {'loss': 2.0491, 'learning_rate': 6.468103342720705e-07, 'epoch': 1595.06} + 39%|███▉ | 25840/65536 [4:25:55<7:08:16, 1.54it/s] 39%|███▉ | 25841/65536 [4:25:55<6:57:55, 1.58it/s] 39%|███▉ | 25842/65536 [4:25:56<6:54:06, 1.60it/s] 39%|███▉ | 25843/65536 [4:25:56<6:45:52, 1.63it/s] 39%|███▉ | 25844/65536 [4:25:57<6:45:28, 1.63it/s] 39%|███▉ | 25845/65536 [4:25:58<6:45:40, 1.63it/s] 39%|███▉ | 25846/65536 [4:25:58<6:36:13, 1.67it/s] 39%|███▉ | 25847/65536 [4:25:59<6:44:49, 1.63it/s] 39%|███▉ | 25848/65536 [4:25:59<6:40:07, 1.65it/s] 39%|███▉ | 25849/65536 [4:26:00<6:39:30, 1.66it/s] 39%|███▉ | 25850/65536 [4:26:01<6:42:10, 1.64it/s] 39%|███▉ | 25851/65536 [4:26:01<6:54:30, 1.60it/s] 39%|███▉ | 25852/65536 [4:26:02<6:53:00, 1.60it/s] 39%|███▉ | 25853/65536 [4:26:03<6:57:25, 1.58it/s] 39%|███▉ | 25854/65536 [4:26:03<6:45:40, 1.63it/s] 39%|███▉ | 25855/65536 [4:26:04<6:40:23, 1.65it/s] 39%|███▉ | 25856/65536 [4:26:04<6:51:38, 1.61it/s] 39%|███▉ | 25857/65536 [4:26:05<6:44:18, 1.64it/s] 39%|███▉ | 25858/65536 [4:26:06<6:43:45, 1.64it/s] 39%|███▉ | 25859/65536 [4:26:06<6:37:40, 1.66it/s] 39%|███▉ | 25860/65536 [4:26:07<6:38:48, 1.66it/s] {'loss': 2.0864, 'learning_rate': 6.465348353128443e-07, 'epoch': 1596.3} + 39%|███▉ | 25860/65536 [4:26:07<6:38:48, 1.66it/s] 39%|███▉ | 25861/65536 [4:26:07<6:41:44, 1.65it/s] 39%|███▉ | 25862/65536 [4:26:08<6:43:32, 1.64it/s] 39%|███▉ | 25863/65536 [4:26:09<6:40:21, 1.65it/s] 39%|███▉ | 25864/65536 [4:26:09<6:40:47, 1.65it/s] 39%|███▉ | 25865/65536 [4:26:10<6:37:58, 1.66it/s] 39%|███▉ | 25866/65536 [4:26:10<6:34:59, 1.67it/s] 39%|███▉ | 25867/65536 [4:26:11<6:42:39, 1.64it/s] 39%|███▉ | 25868/65536 [4:26:12<6:42:22, 1.64it/s] 39%|███▉ | 25869/65536 [4:26:12<6:44:54, 1.63it/s] 39%|███▉ | 25870/65536 [4:26:13<6:45:51, 1.63it/s] 39%|███▉ | 25871/65536 [4:26:13<6:41:06, 1.65it/s] 39%|███▉ | 25872/65536 [4:26:14<7:09:28, 1.54it/s] 39%|███▉ | 25873/65536 [4:26:15<6:57:55, 1.58it/s] 39%|█��█▉ | 25874/65536 [4:26:15<6:43:11, 1.64it/s] 39%|███▉ | 25875/65536 [4:26:16<6:38:27, 1.66it/s] 39%|███▉ | 25876/65536 [4:26:17<6:47:11, 1.62it/s] 39%|███▉ | 25877/65536 [4:26:17<6:41:49, 1.64it/s] 39%|███▉ | 25878/65536 [4:26:18<6:48:44, 1.62it/s] 39%|███▉ | 25879/65536 [4:26:18<6:51:19, 1.61it/s] 39%|███▉ | 25880/65536 [4:26:19<6:43:18, 1.64it/s] {'loss': 2.1008, 'learning_rate': 6.462593363536182e-07, 'epoch': 1597.53} + 39%|███▉ | 25880/65536 [4:26:19<6:43:18, 1.64it/s] 39%|███▉ | 25881/65536 [4:26:20<6:35:52, 1.67it/s] 39%|███▉ | 25882/65536 [4:26:20<6:28:48, 1.70it/s] 39%|███▉ | 25883/65536 [4:26:21<6:29:51, 1.70it/s] 39%|███▉ | 25884/65536 [4:26:21<6:33:52, 1.68it/s] 39%|███▉ | 25885/65536 [4:26:22<6:25:51, 1.71it/s] 39%|███▉ | 25886/65536 [4:26:23<6:36:24, 1.67it/s] 40%|███▉ | 25887/65536 [4:26:23<6:44:06, 1.64it/s] 40%|███▉ | 25888/65536 [4:26:24<6:49:15, 1.61it/s] 40%|███▉ | 25889/65536 [4:26:24<6:49:39, 1.61it/s] 40%|███▉ | 25890/65536 [4:26:25<6:51:51, 1.60it/s] 40%|███▉ | 25891/65536 [4:26:26<6:47:26, 1.62it/s] 40%|███▉ | 25892/65536 [4:26:26<6:37:11, 1.66it/s] 40%|███▉ | 25893/65536 [4:26:27<6:30:13, 1.69it/s] 40%|███▉ | 25894/65536 [4:26:28<6:41:41, 1.64it/s] 40%|███▉ | 25895/65536 [4:26:28<6:39:30, 1.65it/s] 40%|███▉ | 25896/65536 [4:26:29<6:27:24, 1.71it/s] 40%|███▉ | 25897/65536 [4:26:29<6:40:20, 1.65it/s] 40%|███▉ | 25898/65536 [4:26:30<6:41:54, 1.64it/s] 40%|███▉ | 25899/65536 [4:26:30<6:32:48, 1.68it/s] 40%|███▉ | 25900/65536 [4:26:31<6:38:36, 1.66it/s] {'loss': 1.9265, 'learning_rate': 6.459838373943921e-07, 'epoch': 1598.77} + 40%|███▉ | 25900/65536 [4:26:31<6:38:36, 1.66it/s] 40%|███▉ | 25901/65536 [4:26:32<6:42:47, 1.64it/s] 40%|███▉ | 25902/65536 [4:26:32<6:43:32, 1.64it/s] 40%|███▉ | 25903/65536 [4:26:33<6:50:02, 1.61it/s] 40%|███▉ | 25904/65536 [4:26:34<7:04:02, 1.56it/s] 40%|███▉ | 25905/65536 [4:26:34<7:02:14, 1.56it/s] 40%|███▉ | 25906/65536 [4:26:35<6:54:11, 1.59it/s] 40%|███▉ | 25907/65536 [4:26:36<6:52:26, 1.60it/s] 40%|███▉ | 25908/65536 [4:26:36<6:42:44, 1.64it/s] 40%|███▉ | 25909/65536 [4:26:37<6:33:45, 1.68it/s] 40%|███▉ | 25910/65536 [4:26:37<6:35:07, 1.67it/s] 40%|███▉ | 25911/65536 [4:26:38<6:50:52, 1.61it/s] 40%|███▉ | 25912/65536 [4:26:39<7:00:43, 1.57it/s] 40%|███▉ | 25913/65536 [4:26:39<6:52:52, 1.60it/s] 40%|███▉ | 25914/65536 [4:26:40<6:44:30, 1.63it/s] 40%|███▉ | 25915/65536 [4:26:40<6:51:29, 1.60it/s] 40%|███▉ | 25916/65536 [4:26:41<6:43:23, 1.64it/s] 40%|███▉ | 25917/65536 [4:26:42<6:30:57, 1.69it/s] 40%|███▉ | 25918/65536 [4:26:42<6:30:44, 1.69it/s] 40%|███▉ | 25919/65536 [4:26:43<6:27:50, 1.70it/s] 40%|███▉ | 25920/65536 [4:26:43<6:42:55, 1.64it/s] {'loss': 1.8548, 'learning_rate': 6.457083384351659e-07, 'epoch': 1600.0} + 40%|███▉ | 25920/65536 [4:26:43<6:42:55, 1.64it/s] 40%|███▉ | 25921/65536 [4:26:44<7:02:02, 1.56it/s] 40%|███▉ | 25922/65536 [4:26:45<6:58:02, 1.58it/s] 40%|███▉ | 25923/65536 [4:26:45<7:10:46, 1.53it/s] 40%|███▉ | 25924/65536 [4:26:46<7:02:28, 1.56it/s] 40%|███▉ | 25925/65536 [4:26:47<7:03:50, 1.56it/s] 40%|███▉ | 25926/65536 [4:26:47<6:52:29, 1.60it/s] 40%|███▉ | 25927/65536 [4:26:48<6:41:16, 1.65it/s] 40%|███▉ | 25928/65536 [4:26:48<6:42:28, 1.64it/s] 40%|███▉ | 25929/65536 [4:26:49<6:37:20, 1.66it/s] 40%|███▉ | 25930/65536 [4:26:50<6:34:11, 1.67it/s] 40%|███▉ | 25931/65536 [4:26:50<6:33:59, 1.68it/s] 40%|███▉ | 25932/65536 [4:26:51<6:38:34, 1.66it/s] 40%|███▉ | 25933/65536 [4:26:51<6:36:00, 1.67it/s] 40%|███▉ | 25934/65536 [4:26:52<6:39:38, 1.65it/s] 40%|███▉ | 25935/65536 [4:26:53<6:38:57, 1.65it/s] 40%|███▉ | 25936/65536 [4:26:53<6:47:50, 1.62it/s] 40%|███▉ | 25937/65536 [4:26:54<6:57:19, 1.58it/s] 40%|███▉ | 25938/65536 [4:26:55<6:53:14, 1.60it/s] 40%|███▉ | 25939/65536 [4:26:55<6:45:31, 1.63it/s] 40%|███▉ | 25940/65536 [4:26:56<6:39:54, 1.65it/s] {'loss': 1.8596, 'learning_rate': 6.454328394759398e-07, 'epoch': 1601.23} + 40%|███▉ | 25940/65536 [4:26:56<6:39:54, 1.65it/s] 40%|███▉ | 25941/65536 [4:26:56<6:37:38, 1.66it/s] 40%|███▉ | 25942/65536 [4:26:57<6:44:18, 1.63it/s] 40%|███▉ | 25943/65536 [4:26:58<6:40:24, 1.65it/s] 40%|███▉ | 25944/65536 [4:26:58<6:34:15, 1.67it/s] 40%|███▉ | 25945/65536 [4:26:59<6:32:29, 1.68it/s] 40%|███▉ | 25946/65536 [4:26:59<6:50:13, 1.61it/s] 40%|███▉ | 25947/65536 [4:27:00<6:50:40, 1.61it/s] 40%|███▉ | 25948/65536 [4:27:01<6:50:47, 1.61it/s] 40%|███▉ | 25949/65536 [4:27:01<6:48:25, 1.62it/s] 40%|███▉ | 25950/65536 [4:27:02<6:39:26, 1.65it/s] 40%|███▉ | 25951/65536 [4:27:02<6:39:50, 1.65it/s] 40%|███▉ | 25952/65536 [4:27:03<6:45:38, 1.63it/s] 40%|███▉ | 25953/65536 [4:27:04<6:54:43, 1.59it/s] 40%|███▉ | 25954/65536 [4:27:04<6:43:14, 1.64it/s] 40%|███▉ | 25955/65536 [4:27:05<6:37:13, 1.66it/s] 40%|███▉ | 25956/65536 [4:27:06<6:38:23, 1.66it/s] 40%|███▉ | 25957/65536 [4:27:06<6:56:26, 1.58it/s] 40%|███▉ | 25958/65536 [4:27:07<6:53:26, 1.60it/s] 40%|███▉ | 25959/65536 [4:27:07<6:44:41, 1.63it/s] 40%|███▉ | 25960/65536 [4:27:08<6:44:13, 1.63it/s] {'loss': 1.7976, 'learning_rate': 6.451573405167136e-07, 'epoch': 1602.47} + 40%|███▉ | 25960/65536 [4:27:08<6:44:13, 1.63it/s] 40%|███▉ | 25961/65536 [4:27:09<6:41:45, 1.64it/s] 40%|███▉ | 25962/65536 [4:27:09<6:40:41, 1.65it/s] 40%|███▉ | 25963/65536 [4:27:10<6:37:36, 1.66it/s] 40%|███▉ | 25964/65536 [4:27:10<6:34:28, 1.67it/s] 40%|███▉ | 25965/65536 [4:27:11<6:37:51, 1.66it/s] 40%|███▉ | 25966/65536 [4:27:12<6:41:18, 1.64it/s] 40%|███▉ | 25967/65536 [4:27:12<7:03:23, 1.56it/s] 40%|███▉ | 25968/65536 [4:27:13<6:53:02, 1.60it/s] 40%|███▉ | 25969/65536 [4:27:14<7:01:07, 1.57it/s] 40%|███▉ | 25970/65536 [4:27:14<6:57:38, 1.58it/s] 40%|███▉ | 25971/65536 [4:27:15<7:01:32, 1.56it/s] 40%|███▉ | 25972/65536 [4:27:16<7:04:05, 1.55it/s] 40%|███▉ | 25973/65536 [4:27:16<6:47:43, 1.62it/s] 40%|███▉ | 25974/65536 [4:27:17<6:42:03, 1.64it/s] 40%|███▉ | 25975/65536 [4:27:17<6:40:54, 1.64it/s] 40%|███▉ | 25976/65536 [4:27:18<6:39:21, 1.65it/s] 40%|███▉ | 25977/65536 [4:27:19<6:45:02, 1.63it/s] 40%|███▉ | 25978/65536 [4:27:19<6:38:50, 1.65it/s] 40%|███▉ | 25979/65536 [4:27:20<6:42:10, 1.64it/s] 40%|███▉ | 25980/65536 [4:27:20<6:43:17, 1.63it/s] {'loss': 1.7973, 'learning_rate': 6.448818415574874e-07, 'epoch': 1603.7} + 40%|███▉ | 25980/65536 [4:27:20<6:43:17, 1.63it/s] 40%|███▉ | 25981/65536 [4:27:21<6:37:10, 1.66it/s] 40%|███▉ | 25982/65536 [4:27:22<6:33:11, 1.68it/s] 40%|███▉ | 25983/65536 [4:27:22<6:45:27, 1.63it/s] 40%|███▉ | 25984/65536 [4:27:23<6:46:59, 1.62it/s] 40%|███▉ | 25985/65536 [4:27:23<6:55:15, 1.59it/s] 40%|███▉ | 25986/65536 [4:27:24<6:48:58, 1.61it/s] 40%|███▉ | 25987/65536 [4:27:25<6:45:23, 1.63it/s] 40%|███▉ | 25988/65536 [4:27:25<6:43:29, 1.63it/s] 40%|███▉ | 25989/65536 [4:27:26<6:41:32, 1.64it/s] 40%|███▉ | 25990/65536 [4:27:26<6:35:02, 1.67it/s] 40%|███▉ | 25991/65536 [4:27:27<6:37:53, 1.66it/s] 40%|███▉ | 25992/65536 [4:27:28<6:30:43, 1.69it/s] 40%|███▉ | 25993/65536 [4:27:28<6:37:57, 1.66it/s] 40%|███▉ | 25994/65536 [4:27:29<6:36:10, 1.66it/s] 40%|███▉ | 25995/65536 [4:27:29<6:44:08, 1.63it/s] 40%|███▉ | 25996/65536 [4:27:30<6:37:47, 1.66it/s] 40%|███▉ | 25997/65536 [4:27:31<6:35:30, 1.67it/s] 40%|███▉ | 25998/65536 [4:27:31<6:53:21, 1.59it/s] 40%|███▉ | 25999/65536 [4:27:32<6:51:11, 1.60it/s] 40%|███▉ | 26000/65536 [4:27:33<6:49:19, 1.61it/s] {'loss': 1.8429, 'learning_rate': 6.446063425982611e-07, 'epoch': 1604.94} + 40%|███▉ | 26000/65536 [4:27:33<6:49:19, 1.61it/s] 40%|███▉ | 26001/65536 [4:27:33<6:44:31, 1.63it/s] 40%|███▉ | 26002/65536 [4:27:34<6:51:54, 1.60it/s] 40%|███▉ | 26003/65536 [4:27:34<6:37:59, 1.66it/s] 40%|███▉ | 26004/65536 [4:27:35<6:37:59, 1.66it/s] 40%|███▉ | 26005/65536 [4:27:36<6:52:56, 1.60it/s] 40%|███▉ | 26006/65536 [4:27:36<6:55:16, 1.59it/s] 40%|███▉ | 26007/65536 [4:27:37<6:51:42, 1.60it/s] 40%|███▉ | 26008/65536 [4:27:38<6:52:00, 1.60it/s] 40%|███▉ | 26009/65536 [4:27:38<6:49:13, 1.61it/s] 40%|███▉ | 26010/65536 [4:27:39<6:46:43, 1.62it/s] 40%|███▉ | 26011/65536 [4:27:39<6:44:41, 1.63it/s] 40%|███▉ | 26012/65536 [4:27:40<6:35:49, 1.66it/s] 40%|███▉ | 26013/65536 [4:27:41<6:35:59, 1.66it/s] 40%|███▉ | 26014/65536 [4:27:41<6:44:02, 1.63it/s] 40%|███▉ | 26015/65536 [4:27:42<6:39:43, 1.65it/s] 40%|███▉ | 26016/65536 [4:27:42<6:36:04, 1.66it/s] 40%|███▉ | 26017/65536 [4:27:43<6:34:48, 1.67it/s] 40%|███▉ | 26018/65536 [4:27:44<6:46:09, 1.62it/s] 40%|███▉ | 26019/65536 [4:27:44<6:39:24, 1.65it/s] 40%|███▉ | 26020/65536 [4:27:45<6:32:33, 1.68it/s] {'loss': 1.8667, 'learning_rate': 6.443308436390351e-07, 'epoch': 1606.17} + 40%|███▉ | 26020/65536 [4:27:45<6:32:33, 1.68it/s] 40%|███▉ | 26021/65536 [4:27:45<6:34:38, 1.67it/s] 40%|███▉ | 26022/65536 [4:27:46<6:45:44, 1.62it/s] 40%|███▉ | 26023/65536 [4:27:47<6:52:06, 1.60it/s] 40%|███▉ | 26024/65536 [4:27:47<6:42:23, 1.64it/s] 40%|███▉ | 26025/65536 [4:27:48<6:38:08, 1.65it/s] 40%|███▉ | 26026/65536 [4:27:48<6:34:03, 1.67it/s] 40%|███▉ | 26027/65536 [4:27:49<6:36:54, 1.66it/s] 40%|███▉ | 26028/65536 [4:27:50<6:48:34, 1.61it/s] 40%|███▉ | 26029/65536 [4:27:50<6:52:03, 1.60it/s] 40%|███▉ | 26030/65536 [4:27:51<6:46:23, 1.62it/s] 40%|███▉ | 26031/65536 [4:27:51<6:32:33, 1.68it/s] 40%|███▉ | 26032/65536 [4:27:52<6:39:25, 1.65it/s] 40%|███▉ | 26033/65536 [4:27:53<6:45:19, 1.62it/s] 40%|███▉ | 26034/65536 [4:27:53<6:57:13, 1.58it/s] 40%|███▉ | 26035/65536 [4:27:54<7:08:00, 1.54it/s] 40%|███▉ | 26036/65536 [4:27:55<7:00:47, 1.56it/s] 40%|███▉ | 26037/65536 [4:27:55<6:56:24, 1.58it/s] 40%|███▉ | 26038/65536 [4:27:56<6:47:54, 1.61it/s] 40%|███▉ | 26039/65536 [4:27:57<6:42:42, 1.63it/s] 40%|███▉ | 26040/65536 [4:27:57<6:43:59, 1.63it/s] {'loss': 1.8682, 'learning_rate': 6.44055344679809e-07, 'epoch': 1607.41} + 40%|███▉ | 26040/65536 [4:27:57<6:43:59, 1.63it/s] 40%|███▉ | 26041/65536 [4:27:58<6:41:17, 1.64it/s] 40%|███▉ | 26042/65536 [4:27:58<6:36:06, 1.66it/s] 40%|███▉ | 26043/65536 [4:27:59<6:35:09, 1.67it/s] 40%|███▉ | 26044/65536 [4:28:00<6:34:17, 1.67it/s] 40%|███▉ | 26045/65536 [4:28:00<6:32:32, 1.68it/s] 40%|███▉ | 26046/65536 [4:28:01<6:28:51, 1.69it/s] 40%|███▉ | 26047/65536 [4:28:01<6:25:28, 1.71it/s] 40%|███▉ | 26048/65536 [4:28:02<6:25:37, 1.71it/s] 40%|███▉ | 26049/65536 [4:28:02<6:34:20, 1.67it/s] 40%|███▉ | 26050/65536 [4:28:03<6:54:00, 1.59it/s] 40%|███▉ | 26051/65536 [4:28:04<6:43:54, 1.63it/s] 40%|███▉ | 26052/65536 [4:28:04<6:39:26, 1.65it/s] 40%|███▉ | 26053/65536 [4:28:05<6:38:58, 1.65it/s] 40%|███▉ | 26054/65536 [4:28:06<6:33:22, 1.67it/s] 40%|███▉ | 26055/65536 [4:28:06<6:32:39, 1.68it/s] 40%|███▉ | 26056/65536 [4:28:07<6:32:07, 1.68it/s] 40%|███▉ | 26057/65536 [4:28:07<6:24:09, 1.71it/s] 40%|███▉ | 26058/65536 [4:28:08<6:34:30, 1.67it/s] 40%|███▉ | 26059/65536 [4:28:09<6:38:57, 1.65it/s] 40%|███▉ | 26060/65536 [4:28:09<6:33:57, 1.67it/s] {'loss': 1.8548, 'learning_rate': 6.437798457205828e-07, 'epoch': 1608.64} + 40%|███▉ | 26060/65536 [4:28:09<6:33:57, 1.67it/s] 40%|███▉ | 26061/65536 [4:28:10<6:39:49, 1.65it/s] 40%|███▉ | 26062/65536 [4:28:10<6:36:58, 1.66it/s] 40%|███▉ | 26063/65536 [4:28:11<6:37:33, 1.65it/s] 40%|███▉ | 26064/65536 [4:28:12<6:35:33, 1.66it/s] 40%|███▉ | 26065/65536 [4:28:12<6:44:18, 1.63it/s] 40%|███▉ | 26066/65536 [4:28:13<7:10:33, 1.53it/s] 40%|███▉ | 26067/65536 [4:28:14<6:57:18, 1.58it/s] 40%|███▉ | 26068/65536 [4:28:14<6:51:24, 1.60it/s] 40%|███▉ | 26069/65536 [4:28:15<6:56:07, 1.58it/s] 40%|███▉ | 26070/65536 [4:28:15<6:48:47, 1.61it/s] 40%|███▉ | 26071/65536 [4:28:16<6:40:55, 1.64it/s] 40%|███▉ | 26072/65536 [4:28:17<6:51:50, 1.60it/s] 40%|███▉ | 26073/65536 [4:28:17<7:01:39, 1.56it/s] 40%|███▉ | 26074/65536 [4:28:18<6:55:22, 1.58it/s] 40%|███▉ | 26075/65536 [4:28:19<6:53:44, 1.59it/s] 40%|███▉ | 26076/65536 [4:28:19<6:48:01, 1.61it/s] 40%|███▉ | 26077/65536 [4:28:20<6:47:49, 1.61it/s] 40%|███▉ | 26078/65536 [4:28:20<6:50:41, 1.60it/s] 40%|███▉ | 26079/65536 [4:28:21<6:40:40, 1.64it/s] 40%|███▉ | 26080/65536 [4:28:22<6:36:28, 1.66it/s] {'loss': 1.787, 'learning_rate': 6.435043467613567e-07, 'epoch': 1609.88} + 40%|███▉ | 26080/65536 [4:28:22<6:36:28, 1.66it/s] 40%|███▉ | 26081/65536 [4:28:22<6:38:11, 1.65it/s] 40%|███▉ | 26082/65536 [4:28:23<6:31:49, 1.68it/s] 40%|███▉ | 26083/65536 [4:28:23<6:47:50, 1.61it/s] 40%|███▉ | 26084/65536 [4:28:24<6:46:35, 1.62it/s] 40%|███▉ | 26085/65536 [4:28:25<6:39:42, 1.64it/s] 40%|███▉ | 26086/65536 [4:28:25<6:39:24, 1.65it/s] 40%|███▉ | 26087/65536 [4:28:26<6:28:16, 1.69it/s] 40%|███▉ | 26088/65536 [4:28:26<6:21:39, 1.72it/s] 40%|███▉ | 26089/65536 [4:28:27<6:21:14, 1.72it/s] 40%|███▉ | 26090/65536 [4:28:28<6:30:57, 1.68it/s] 40%|███▉ | 26091/65536 [4:28:28<6:43:42, 1.63it/s] 40%|███▉ | 26092/65536 [4:28:29<6:35:52, 1.66it/s] 40%|███▉ | 26093/65536 [4:28:29<6:45:51, 1.62it/s] 40%|███▉ | 26094/65536 [4:28:30<6:50:30, 1.60it/s] 40%|███▉ | 26095/65536 [4:28:31<6:53:08, 1.59it/s] 40%|███▉ | 26096/65536 [4:28:31<6:43:22, 1.63it/s] 40%|███▉ | 26097/65536 [4:28:32<6:44:52, 1.62it/s] 40%|███▉ | 26098/65536 [4:28:33<6:48:23, 1.61it/s] 40%|███▉ | 26099/65536 [4:28:33<7:07:15, 1.54it/s] 40%|███▉ | 26100/65536 [4:28:34<7:02:06, 1.56it/s] {'loss': 1.8336, 'learning_rate': 6.432288478021305e-07, 'epoch': 1611.11} + 40%|███▉ | 26100/65536 [4:28:34<7:02:06, 1.56it/s] 40%|███▉ | 26101/65536 [4:28:34<6:53:27, 1.59it/s] 40%|███▉ | 26102/65536 [4:28:35<6:49:03, 1.61it/s] 40%|███▉ | 26103/65536 [4:28:36<6:40:46, 1.64it/s] 40%|███▉ | 26104/65536 [4:28:36<6:46:32, 1.62it/s] 40%|███▉ | 26105/65536 [4:28:37<6:40:30, 1.64it/s] 40%|███▉ | 26106/65536 [4:28:37<6:33:23, 1.67it/s] 40%|███▉ | 26107/65536 [4:28:38<6:24:58, 1.71it/s] 40%|███▉ | 26108/65536 [4:28:39<6:21:33, 1.72it/s] 40%|███▉ | 26109/65536 [4:28:39<6:27:50, 1.69it/s] 40%|███▉ | 26110/65536 [4:28:40<6:32:09, 1.68it/s] 40%|███▉ | 26111/65536 [4:28:40<6:48:52, 1.61it/s] 40%|███▉ | 26112/65536 [4:28:41<6:47:39, 1.61it/s] 40%|███▉ | 26113/65536 [4:28:42<6:42:46, 1.63it/s] 40%|███▉ | 26114/65536 [4:28:42<6:41:10, 1.64it/s] 40%|███▉ | 26115/65536 [4:28:43<6:50:09, 1.60it/s] 40%|███▉ | 26116/65536 [4:28:44<6:49:17, 1.61it/s] 40%|███▉ | 26117/65536 [4:28:44<6:44:48, 1.62it/s] 40%|███▉ | 26118/65536 [4:28:45<6:43:24, 1.63it/s] 40%|███▉ | 26119/65536 [4:28:45<6:58:10, 1.57it/s] 40%|███▉ | 26120/65536 [4:28:46<6:45:18, 1.62it/s] {'loss': 1.7849, 'learning_rate': 6.429533488429044e-07, 'epoch': 1612.35} + 40%|███▉ | 26120/65536 [4:28:46<6:45:18, 1.62it/s] 40%|███▉ | 26121/65536 [4:28:47<6:37:26, 1.65it/s] 40%|███▉ | 26122/65536 [4:28:47<6:43:36, 1.63it/s] 40%|███▉ | 26123/65536 [4:28:48<6:43:43, 1.63it/s] 40%|███▉ | 26124/65536 [4:28:49<6:47:34, 1.61it/s] 40%|███▉ | 26125/65536 [4:28:49<6:40:13, 1.64it/s] 40%|███▉ | 26126/65536 [4:28:50<6:40:19, 1.64it/s] 40%|███▉ | 26127/65536 [4:28:50<6:51:53, 1.59it/s] 40%|███▉ | 26128/65536 [4:28:51<6:34:30, 1.66it/s] 40%|███▉ | 26129/65536 [4:28:52<6:46:00, 1.62it/s] 40%|███▉ | 26130/65536 [4:28:52<6:33:31, 1.67it/s] 40%|███▉ | 26131/65536 [4:28:53<6:52:36, 1.59it/s] 40%|███▉ | 26132/65536 [4:28:53<6:49:51, 1.60it/s] 40%|███▉ | 26133/65536 [4:28:54<6:41:37, 1.64it/s] 40%|███▉ | 26134/65536 [4:28:55<6:36:28, 1.66it/s] 40%|███▉ | 26135/65536 [4:28:55<6:37:19, 1.65it/s] 40%|███▉ | 26136/65536 [4:28:56<6:34:32, 1.66it/s] 40%|███▉ | 26137/65536 [4:28:56<6:33:00, 1.67it/s] 40%|███▉ | 26138/65536 [4:28:57<6:47:07, 1.61it/s] 40%|███▉ | 26139/65536 [4:28:58<6:50:16, 1.60it/s] 40%|███▉ | 26140/65536 [4:28:58<6:55:16, 1.58it/s] {'loss': 1.7917, 'learning_rate': 6.426778498836783e-07, 'epoch': 1613.58} + 40%|███▉ | 26140/65536 [4:28:58<6:55:16, 1.58it/s] 40%|███▉ | 26141/65536 [4:28:59<6:49:24, 1.60it/s] 40%|███▉ | 26142/65536 [4:29:00<6:52:42, 1.59it/s] 40%|███▉ | 26143/65536 [4:29:00<6:52:57, 1.59it/s] 40%|███▉ | 26144/65536 [4:29:01<6:49:35, 1.60it/s] 40%|███▉ | 26145/65536 [4:29:01<6:34:49, 1.66it/s] 40%|███▉ | 26146/65536 [4:29:02<6:38:58, 1.65it/s] 40%|███▉ | 26147/65536 [4:29:03<6:46:59, 1.61it/s] 40%|███▉ | 26148/65536 [4:29:03<6:50:50, 1.60it/s] 40%|███▉ | 26149/65536 [4:29:04<6:57:21, 1.57it/s] 40%|███▉ | 26150/65536 [4:29:05<6:52:30, 1.59it/s] 40%|███▉ | 26151/65536 [4:29:05<6:48:34, 1.61it/s] 40%|███▉ | 26152/65536 [4:29:06<6:44:13, 1.62it/s] 40%|███▉ | 26153/65536 [4:29:06<6:45:07, 1.62it/s] 40%|███▉ | 26154/65536 [4:29:07<6:40:25, 1.64it/s] 40%|███▉ | 26155/65536 [4:29:08<6:41:10, 1.64it/s] 40%|███▉ | 26156/65536 [4:29:08<6:35:39, 1.66it/s] 40%|███▉ | 26157/65536 [4:29:09<6:46:52, 1.61it/s] 40%|███▉ | 26158/65536 [4:29:09<6:39:48, 1.64it/s] 40%|███▉ | 26159/65536 [4:29:10<6:44:05, 1.62it/s] 40%|███▉ | 26160/65536 [4:29:11<6:39:48, 1.64it/s] {'loss': 1.7625, 'learning_rate': 6.424023509244521e-07, 'epoch': 1614.81} + 40%|███▉ | 26160/65536 [4:29:11<6:39:48, 1.64it/s] 40%|███▉ | 26161/65536 [4:29:11<6:38:24, 1.65it/s] 40%|███▉ | 26162/65536 [4:29:12<6:29:23, 1.69it/s] 40%|███▉ | 26163/65536 [4:29:12<6:41:38, 1.63it/s] 40%|███▉ | 26164/65536 [4:29:13<6:46:17, 1.62it/s] 40%|███▉ | 26165/65536 [4:29:14<6:38:46, 1.65it/s] 40%|███▉ | 26166/65536 [4:29:14<6:50:57, 1.60it/s] 40%|███▉ | 26167/65536 [4:29:15<6:58:20, 1.57it/s] 40%|███▉ | 26168/65536 [4:29:16<6:50:53, 1.60it/s] 40%|███▉ | 26169/65536 [4:29:16<6:45:22, 1.62it/s] 40%|███▉ | 26170/65536 [4:29:17<6:31:36, 1.68it/s] 40%|███▉ | 26171/65536 [4:29:17<6:29:04, 1.69it/s] 40%|███▉ | 26172/65536 [4:29:18<6:34:31, 1.66it/s] 40%|███▉ | 26173/65536 [4:29:19<6:34:07, 1.66it/s] 40%|███▉ | 26174/65536 [4:29:19<6:37:20, 1.65it/s] 40%|███▉ | 26175/65536 [4:29:20<6:48:56, 1.60it/s] 40%|███▉ | 26176/65536 [4:29:20<6:44:17, 1.62it/s] 40%|███▉ | 26177/65536 [4:29:21<6:52:20, 1.59it/s] 40%|███▉ | 26178/65536 [4:29:22<6:42:03, 1.63it/s] 40%|███▉ | 26179/65536 [4:29:22<6:41:19, 1.63it/s] 40%|███▉ | 26180/65536 [4:29:23<6:55:57, 1.58it/s] {'loss': 1.8147, 'learning_rate': 6.421268519652259e-07, 'epoch': 1616.05} + 40%|███▉ | 26180/65536 [4:29:23<6:55:57, 1.58it/s] 40%|███▉ | 26181/65536 [4:29:24<6:52:16, 1.59it/s] 40%|███▉ | 26182/65536 [4:29:24<6:45:28, 1.62it/s] 40%|███▉ | 26183/65536 [4:29:25<6:43:23, 1.63it/s] 40%|███▉ | 26184/65536 [4:29:25<6:49:52, 1.60it/s] 40%|███▉ | 26185/65536 [4:29:26<6:56:23, 1.58it/s] 40%|███▉ | 26186/65536 [4:29:27<7:05:47, 1.54it/s] 40%|███▉ | 26187/65536 [4:29:27<6:50:56, 1.60it/s] 40%|███▉ | 26188/65536 [4:29:28<6:42:58, 1.63it/s] 40%|███▉ | 26189/65536 [4:29:29<6:36:45, 1.65it/s] 40%|███▉ | 26190/65536 [4:29:29<6:40:51, 1.64it/s] 40%|███▉ | 26191/65536 [4:29:30<6:40:25, 1.64it/s] 40%|███▉ | 26192/65536 [4:29:30<6:37:27, 1.65it/s] 40%|███▉ | 26193/65536 [4:29:31<6:40:39, 1.64it/s] 40%|███▉ | 26194/65536 [4:29:32<6:47:10, 1.61it/s] 40%|███▉ | 26195/65536 [4:29:32<6:43:50, 1.62it/s] 40%|███▉ | 26196/65536 [4:29:33<6:51:57, 1.59it/s] 40%|███▉ | 26197/65536 [4:29:34<6:48:47, 1.60it/s] 40%|███▉ | 26198/65536 [4:29:34<6:51:55, 1.59it/s] 40%|███▉ | 26199/65536 [4:29:35<6:53:03, 1.59it/s] 40%|███▉ | 26200/65536 [4:29:35<6:56:02, 1.58it/s] {'loss': 1.749, 'learning_rate': 6.418513530059997e-07, 'epoch': 1617.28} + 40%|███▉ | 26200/65536 [4:29:35<6:56:02, 1.58it/s] 40%|███▉ | 26201/65536 [4:29:36<6:57:22, 1.57it/s] 40%|███▉ | 26202/65536 [4:29:37<6:47:39, 1.61it/s] 40%|███▉ | 26203/65536 [4:29:37<6:48:42, 1.60it/s] 40%|███▉ | 26204/65536 [4:29:38<6:55:46, 1.58it/s] 40%|███▉ | 26205/65536 [4:29:39<6:46:10, 1.61it/s] 40%|███▉ | 26206/65536 [4:29:39<6:50:12, 1.60it/s] 40%|███▉ | 26207/65536 [4:29:40<6:48:11, 1.61it/s] 40%|███▉ | 26208/65536 [4:29:40<6:44:54, 1.62it/s] 40%|███▉ | 26209/65536 [4:29:41<6:47:26, 1.61it/s] 40%|███▉ | 26210/65536 [4:29:42<6:44:26, 1.62it/s] 40%|███▉ | 26211/65536 [4:29:42<6:37:37, 1.65it/s] 40%|███▉ | 26212/65536 [4:29:43<6:51:56, 1.59it/s] 40%|███▉ | 26213/65536 [4:29:43<6:41:32, 1.63it/s] 40%|███▉ | 26214/65536 [4:29:44<6:44:13, 1.62it/s] 40%|████ | 26215/65536 [4:29:45<6:57:26, 1.57it/s] 40%|████ | 26216/65536 [4:29:45<7:00:48, 1.56it/s] 40%|████ | 26217/65536 [4:29:46<7:01:42, 1.55it/s] 40%|████ | 26218/65536 [4:29:47<6:54:05, 1.58it/s] 40%|████ | 26219/65536 [4:29:47<6:54:15, 1.58it/s] 40%|████ | 26220/65536 [4:29:48<6:43:41, 1.62it/s] {'loss': 1.7624, 'learning_rate': 6.415758540467736e-07, 'epoch': 1618.52} + 40%|████ | 26220/65536 [4:29:48<6:43:41, 1.62it/s] 40%|████ | 26221/65536 [4:29:49<6:41:41, 1.63it/s] 40%|████ | 26222/65536 [4:29:49<6:50:14, 1.60it/s] 40%|████ | 26223/65536 [4:29:50<6:49:19, 1.60it/s] 40%|████ | 26224/65536 [4:29:51<7:09:33, 1.53it/s] 40%|████ | 26225/65536 [4:29:51<6:59:36, 1.56it/s] 40%|████ | 26226/65536 [4:29:52<6:50:41, 1.60it/s] 40%|████ | 26227/65536 [4:29:52<6:37:54, 1.65it/s] 40%|████ | 26228/65536 [4:29:53<6:47:49, 1.61it/s] 40%|████ | 26229/65536 [4:29:53<6:35:28, 1.66it/s] 40%|████ | 26230/65536 [4:29:54<6:44:48, 1.62it/s] 40%|████ | 26231/65536 [4:29:55<6:50:09, 1.60it/s] 40%|████ | 26232/65536 [4:29:55<6:52:54, 1.59it/s] 40%|████ | 26233/65536 [4:29:56<6:53:56, 1.58it/s] 40%|████ | 26234/65536 [4:29:57<6:43:24, 1.62it/s] 40%|████ | 26235/65536 [4:29:57<6:35:54, 1.65it/s] 40%|████ | 26236/65536 [4:29:58<6:33:27, 1.66it/s] 40%|████ | 26237/65536 [4:29:58<6:29:40, 1.68it/s] 40%|████ | 26238/65536 [4:29:59<6:42:27, 1.63it/s] 40%|████ | 26239/65536 [4:30:00<6:40:39, 1.63it/s] 40%|████ | 26240/65536 [4:30:00<6:32:58, 1.67it/s] {'loss': 1.7598, 'learning_rate': 6.413003550875474e-07, 'epoch': 1619.75} + 40%|████ | 26240/65536 [4:30:00<6:32:58, 1.67it/s] 40%|████ | 26241/65536 [4:30:01<6:41:21, 1.63it/s] 40%|████ | 26242/65536 [4:30:01<6:36:57, 1.65it/s] 40%|████ | 26243/65536 [4:30:02<6:37:32, 1.65it/s] 40%|████ | 26244/65536 [4:30:03<6:38:47, 1.64it/s] 40%|████ | 26245/65536 [4:30:03<7:01:05, 1.56it/s] 40%|████ | 26246/65536 [4:30:04<6:53:04, 1.59it/s] 40%|████ | 26247/65536 [4:30:05<6:51:49, 1.59it/s] 40%|████ | 26248/65536 [4:30:05<6:46:25, 1.61it/s] 40%|████ | 26249/65536 [4:30:06<6:48:05, 1.60it/s] 40%|████ | 26250/65536 [4:30:07<6:52:18, 1.59it/s] 40%|████ | 26251/65536 [4:30:07<6:49:11, 1.60it/s] 40%|████ | 26252/65536 [4:30:08<6:52:29, 1.59it/s] 40%|████ | 26253/65536 [4:30:08<6:38:34, 1.64it/s] 40%|████ | 26254/65536 [4:30:09<6:40:52, 1.63it/s] 40%|████ | 26255/65536 [4:30:10<6:40:44, 1.63it/s] 40%|████ | 26256/65536 [4:30:10<6:40:15, 1.64it/s] 40%|████ | 26257/65536 [4:30:11<6:36:15, 1.65it/s] 40%|████ | 26258/65536 [4:30:11<6:30:37, 1.68it/s] 40%|████ | 26259/65536 [4:30:12<6:32:24, 1.67it/s] 40%|████ | 26260/65536 [4:30:13<6:34:37, 1.66it/s] {'loss': 1.7928, 'learning_rate': 6.410248561283212e-07, 'epoch': 1620.99} + 40%|████ | 26260/65536 [4:30:13<6:34:37, 1.66it/s] 40%|████ | 26261/65536 [4:30:13<6:49:33, 1.60it/s] 40%|████ | 26262/65536 [4:30:14<6:42:16, 1.63it/s] 40%|████ | 26263/65536 [4:30:14<6:46:25, 1.61it/s] 40%|████ | 26264/65536 [4:30:15<6:36:37, 1.65it/s] 40%|████ | 26265/65536 [4:30:16<6:43:09, 1.62it/s] 40%|████ | 26266/65536 [4:30:16<6:34:56, 1.66it/s] 40%|████ | 26267/65536 [4:30:17<6:24:03, 1.70it/s] 40%|████ | 26268/65536 [4:30:18<6:50:09, 1.60it/s] 40%|████ | 26269/65536 [4:30:18<6:41:42, 1.63it/s] 40%|████ | 26270/65536 [4:30:19<6:38:31, 1.64it/s] 40%|████ | 26271/65536 [4:30:19<6:42:43, 1.62it/s] 40%|████ | 26272/65536 [4:30:20<6:36:36, 1.65it/s] 40%|████ | 26273/65536 [4:30:21<6:38:04, 1.64it/s] 40%|████ | 26274/65536 [4:30:21<6:43:17, 1.62it/s] 40%|████ | 26275/65536 [4:30:22<6:58:57, 1.56it/s] 40%|████ | 26276/65536 [4:30:23<7:10:54, 1.52it/s] 40%|████ | 26277/65536 [4:30:23<7:18:59, 1.49it/s] 40%|████ | 26278/65536 [4:30:24<7:05:28, 1.54it/s] 40%|████ | 26279/65536 [4:30:24<7:00:08, 1.56it/s] 40%|████ | 26280/65536 [4:30:25<6:54:20, 1.58it/s] {'loss': 1.7973, 'learning_rate': 6.407493571690951e-07, 'epoch': 1622.22} + 40%|████ | 26280/65536 [4:30:25<6:54:20, 1.58it/s] 40%|████ | 26281/65536 [4:30:26<7:01:50, 1.55it/s] 40%|████ | 26282/65536 [4:30:26<6:53:21, 1.58it/s] 40%|████ | 26283/65536 [4:30:27<6:43:37, 1.62it/s] 40%|████ | 26284/65536 [4:30:28<6:43:43, 1.62it/s] 40%|████ | 26285/65536 [4:30:28<6:33:35, 1.66it/s] 40%|████ | 26286/65536 [4:30:29<6:30:49, 1.67it/s] 40%|████ | 26287/65536 [4:30:29<6:32:49, 1.67it/s] 40%|████ | 26288/65536 [4:30:30<6:34:40, 1.66it/s] 40%|████ | 26289/65536 [4:30:31<6:29:41, 1.68it/s] 40%|████ | 26290/65536 [4:30:31<6:37:55, 1.64it/s] 40%|████ | 26291/65536 [4:30:32<6:54:30, 1.58it/s] 40%|████ | 26292/65536 [4:30:32<6:42:50, 1.62it/s] 40%|████ | 26293/65536 [4:30:33<6:47:29, 1.61it/s] 40%|████ | 26294/65536 [4:30:34<6:46:50, 1.61it/s] 40%|████ | 26295/65536 [4:30:34<6:51:58, 1.59it/s] 40%|████ | 26296/65536 [4:30:35<6:38:59, 1.64it/s] 40%|████ | 26297/65536 [4:30:35<6:37:07, 1.65it/s] 40%|████ | 26298/65536 [4:30:36<6:45:54, 1.61it/s] 40%|████ | 26299/65536 [4:30:37<6:41:25, 1.63it/s] 40%|████ | 26300/65536 [4:30:37<6:49:59, 1.59it/s] {'loss': 1.7961, 'learning_rate': 6.404738582098689e-07, 'epoch': 1623.46} + 40%|████ | 26300/65536 [4:30:37<6:49:59, 1.59it/s] 40%|████ | 26301/65536 [4:30:38<6:42:39, 1.62it/s] 40%|████ | 26302/65536 [4:30:39<6:42:01, 1.63it/s] 40%|████ | 26303/65536 [4:30:39<6:44:08, 1.62it/s] 40%|████ | 26304/65536 [4:30:40<6:42:56, 1.62it/s] 40%|████ | 26305/65536 [4:30:40<6:32:40, 1.67it/s] 40%|████ | 26306/65536 [4:30:41<6:26:09, 1.69it/s] 40%|████ | 26307/65536 [4:30:42<6:25:18, 1.70it/s] 40%|████ | 26308/65536 [4:30:42<6:21:53, 1.71it/s] 40%|████ | 26309/65536 [4:30:43<6:40:21, 1.63it/s] 40%|████ | 26310/65536 [4:30:43<6:38:47, 1.64it/s] 40%|████ | 26311/65536 [4:30:44<6:33:06, 1.66it/s] 40%|████ | 26312/65536 [4:30:45<6:31:46, 1.67it/s] 40%|████ | 26313/65536 [4:30:45<6:31:12, 1.67it/s] 40%|████ | 26314/65536 [4:30:46<6:37:08, 1.65it/s] 40%|████ | 26315/65536 [4:30:46<6:36:51, 1.65it/s] 40%|████ | 26316/65536 [4:30:47<6:35:31, 1.65it/s] 40%|████ | 26317/65536 [4:30:48<6:31:11, 1.67it/s] 40%|████ | 26318/65536 [4:30:48<6:33:16, 1.66it/s] 40%|████ | 26319/65536 [4:30:49<6:32:30, 1.67it/s] 40%|████ | 26320/65536 [4:30:50<7:00:59, 1.55it/s] {'loss': 1.811, 'learning_rate': 6.401983592506428e-07, 'epoch': 1624.69} + 40%|████ | 26320/65536 [4:30:50<7:00:59, 1.55it/s] 40%|████ | 26321/65536 [4:30:50<6:58:54, 1.56it/s] 40%|████ | 26322/65536 [4:30:51<7:07:09, 1.53it/s] 40%|████ | 26323/65536 [4:30:51<6:47:49, 1.60it/s] 40%|████ | 26324/65536 [4:30:52<6:45:30, 1.61it/s] 40%|████ | 26325/65536 [4:30:53<6:41:54, 1.63it/s] 40%|████ | 26326/65536 [4:30:53<6:57:19, 1.57it/s] 40%|████ | 26327/65536 [4:30:54<6:56:21, 1.57it/s] 40%|████ | 26328/65536 [4:30:55<6:47:35, 1.60it/s] 40%|████ | 26329/65536 [4:30:55<6:44:59, 1.61it/s] 40%|████ | 26330/65536 [4:30:56<6:49:42, 1.59it/s] 40%|████ | 26331/65536 [4:30:56<6:50:51, 1.59it/s] 40%|████ | 26332/65536 [4:30:57<6:45:30, 1.61it/s] 40%|████ | 26333/65536 [4:30:58<6:46:25, 1.61it/s] 40%|████ | 26334/65536 [4:30:58<6:42:17, 1.62it/s] 40%|████ | 26335/65536 [4:30:59<6:44:03, 1.62it/s] 40%|████ | 26336/65536 [4:30:59<6:36:20, 1.65it/s] 40%|████ | 26337/65536 [4:31:00<6:34:31, 1.66it/s] 40%|████ | 26338/65536 [4:31:01<6:38:49, 1.64it/s] 40%|████ | 26339/65536 [4:31:01<6:37:18, 1.64it/s] 40%|████ | 26340/65536 [4:31:02<6:39:29, 1.64it/s] {'loss': 1.7704, 'learning_rate': 6.399228602914166e-07, 'epoch': 1625.93} + 40%|████ | 26340/65536 [4:31:02<6:39:29, 1.64it/s] 40%|████ | 26341/65536 [4:31:03<6:37:10, 1.64it/s] 40%|████ | 26342/65536 [4:31:03<6:51:20, 1.59it/s] 40%|████ | 26343/65536 [4:31:04<6:44:05, 1.62it/s] 40%|██���█ | 26344/65536 [4:31:04<6:53:24, 1.58it/s] 40%|████ | 26345/65536 [4:31:05<6:48:05, 1.60it/s] 40%|████ | 26346/65536 [4:31:06<6:55:44, 1.57it/s] 40%|████ | 26347/65536 [4:31:06<6:49:54, 1.59it/s] 40%|████ | 26348/65536 [4:31:07<6:50:20, 1.59it/s] 40%|████ | 26349/65536 [4:31:08<6:40:02, 1.63it/s] 40%|████ | 26350/65536 [4:31:08<6:35:11, 1.65it/s] 40%|████ | 26351/65536 [4:31:09<6:27:24, 1.69it/s] 40%|████ | 26352/65536 [4:31:09<6:27:06, 1.69it/s] 40%|████ | 26353/65536 [4:31:10<6:42:03, 1.62it/s] 40%|████ | 26354/65536 [4:31:11<6:52:35, 1.58it/s] 40%|████ | 26355/65536 [4:31:11<6:46:52, 1.60it/s] 40%|████ | 26356/65536 [4:31:12<6:41:55, 1.62it/s] 40%|████ | 26357/65536 [4:31:12<6:32:33, 1.66it/s] 40%|████ | 26358/65536 [4:31:13<6:45:58, 1.61it/s] 40%|████ | 26359/65536 [4:31:14<6:41:14, 1.63it/s] 40%|████ | 26360/65536 [4:31:14<6:42:46, 1.62it/s] {'loss': 1.7649, 'learning_rate': 6.396473613321905e-07, 'epoch': 1627.16} + 40%|████ | 26360/65536 [4:31:14<6:42:46, 1.62it/s] 40%|████ | 26361/65536 [4:31:15<6:42:03, 1.62it/s] 40%|████ | 26362/65536 [4:31:16<6:41:13, 1.63it/s] 40%|████ | 26363/65536 [4:31:16<6:41:36, 1.63it/s] 40%|████ | 26364/65536 [4:31:17<6:37:11, 1.64it/s] 40%|████ | 26365/65536 [4:31:17<6:38:43, 1.64it/s] 40%|████ | 26366/65536 [4:31:18<6:44:37, 1.61it/s] 40%|████ | 26367/65536 [4:31:19<6:47:00, 1.60it/s] 40%|████ | 26368/65536 [4:31:19<6:46:28, 1.61it/s] 40%|████ | 26369/65536 [4:31:20<6:42:47, 1.62it/s] 40%|████ | 26370/65536 [4:31:20<6:42:09, 1.62it/s] 40%|████ | 26371/65536 [4:31:21<6:37:18, 1.64it/s] 40%|████ | 26372/65536 [4:31:22<6:38:03, 1.64it/s] 40%|████ | 26373/65536 [4:31:22<6:33:45, 1.66it/s] 40%|████ | 26374/65536 [4:31:23<6:48:41, 1.60it/s] 40%|████ | 26375/65536 [4:31:24<6:47:59, 1.60it/s] 40%|████ | 26376/65536 [4:31:24<6:46:57, 1.60it/s] 40%|████ | 26377/65536 [4:31:25<6:41:57, 1.62it/s] 40%|████ | 26378/65536 [4:31:25<6:30:15, 1.67it/s] 40%|████ | 26379/65536 [4:31:26<6:37:14, 1.64it/s] 40%|████ | 26380/65536 [4:31:27<6:43:20, 1.62it/s] {'loss': 1.7882, 'learning_rate': 6.393718623729643e-07, 'epoch': 1628.4} + 40%|████ | 26380/65536 [4:31:27<6:43:20, 1.62it/s] 40%|████ | 26381/65536 [4:31:27<6:37:54, 1.64it/s] 40%|████ | 26382/65536 [4:31:28<6:36:35, 1.65it/s] 40%|████ | 26383/65536 [4:31:28<6:47:46, 1.60it/s] 40%|████ | 26384/65536 [4:31:29<6:38:53, 1.64it/s] 40%|████ | 26385/65536 [4:31:30<6:35:56, 1.65it/s] 40%|████ | 26386/65536 [4:31:30<6:45:29, 1.61it/s] 40%|████ | 26387/65536 [4:31:31<6:35:12, 1.65it/s] 40%|████ | 26388/65536 [4:31:31<6:37:58, 1.64it/s] 40%|████ | 26389/65536 [4:31:32<6:35:38, 1.65it/s] 40%|████ | 26390/65536 [4:31:33<6:51:11, 1.59it/s] 40%|████ | 26391/65536 [4:31:33<6:48:54, 1.60it/s] 40%|████ | 26392/65536 [4:31:34<6:50:44, 1.59it/s] 40%|████ | 26393/65536 [4:31:35<6:55:54, 1.57it/s] 40%|████ | 26394/65536 [4:31:35<6:46:56, 1.60it/s] 40%|████ | 26395/65536 [4:31:36<6:45:05, 1.61it/s] 40%|████ | 26396/65536 [4:31:36<6:41:12, 1.63it/s] 40%|████ | 26397/65536 [4:31:37<6:35:17, 1.65it/s] 40%|████ | 26398/65536 [4:31:38<6:29:37, 1.67it/s] 40%|████ | 26399/65536 [4:31:38<6:27:27, 1.68it/s] 40%|████ | 26400/65536 [4:31:39<6:25:16, 1.69it/s] {'loss': 1.7661, 'learning_rate': 6.390963634137382e-07, 'epoch': 1629.63} + 40%|████ | 26400/65536 [4:31:39<6:25:16, 1.69it/s] 40%|████ | 26401/65536 [4:31:39<6:38:05, 1.64it/s] 40%|████ | 26402/65536 [4:31:40<6:40:09, 1.63it/s] 40%|████ | 26403/65536 [4:31:41<6:47:11, 1.60it/s] 40%|████ | 26404/65536 [4:31:41<6:48:55, 1.59it/s] 40%|████ | 26405/65536 [4:31:42<6:42:13, 1.62it/s] 40%|████ | 26406/65536 [4:31:43<6:37:26, 1.64it/s] 40%|████ | 26407/65536 [4:31:43<6:49:53, 1.59it/s] 40%|████ | 26408/65536 [4:31:44<6:43:14, 1.62it/s] 40%|████ | 26409/65536 [4:31:44<6:45:03, 1.61it/s] 40%|████ | 26410/65536 [4:31:45<6:50:09, 1.59it/s] 40%|████ | 26411/65536 [4:31:46<6:41:55, 1.62it/s] 40%|████ | 26412/65536 [4:31:46<6:36:02, 1.65it/s] 40%|████ | 26413/65536 [4:31:47<6:37:32, 1.64it/s] 40%|████ | 26414/65536 [4:31:47<6:32:27, 1.66it/s] 40%|████ | 26415/65536 [4:31:48<6:39:01, 1.63it/s] 40%|████ | 26416/65536 [4:31:49<6:41:05, 1.63it/s] 40%|████ | 26417/65536 [4:31:49<6:36:28, 1.64it/s] 40%|████ | 26418/65536 [4:31:50<6:40:25, 1.63it/s] 40%|████ | 26419/65536 [4:31:51<6:36:13, 1.65it/s] 40%|████ | 26420/65536 [4:31:51<6:43:46, 1.61it/s] {'loss': 1.7519, 'learning_rate': 6.388208644545121e-07, 'epoch': 1630.86} + 40%|████ | 26420/65536 [4:31:51<6:43:46, 1.61it/s] 40%|████ | 26421/65536 [4:31:52<6:38:38, 1.64it/s] 40%|████ | 26422/65536 [4:31:52<6:45:57, 1.61it/s] 40%|████ | 26423/65536 [4:31:53<7:05:08, 1.53it/s] 40%|████ | 26424/65536 [4:31:54<6:49:26, 1.59it/s] 40%|████ | 26425/65536 [4:31:54<6:43:44, 1.61it/s] 40%|████ | 26426/65536 [4:31:55<6:38:04, 1.64it/s] 40%|████ | 26427/65536 [4:31:56<6:54:39, 1.57it/s] 40%|████ | 26428/65536 [4:31:56<6:50:26, 1.59it/s] 40%|████ | 26429/65536 [4:31:57<6:42:03, 1.62it/s] 40%|████ | 26430/65536 [4:31:57<6:51:09, 1.59it/s] 40%|████ | 26431/65536 [4:31:58<6:46:35, 1.60it/s] 40%|████ | 26432/65536 [4:31:59<6:47:56, 1.60it/s] 40%|████ | 26433/65536 [4:31:59<6:49:16, 1.59it/s] 40%|████ | 26434/65536 [4:32:00<6:47:52, 1.60it/s] 40%|████ | 26435/65536 [4:32:01<6:40:30, 1.63it/s] 40%|████ | 26436/65536 [4:32:01<6:40:27, 1.63it/s] 40%|████ | 26437/65536 [4:32:02<6:42:36, 1.62it/s] 40%|████ | 26438/65536 [4:32:02<6:38:28, 1.64it/s] 40%|████ | 26439/65536 [4:32:03<6:46:30, 1.60it/s] 40%|████ | 26440/65536 [4:32:04<6:35:34, 1.65it/s] {'loss': 1.7378, 'learning_rate': 6.385453654952859e-07, 'epoch': 1632.1} + 40%|████ | 26440/65536 [4:32:04<6:35:34, 1.65it/s] 40%|████ | 26441/65536 [4:32:04<6:49:45, 1.59it/s] 40%|████ | 26442/65536 [4:32:05<6:45:34, 1.61it/s] 40%|████ | 26443/65536 [4:32:05<6:42:19, 1.62it/s] 40%|████ | 26444/65536 [4:32:06<6:42:15, 1.62it/s] 40%|████ | 26445/65536 [4:32:07<6:33:00, 1.66it/s] 40%|████ | 26446/65536 [4:32:07<6:31:59, 1.66it/s] 40%|████ | 26447/65536 [4:32:08<6:34:43, 1.65it/s] 40%|████ | 26448/65536 [4:32:09<6:35:56, 1.65it/s] 40%|████ | 26449/65536 [4:32:09<6:39:42, 1.63it/s] 40%|████ | 26450/65536 [4:32:10<6:58:35, 1.56it/s] 40%|████ | 26451/65536 [4:32:10<6:43:59, 1.61it/s] 40%|████ | 26452/65536 [4:32:11<6:32:45, 1.66it/s] 40%|████ | 26453/65536 [4:32:12<6:36:12, 1.64it/s] 40%|████ | 26454/65536 [4:32:12<6:40:30, 1.63it/s] 40%|████ | 26455/65536 [4:32:13<6:48:45, 1.59it/s] 40%|████ | 26456/65536 [4:32:13<6:45:37, 1.61it/s] 40%|████ | 26457/65536 [4:32:14<6:37:56, 1.64it/s] 40%|████ | 26458/65536 [4:32:15<6:40:28, 1.63it/s] 40%|████ | 26459/65536 [4:32:15<6:31:44, 1.66it/s] 40%|████ | 26460/65536 [4:32:16<6:33:00, 1.66it/s] {'loss': 1.7445, 'learning_rate': 6.382698665360597e-07, 'epoch': 1633.33} + 40%|████ | 26460/65536 [4:32:16<6:33:00, 1.66it/s] 40%|████ | 26461/65536 [4:32:16<6:35:47, 1.65it/s] 40%|████ | 26462/65536 [4:32:17<6:41:09, 1.62it/s] 40%|████ | 26463/65536 [4:32:18<6:35:26, 1.65it/s] 40%|████ | 26464/65536 [4:32:18<6:42:26, 1.62it/s] 40%|████ | 26465/65536 [4:32:19<6:41:01, 1.62it/s] 40%|████ | 26466/65536 [4:32:20<6:40:04, 1.63it/s] 40%|████ | 26467/65536 [4:32:20<6:42:27, 1.62it/s] 40%|████ | 26468/65536 [4:32:21<6:47:10, 1.60it/s] 40%|████ | 26469/65536 [4:32:22<7:02:24, 1.54it/s] 40%|████ | 26470/65536 [4:32:22<6:51:09, 1.58it/s] 40%|████ | 26471/65536 [4:32:23<6:54:42, 1.57it/s] 40%|████ | 26472/65536 [4:32:23<6:48:41, 1.59it/s] 40%|████ | 26473/65536 [4:32:24<6:46:57, 1.60it/s] 40%|████ | 26474/65536 [4:32:25<6:49:01, 1.59it/s] 40%|████ | 26475/65536 [4:32:25<6:50:05, 1.59it/s] 40%|████ | 26476/65536 [4:32:26<6:41:21, 1.62it/s] 40%|████ | 26477/65536 [4:32:27<6:45:59, 1.60it/s] 40%|████ | 26478/65536 [4:32:27<6:43:41, 1.61it/s] 40%|████ | 26479/65536 [4:32:28<6:41:32, 1.62it/s] 40%|████ | 26480/65536 [4:32:28<6:41:39, 1.62it/s] {'loss': 1.7006, 'learning_rate': 6.379943675768335e-07, 'epoch': 1634.57} + 40%|████ | 26480/65536 [4:32:28<6:41:39, 1.62it/s] 40%|████ | 26481/65536 [4:32:29<6:39:23, 1.63it/s] 40%|████ | 26482/65536 [4:32:30<6:36:07, 1.64it/s] 40%|████ | 26483/65536 [4:32:30<6:44:17, 1.61it/s] 40%|████ | 26484/65536 [4:32:31<6:33:48, 1.65it/s] 40%|████ | 26485/65536 [4:32:31<6:40:24, 1.63it/s] 40%|████ | 26486/65536 [4:32:32<6:35:35, 1.65it/s] 40%|████ | 26487/65536 [4:32:33<6:34:59, 1.65it/s] 40%|████ | 26488/65536 [4:32:33<6:55:12, 1.57it/s] 40%|████ | 26489/65536 [4:32:34<6:47:11, 1.60it/s] 40%|████ | 26490/65536 [4:32:35<6:48:52, 1.59it/s] 40%|████ | 26491/65536 [4:32:35<6:43:31, 1.61it/s] 40%|████ | 26492/65536 [4:32:36<6:33:47, 1.65it/s] 40%|████ | 26493/65536 [4:32:36<6:41:36, 1.62it/s] 40%|████ | 26494/65536 [4:32:37<6:43:48, 1.61it/s] 40%|████ | 26495/65536 [4:32:38<6:30:39, 1.67it/s] 40%|████ | 26496/65536 [4:32:38<6:28:01, 1.68it/s] 40%|████ | 26497/65536 [4:32:39<6:21:49, 1.70it/s] 40%|████ | 26498/65536 [4:32:39<6:23:10, 1.70it/s] 40%|████ | 26499/65536 [4:32:40<6:32:53, 1.66it/s] 40%|████ | 26500/65536 [4:32:41<6:29:15, 1.67it/s] {'loss': 1.8112, 'learning_rate': 6.377188686176074e-07, 'epoch': 1635.8} + 40%|████ | 26500/65536 [4:32:41<6:29:15, 1.67it/s] 40%|████ | 26501/65536 [4:32:41<6:28:07, 1.68it/s] 40%|████ | 26502/65536 [4:32:42<6:40:09, 1.63it/s] 40%|████ | 26503/65536 [4:32:42<6:42:22, 1.62it/s] 40%|████ | 26504/65536 [4:32:43<6:47:01, 1.60it/s] 40%|████ | 26505/65536 [4:32:44<6:31:44, 1.66it/s] 40%|████ | 26506/65536 [4:32:44<6:27:20, 1.68it/s] 40%|████ | 26507/65536 [4:32:45<6:30:37, 1.67it/s] 40%|████ | 26508/65536 [4:32:45<6:35:03, 1.65it/s] 40%|████ | 26509/65536 [4:32:46<6:35:08, 1.65it/s] 40%|████ | 26510/65536 [4:32:47<6:34:36, 1.65it/s] 40%|████ | 26511/65536 [4:32:47<6:37:24, 1.64it/s] 40%|████ | 26512/65536 [4:32:48<6:41:57, 1.62it/s] 40%|████ | 26513/65536 [4:32:48<6:42:35, 1.62it/s] 40%|████ | 26514/65536 [4:32:49<6:47:15, 1.60it/s] 40%|████ | 26515/65536 [4:32:50<6:44:48, 1.61it/s] 40%|████ | 26516/65536 [4:32:50<6:48:42, 1.59it/s] 40%|████ | 26517/65536 [4:32:51<6:47:21, 1.60it/s] 40%|████ | 26518/65536 [4:32:52<6:46:07, 1.60it/s] 40%|████ | 26519/65536 [4:32:52<6:39:07, 1.63it/s] 40%|████ | 26520/65536 [4:32:53<6:41:22, 1.62it/s] {'loss': 1.7643, 'learning_rate': 6.374433696583811e-07, 'epoch': 1637.04} + 40%|████ | 26520/65536 [4:32:53<6:41:22, 1.62it/s] 40%|████ | 26521/65536 [4:32:53<6:34:58, 1.65it/s] 40%|████ | 26522/65536 [4:32:54<6:40:50, 1.62it/s] 40%|████ | 26523/65536 [4:32:55<6:41:40, 1.62it/s] 40%|████ | 26524/65536 [4:32:55<6:51:52, 1.58it/s] 40%|████ | 26525/65536 [4:32:56<6:54:46, 1.57it/s] 40%|████ | 26526/65536 [4:32:57<6:45:38, 1.60it/s] 40%|████ | 26527/65536 [4:32:57<6:46:36, 1.60it/s] 40%|████ | 26528/65536 [4:32:58<6:45:57, 1.60it/s] 40%|████ | 26529/65536 [4:32:58<6:36:47, 1.64it/s] 40%|████ | 26530/65536 [4:32:59<6:26:50, 1.68it/s] 40%|████ | 26531/65536 [4:33:00<6:27:11, 1.68it/s] 40%|████ | 26532/65536 [4:33:00<6:24:57, 1.69it/s] 40%|████ | 26533/65536 [4:33:01<6:32:09, 1.66it/s] 40%|████ | 26534/65536 [4:33:01<6:31:35, 1.66it/s] 40%|████ | 26535/65536 [4:33:02<6:34:23, 1.65it/s] 40%|████ | 26536/65536 [4:33:03<6:51:25, 1.58it/s] 40%|████ | 26537/65536 [4:33:03<6:40:03, 1.62it/s] 40%|████ | 26538/65536 [4:33:04<6:36:28, 1.64it/s] 40%|████ | 26539/65536 [4:33:05<6:38:48, 1.63it/s] 40%|████ | 26540/65536 [4:33:05<6:46:53, 1.60it/s] {'loss': 1.7564, 'learning_rate': 6.37167870699155e-07, 'epoch': 1638.27} + 40%|████ | 26540/65536 [4:33:05<6:46:53, 1.60it/s] 40%|████ | 26541/65536 [4:33:06<6:57:55, 1.56it/s] 40%|████ | 26542/65536 [4:33:06<6:48:39, 1.59it/s] 41%|████ | 26543/65536 [4:33:07<6:59:07, 1.55it/s] 41%|████ | 26544/65536 [4:33:08<6:49:21, 1.59it/s] 41%|████ | 26545/65536 [4:33:08<6:47:11, 1.60it/s] 41%|████ | 26546/65536 [4:33:09<6:41:12, 1.62it/s] 41%|████ | 26547/65536 [4:33:10<6:38:00, 1.63it/s] 41%|████ | 26548/65536 [4:33:10<6:36:16, 1.64it/s] 41%|████ | 26549/65536 [4:33:11<6:36:32, 1.64it/s] 41%|████ | 26550/65536 [4:33:11<6:33:21, 1.65it/s] 41%|████ | 26551/65536 [4:33:12<6:26:02, 1.68it/s] 41%|████ | 26552/65536 [4:33:13<6:44:22, 1.61it/s] 41%|████ | 26553/65536 [4:33:13<6:39:13, 1.63it/s] 41%|████ | 26554/65536 [4:33:14<6:34:18, 1.65it/s] 41%|████ | 26555/65536 [4:33:14<6:33:24, 1.65it/s] 41%|████ | 26556/65536 [4:33:15<6:41:25, 1.62it/s] 41%|████ | 26557/65536 [4:33:16<6:36:27, 1.64it/s] 41%|████ | 26558/65536 [4:33:16<6:28:43, 1.67it/s] 41%|████ | 26559/65536 [4:33:17<6:25:56, 1.68it/s] 41%|████ | 26560/65536 [4:33:17<6:35:00, 1.64it/s] {'loss': 1.7366, 'learning_rate': 6.368923717399289e-07, 'epoch': 1639.51} + 41%|████ | 26560/65536 [4:33:17<6:35:00, 1.64it/s] 41%|████ | 26561/65536 [4:33:18<6:44:43, 1.60it/s] 41%|████ | 26562/65536 [4:33:19<6:35:07, 1.64it/s] 41%|████ | 26563/65536 [4:33:19<6:51:24, 1.58it/s] 41%|████ | 26564/65536 [4:33:20<6:47:44, 1.59it/s] 41%|████ | 26565/65536 [4:33:21<6:37:51, 1.63it/s] 41%|████ | 26566/65536 [4:33:21<6:47:41, 1.59it/s] 41%|████ | 26567/65536 [4:33:22<6:40:03, 1.62it/s] 41%|████ | 26568/65536 [4:33:22<6:35:09, 1.64it/s] 41%|████ | 26569/65536 [4:33:23<6:51:24, 1.58it/s] 41%|████ | 26570/65536 [4:33:24<6:48:29, 1.59it/s] 41%|████ | 26571/65536 [4:33:24<6:46:54, 1.60it/s] 41%|████ | 26572/65536 [4:33:25<6:48:23, 1.59it/s] 41%|████ | 26573/65536 [4:33:26<6:39:18, 1.63it/s] 41%|████ | 26574/65536 [4:33:26<6:29:13, 1.67it/s] 41%|████ | 26575/65536 [4:33:27<6:44:20, 1.61it/s] 41%|████ | 26576/65536 [4:33:27<6:40:45, 1.62it/s] 41%|████ | 26577/65536 [4:33:28<6:44:17, 1.61it/s] 41%|████ | 26578/65536 [4:33:29<6:37:39, 1.63it/s] 41%|████ | 26579/65536 [4:33:29<6:42:42, 1.61it/s] 41%|████ | 26580/65536 [4:33:30<6:42:18, 1.61it/s] {'loss': 1.7218, 'learning_rate': 6.366168727807027e-07, 'epoch': 1640.74} + 41%|████ | 26580/65536 [4:33:30<6:42:18, 1.61it/s] 41%|████ | 26581/65536 [4:33:30<6:40:08, 1.62it/s] 41%|████ | 26582/65536 [4:33:31<6:33:51, 1.65it/s] 41%|████ | 26583/65536 [4:33:32<6:31:11, 1.66it/s] 41%|████ | 26584/65536 [4:33:32<6:25:04, 1.69it/s] 41%|████ | 26585/65536 [4:33:33<6:42:16, 1.61it/s] 41%|████ | 26586/65536 [4:33:33<6:30:59, 1.66it/s] 41%|████ | 26587/65536 [4:33:34<6:28:03, 1.67it/s] 41%|████ | 26588/65536 [4:33:35<6:26:15, 1.68it/s] 41%|████ | 26589/65536 [4:33:35<6:25:50, 1.68it/s] 41%|████ | 26590/65536 [4:33:36<6:34:21, 1.65it/s] 41%|████ | 26591/65536 [4:33:37<6:50:03, 1.58it/s] 41%|████ | 26592/65536 [4:33:37<6:53:51, 1.57it/s] 41%|████ | 26593/65536 [4:33:38<6:52:32, 1.57it/s] 41%|████ | 26594/65536 [4:33:38<6:53:56, 1.57it/s] 41%|████ | 26595/65536 [4:33:39<6:58:23, 1.55it/s] 41%|████ | 26596/65536 [4:33:40<6:53:50, 1.57it/s] 41%|████ | 26597/65536 [4:33:40<6:45:07, 1.60it/s] 41%|████ | 26598/65536 [4:33:41<6:38:58, 1.63it/s] 41%|████ | 26599/65536 [4:33:42<6:29:54, 1.66it/s] 41%|████ | 26600/65536 [4:33:42<6:26:13, 1.68it/s] {'loss': 1.7528, 'learning_rate': 6.363413738214766e-07, 'epoch': 1641.98} + 41%|████ | 26600/65536 [4:33:42<6:26:13, 1.68it/s] 41%|████ | 26601/65536 [4:33:43<6:36:40, 1.64it/s] 41%|████ | 26602/65536 [4:33:43<6:30:08, 1.66it/s] 41%|████ | 26603/65536 [4:33:44<6:30:56, 1.66it/s] 41%|████ | 26604/65536 [4:33:45<6:36:51, 1.63it/s] 41%|████ | 26605/65536 [4:33:45<6:28:53, 1.67it/s] 41%|████ | 26606/65536 [4:33:46<6:45:26, 1.60it/s] 41%|████ | 26607/65536 [4:33:46<6:36:02, 1.64it/s] 41%|████ | 26608/65536 [4:33:47<6:30:56, 1.66it/s] 41%|████ | 26609/65536 [4:33:48<6:32:55, 1.65it/s] 41%|████ | 26610/65536 [4:33:48<6:38:23, 1.63it/s] 41%|████ | 26611/65536 [4:33:49<6:23:29, 1.69it/s] 41%|████ | 26612/65536 [4:33:49<6:20:45, 1.70it/s] 41%|████ | 26613/65536 [4:33:50<6:33:40, 1.65it/s] 41%|████ | 26614/65536 [4:33:51<6:32:41, 1.65it/s] 41%|████ | 26615/65536 [4:33:51<6:33:03, 1.65it/s] 41%|████ | 26616/65536 [4:33:52<6:39:16, 1.62it/s] 41%|████ | 26617/65536 [4:33:53<6:55:49, 1.56it/s] 41%|████ | 26618/65536 [4:33:53<6:49:08, 1.59it/s] 41%|████ | 26619/65536 [4:33:54<6:38:59, 1.63it/s] 41%|████ | 26620/65536 [4:33:54<6:43:00, 1.61it/s] {'loss': 1.7528, 'learning_rate': 6.360658748622505e-07, 'epoch': 1643.21} + 41%|████ | 26620/65536 [4:33:54<6:43:00, 1.61it/s] 41%|████ | 26621/65536 [4:33:55<6:39:32, 1.62it/s] 41%|████ | 26622/65536 [4:33:56<6:34:47, 1.64it/s] 41%|████ | 26623/65536 [4:33:56<6:32:12, 1.65it/s] 41%|████ | 26624/65536 [4:33:57<6:39:36, 1.62it/s] 41%|████ | 26625/65536 [4:33:57<6:30:28, 1.66it/s] 41%|████ | 26626/65536 [4:33:58<6:43:21, 1.61it/s] 41%|████ | 26627/65536 [4:33:59<6:41:36, 1.61it/s] 41%|████ | 26628/65536 [4:33:59<6:37:50, 1.63it/s] 41%|████ | 26629/65536 [4:34:00<6:41:46, 1.61it/s] 41%|████ | 26630/65536 [4:34:01<6:43:58, 1.61it/s] 41%|████ | 26631/65536 [4:34:01<6:42:47, 1.61it/s] 41%|████ | 26632/65536 [4:34:02<6:31:05, 1.66it/s] 41%|████ | 26633/65536 [4:34:02<6:41:46, 1.61it/s] 41%|████ | 26634/65536 [4:34:03<6:57:45, 1.55it/s] 41%|████ | 26635/65536 [4:34:04<6:48:34, 1.59it/s] 41%|████ | 26636/65536 [4:34:04<6:47:58, 1.59it/s] 41%|████ | 26637/65536 [4:34:05<6:36:35, 1.63it/s] 41%|████ | 26638/65536 [4:34:05<6:27:18, 1.67it/s] 41%|████ | 26639/65536 [4:34:06<6:27:30, 1.67it/s] 41%|████ | 26640/65536 [4:34:07<6:28:14, 1.67it/s] {'loss': 1.7733, 'learning_rate': 6.357903759030244e-07, 'epoch': 1644.44} + 41%|████ | 26640/65536 [4:34:07<6:28:14, 1.67it/s] 41%|████ | 26641/65536 [4:34:07<6:42:42, 1.61it/s] 41%|████ | 26642/65536 [4:34:08<6:41:19, 1.62it/s] 41%|████ | 26643/65536 [4:34:08<6:38:40, 1.63it/s] 41%|████ | 26644/65536 [4:34:09<6:35:19, 1.64it/s] 41%|████ | 26645/65536 [4:34:10<6:30:36, 1.66it/s] 41%|████ | 26646/65536 [4:34:10<6:37:37, 1.63it/s] 41%|████ | 26647/65536 [4:34:11<6:28:23, 1.67it/s] 41%|████ | 26648/65536 [4:34:11<6:28:20, 1.67it/s] 41%|████ | 26649/65536 [4:34:12<6:28:49, 1.67it/s] 41%|████ | 26650/65536 [4:34:13<6:32:47, 1.65it/s] 41%|████ | 26651/65536 [4:34:13<6:21:06, 1.70it/s] 41%|████ | 26652/65536 [4:34:14<6:34:41, 1.64it/s] 41%|████ | 26653/65536 [4:34:15<6:36:28, 1.63it/s] 41%|████ | 26654/65536 [4:34:15<6:32:59, 1.65it/s] 41%|████ | 26655/65536 [4:34:16<6:38:56, 1.62it/s] 41%|████ | 26656/65536 [4:34:16<6:32:55, 1.65it/s] 41%|████ | 26657/65536 [4:34:17<6:40:30, 1.62it/s] 41%|████ | 26658/65536 [4:34:18<6:31:42, 1.65it/s] 41%|████ | 26659/65536 [4:34:18<6:35:55, 1.64it/s] 41%|████ | 26660/65536 [4:34:19<6:30:15, 1.66it/s] {'loss': 1.7352, 'learning_rate': 6.355148769437983e-07, 'epoch': 1645.68} + 41%|████ | 26660/65536 [4:34:19<6:30:15, 1.66it/s] 41%|████ | 26661/65536 [4:34:19<6:30:10, 1.66it/s] 41%|████ | 26662/65536 [4:34:20<6:28:03, 1.67it/s] 41%|████ | 26663/65536 [4:34:21<6:35:12, 1.64it/s] 41%|████ | 26664/65536 [4:34:21<6:48:48, 1.58it/s] 41%|████ | 26665/65536 [4:34:22<6:41:52, 1.61it/s] 41%|████ | 26666/65536 [4:34:23<6:55:09, 1.56it/s] 41%|████ | 26667/65536 [4:34:23<6:56:19, 1.56it/s] 41%|████ | 26668/65536 [4:34:24<6:42:15, 1.61it/s] 41%|████ | 26669/65536 [4:34:24<6:48:44, 1.58it/s] 41%|████ | 26670/65536 [4:34:25<6:42:20, 1.61it/s] 41%|████ | 26671/65536 [4:34:26<6:41:12, 1.61it/s] 41%|████ | 26672/65536 [4:34:26<6:30:49, 1.66it/s] 41%|████ | 26673/65536 [4:34:27<6:37:53, 1.63it/s] 41%|████ | 26674/65536 [4:34:27<6:37:13, 1.63it/s] 41%|████ | 26675/65536 [4:34:28<6:28:49, 1.67it/s] 41%|████ | 26676/65536 [4:34:29<6:32:47, 1.65it/s] 41%|████ | 26677/65536 [4:34:29<6:38:02, 1.63it/s] 41%|████ | 26678/65536 [4:34:30<6:42:15, 1.61it/s] 41%|████ | 26679/65536 [4:34:31<6:36:17, 1.63it/s] 41%|████ | 26680/65536 [4:34:31<6:38:34, 1.62it/s] {'loss': 1.7101, 'learning_rate': 6.352393779845721e-07, 'epoch': 1646.91} + 41%|████ | 26680/65536 [4:34:31<6:38:34, 1.62it/s] 41%|████ | 26681/65536 [4:34:32<6:39:49, 1.62it/s] 41%|████ | 26682/65536 [4:34:32<6:53:29, 1.57it/s] 41%|████ | 26683/65536 [4:34:33<6:40:24, 1.62it/s] 41%|████ | 26684/65536 [4:34:34<6:43:45, 1.60it/s] 41%|████ | 26685/65536 [4:34:34<6:37:14, 1.63it/s] 41%|████ | 26686/65536 [4:34:35<6:30:27, 1.66it/s] 41%|████ | 26687/65536 [4:34:35<6:36:29, 1.63it/s] 41%|████ | 26688/65536 [4:34:36<6:35:57, 1.64it/s] 41%|████ | 26689/65536 [4:34:37<6:23:36, 1.69it/s] 41%|████ | 26690/65536 [4:34:37<6:17:31, 1.71it/s] 41%|████ | 26691/65536 [4:34:38<6:32:20, 1.65it/s] 41%|████ | 26692/65536 [4:34:38<6:31:46, 1.65it/s] 41%|████ | 26693/65536 [4:34:39<6:38:27, 1.62it/s] 41%|████ | 26694/65536 [4:34:40<6:34:28, 1.64it/s] 41%|████ | 26695/65536 [4:34:40<6:29:21, 1.66it/s] 41%|████ | 26696/65536 [4:34:41<6:32:54, 1.65it/s] 41%|████ | 26697/65536 [4:34:42<6:39:00, 1.62it/s] 41%|████ | 26698/65536 [4:34:42<6:52:52, 1.57it/s] 41%|████ | 26699/65536 [4:34:43<6:47:27, 1.59it/s] 41%|████ | 26700/65536 [4:34:43<6:43:15, 1.61it/s] {'loss': 1.7967, 'learning_rate': 6.34963879025346e-07, 'epoch': 1648.15} + 41%|████ | 26700/65536 [4:34:43<6:43:15, 1.61it/s] 41%|████ | 26701/65536 [4:34:44<6:46:18, 1.59it/s] 41%|████ | 26702/65536 [4:34:45<6:39:09, 1.62it/s] 41%|████ | 26703/65536 [4:34:45<6:34:59, 1.64it/s] 41%|████ | 26704/65536 [4:34:46<6:33:42, 1.64it/s] 41%|████ | 26705/65536 [4:34:46<6:39:25, 1.62it/s] 41%|████ | 26706/65536 [4:34:47<6:46:58, 1.59it/s] 41%|████ | 26707/65536 [4:34:48<6:45:55, 1.59it/s] 41%|████ | 26708/65536 [4:34:48<6:41:49, 1.61it/s] 41%|████ | 26709/65536 [4:34:49<6:42:23, 1.61it/s] 41%|████ | 26710/65536 [4:34:50<6:36:18, 1.63it/s] 41%|████ | 26711/65536 [4:34:50<6:33:19, 1.65it/s] 41%|████ | 26712/65536 [4:34:51<6:36:41, 1.63it/s] 41%|████ | 26713/65536 [4:34:51<6:28:17, 1.67it/s] 41%|████ | 26714/65536 [4:34:52<6:38:01, 1.63it/s] 41%|████ | 26715/65536 [4:34:53<6:37:41, 1.63it/s] 41%|████ | 26716/65536 [4:34:53<6:38:58, 1.62it/s] 41%|████ | 26717/65536 [4:34:54<6:40:31, 1.62it/s] 41%|████ | 26718/65536 [4:34:55<6:51:23, 1.57it/s] 41%|████ | 26719/65536 [4:34:55<6:42:13, 1.61it/s] 41%|████ | 26720/65536 [4:34:56<6:45:44, 1.59it/s] {'loss': 1.7317, 'learning_rate': 6.346883800661197e-07, 'epoch': 1649.38} + 41%|████ | 26720/65536 [4:34:56<6:45:44, 1.59it/s] 41%|████ | 26721/65536 [4:34:56<6:37:54, 1.63it/s] 41%|████ | 26722/65536 [4:34:57<6:40:44, 1.61it/s] 41%|████ | 26723/65536 [4:34:58<6:47:09, 1.59it/s] 41%|████ | 26724/65536 [4:34:58<6:47:17, 1.59it/s] 41%|████ | 26725/65536 [4:34:59<6:41:44, 1.61it/s] 41%|████ | 26726/65536 [4:34:59<6:32:40, 1.65it/s] 41%|████ | 26727/65536 [4:35:00<6:31:40, 1.65it/s] 41%|████ | 26728/65536 [4:35:01<6:20:33, 1.70it/s] 41%|████ | 26729/65536 [4:35:01<6:19:45, 1.70it/s] 41%|████ | 26730/65536 [4:35:02<6:17:11, 1.71it/s] 41%|████ | 26731/65536 [4:35:03<6:53:26, 1.56it/s] 41%|████ | 26732/65536 [4:35:03<6:48:02, 1.58it/s] 41%|████ | 26733/65536 [4:35:04<6:41:04, 1.61it/s] 41%|████ | 26734/65536 [4:35:04<6:39:16, 1.62it/s] 41%|████ | 26735/65536 [4:35:05<6:43:08, 1.60it/s] 41%|████ | 26736/65536 [4:35:06<6:34:35, 1.64it/s] 41%|████ | 26737/65536 [4:35:06<6:36:11, 1.63it/s] 41%|████ | 26738/65536 [4:35:07<6:38:08, 1.62it/s] 41%|████ | 26739/65536 [4:35:07<6:36:42, 1.63it/s] 41%|████ | 26740/65536 [4:35:08<6:30:55, 1.65it/s] {'loss': 1.766, 'learning_rate': 6.344128811068936e-07, 'epoch': 1650.62} + 41%|████ | 26740/65536 [4:35:08<6:30:55, 1.65it/s] 41%|████ | 26741/65536 [4:35:09<6:27:03, 1.67it/s] 41%|████ | 26742/65536 [4:35:09<6:18:32, 1.71it/s] 41%|████ | 26743/65536 [4:35:10<6:17:05, 1.71it/s] 41%|████ | 26744/65536 [4:35:10<6:16:50, 1.72it/s] 41%|████ | 26745/65536 [4:35:11<6:21:44, 1.69it/s] 41%|████ | 26746/65536 [4:35:12<6:29:55, 1.66it/s] 41%|████ | 26747/65536 [4:35:12<6:49:44, 1.58it/s] 41%|████ | 26748/65536 [4:35:13<7:02:05, 1.53it/s] 41%|████ | 26749/65536 [4:35:14<6:48:06, 1.58it/s] 41%|████ | 26750/65536 [4:35:14<6:38:10, 1.62it/s] 41%|████ | 26751/65536 [4:35:15<6:32:05, 1.65it/s] 41%|████ | 26752/65536 [4:35:15<6:22:06, 1.69it/s] 41%|████ | 26753/65536 [4:35:16<6:33:57, 1.64it/s] 41%|████ | 26754/65536 [4:35:17<6:36:14, 1.63it/s] 41%|████ | 26755/65536 [4:35:17<6:34:57, 1.64it/s] 41%|████ | 26756/65536 [4:35:18<6:32:01, 1.65it/s] 41%|████ | 26757/65536 [4:35:18<6:26:11, 1.67it/s] 41%|████ | 26758/65536 [4:35:19<6:33:50, 1.64it/s] 41%|████ | 26759/65536 [4:35:20<6:31:50, 1.65it/s] 41%|████ | 26760/65536 [4:35:20<6:35:07, 1.64it/s] {'loss': 1.7895, 'learning_rate': 6.341373821476675e-07, 'epoch': 1651.85} + 41%|████ | 26760/65536 [4:35:20<6:35:07, 1.64it/s] 41%|████ | 26761/65536 [4:35:21<6:35:53, 1.63it/s] 41%|████ | 26762/65536 [4:35:21<6:34:42, 1.64it/s] 41%|████ | 26763/65536 [4:35:22<7:05:06, 1.52it/s] 41%|████ | 26764/65536 [4:35:23<7:03:38, 1.53it/s] 41%|████ | 26765/65536 [4:35:23<6:53:57, 1.56it/s] 41%|████ | 26766/65536 [4:35:24<6:41:20, 1.61it/s] 41%|████ | 26767/65536 [4:35:25<6:34:25, 1.64it/s] 41%|████ | 26768/65536 [4:35:25<6:28:58, 1.66it/s] 41%|████ | 26769/65536 [4:35:26<6:32:20, 1.65it/s] 41%|████ | 26770/65536 [4:35:26<6:37:08, 1.63it/s] 41%|████ | 26771/65536 [4:35:27<6:26:16, 1.67it/s] 41%|████ | 26772/65536 [4:35:28<6:22:35, 1.69it/s] 41%|████ | 26773/65536 [4:35:28<6:22:08, 1.69it/s] 41%|████ | 26774/65536 [4:35:29<6:31:42, 1.65it/s] 41%|████ | 26775/65536 [4:35:29<6:34:59, 1.64it/s] 41%|████ | 26776/65536 [4:35:30<6:42:17, 1.61it/s] 41%|████ | 26777/65536 [4:35:31<6:35:24, 1.63it/s] 41%|████ | 26778/65536 [4:35:31<6:27:42, 1.67it/s] 41%|████ | 26779/65536 [4:35:32<6:41:15, 1.61it/s] 41%|████ | 26780/65536 [4:35:33<6:44:53, 1.60it/s] {'loss': 1.7515, 'learning_rate': 6.338618831884413e-07, 'epoch': 1653.09} + 41%|████ | 26780/65536 [4:35:33<6:44:53, 1.60it/s] 41%|████ | 26781/65536 [4:35:33<6:39:31, 1.62it/s] 41%|████ | 26782/65536 [4:35:34<6:43:45, 1.60it/s] 41%|████ | 26783/65536 [4:35:34<6:40:00, 1.61it/s] 41%|████ | 26784/65536 [4:35:35<6:27:40, 1.67it/s] 41%|████ | 26785/65536 [4:35:36<6:25:46, 1.67it/s] 41%|████ | 26786/65536 [4:35:36<6:28:34, 1.66it/s] 41%|████ | 26787/65536 [4:35:37<6:21:21, 1.69it/s] 41%|████ | 26788/65536 [4:35:37<6:28:31, 1.66it/s] 41%|████ | 26789/65536 [4:35:38<6:21:35, 1.69it/s] 41%|████ | 26790/65536 [4:35:39<6:31:25, 1.65it/s] 41%|████ | 26791/65536 [4:35:39<6:45:04, 1.59it/s] 41%|████ | 26792/65536 [4:35:40<6:37:00, 1.63it/s] 41%|████ | 26793/65536 [4:35:40<6:41:13, 1.61it/s] 41%|████ | 26794/65536 [4:35:41<6:40:01, 1.61it/s] 41%|████ | 26795/65536 [4:35:42<6:44:47, 1.60it/s] 41%|████ | 26796/65536 [4:35:42<6:44:18, 1.60it/s] 41%|████ | 26797/65536 [4:35:43<6:41:45, 1.61it/s] 41%|████ | 26798/65536 [4:35:44<6:49:46, 1.58it/s] 41%|████ | 26799/65536 [4:35:44<6:38:59, 1.62it/s] 41%|████ | 26800/65536 [4:35:45<6:41:12, 1.61it/s] {'loss': 1.7262, 'learning_rate': 6.335863842292151e-07, 'epoch': 1654.32} + 41%|████ | 26800/65536 [4:35:45<6:41:12, 1.61it/s] 41%|████ | 26801/65536 [4:35:45<6:38:53, 1.62it/s] 41%|████ | 26802/65536 [4:35:46<6:37:13, 1.63it/s] 41%|████ | 26803/65536 [4:35:47<6:34:22, 1.64it/s] 41%|████ | 26804/65536 [4:35:47<6:29:08, 1.66it/s] 41%|████ | 26805/65536 [4:35:48<6:36:50, 1.63it/s] 41%|████ | 26806/65536 [4:35:49<6:50:32, 1.57it/s] 41%|████ | 26807/65536 [4:35:49<6:37:43, 1.62it/s] 41%|████ | 26808/65536 [4:35:50<6:27:39, 1.67it/s] 41%|████ | 26809/65536 [4:35:50<6:28:02, 1.66it/s] 41%|████ | 26810/65536 [4:35:51<6:21:50, 1.69it/s] 41%|████ | 26811/65536 [4:35:51<6:23:54, 1.68it/s] 41%|████ | 26812/65536 [4:35:52<6:38:54, 1.62it/s] 41%|████ | 26813/65536 [4:35:53<6:37:03, 1.63it/s] 41%|████ | 26814/65536 [4:35:53<6:42:28, 1.60it/s] 41%|████ | 26815/65536 [4:35:54<6:30:22, 1.65it/s] 41%|████ | 26816/65536 [4:35:55<6:33:12, 1.64it/s] 41%|████ | 26817/65536 [4:35:55<6:34:20, 1.64it/s] 41%|████ | 26818/65536 [4:35:56<6:29:31, 1.66it/s] 41%|████ | 26819/65536 [4:35:56<6:32:06, 1.65it/s] 41%|████ | 26820/65536 [4:35:57<6:27:30, 1.67it/s] {'loss': 1.7339, 'learning_rate': 6.333108852699889e-07, 'epoch': 1655.56} + 41%|████ | 26820/65536 [4:35:57<6:27:30, 1.67it/s] 41%|████ | 26821/65536 [4:35:58<6:21:32, 1.69it/s] 41%|████ | 26822/65536 [4:35:58<6:23:41, 1.68it/s] 41%|████ | 26823/65536 [4:35:59<6:23:13, 1.68it/s] 41%|████ | 26824/65536 [4:35:59<6:25:35, 1.67it/s] 41%|████ | 26825/65536 [4:36:00<6:35:45, 1.63it/s] 41%|████ | 26826/65536 [4:36:01<6:53:18, 1.56it/s] 41%|████ | 26827/65536 [4:36:01<6:42:38, 1.60it/s] 41%|████ | 26828/65536 [4:36:02<6:56:21, 1.55it/s] 41%|████ | 26829/65536 [4:36:03<6:58:00, 1.54it/s] 41%|████ | 26830/65536 [4:36:03<6:42:59, 1.60it/s] 41%|████ | 26831/65536 [4:36:04<6:42:35, 1.60it/s] 41%|████ | 26832/65536 [4:36:04<6:40:38, 1.61it/s] 41%|████ | 26833/65536 [4:36:05<6:41:51, 1.61it/s] 41%|████ | 26834/65536 [4:36:06<6:50:25, 1.57it/s] 41%|████ | 26835/65536 [4:36:06<6:43:19, 1.60it/s] 41%|████ | 26836/65536 [4:36:07<6:40:57, 1.61it/s] 41%|████ | 26837/65536 [4:36:07<6:33:54, 1.64it/s] 41%|████ | 26838/65536 [4:36:08<6:33:15, 1.64it/s] 41%|████ | 26839/65536 [4:36:09<6:28:26, 1.66it/s] 41%|████ | 26840/65536 [4:36:09<6:30:04, 1.65it/s] {'loss': 1.721, 'learning_rate': 6.330353863107628e-07, 'epoch': 1656.79} + 41%|████ | 26840/65536 [4:36:09<6:30:04, 1.65it/s] 41%|████ | 26841/65536 [4:36:10<6:26:51, 1.67it/s] 41%|████ | 26842/65536 [4:36:11<6:29:20, 1.66it/s] 41%|████ | 26843/65536 [4:36:11<6:33:57, 1.64it/s] 41%|████ | 26844/65536 [4:36:12<6:45:13, 1.59it/s] 41%|████ | 26845/65536 [4:36:12<6:44:44, 1.59it/s] 41%|████ | 26846/65536 [4:36:13<6:33:56, 1.64it/s] 41%|████ | 26847/65536 [4:36:14<6:38:48, 1.62it/s] 41%|████ | 26848/65536 [4:36:14<6:47:34, 1.58it/s] 41%|████ | 26849/65536 [4:36:15<6:43:14, 1.60it/s] 41%|████ | 26850/65536 [4:36:16<6:38:46, 1.62it/s] 41%|████ | 26851/65536 [4:36:16<6:31:48, 1.65it/s] 41%|████ | 26852/65536 [4:36:17<6:28:28, 1.66it/s] 41%|████ | 26853/65536 [4:36:17<6:30:22, 1.65it/s] 41%|████ | 26854/65536 [4:36:18<6:29:08, 1.66it/s] 41%|████ | 26855/65536 [4:36:18<6:29:06, 1.66it/s] 41%|████ | 26856/65536 [4:36:19<6:40:01, 1.61it/s] 41%|████ | 26857/65536 [4:36:20<6:43:18, 1.60it/s] 41%|████ | 26858/65536 [4:36:20<6:41:36, 1.61it/s] 41%|████ | 26859/65536 [4:36:21<6:29:49, 1.65it/s] 41%|████ | 26860/65536 [4:36:22<6:45:25, 1.59it/s] {'loss': 1.757, 'learning_rate': 6.327598873515366e-07, 'epoch': 1658.02} + 41%|████ | 26860/65536 [4:36:22<6:45:25, 1.59it/s] 41%|████ | 26861/65536 [4:36:22<6:37:56, 1.62it/s] 41%|████ | 26862/65536 [4:36:23<6:41:13, 1.61it/s] 41%|████ | 26863/65536 [4:36:23<6:34:09, 1.64it/s] 41%|████ | 26864/65536 [4:36:24<6:32:52, 1.64it/s] 41%|████ | 26865/65536 [4:36:25<6:27:27, 1.66it/s] 41%|████ | 26866/65536 [4:36:25<6:20:02, 1.70it/s] 41%|████ | 26867/65536 [4:36:26<6:32:18, 1.64it/s] 41%|████ | 26868/65536 [4:36:26<6:23:39, 1.68it/s] 41%|████ | 26869/65536 [4:36:27<6:26:07, 1.67it/s] 41%|████ | 26870/65536 [4:36:28<6:24:04, 1.68it/s] 41%|████ | 26871/65536 [4:36:28<6:29:21, 1.66it/s] 41%|████ | 26872/65536 [4:36:29<6:26:13, 1.67it/s] 41%|████ | 26873/65536 [4:36:29<6:35:53, 1.63it/s] 41%|████ | 26874/65536 [4:36:30<6:41:08, 1.61it/s] 41%|████ | 26875/65536 [4:36:31<6:45:20, 1.59it/s] 41%|████ | 26876/65536 [4:36:31<6:56:49, 1.55it/s] 41%|████ | 26877/65536 [4:36:32<6:41:49, 1.60it/s] 41%|████ | 26878/65536 [4:36:33<6:41:10, 1.61it/s] 41%|████ | 26879/65536 [4:36:33<6:44:16, 1.59it/s] 41%|████ | 26880/65536 [4:36:34<6:42:32, 1.60it/s] {'loss': 1.7432, 'learning_rate': 6.324843883923105e-07, 'epoch': 1659.26} + 41%|████ | 26880/65536 [4:36:34<6:42:32, 1.60it/s] 41%|████ | 26881/65536 [4:36:34<6:33:35, 1.64it/s] 41%|████ | 26882/65536 [4:36:35<6:31:47, 1.64it/s] 41%|████ | 26883/65536 [4:36:36<6:40:20, 1.61it/s] 41%|████ | 26884/65536 [4:36:36<6:33:28, 1.64it/s] 41%|████ | 26885/65536 [4:36:37<6:40:24, 1.61it/s] 41%|████ | 26886/65536 [4:36:38<6:37:24, 1.62it/s] 41%|████ | 26887/65536 [4:36:38<6:40:40, 1.61it/s] 41%|████ | 26888/65536 [4:36:39<6:33:25, 1.64it/s] 41%|████ | 26889/65536 [4:36:39<6:40:13, 1.61it/s] 41%|████ | 26890/65536 [4:36:40<6:35:30, 1.63it/s] 41%|████ | 26891/65536 [4:36:41<6:31:39, 1.64it/s] 41%|████ | 26892/65536 [4:36:41<6:22:53, 1.68it/s] 41%|████ | 26893/65536 [4:36:42<6:31:22, 1.65it/s] 41%|████ | 26894/65536 [4:36:42<6:31:28, 1.65it/s] 41%|████ | 26895/65536 [4:36:43<6:39:30, 1.61it/s] 41%|████ | 26896/65536 [4:36:44<6:41:23, 1.60it/s] 41%|████ | 26897/65536 [4:36:44<6:56:57, 1.54it/s] 41%|████ | 26898/65536 [4:36:45<6:47:04, 1.58it/s] 41%|████ | 26899/65536 [4:36:46<6:41:46, 1.60it/s] 41%|████ | 26900/65536 [4:36:46<6:41:05, 1.61it/s] {'loss': 1.7564, 'learning_rate': 6.322088894330844e-07, 'epoch': 1660.49} + 41%|████ | 26900/65536 [4:36:46<6:41:05, 1.61it/s] 41%|████ | 26901/65536 [4:36:47<6:40:58, 1.61it/s] 41%|████ | 26902/65536 [4:36:47<6:34:20, 1.63it/s] 41%|████ | 26903/65536 [4:36:48<6:32:48, 1.64it/s] 41%|████ | 26904/65536 [4:36:49<6:28:02, 1.66it/s] 41%|████ | 26905/65536 [4:36:49<6:25:10, 1.67it/s] 41%|████ | 26906/65536 [4:36:50<6:17:17, 1.71it/s] 41%|████ | 26907/65536 [4:36:50<6:27:43, 1.66it/s] 41%|████ | 26908/65536 [4:36:51<6:24:29, 1.67it/s] 41%|████ | 26909/65536 [4:36:52<6:41:47, 1.60it/s] 41%|████ | 26910/65536 [4:36:52<6:39:12, 1.61it/s] 41%|████ | 26911/65536 [4:36:53<6:38:01, 1.62it/s] 41%|████ | 26912/65536 [4:36:54<6:43:05, 1.60it/s] 41%|████ | 26913/65536 [4:36:54<6:41:49, 1.60it/s] 41%|████ | 26914/65536 [4:36:55<6:38:14, 1.62it/s] 41%|████ | 26915/65536 [4:36:55<6:38:20, 1.62it/s] 41%|████ | 26916/65536 [4:36:56<6:32:47, 1.64it/s] 41%|████ | 26917/65536 [4:36:57<6:43:07, 1.60it/s] 41%|████ | 26918/65536 [4:36:57<6:33:41, 1.63it/s] 41%|████ | 26919/65536 [4:36:58<6:39:53, 1.61it/s] 41%|████ | 26920/65536 [4:36:59<6:36:34, 1.62it/s] {'loss': 1.7716, 'learning_rate': 6.319333904738582e-07, 'epoch': 1661.73} + 41%|████ | 26920/65536 [4:36:59<6:36:34, 1.62it/s] 41%|████ | 26921/65536 [4:36:59<6:32:20, 1.64it/s] 41%|████ | 26922/65536 [4:37:00<6:30:18, 1.65it/s] 41%|████ | 26923/65536 [4:37:00<6:32:12, 1.64it/s] 41%|████ | 26924/65536 [4:37:01<6:27:53, 1.66it/s] 41%|████ | 26925/65536 [4:37:02<6:38:11, 1.62it/s] 41%|████ | 26926/65536 [4:37:02<6:35:31, 1.63it/s] 41%|████ | 26927/65536 [4:37:03<6:33:30, 1.64it/s] 41%|████ | 26928/65536 [4:37:03<6:36:49, 1.62it/s] 41%|████ | 26929/65536 [4:37:04<6:46:30, 1.58it/s] 41%|████ | 26930/65536 [4:37:05<6:44:14, 1.59it/s] 41%|████ | 26931/65536 [4:37:05<6:39:51, 1.61it/s] 41%|████ | 26932/65536 [4:37:06<6:38:34, 1.61it/s] 41%|████ | 26933/65536 [4:37:07<6:35:15, 1.63it/s] 41%|████ | 26934/65536 [4:37:07<6:31:52, 1.64it/s] 41%|████ | 26935/65536 [4:37:08<6:39:57, 1.61it/s] 41%|████ | 26936/65536 [4:37:08<6:43:00, 1.60it/s] 41%|████ | 26937/65536 [4:37:09<6:36:14, 1.62it/s] 41%|████ | 26938/65536 [4:37:10<6:32:03, 1.64it/s] 41%|████ | 26939/65536 [4:37:10<6:34:57, 1.63it/s] 41%|████ | 26940/65536 [4:37:11<6:38:54, 1.61it/s] {'loss': 1.7049, 'learning_rate': 6.316578915146321e-07, 'epoch': 1662.96} + 41%|████ | 26940/65536 [4:37:11<6:38:54, 1.61it/s] 41%|████ | 26941/65536 [4:37:12<6:47:23, 1.58it/s] 41%|████ | 26942/65536 [4:37:12<7:01:34, 1.53it/s] 41%|████ | 26943/65536 [4:37:13<6:48:25, 1.57it/s] 41%|████ | 26944/65536 [4:37:13<6:38:35, 1.61it/s] 41%|████ | 26945/65536 [4:37:14<6:29:53, 1.65it/s] 41%|████ | 26946/65536 [4:37:15<6:36:23, 1.62it/s] 41%|████ | 26947/65536 [4:37:15<6:35:27, 1.63it/s] 41%|████ | 26948/65536 [4:37:16<6:41:40, 1.60it/s] 41%|████ | 26949/65536 [4:37:16<6:31:11, 1.64it/s] 41%|████ | 26950/65536 [4:37:17<6:32:17, 1.64it/s] 41%|████ | 26951/65536 [4:37:18<6:28:50, 1.65it/s] 41%|████ | 26952/65536 [4:37:18<6:41:35, 1.60it/s] 41%|████ | 26953/65536 [4:37:19<6:45:09, 1.59it/s] 41%|████ | 26954/65536 [4:37:20<6:39:35, 1.61it/s] 41%|████ | 26955/65536 [4:37:20<6:38:51, 1.61it/s] 41%|████ | 26956/65536 [4:37:21<6:33:27, 1.63it/s] 41%|████ | 26957/65536 [4:37:21<6:39:08, 1.61it/s] 41%|████ | 26958/65536 [4:37:22<6:40:00, 1.61it/s] 41%|████ | 26959/65536 [4:37:23<6:34:32, 1.63it/s] 41%|████ | 26960/65536 [4:37:23<6:26:21, 1.66it/s] {'loss': 1.7469, 'learning_rate': 6.313823925554059e-07, 'epoch': 1664.2} + 41%|████ | 26960/65536 [4:37:23<6:26:21, 1.66it/s] 41%|████ | 26961/65536 [4:37:24<6:26:15, 1.66it/s] 41%|████ | 26962/65536 [4:37:24<6:24:07, 1.67it/s] 41%|████ | 26963/65536 [4:37:25<6:29:28, 1.65it/s] 41%|████ | 26964/65536 [4:37:26<6:30:26, 1.65it/s] 41%|████ | 26965/65536 [4:37:26<6:46:13, 1.58it/s] 41%|████ | 26966/65536 [4:37:27<6:35:39, 1.62it/s] 41%|████ | 26967/65536 [4:37:27<6:35:05, 1.63it/s] 41%|████ | 26968/65536 [4:37:28<6:38:10, 1.61it/s] 41%|████ | 26969/65536 [4:37:29<6:26:09, 1.66it/s] 41%|████ | 26970/65536 [4:37:29<6:35:43, 1.62it/s] 41%|████ | 26971/65536 [4:37:30<6:49:04, 1.57it/s] 41%|████ | 26972/65536 [4:37:31<6:45:36, 1.58it/s] 41%|████ | 26973/65536 [4:37:31<6:36:39, 1.62it/s] 41%|████ | 26974/65536 [4:37:32<6:53:35, 1.55it/s] 41%|████ | 26975/65536 [4:37:33<6:41:30, 1.60it/s] 41%|████ | 26976/65536 [4:37:33<6:35:58, 1.62it/s] 41%|████ | 26977/65536 [4:37:34<6:37:54, 1.62it/s] 41%|████ | 26978/65536 [4:37:34<6:25:29, 1.67it/s] 41%|████ | 26979/65536 [4:37:35<6:37:09, 1.62it/s] 41%|████ | 26980/65536 [4:37:36<6:36:10, 1.62it/s] {'loss': 1.7347, 'learning_rate': 6.311068935961798e-07, 'epoch': 1665.43} + 41%|████ | 26980/65536 [4:37:36<6:36:10, 1.62it/s] 41%|████ | 26981/65536 [4:37:36<6:38:33, 1.61it/s] 41%|████ | 26982/65536 [4:37:37<6:36:18, 1.62it/s] 41%|████ | 26983/65536 [4:37:37<6:38:13, 1.61it/s] 41%|████ | 26984/65536 [4:37:38<6:37:19, 1.62it/s] 41%|████ | 26985/65536 [4:37:39<6:43:45, 1.59it/s] 41%|████ | 26986/65536 [4:37:39<6:37:07, 1.62it/s] 41%|████ | 26987/65536 [4:37:40<6:34:33, 1.63it/s] 41%|████ | 26988/65536 [4:37:40<6:26:36, 1.66it/s] 41%|████ | 26989/65536 [4:37:41<6:31:47, 1.64it/s] 41%|████ | 26990/65536 [4:37:42<6:46:46, 1.58it/s] 41%|████ | 26991/65536 [4:37:42<6:38:41, 1.61it/s] 41%|████ | 26992/65536 [4:37:43<6:37:00, 1.62it/s] 41%|████ | 26993/65536 [4:37:44<6:39:13, 1.61it/s] 41%|████ | 26994/65536 [4:37:44<6:34:14, 1.63it/s] 41%|████ | 26995/65536 [4:37:45<6:47:09, 1.58it/s] 41%|████ | 26996/65536 [4:37:45<6:42:11, 1.60it/s] 41%|████ | 26997/65536 [4:37:46<6:33:18, 1.63it/s] 41%|████ | 26998/65536 [4:37:47<6:39:54, 1.61it/s] 41%|████ | 26999/65536 [4:37:47<6:29:23, 1.65it/s] 41%|████ | 27000/65536 [4:37:48<6:28:28, 1.65it/s] {'loss': 1.7357, 'learning_rate': 6.308313946369535e-07, 'epoch': 1666.67} + 41%|████ | 27000/65536 [4:37:48<6:28:28, 1.65it/s] 41%|████ | 27001/65536 [4:37:48<6:27:27, 1.66it/s] 41%|████ | 27002/65536 [4:37:49<6:28:23, 1.65it/s] 41%|████ | 27003/65536 [4:37:50<6:18:56, 1.69it/s] 41%|████ | 27004/65536 [4:37:50<6:29:12, 1.65it/s] 41%|████ | 27005/65536 [4:37:51<6:33:01, 1.63it/s] 41%|████ | 27006/65536 [4:37:52<6:39:39, 1.61it/s] 41%|████ | 27007/65536 [4:37:52<6:34:38, 1.63it/s] 41%|████ | 27008/65536 [4:37:53<6:25:39, 1.67it/s] 41%|████ | 27009/65536 [4:37:53<6:20:53, 1.69it/s] 41%|████ | 27010/65536 [4:37:54<6:15:57, 1.71it/s] 41%|████ | 27011/65536 [4:37:54<6:16:50, 1.70it/s] 41%|████ | 27012/65536 [4:37:55<6:33:51, 1.63it/s] 41%|████ | 27013/65536 [4:37:56<6:40:05, 1.60it/s] 41%|████ | 27014/65536 [4:37:56<6:44:23, 1.59it/s] 41%|████ | 27015/65536 [4:37:57<6:39:47, 1.61it/s] 41%|████ | 27016/65536 [4:37:58<6:36:58, 1.62it/s] 41%|████ | 27017/65536 [4:37:58<6:33:42, 1.63it/s] 41%|████ | 27018/65536 [4:37:59<6:31:24, 1.64it/s] 41%|████ | 27019/65536 [4:37:59<6:24:10, 1.67it/s] 41%|████ | 27020/65536 [4:38:00<6:25:06, 1.67it/s] {'loss': 1.7542, 'learning_rate': 6.305558956777274e-07, 'epoch': 1667.9} + 41%|████ | 27020/65536 [4:38:00<6:25:06, 1.67it/s] 41%|████ | 27021/65536 [4:38:01<6:41:00, 1.60it/s] 41%|████ | 27022/65536 [4:38:01<6:54:58, 1.55it/s] 41%|████ | 27023/65536 [4:38:02<6:44:17, 1.59it/s] 41%|████ | 27024/65536 [4:38:03<6:37:19, 1.62it/s] 41%|████ | 27025/65536 [4:38:03<6:38:34, 1.61it/s] 41%|████ | 27026/65536 [4:38:04<6:35:11, 1.62it/s] 41%|████ | 27027/65536 [4:38:04<6:39:03, 1.61it/s] 41%|████ | 27028/65536 [4:38:05<6:31:46, 1.64it/s] 41%|████ | 27029/65536 [4:38:06<6:37:13, 1.62it/s] 41%|████ | 27030/65536 [4:38:06<6:32:29, 1.64it/s] 41%|████ | 27031/65536 [4:38:07<6:35:04, 1.62it/s] 41%|████ | 27032/65536 [4:38:07<6:31:27, 1.64it/s] 41%|████ | 27033/65536 [4:38:08<6:35:04, 1.62it/s] 41%|████▏ | 27034/65536 [4:38:09<6:39:20, 1.61it/s] 41%|████▏ | 27035/65536 [4:38:09<6:37:11, 1.62it/s] 41%|████▏ | 27036/65536 [4:38:10<6:39:38, 1.61it/s] 41%|████▏ | 27037/65536 [4:38:11<6:37:36, 1.61it/s] 41%|████▏ | 27038/65536 [4:38:11<6:49:05, 1.57it/s] 41%|████▏ | 27039/65536 [4:38:12<6:44:12, 1.59it/s] 41%|████▏ | 27040/65536 [4:38:13<6:59:51, 1.53it/s] {'loss': 1.6961, 'learning_rate': 6.302803967185013e-07, 'epoch': 1669.14} + 41%|████▏ | 27040/65536 [4:38:13<6:59:51, 1.53it/s] 41%|████▏ | 27041/65536 [4:38:13<6:45:15, 1.58it/s] 41%|████▏ | 27042/65536 [4:38:14<6:35:01, 1.62it/s] 41%|████▏ | 27043/65536 [4:38:14<6:26:23, 1.66it/s] 41%|████▏ | 27044/65536 [4:38:15<6:18:28, 1.70it/s] 41%|████▏ | 27045/65536 [4:38:16<6:18:45, 1.69it/s] 41%|████▏ | 27046/65536 [4:38:16<6:29:28, 1.65it/s] 41%|████▏ | 27047/65536 [4:38:17<6:29:53, 1.65it/s] 41%|████▏ | 27048/65536 [4:38:17<6:20:52, 1.68it/s] 41%|████▏ | 27049/65536 [4:38:18<6:20:42, 1.68it/s] 41%|████▏ | 27050/65536 [4:38:19<6:32:38, 1.63it/s] 41%|████▏ | 27051/65536 [4:38:19<6:33:58, 1.63it/s] 41%|████▏ | 27052/65536 [4:38:20<6:31:10, 1.64it/s] 41%|████▏ | 27053/65536 [4:38:20<6:27:24, 1.66it/s] 41%|████▏ | 27054/65536 [4:38:21<6:28:58, 1.65it/s] 41%|████▏ | 27055/65536 [4:38:22<6:42:09, 1.59it/s] 41%|████▏ | 27056/65536 [4:38:22<6:37:39, 1.61it/s] 41%|████▏ | 27057/65536 [4:38:23<6:32:33, 1.63it/s] 41%|████▏ | 27058/65536 [4:38:24<6:40:06, 1.60it/s] 41%|████▏ | 27059/65536 [4:38:24<6:44:04, 1.59it/s] 41%|████▏ | 27060/65536 [4:38:25<6:34:15, 1.63it/s] {'loss': 1.7537, 'learning_rate': 6.30004897759275e-07, 'epoch': 1670.37} + 41%|████▏ | 27060/65536 [4:38:25<6:34:15, 1.63it/s] 41%|████▏ | 27061/65536 [4:38:25<6:39:51, 1.60it/s] 41%|████▏ | 27062/65536 [4:38:26<6:26:36, 1.66it/s] 41%|████▏ | 27063/65536 [4:38:27<6:25:33, 1.66it/s] 41%|████▏ | 27064/65536 [4:38:27<6:22:49, 1.67it/s] 41%|████▏ | 27065/65536 [4:38:28<6:32:59, 1.63it/s] 41%|████▏ | 27066/65536 [4:38:28<6:27:59, 1.65it/s] 41%|████▏ | 27067/65536 [4:38:29<6:35:59, 1.62it/s] 41%|████▏ | 27068/65536 [4:38:30<6:31:47, 1.64it/s] 41%|████▏ | 27069/65536 [4:38:30<6:29:03, 1.65it/s] 41%|████▏ | 27070/65536 [4:38:31<6:28:17, 1.65it/s] 41%|████▏ | 27071/65536 [4:38:31<6:39:36, 1.60it/s] 41%|████▏ | 27072/65536 [4:38:32<6:31:48, 1.64it/s] 41%|████▏ | 27073/65536 [4:38:33<6:34:35, 1.62it/s] 41%|████▏ | 27074/65536 [4:38:33<6:41:24, 1.60it/s] 41%|████▏ | 27075/65536 [4:38:34<6:39:17, 1.61it/s] 41%|████▏ | 27076/65536 [4:38:35<6:40:40, 1.60it/s] 41%|████▏ | 27077/65536 [4:38:35<6:43:35, 1.59it/s] 41%|████▏ | 27078/65536 [4:38:36<6:30:54, 1.64it/s] 41%|████▏ | 27079/65536 [4:38:36<6:28:51, 1.65it/s] 41%|████▏ | 27080/65536 [4:38:37<6:23:55, 1.67it/s] {'loss': 1.7612, 'learning_rate': 6.297293988000489e-07, 'epoch': 1671.6} + 41%|████▏ | 27080/65536 [4:38:37<6:23:55, 1.67it/s] 41%|████▏ | 27081/65536 [4:38:38<6:21:40, 1.68it/s] 41%|████▏ | 27082/65536 [4:38:38<6:24:16, 1.67it/s] 41%|████▏ | 27083/65536 [4:38:39<6:23:05, 1.67it/s] 41%|████▏ | 27084/65536 [4:38:39<6:27:35, 1.65it/s] 41%|████▏ | 27085/65536 [4:38:40<6:26:02, 1.66it/s] 41%|████▏ | 27086/65536 [4:38:41<6:36:15, 1.62it/s] 41%|████▏ | 27087/65536 [4:38:41<6:51:13, 1.56it/s] 41%|████▏ | 27088/65536 [4:38:42<6:49:52, 1.56it/s] 41%|████▏ | 27089/65536 [4:38:43<6:41:06, 1.60it/s] 41%|████▏ | 27090/65536 [4:38:43<6:44:30, 1.58it/s] 41%|████▏ | 27091/65536 [4:38:44<6:31:51, 1.64it/s] 41%|████▏ | 27092/65536 [4:38:44<6:27:41, 1.65it/s] 41%|████▏ | 27093/65536 [4:38:45<6:31:32, 1.64it/s] 41%|████▏ | 27094/65536 [4:38:46<6:31:34, 1.64it/s] 41%|████▏ | 27095/65536 [4:38:46<6:37:01, 1.61it/s] 41%|████▏ | 27096/65536 [4:38:47<6:34:40, 1.62it/s] 41%|████▏ | 27097/65536 [4:38:47<6:39:13, 1.60it/s] 41%|████▏ | 27098/65536 [4:38:48<6:40:45, 1.60it/s] 41%|████▏ | 27099/65536 [4:38:49<6:32:25, 1.63it/s] 41%|████▏ | 27100/65536 [4:38:49<6:26:47, 1.66it/s] {'loss': 1.7779, 'learning_rate': 6.294538998408227e-07, 'epoch': 1672.84} + 41%|████▏ | 27100/65536 [4:38:49<6:26:47, 1.66it/s] 41%|████▏ | 27101/65536 [4:38:50<6:31:58, 1.63it/s] 41%|████▏ | 27102/65536 [4:38:51<6:33:02, 1.63it/s] 41%|████▏ | 27103/65536 [4:38:51<6:37:50, 1.61it/s] 41%|████▏ | 27104/65536 [4:38:52<6:37:06, 1.61it/s] 41%|████▏ | 27105/65536 [4:38:52<6:36:17, 1.62it/s] 41%|████▏ | 27106/65536 [4:38:53<6:34:23, 1.62it/s] 41%|████▏ | 27107/65536 [4:38:54<6:30:59, 1.64it/s] 41%|████▏ | 27108/65536 [4:38:54<6:25:44, 1.66it/s] 41%|████▏ | 27109/65536 [4:38:55<6:19:57, 1.69it/s] 41%|████▏ | 27110/65536 [4:38:55<6:16:01, 1.70it/s] 41%|████▏ | 27111/65536 [4:38:56<6:16:05, 1.70it/s] 41%|████▏ | 27112/65536 [4:38:57<6:22:47, 1.67it/s] 41%|████▏ | 27113/65536 [4:38:57<6:28:28, 1.65it/s] 41%|████▏ | 27114/65536 [4:38:58<6:49:06, 1.57it/s] 41%|████▏ | 27115/65536 [4:38:58<6:42:30, 1.59it/s] 41%|████▏ | 27116/65536 [4:38:59<6:42:35, 1.59it/s] 41%|████▏ | 27117/65536 [4:39:00<6:38:28, 1.61it/s] 41%|████▏ | 27118/65536 [4:39:00<6:35:15, 1.62it/s] 41%|████▏ | 27119/65536 [4:39:01<6:43:46, 1.59it/s] 41%|████▏ | 27120/65536 [4:39:02<6:29:03, 1.65it/s] {'loss': 1.7238, 'learning_rate': 6.291784008815966e-07, 'epoch': 1674.07} + 41%|████▏ | 27120/65536 [4:39:02<6:29:03, 1.65it/s] 41%|████▏ | 27121/65536 [4:39:02<6:44:42, 1.58it/s] 41%|████▏ | 27122/65536 [4:39:03<6:38:01, 1.61it/s] 41%|████▏ | 27123/65536 [4:39:03<6:31:43, 1.63it/s] 41%|████▏ | 27124/65536 [4:39:04<6:45:33, 1.58it/s] 41%|████▏ | 27125/65536 [4:39:05<6:41:34, 1.59it/s] 41%|████▏ | 27126/65536 [4:39:05<6:35:03, 1.62it/s] 41%|████▏ | 27127/65536 [4:39:06<6:31:04, 1.64it/s] 41%|████▏ | 27128/65536 [4:39:07<6:40:28, 1.60it/s] 41%|████▏ | 27129/65536 [4:39:07<6:47:08, 1.57it/s] 41%|████▏ | 27130/65536 [4:39:08<6:37:13, 1.61it/s] 41%|████▏ | 27131/65536 [4:39:08<6:31:39, 1.63it/s] 41%|████▏ | 27132/65536 [4:39:09<6:33:47, 1.63it/s] 41%|████▏ | 27133/65536 [4:39:10<6:32:55, 1.63it/s] 41%|████▏ | 27134/65536 [4:39:10<6:25:04, 1.66it/s] 41%|████▏ | 27135/65536 [4:39:11<6:26:12, 1.66it/s] 41%|████▏ | 27136/65536 [4:39:11<6:41:11, 1.60it/s] 41%|████▏ | 27137/65536 [4:39:12<6:30:43, 1.64it/s] 41%|████▏ | 27138/65536 [4:39:13<6:26:28, 1.66it/s] 41%|████▏ | 27139/65536 [4:39:13<6:25:27, 1.66it/s] 41%|████▏ | 27140/65536 [4:39:14<6:39:26, 1.60it/s] {'loss': 1.7107, 'learning_rate': 6.289029019223705e-07, 'epoch': 1675.31} + 41%|████▏ | 27140/65536 [4:39:14<6:39:26, 1.60it/s] 41%|████▏ | 27141/65536 [4:39:14<6:27:07, 1.65it/s] 41%|████▏ | 27142/65536 [4:39:15<6:27:06, 1.65it/s] 41%|████▏ | 27143/65536 [4:39:16<6:42:58, 1.59it/s] 41%|████▏ | 27144/65536 [4:39:16<6:34:42, 1.62it/s] 41%|████▏ | 27145/65536 [4:39:17<6:39:11, 1.60it/s] 41%|████▏ | 27146/65536 [4:39:18<6:38:07, 1.61it/s] 41%|████▏ | 27147/65536 [4:39:18<6:34:42, 1.62it/s] 41%|████▏ | 27148/65536 [4:39:19<6:41:41, 1.59it/s] 41%|████▏ | 27149/65536 [4:39:19<6:36:09, 1.61it/s] 41%|████▏ | 27150/65536 [4:39:20<6:26:14, 1.66it/s] 41%|████▏ | 27151/65536 [4:39:21<6:23:51, 1.67it/s] 41%|████▏ | 27152/65536 [4:39:21<6:31:06, 1.64it/s] 41%|████▏ | 27153/65536 [4:39:22<6:23:50, 1.67it/s] 41%|████▏ | 27154/65536 [4:39:22<6:28:55, 1.64it/s] 41%|████▏ | 27155/65536 [4:39:23<6:41:22, 1.59it/s] 41%|████▏ | 27156/65536 [4:39:24<6:43:39, 1.58it/s] 41%|████▏ | 27157/65536 [4:39:24<6:32:28, 1.63it/s] 41%|████▏ | 27158/65536 [4:39:25<6:29:44, 1.64it/s] 41%|████▏ | 27159/65536 [4:39:26<6:29:22, 1.64it/s] 41%|████▏ | 27160/65536 [4:39:26<6:37:44, 1.61it/s] {'loss': 1.7065, 'learning_rate': 6.286274029631443e-07, 'epoch': 1676.54} + 41%|████▏ | 27160/65536 [4:39:26<6:37:44, 1.61it/s] 41%|████▏ | 27161/65536 [4:39:27<6:39:21, 1.60it/s] 41%|████▏ | 27162/65536 [4:39:27<6:33:33, 1.63it/s] 41%|████▏ | 27163/65536 [4:39:28<6:44:00, 1.58it/s] 41%|████▏ | 27164/65536 [4:39:29<6:36:57, 1.61it/s] 41%|████▏ | 27165/65536 [4:39:29<6:30:38, 1.64it/s] 41%|████▏ | 27166/65536 [4:39:30<6:29:32, 1.64it/s] 41%|████▏ | 27167/65536 [4:39:30<6:21:02, 1.68it/s] 41%|████▏ | 27168/65536 [4:39:31<6:33:21, 1.63it/s] 41%|████▏ | 27169/65536 [4:39:32<6:25:16, 1.66it/s] 41%|████▏ | 27170/65536 [4:39:32<6:28:39, 1.65it/s] 41%|████▏ | 27171/65536 [4:39:33<6:41:54, 1.59it/s] 41%|████▏ | 27172/65536 [4:39:34<6:35:34, 1.62it/s] 41%|████▏ | 27173/65536 [4:39:34<6:34:34, 1.62it/s] 41%|████▏ | 27174/65536 [4:39:35<6:39:31, 1.60it/s] 41%|████▏ | 27175/65536 [4:39:35<6:42:17, 1.59it/s] 41%|████▏ | 27176/65536 [4:39:36<6:33:54, 1.62it/s] 41%|████▏ | 27177/65536 [4:39:37<6:22:18, 1.67it/s] 41%|████▏ | 27178/65536 [4:39:37<6:28:33, 1.65it/s] 41%|████▏ | 27179/65536 [4:39:38<6:23:10, 1.67it/s] 41%|████▏ | 27180/65536 [4:39:38<6:18:15, 1.69it/s] {'loss': 1.7517, 'learning_rate': 6.283519040039182e-07, 'epoch': 1677.78} + 41%|████▏ | 27180/65536 [4:39:38<6:18:15, 1.69it/s] 41%|████▏ | 27181/65536 [4:39:39<6:23:49, 1.67it/s] 41%|████▏ | 27182/65536 [4:39:40<6:29:27, 1.64it/s] 41%|████▏ | 27183/65536 [4:39:40<6:21:04, 1.68it/s] 41%|████▏ | 27184/65536 [4:39:41<6:43:18, 1.58it/s] 41%|████▏ | 27185/65536 [4:39:42<6:40:14, 1.60it/s] 41%|████▏ | 27186/65536 [4:39:42<6:31:18, 1.63it/s] 41%|████▏ | 27187/65536 [4:39:43<6:25:41, 1.66it/s] 41%|████▏ | 27188/65536 [4:39:43<6:34:25, 1.62it/s] 41%|████▏ | 27189/65536 [4:39:44<6:23:19, 1.67it/s] 41%|████▏ | 27190/65536 [4:39:45<6:19:34, 1.68it/s] 41%|████▏ | 27191/65536 [4:39:45<6:25:51, 1.66it/s] 41%|████▏ | 27192/65536 [4:39:46<6:40:13, 1.60it/s] 41%|████▏ | 27193/65536 [4:39:46<6:35:32, 1.62it/s] 41%|████▏ | 27194/65536 [4:39:47<6:23:19, 1.67it/s] 41%|████▏ | 27195/65536 [4:39:48<6:28:48, 1.64it/s] 41%|████▏ | 27196/65536 [4:39:48<6:25:12, 1.66it/s] 41%|████▏ | 27197/65536 [4:39:49<6:28:13, 1.65it/s] 42%|████▏ | 27198/65536 [4:39:49<6:24:37, 1.66it/s] 42%|████▏ | 27199/65536 [4:39:50<6:37:06, 1.61it/s] 42%|████▏ | 27200/65536 [4:39:51<6:43:41, 1.58it/s] {'loss': 1.7725, 'learning_rate': 6.28076405044692e-07, 'epoch': 1679.01} + 42%|████▏ | 27200/65536 [4:39:51<6:43:41, 1.58it/s] 42%|████▏ | 27201/65536 [4:39:51<6:38:04, 1.60it/s] 42%|████▏ | 27202/65536 [4:39:52<6:32:07, 1.63it/s] 42%|████▏ | 27203/65536 [4:39:53<6:46:24, 1.57it/s] 42%|████▏ | 27204/65536 [4:39:53<6:42:29, 1.59it/s] 42%|████▏ | 27205/65536 [4:39:54<6:32:00, 1.63it/s] 42%|████▏ | 27206/65536 [4:39:54<6:29:15, 1.64it/s] 42%|████▏ | 27207/65536 [4:39:55<6:25:21, 1.66it/s] 42%|████▏ | 27208/65536 [4:39:56<6:29:18, 1.64it/s] 42%|████▏ | 27209/65536 [4:39:56<6:24:49, 1.66it/s] 42%|████▏ | 27210/65536 [4:39:57<6:34:12, 1.62it/s] 42%|████▏ | 27211/65536 [4:39:57<6:38:22, 1.60it/s] 42%|████▏ | 27212/65536 [4:39:58<6:27:10, 1.65it/s] 42%|████▏ | 27213/65536 [4:39:59<6:18:20, 1.69it/s] 42%|████▏ | 27214/65536 [4:39:59<6:21:36, 1.67it/s] 42%|████▏ | 27215/65536 [4:40:00<6:21:16, 1.68it/s] 42%|████▏ | 27216/65536 [4:40:00<6:15:53, 1.70it/s] 42%|████▏ | 27217/65536 [4:40:01<6:36:31, 1.61it/s] 42%|████▏ | 27218/65536 [4:40:02<6:31:53, 1.63it/s] 42%|████▏ | 27219/65536 [4:40:02<6:35:02, 1.62it/s] 42%|████▏ | 27220/65536 [4:40:03<6:31:22, 1.63it/s] {'loss': 1.7428, 'learning_rate': 6.278009060854659e-07, 'epoch': 1680.25} + 42%|████▏ | 27220/65536 [4:40:03<6:31:22, 1.63it/s] 42%|████▏ | 27221/65536 [4:40:04<6:30:38, 1.63it/s] 42%|████▏ | 27222/65536 [4:40:04<6:16:40, 1.70it/s] 42%|████▏ | 27223/65536 [4:40:05<6:31:58, 1.63it/s] 42%|████▏ | 27224/65536 [4:40:05<6:39:06, 1.60it/s] 42%|████▏ | 27225/65536 [4:40:06<6:40:04, 1.60it/s] 42%|████▏ | 27226/65536 [4:40:07<6:34:09, 1.62it/s] 42%|████▏ | 27227/65536 [4:40:07<6:33:12, 1.62it/s] 42%|████▏ | 27228/65536 [4:40:08<6:34:51, 1.62it/s] 42%|████▏ | 27229/65536 [4:40:08<6:40:54, 1.59it/s] 42%|████▏ | 27230/65536 [4:40:09<6:37:28, 1.61it/s] 42%|████▏ | 27231/65536 [4:40:10<6:35:37, 1.61it/s] 42%|████▏ | 27232/65536 [4:40:10<6:30:19, 1.64it/s] 42%|████▏ | 27233/65536 [4:40:11<6:50:56, 1.55it/s] 42%|████▏ | 27234/65536 [4:40:12<6:40:32, 1.59it/s] 42%|████▏ | 27235/65536 [4:40:12<6:36:54, 1.61it/s] 42%|████▏ | 27236/65536 [4:40:13<6:33:50, 1.62it/s] 42%|████▏ | 27237/65536 [4:40:13<6:36:38, 1.61it/s] 42%|████▏ | 27238/65536 [4:40:14<6:39:49, 1.60it/s] 42%|████▏ | 27239/65536 [4:40:15<6:35:22, 1.61it/s] 42%|████▏ | 27240/65536 [4:40:15<6:29:45, 1.64it/s] {'loss': 1.7686, 'learning_rate': 6.275254071262399e-07, 'epoch': 1681.48} + 42%|████▏ | 27240/65536 [4:40:15<6:29:45, 1.64it/s] 42%|████▏ | 27241/65536 [4:40:16<6:35:12, 1.61it/s] 42%|████▏ | 27242/65536 [4:40:17<6:31:06, 1.63it/s] 42%|████▏ | 27243/65536 [4:40:17<6:26:40, 1.65it/s] 42%|████▏ | 27244/65536 [4:40:18<6:27:22, 1.65it/s] 42%|████▏ | 27245/65536 [4:40:18<6:23:15, 1.67it/s] 42%|████▏ | 27246/65536 [4:40:19<6:30:34, 1.63it/s] 42%|████▏ | 27247/65536 [4:40:20<6:32:19, 1.63it/s] 42%|████▏ | 27248/65536 [4:40:20<6:26:56, 1.65it/s] 42%|████▏ | 27249/65536 [4:40:21<6:40:12, 1.59it/s] 42%|████▏ | 27250/65536 [4:40:21<6:31:51, 1.63it/s] 42%|████▏ | 27251/65536 [4:40:22<6:38:36, 1.60it/s] 42%|████▏ | 27252/65536 [4:40:23<6:34:06, 1.62it/s] 42%|████▏ | 27253/65536 [4:40:23<6:35:15, 1.61it/s] 42%|████▏ | 27254/65536 [4:40:24<6:34:38, 1.62it/s] 42%|████▏ | 27255/65536 [4:40:24<6:26:31, 1.65it/s] 42%|████▏ | 27256/65536 [4:40:25<6:32:14, 1.63it/s] 42%|████▏ | 27257/65536 [4:40:26<6:36:49, 1.61it/s] 42%|████▏ | 27258/65536 [4:40:26<6:23:22, 1.66it/s] 42%|████▏ | 27259/65536 [4:40:27<6:30:07, 1.64it/s] 42%|████▏ | 27260/65536 [4:40:28<6:29:15, 1.64it/s] {'loss': 1.7229, 'learning_rate': 6.272499081670135e-07, 'epoch': 1682.72} + 42%|████▏ | 27260/65536 [4:40:28<6:29:15, 1.64it/s] 42%|████▏ | 27261/65536 [4:40:28<6:27:56, 1.64it/s] 42%|████▏ | 27262/65536 [4:40:29<6:30:40, 1.63it/s] 42%|████▏ | 27263/65536 [4:40:29<6:29:26, 1.64it/s] 42%|████▏ | 27264/65536 [4:40:30<6:25:14, 1.66it/s] 42%|████▏ | 27265/65536 [4:40:31<6:33:28, 1.62it/s] 42%|████▏ | 27266/65536 [4:40:31<6:37:00, 1.61it/s] 42%|████▏ | 27267/65536 [4:40:32<6:28:28, 1.64it/s] 42%|████▏ | 27268/65536 [4:40:32<6:20:49, 1.67it/s] 42%|████▏ | 27269/65536 [4:40:33<6:17:28, 1.69it/s] 42%|████▏ | 27270/65536 [4:40:34<6:18:27, 1.69it/s] 42%|████▏ | 27271/65536 [4:40:34<6:23:17, 1.66it/s] 42%|████▏ | 27272/65536 [4:40:35<6:20:20, 1.68it/s] 42%|████▏ | 27273/65536 [4:40:35<6:34:38, 1.62it/s] 42%|████▏ | 27274/65536 [4:40:36<6:28:41, 1.64it/s] 42%|████▏ | 27275/65536 [4:40:37<6:21:34, 1.67it/s] 42%|████▏ | 27276/65536 [4:40:37<6:18:23, 1.69it/s] 42%|████▏ | 27277/65536 [4:40:38<6:10:35, 1.72it/s] 42%|████▏ | 27278/65536 [4:40:38<6:25:54, 1.65it/s] 42%|████▏ | 27279/65536 [4:40:39<6:27:27, 1.65it/s] 42%|████▏ | 27280/65536 [4:40:40<6:21:52, 1.67it/s] {'loss': 1.7524, 'learning_rate': 6.269744092077874e-07, 'epoch': 1683.95} + 42%|████▏ | 27280/65536 [4:40:40<6:21:52, 1.67it/s] 42%|████▏ | 27281/65536 [4:40:40<6:51:55, 1.55it/s] 42%|████▏ | 27282/65536 [4:40:41<6:44:37, 1.58it/s] 42%|████▏ | 27283/65536 [4:40:42<6:37:34, 1.60it/s] 42%|████▏ | 27284/65536 [4:40:42<6:40:18, 1.59it/s] 42%|████▏ | 27285/65536 [4:40:43<6:37:32, 1.60it/s] 42%|████▏ | 27286/65536 [4:40:43<6:39:14, 1.60it/s] 42%|████▏ | 27287/65536 [4:40:44<6:30:16, 1.63it/s] 42%|████▏ | 27288/65536 [4:40:45<6:31:59, 1.63it/s] 42%|████▏ | 27289/65536 [4:40:45<6:24:25, 1.66it/s] 42%|████▏ | 27290/65536 [4:40:46<6:39:35, 1.60it/s] 42%|████▏ | 27291/65536 [4:40:47<6:42:20, 1.58it/s] 42%|████▏ | 27292/65536 [4:40:47<6:30:14, 1.63it/s] 42%|████▏ | 27293/65536 [4:40:48<6:32:08, 1.63it/s] 42%|████▏ | 27294/65536 [4:40:48<6:30:15, 1.63it/s] 42%|████▏ | 27295/65536 [4:40:49<6:23:00, 1.66it/s] 42%|████▏ | 27296/65536 [4:40:50<6:22:11, 1.67it/s] 42%|████▏ | 27297/65536 [4:40:50<6:20:45, 1.67it/s] 42%|████▏ | 27298/65536 [4:40:51<6:34:22, 1.62it/s] 42%|████▏ | 27299/65536 [4:40:51<6:38:33, 1.60it/s] 42%|████▏ | 27300/65536 [4:40:52<6:27:16, 1.65it/s] {'loss': 1.7512, 'learning_rate': 6.266989102485612e-07, 'epoch': 1685.19} + 42%|████▏ | 27300/65536 [4:40:52<6:27:16, 1.65it/s] 42%|████▏ | 27301/65536 [4:40:53<6:35:47, 1.61it/s] 42%|████▏ | 27302/65536 [4:40:53<6:28:02, 1.64it/s] 42%|████▏ | 27303/65536 [4:40:54<6:28:22, 1.64it/s] 42%|████▏ | 27304/65536 [4:40:54<6:22:25, 1.67it/s] 42%|████▏ | 27305/65536 [4:40:55<6:14:19, 1.70it/s] 42%|████▏ | 27306/65536 [4:40:56<6:21:58, 1.67it/s] 42%|████▏ | 27307/65536 [4:40:56<6:21:49, 1.67it/s] 42%|████▏ | 27308/65536 [4:40:57<6:26:01, 1.65it/s] 42%|████▏ | 27309/65536 [4:40:57<6:24:15, 1.66it/s] 42%|████▏ | 27310/65536 [4:40:58<6:27:52, 1.64it/s] 42%|████▏ | 27311/65536 [4:40:59<6:39:22, 1.60it/s] 42%|████▏ | 27312/65536 [4:40:59<6:30:10, 1.63it/s] 42%|████▏ | 27313/65536 [4:41:00<6:29:46, 1.63it/s] 42%|████▏ | 27314/65536 [4:41:01<6:37:10, 1.60it/s] 42%|████▏ | 27315/65536 [4:41:01<6:33:12, 1.62it/s] 42%|████▏ | 27316/65536 [4:41:02<6:24:26, 1.66it/s] 42%|████▏ | 27317/65536 [4:41:02<6:31:21, 1.63it/s] 42%|████▏ | 27318/65536 [4:41:03<6:24:15, 1.66it/s] 42%|████▏ | 27319/65536 [4:41:04<6:21:44, 1.67it/s] 42%|████▏ | 27320/65536 [4:41:04<6:26:16, 1.65it/s] {'loss': 1.7376, 'learning_rate': 6.264234112893351e-07, 'epoch': 1686.42} + 42%|████▏ | 27320/65536 [4:41:04<6:26:16, 1.65it/s] 42%|████▏ | 27321/65536 [4:41:05<6:32:12, 1.62it/s] 42%|████▏ | 27322/65536 [4:41:05<6:21:53, 1.67it/s] 42%|████▏ | 27323/65536 [4:41:06<6:33:33, 1.62it/s] 42%|████▏ | 27324/65536 [4:41:07<6:35:34, 1.61it/s] 42%|████▏ | 27325/65536 [4:41:07<6:35:20, 1.61it/s] 42%|████▏ | 27326/65536 [4:41:08<6:38:07, 1.60it/s] 42%|████▏ | 27327/65536 [4:41:09<6:44:42, 1.57it/s] 42%|████▏ | 27328/65536 [4:41:09<6:32:18, 1.62it/s] 42%|████▏ | 27329/65536 [4:41:10<6:26:05, 1.65it/s] 42%|████▏ | 27330/65536 [4:41:10<6:48:56, 1.56it/s] 42%|████▏ | 27331/65536 [4:41:11<6:41:16, 1.59it/s] 42%|████▏ | 27332/65536 [4:41:12<6:31:10, 1.63it/s] 42%|████▏ | 27333/65536 [4:41:12<6:29:48, 1.63it/s] 42%|████▏ | 27334/65536 [4:41:13<6:27:01, 1.65it/s] 42%|████▏ | 27335/65536 [4:41:13<6:23:49, 1.66it/s] 42%|████▏ | 27336/65536 [4:41:14<6:34:09, 1.62it/s] 42%|████▏ | 27337/65536 [4:41:15<6:35:21, 1.61it/s] 42%|████▏ | 27338/65536 [4:41:15<6:31:07, 1.63it/s] 42%|████▏ | 27339/65536 [4:41:16<6:30:21, 1.63it/s] 42%|████▏ | 27340/65536 [4:41:16<6:20:42, 1.67it/s] {'loss': 1.7337, 'learning_rate': 6.261479123301089e-07, 'epoch': 1687.65} + 42%|████▏ | 27340/65536 [4:41:16<6:20:42, 1.67it/s] 42%|████▏ | 27341/65536 [4:41:17<6:27:40, 1.64it/s] 42%|████▏ | 27342/65536 [4:41:18<6:29:01, 1.64it/s] 42%|████▏ | 27343/65536 [4:41:18<6:21:48, 1.67it/s] 42%|████▏ | 27344/65536 [4:41:19<6:21:46, 1.67it/s] 42%|████▏ | 27345/65536 [4:41:19<6:25:12, 1.65it/s] 42%|████▏ | 27346/65536 [4:41:20<6:45:56, 1.57it/s] 42%|████▏ | 27347/65536 [4:41:21<6:43:47, 1.58it/s] 42%|████▏ | 27348/65536 [4:41:21<6:32:35, 1.62it/s] 42%|████▏ | 27349/65536 [4:41:22<6:30:00, 1.63it/s] 42%|████▏ | 27350/65536 [4:41:23<6:28:22, 1.64it/s] 42%|████▏ | 27351/65536 [4:41:23<6:37:32, 1.60it/s] 42%|████▏ | 27352/65536 [4:41:24<6:33:36, 1.62it/s] 42%|████▏ | 27353/65536 [4:41:24<6:22:34, 1.66it/s] 42%|████▏ | 27354/65536 [4:41:25<6:18:56, 1.68it/s] 42%|████▏ | 27355/65536 [4:41:26<6:26:33, 1.65it/s] 42%|████▏ | 27356/65536 [4:41:26<6:20:06, 1.67it/s] 42%|████▏ | 27357/65536 [4:41:27<6:16:06, 1.69it/s] 42%|████▏ | 27358/65536 [4:41:27<6:18:43, 1.68it/s] 42%|████▏ | 27359/65536 [4:41:28<6:27:11, 1.64it/s] 42%|████▏ | 27360/65536 [4:41:29<6:35:11, 1.61it/s] {'loss': 1.7469, 'learning_rate': 6.258724133708828e-07, 'epoch': 1688.89} + 42%|████▏ | 27360/65536 [4:41:29<6:35:11, 1.61it/s] 42%|████▏ | 27361/65536 [4:41:29<6:42:16, 1.58it/s] 42%|████▏ | 27362/65536 [4:41:30<8:13:07, 1.29it/s] 42%|████▏ | 27363/65536 [4:41:31<7:33:24, 1.40it/s] 42%|████▏ | 27364/65536 [4:41:32<7:16:58, 1.46it/s] 42%|████▏ | 27365/65536 [4:41:32<7:00:28, 1.51it/s] 42%|████▏ | 27366/65536 [4:41:33<6:50:41, 1.55it/s] 42%|████▏ | 27367/65536 [4:41:33<6:44:59, 1.57it/s] 42%|████▏ | 27368/65536 [4:41:34<6:41:29, 1.58it/s] 42%|████▏ | 27369/65536 [4:41:35<6:40:04, 1.59it/s] 42%|████▏ | 27370/65536 [4:41:35<6:46:56, 1.56it/s] 42%|████▏ | 27371/65536 [4:41:36<6:40:14, 1.59it/s] 42%|████▏ | 27372/65536 [4:41:37<6:35:26, 1.61it/s] 42%|████▏ | 27373/65536 [4:41:37<6:35:43, 1.61it/s] 42%|████▏ | 27374/65536 [4:41:38<6:27:21, 1.64it/s] 42%|████▏ | 27375/65536 [4:41:38<6:22:22, 1.66it/s] 42%|████▏ | 27376/65536 [4:41:39<6:22:30, 1.66it/s] 42%|████▏ | 27377/65536 [4:41:40<6:24:23, 1.65it/s] 42%|████▏ | 27378/65536 [4:41:40<6:34:18, 1.61it/s] 42%|████▏ | 27379/65536 [4:41:41<6:43:44, 1.58it/s] 42%|████▏ | 27380/65536 [4:41:42<6:41:29, 1.58it/s] {'loss': 1.7249, 'learning_rate': 6.255969144116567e-07, 'epoch': 1690.12} + 42%|████▏ | 27380/65536 [4:41:42<6:41:29, 1.58it/s] 42%|████▏ | 27381/65536 [4:41:42<6:37:37, 1.60it/s] 42%|████▏ | 27382/65536 [4:41:43<6:48:00, 1.56it/s] 42%|████▏ | 27383/65536 [4:41:43<6:38:54, 1.59it/s] 42%|████▏ | 27384/65536 [4:41:44<6:31:51, 1.62it/s] 42%|████▏ | 27385/65536 [4:41:45<6:30:54, 1.63it/s] 42%|████▏ | 27386/65536 [4:41:45<6:36:28, 1.60it/s] 42%|████▏ | 27387/65536 [4:41:46<6:38:01, 1.60it/s] 42%|████▏ | 27388/65536 [4:41:47<6:37:13, 1.60it/s] 42%|████▏ | 27389/65536 [4:41:47<6:30:41, 1.63it/s] 42%|████▏ | 27390/65536 [4:41:48<6:35:42, 1.61it/s] 42%|████▏ | 27391/65536 [4:41:48<6:47:34, 1.56it/s] 42%|████▏ | 27392/65536 [4:41:49<6:32:19, 1.62it/s] 42%|████▏ | 27393/65536 [4:41:50<6:37:32, 1.60it/s] 42%|████▏ | 27394/65536 [4:41:50<6:27:57, 1.64it/s] 42%|████▏ | 27395/65536 [4:41:51<6:37:47, 1.60it/s] 42%|████▏ | 27396/65536 [4:41:51<6:26:01, 1.65it/s] 42%|████▏ | 27397/65536 [4:41:52<6:26:34, 1.64it/s] 42%|████▏ | 27398/65536 [4:41:53<6:19:56, 1.67it/s] 42%|████▏ | 27399/65536 [4:41:53<6:20:01, 1.67it/s] 42%|████▏ | 27400/65536 [4:41:54<6:24:31, 1.65it/s] {'loss': 1.728, 'learning_rate': 6.253214154524305e-07, 'epoch': 1691.36} + 42%|████▏ | 27400/65536 [4:41:54<6:24:31, 1.65it/s] 42%|████▏ | 27401/65536 [4:41:54<6:23:35, 1.66it/s] 42%|████▏ | 27402/65536 [4:41:55<6:26:08, 1.65it/s] 42%|████▏ | 27403/65536 [4:41:56<6:24:52, 1.65it/s] 42%|████▏ | 27404/65536 [4:41:56<6:29:06, 1.63it/s] 42%|████▏ | 27405/65536 [4:41:57<6:37:32, 1.60it/s] 42%|████▏ | 27406/65536 [4:41:58<6:32:38, 1.62it/s] 42%|████▏ | 27407/65536 [4:41:58<6:41:24, 1.58it/s] 42%|████▏ | 27408/65536 [4:41:59<6:35:52, 1.61it/s] 42%|████▏ | 27409/65536 [4:41:59<6:34:07, 1.61it/s] 42%|████▏ | 27410/65536 [4:42:00<6:31:31, 1.62it/s] 42%|████▏ | 27411/65536 [4:42:01<6:45:57, 1.57it/s] 42%|████▏ | 27412/65536 [4:42:01<6:54:07, 1.53it/s] 42%|████▏ | 27413/65536 [4:42:02<6:49:57, 1.55it/s] 42%|████▏ | 27414/65536 [4:42:03<6:33:44, 1.61it/s] 42%|████▏ | 27415/65536 [4:42:03<6:32:20, 1.62it/s] 42%|████▏ | 27416/65536 [4:42:04<6:23:34, 1.66it/s] 42%|████▏ | 27417/65536 [4:42:04<6:24:02, 1.65it/s] 42%|████▏ | 27418/65536 [4:42:05<6:19:51, 1.67it/s] 42%|████▏ | 27419/65536 [4:42:06<6:15:56, 1.69it/s] 42%|████▏ | 27420/65536 [4:42:06<6:17:55, 1.68it/s] {'loss': 1.7316, 'learning_rate': 6.250459164932044e-07, 'epoch': 1692.59} + 42%|████▏ | 27420/65536 [4:42:06<6:17:55, 1.68it/s] 42%|████▏ | 27421/65536 [4:42:07<6:17:12, 1.68it/s] 42%|████▏ | 27422/65536 [4:42:07<6:16:38, 1.69it/s] 42%|████▏ | 27423/65536 [4:42:08<6:29:58, 1.63it/s] 42%|████▏ | 27424/65536 [4:42:09<6:27:48, 1.64it/s] 42%|████▏ | 27425/65536 [4:42:09<6:28:49, 1.63it/s] 42%|████▏ | 27426/65536 [4:42:10<6:34:55, 1.61it/s] 42%|████▏ | 27427/65536 [4:42:11<6:40:58, 1.58it/s] 42%|████▏ | 27428/65536 [4:42:11<6:32:09, 1.62it/s] 42%|████▏ | 27429/65536 [4:42:12<6:35:04, 1.61it/s] 42%|████▏ | 27430/65536 [4:42:12<6:30:00, 1.63it/s] 42%|████▏ | 27431/65536 [4:42:13<6:30:29, 1.63it/s] 42%|████▏ | 27432/65536 [4:42:14<6:30:20, 1.63it/s] 42%|████▏ | 27433/65536 [4:42:14<6:33:16, 1.61it/s] 42%|████▏ | 27434/65536 [4:42:15<6:26:24, 1.64it/s] 42%|████▏ | 27435/65536 [4:42:15<6:21:56, 1.66it/s] 42%|████▏ | 27436/65536 [4:42:16<6:41:14, 1.58it/s] 42%|████▏ | 27437/65536 [4:42:17<6:31:01, 1.62it/s] 42%|████▏ | 27438/65536 [4:42:17<6:35:42, 1.60it/s] 42%|████▏ | 27439/65536 [4:42:18<6:31:23, 1.62it/s] 42%|████▏ | 27440/65536 [4:42:18<6:26:20, 1.64it/s] {'loss': 1.7038, 'learning_rate': 6.247704175339782e-07, 'epoch': 1693.83} + 42%|████▏ | 27440/65536 [4:42:18<6:26:20, 1.64it/s] 42%|████▏ | 27441/65536 [4:42:19<6:26:27, 1.64it/s] 42%|████▏ | 27442/65536 [4:42:20<6:43:40, 1.57it/s] 42%|████▏ | 27443/65536 [4:42:21<7:09:42, 1.48it/s] 42%|████▏ | 27444/65536 [4:42:21<7:01:25, 1.51it/s] 42%|████▏ | 27445/65536 [4:42:22<6:48:24, 1.55it/s] 42%|████▏ | 27446/65536 [4:42:22<6:38:26, 1.59it/s] 42%|████▏ | 27447/65536 [4:42:23<6:42:38, 1.58it/s] 42%|████▏ | 27448/65536 [4:42:24<6:41:30, 1.58it/s] 42%|████▏ | 27449/65536 [4:42:24<6:31:03, 1.62it/s] 42%|████▏ | 27450/65536 [4:42:25<6:27:19, 1.64it/s] 42%|████▏ | 27451/65536 [4:42:25<6:28:12, 1.64it/s] 42%|████▏ | 27452/65536 [4:42:26<6:33:32, 1.61it/s] 42%|████▏ | 27453/65536 [4:42:27<6:27:41, 1.64it/s] 42%|████▏ | 27454/65536 [4:42:27<6:23:03, 1.66it/s] 42%|████▏ | 27455/65536 [4:42:28<6:24:17, 1.65it/s] 42%|████▏ | 27456/65536 [4:42:28<6:26:10, 1.64it/s] 42%|████▏ | 27457/65536 [4:42:29<6:30:13, 1.63it/s] 42%|████▏ | 27458/65536 [4:42:30<6:28:02, 1.64it/s] 42%|████▏ | 27459/65536 [4:42:30<6:26:35, 1.64it/s] 42%|████▏ | 27460/65536 [4:42:31<6:37:16, 1.60it/s] {'loss': 1.7378, 'learning_rate': 6.244949185747521e-07, 'epoch': 1695.06} + 42%|████▏ | 27460/65536 [4:42:31<6:37:16, 1.60it/s] 42%|████▏ | 27461/65536 [4:42:32<6:31:29, 1.62it/s] 42%|████▏ | 27462/65536 [4:42:32<6:29:51, 1.63it/s] 42%|████▏ | 27463/65536 [4:42:33<6:37:33, 1.60it/s] 42%|████▏ | 27464/65536 [4:42:33<6:38:56, 1.59it/s] 42%|████▏ | 27465/65536 [4:42:34<6:34:21, 1.61it/s] 42%|████▏ | 27466/65536 [4:42:35<6:27:00, 1.64it/s] 42%|████▏ | 27467/65536 [4:42:35<6:28:37, 1.63it/s] 42%|████▏ | 27468/65536 [4:42:36<6:31:54, 1.62it/s] 42%|████▏ | 27469/65536 [4:42:37<6:26:58, 1.64it/s] 42%|████▏ | 27470/65536 [4:42:37<6:31:59, 1.62it/s] 42%|████▏ | 27471/65536 [4:42:38<6:29:57, 1.63it/s] 42%|████▏ | 27472/65536 [4:42:38<6:25:26, 1.65it/s] 42%|████▏ | 27473/65536 [4:42:39<6:25:14, 1.65it/s] 42%|████▏ | 27474/65536 [4:42:40<6:27:11, 1.64it/s] 42%|████▏ | 27475/65536 [4:42:40<6:20:23, 1.67it/s] 42%|████▏ | 27476/65536 [4:42:41<6:31:46, 1.62it/s] 42%|████▏ | 27477/65536 [4:42:41<6:29:52, 1.63it/s] 42%|████▏ | 27478/65536 [4:42:42<6:31:11, 1.62it/s] 42%|████▏ | 27479/65536 [4:42:43<6:25:17, 1.65it/s] 42%|████▏ | 27480/65536 [4:42:43<6:22:43, 1.66it/s] {'loss': 1.7121, 'learning_rate': 6.24219419615526e-07, 'epoch': 1696.3} + 42%|████▏ | 27480/65536 [4:42:43<6:22:43, 1.66it/s] 42%|████▏ | 27481/65536 [4:42:44<6:29:29, 1.63it/s] 42%|████▏ | 27482/65536 [4:42:44<6:26:12, 1.64it/s] 42%|████▏ | 27483/65536 [4:42:45<6:30:31, 1.62it/s] 42%|████▏ | 27484/65536 [4:42:46<6:23:45, 1.65it/s] 42%|████▏ | 27485/65536 [4:42:46<6:20:30, 1.67it/s] 42%|████▏ | 27486/65536 [4:42:47<6:25:59, 1.64it/s] 42%|████▏ | 27487/65536 [4:42:47<6:20:34, 1.67it/s] 42%|████▏ | 27488/65536 [4:42:48<6:26:13, 1.64it/s] 42%|████▏ | 27489/65536 [4:42:49<6:23:35, 1.65it/s] 42%|████▏ | 27490/65536 [4:42:49<6:13:42, 1.70it/s] 42%|████▏ | 27491/65536 [4:42:50<6:25:25, 1.65it/s] 42%|████▏ | 27492/65536 [4:42:51<6:38:22, 1.59it/s] 42%|████▏ | 27493/65536 [4:42:51<6:50:31, 1.54it/s] 42%|████▏ | 27494/65536 [4:42:52<6:40:02, 1.58it/s] 42%|████▏ | 27495/65536 [4:42:52<6:39:51, 1.59it/s] 42%|████▏ | 27496/65536 [4:42:53<6:26:09, 1.64it/s] 42%|████▏ | 27497/65536 [4:42:54<6:38:53, 1.59it/s] 42%|████▏ | 27498/65536 [4:42:54<6:40:58, 1.58it/s] 42%|████▏ | 27499/65536 [4:42:55<6:28:41, 1.63it/s] 42%|████▏ | 27500/65536 [4:42:56<6:24:32, 1.65it/s] {'loss': 1.7308, 'learning_rate': 6.239439206562998e-07, 'epoch': 1697.53} + 42%|████▏ | 27500/65536 [4:42:56<6:24:32, 1.65it/s] 42%|████▏ | 27501/65536 [4:42:56<6:36:39, 1.60it/s] 42%|████▏ | 27502/65536 [4:42:57<6:31:19, 1.62it/s] 42%|████▏ | 27503/65536 [4:42:57<6:23:35, 1.65it/s] 42%|████▏ | 27504/65536 [4:42:58<6:24:30, 1.65it/s] 42%|████▏ | 27505/65536 [4:42:59<6:23:29, 1.65it/s] 42%|████▏ | 27506/65536 [4:42:59<6:24:06, 1.65it/s] 42%|████▏ | 27507/65536 [4:43:00<6:20:31, 1.67it/s] 42%|████▏ | 27508/65536 [4:43:00<6:28:23, 1.63it/s] 42%|████▏ | 27509/65536 [4:43:01<6:43:26, 1.57it/s] 42%|████▏ | 27510/65536 [4:43:02<6:36:54, 1.60it/s] 42%|████▏ | 27511/65536 [4:43:02<6:31:17, 1.62it/s] 42%|████▏ | 27512/65536 [4:43:03<6:40:38, 1.58it/s] 42%|████▏ | 27513/65536 [4:43:04<6:36:23, 1.60it/s] 42%|████▏ | 27514/65536 [4:43:04<6:35:20, 1.60it/s] 42%|████▏ | 27515/65536 [4:43:05<6:29:18, 1.63it/s] 42%|████▏ | 27516/65536 [4:43:05<6:31:19, 1.62it/s] 42%|████▏ | 27517/65536 [4:43:06<6:29:14, 1.63it/s] 42%|████▏ | 27518/65536 [4:43:07<6:34:09, 1.61it/s] 42%|████▏ | 27519/65536 [4:43:07<6:29:37, 1.63it/s] 42%|████▏ | 27520/65536 [4:43:08<6:25:40, 1.64it/s] {'loss': 1.7108, 'learning_rate': 6.236684216970737e-07, 'epoch': 1698.77} + 42%|████▏ | 27520/65536 [4:43:08<6:25:40, 1.64it/s] 42%|████▏ | 27521/65536 [4:43:08<6:20:13, 1.67it/s] 42%|████▏ | 27522/65536 [4:43:09<6:10:22, 1.71it/s] 42%|████▏ | 27523/65536 [4:43:10<6:12:12, 1.70it/s] 42%|████▏ | 27524/65536 [4:43:10<6:36:44, 1.60it/s] 42%|████▏ | 27525/65536 [4:43:11<6:29:48, 1.63it/s] 42%|████▏ | 27526/65536 [4:43:11<6:29:51, 1.62it/s] 42%|████▏ | 27527/65536 [4:43:12<6:31:07, 1.62it/s] 42%|████▏ | 27528/65536 [4:43:13<6:28:36, 1.63it/s] 42%|████▏ | 27529/65536 [4:43:13<6:25:36, 1.64it/s] 42%|████▏ | 27530/65536 [4:43:14<6:26:57, 1.64it/s] 42%|████▏ | 27531/65536 [4:43:15<6:24:15, 1.65it/s] 42%|████▏ | 27532/65536 [4:43:15<6:26:06, 1.64it/s] 42%|████▏ | 27533/65536 [4:43:16<6:29:59, 1.62it/s] 42%|████▏ | 27534/65536 [4:43:16<6:35:38, 1.60it/s] 42%|████▏ | 27535/65536 [4:43:17<6:26:54, 1.64it/s] 42%|████▏ | 27536/65536 [4:43:18<6:22:48, 1.65it/s] 42%|████▏ | 27537/65536 [4:43:18<6:26:20, 1.64it/s] 42%|████▏ | 27538/65536 [4:43:19<6:23:26, 1.65it/s] 42%|████▏ | 27539/65536 [4:43:19<6:15:35, 1.69it/s] 42%|████▏ | 27540/65536 [4:43:20<6:21:08, 1.66it/s] {'loss': 1.7702, 'learning_rate': 6.233929227378474e-07, 'epoch': 1700.0} + 42%|████▏ | 27540/65536 [4:43:20<6:21:08, 1.66it/s] 42%|████▏ | 27541/65536 [4:43:21<6:33:29, 1.61it/s] 42%|████▏ | 27542/65536 [4:43:21<6:26:07, 1.64it/s] 42%|████▏ | 27543/65536 [4:43:22<6:21:06, 1.66it/s] 42%|████▏ | 27544/65536 [4:43:22<6:23:11, 1.65it/s] 42%|████▏ | 27545/65536 [4:43:23<6:21:03, 1.66it/s] 42%|████▏ | 27546/65536 [4:43:24<6:22:29, 1.66it/s] 42%|████▏ | 27547/65536 [4:43:24<6:23:05, 1.65it/s] 42%|████▏ | 27548/65536 [4:43:25<6:22:16, 1.66it/s] 42%|████▏ | 27549/65536 [4:43:26<6:33:54, 1.61it/s] 42%|████▏ | 27550/65536 [4:43:26<6:35:32, 1.60it/s] 42%|████▏ | 27551/65536 [4:43:27<6:35:40, 1.60it/s] 42%|████▏ | 27552/65536 [4:43:27<6:44:03, 1.57it/s] 42%|████▏ | 27553/65536 [4:43:28<6:46:15, 1.56it/s] 42%|████▏ | 27554/65536 [4:43:29<6:44:27, 1.57it/s] 42%|████▏ | 27555/65536 [4:43:29<6:40:50, 1.58it/s] 42%|████▏ | 27556/65536 [4:43:30<6:32:08, 1.61it/s] 42%|████▏ | 27557/65536 [4:43:31<6:42:59, 1.57it/s] 42%|████▏ | 27558/65536 [4:43:31<6:35:20, 1.60it/s] 42%|████▏ | 27559/65536 [4:43:32<6:35:07, 1.60it/s] 42%|████▏ | 27560/65536 [4:43:32<6:36:36, 1.60it/s] {'loss': 1.7137, 'learning_rate': 6.231174237786213e-07, 'epoch': 1701.23} + 42%|████▏ | 27560/65536 [4:43:32<6:36:36, 1.60it/s] 42%|████▏ | 27561/65536 [4:43:33<6:35:44, 1.60it/s] 42%|████▏ | 27562/65536 [4:43:34<6:36:07, 1.60it/s] 42%|████▏ | 27563/65536 [4:43:34<6:31:16, 1.62it/s] 42%|████▏ | 27564/65536 [4:43:35<6:29:46, 1.62it/s] 42%|████▏ | 27565/65536 [4:43:36<6:30:07, 1.62it/s] 42%|████▏ | 27566/65536 [4:43:36<6:36:09, 1.60it/s] 42%|████▏ | 27567/65536 [4:43:37<6:25:31, 1.64it/s] 42%|████▏ | 27568/65536 [4:43:37<6:22:59, 1.65it/s] 42%|████▏ | 27569/65536 [4:43:38<6:25:14, 1.64it/s] 42%|████▏ | 27570/65536 [4:43:39<6:32:54, 1.61it/s] 42%|████▏ | 27571/65536 [4:43:39<6:26:27, 1.64it/s] 42%|████▏ | 27572/65536 [4:43:40<6:28:19, 1.63it/s] 42%|████▏ | 27573/65536 [4:43:41<6:50:29, 1.54it/s] 42%|████▏ | 27574/65536 [4:43:41<6:43:58, 1.57it/s] 42%|████▏ | 27575/65536 [4:43:42<6:39:13, 1.58it/s] 42%|████▏ | 27576/65536 [4:43:42<6:33:00, 1.61it/s] 42%|████▏ | 27577/65536 [4:43:43<6:33:54, 1.61it/s] 42%|████▏ | 27578/65536 [4:43:44<6:23:55, 1.65it/s] 42%|████▏ | 27579/65536 [4:43:44<6:19:20, 1.67it/s] 42%|████▏ | 27580/65536 [4:43:45<6:05:49, 1.73it/s] {'loss': 1.7127, 'learning_rate': 6.228419248193951e-07, 'epoch': 1702.47} + 42%|████▏ | 27580/65536 [4:43:45<6:05:49, 1.73it/s] 42%|████▏ | 27581/65536 [4:43:45<6:08:16, 1.72it/s] 42%|████▏ | 27582/65536 [4:43:46<6:15:28, 1.68it/s] 42%|████▏ | 27583/65536 [4:43:47<6:14:58, 1.69it/s] 42%|████▏ | 27584/65536 [4:43:47<6:23:54, 1.65it/s] 42%|████▏ | 27585/65536 [4:43:48<6:27:14, 1.63it/s] 42%|████▏ | 27586/65536 [4:43:48<6:18:53, 1.67it/s] 42%|████▏ | 27587/65536 [4:43:49<6:23:27, 1.65it/s] 42%|████▏ | 27588/65536 [4:43:50<6:40:15, 1.58it/s] 42%|████▏ | 27589/65536 [4:43:50<6:48:00, 1.55it/s] 42%|████▏ | 27590/65536 [4:43:51<6:44:55, 1.56it/s] 42%|████▏ | 27591/65536 [4:43:52<6:36:57, 1.59it/s] 42%|████▏ | 27592/65536 [4:43:52<6:28:36, 1.63it/s] 42%|████▏ | 27593/65536 [4:43:53<6:19:44, 1.67it/s] 42%|████▏ | 27594/65536 [4:43:53<6:20:57, 1.66it/s] 42%|████▏ | 27595/65536 [4:43:54<6:28:52, 1.63it/s] 42%|████▏ | 27596/65536 [4:43:55<6:33:44, 1.61it/s] 42%|████▏ | 27597/65536 [4:43:55<6:26:08, 1.64it/s] 42%|████▏ | 27598/65536 [4:43:56<6:41:22, 1.58it/s] 42%|████▏ | 27599/65536 [4:43:56<6:33:25, 1.61it/s] 42%|████▏ | 27600/65536 [4:43:57<6:28:36, 1.63it/s] {'loss': 1.7142, 'learning_rate': 6.225664258601689e-07, 'epoch': 1703.7} + 42%|████▏ | 27600/65536 [4:43:57<6:28:36, 1.63it/s] 42%|████▏ | 27601/65536 [4:43:58<6:30:11, 1.62it/s] 42%|████▏ | 27602/65536 [4:43:58<6:30:57, 1.62it/s] 42%|████▏ | 27603/65536 [4:43:59<6:30:36, 1.62it/s] 42%|████▏ | 27604/65536 [4:44:00<6:28:31, 1.63it/s] 42%|████▏ | 27605/65536 [4:44:00<6:43:13, 1.57it/s] 42%|████▏ | 27606/65536 [4:44:01<6:39:33, 1.58it/s] 42%|████▏ | 27607/65536 [4:44:01<6:29:24, 1.62it/s] 42%|████▏ | 27608/65536 [4:44:02<6:22:05, 1.65it/s] 42%|████▏ | 27609/65536 [4:44:03<6:22:36, 1.65it/s] 42%|████▏ | 27610/65536 [4:44:03<6:20:08, 1.66it/s] 42%|████▏ | 27611/65536 [4:44:04<6:25:44, 1.64it/s] 42%|████▏ | 27612/65536 [4:44:04<6:20:40, 1.66it/s] 42%|████▏ | 27613/65536 [4:44:05<6:24:38, 1.64it/s] 42%|████▏ | 27614/65536 [4:44:06<6:33:23, 1.61it/s] 42%|████▏ | 27615/65536 [4:44:06<6:37:22, 1.59it/s] 42%|████▏ | 27616/65536 [4:44:07<6:46:49, 1.55it/s] 42%|████▏ | 27617/65536 [4:44:08<6:29:28, 1.62it/s] 42%|████▏ | 27618/65536 [4:44:08<6:15:33, 1.68it/s] 42%|████▏ | 27619/65536 [4:44:09<6:17:02, 1.68it/s] 42%|████▏ | 27620/65536 [4:44:09<6:16:22, 1.68it/s] {'loss': 1.771, 'learning_rate': 6.222909269009427e-07, 'epoch': 1704.94} + 42%|████▏ | 27620/65536 [4:44:09<6:16:22, 1.68it/s] 42%|████▏ | 27621/65536 [4:44:10<6:21:18, 1.66it/s] 42%|████▏ | 27622/65536 [4:44:11<6:37:33, 1.59it/s] 42%|████▏ | 27623/65536 [4:44:11<6:29:04, 1.62it/s] 42%|████▏ | 27624/65536 [4:44:12<6:26:29, 1.63it/s] 42%|████▏ | 27625/65536 [4:44:12<6:35:44, 1.60it/s] 42%|████▏ | 27626/65536 [4:44:13<6:27:33, 1.63it/s] 42%|████▏ | 27627/65536 [4:44:14<6:25:38, 1.64it/s] 42%|████▏ | 27628/65536 [4:44:14<6:19:59, 1.66it/s] 42%|████▏ | 27629/65536 [4:44:15<6:18:03, 1.67it/s] 42%|████▏ | 27630/65536 [4:44:15<6:25:20, 1.64it/s] 42%|████▏ | 27631/65536 [4:44:16<6:26:10, 1.64it/s] 42%|████▏ | 27632/65536 [4:44:17<6:24:01, 1.65it/s] 42%|████▏ | 27633/65536 [4:44:17<6:26:28, 1.63it/s] 42%|████▏ | 27634/65536 [4:44:18<6:28:37, 1.63it/s] 42%|████▏ | 27635/65536 [4:44:19<6:32:00, 1.61it/s] 42%|████▏ | 27636/65536 [4:44:19<6:27:36, 1.63it/s] 42%|████▏ | 27637/65536 [4:44:20<6:33:54, 1.60it/s] 42%|████▏ | 27638/65536 [4:44:20<6:45:06, 1.56it/s] 42%|████▏ | 27639/65536 [4:44:21<6:46:17, 1.55it/s] 42%|████▏ | 27640/65536 [4:44:22<6:51:13, 1.54it/s] {'loss': 1.7219, 'learning_rate': 6.220154279417166e-07, 'epoch': 1706.17} + 42%|████▏ | 27640/65536 [4:44:22<6:51:13, 1.54it/s] 42%|████▏ | 27641/65536 [4:44:22<6:46:44, 1.55it/s] 42%|████▏ | 27642/65536 [4:44:23<6:28:45, 1.62it/s] 42%|████▏ | 27643/65536 [4:44:24<6:20:44, 1.66it/s] 42%|████▏ | 27644/65536 [4:44:24<6:19:03, 1.67it/s] 42%|████▏ | 27645/65536 [4:44:25<6:24:33, 1.64it/s] 42%|████▏ | 27646/65536 [4:44:25<6:31:19, 1.61it/s] 42%|████▏ | 27647/65536 [4:44:26<6:32:33, 1.61it/s] 42%|████▏ | 27648/65536 [4:44:27<6:55:30, 1.52it/s] 42%|████▏ | 27649/65536 [4:44:27<6:58:51, 1.51it/s] 42%|████▏ | 27650/65536 [4:44:28<6:46:06, 1.55it/s] 42%|████▏ | 27651/65536 [4:44:29<6:38:05, 1.59it/s] 42%|████▏ | 27652/65536 [4:44:29<6:31:38, 1.61it/s] 42%|████▏ | 27653/65536 [4:44:30<6:23:51, 1.64it/s] 42%|████▏ | 27654/65536 [4:44:31<6:39:43, 1.58it/s] 42%|████▏ | 27655/65536 [4:44:31<6:46:59, 1.55it/s] 42%|████▏ | 27656/65536 [4:44:32<6:39:44, 1.58it/s] 42%|████▏ | 27657/65536 [4:44:32<6:37:43, 1.59it/s] 42%|████▏ | 27658/65536 [4:44:33<6:33:55, 1.60it/s] 42%|████▏ | 27659/65536 [4:44:34<6:29:35, 1.62it/s] 42%|████▏ | 27660/65536 [4:44:34<6:22:16, 1.65it/s] {'loss': 1.7772, 'learning_rate': 6.217399289824905e-07, 'epoch': 1707.41} + 42%|████▏ | 27660/65536 [4:44:34<6:22:16, 1.65it/s] 42%|████▏ | 27661/65536 [4:44:35<6:31:28, 1.61it/s] 42%|████▏ | 27662/65536 [4:44:35<6:28:18, 1.63it/s] 42%|████▏ | 27663/65536 [4:44:36<6:25:21, 1.64it/s] 42%|████▏ | 27664/65536 [4:44:37<6:21:31, 1.65it/s] 42%|████▏ | 27665/65536 [4:44:37<6:34:24, 1.60it/s] 42%|████▏ | 27666/65536 [4:44:38<6:27:26, 1.63it/s] 42%|████▏ | 27667/65536 [4:44:39<6:25:21, 1.64it/s] 42%|████▏ | 27668/65536 [4:44:39<6:25:09, 1.64it/s] 42%|████▏ | 27669/65536 [4:44:40<6:29:50, 1.62it/s] 42%|████▏ | 27670/65536 [4:44:40<6:44:26, 1.56it/s] 42%|████▏ | 27671/65536 [4:44:41<6:39:02, 1.58it/s] 42%|████▏ | 27672/65536 [4:44:42<6:44:14, 1.56it/s] 42%|████▏ | 27673/65536 [4:44:42<6:38:52, 1.58it/s] 42%|████▏ | 27674/65536 [4:44:43<6:31:27, 1.61it/s] 42%|████▏ | 27675/65536 [4:44:44<6:30:18, 1.62it/s] 42%|████▏ | 27676/65536 [4:44:44<6:26:08, 1.63it/s] 42%|████▏ | 27677/65536 [4:44:45<6:33:11, 1.60it/s] 42%|████▏ | 27678/65536 [4:44:45<6:27:01, 1.63it/s] 42%|████▏ | 27679/65536 [4:44:46<6:25:50, 1.64it/s] 42%|████▏ | 27680/65536 [4:44:47<6:24:03, 1.64it/s] {'loss': 1.6827, 'learning_rate': 6.214644300232643e-07, 'epoch': 1708.64} + 42%|████▏ | 27680/65536 [4:44:47<6:24:03, 1.64it/s] 42%|████▏ | 27681/65536 [4:44:47<6:28:39, 1.62it/s] 42%|████▏ | 27682/65536 [4:44:48<6:26:48, 1.63it/s] 42%|████▏ | 27683/65536 [4:44:48<6:26:49, 1.63it/s] 42%|████▏ | 27684/65536 [4:44:49<6:34:06, 1.60it/s] 42%|████▏ | 27685/65536 [4:44:50<6:25:49, 1.64it/s] 42%|████▏ | 27686/65536 [4:44:50<6:32:49, 1.61it/s] 42%|████▏ | 27687/65536 [4:44:51<6:39:37, 1.58it/s] 42%|████▏ | 27688/65536 [4:44:52<6:34:34, 1.60it/s] 42%|████▏ | 27689/65536 [4:44:52<6:50:08, 1.54it/s] 42%|████▏ | 27690/65536 [4:44:53<6:40:16, 1.58it/s] 42%|████▏ | 27691/65536 [4:44:54<6:35:46, 1.59it/s] 42%|████▏ | 27692/65536 [4:44:54<6:26:10, 1.63it/s] 42%|████▏ | 27693/65536 [4:44:55<6:27:47, 1.63it/s] 42%|████▏ | 27694/65536 [4:44:55<6:25:25, 1.64it/s] 42%|████▏ | 27695/65536 [4:44:56<6:31:18, 1.61it/s] 42%|████▏ | 27696/65536 [4:44:57<6:20:54, 1.66it/s] 42%|████▏ | 27697/65536 [4:44:57<6:18:09, 1.67it/s] 42%|████▏ | 27698/65536 [4:44:58<6:25:10, 1.64it/s] 42%|████▏ | 27699/65536 [4:44:58<6:22:45, 1.65it/s] 42%|████▏ | 27700/65536 [4:44:59<6:32:56, 1.60it/s] {'loss': 1.7013, 'learning_rate': 6.211889310640382e-07, 'epoch': 1709.88} + 42%|████▏ | 27700/65536 [4:44:59<6:32:56, 1.60it/s] 42%|████▏ | 27701/65536 [4:45:00<6:31:06, 1.61it/s] 42%|████▏ | 27702/65536 [4:45:00<6:22:44, 1.65it/s] 42%|████▏ | 27703/65536 [4:45:01<6:33:33, 1.60it/s] 42%|████▏ | 27704/65536 [4:45:01<6:30:32, 1.61it/s] 42%|████▏ | 27705/65536 [4:45:02<6:21:53, 1.65it/s] 42%|████▏ | 27706/65536 [4:45:03<6:27:05, 1.63it/s] 42%|████▏ | 27707/65536 [4:45:03<6:24:51, 1.64it/s] 42%|████▏ | 27708/65536 [4:45:04<6:22:08, 1.65it/s] 42%|████▏ | 27709/65536 [4:45:05<6:27:53, 1.63it/s] 42%|████▏ | 27710/65536 [4:45:05<6:27:41, 1.63it/s] 42%|████▏ | 27711/65536 [4:45:06<6:27:20, 1.63it/s] 42%|████▏ | 27712/65536 [4:45:06<6:35:20, 1.59it/s] 42%|████▏ | 27713/65536 [4:45:07<6:27:47, 1.63it/s] 42%|████▏ | 27714/65536 [4:45:08<6:27:53, 1.63it/s] 42%|████▏ | 27715/65536 [4:45:08<6:45:11, 1.56it/s] 42%|████▏ | 27716/65536 [4:45:09<6:37:14, 1.59it/s] 42%|████▏ | 27717/65536 [4:45:10<6:35:38, 1.59it/s] 42%|████▏ | 27718/65536 [4:45:10<6:37:29, 1.59it/s] 42%|████▏ | 27719/65536 [4:45:11<6:39:23, 1.58it/s] 42%|████▏ | 27720/65536 [4:45:11<6:30:55, 1.61it/s] {'loss': 1.7148, 'learning_rate': 6.20913432104812e-07, 'epoch': 1711.11} + 42%|████▏ | 27720/65536 [4:45:11<6:30:55, 1.61it/s] 42%|████▏ | 27721/65536 [4:45:12<6:29:35, 1.62it/s] 42%|████▏ | 27722/65536 [4:45:13<6:33:46, 1.60it/s] 42%|████▏ | 27723/65536 [4:45:13<6:27:31, 1.63it/s] 42%|████▏ | 27724/65536 [4:45:14<6:29:40, 1.62it/s] 42%|████▏ | 27725/65536 [4:45:15<6:37:58, 1.58it/s] 42%|████▏ | 27726/65536 [4:45:15<6:40:26, 1.57it/s] 42%|████▏ | 27727/65536 [4:45:16<6:45:43, 1.55it/s] 42%|████▏ | 27728/65536 [4:45:16<6:31:55, 1.61it/s] 42%|████▏ | 27729/65536 [4:45:17<6:19:05, 1.66it/s] 42%|████▏ | 27730/65536 [4:45:18<6:17:27, 1.67it/s] 42%|████▏ | 27731/65536 [4:45:18<6:17:27, 1.67it/s] 42%|████▏ | 27732/65536 [4:45:19<6:19:40, 1.66it/s] 42%|████▏ | 27733/65536 [4:45:19<6:12:31, 1.69it/s] 42%|████▏ | 27734/65536 [4:45:20<6:21:55, 1.65it/s] 42%|████▏ | 27735/65536 [4:45:21<6:39:30, 1.58it/s] 42%|████▏ | 27736/65536 [4:45:21<6:32:12, 1.61it/s] 42%|████▏ | 27737/65536 [4:45:22<6:30:14, 1.61it/s] 42%|████▏ | 27738/65536 [4:45:22<6:20:20, 1.66it/s] 42%|████▏ | 27739/65536 [4:45:23<6:20:45, 1.65it/s] 42%|████▏ | 27740/65536 [4:45:24<6:19:33, 1.66it/s] {'loss': 1.7623, 'learning_rate': 6.206379331455859e-07, 'epoch': 1712.35} + 42%|████▏ | 27740/65536 [4:45:24<6:19:33, 1.66it/s] 42%|████▏ | 27741/65536 [4:45:24<6:20:48, 1.65it/s] 42%|████▏ | 27742/65536 [4:45:25<6:16:52, 1.67it/s] 42%|████▏ | 27743/65536 [4:45:25<6:13:42, 1.69it/s] 42%|████▏ | 27744/65536 [4:45:26<6:16:59, 1.67it/s] 42%|████▏ | 27745/65536 [4:45:27<6:24:05, 1.64it/s] 42%|████▏ | 27746/65536 [4:45:27<6:27:50, 1.62it/s] 42%|████▏ | 27747/65536 [4:45:28<6:44:20, 1.56it/s] 42%|████▏ | 27748/65536 [4:45:29<6:36:57, 1.59it/s] 42%|████▏ | 27749/65536 [4:45:29<6:32:51, 1.60it/s] 42%|████▏ | 27750/65536 [4:45:30<6:27:03, 1.63it/s] 42%|████▏ | 27751/65536 [4:45:31<6:43:21, 1.56it/s] 42%|████▏ | 27752/65536 [4:45:31<6:35:08, 1.59it/s] 42%|████▏ | 27753/65536 [4:45:32<6:39:41, 1.58it/s] 42%|████▏ | 27754/65536 [4:45:32<6:31:34, 1.61it/s] 42%|████▏ | 27755/65536 [4:45:33<6:25:05, 1.64it/s] 42%|████▏ | 27756/65536 [4:45:34<6:30:27, 1.61it/s] 42%|████▏ | 27757/65536 [4:45:34<6:25:48, 1.63it/s] 42%|████▏ | 27758/65536 [4:45:35<6:18:45, 1.66it/s] 42%|████▏ | 27759/65536 [4:45:35<6:18:10, 1.66it/s] 42%|████▏ | 27760/65536 [4:45:36<6:17:37, 1.67it/s] {'loss': 1.7249, 'learning_rate': 6.203624341863598e-07, 'epoch': 1713.58} + 42%|████▏ | 27760/65536 [4:45:36<6:17:37, 1.67it/s] 42%|████▏ | 27761/65536 [4:45:37<6:22:11, 1.65it/s] 42%|████▏ | 27762/65536 [4:45:37<6:22:34, 1.65it/s] 42%|████▏ | 27763/65536 [4:45:38<6:21:51, 1.65it/s] 42%|████▏ | 27764/65536 [4:45:38<6:26:28, 1.63it/s] 42%|████▏ | 27765/65536 [4:45:39<6:35:27, 1.59it/s] 42%|████▏ | 27766/65536 [4:45:40<6:39:31, 1.58it/s] 42%|████▏ | 27767/65536 [4:45:40<6:46:22, 1.55it/s] 42%|████▏ | 27768/65536 [4:45:41<6:38:26, 1.58it/s] 42%|████▏ | 27769/65536 [4:45:42<6:31:17, 1.61it/s] 42%|████▏ | 27770/65536 [4:45:42<6:30:06, 1.61it/s] 42%|████▏ | 27771/65536 [4:45:43<6:33:17, 1.60it/s] 42%|████▏ | 27772/65536 [4:45:43<6:33:06, 1.60it/s] 42%|████▏ | 27773/65536 [4:45:44<6:34:23, 1.60it/s] 42%|████▏ | 27774/65536 [4:45:45<6:24:33, 1.64it/s] 42%|████▏ | 27775/65536 [4:45:45<6:23:59, 1.64it/s] 42%|████▏ | 27776/65536 [4:45:46<6:27:10, 1.63it/s] 42%|████▏ | 27777/65536 [4:45:47<6:28:31, 1.62it/s] 42%|████▏ | 27778/65536 [4:45:47<6:23:06, 1.64it/s] 42%|████▏ | 27779/65536 [4:45:48<6:16:13, 1.67it/s] 42%|████▏ | 27780/65536 [4:45:48<6:13:46, 1.68it/s] {'loss': 1.7041, 'learning_rate': 6.200869352271336e-07, 'epoch': 1714.81} + 42%|████▏ | 27780/65536 [4:45:48<6:13:46, 1.68it/s] 42%|████▏ | 27781/65536 [4:45:49<6:27:20, 1.62it/s] 42%|████▏ | 27782/65536 [4:45:50<6:25:13, 1.63it/s] 42%|████▏ | 27783/65536 [4:45:50<6:27:07, 1.63it/s] 42%|████▏ | 27784/65536 [4:45:51<6:43:02, 1.56it/s] 42%|████▏ | 27785/65536 [4:45:52<6:45:27, 1.55it/s] 42%|████▏ | 27786/65536 [4:45:52<6:40:32, 1.57it/s] 42%|████▏ | 27787/65536 [4:45:53<6:27:43, 1.62it/s] 42%|████▏ | 27788/65536 [4:45:53<6:24:24, 1.64it/s] 42%|████▏ | 27789/65536 [4:45:54<6:22:08, 1.65it/s] 42%|████▏ | 27790/65536 [4:45:54<6:18:23, 1.66it/s] 42%|████▏ | 27791/65536 [4:45:55<6:24:03, 1.64it/s] 42%|████▏ | 27792/65536 [4:45:56<6:27:33, 1.62it/s] 42%|████▏ | 27793/65536 [4:45:56<6:19:07, 1.66it/s] 42%|████▏ | 27794/65536 [4:45:57<6:12:34, 1.69it/s] 42%|████▏ | 27795/65536 [4:45:57<6:14:49, 1.68it/s] 42%|████▏ | 27796/65536 [4:45:58<6:31:48, 1.61it/s] 42%|████▏ | 27797/65536 [4:45:59<6:33:32, 1.60it/s] 42%|████▏ | 27798/65536 [4:45:59<6:34:25, 1.59it/s] 42%|████▏ | 27799/65536 [4:46:00<6:31:38, 1.61it/s] 42%|████▏ | 27800/65536 [4:46:01<6:48:24, 1.54it/s] {'loss': 1.6833, 'learning_rate': 6.198114362679074e-07, 'epoch': 1716.05} + 42%|████▏ | 27800/65536 [4:46:01<6:48:24, 1.54it/s] 42%|████▏ | 27801/65536 [4:46:01<6:40:34, 1.57it/s] 42%|████▏ | 27802/65536 [4:46:02<6:32:24, 1.60it/s] 42%|████▏ | 27803/65536 [4:46:03<6:36:09, 1.59it/s] 42%|████▏ | 27804/65536 [4:46:03<6:49:03, 1.54it/s] 42%|████▏ | 27805/65536 [4:46:04<6:33:33, 1.60it/s] 42%|████▏ | 27806/65536 [4:46:04<6:23:39, 1.64it/s] 42%|████▏ | 27807/65536 [4:46:05<6:21:17, 1.65it/s] 42%|████▏ | 27808/65536 [4:46:06<6:16:28, 1.67it/s] 42%|████▏ | 27809/65536 [4:46:06<6:10:41, 1.70it/s] 42%|████▏ | 27810/65536 [4:46:07<6:23:04, 1.64it/s] 42%|████▏ | 27811/65536 [4:46:07<6:19:45, 1.66it/s] 42%|████▏ | 27812/65536 [4:46:08<6:24:42, 1.63it/s] 42%|████▏ | 27813/65536 [4:46:09<6:19:05, 1.66it/s] 42%|████▏ | 27814/65536 [4:46:09<6:21:15, 1.65it/s] 42%|████▏ | 27815/65536 [4:46:10<6:26:45, 1.63it/s] 42%|████▏ | 27816/65536 [4:46:11<6:46:57, 1.54it/s] 42%|████▏ | 27817/65536 [4:46:11<6:43:12, 1.56it/s] 42%|████▏ | 27818/65536 [4:46:12<6:37:35, 1.58it/s] 42%|████▏ | 27819/65536 [4:46:12<6:28:47, 1.62it/s] 42%|████▏ | 27820/65536 [4:46:13<6:22:20, 1.64it/s] {'loss': 1.7555, 'learning_rate': 6.195359373086812e-07, 'epoch': 1717.28} + 42%|████▏ | 27820/65536 [4:46:13<6:22:20, 1.64it/s] 42%|████▏ | 27821/65536 [4:46:14<6:25:35, 1.63it/s] 42%|████▏ | 27822/65536 [4:46:14<6:32:41, 1.60it/s] 42%|████▏ | 27823/65536 [4:46:15<6:25:55, 1.63it/s] 42%|████▏ | 27824/65536 [4:46:15<6:20:07, 1.65it/s] 42%|████▏ | 27825/65536 [4:46:16<6:26:39, 1.63it/s] 42%|████▏ | 27826/65536 [4:46:17<6:19:40, 1.66it/s] 42%|████▏ | 27827/65536 [4:46:17<6:23:17, 1.64it/s] 42%|████▏ | 27828/65536 [4:46:18<6:23:31, 1.64it/s] 42%|████▏ | 27829/65536 [4:46:19<6:17:01, 1.67it/s] 42%|████▏ | 27830/65536 [4:46:19<6:17:29, 1.66it/s] 42%|████▏ | 27831/65536 [4:46:20<6:26:04, 1.63it/s] 42%|████▏ | 27832/65536 [4:46:20<6:33:40, 1.60it/s] 42%|████▏ | 27833/65536 [4:46:21<6:35:16, 1.59it/s] 42%|████▏ | 27834/65536 [4:46:22<6:15:24, 1.67it/s] 42%|████▏ | 27835/65536 [4:46:22<6:28:36, 1.62it/s] 42%|████▏ | 27836/65536 [4:46:23<6:24:46, 1.63it/s] 42%|████▏ | 27837/65536 [4:46:23<6:19:23, 1.66it/s] 42%|████▏ | 27838/65536 [4:46:24<6:19:41, 1.65it/s] 42%|████▏ | 27839/65536 [4:46:25<6:27:05, 1.62it/s] 42%|████▏ | 27840/65536 [4:46:25<6:36:48, 1.58it/s] {'loss': 1.6939, 'learning_rate': 6.192604383494551e-07, 'epoch': 1718.52} + 42%|████▏ | 27840/65536 [4:46:25<6:36:48, 1.58it/s] 42%|████▏ | 27841/65536 [4:46:26<6:36:48, 1.58it/s] 42%|████▏ | 27842/65536 [4:46:27<6:36:04, 1.59it/s] 42%|████▏ | 27843/65536 [4:46:27<6:23:54, 1.64it/s] 42%|████▏ | 27844/65536 [4:46:28<6:18:58, 1.66it/s] 42%|████▏ | 27845/65536 [4:46:28<6:13:43, 1.68it/s] 42%|████▏ | 27846/65536 [4:46:29<6:19:16, 1.66it/s] 42%|████▏ | 27847/65536 [4:46:30<6:22:31, 1.64it/s] 42%|████▏ | 27848/65536 [4:46:30<6:40:26, 1.57it/s] 42%|████▏ | 27849/65536 [4:46:31<6:34:57, 1.59it/s] 42%|████▏ | 27850/65536 [4:46:31<6:25:28, 1.63it/s] 42%|████▏ | 27851/65536 [4:46:32<6:24:43, 1.63it/s] 42%|████▏ | 27852/65536 [4:46:33<6:25:32, 1.63it/s] 43%|████▎ | 27853/65536 [4:46:33<6:28:52, 1.62it/s] 43%|████▎ | 27854/65536 [4:46:34<6:22:00, 1.64it/s] 43%|████▎ | 27855/65536 [4:46:35<6:24:20, 1.63it/s] 43%|████▎ | 27856/65536 [4:46:35<6:33:10, 1.60it/s] 43%|████▎ | 27857/65536 [4:46:36<6:36:17, 1.58it/s] 43%|████▎ | 27858/65536 [4:46:37<6:45:47, 1.55it/s] 43%|████▎ | 27859/65536 [4:46:37<6:40:19, 1.57it/s] 43%|████▎ | 27860/65536 [4:46:38<6:31:07, 1.61it/s] {'loss': 1.6779, 'learning_rate': 6.18984939390229e-07, 'epoch': 1719.75} + 43%|████▎ | 27860/65536 [4:46:38<6:31:07, 1.61it/s] 43%|████▎ | 27861/65536 [4:46:38<6:28:36, 1.62it/s] 43%|████▎ | 27862/65536 [4:46:39<6:24:49, 1.63it/s] 43%|████▎ | 27863/65536 [4:46:39<6:15:19, 1.67it/s] 43%|████▎ | 27864/65536 [4:46:40<6:09:32, 1.70it/s] 43%|████▎ | 27865/65536 [4:46:41<6:19:09, 1.66it/s] 43%|████▎ | 27866/65536 [4:46:41<6:20:57, 1.65it/s] 43%|████▎ | 27867/65536 [4:46:42<6:27:54, 1.62it/s] 43%|████▎ | 27868/65536 [4:46:43<6:26:59, 1.62it/s] 43%|████▎ | 27869/65536 [4:46:43<6:25:53, 1.63it/s] 43%|████▎ | 27870/65536 [4:46:44<6:29:14, 1.61it/s] 43%|████▎ | 27871/65536 [4:46:44<6:28:44, 1.61it/s] 43%|████▎ | 27872/65536 [4:46:45<6:27:41, 1.62it/s] 43%|████▎ | 27873/65536 [4:46:46<6:28:38, 1.62it/s] 43%|████▎ | 27874/65536 [4:46:46<6:26:01, 1.63it/s] 43%|████▎ | 27875/65536 [4:46:47<6:27:26, 1.62it/s] 43%|████▎ | 27876/65536 [4:46:48<6:37:12, 1.58it/s] 43%|████▎ | 27877/65536 [4:46:48<6:29:06, 1.61it/s] 43%|████▎ | 27878/65536 [4:46:49<6:25:47, 1.63it/s] 43%|████▎ | 27879/65536 [4:46:49<6:19:08, 1.66it/s] 43%|████▎ | 27880/65536 [4:46:50<6:12:49, 1.68it/s] {'loss': 1.7628, 'learning_rate': 6.187094404310027e-07, 'epoch': 1720.99} + 43%|████▎ | 27880/65536 [4:46:50<6:12:49, 1.68it/s] 43%|████▎ | 27881/65536 [4:46:51<6:28:30, 1.62it/s] 43%|████▎ | 27882/65536 [4:46:51<6:33:41, 1.59it/s] 43%|████▎ | 27883/65536 [4:46:52<6:49:30, 1.53it/s] 43%|████▎ | 27884/65536 [4:46:53<6:32:06, 1.60it/s] 43%|████▎ | 27885/65536 [4:46:53<6:27:24, 1.62it/s] 43%|████▎ | 27886/65536 [4:46:54<6:18:54, 1.66it/s] 43%|████▎ | 27887/65536 [4:46:54<6:10:30, 1.69it/s] 43%|████▎ | 27888/65536 [4:46:55<6:09:47, 1.70it/s] 43%|████▎ | 27889/65536 [4:46:55<6:13:43, 1.68it/s] 43%|████▎ | 27890/65536 [4:46:56<6:22:21, 1.64it/s] 43%|████▎ | 27891/65536 [4:46:57<6:27:24, 1.62it/s] 43%|████▎ | 27892/65536 [4:46:57<6:18:36, 1.66it/s] 43%|████▎ | 27893/65536 [4:46:58<6:19:23, 1.65it/s] 43%|████▎ | 27894/65536 [4:46:59<6:28:43, 1.61it/s] 43%|████▎ | 27895/65536 [4:46:59<6:22:10, 1.64it/s] 43%|████▎ | 27896/65536 [4:47:00<6:16:48, 1.66it/s] 43%|████▎ | 27897/65536 [4:47:00<6:37:32, 1.58it/s] 43%|████▎ | 27898/65536 [4:47:01<6:32:02, 1.60it/s] 43%|████▎ | 27899/65536 [4:47:02<6:25:14, 1.63it/s] 43%|████▎ | 27900/65536 [4:47:02<6:26:12, 1.62it/s] {'loss': 1.7266, 'learning_rate': 6.184339414717766e-07, 'epoch': 1722.22} + 43%|████▎ | 27900/65536 [4:47:02<6:26:12, 1.62it/s] 43%|████▎ | 27901/65536 [4:47:03<6:25:12, 1.63it/s] 43%|████▎ | 27902/65536 [4:47:03<6:17:36, 1.66it/s] 43%|████▎ | 27903/65536 [4:47:04<6:24:12, 1.63it/s] 43%|████▎ | 27904/65536 [4:47:05<6:20:25, 1.65it/s] 43%|████▎ | 27905/65536 [4:47:05<6:21:14, 1.65it/s] 43%|████▎ | 27906/65536 [4:47:06<6:18:51, 1.66it/s] 43%|████▎ | 27907/65536 [4:47:06<6:14:47, 1.67it/s] 43%|████▎ | 27908/65536 [4:47:07<6:31:30, 1.60it/s] 43%|████▎ | 27909/65536 [4:47:08<6:30:27, 1.61it/s] 43%|████▎ | 27910/65536 [4:47:08<6:20:35, 1.65it/s] 43%|████▎ | 27911/65536 [4:47:09<6:29:55, 1.61it/s] 43%|████▎ | 27912/65536 [4:47:10<6:31:46, 1.60it/s] 43%|████▎ | 27913/65536 [4:47:10<6:48:28, 1.54it/s] 43%|████▎ | 27914/65536 [4:47:11<6:31:07, 1.60it/s] 43%|████▎ | 27915/65536 [4:47:11<6:29:41, 1.61it/s] 43%|████▎ | 27916/65536 [4:47:12<6:31:49, 1.60it/s] 43%|████▎ | 27917/65536 [4:47:13<6:24:51, 1.63it/s] 43%|████▎ | 27918/65536 [4:47:13<6:22:55, 1.64it/s] 43%|████▎ | 27919/65536 [4:47:14<6:34:48, 1.59it/s] 43%|████▎ | 27920/65536 [4:47:15<6:35:44, 1.58it/s] {'loss': 1.7034, 'learning_rate': 6.181584425125504e-07, 'epoch': 1723.46} + 43%|████▎ | 27920/65536 [4:47:15<6:35:44, 1.58it/s] 43%|████▎ | 27921/65536 [4:47:15<6:36:09, 1.58it/s] 43%|████▎ | 27922/65536 [4:47:16<6:39:47, 1.57it/s] 43%|████▎ | 27923/65536 [4:47:17<6:32:34, 1.60it/s] 43%|████▎ | 27924/65536 [4:47:17<6:24:30, 1.63it/s] 43%|████▎ | 27925/65536 [4:47:18<6:12:33, 1.68it/s] 43%|████▎ | 27926/65536 [4:47:18<6:13:31, 1.68it/s] 43%|████▎ | 27927/65536 [4:47:19<6:11:14, 1.69it/s] 43%|████▎ | 27928/65536 [4:47:19<6:16:02, 1.67it/s] 43%|████▎ | 27929/65536 [4:47:20<6:27:05, 1.62it/s] 43%|████▎ | 27930/65536 [4:47:21<6:24:56, 1.63it/s] 43%|████▎ | 27931/65536 [4:47:21<6:31:46, 1.60it/s] 43%|████▎ | 27932/65536 [4:47:22<6:29:45, 1.61it/s] 43%|████▎ | 27933/65536 [4:47:23<6:27:20, 1.62it/s] 43%|████▎ | 27934/65536 [4:47:23<6:21:58, 1.64it/s] 43%|████▎ | 27935/65536 [4:47:24<6:14:06, 1.68it/s] 43%|████▎ | 27936/65536 [4:47:24<6:17:08, 1.66it/s] 43%|████▎ | 27937/65536 [4:47:25<6:19:01, 1.65it/s] 43%|████▎ | 27938/65536 [4:47:26<6:29:28, 1.61it/s] 43%|████▎ | 27939/65536 [4:47:26<6:25:42, 1.62it/s] 43%|████▎ | 27940/65536 [4:47:27<6:33:01, 1.59it/s] {'loss': 1.758, 'learning_rate': 6.178829435533243e-07, 'epoch': 1724.69} + 43%|████▎ | 27940/65536 [4:47:27<6:33:01, 1.59it/s] 43%|████▎ | 27941/65536 [4:47:27<6:31:54, 1.60it/s] 43%|████▎ | 27942/65536 [4:47:28<6:29:19, 1.61it/s] 43%|████▎ | 27943/65536 [4:47:29<6:29:16, 1.61it/s] 43%|████▎ | 27944/65536 [4:47:29<6:17:39, 1.66it/s] 43%|████▎ | 27945/65536 [4:47:30<6:12:13, 1.68it/s] 43%|████▎ | 27946/65536 [4:47:31<6:29:17, 1.61it/s] 43%|████▎ | 27947/65536 [4:47:31<6:26:24, 1.62it/s] 43%|████▎ | 27948/65536 [4:47:32<6:31:09, 1.60it/s] 43%|████▎ | 27949/65536 [4:47:32<6:21:18, 1.64it/s] 43%|████▎ | 27950/65536 [4:47:33<6:25:34, 1.62it/s] 43%|████▎ | 27951/65536 [4:47:34<6:18:40, 1.65it/s] 43%|████▎ | 27952/65536 [4:47:34<6:25:41, 1.62it/s] 43%|████▎ | 27953/65536 [4:47:35<6:19:03, 1.65it/s] 43%|████▎ | 27954/65536 [4:47:35<6:14:04, 1.67it/s] 43%|████▎ | 27955/65536 [4:47:36<6:15:45, 1.67it/s] 43%|████▎ | 27956/65536 [4:47:37<6:28:46, 1.61it/s] 43%|████▎ | 27957/65536 [4:47:37<6:16:08, 1.67it/s] 43%|████▎ | 27958/65536 [4:47:38<6:27:06, 1.62it/s] 43%|████▎ | 27959/65536 [4:47:39<6:37:09, 1.58it/s] 43%|████▎ | 27960/65536 [4:47:39<6:36:23, 1.58it/s] {'loss': 1.7253, 'learning_rate': 6.176074445940982e-07, 'epoch': 1725.93} + 43%|████▎ | 27960/65536 [4:47:39<6:36:23, 1.58it/s] 43%|████▎ | 27961/65536 [4:47:40<6:24:53, 1.63it/s] 43%|████▎ | 27962/65536 [4:47:40<6:31:57, 1.60it/s] 43%|████▎ | 27963/65536 [4:47:41<6:16:47, 1.66it/s] 43%|████▎ | 27964/65536 [4:47:42<6:10:31, 1.69it/s] 43%|████▎ | 27965/65536 [4:47:42<6:09:15, 1.70it/s] 43%|████▎ | 27966/65536 [4:47:43<6:13:03, 1.68it/s] 43%|████▎ | 27967/65536 [4:47:43<6:16:04, 1.66it/s] 43%|████▎ | 27968/65536 [4:47:44<6:28:31, 1.61it/s] 43%|████▎ | 27969/65536 [4:47:45<6:28:55, 1.61it/s] 43%|████▎ | 27970/65536 [4:47:45<6:20:17, 1.65it/s] 43%|████▎ | 27971/65536 [4:47:46<6:15:40, 1.67it/s] 43%|████▎ | 27972/65536 [4:47:46<6:12:37, 1.68it/s] 43%|████▎ | 27973/65536 [4:47:47<6:22:15, 1.64it/s] 43%|████▎ | 27974/65536 [4:47:48<6:14:21, 1.67it/s] 43%|████▎ | 27975/65536 [4:47:48<6:21:42, 1.64it/s] 43%|████▎ | 27976/65536 [4:47:49<6:36:20, 1.58it/s] 43%|████▎ | 27977/65536 [4:47:49<6:30:44, 1.60it/s] 43%|████▎ | 27978/65536 [4:47:50<6:36:57, 1.58it/s] 43%|████▎ | 27979/65536 [4:47:51<6:25:54, 1.62it/s] 43%|████▎ | 27980/65536 [4:47:51<6:20:34, 1.64it/s] {'loss': 1.7962, 'learning_rate': 6.17331945634872e-07, 'epoch': 1727.16} + 43%|████▎ | 27980/65536 [4:47:51<6:20:34, 1.64it/s] 43%|████▎ | 27981/65536 [4:47:52<6:27:19, 1.62it/s] 43%|████▎ | 27982/65536 [4:47:53<6:27:09, 1.62it/s] 43%|████▎ | 27983/65536 [4:47:53<6:33:51, 1.59it/s] 43%|████▎ | 27984/65536 [4:47:54<6:21:30, 1.64it/s] 43%|████▎ | 27985/65536 [4:47:54<6:24:26, 1.63it/s] 43%|████▎ | 27986/65536 [4:47:55<6:17:41, 1.66it/s] 43%|████▎ | 27987/65536 [4:47:56<6:25:57, 1.62it/s] 43%|████▎ | 27988/65536 [4:47:56<6:18:24, 1.65it/s] 43%|████▎ | 27989/65536 [4:47:57<6:14:05, 1.67it/s] 43%|████▎ | 27990/65536 [4:47:57<6:19:43, 1.65it/s] 43%|████▎ | 27991/65536 [4:47:58<6:20:59, 1.64it/s] 43%|████▎ | 27992/65536 [4:47:59<6:23:48, 1.63it/s] 43%|████▎ | 27993/65536 [4:47:59<6:25:11, 1.62it/s] 43%|████▎ | 27994/65536 [4:48:00<6:35:50, 1.58it/s] 43%|████▎ | 27995/65536 [4:48:01<6:37:28, 1.57it/s] 43%|████▎ | 27996/65536 [4:48:01<6:40:27, 1.56it/s] 43%|████▎ | 27997/65536 [4:48:02<6:36:09, 1.58it/s] 43%|████▎ | 27998/65536 [4:48:02<6:31:35, 1.60it/s] 43%|████▎ | 27999/65536 [4:48:03<6:39:00, 1.57it/s] 43%|████▎ | 28000/65536 [4:48:04<6:19:43, 1.65it/s] {'loss': 1.7053, 'learning_rate': 6.170564466756459e-07, 'epoch': 1728.4} + 43%|████▎ | 28000/65536 [4:48:04<6:19:43, 1.65it/s] 43%|████▎ | 28001/65536 [4:48:04<6:28:08, 1.61it/s] 43%|████▎ | 28002/65536 [4:48:05<6:18:46, 1.65it/s] 43%|████▎ | 28003/65536 [4:48:05<6:13:28, 1.67it/s] 43%|████▎ | 28004/65536 [4:48:06<6:26:11, 1.62it/s] 43%|████▎ | 28005/65536 [4:48:07<6:21:36, 1.64it/s] 43%|████▎ | 28006/65536 [4:48:07<6:17:44, 1.66it/s] 43%|████▎ | 28007/65536 [4:48:08<6:19:11, 1.65it/s] 43%|████▎ | 28008/65536 [4:48:09<6:18:38, 1.65it/s] 43%|████▎ | 28009/65536 [4:48:09<6:20:48, 1.64it/s] 43%|████▎ | 28010/65536 [4:48:10<6:25:50, 1.62it/s] 43%|████▎ | 28011/65536 [4:48:10<6:26:00, 1.62it/s] 43%|████▎ | 28012/65536 [4:48:11<6:15:38, 1.66it/s] 43%|████▎ | 28013/65536 [4:48:12<6:08:47, 1.70it/s] 43%|████▎ | 28014/65536 [4:48:12<6:17:51, 1.66it/s] 43%|████▎ | 28015/65536 [4:48:13<6:12:28, 1.68it/s] 43%|████▎ | 28016/65536 [4:48:13<6:10:45, 1.69it/s] 43%|████▎ | 28017/65536 [4:48:14<6:11:13, 1.68it/s] 43%|████▎ | 28018/65536 [4:48:14<6:05:16, 1.71it/s] 43%|████▎ | 28019/65536 [4:48:15<6:24:00, 1.63it/s] 43%|████▎ | 28020/65536 [4:48:16<6:29:49, 1.60it/s] {'loss': 1.7582, 'learning_rate': 6.167809477164197e-07, 'epoch': 1729.63} + 43%|████▎ | 28020/65536 [4:48:16<6:29:49, 1.60it/s] 43%|████▎ | 28021/65536 [4:48:16<6:19:53, 1.65it/s] 43%|████▎ | 28022/65536 [4:48:17<6:20:47, 1.64it/s] 43%|████▎ | 28023/65536 [4:48:18<6:27:04, 1.62it/s] 43%|████▎ | 28024/65536 [4:48:18<6:23:43, 1.63it/s] 43%|████▎ | 28025/65536 [4:48:19<6:23:09, 1.63it/s] 43%|████▎ | 28026/65536 [4:48:20<6:35:43, 1.58it/s] 43%|████▎ | 28027/65536 [4:48:20<6:46:58, 1.54it/s] 43%|████▎ | 28028/65536 [4:48:21<6:33:42, 1.59it/s] 43%|████▎ | 28029/65536 [4:48:21<6:36:58, 1.57it/s] 43%|████▎ | 28030/65536 [4:48:22<6:25:05, 1.62it/s] 43%|████▎ | 28031/65536 [4:48:23<6:21:37, 1.64it/s] 43%|████▎ | 28032/65536 [4:48:23<6:19:34, 1.65it/s] 43%|████▎ | 28033/65536 [4:48:24<6:15:51, 1.66it/s] 43%|████▎ | 28034/65536 [4:48:24<6:16:49, 1.66it/s] 43%|████▎ | 28035/65536 [4:48:25<6:21:14, 1.64it/s] 43%|████▎ | 28036/65536 [4:48:26<6:21:19, 1.64it/s] 43%|████▎ | 28037/65536 [4:48:26<6:22:09, 1.64it/s] 43%|████▎ | 28038/65536 [4:48:27<6:24:45, 1.62it/s] 43%|████▎ | 28039/65536 [4:48:28<6:35:42, 1.58it/s] 43%|████▎ | 28040/65536 [4:48:28<6:23:12, 1.63it/s] {'loss': 1.7062, 'learning_rate': 6.165054487571936e-07, 'epoch': 1730.86} + 43%|████▎ | 28040/65536 [4:48:28<6:23:12, 1.63it/s] 43%|████▎ | 28041/65536 [4:48:29<6:18:58, 1.65it/s] 43%|████▎ | 28042/65536 [4:48:29<6:21:07, 1.64it/s] 43%|████▎ | 28043/65536 [4:48:30<6:29:27, 1.60it/s] 43%|████▎ | 28044/65536 [4:48:31<6:26:13, 1.62it/s] 43%|████▎ | 28045/65536 [4:48:31<6:20:26, 1.64it/s] 43%|████▎ | 28046/65536 [4:48:32<6:21:41, 1.64it/s] 43%|████▎ | 28047/65536 [4:48:32<6:21:13, 1.64it/s] 43%|████▎ | 28048/65536 [4:48:33<6:28:15, 1.61it/s] 43%|████▎ | 28049/65536 [4:48:34<6:23:37, 1.63it/s] 43%|████▎ | 28050/65536 [4:48:34<6:13:23, 1.67it/s] 43%|████▎ | 28051/65536 [4:48:35<6:17:48, 1.65it/s] 43%|████▎ | 28052/65536 [4:48:35<6:19:01, 1.65it/s] 43%|████▎ | 28053/65536 [4:48:36<6:17:50, 1.65it/s] 43%|████▎ | 28054/65536 [4:48:37<6:17:58, 1.65it/s] 43%|████▎ | 28055/65536 [4:48:37<6:28:43, 1.61it/s] 43%|████▎ | 28056/65536 [4:48:38<6:23:22, 1.63it/s] 43%|████▎ | 28057/65536 [4:48:39<6:37:17, 1.57it/s] 43%|████▎ | 28058/65536 [4:48:39<6:27:40, 1.61it/s] 43%|████▎ | 28059/65536 [4:48:40<6:42:09, 1.55it/s] 43%|████▎ | 28060/65536 [4:48:40<6:32:27, 1.59it/s] {'loss': 1.7443, 'learning_rate': 6.162299497979674e-07, 'epoch': 1732.1} + 43%|████▎ | 28060/65536 [4:48:40<6:32:27, 1.59it/s] 43%|████▎ | 28061/65536 [4:48:41<6:22:53, 1.63it/s] 43%|████▎ | 28062/65536 [4:48:42<6:34:16, 1.58it/s] 43%|████▎ | 28063/65536 [4:48:42<6:25:48, 1.62it/s] 43%|████▎ | 28064/65536 [4:48:43<6:19:52, 1.64it/s] 43%|████▎ | 28065/65536 [4:48:44<6:35:31, 1.58it/s] 43%|████▎ | 28066/65536 [4:48:44<6:28:45, 1.61it/s] 43%|████▎ | 28067/65536 [4:48:45<6:24:56, 1.62it/s] 43%|████▎ | 28068/65536 [4:48:45<6:16:22, 1.66it/s] 43%|████▎ | 28069/65536 [4:48:46<6:17:34, 1.65it/s] 43%|████▎ | 28070/65536 [4:48:47<6:10:12, 1.69it/s] 43%|████▎ | 28071/65536 [4:48:47<6:24:45, 1.62it/s] 43%|████▎ | 28072/65536 [4:48:48<6:19:47, 1.64it/s] 43%|████▎ | 28073/65536 [4:48:48<6:20:06, 1.64it/s] 43%|████▎ | 28074/65536 [4:48:49<6:11:06, 1.68it/s] 43%|████▎ | 28075/65536 [4:48:50<6:27:46, 1.61it/s] 43%|████▎ | 28076/65536 [4:48:50<6:19:19, 1.65it/s] 43%|████▎ | 28077/65536 [4:48:51<6:19:42, 1.64it/s] 43%|████▎ | 28078/65536 [4:48:51<6:13:26, 1.67it/s] 43%|████▎ | 28079/65536 [4:48:52<6:06:13, 1.70it/s] 43%|████▎ | 28080/65536 [4:48:53<6:14:55, 1.67it/s] {'loss': 1.7365, 'learning_rate': 6.159544508387412e-07, 'epoch': 1733.33} + 43%|████▎ | 28080/65536 [4:48:53<6:14:55, 1.67it/s] 43%|████▎ | 28081/65536 [4:48:53<6:19:33, 1.64it/s] 43%|████▎ | 28082/65536 [4:48:54<6:24:31, 1.62it/s] 43%|████▎ | 28083/65536 [4:48:54<6:23:32, 1.63it/s] 43%|████▎ | 28084/65536 [4:48:55<6:24:00, 1.63it/s] 43%|████▎ | 28085/65536 [4:48:56<6:25:26, 1.62it/s] 43%|████▎ | 28086/65536 [4:48:56<6:34:33, 1.58it/s] 43%|████▎ | 28087/65536 [4:48:57<6:34:02, 1.58it/s] 43%|████▎ | 28088/65536 [4:48:58<6:24:30, 1.62it/s] 43%|████▎ | 28089/65536 [4:48:58<6:23:24, 1.63it/s] 43%|████▎ | 28090/65536 [4:48:59<6:19:39, 1.64it/s] 43%|████▎ | 28091/65536 [4:48:59<6:35:27, 1.58it/s] 43%|████▎ | 28092/65536 [4:49:00<6:38:23, 1.57it/s] 43%|████▎ | 28093/65536 [4:49:01<6:35:40, 1.58it/s] 43%|████▎ | 28094/65536 [4:49:01<6:31:20, 1.59it/s] 43%|████▎ | 28095/65536 [4:49:02<6:25:59, 1.62it/s] 43%|████▎ | 28096/65536 [4:49:03<6:22:16, 1.63it/s] 43%|████▎ | 28097/65536 [4:49:03<6:19:22, 1.64it/s] 43%|████▎ | 28098/65536 [4:49:04<6:09:08, 1.69it/s] 43%|████▎ | 28099/65536 [4:49:04<6:20:14, 1.64it/s] 43%|████▎ | 28100/65536 [4:49:05<6:28:03, 1.61it/s] {'loss': 1.6809, 'learning_rate': 6.15678951879515e-07, 'epoch': 1734.57} + 43%|████▎ | 28100/65536 [4:49:05<6:28:03, 1.61it/s] 43%|████▎ | 28101/65536 [4:49:06<6:29:13, 1.60it/s] 43%|████▎ | 28102/65536 [4:49:06<6:24:34, 1.62it/s] 43%|████▎ | 28103/65536 [4:49:07<6:23:07, 1.63it/s] 43%|████▎ | 28104/65536 [4:49:07<6:24:24, 1.62it/s] 43%|████▎ | 28105/65536 [4:49:08<6:17:44, 1.65it/s] 43%|████▎ | 28106/65536 [4:49:09<6:23:04, 1.63it/s] 43%|████▎ | 28107/65536 [4:49:09<6:16:08, 1.66it/s] 43%|████▎ | 28108/65536 [4:49:10<6:26:17, 1.61it/s] 43%|████▎ | 28109/65536 [4:49:11<6:32:33, 1.59it/s] 43%|████▎ | 28110/65536 [4:49:11<6:35:01, 1.58it/s] 43%|████▎ | 28111/65536 [4:49:12<6:25:29, 1.62it/s] 43%|████▎ | 28112/65536 [4:49:12<6:23:06, 1.63it/s] 43%|████▎ | 28113/65536 [4:49:13<6:17:06, 1.65it/s] 43%|████▎ | 28114/65536 [4:49:14<6:29:44, 1.60it/s] 43%|████▎ | 28115/65536 [4:49:14<6:39:52, 1.56it/s] 43%|████▎ | 28116/65536 [4:49:15<6:35:28, 1.58it/s] 43%|████▎ | 28117/65536 [4:49:16<6:27:07, 1.61it/s] 43%|████▎ | 28118/65536 [4:49:16<6:25:47, 1.62it/s] 43%|████▎ | 28119/65536 [4:49:17<6:27:28, 1.61it/s] 43%|████▎ | 28120/65536 [4:49:17<6:16:33, 1.66it/s] {'loss': 1.7113, 'learning_rate': 6.15403452920289e-07, 'epoch': 1735.8} + 43%|████▎ | 28120/65536 [4:49:17<6:16:33, 1.66it/s] 43%|████▎ | 28121/65536 [4:49:18<6:09:42, 1.69it/s] 43%|████▎ | 28122/65536 [4:49:19<6:11:06, 1.68it/s] 43%|████▎ | 28123/65536 [4:49:19<6:00:40, 1.73it/s] 43%|████▎ | 28124/65536 [4:49:20<6:17:21, 1.65it/s] 43%|████▎ | 28125/65536 [4:49:20<6:10:53, 1.68it/s] 43%|████▎ | 28126/65536 [4:49:21<6:07:20, 1.70it/s] 43%|████▎ | 28127/65536 [4:49:21<6:11:27, 1.68it/s] 43%|████▎ | 28128/65536 [4:49:22<6:12:37, 1.67it/s] 43%|████▎ | 28129/65536 [4:49:23<6:14:23, 1.67it/s] 43%|████▎ | 28130/65536 [4:49:23<6:16:54, 1.65it/s] 43%|████▎ | 28131/65536 [4:49:24<6:31:21, 1.59it/s] 43%|████▎ | 28132/65536 [4:49:25<6:21:20, 1.63it/s] 43%|█���██▎ | 28133/65536 [4:49:25<6:26:42, 1.61it/s] 43%|████▎ | 28134/65536 [4:49:26<6:36:31, 1.57it/s] 43%|████▎ | 28135/65536 [4:49:26<6:27:58, 1.61it/s] 43%|████▎ | 28136/65536 [4:49:27<6:17:45, 1.65it/s] 43%|████▎ | 28137/65536 [4:49:28<6:10:24, 1.68it/s] 43%|████▎ | 28138/65536 [4:49:28<6:07:28, 1.70it/s] 43%|████▎ | 28139/65536 [4:49:29<6:16:00, 1.66it/s] 43%|████▎ | 28140/65536 [4:49:29<6:25:45, 1.62it/s] {'loss': 1.7568, 'learning_rate': 6.151279539610628e-07, 'epoch': 1737.04} + 43%|████▎ | 28140/65536 [4:49:29<6:25:45, 1.62it/s] 43%|████▎ | 28141/65536 [4:49:30<6:30:31, 1.60it/s] 43%|████▎ | 28142/65536 [4:49:31<6:25:23, 1.62it/s] 43%|████▎ | 28143/65536 [4:49:31<6:17:24, 1.65it/s] 43%|████▎ | 28144/65536 [4:49:32<6:15:28, 1.66it/s] 43%|████▎ | 28145/65536 [4:49:32<6:13:01, 1.67it/s] 43%|████▎ | 28146/65536 [4:49:33<6:06:32, 1.70it/s] 43%|████▎ | 28147/65536 [4:49:34<6:02:31, 1.72it/s] 43%|████▎ | 28148/65536 [4:49:34<6:06:09, 1.70it/s] 43%|████▎ | 28149/65536 [4:49:35<6:19:16, 1.64it/s] 43%|████▎ | 28150/65536 [4:49:36<6:26:13, 1.61it/s] 43%|████▎ | 28151/65536 [4:49:36<6:24:25, 1.62it/s] 43%|████▎ | 28152/65536 [4:49:37<6:22:19, 1.63it/s] 43%|████▎ | 28153/65536 [4:49:37<6:22:59, 1.63it/s] 43%|████▎ | 28154/65536 [4:49:38<6:19:07, 1.64it/s] 43%|████▎ | 28155/65536 [4:49:39<6:21:46, 1.63it/s] 43%|████▎ | 28156/65536 [4:49:39<6:39:27, 1.56it/s] 43%|████▎ | 28157/65536 [4:49:40<6:25:05, 1.62it/s] 43%|████▎ | 28158/65536 [4:49:40<6:25:25, 1.62it/s] 43%|████▎ | 28159/65536 [4:49:41<6:27:35, 1.61it/s] 43%|████▎ | 28160/65536 [4:49:42<6:16:20, 1.66it/s] {'loss': 1.7214, 'learning_rate': 6.148524550018366e-07, 'epoch': 1738.27} + 43%|████▎ | 28160/65536 [4:49:42<6:16:20, 1.66it/s] 43%|████▎ | 28161/65536 [4:49:42<6:06:18, 1.70it/s] 43%|████▎ | 28162/65536 [4:49:43<6:05:50, 1.70it/s] 43%|████▎ | 28163/65536 [4:49:43<6:18:01, 1.65it/s] 43%|████▎ | 28164/65536 [4:49:44<6:11:13, 1.68it/s] 43%|████▎ | 28165/65536 [4:49:45<6:22:51, 1.63it/s] 43%|████▎ | 28166/65536 [4:49:45<6:16:42, 1.65it/s] 43%|████▎ | 28167/65536 [4:49:46<6:23:22, 1.62it/s] 43%|████▎ | 28168/65536 [4:49:47<6:21:02, 1.63it/s] 43%|████▎ | 28169/65536 [4:49:47<6:25:17, 1.62it/s] 43%|████▎ | 28170/65536 [4:49:48<6:21:12, 1.63it/s] 43%|████▎ | 28171/65536 [4:49:48<6:25:00, 1.62it/s] 43%|████▎ | 28172/65536 [4:49:49<6:43:20, 1.54it/s] 43%|████▎ | 28173/65536 [4:49:50<6:40:36, 1.55it/s] 43%|████▎ | 28174/65536 [4:49:50<6:46:31, 1.53it/s] 43%|████▎ | 28175/65536 [4:49:51<6:40:42, 1.55it/s] 43%|████▎ | 28176/65536 [4:49:52<6:39:15, 1.56it/s] 43%|████▎ | 28177/65536 [4:49:52<6:37:20, 1.57it/s] 43%|████▎ | 28178/65536 [4:49:53<6:32:01, 1.59it/s] 43%|████▎ | 28179/65536 [4:49:53<6:27:53, 1.61it/s] 43%|████▎ | 28180/65536 [4:49:54<6:24:46, 1.62it/s] {'loss': 1.6456, 'learning_rate': 6.145769560426105e-07, 'epoch': 1739.51} + 43%|████▎ | 28180/65536 [4:49:54<6:24:46, 1.62it/s] 43%|████▎ | 28181/65536 [4:49:55<6:19:25, 1.64it/s] 43%|████▎ | 28182/65536 [4:49:55<6:12:09, 1.67it/s] 43%|████▎ | 28183/65536 [4:49:56<6:25:53, 1.61it/s] 43%|████▎ | 28184/65536 [4:49:57<6:23:50, 1.62it/s] 43%|████▎ | 28185/65536 [4:49:57<6:17:07, 1.65it/s] 43%|████▎ | 28186/65536 [4:49:58<6:14:35, 1.66it/s] 43%|████▎ | 28187/65536 [4:49:58<6:23:05, 1.62it/s] 43%|████▎ | 28188/65536 [4:49:59<6:20:26, 1.64it/s] 43%|████▎ | 28189/65536 [4:50:00<6:36:39, 1.57it/s] 43%|████▎ | 28190/65536 [4:50:00<6:32:08, 1.59it/s] 43%|████▎ | 28191/65536 [4:50:01<6:27:40, 1.61it/s] 43%|████▎ | 28192/65536 [4:50:02<6:35:36, 1.57it/s] 43%|████▎ | 28193/65536 [4:50:02<6:25:55, 1.61it/s] 43%|████▎ | 28194/65536 [4:50:03<6:15:44, 1.66it/s] 43%|████▎ | 28195/65536 [4:50:03<6:17:24, 1.65it/s] 43%|████▎ | 28196/65536 [4:50:04<6:21:13, 1.63it/s] 43%|████▎ | 28197/65536 [4:50:05<6:34:38, 1.58it/s] 43%|████▎ | 28198/65536 [4:50:05<6:29:37, 1.60it/s] 43%|████▎ | 28199/65536 [4:50:06<6:25:35, 1.61it/s] 43%|████▎ | 28200/65536 [4:50:06<6:24:18, 1.62it/s] {'loss': 1.707, 'learning_rate': 6.143014570833843e-07, 'epoch': 1740.74} + 43%|████▎ | 28200/65536 [4:50:06<6:24:18, 1.62it/s] 43%|████▎ | 28201/65536 [4:50:07<6:28:07, 1.60it/s] 43%|████▎ | 28202/65536 [4:50:08<6:29:06, 1.60it/s] 43%|████▎ | 28203/65536 [4:50:08<6:27:14, 1.61it/s] 43%|████▎ | 28204/65536 [4:50:09<6:18:33, 1.64it/s] 43%|████▎ | 28205/65536 [4:50:10<6:27:56, 1.60it/s] 43%|████▎ | 28206/65536 [4:50:10<6:28:28, 1.60it/s] 43%|████▎ | 28207/65536 [4:50:11<6:35:26, 1.57it/s] 43%|████▎ | 28208/65536 [4:50:11<6:31:19, 1.59it/s] 43%|████▎ | 28209/65536 [4:50:12<6:35:14, 1.57it/s] 43%|████▎ | 28210/65536 [4:50:13<6:27:11, 1.61it/s] 43%|████▎ | 28211/65536 [4:50:13<6:23:28, 1.62it/s] 43%|████▎ | 28212/65536 [4:50:14<6:22:43, 1.63it/s] 43%|████▎ | 28213/65536 [4:50:15<6:18:30, 1.64it/s] 43%|████▎ | 28214/65536 [4:50:15<6:23:09, 1.62it/s] 43%|████▎ | 28215/65536 [4:50:16<6:27:15, 1.61it/s] 43%|████▎ | 28216/65536 [4:50:16<6:23:04, 1.62it/s] 43%|████▎ | 28217/65536 [4:50:17<6:13:54, 1.66it/s] 43%|████▎ | 28218/65536 [4:50:18<6:12:46, 1.67it/s] 43%|████▎ | 28219/65536 [4:50:18<6:13:02, 1.67it/s] 43%|████▎ | 28220/65536 [4:50:19<6:15:31, 1.66it/s] {'loss': 1.6937, 'learning_rate': 6.140259581241582e-07, 'epoch': 1741.98} + 43%|████▎ | 28220/65536 [4:50:19<6:15:31, 1.66it/s] 43%|████▎ | 28221/65536 [4:50:19<6:23:43, 1.62it/s] 43%|████▎ | 28222/65536 [4:50:20<6:21:36, 1.63it/s] 43%|████▎ | 28223/65536 [4:50:21<6:15:39, 1.66it/s] 43%|████▎ | 28224/65536 [4:50:21<6:18:48, 1.64it/s] 43%|████▎ | 28225/65536 [4:50:22<6:22:47, 1.62it/s] 43%|████▎ | 28226/65536 [4:50:22<6:15:34, 1.66it/s] 43%|████▎ | 28227/65536 [4:50:23<6:12:34, 1.67it/s] 43%|████▎ | 28228/65536 [4:50:24<6:10:25, 1.68it/s] 43%|████▎ | 28229/65536 [4:50:24<6:14:12, 1.66it/s] 43%|████▎ | 28230/65536 [4:50:25<6:15:14, 1.66it/s] 43%|████▎ | 28231/65536 [4:50:26<6:28:31, 1.60it/s] 43%|████▎ | 28232/65536 [4:50:26<6:23:29, 1.62it/s] 43%|████▎ | 28233/65536 [4:50:27<6:18:50, 1.64it/s] 43%|████▎ | 28234/65536 [4:50:27<6:28:51, 1.60it/s] 43%|████▎ | 28235/65536 [4:50:28<6:31:47, 1.59it/s] 43%|████▎ | 28236/65536 [4:50:29<6:26:40, 1.61it/s] 43%|████▎ | 28237/65536 [4:50:29<6:31:02, 1.59it/s] 43%|████▎ | 28238/65536 [4:50:30<6:23:45, 1.62it/s] 43%|████▎ | 28239/65536 [4:50:30<6:21:07, 1.63it/s] 43%|████▎ | 28240/65536 [4:50:31<6:24:22, 1.62it/s] {'loss': 1.7067, 'learning_rate': 6.137504591649321e-07, 'epoch': 1743.21} + 43%|████▎ | 28240/65536 [4:50:31<6:24:22, 1.62it/s] 43%|████▎ | 28241/65536 [4:50:32<6:15:31, 1.66it/s] 43%|████▎ | 28242/65536 [4:50:32<6:27:32, 1.60it/s] 43%|████▎ | 28243/65536 [4:50:33<6:33:36, 1.58it/s] 43%|████▎ | 28244/65536 [4:50:34<6:26:07, 1.61it/s] 43%|████▎ | 28245/65536 [4:50:34<6:22:19, 1.63it/s] 43%|████▎ | 28246/65536 [4:50:35<6:20:27, 1.63it/s] 43%|████▎ | 28247/65536 [4:50:35<6:27:19, 1.60it/s] 43%|████▎ | 28248/65536 [4:50:36<6:27:55, 1.60it/s] 43%|████▎ | 28249/65536 [4:50:37<6:19:46, 1.64it/s] 43%|████▎ | 28250/65536 [4:50:37<6:17:24, 1.65it/s] 43%|████▎ | 28251/65536 [4:50:38<6:14:06, 1.66it/s] 43%|████▎ | 28252/65536 [4:50:39<6:33:53, 1.58it/s] 43%|████▎ | 28253/65536 [4:50:39<6:40:15, 1.55it/s] 43%|████▎ | 28254/65536 [4:50:40<6:40:31, 1.55it/s] 43%|████▎ | 28255/65536 [4:50:40<6:33:22, 1.58it/s] 43%|████▎ | 28256/65536 [4:50:41<6:22:28, 1.62it/s] 43%|████▎ | 28257/65536 [4:50:42<6:20:15, 1.63it/s] 43%|████▎ | 28258/65536 [4:50:42<6:29:10, 1.60it/s] 43%|████▎ | 28259/65536 [4:50:43<6:23:21, 1.62it/s] 43%|████▎ | 28260/65536 [4:50:43<6:19:00, 1.64it/s] {'loss': 1.726, 'learning_rate': 6.134749602057059e-07, 'epoch': 1744.44} + 43%|████▎ | 28260/65536 [4:50:43<6:19:00, 1.64it/s] 43%|████▎ | 28261/65536 [4:50:44<6:15:36, 1.65it/s] 43%|████▎ | 28262/65536 [4:50:45<6:13:18, 1.66it/s] 43%|████▎ | 28263/65536 [4:50:45<6:28:12, 1.60it/s] 43%|████▎ | 28264/65536 [4:50:46<6:26:19, 1.61it/s] 43%|████▎ | 28265/65536 [4:50:47<6:26:59, 1.61it/s] 43%|████▎ | 28266/65536 [4:50:47<6:15:10, 1.66it/s] 43%|████▎ | 28267/65536 [4:50:48<6:11:48, 1.67it/s] 43%|████▎ | 28268/65536 [4:50:48<6:12:17, 1.67it/s] 43%|████▎ | 28269/65536 [4:50:49<6:23:05, 1.62it/s] 43%|████▎ | 28270/65536 [4:50:50<6:34:27, 1.57it/s] 43%|████▎ | 28271/65536 [4:50:50<6:25:04, 1.61it/s] 43%|████▎ | 28272/65536 [4:50:51<6:25:44, 1.61it/s] 43%|████▎ | 28273/65536 [4:50:51<6:29:01, 1.60it/s] 43%|████▎ | 28274/65536 [4:50:52<6:24:14, 1.62it/s] 43%|████▎ | 28275/65536 [4:50:53<6:30:06, 1.59it/s] 43%|████▎ | 28276/65536 [4:50:53<6:22:10, 1.62it/s] 43%|████▎ | 28277/65536 [4:50:54<6:16:59, 1.65it/s] 43%|████▎ | 28278/65536 [4:50:55<6:18:53, 1.64it/s] 43%|████▎ | 28279/65536 [4:50:55<6:15:23, 1.65it/s] 43%|████▎ | 28280/65536 [4:50:56<6:16:46, 1.65it/s] {'loss': 1.7014, 'learning_rate': 6.131994612464798e-07, 'epoch': 1745.68} + 43%|████▎ | 28280/65536 [4:50:56<6:16:46, 1.65it/s] 43%|████▎ | 28281/65536 [4:50:56<6:11:06, 1.67it/s] 43%|████▎ | 28282/65536 [4:50:57<6:05:57, 1.70it/s] 43%|████▎ | 28283/65536 [4:50:58<6:24:49, 1.61it/s] 43%|████▎ | 28284/65536 [4:50:58<6:24:32, 1.61it/s] 43%|████▎ | 28285/65536 [4:50:59<6:18:17, 1.64it/s] 43%|████▎ | 28286/65536 [4:50:59<6:32:32, 1.58it/s] 43%|████▎ | 28287/65536 [4:51:00<6:24:05, 1.62it/s] 43%|████▎ | 28288/65536 [4:51:01<6:34:37, 1.57it/s] 43%|████▎ | 28289/65536 [4:51:01<6:33:25, 1.58it/s] 43%|████▎ | 28290/65536 [4:51:02<6:38:05, 1.56it/s] 43%|████▎ | 28291/65536 [4:51:03<6:22:00, 1.62it/s] 43%|████▎ | 28292/65536 [4:51:03<6:16:16, 1.65it/s] 43%|████▎ | 28293/65536 [4:51:04<6:09:08, 1.68it/s] 43%|████▎ | 28294/65536 [4:51:04<6:11:50, 1.67it/s] 43%|████▎ | 28295/65536 [4:51:05<6:12:45, 1.67it/s] 43%|████▎ | 28296/65536 [4:51:06<6:14:02, 1.66it/s] 43%|████▎ | 28297/65536 [4:51:06<6:19:28, 1.64it/s] 43%|████▎ | 28298/65536 [4:51:07<6:13:52, 1.66it/s] 43%|████▎ | 28299/65536 [4:51:07<6:20:59, 1.63it/s] 43%|████▎ | 28300/65536 [4:51:08<6:13:59, 1.66it/s] {'loss': 1.7578, 'learning_rate': 6.129239622872536e-07, 'epoch': 1746.91} + 43%|████▎ | 28300/65536 [4:51:08<6:13:59, 1.66it/s] 43%|████▎ | 28301/65536 [4:51:09<6:23:09, 1.62it/s] 43%|████▎ | 28302/65536 [4:51:09<6:40:26, 1.55it/s] 43%|████▎ | 28303/65536 [4:51:10<6:40:50, 1.55it/s] 43%|████▎ | 28304/65536 [4:51:11<6:36:24, 1.57it/s] 43%|████▎ | 28305/65536 [4:51:11<6:27:44, 1.60it/s] 43%|████▎ | 28306/65536 [4:51:12<6:23:19, 1.62it/s] 43%|████▎ | 28307/65536 [4:51:12<6:19:48, 1.63it/s] 43%|████▎ | 28308/65536 [4:51:13<6:15:00, 1.65it/s] 43%|████▎ | 28309/65536 [4:51:14<6:15:10, 1.65it/s] 43%|████▎ | 28310/65536 [4:51:14<6:25:08, 1.61it/s] 43%|████▎ | 28311/65536 [4:51:15<6:28:09, 1.60it/s] 43%|████▎ | 28312/65536 [4:51:15<6:21:31, 1.63it/s] 43%|████▎ | 28313/65536 [4:51:16<6:28:45, 1.60it/s] 43%|████▎ | 28314/65536 [4:51:17<6:25:01, 1.61it/s] 43%|████▎ | 28315/65536 [4:51:17<6:22:05, 1.62it/s] 43%|████▎ | 28316/65536 [4:51:18<6:24:09, 1.61it/s] 43%|████▎ | 28317/65536 [4:51:19<6:19:05, 1.64it/s] 43%|████▎ | 28318/65536 [4:51:19<6:37:00, 1.56it/s] 43%|████▎ | 28319/65536 [4:51:20<6:26:58, 1.60it/s] 43%|████▎ | 28320/65536 [4:51:20<6:28:14, 1.60it/s] {'loss': 1.6701, 'learning_rate': 6.126484633280275e-07, 'epoch': 1748.15} + 43%|████▎ | 28320/65536 [4:51:20<6:28:14, 1.60it/s] 43%|████▎ | 28321/65536 [4:51:21<6:29:19, 1.59it/s] 43%|████▎ | 28322/65536 [4:51:22<6:20:06, 1.63it/s] 43%|████▎ | 28323/65536 [4:51:22<6:23:40, 1.62it/s] 43%|████▎ | 28324/65536 [4:51:23<6:11:19, 1.67it/s] 43%|████▎ | 28325/65536 [4:51:23<6:13:50, 1.66it/s] 43%|████▎ | 28326/65536 [4:51:24<6:11:47, 1.67it/s] 43%|████▎ | 28327/65536 [4:51:25<6:15:36, 1.65it/s] 43%|████▎ | 28328/65536 [4:51:25<6:16:16, 1.65it/s] 43%|██��█▎ | 28329/65536 [4:51:26<6:21:32, 1.63it/s] 43%|████▎ | 28330/65536 [4:51:27<6:33:13, 1.58it/s] 43%|████▎ | 28331/65536 [4:51:27<6:27:54, 1.60it/s] 43%|████▎ | 28332/65536 [4:51:28<6:33:17, 1.58it/s] 43%|████▎ | 28333/65536 [4:51:28<6:24:44, 1.61it/s] 43%|████▎ | 28334/65536 [4:51:29<6:38:16, 1.56it/s] 43%|████▎ | 28335/65536 [4:51:30<6:31:31, 1.58it/s] 43%|████▎ | 28336/65536 [4:51:30<6:22:50, 1.62it/s] 43%|████▎ | 28337/65536 [4:51:31<6:14:52, 1.65it/s] 43%|████▎ | 28338/65536 [4:51:32<6:12:48, 1.66it/s] 43%|████▎ | 28339/65536 [4:51:32<6:10:52, 1.67it/s] 43%|████▎ | 28340/65536 [4:51:33<6:16:45, 1.65it/s] {'loss': 1.6948, 'learning_rate': 6.123729643688012e-07, 'epoch': 1749.38} + 43%|████▎ | 28340/65536 [4:51:33<6:16:45, 1.65it/s] 43%|████▎ | 28341/65536 [4:51:33<6:19:57, 1.63it/s] 43%|████▎ | 28342/65536 [4:51:34<6:19:50, 1.63it/s] 43%|████▎ | 28343/65536 [4:51:35<6:10:21, 1.67it/s] 43%|████▎ | 28344/65536 [4:51:35<6:20:42, 1.63it/s] 43%|████▎ | 28345/65536 [4:51:36<6:24:13, 1.61it/s] 43%|████▎ | 28346/65536 [4:51:36<6:20:00, 1.63it/s] 43%|████▎ | 28347/65536 [4:51:37<6:27:19, 1.60it/s] 43%|████▎ | 28348/65536 [4:51:38<6:19:19, 1.63it/s] 43%|████▎ | 28349/65536 [4:51:38<6:26:10, 1.60it/s] 43%|████▎ | 28350/65536 [4:51:39<6:17:08, 1.64it/s] 43%|████▎ | 28351/65536 [4:51:40<6:32:14, 1.58it/s] 43%|████▎ | 28352/65536 [4:51:40<6:36:51, 1.56it/s] 43%|████▎ | 28353/65536 [4:51:41<6:47:18, 1.52it/s] 43%|████▎ | 28354/65536 [4:51:42<6:31:52, 1.58it/s] 43%|████▎ | 28355/65536 [4:51:42<6:29:55, 1.59it/s] 43%|████▎ | 28356/65536 [4:51:43<6:22:37, 1.62it/s] 43%|████▎ | 28357/65536 [4:51:43<6:20:52, 1.63it/s] 43%|████▎ | 28358/65536 [4:51:44<6:25:13, 1.61it/s] 43%|████▎ | 28359/65536 [4:51:45<6:18:11, 1.64it/s] 43%|████▎ | 28360/65536 [4:51:45<6:12:55, 1.66it/s] {'loss': 1.699, 'learning_rate': 6.120974654095751e-07, 'epoch': 1750.62} + 43%|████▎ | 28360/65536 [4:51:45<6:12:55, 1.66it/s] 43%|████▎ | 28361/65536 [4:51:46<6:13:29, 1.66it/s] 43%|████▎ | 28362/65536 [4:51:46<6:17:23, 1.64it/s] 43%|████▎ | 28363/65536 [4:51:47<6:16:25, 1.65it/s] 43%|████▎ | 28364/65536 [4:51:48<6:19:43, 1.63it/s] 43%|████▎ | 28365/65536 [4:51:48<6:19:55, 1.63it/s] 43%|████▎ | 28366/65536 [4:51:49<6:15:35, 1.65it/s] 43%|████▎ | 28367/65536 [4:51:49<6:32:29, 1.58it/s] 43%|████▎ | 28368/65536 [4:51:50<6:39:24, 1.55it/s] 43%|████▎ | 28369/65536 [4:51:51<6:33:17, 1.58it/s] 43%|████▎ | 28370/65536 [4:51:51<6:26:13, 1.60it/s] 43%|████▎ | 28371/65536 [4:51:52<6:24:29, 1.61it/s] 43%|████▎ | 28372/65536 [4:51:53<6:23:23, 1.62it/s] 43%|████▎ | 28373/65536 [4:51:53<6:23:21, 1.62it/s] 43%|████▎ | 28374/65536 [4:51:54<6:16:37, 1.64it/s] 43%|████▎ | 28375/65536 [4:51:54<6:25:34, 1.61it/s] 43%|████▎ | 28376/65536 [4:51:55<6:14:43, 1.65it/s] 43%|████▎ | 28377/65536 [4:51:56<6:24:56, 1.61it/s] 43%|████▎ | 28378/65536 [4:51:56<6:17:53, 1.64it/s] 43%|████▎ | 28379/65536 [4:51:57<6:14:15, 1.65it/s] 43%|████▎ | 28380/65536 [4:51:57<6:06:48, 1.69it/s] {'loss': 1.6875, 'learning_rate': 6.11821966450349e-07, 'epoch': 1751.85} + 43%|████▎ | 28380/65536 [4:51:57<6:06:48, 1.69it/s] 43%|████▎ | 28381/65536 [4:51:58<6:01:17, 1.71it/s] 43%|████▎ | 28382/65536 [4:51:59<6:08:16, 1.68it/s] 43%|████▎ | 28383/65536 [4:51:59<6:23:37, 1.61it/s] 43%|████▎ | 28384/65536 [4:52:00<6:26:13, 1.60it/s] 43%|████▎ | 28385/65536 [4:52:01<6:24:31, 1.61it/s] 43%|████▎ | 28386/65536 [4:52:01<6:18:37, 1.64it/s] 43%|████▎ | 28387/65536 [4:52:02<6:17:13, 1.64it/s] 43%|████▎ | 28388/65536 [4:52:02<6:13:08, 1.66it/s] 43%|████▎ | 28389/65536 [4:52:03<6:15:12, 1.65it/s] 43%|████▎ | 28390/65536 [4:52:04<6:12:17, 1.66it/s] 43%|████▎ | 28391/65536 [4:52:04<6:10:01, 1.67it/s] 43%|████▎ | 28392/65536 [4:52:05<6:20:58, 1.62it/s] 43%|████▎ | 28393/65536 [4:52:05<6:12:10, 1.66it/s] 43%|████▎ | 28394/65536 [4:52:06<6:27:25, 1.60it/s] 43%|████▎ | 28395/65536 [4:52:07<6:23:57, 1.61it/s] 43%|████▎ | 28396/65536 [4:52:07<6:27:51, 1.60it/s] 43%|████▎ | 28397/65536 [4:52:08<6:18:06, 1.64it/s] 43%|████▎ | 28398/65536 [4:52:08<6:16:49, 1.64it/s] 43%|████▎ | 28399/65536 [4:52:09<6:30:17, 1.59it/s] 43%|████▎ | 28400/65536 [4:52:10<6:26:25, 1.60it/s] {'loss': 1.7007, 'learning_rate': 6.115464674911228e-07, 'epoch': 1753.09} + 43%|████▎ | 28400/65536 [4:52:10<6:26:25, 1.60it/s] 43%|████▎ | 28401/65536 [4:52:10<6:23:20, 1.61it/s] 43%|████▎ | 28402/65536 [4:52:11<6:30:16, 1.59it/s] 43%|████▎ | 28403/65536 [4:52:12<6:37:54, 1.56it/s] 43%|████▎ | 28404/65536 [4:52:12<6:44:34, 1.53it/s] 43%|████▎ | 28405/65536 [4:52:13<6:34:12, 1.57it/s] 43%|████▎ | 28406/65536 [4:52:14<6:26:28, 1.60it/s] 43%|████▎ | 28407/65536 [4:52:14<6:16:52, 1.64it/s] 43%|████▎ | 28408/65536 [4:52:15<6:12:06, 1.66it/s] 43%|████▎ | 28409/65536 [4:52:15<6:21:32, 1.62it/s] 43%|████▎ | 28410/65536 [4:52:16<6:18:21, 1.64it/s] 43%|████▎ | 28411/65536 [4:52:17<6:21:41, 1.62it/s] 43%|████▎ | 28412/65536 [4:52:17<6:32:04, 1.58it/s] 43%|████▎ | 28413/65536 [4:52:18<6:33:45, 1.57it/s] 43%|████▎ | 28414/65536 [4:52:19<6:30:52, 1.58it/s] 43%|████▎ | 28415/65536 [4:52:19<6:39:48, 1.55it/s] 43%|████▎ | 28416/65536 [4:52:20<6:28:25, 1.59it/s] 43%|████▎ | 28417/65536 [4:52:20<6:13:08, 1.66it/s] 43%|████▎ | 28418/65536 [4:52:21<6:21:57, 1.62it/s] 43%|████▎ | 28419/65536 [4:52:22<6:14:39, 1.65it/s] 43%|████▎ | 28420/65536 [4:52:22<6:22:38, 1.62it/s] {'loss': 1.7035, 'learning_rate': 6.112709685318966e-07, 'epoch': 1754.32} + 43%|████▎ | 28420/65536 [4:52:22<6:22:38, 1.62it/s] 43%|████▎ | 28421/65536 [4:52:23<6:17:40, 1.64it/s] 43%|████▎ | 28422/65536 [4:52:23<6:19:56, 1.63it/s] 43%|████▎ | 28423/65536 [4:52:24<6:20:16, 1.63it/s] 43%|████▎ | 28424/65536 [4:52:25<6:19:58, 1.63it/s] 43%|████▎ | 28425/65536 [4:52:25<6:15:50, 1.65it/s] 43%|████▎ | 28426/65536 [4:52:26<6:28:38, 1.59it/s] 43%|████▎ | 28427/65536 [4:52:27<6:23:47, 1.61it/s] 43%|████▎ | 28428/65536 [4:52:27<6:34:04, 1.57it/s] 43%|████▎ | 28429/65536 [4:52:28<6:29:38, 1.59it/s] 43%|████▎ | 28430/65536 [4:52:28<6:15:20, 1.65it/s] 43%|████▎ | 28431/65536 [4:52:29<6:09:24, 1.67it/s] 43%|████▎ | 28432/65536 [4:52:30<6:27:55, 1.59it/s] 43%|████▎ | 28433/65536 [4:52:30<6:28:15, 1.59it/s] 43%|████▎ | 28434/65536 [4:52:31<6:24:57, 1.61it/s] 43%|████▎ | 28435/65536 [4:52:31<6:18:01, 1.64it/s] 43%|████▎ | 28436/65536 [4:52:32<6:25:09, 1.61it/s] 43%|████▎ | 28437/65536 [4:52:33<6:24:10, 1.61it/s] 43%|████▎ | 28438/65536 [4:52:33<6:20:00, 1.63it/s] 43%|████▎ | 28439/65536 [4:52:34<6:10:49, 1.67it/s] 43%|████▎ | 28440/65536 [4:52:35<6:15:09, 1.65it/s] {'loss': 1.6506, 'learning_rate': 6.109954695726704e-07, 'epoch': 1755.56} + 43%|████▎ | 28440/65536 [4:52:35<6:15:09, 1.65it/s] 43%|████▎ | 28441/65536 [4:52:35<6:24:32, 1.61it/s] 43%|████▎ | 28442/65536 [4:52:36<6:32:47, 1.57it/s] 43%|████▎ | 28443/65536 [4:52:36<6:28:15, 1.59it/s] 43%|████▎ | 28444/65536 [4:52:37<6:23:47, 1.61it/s] 43%|████▎ | 28445/65536 [4:52:38<6:22:03, 1.62it/s] 43%|████▎ | 28446/65536 [4:52:38<6:13:02, 1.66it/s] 43%|████▎ | 28447/65536 [4:52:39<6:15:51, 1.64it/s] 43%|████▎ | 28448/65536 [4:52:40<6:29:34, 1.59it/s] 43%|████▎ | 28449/65536 [4:52:40<6:35:59, 1.56it/s] 43%|████▎ | 28450/65536 [4:52:41<6:18:15, 1.63it/s] 43%|████▎ | 28451/65536 [4:52:41<6:11:19, 1.66it/s] 43%|████▎ | 28452/65536 [4:52:42<6:07:35, 1.68it/s] 43%|████▎ | 28453/65536 [4:52:43<6:15:13, 1.65it/s] 43%|████▎ | 28454/65536 [4:52:43<6:35:24, 1.56it/s] 43%|████▎ | 28455/65536 [4:52:44<6:42:52, 1.53it/s] 43%|████▎ | 28456/65536 [4:52:45<6:36:14, 1.56it/s] 43%|████▎ | 28457/65536 [4:52:45<6:17:29, 1.64it/s] 43%|████▎ | 28458/65536 [4:52:46<6:25:28, 1.60it/s] 43%|████▎ | 28459/65536 [4:52:46<6:20:09, 1.63it/s] 43%|████▎ | 28460/65536 [4:52:47<6:29:36, 1.59it/s] {'loss': 1.7308, 'learning_rate': 6.107199706134443e-07, 'epoch': 1756.79} + 43%|████▎ | 28460/65536 [4:52:47<6:29:36, 1.59it/s] 43%|████▎ | 28461/65536 [4:52:48<6:24:19, 1.61it/s] 43%|████▎ | 28462/65536 [4:52:48<6:26:43, 1.60it/s] 43%|████▎ | 28463/65536 [4:52:49<6:28:15, 1.59it/s] 43%|████▎ | 28464/65536 [4:52:50<6:30:27, 1.58it/s] 43%|████▎ | 28465/65536 [4:52:50<6:34:55, 1.56it/s] 43%|████▎ | 28466/65536 [4:52:51<6:35:15, 1.56it/s] 43%|████▎ | 28467/65536 [4:52:51<6:23:41, 1.61it/s] 43%|████▎ | 28468/65536 [4:52:52<6:34:51, 1.56it/s] 43%|████▎ | 28469/65536 [4:52:53<6:28:07, 1.59it/s] 43%|████▎ | 28470/65536 [4:52:53<6:20:02, 1.63it/s] 43%|████▎ | 28471/65536 [4:52:54<6:16:24, 1.64it/s] 43%|████▎ | 28472/65536 [4:52:54<6:11:04, 1.66it/s] 43%|████▎ | 28473/65536 [4:52:55<6:12:05, 1.66it/s] 43%|████▎ | 28474/65536 [4:52:56<6:12:27, 1.66it/s] 43%|████▎ | 28475/65536 [4:52:56<6:15:18, 1.65it/s] 43%|████▎ | 28476/65536 [4:52:57<6:18:21, 1.63it/s] 43%|████▎ | 28477/65536 [4:52:57<6:14:42, 1.65it/s] 43%|████▎ | 28478/65536 [4:52:58<6:07:59, 1.68it/s] 43%|████▎ | 28479/65536 [4:52:59<6:15:02, 1.65it/s] 43%|████▎ | 28480/65536 [4:52:59<6:28:16, 1.59it/s] {'loss': 1.7238, 'learning_rate': 6.104444716542182e-07, 'epoch': 1758.02} + 43%|████▎ | 28480/65536 [4:52:59<6:28:16, 1.59it/s] 43%|████▎ | 28481/65536 [4:53:00<6:32:33, 1.57it/s] 43%|████▎ | 28482/65536 [4:53:01<6:20:59, 1.62it/s] 43%|████▎ | 28483/65536 [4:53:01<6:14:48, 1.65it/s] 43%|████▎ | 28484/65536 [4:53:02<6:11:51, 1.66it/s] 43%|████▎ | 28485/65536 [4:53:02<6:07:07, 1.68it/s] 43%|████▎ | 28486/65536 [4:53:03<6:17:04, 1.64it/s] 43%|████▎ | 28487/65536 [4:53:04<6:16:47, 1.64it/s] 43%|████▎ | 28488/65536 [4:53:04<6:20:46, 1.62it/s] 43%|████▎ | 28489/65536 [4:53:05<6:21:47, 1.62it/s] 43%|████▎ | 28490/65536 [4:53:05<6:22:30, 1.61it/s] 43%|████▎ | 28491/65536 [4:53:06<6:22:10, 1.62it/s] 43%|████▎ | 28492/65536 [4:53:07<6:22:25, 1.61it/s] 43%|████▎ | 28493/65536 [4:53:07<6:19:06, 1.63it/s] 43%|████▎ | 28494/65536 [4:53:08<6:14:40, 1.65it/s] 43%|████▎ | 28495/65536 [4:53:08<6:11:58, 1.66it/s] 43%|████▎ | 28496/65536 [4:53:09<6:28:35, 1.59it/s] 43%|████▎ | 28497/65536 [4:53:10<6:22:47, 1.61it/s] 43%|████▎ | 28498/65536 [4:53:10<6:19:45, 1.63it/s] 43%|████▎ | 28499/65536 [4:53:11<6:16:25, 1.64it/s] 43%|████▎ | 28500/65536 [4:53:12<6:12:35, 1.66it/s] {'loss': 1.6749, 'learning_rate': 6.10168972694992e-07, 'epoch': 1759.26} + 43%|████▎ | 28500/65536 [4:53:12<6:12:35, 1.66it/s] 43%|████▎ | 28501/65536 [4:53:12<6:18:47, 1.63it/s] 43%|████▎ | 28502/65536 [4:53:13<6:17:05, 1.64it/s] 43%|████▎ | 28503/65536 [4:53:13<6:13:02, 1.65it/s] 43%|████▎ | 28504/65536 [4:53:14<6:14:06, 1.65it/s] 43%|████▎ | 28505/65536 [4:53:15<6:21:09, 1.62it/s] 43%|████▎ | 28506/65536 [4:53:15<6:19:07, 1.63it/s] 43%|████▎ | 28507/65536 [4:53:16<6:19:39, 1.63it/s] 43%|████▎ | 28508/65536 [4:53:17<6:26:48, 1.60it/s] 44%|████▎ | 28509/65536 [4:53:17<6:21:29, 1.62it/s] 44%|████▎ | 28510/65536 [4:53:18<6:21:05, 1.62it/s] 44%|████▎ | 28511/65536 [4:53:18<6:21:41, 1.62it/s] 44%|████▎ | 28512/65536 [4:53:19<6:35:03, 1.56it/s] 44%|████▎ | 28513/65536 [4:53:20<6:39:11, 1.55it/s] 44%|████▎ | 28514/65536 [4:53:20<6:31:13, 1.58it/s] 44%|████▎ | 28515/65536 [4:53:21<6:27:11, 1.59it/s] 44%|████▎ | 28516/65536 [4:53:22<6:20:40, 1.62it/s] 44%|████▎ | 28517/65536 [4:53:22<6:21:38, 1.62it/s] 44%|████▎ | 28518/65536 [4:53:23<6:23:33, 1.61it/s] 44%|████▎ | 28519/65536 [4:53:23<6:19:26, 1.63it/s] 44%|████▎ | 28520/65536 [4:53:24<6:31:39, 1.58it/s] {'loss': 1.6481, 'learning_rate': 6.098934737357659e-07, 'epoch': 1760.49} + 44%|████▎ | 28520/65536 [4:53:24<6:31:39, 1.58it/s] 44%|████▎ | 28521/65536 [4:53:25<6:22:20, 1.61it/s] 44%|████▎ | 28522/65536 [4:53:25<6:15:28, 1.64it/s] 44%|████▎ | 28523/65536 [4:53:26<6:12:22, 1.66it/s] 44%|████▎ | 28524/65536 [4:53:26<6:06:27, 1.68it/s] 44%|████��� | 28525/65536 [4:53:27<6:25:44, 1.60it/s] 44%|████▎ | 28526/65536 [4:53:28<6:18:28, 1.63it/s] 44%|████▎ | 28527/65536 [4:53:28<6:20:24, 1.62it/s] 44%|████▎ | 28528/65536 [4:53:29<6:16:53, 1.64it/s] 44%|████▎ | 28529/65536 [4:53:30<6:25:08, 1.60it/s] 44%|████▎ | 28530/65536 [4:53:30<6:27:02, 1.59it/s] 44%|████▎ | 28531/65536 [4:53:31<6:20:39, 1.62it/s] 44%|████▎ | 28532/65536 [4:53:31<6:17:34, 1.63it/s] 44%|████▎ | 28533/65536 [4:53:32<6:20:48, 1.62it/s] 44%|████▎ | 28534/65536 [4:53:33<6:13:50, 1.65it/s] 44%|████▎ | 28535/65536 [4:53:33<6:09:00, 1.67it/s] 44%|████▎ | 28536/65536 [4:53:34<6:09:13, 1.67it/s] 44%|████▎ | 28537/65536 [4:53:34<6:13:18, 1.65it/s] 44%|████▎ | 28538/65536 [4:53:35<6:10:00, 1.67it/s] 44%|████▎ | 28539/65536 [4:53:36<6:22:45, 1.61it/s] 44%|████▎ | 28540/65536 [4:53:36<6:27:45, 1.59it/s] {'loss': 1.7056, 'learning_rate': 6.096179747765397e-07, 'epoch': 1761.73} + 44%|████▎ | 28540/65536 [4:53:36<6:27:45, 1.59it/s] 44%|████▎ | 28541/65536 [4:53:37<6:18:12, 1.63it/s] 44%|████▎ | 28542/65536 [4:53:38<6:24:50, 1.60it/s] 44%|████▎ | 28543/65536 [4:53:38<6:19:42, 1.62it/s] 44%|████▎ | 28544/65536 [4:53:39<6:18:45, 1.63it/s] 44%|████▎ | 28545/65536 [4:53:39<6:32:32, 1.57it/s] 44%|████▎ | 28546/65536 [4:53:40<6:29:23, 1.58it/s] 44%|████▎ | 28547/65536 [4:53:41<6:20:59, 1.62it/s] 44%|████▎ | 28548/65536 [4:53:41<6:12:34, 1.65it/s] 44%|████▎ | 28549/65536 [4:53:42<6:18:23, 1.63it/s] 44%|████▎ | 28550/65536 [4:53:43<6:26:28, 1.60it/s] 44%|████▎ | 28551/65536 [4:53:43<6:15:14, 1.64it/s] 44%|████▎ | 28552/65536 [4:53:44<6:11:35, 1.66it/s] 44%|████▎ | 28553/65536 [4:53:44<6:23:19, 1.61it/s] 44%|████▎ | 28554/65536 [4:53:45<6:20:14, 1.62it/s] 44%|████▎ | 28555/65536 [4:53:46<6:16:43, 1.64it/s] 44%|████▎ | 28556/65536 [4:53:46<6:19:37, 1.62it/s] 44%|████▎ | 28557/65536 [4:53:47<6:11:58, 1.66it/s] 44%|████▎ | 28558/65536 [4:53:47<6:17:01, 1.63it/s] 44%|████▎ | 28559/65536 [4:53:48<6:12:51, 1.65it/s] 44%|████▎ | 28560/65536 [4:53:49<6:04:57, 1.69it/s] {'loss': 1.7139, 'learning_rate': 6.093424758173136e-07, 'epoch': 1762.96} + 44%|████▎ | 28560/65536 [4:53:49<6:04:57, 1.69it/s] 44%|████▎ | 28561/65536 [4:53:49<6:23:45, 1.61it/s] 44%|████▎ | 28562/65536 [4:53:50<6:32:26, 1.57it/s] 44%|████▎ | 28563/65536 [4:53:51<6:34:00, 1.56it/s] 44%|████▎ | 28564/65536 [4:53:51<6:28:21, 1.59it/s] 44%|████▎ | 28565/65536 [4:53:52<6:20:03, 1.62it/s] 44%|████▎ | 28566/65536 [4:53:52<6:12:12, 1.66it/s] 44%|████▎ | 28567/65536 [4:53:53<6:09:06, 1.67it/s] 44%|████▎ | 28568/65536 [4:53:53<6:09:50, 1.67it/s] 44%|████▎ | 28569/65536 [4:53:54<6:08:16, 1.67it/s] 44%|████▎ | 28570/65536 [4:53:55<6:03:07, 1.70it/s] 44%|████▎ | 28571/65536 [4:53:55<6:19:31, 1.62it/s] 44%|████▎ | 28572/65536 [4:53:56<6:17:56, 1.63it/s] 44%|████▎ | 28573/65536 [4:53:57<6:14:00, 1.65it/s] 44%|████▎ | 28574/65536 [4:53:57<6:21:51, 1.61it/s] 44%|████▎ | 28575/65536 [4:53:58<6:15:06, 1.64it/s] 44%|████▎ | 28576/65536 [4:53:58<6:12:38, 1.65it/s] 44%|████▎ | 28577/65536 [4:53:59<6:25:56, 1.60it/s] 44%|████▎ | 28578/65536 [4:54:00<6:24:02, 1.60it/s] 44%|████▎ | 28579/65536 [4:54:00<6:26:03, 1.60it/s] 44%|████▎ | 28580/65536 [4:54:01<6:22:58, 1.61it/s] {'loss': 1.6868, 'learning_rate': 6.090669768580873e-07, 'epoch': 1764.2} + 44%|████▎ | 28580/65536 [4:54:01<6:22:58, 1.61it/s] 44%|████▎ | 28581/65536 [4:54:02<6:24:57, 1.60it/s] 44%|████▎ | 28582/65536 [4:54:02<6:20:24, 1.62it/s] 44%|████▎ | 28583/65536 [4:54:03<6:19:57, 1.62it/s] 44%|████▎ | 28584/65536 [4:54:03<6:07:36, 1.68it/s] 44%|████▎ | 28585/65536 [4:54:04<6:17:48, 1.63it/s] 44%|████▎ | 28586/65536 [4:54:05<6:21:55, 1.61it/s] 44%|████▎ | 28587/65536 [4:54:05<6:32:16, 1.57it/s] 44%|████▎ | 28588/65536 [4:54:06<6:20:46, 1.62it/s] 44%|████▎ | 28589/65536 [4:54:06<6:13:52, 1.65it/s] 44%|████▎ | 28590/65536 [4:54:07<6:17:23, 1.63it/s] 44%|████▎ | 28591/65536 [4:54:08<6:03:48, 1.69it/s] 44%|████▎ | 28592/65536 [4:54:08<5:59:43, 1.71it/s] 44%|████▎ | 28593/65536 [4:54:09<6:05:17, 1.69it/s] 44%|████▎ | 28594/65536 [4:54:09<6:27:20, 1.59it/s] 44%|████▎ | 28595/65536 [4:54:10<6:15:00, 1.64it/s] 44%|████▎ | 28596/65536 [4:54:11<6:20:10, 1.62it/s] 44%|████▎ | 28597/65536 [4:54:11<6:26:00, 1.59it/s] 44%|████▎ | 28598/65536 [4:54:12<6:28:31, 1.58it/s] 44%|████▎ | 28599/65536 [4:54:13<6:14:23, 1.64it/s] 44%|████▎ | 28600/65536 [4:54:13<6:11:21, 1.66it/s] {'loss': 1.7287, 'learning_rate': 6.087914778988613e-07, 'epoch': 1765.43} + 44%|████▎ | 28600/65536 [4:54:13<6:11:21, 1.66it/s] 44%|████▎ | 28601/65536 [4:54:14<6:16:41, 1.63it/s] 44%|████▎ | 28602/65536 [4:54:14<6:20:54, 1.62it/s] 44%|████▎ | 28603/65536 [4:54:15<6:15:02, 1.64it/s] 44%|████▎ | 28604/65536 [4:54:16<6:12:04, 1.65it/s] 44%|████▎ | 28605/65536 [4:54:16<6:03:42, 1.69it/s] 44%|████▎ | 28606/65536 [4:54:17<6:09:08, 1.67it/s] 44%|████▎ | 28607/65536 [4:54:17<6:13:57, 1.65it/s] 44%|████▎ | 28608/65536 [4:54:18<6:17:27, 1.63it/s] 44%|████▎ | 28609/65536 [4:54:19<6:11:56, 1.65it/s] 44%|████▎ | 28610/65536 [4:54:19<6:27:31, 1.59it/s] 44%|████▎ | 28611/65536 [4:54:20<6:19:47, 1.62it/s] 44%|████▎ | 28612/65536 [4:54:20<6:10:10, 1.66it/s] 44%|████▎ | 28613/65536 [4:54:21<6:14:04, 1.65it/s] 44%|████▎ | 28614/65536 [4:54:22<6:29:56, 1.58it/s] 44%|████▎ | 28615/65536 [4:54:22<6:22:01, 1.61it/s] 44%|████▎ | 28616/65536 [4:54:23<6:11:49, 1.65it/s] 44%|████▎ | 28617/65536 [4:54:24<6:20:00, 1.62it/s] 44%|████▎ | 28618/65536 [4:54:24<6:24:27, 1.60it/s] 44%|████▎ | 28619/65536 [4:54:25<6:36:19, 1.55it/s] 44%|████▎ | 28620/65536 [4:54:25<6:24:50, 1.60it/s] {'loss': 1.676, 'learning_rate': 6.085159789396351e-07, 'epoch': 1766.67} + 44%|████▎ | 28620/65536 [4:54:25<6:24:50, 1.60it/s] 44%|████▎ | 28621/65536 [4:54:26<6:23:10, 1.61it/s] 44%|████▎ | 28622/65536 [4:54:27<6:17:39, 1.63it/s] 44%|████▎ | 28623/65536 [4:54:27<6:17:06, 1.63it/s] 44%|████▎ | 28624/65536 [4:54:28<6:21:38, 1.61it/s] 44%|████▎ | 28625/65536 [4:54:28<6:14:12, 1.64it/s] 44%|████▎ | 28626/65536 [4:54:29<6:31:25, 1.57it/s] 44%|████▎ | 28627/65536 [4:54:30<6:36:06, 1.55it/s] 44%|████▎ | 28628/65536 [4:54:30<6:33:12, 1.56it/s] 44%|████▎ | 28629/65536 [4:54:31<6:17:24, 1.63it/s] 44%|████▎ | 28630/65536 [4:54:32<6:21:39, 1.61it/s] 44%|████▎ | 28631/65536 [4:54:32<6:18:57, 1.62it/s] 44%|████▎ | 28632/65536 [4:54:33<6:11:53, 1.65it/s] 44%|████▎ | 28633/65536 [4:54:33<6:14:49, 1.64it/s] 44%|████▎ | 28634/65536 [4:54:34<6:12:41, 1.65it/s] 44%|████▎ | 28635/65536 [4:54:35<6:11:55, 1.65it/s] 44%|████▎ | 28636/65536 [4:54:35<6:19:09, 1.62it/s] 44%|████▎ | 28637/65536 [4:54:36<6:14:23, 1.64it/s] 44%|████▎ | 28638/65536 [4:54:37<6:14:30, 1.64it/s] 44%|████▎ | 28639/65536 [4:54:37<6:08:22, 1.67it/s] 44%|████▎ | 28640/65536 [4:54:38<6:05:22, 1.68it/s] {'loss': 1.7063, 'learning_rate': 6.082404799804089e-07, 'epoch': 1767.9} + 44%|████▎ | 28640/65536 [4:54:38<6:05:22, 1.68it/s] 44%|████▎ | 28641/65536 [4:54:38<6:09:12, 1.67it/s] 44%|████▎ | 28642/65536 [4:54:39<6:20:42, 1.62it/s] 44%|████▎ | 28643/65536 [4:54:40<6:12:21, 1.65it/s] 44%|████▎ | 28644/65536 [4:54:40<6:13:31, 1.65it/s] 44%|████▎ | 28645/65536 [4:54:41<6:14:28, 1.64it/s] 44%|████▎ | 28646/65536 [4:54:41<6:12:24, 1.65it/s] 44%|████▎ | 28647/65536 [4:54:42<6:05:24, 1.68it/s] 44%|████▎ | 28648/65536 [4:54:43<6:11:01, 1.66it/s] 44%|████▎ | 28649/65536 [4:54:43<6:20:48, 1.61it/s] 44%|████▎ | 28650/65536 [4:54:44<6:12:32, 1.65it/s] 44%|████▎ | 28651/65536 [4:54:44<6:35:27, 1.55it/s] 44%|████▎ | 28652/65536 [4:54:45<6:28:16, 1.58it/s] 44%|████▎ | 28653/65536 [4:54:46<6:25:46, 1.59it/s] 44%|████▎ | 28654/65536 [4:54:46<6:21:30, 1.61it/s] 44%|████▎ | 28655/65536 [4:54:47<6:17:18, 1.63it/s] 44%|████▎ | 28656/65536 [4:54:47<6:09:20, 1.66it/s] 44%|████▎ | 28657/65536 [4:54:48<6:14:07, 1.64it/s] 44%|████▎ | 28658/65536 [4:54:49<6:24:36, 1.60it/s] 44%|████▎ | 28659/65536 [4:54:49<6:15:55, 1.63it/s] 44%|████▎ | 28660/65536 [4:54:50<6:26:35, 1.59it/s] {'loss': 1.7094, 'learning_rate': 6.079649810211828e-07, 'epoch': 1769.14} + 44%|████▎ | 28660/65536 [4:54:50<6:26:35, 1.59it/s] 44%|████▎ | 28661/65536 [4:54:51<6:13:15, 1.65it/s] 44%|████▎ | 28662/65536 [4:54:51<6:09:19, 1.66it/s] 44%|████▎ | 28663/65536 [4:54:52<6:11:19, 1.66it/s] 44%|████▎ | 28664/65536 [4:54:52<6:18:43, 1.62it/s] 44%|████▎ | 28665/65536 [4:54:53<6:16:22, 1.63it/s] 44%|████▎ | 28666/65536 [4:54:54<6:14:23, 1.64it/s] 44%|████▎ | 28667/65536 [4:54:54<6:12:11, 1.65it/s] 44%|████▎ | 28668/65536 [4:54:55<6:09:26, 1.66it/s] 44%|████▎ | 28669/65536 [4:54:55<6:20:00, 1.62it/s] 44%|████▎ | 28670/65536 [4:54:56<6:15:36, 1.64it/s] 44%|████▎ | 28671/65536 [4:54:57<6:13:21, 1.65it/s] 44%|████▍ | 28672/65536 [4:54:57<6:13:01, 1.65it/s] 44%|████▍ | 28673/65536 [4:54:58<6:26:00, 1.59it/s] 44%|████▍ | 28674/65536 [4:54:59<6:16:30, 1.63it/s] 44%|████▍ | 28675/65536 [4:54:59<6:25:02, 1.60it/s] 44%|████▍ | 28676/65536 [4:55:00<6:14:32, 1.64it/s] 44%|████▍ | 28677/65536 [4:55:00<6:18:21, 1.62it/s] 44%|████▍ | 28678/65536 [4:55:01<6:15:04, 1.64it/s] 44%|████▍ | 28679/65536 [4:55:02<6:18:38, 1.62it/s] 44%|████▍ | 28680/65536 [4:55:02<6:14:02, 1.64it/s] {'loss': 1.7092, 'learning_rate': 6.076894820619566e-07, 'epoch': 1770.37} + 44%|████▍ | 28680/65536 [4:55:02<6:14:02, 1.64it/s] 44%|████▍ | 28681/65536 [4:55:03<6:13:46, 1.64it/s] 44%|████▍ | 28682/65536 [4:55:03<6:17:44, 1.63it/s] 44%|████▍ | 28683/65536 [4:55:04<6:15:27, 1.64it/s] 44%|████▍ | 28684/65536 [4:55:05<6:12:06, 1.65it/s] 44%|████▍ | 28685/65536 [4:55:05<6:11:13, 1.65it/s] 44%|████▍ | 28686/65536 [4:55:06<6:12:37, 1.65it/s] 44%|████▍ | 28687/65536 [4:55:06<6:11:04, 1.66it/s] 44%|████▍ | 28688/65536 [4:55:07<6:03:01, 1.69it/s] 44%|████▍ | 28689/65536 [4:55:08<6:11:16, 1.65it/s] 44%|████▍ | 28690/65536 [4:55:08<6:20:32, 1.61it/s] 44%|████▍ | 28691/65536 [4:55:09<6:38:49, 1.54it/s] 44%|████▍ | 28692/65536 [4:55:10<6:20:45, 1.61it/s] 44%|████▍ | 28693/65536 [4:55:10<6:16:21, 1.63it/s] 44%|████▍ | 28694/65536 [4:55:11<6:11:58, 1.65it/s] 44%|████▍ | 28695/65536 [4:55:11<6:07:06, 1.67it/s] 44%|████▍ | 28696/65536 [4:55:12<6:11:59, 1.65it/s] 44%|████▍ | 28697/65536 [4:55:13<6:17:01, 1.63it/s] 44%|████▍ | 28698/65536 [4:55:13<6:10:58, 1.65it/s] 44%|████▍ | 28699/65536 [4:55:14<6:07:49, 1.67it/s] 44%|████▍ | 28700/65536 [4:55:14<6:10:30, 1.66it/s] {'loss': 1.7185, 'learning_rate': 6.074139831027304e-07, 'epoch': 1771.6} + 44%|████▍ | 28700/65536 [4:55:14<6:10:30, 1.66it/s] 44%|████▍ | 28701/65536 [4:55:15<6:20:49, 1.61it/s] 44%|████▍ | 28702/65536 [4:55:16<6:23:34, 1.60it/s] 44%|████▍ | 28703/65536 [4:55:16<6:17:32, 1.63it/s] 44%|████▍ | 28704/65536 [4:55:17<6:16:04, 1.63it/s] 44%|████▍ | 28705/65536 [4:55:18<6:19:56, 1.62it/s] 44%|████▍ | 28706/65536 [4:55:18<6:25:15, 1.59it/s] 44%|████▍ | 28707/65536 [4:55:19<6:36:46, 1.55it/s] 44%|████▍ | 28708/65536 [4:55:19<6:32:42, 1.56it/s] 44%|████▍ | 28709/65536 [4:55:20<6:28:59, 1.58it/s] 44%|████▍ | 28710/65536 [4:55:21<6:25:21, 1.59it/s] 44%|████▍ | 28711/65536 [4:55:21<6:16:04, 1.63it/s] 44%|████▍ | 28712/65536 [4:55:22<6:18:46, 1.62it/s] 44%|████▍ | 28713/65536 [4:55:23<6:12:57, 1.65it/s] 44%|████▍ | 28714/65536 [4:55:23<6:18:21, 1.62it/s] 44%|████▍ | 28715/65536 [4:55:24<6:06:38, 1.67it/s] 44%|████▍ | 28716/65536 [4:55:24<6:12:33, 1.65it/s] 44%|████▍ | 28717/65536 [4:55:25<6:09:45, 1.66it/s] 44%|████▍ | 28718/65536 [4:55:26<6:12:13, 1.65it/s] 44%|████▍ | 28719/65536 [4:55:26<6:21:04, 1.61it/s] 44%|████▍ | 28720/65536 [4:55:27<6:15:02, 1.64it/s] {'loss': 1.666, 'learning_rate': 6.071384841435042e-07, 'epoch': 1772.84} + 44%|████▍ | 28720/65536 [4:55:27<6:15:02, 1.64it/s] 44%|████▍ | 28721/65536 [4:55:27<6:11:46, 1.65it/s] 44%|████▍ | 28722/65536 [4:55:28<6:07:37, 1.67it/s] 44%|████▍ | 28723/65536 [4:55:29<6:30:56, 1.57it/s] 44%|████▍ | 28724/65536 [4:55:29<6:31:13, 1.57it/s] 44%|████▍ | 28725/65536 [4:55:30<6:25:11, 1.59it/s] 44%|████▍ | 28726/65536 [4:55:31<6:22:01, 1.61it/s] 44%|████▍ | 28727/65536 [4:55:31<6:19:35, 1.62it/s] 44%|████▍ | 28728/65536 [4:55:32<6:15:59, 1.63it/s] 44%|████▍ | 28729/65536 [4:55:32<6:06:21, 1.67it/s] 44%|████▍ | 28730/65536 [4:55:33<6:08:27, 1.66it/s] 44%|████▍ | 28731/65536 [4:55:33<6:02:05, 1.69it/s] 44%|████▍ | 28732/65536 [4:55:34<6:10:28, 1.66it/s] 44%|████▍ | 28733/65536 [4:55:35<6:19:42, 1.62it/s] 44%|████▍ | 28734/65536 [4:55:35<6:26:50, 1.59it/s] 44%|████▍ | 28735/65536 [4:55:36<6:37:39, 1.54it/s] 44%|████▍ | 28736/65536 [4:55:37<6:33:55, 1.56it/s] 44%|████▍ | 28737/65536 [4:55:37<6:24:15, 1.60it/s] 44%|████▍ | 28738/65536 [4:55:38<6:12:17, 1.65it/s] 44%|████▍ | 28739/65536 [4:55:39<6:19:55, 1.61it/s] 44%|████▍ | 28740/65536 [4:55:39<6:09:37, 1.66it/s] {'loss': 1.7145, 'learning_rate': 6.068629851842781e-07, 'epoch': 1774.07} + 44%|████▍ | 28740/65536 [4:55:39<6:09:37, 1.66it/s] 44%|████▍ | 28741/65536 [4:55:40<6:02:10, 1.69it/s] 44%|████▍ | 28742/65536 [4:55:40<6:08:12, 1.67it/s] 44%|████▍ | 28743/65536 [4:55:41<6:07:43, 1.67it/s] 44%|████▍ | 28744/65536 [4:55:42<6:13:28, 1.64it/s] 44%|████▍ | 28745/65536 [4:55:42<6:12:57, 1.64it/s] 44%|████▍ | 28746/65536 [4:55:43<6:04:18, 1.68it/s] 44%|████▍ | 28747/65536 [4:55:43<6:11:52, 1.65it/s] 44%|████▍ | 28748/65536 [4:55:44<6:06:35, 1.67it/s] 44%|████▍ | 28749/65536 [4:55:45<6:09:10, 1.66it/s] 44%|████▍ | 28750/65536 [4:55:45<6:06:34, 1.67it/s] 44%|████▍ | 28751/65536 [4:55:46<6:16:07, 1.63it/s] 44%|████▍ | 28752/65536 [4:55:46<6:15:10, 1.63it/s] 44%|████▍ | 28753/65536 [4:55:47<6:17:58, 1.62it/s] 44%|████▍ | 28754/65536 [4:55:48<6:21:23, 1.61it/s] 44%|████▍ | 28755/65536 [4:55:48<6:23:44, 1.60it/s] 44%|████▍ | 28756/65536 [4:55:49<6:23:33, 1.60it/s] 44%|████▍ | 28757/65536 [4:55:50<6:23:54, 1.60it/s] 44%|████▍ | 28758/65536 [4:55:50<6:17:52, 1.62it/s] 44%|████▍ | 28759/65536 [4:55:51<6:13:38, 1.64it/s] 44%|████▍ | 28760/65536 [4:55:51<6:17:24, 1.62it/s] {'loss': 1.7243, 'learning_rate': 6.06587486225052e-07, 'epoch': 1775.31} + 44%|████▍ | 28760/65536 [4:55:51<6:17:24, 1.62it/s] 44%|████▍ | 28761/65536 [4:55:52<6:11:42, 1.65it/s] 44%|████▍ | 28762/65536 [4:55:53<6:22:30, 1.60it/s] 44%|████▍ | 28763/65536 [4:55:53<6:29:24, 1.57it/s] 44%|████▍ | 28764/65536 [4:55:54<6:23:54, 1.60it/s] 44%|████▍ | 28765/65536 [4:55:54<6:25:59, 1.59it/s] 44%|████▍ | 28766/65536 [4:55:55<6:31:02, 1.57it/s] 44%|████▍ | 28767/65536 [4:55:56<6:23:24, 1.60it/s] 44%|████▍ | 28768/65536 [4:55:56<6:24:21, 1.59it/s] 44%|████▍ | 28769/65536 [4:55:57<6:16:54, 1.63it/s] 44%|████▍ | 28770/65536 [4:55:58<6:13:28, 1.64it/s] 44%|████▍ | 28771/65536 [4:55:58<6:07:45, 1.67it/s] 44%|████▍ | 28772/65536 [4:55:59<6:29:32, 1.57it/s] 44%|████▍ | 28773/65536 [4:55:59<6:28:33, 1.58it/s] 44%|████▍ | 28774/65536 [4:56:00<6:15:09, 1.63it/s] 44%|████▍ | 28775/65536 [4:56:01<6:15:18, 1.63it/s] 44%|████▍ | 28776/65536 [4:56:01<6:11:10, 1.65it/s] 44%|████▍ | 28777/65536 [4:56:02<6:16:06, 1.63it/s] 44%|████▍ | 28778/65536 [4:56:03<6:20:16, 1.61it/s] 44%|████▍ | 28779/65536 [4:56:03<6:15:33, 1.63it/s] 44%|████▍ | 28780/65536 [4:56:04<6:07:23, 1.67it/s] {'loss': 1.7053, 'learning_rate': 6.063119872658258e-07, 'epoch': 1776.54} + 44%|████▍ | 28780/65536 [4:56:04<6:07:23, 1.67it/s] 44%|████▍ | 28781/65536 [4:56:04<6:12:37, 1.64it/s] 44%|████▍ | 28782/65536 [4:56:05<6:14:09, 1.64it/s] 44%|████▍ | 28783/65536 [4:56:06<6:16:20, 1.63it/s] 44%|████▍ | 28784/65536 [4:56:06<6:17:58, 1.62it/s] 44%|████▍ | 28785/65536 [4:56:07<6:13:49, 1.64it/s] 44%|████▍ | 28786/65536 [4:56:07<6:16:56, 1.62it/s] 44%|████▍ | 28787/65536 [4:56:08<6:08:35, 1.66it/s] 44%|████▍ | 28788/65536 [4:56:09<6:28:04, 1.58it/s] 44%|████▍ | 28789/65536 [4:56:09<6:25:04, 1.59it/s] 44%|████▍ | 28790/65536 [4:56:10<6:20:39, 1.61it/s] 44%|████▍ | 28791/65536 [4:56:11<6:21:42, 1.60it/s] 44%|████▍ | 28792/65536 [4:56:11<6:21:48, 1.60it/s] 44%|████▍ | 28793/65536 [4:56:12<6:13:35, 1.64it/s] 44%|████▍ | 28794/65536 [4:56:12<6:13:30, 1.64it/s] 44%|████▍ | 28795/65536 [4:56:13<6:05:37, 1.67it/s] 44%|████▍ | 28796/65536 [4:56:13<6:02:36, 1.69it/s] 44%|████▍ | 28797/65536 [4:56:14<5:57:27, 1.71it/s] 44%|████▍ | 28798/65536 [4:56:15<5:53:11, 1.73it/s] 44%|████▍ | 28799/65536 [4:56:15<5:59:51, 1.70it/s] 44%|████▍ | 28800/65536 [4:56:16<6:03:02, 1.69it/s] {'loss': 1.7349, 'learning_rate': 6.060364883065997e-07, 'epoch': 1777.78} + 44%|████▍ | 28800/65536 [4:56:16<6:03:02, 1.69it/s] 44%|████▍ | 28801/65536 [4:56:16<6:09:05, 1.66it/s] 44%|████▍ | 28802/65536 [4:56:17<6:06:16, 1.67it/s] 44%|████▍ | 28803/65536 [4:56:18<6:23:41, 1.60it/s] 44%|████▍ | 28804/65536 [4:56:18<6:37:21, 1.54it/s] 44%|████▍ | 28805/65536 [4:56:19<6:43:02, 1.52it/s] 44%|████▍ | 28806/65536 [4:56:20<6:35:52, 1.55it/s] 44%|████▍ | 28807/65536 [4:56:20<6:29:04, 1.57it/s] 44%|████▍ | 28808/65536 [4:56:21<6:15:38, 1.63it/s] 44%|████▍ | 28809/65536 [4:56:22<6:23:10, 1.60it/s] 44%|████▍ | 28810/65536 [4:56:22<6:31:42, 1.56it/s] 44%|████▍ | 28811/65536 [4:56:23<6:26:12, 1.58it/s] 44%|████▍ | 28812/65536 [4:56:23<6:23:54, 1.59it/s] 44%|████▍ | 28813/65536 [4:56:24<6:15:25, 1.63it/s] 44%|████▍ | 28814/65536 [4:56:25<6:08:19, 1.66it/s] 44%|████▍ | 28815/65536 [4:56:25<6:11:33, 1.65it/s] 44%|████▍ | 28816/65536 [4:56:26<6:15:19, 1.63it/s] 44%|████▍ | 28817/65536 [4:56:26<6:11:16, 1.65it/s] 44%|████▍ | 28818/65536 [4:56:27<6:06:59, 1.67it/s] 44%|████▍ | 28819/65536 [4:56:28<6:12:21, 1.64it/s] 44%|████▍ | 28820/65536 [4:56:28<6:15:25, 1.63it/s] {'loss': 1.7217, 'learning_rate': 6.057609893473735e-07, 'epoch': 1779.01} + 44%|████▍ | 28820/65536 [4:56:28<6:15:25, 1.63it/s] 44%|████▍ | 28821/65536 [4:56:29<6:22:00, 1.60it/s] 44%|████▍ | 28822/65536 [4:56:30<6:18:58, 1.61it/s] 44%|████▍ | 28823/65536 [4:56:30<6:19:36, 1.61it/s] 44%|████▍ | 28824/65536 [4:56:31<6:21:30, 1.60it/s] 44%|████▍ | 28825/65536 [4:56:31<6:18:35, 1.62it/s] 44%|████▍ | 28826/65536 [4:56:32<6:15:47, 1.63it/s] 44%|████▍ | 28827/65536 [4:56:33<6:11:49, 1.65it/s] 44%|████▍ | 28828/65536 [4:56:33<6:03:34, 1.68it/s] 44%|████▍ | 28829/65536 [4:56:34<6:06:34, 1.67it/s] 44%|████▍ | 28830/65536 [4:56:34<6:07:39, 1.66it/s] 44%|████▍ | 28831/65536 [4:56:35<6:16:25, 1.63it/s] 44%|████▍ | 28832/65536 [4:56:36<6:15:13, 1.63it/s] 44%|████▍ | 28833/65536 [4:56:36<6:21:15, 1.60it/s] 44%|████▍ | 28834/65536 [4:56:37<6:10:51, 1.65it/s] 44%|████▍ | 28835/65536 [4:56:37<6:05:23, 1.67it/s] 44%|████▍ | 28836/65536 [4:56:38<6:16:55, 1.62it/s] 44%|████▍ | 28837/65536 [4:56:39<6:36:01, 1.54it/s] 44%|████▍ | 28838/65536 [4:56:39<6:35:26, 1.55it/s] 44%|████▍ | 28839/65536 [4:56:40<6:32:25, 1.56it/s] 44%|████▍ | 28840/65536 [4:56:41<6:23:06, 1.60it/s] {'loss': 1.6761, 'learning_rate': 6.054854903881474e-07, 'epoch': 1780.25} + 44%|████▍ | 28840/65536 [4:56:41<6:23:06, 1.60it/s] 44%|████▍ | 28841/65536 [4:56:41<6:25:39, 1.59it/s] 44%|████▍ | 28842/65536 [4:56:42<6:17:06, 1.62it/s] 44%|████▍ | 28843/65536 [4:56:43<6:19:45, 1.61it/s] 44%|████▍ | 28844/65536 [4:56:43<6:13:16, 1.64it/s] 44%|████▍ | 28845/65536 [4:56:44<6:17:34, 1.62it/s] 44%|████▍ | 28846/65536 [4:56:44<6:10:40, 1.65it/s] 44%|████▍ | 28847/65536 [4:56:45<6:12:58, 1.64it/s] 44%|████▍ | 28848/65536 [4:56:46<6:19:00, 1.61it/s] 44%|████▍ | 28849/65536 [4:56:46<6:07:32, 1.66it/s] 44%|████▍ | 28850/65536 [4:56:47<6:10:16, 1.65it/s] 44%|████▍ | 28851/65536 [4:56:47<6:09:10, 1.66it/s] 44%|████▍ | 28852/65536 [4:56:48<6:09:57, 1.65it/s] 44%|████▍ | 28853/65536 [4:56:49<6:28:51, 1.57it/s] 44%|████▍ | 28854/65536 [4:56:49<6:20:45, 1.61it/s] 44%|████▍ | 28855/65536 [4:56:50<6:23:42, 1.59it/s] 44%|████▍ | 28856/65536 [4:56:51<6:20:03, 1.61it/s] 44%|████▍ | 28857/65536 [4:56:51<6:30:23, 1.57it/s] 44%|████▍ | 28858/65536 [4:56:52<6:18:30, 1.62it/s] 44%|████▍ | 28859/65536 [4:56:52<6:13:33, 1.64it/s] 44%|████▍ | 28860/65536 [4:56:53<6:17:19, 1.62it/s] {'loss': 1.6951, 'learning_rate': 6.052099914289213e-07, 'epoch': 1781.48} + 44%|████▍ | 28860/65536 [4:56:53<6:17:19, 1.62it/s] 44%|████▍ | 28861/65536 [4:56:54<6:21:35, 1.60it/s] 44%|████▍ | 28862/65536 [4:56:54<6:21:32, 1.60it/s] 44%|████▍ | 28863/65536 [4:56:55<6:21:02, 1.60it/s] 44%|████▍ | 28864/65536 [4:56:55<6:17:30, 1.62it/s] 44%|████▍ | 28865/65536 [4:56:56<6:14:12, 1.63it/s] 44%|████▍ | 28866/65536 [4:56:57<6:13:17, 1.64it/s] 44%|████▍ | 28867/65536 [4:56:57<6:09:30, 1.65it/s] 44%|████▍ | 28868/65536 [4:56:58<6:05:46, 1.67it/s] 44%|████▍ | 28869/65536 [4:56:59<6:24:52, 1.59it/s] 44%|████▍ | 28870/65536 [4:56:59<6:20:39, 1.61it/s] 44%|████▍ | 28871/65536 [4:57:00<6:26:11, 1.58it/s] 44%|████▍ | 28872/65536 [4:57:00<6:27:27, 1.58it/s] 44%|████▍ | 28873/65536 [4:57:01<6:29:22, 1.57it/s] 44%|████▍ | 28874/65536 [4:57:02<6:21:19, 1.60it/s] 44%|████▍ | 28875/65536 [4:57:02<6:31:41, 1.56it/s] 44%|████▍ | 28876/65536 [4:57:03<6:36:45, 1.54it/s] 44%|████▍ | 28877/65536 [4:57:04<6:28:26, 1.57it/s] 44%|████▍ | 28878/65536 [4:57:04<6:24:13, 1.59it/s] 44%|████▍ | 28879/65536 [4:57:05<6:14:16, 1.63it/s] 44%|████▍ | 28880/65536 [4:57:05<6:11:01, 1.65it/s] {'loss': 1.7104, 'learning_rate': 6.049344924696951e-07, 'epoch': 1782.72} + 44%|████▍ | 28880/65536 [4:57:05<6:11:01, 1.65it/s] 44%|████▍ | 28881/65536 [4:57:06<6:15:35, 1.63it/s] 44%|████▍ | 28882/65536 [4:57:07<6:11:24, 1.64it/s] 44%|████▍ | 28883/65536 [4:57:07<6:07:46, 1.66it/s] 44%|████▍ | 28884/65536 [4:57:08<6:17:56, 1.62it/s] 44%|████▍ | 28885/65536 [4:57:09<6:30:44, 1.56it/s] 44%|████▍ | 28886/65536 [4:57:09<6:26:26, 1.58it/s] 44%|████▍ | 28887/65536 [4:57:10<6:19:32, 1.61it/s] 44%|████▍ | 28888/65536 [4:57:10<6:17:35, 1.62it/s] 44%|████▍ | 28889/65536 [4:57:11<6:22:45, 1.60it/s] 44%|████▍ | 28890/65536 [4:57:12<6:13:39, 1.63it/s] 44%|████▍ | 28891/65536 [4:57:12<6:23:53, 1.59it/s] 44%|████▍ | 28892/65536 [4:57:13<6:18:39, 1.61it/s] 44%|████▍ | 28893/65536 [4:57:14<6:30:37, 1.56it/s] 44%|████▍ | 28894/65536 [4:57:14<6:20:23, 1.61it/s] 44%|████▍ | 28895/65536 [4:57:15<6:17:11, 1.62it/s] 44%|████▍ | 28896/65536 [4:57:15<6:19:24, 1.61it/s] 44%|████▍ | 28897/65536 [4:57:16<6:23:00, 1.59it/s] 44%|████▍ | 28898/65536 [4:57:17<6:15:49, 1.62it/s] 44%|████▍ | 28899/65536 [4:57:17<6:07:44, 1.66it/s] 44%|████▍ | 28900/65536 [4:57:18<6:05:29, 1.67it/s] {'loss': 1.6784, 'learning_rate': 6.04658993510469e-07, 'epoch': 1783.95} + 44%|████▍ | 28900/65536 [4:57:18<6:05:29, 1.67it/s] 44%|████▍ | 28901/65536 [4:57:18<6:20:43, 1.60it/s] 44%|████▍ | 28902/65536 [4:57:19<6:16:16, 1.62it/s] 44%|████▍ | 28903/65536 [4:57:20<6:16:39, 1.62it/s] 44%|████▍ | 28904/65536 [4:57:20<6:10:44, 1.65it/s] 44%|████▍ | 28905/65536 [4:57:21<6:12:21, 1.64it/s] 44%|████▍ | 28906/65536 [4:57:22<6:12:52, 1.64it/s] 44%|████▍ | 28907/65536 [4:57:22<6:11:05, 1.65it/s] 44%|████▍ | 28908/65536 [4:57:23<6:05:58, 1.67it/s] 44%|████▍ | 28909/65536 [4:57:23<5:59:17, 1.70it/s] 44%|████▍ | 28910/65536 [4:57:24<6:04:53, 1.67it/s] 44%|████▍ | 28911/65536 [4:57:24<6:03:25, 1.68it/s] 44%|████▍ | 28912/65536 [4:57:25<6:09:32, 1.65it/s] 44%|████▍ | 28913/65536 [4:57:26<6:16:31, 1.62it/s] 44%|████▍ | 28914/65536 [4:57:26<6:19:34, 1.61it/s] 44%|████▍ | 28915/65536 [4:57:27<6:28:28, 1.57it/s] 44%|████▍ | 28916/65536 [4:57:28<6:27:16, 1.58it/s] 44%|████▍ | 28917/65536 [4:57:28<6:21:16, 1.60it/s] 44%|████▍ | 28918/65536 [4:57:29<6:28:33, 1.57it/s] 44%|████▍ | 28919/65536 [4:57:30<6:25:47, 1.58it/s] 44%|████▍ | 28920/65536 [4:57:30<6:28:33, 1.57it/s] {'loss': 1.6877, 'learning_rate': 6.043834945512428e-07, 'epoch': 1785.19} + 44%|████▍ | 28920/65536 [4:57:30<6:28:33, 1.57it/s] 44%|████▍ | 28921/65536 [4:57:31<6:21:44, 1.60it/s] 44%|████▍ | 28922/65536 [4:57:31<6:30:08, 1.56it/s] 44%|████▍ | 28923/65536 [4:57:32<6:19:35, 1.61it/s] 44%|████▍ | 28924/65536 [4:57:33<6:13:08, 1.64it/s] 44%|████▍ | 28925/65536 [4:57:33<6:08:14, 1.66it/s] 44%|████▍ | 28926/65536 [4:57:34<6:06:19, 1.67it/s] 44%|████▍ | 28927/65536 [4:57:34<6:07:53, 1.66it/s] 44%|████▍ | 28928/65536 [4:57:35<6:08:18, 1.66it/s] 44%|████▍ | 28929/65536 [4:57:36<6:14:28, 1.63it/s] 44%|████▍ | 28930/65536 [4:57:36<6:14:09, 1.63it/s] 44%|████▍ | 28931/65536 [4:57:37<6:11:50, 1.64it/s] 44%|████▍ | 28932/65536 [4:57:37<6:02:52, 1.68it/s] 44%|████▍ | 28933/65536 [4:57:38<6:09:19, 1.65it/s] 44%|████▍ | 28934/65536 [4:57:39<6:25:21, 1.58it/s] 44%|████▍ | 28935/65536 [4:57:39<6:17:58, 1.61it/s] 44%|████▍ | 28936/65536 [4:57:40<6:11:11, 1.64it/s] 44%|████▍ | 28937/65536 [4:57:41<6:06:51, 1.66it/s] 44%|████▍ | 28938/65536 [4:57:41<6:04:00, 1.68it/s] 44%|████▍ | 28939/65536 [4:57:42<6:09:48, 1.65it/s] 44%|████▍ | 28940/65536 [4:57:42<6:18:36, 1.61it/s] {'loss': 1.6964, 'learning_rate': 6.041079955920167e-07, 'epoch': 1786.42} + 44%|████▍ | 28940/65536 [4:57:42<6:18:36, 1.61it/s] 44%|████▍ | 28941/65536 [4:57:43<6:20:37, 1.60it/s] 44%|████▍ | 28942/65536 [4:57:44<6:10:29, 1.65it/s] 44%|████▍ | 28943/65536 [4:57:44<6:14:47, 1.63it/s] 44%|████▍ | 28944/65536 [4:57:45<6:15:21, 1.62it/s] 44%|████▍ | 28945/65536 [4:57:45<6:17:43, 1.61it/s] 44%|████▍ | 28946/65536 [4:57:46<6:19:21, 1.61it/s] 44%|████▍ | 28947/65536 [4:57:47<6:22:48, 1.59it/s] 44%|████▍ | 28948/65536 [4:57:47<6:19:01, 1.61it/s] 44%|████▍ | 28949/65536 [4:57:48<6:20:56, 1.60it/s] 44%|████▍ | 28950/65536 [4:57:49<6:34:28, 1.55it/s] 44%|████▍ | 28951/65536 [4:57:49<6:29:29, 1.57it/s] 44%|████▍ | 28952/65536 [4:57:50<6:24:37, 1.59it/s] 44%|████▍ | 28953/65536 [4:57:51<6:18:18, 1.61it/s] 44%|████▍ | 28954/65536 [4:57:51<6:16:26, 1.62it/s] 44%|████▍ | 28955/65536 [4:57:52<6:28:18, 1.57it/s] 44%|████▍ | 28956/65536 [4:57:52<6:23:56, 1.59it/s] 44%|████▍ | 28957/65536 [4:57:53<6:16:51, 1.62it/s] 44%|████▍ | 28958/65536 [4:57:54<6:07:58, 1.66it/s] 44%|████▍ | 28959/65536 [4:57:54<6:19:55, 1.60it/s] 44%|████▍ | 28960/65536 [4:57:55<6:17:41, 1.61it/s] {'loss': 1.6788, 'learning_rate': 6.038324966327904e-07, 'epoch': 1787.65} + 44%|████▍ | 28960/65536 [4:57:55<6:17:41, 1.61it/s] 44%|████▍ | 28961/65536 [4:57:55<6:18:06, 1.61it/s] 44%|████▍ | 28962/65536 [4:57:56<6:17:31, 1.61it/s] 44%|████▍ | 28963/65536 [4:57:57<6:18:02, 1.61it/s] 44%|████▍ | 28964/65536 [4:57:57<6:21:53, 1.60it/s] 44%|████▍ | 28965/65536 [4:57:58<6:23:07, 1.59it/s] 44%|████▍ | 28966/65536 [4:57:59<6:39:11, 1.53it/s] 44%|████▍ | 28967/65536 [4:57:59<6:31:34, 1.56it/s] 44%|████▍ | 28968/65536 [4:58:00<6:27:29, 1.57it/s] 44%|████▍ | 28969/65536 [4:58:01<6:22:21, 1.59it/s] 44%|████▍ | 28970/65536 [4:58:01<6:29:07, 1.57it/s] 44%|████▍ | 28971/65536 [4:58:02<6:28:27, 1.57it/s] 44%|████▍ | 28972/65536 [4:58:02<6:24:55, 1.58it/s] 44%|████▍ | 28973/65536 [4:58:03<6:15:46, 1.62it/s] 44%|████▍ | 28974/65536 [4:58:04<6:04:37, 1.67it/s] 44%|████▍ | 28975/65536 [4:58:04<6:03:05, 1.68it/s] 44%|████▍ | 28976/65536 [4:58:05<5:57:56, 1.70it/s] 44%|████▍ | 28977/65536 [4:58:05<6:11:04, 1.64it/s] 44%|████▍ | 28978/65536 [4:58:06<6:18:22, 1.61it/s] 44%|████▍ | 28979/65536 [4:58:07<6:16:25, 1.62it/s] 44%|████▍ | 28980/65536 [4:58:07<6:06:28, 1.66it/s] {'loss': 1.6828, 'learning_rate': 6.035569976735643e-07, 'epoch': 1788.89} + 44%|████▍ | 28980/65536 [4:58:07<6:06:28, 1.66it/s] 44%|████▍ | 28981/65536 [4:58:08<6:10:58, 1.64it/s] 44%|████▍ | 28982/65536 [4:58:09<6:17:38, 1.61it/s] 44%|████▍ | 28983/65536 [4:58:09<6:16:56, 1.62it/s] 44%|████▍ | 28984/65536 [4:58:10<6:09:05, 1.65it/s] 44%|████▍ | 28985/65536 [4:58:10<6:07:56, 1.66it/s] 44%|████▍ | 28986/65536 [4:58:11<6:09:33, 1.65it/s] 44%|████▍ | 28987/65536 [4:58:12<6:11:13, 1.64it/s] 44%|████▍ | 28988/65536 [4:58:12<6:05:09, 1.67it/s] 44%|████▍ | 28989/65536 [4:58:13<6:10:01, 1.65it/s] 44%|████▍ | 28990/65536 [4:58:13<6:18:11, 1.61it/s] 44%|████▍ | 28991/65536 [4:58:14<6:22:28, 1.59it/s] 44%|████▍ | 28992/65536 [4:58:15<6:32:00, 1.55it/s] 44%|████▍ | 28993/65536 [4:58:15<6:32:56, 1.55it/s] 44%|████▍ | 28994/65536 [4:58:16<6:29:14, 1.56it/s] 44%|████▍ | 28995/65536 [4:58:17<6:15:37, 1.62it/s] 44%|████▍ | 28996/65536 [4:58:17<6:19:15, 1.61it/s] 44%|████▍ | 28997/65536 [4:58:18<6:16:43, 1.62it/s] 44%|████▍ | 28998/65536 [4:58:18<6:12:25, 1.64it/s] 44%|████▍ | 28999/65536 [4:58:19<6:24:20, 1.58it/s] 44%|████▍ | 29000/65536 [4:58:20<6:17:23, 1.61it/s] {'loss': 1.6915, 'learning_rate': 6.032814987143382e-07, 'epoch': 1790.12} + 44%|████▍ | 29000/65536 [4:58:20<6:17:23, 1.61it/s] 44%|████▍ | 29001/65536 [4:58:20<6:18:25, 1.61it/s] 44%|████▍ | 29002/65536 [4:58:21<6:11:25, 1.64it/s] 44%|████▍ | 29003/65536 [4:58:22<6:21:07, 1.60it/s] 44%|████▍ | 29004/65536 [4:58:22<6:12:25, 1.63it/s] 44%|████▍ | 29005/65536 [4:58:23<6:20:38, 1.60it/s] 44%|████▍ | 29006/65536 [4:58:23<6:13:09, 1.63it/s] 44%|████▍ | 29007/65536 [4:58:24<6:07:24, 1.66it/s] 44%|████▍ | 29008/65536 [4:58:25<6:11:11, 1.64it/s] 44%|████▍ | 29009/65536 [4:58:25<6:16:04, 1.62it/s] 44%|████▍ | 29010/65536 [4:58:26<6:16:31, 1.62it/s] 44%|████▍ | 29011/65536 [4:58:26<6:19:17, 1.60it/s] 44%|████▍ | 29012/65536 [4:58:27<6:18:30, 1.61it/s] 44%|████▍ | 29013/65536 [4:58:28<6:20:30, 1.60it/s] 44%|████▍ | 29014/65536 [4:58:28<6:28:40, 1.57it/s] 44%|████▍ | 29015/65536 [4:58:29<6:35:30, 1.54it/s] 44%|████▍ | 29016/65536 [4:58:30<6:30:44, 1.56it/s] 44%|████▍ | 29017/65536 [4:58:30<6:20:47, 1.60it/s] 44%|████▍ | 29018/65536 [4:58:31<6:11:24, 1.64it/s] 44%|████▍ | 29019/65536 [4:58:32<6:19:43, 1.60it/s] 44%|████▍ | 29020/65536 [4:58:32<6:25:44, 1.58it/s] {'loss': 1.6709, 'learning_rate': 6.03005999755112e-07, 'epoch': 1791.36} + 44%|████▍ | 29020/65536 [4:58:32<6:25:44, 1.58it/s] 44%|████▍ | 29021/65536 [4:58:33<6:23:40, 1.59it/s] 44%|████▍ | 29022/65536 [4:58:33<6:24:43, 1.58it/s] 44%|████▍ | 29023/65536 [4:58:34<6:21:54, 1.59it/s] 44%|████▍ | 29024/65536 [4:58:35<6:19:04, 1.61it/s] 44%|████▍ | 29025/65536 [4:58:35<6:09:17, 1.65it/s] 44%|████▍ | 29026/65536 [4:58:36<6:17:40, 1.61it/s] 44%|████▍ | 29027/65536 [4:58:36<6:11:00, 1.64it/s] 44%|████▍ | 29028/65536 [4:58:37<6:16:13, 1.62it/s] 44%|████▍ | 29029/65536 [4:58:38<6:21:01, 1.60it/s] 44%|████▍ | 29030/65536 [4:58:38<6:16:29, 1.62it/s] 44%|████▍ | 29031/65536 [4:58:39<6:20:22, 1.60it/s] 44%|████▍ | 29032/65536 [4:58:40<6:21:35, 1.59it/s] 44%|████▍ | 29033/65536 [4:58:40<6:09:30, 1.65it/s] 44%|████▍ | 29034/65536 [4:58:41<6:16:08, 1.62it/s] 44%|████▍ | 29035/65536 [4:58:41<6:19:59, 1.60it/s] 44%|████▍ | 29036/65536 [4:58:42<6:09:52, 1.64it/s] 44%|████▍ | 29037/65536 [4:58:43<6:05:28, 1.66it/s] 44%|████▍ | 29038/65536 [4:58:43<6:03:25, 1.67it/s] 44%|████▍ | 29039/65536 [4:58:44<6:08:45, 1.65it/s] 44%|████▍ | 29040/65536 [4:58:44<6:07:41, 1.65it/s] {'loss': 1.7406, 'learning_rate': 6.027305007958859e-07, 'epoch': 1792.59} + 44%|████▍ | 29040/65536 [4:58:44<6:07:41, 1.65it/s] 44%|████▍ | 29041/65536 [4:58:45<6:09:26, 1.65it/s] 44%|████▍ | 29042/65536 [4:58:46<6:08:58, 1.65it/s] 44%|████▍ | 29043/65536 [4:58:46<6:25:41, 1.58it/s] 44%|████▍ | 29044/65536 [4:58:47<6:30:13, 1.56it/s] 44%|████▍ | 29045/65536 [4:58:48<6:26:01, 1.58it/s] 44%|████▍ | 29046/65536 [4:58:48<6:19:00, 1.60it/s] 44%|████▍ | 29047/65536 [4:58:49<6:21:45, 1.59it/s] 44%|████▍ | 29048/65536 [4:58:50<6:28:23, 1.57it/s] 44%|████▍ | 29049/65536 [4:58:50<6:23:15, 1.59it/s] 44%|████▍ | 29050/65536 [4:58:51<6:17:59, 1.61it/s] 44%|████▍ | 29051/65536 [4:58:51<6:17:41, 1.61it/s] 44%|████▍ | 29052/65536 [4:58:52<6:19:55, 1.60it/s] 44%|████▍ | 29053/65536 [4:58:53<6:20:11, 1.60it/s] 44%|████▍ | 29054/65536 [4:58:53<6:10:34, 1.64it/s] 44%|████▍ | 29055/65536 [4:58:54<6:12:58, 1.63it/s] 44%|████▍ | 29056/65536 [4:58:54<6:14:51, 1.62it/s] 44%|████▍ | 29057/65536 [4:58:55<6:13:12, 1.63it/s] 44%|████▍ | 29058/65536 [4:58:56<6:09:42, 1.64it/s] 44%|████▍ | 29059/65536 [4:58:56<6:05:45, 1.66it/s] 44%|████▍ | 29060/65536 [4:58:57<5:58:36, 1.70it/s] {'loss': 1.6912, 'learning_rate': 6.024550018366597e-07, 'epoch': 1793.83} + 44%|████▍ | 29060/65536 [4:58:57<5:58:36, 1.70it/s] 44%|████▍ | 29061/65536 [4:58:57<6:09:25, 1.65it/s] 44%|████▍ | 29062/65536 [4:58:58<6:20:41, 1.60it/s] 44%|████▍ | 29063/65536 [4:58:59<6:28:00, 1.57it/s] 44%|████▍ | 29064/65536 [4:58:59<6:14:05, 1.62it/s] 44%|████▍ | 29065/65536 [4:59:00<6:10:24, 1.64it/s] 44%|████▍ | 29066/65536 [4:59:01<6:14:30, 1.62it/s] 44%|████▍ | 29067/65536 [4:59:01<6:08:20, 1.65it/s] 44%|████▍ | 29068/65536 [4:59:02<6:03:36, 1.67it/s] 44%|████▍ | 29069/65536 [4:59:02<6:01:41, 1.68it/s] 44%|████▍ | 29070/65536 [4:59:03<5:58:22, 1.70it/s] 44%|████▍ | 29071/65536 [4:59:03<5:59:00, 1.69it/s] 44%|████▍ | 29072/65536 [4:59:04<6:06:28, 1.66it/s] 44%|████▍ | 29073/65536 [4:59:05<6:06:58, 1.66it/s] 44%|████▍ | 29074/65536 [4:59:05<6:12:37, 1.63it/s] 44%|████▍ | 29075/65536 [4:59:06<6:23:41, 1.58it/s] 44%|████▍ | 29076/65536 [4:59:07<6:19:10, 1.60it/s] 44%|████▍ | 29077/65536 [4:59:07<6:13:33, 1.63it/s] 44%|████▍ | 29078/65536 [4:59:08<6:23:08, 1.59it/s] 44%|████▍ | 29079/65536 [4:59:09<6:21:39, 1.59it/s] 44%|████▍ | 29080/65536 [4:59:09<6:27:01, 1.57it/s] {'loss': 1.6927, 'learning_rate': 6.021795028774336e-07, 'epoch': 1795.06} + 44%|████▍ | 29080/65536 [4:59:09<6:27:01, 1.57it/s] 44%|████▍ | 29081/65536 [4:59:10<6:42:15, 1.51it/s] 44%|████▍ | 29082/65536 [4:59:11<6:33:02, 1.55it/s] 44%|████▍ | 29083/65536 [4:59:11<6:21:45, 1.59it/s] 44%|████▍ | 29084/65536 [4:59:12<6:21:05, 1.59it/s] 44%|████▍ | 29085/65536 [4:59:12<6:17:55, 1.61it/s] 44%|████▍ | 29086/65536 [4:59:13<6:12:59, 1.63it/s] 44%|████▍ | 29087/65536 [4:59:13<6:07:03, 1.65it/s] 44%|████▍ | 29088/65536 [4:59:14<6:07:52, 1.65it/s] 44%|████▍ | 29089/65536 [4:59:15<6:04:48, 1.67it/s] 44%|████▍ | 29090/65536 [4:59:15<6:09:12, 1.65it/s] 44%|████▍ | 29091/65536 [4:59:16<6:04:07, 1.67it/s] 44%|████▍ | 29092/65536 [4:59:16<5:57:46, 1.70it/s] 44%|████▍ | 29093/65536 [4:59:17<6:03:33, 1.67it/s] 44%|████▍ | 29094/65536 [4:59:18<6:03:01, 1.67it/s] 44%|████▍ | 29095/65536 [4:59:18<6:01:18, 1.68it/s] 44%|████▍ | 29096/65536 [4:59:19<6:14:26, 1.62it/s] 44%|████▍ | 29097/65536 [4:59:20<6:21:32, 1.59it/s] 44%|████▍ | 29098/65536 [4:59:20<6:25:24, 1.58it/s] 44%|████▍ | 29099/65536 [4:59:21<6:19:05, 1.60it/s] 44%|████▍ | 29100/65536 [4:59:21<6:24:02, 1.58it/s] {'loss': 1.725, 'learning_rate': 6.019040039182075e-07, 'epoch': 1796.3} + 44%|████▍ | 29100/65536 [4:59:21<6:24:02, 1.58it/s] 44%|████▍ | 29101/65536 [4:59:22<6:22:48, 1.59it/s] 44%|████▍ | 29102/65536 [4:59:23<6:17:03, 1.61it/s] 44%|████▍ | 29103/65536 [4:59:23<6:10:43, 1.64it/s] 44%|████▍ | 29104/65536 [4:59:24<6:06:27, 1.66it/s] 44%|████▍ | 29105/65536 [4:59:25<6:08:12, 1.65it/s] 44%|████▍ | 29106/65536 [4:59:25<6:09:30, 1.64it/s] 44%|████▍ | 29107/65536 [4:59:26<6:02:15, 1.68it/s] 44%|████▍ | 29108/65536 [4:59:26<6:03:08, 1.67it/s] 44%|████▍ | 29109/65536 [4:59:27<6:00:33, 1.68it/s] 44%|████▍ | 29110/65536 [4:59:28<6:12:59, 1.63it/s] 44%|████▍ | 29111/65536 [4:59:28<6:10:40, 1.64it/s] 44%|████▍ | 29112/65536 [4:59:29<6:24:40, 1.58it/s] 44%|████▍ | 29113/65536 [4:59:29<6:24:11, 1.58it/s] 44%|████▍ | 29114/65536 [4:59:30<6:22:31, 1.59it/s] 44%|████▍ | 29115/65536 [4:59:31<6:19:23, 1.60it/s] 44%|████▍ | 29116/65536 [4:59:31<6:32:35, 1.55it/s] 44%|████▍ | 29117/65536 [4:59:32<6:20:14, 1.60it/s] 44%|████▍ | 29118/65536 [4:59:33<6:28:09, 1.56it/s] 44%|████▍ | 29119/65536 [4:59:33<6:27:51, 1.56it/s] 44%|████▍ | 29120/65536 [4:59:34<6:19:02, 1.60it/s] {'loss': 1.6578, 'learning_rate': 6.016285049589813e-07, 'epoch': 1797.53} + 44%|████▍ | 29120/65536 [4:59:34<6:19:02, 1.60it/s] 44%|████▍ | 29121/65536 [4:59:34<6:12:03, 1.63it/s] 44%|████▍ | 29122/65536 [4:59:35<6:08:11, 1.65it/s] 44%|████▍ | 29123/65536 [4:59:36<6:12:38, 1.63it/s] 44%|████▍ | 29124/65536 [4:59:36<6:07:39, 1.65it/s] 44%|████▍ | 29125/65536 [4:59:37<6:02:27, 1.67it/s] 44%|████▍ | 29126/65536 [4:59:37<5:58:50, 1.69it/s] 44%|████▍ | 29127/65536 [4:59:38<5:56:02, 1.70it/s] 44%|████▍ | 29128/65536 [4:59:39<6:13:44, 1.62it/s] 44%|████▍ | 29129/65536 [4:59:39<6:09:56, 1.64it/s] 44%|████▍ | 29130/65536 [4:59:40<6:14:44, 1.62it/s] 44%|████▍ | 29131/65536 [4:59:41<6:13:03, 1.63it/s] 44%|████▍ | 29132/65536 [4:59:41<6:09:50, 1.64it/s] 44%|████▍ | 29133/65536 [4:59:42<6:15:29, 1.62it/s] 44%|████▍ | 29134/65536 [4:59:42<6:09:56, 1.64it/s] 44%|████▍ | 29135/65536 [4:59:43<6:15:10, 1.62it/s] 44%|████▍ | 29136/65536 [4:59:44<6:06:21, 1.66it/s] 44%|████▍ | 29137/65536 [4:59:44<6:15:22, 1.62it/s] 44%|████▍ | 29138/65536 [4:59:45<6:11:50, 1.63it/s] 44%|████▍ | 29139/65536 [4:59:45<6:08:17, 1.65it/s] 44%|████▍ | 29140/65536 [4:59:46<6:09:14, 1.64it/s] {'loss': 1.7392, 'learning_rate': 6.013530059997552e-07, 'epoch': 1798.77} + 44%|████▍ | 29140/65536 [4:59:46<6:09:14, 1.64it/s] 44%|████▍ | 29141/65536 [4:59:47<6:10:31, 1.64it/s] 44%|████▍ | 29142/65536 [4:59:47<6:02:41, 1.67it/s] 44%|████▍ | 29143/65536 [4:59:48<6:03:30, 1.67it/s] 44%|████▍ | 29144/65536 [4:59:48<6:14:20, 1.62it/s] 44%|████▍ | 29145/65536 [4:59:49<6:19:43, 1.60it/s] 44%|████▍ | 29146/65536 [4:59:50<6:08:50, 1.64it/s] 44%|████▍ | 29147/65536 [4:59:50<6:10:42, 1.64it/s] 44%|████▍ | 29148/65536 [4:59:51<6:12:02, 1.63it/s] 44%|████▍ | 29149/65536 [4:59:52<6:16:46, 1.61it/s] 44%|████▍ | 29150/65536 [4:59:52<6:12:27, 1.63it/s] 44%|████▍ | 29151/65536 [4:59:53<6:15:00, 1.62it/s] 44%|████▍ | 29152/65536 [4:59:53<6:18:25, 1.60it/s] 44%|████▍ | 29153/65536 [4:59:54<6:22:23, 1.59it/s] 44%|████▍ | 29154/65536 [4:59:55<6:23:07, 1.58it/s] 44%|████▍ | 29155/65536 [4:59:55<6:15:36, 1.61it/s] 44%|████▍ | 29156/65536 [4:59:56<6:13:11, 1.62it/s] 44%|████▍ | 29157/65536 [4:59:57<6:17:49, 1.60it/s] 44%|████▍ | 29158/65536 [4:59:57<6:11:20, 1.63it/s] 44%|████▍ | 29159/65536 [4:59:58<6:10:05, 1.64it/s] 44%|████▍ | 29160/65536 [4:59:58<6:04:18, 1.66it/s] {'loss': 1.6798, 'learning_rate': 6.010775070405289e-07, 'epoch': 1800.0} + 44%|████▍ | 29160/65536 [4:59:58<6:04:18, 1.66it/s] 44%|████▍ | 29161/65536 [4:59:59<6:12:26, 1.63it/s] 44%|████▍ | 29162/65536 [5:00:00<6:11:51, 1.63it/s] 44%|████▍ | 29163/65536 [5:00:00<6:15:54, 1.61it/s] 45%|████▍ | 29164/65536 [5:00:01<6:11:26, 1.63it/s] 45%|████▍ | 29165/65536 [5:00:01<6:13:33, 1.62it/s] 45%|████▍ | 29166/65536 [5:00:02<6:09:42, 1.64it/s] 45%|████▍ | 29167/65536 [5:00:03<6:16:12, 1.61it/s] 45%|████▍ | 29168/65536 [5:00:03<6:13:32, 1.62it/s] 45%|████▍ | 29169/65536 [5:00:04<6:12:33, 1.63it/s] 45%|████▍ | 29170/65536 [5:00:05<6:28:11, 1.56it/s] 45%|████▍ | 29171/65536 [5:00:05<6:24:23, 1.58it/s] 45%|████▍ | 29172/65536 [5:00:06<6:19:34, 1.60it/s] 45%|████▍ | 29173/65536 [5:00:06<6:16:14, 1.61it/s] 45%|████▍ | 29174/65536 [5:00:07<6:15:21, 1.61it/s] 45%|████▍ | 29175/65536 [5:00:08<6:08:44, 1.64it/s] 45%|████▍ | 29176/65536 [5:00:08<6:04:39, 1.66it/s] 45%|████▍ | 29177/65536 [5:00:09<6:14:31, 1.62it/s] 45%|████▍ | 29178/65536 [5:00:10<6:21:46, 1.59it/s] 45%|████▍ | 29179/65536 [5:00:10<6:15:08, 1.62it/s] 45%|████▍ | 29180/65536 [5:00:11<6:16:21, 1.61it/s] {'loss': 1.6791, 'learning_rate': 6.008020080813028e-07, 'epoch': 1801.23} + 45%|████▍ | 29180/65536 [5:00:11<6:16:21, 1.61it/s] 45%|████▍ | 29181/65536 [5:00:11<6:11:54, 1.63it/s] 45%|████▍ | 29182/65536 [5:00:12<6:11:55, 1.63it/s] 45%|████▍ | 29183/65536 [5:00:13<6:06:08, 1.65it/s] 45%|████▍ | 29184/65536 [5:00:13<6:12:16, 1.63it/s] 45%|████▍ | 29185/65536 [5:00:14<6:07:17, 1.65it/s] 45%|████▍ | 29186/65536 [5:00:14<6:09:39, 1.64it/s] 45%|████▍ | 29187/65536 [5:00:15<6:06:28, 1.65it/s] 45%|████▍ | 29188/65536 [5:00:16<6:21:39, 1.59it/s] 45%|████▍ | 29189/65536 [5:00:16<6:21:48, 1.59it/s] 45%|████▍ | 29190/65536 [5:00:17<6:16:18, 1.61it/s] 45%|████▍ | 29191/65536 [5:00:18<6:18:08, 1.60it/s] 45%|████▍ | 29192/65536 [5:00:18<6:10:07, 1.64it/s] 45%|████▍ | 29193/65536 [5:00:19<6:27:51, 1.56it/s] 45%|████▍ | 29194/65536 [5:00:19<6:32:44, 1.54it/s] 45%|████▍ | 29195/65536 [5:00:20<6:26:12, 1.57it/s] 45%|████▍ | 29196/65536 [5:00:21<6:27:01, 1.56it/s] 45%|████▍ | 29197/65536 [5:00:21<6:22:18, 1.58it/s] 45%|████▍ | 29198/65536 [5:00:22<6:08:10, 1.64it/s] 45%|████▍ | 29199/65536 [5:00:22<6:02:13, 1.67it/s] 45%|████▍ | 29200/65536 [5:00:23<6:00:19, 1.68it/s] {'loss': 1.6692, 'learning_rate': 6.005265091220766e-07, 'epoch': 1802.47} + 45%|████▍ | 29200/65536 [5:00:23<6:00:19, 1.68it/s] 45%|████▍ | 29201/65536 [5:00:24<6:12:49, 1.62it/s] 45%|████▍ | 29202/65536 [5:00:24<6:15:04, 1.61it/s] 45%|████▍ | 29203/65536 [5:00:25<6:17:08, 1.61it/s] 45%|████▍ | 29204/65536 [5:00:26<6:12:33, 1.63it/s] 45%|████▍ | 29205/65536 [5:00:26<6:19:40, 1.59it/s] 45%|████▍ | 29206/65536 [5:00:27<6:20:36, 1.59it/s] 45%|████▍ | 29207/65536 [5:00:27<6:16:28, 1.61it/s] 45%|████▍ | 29208/65536 [5:00:28<6:04:11, 1.66it/s] 45%|████▍ | 29209/65536 [5:00:29<6:16:17, 1.61it/s] 45%|████▍ | 29210/65536 [5:00:29<6:17:35, 1.60it/s] 45%|████▍ | 29211/65536 [5:00:30<6:14:13, 1.62it/s] 45%|████▍ | 29212/65536 [5:00:31<6:17:04, 1.61it/s] 45%|████▍ | 29213/65536 [5:00:31<6:17:48, 1.60it/s] 45%|████▍ | 29214/65536 [5:00:32<6:14:55, 1.61it/s] 45%|████▍ | 29215/65536 [5:00:32<6:16:45, 1.61it/s] 45%|████▍ | 29216/65536 [5:00:33<6:11:34, 1.63it/s] 45%|████▍ | 29217/65536 [5:00:34<6:03:39, 1.66it/s] 45%|████▍ | 29218/65536 [5:00:34<6:02:41, 1.67it/s] 45%|████▍ | 29219/65536 [5:00:35<5:58:04, 1.69it/s] 45%|████▍ | 29220/65536 [5:00:35<6:06:38, 1.65it/s] {'loss': 1.6823, 'learning_rate': 6.002510101628504e-07, 'epoch': 1803.7} + 45%|████▍ | 29220/65536 [5:00:35<6:06:38, 1.65it/s] 45%|████▍ | 29221/65536 [5:00:36<6:07:41, 1.65it/s] 45%|████▍ | 29222/65536 [5:00:37<5:56:28, 1.70it/s] 45%|████▍ | 29223/65536 [5:00:37<6:13:07, 1.62it/s] 45%|████▍ | 29224/65536 [5:00:38<6:14:23, 1.62it/s] 45%|████▍ | 29225/65536 [5:00:39<6:23:58, 1.58it/s] 45%|████▍ | 29226/65536 [5:00:39<6:29:50, 1.55it/s] 45%|████▍ | 29227/65536 [5:00:40<6:29:39, 1.55it/s] 45%|████▍ | 29228/65536 [5:00:40<6:22:15, 1.58it/s] 45%|████▍ | 29229/65536 [5:00:41<6:27:00, 1.56it/s] 45%|████▍ | 29230/65536 [5:00:42<6:12:54, 1.62it/s] 45%|████▍ | 29231/65536 [5:00:42<6:15:44, 1.61it/s] 45%|████▍ | 29232/65536 [5:00:43<6:05:54, 1.65it/s] 45%|████▍ | 29233/65536 [5:00:43<6:08:44, 1.64it/s] 45%|████▍ | 29234/65536 [5:00:44<6:09:14, 1.64it/s] 45%|████▍ | 29235/65536 [5:00:45<6:02:56, 1.67it/s] 45%|████▍ | 29236/65536 [5:00:45<6:04:11, 1.66it/s] 45%|████▍ | 29237/65536 [5:00:46<6:06:28, 1.65it/s] 45%|████▍ | 29238/65536 [5:00:46<6:04:50, 1.66it/s] 45%|████▍ | 29239/65536 [5:00:47<6:05:49, 1.65it/s] 45%|████▍ | 29240/65536 [5:00:48<6:00:11, 1.68it/s] {'loss': 1.6822, 'learning_rate': 5.999755112036243e-07, 'epoch': 1804.94} + 45%|████▍ | 29240/65536 [5:00:48<6:00:11, 1.68it/s] 45%|████▍ | 29241/65536 [5:00:48<6:08:16, 1.64it/s] 45%|████▍ | 29242/65536 [5:00:49<6:16:13, 1.61it/s] 45%|████▍ | 29243/65536 [5:00:50<6:14:23, 1.62it/s] 45%|████▍ | 29244/65536 [5:00:50<6:21:35, 1.59it/s] 45%|████▍ | 29245/65536 [5:00:51<6:23:01, 1.58it/s] 45%|████▍ | 29246/65536 [5:00:51<6:17:51, 1.60it/s] 45%|████▍ | 29247/65536 [5:00:52<6:27:04, 1.56it/s] 45%|████▍ | 29248/65536 [5:00:53<6:14:27, 1.62it/s] 45%|████▍ | 29249/65536 [5:00:53<6:12:08, 1.63it/s] 45%|████▍ | 29250/65536 [5:00:54<6:04:56, 1.66it/s] 45%|████▍ | 29251/65536 [5:00:55<6:09:49, 1.64it/s] 45%|████▍ | 29252/65536 [5:00:55<6:03:42, 1.66it/s] 45%|████▍ | 29253/65536 [5:00:56<6:02:53, 1.67it/s] 45%|████▍ | 29254/65536 [5:00:56<6:05:29, 1.65it/s] 45%|████▍ | 29255/65536 [5:00:57<6:02:12, 1.67it/s] 45%|████▍ | 29256/65536 [5:00:58<6:06:17, 1.65it/s] 45%|████▍ | 29257/65536 [5:00:58<6:18:48, 1.60it/s] 45%|████▍ | 29258/65536 [5:00:59<6:33:07, 1.54it/s] 45%|████▍ | 29259/65536 [5:01:00<6:27:20, 1.56it/s] 45%|████▍ | 29260/65536 [5:01:00<6:29:51, 1.55it/s] {'loss': 1.6831, 'learning_rate': 5.997000122443981e-07, 'epoch': 1806.17} + 45%|████▍ | 29260/65536 [5:01:00<6:29:51, 1.55it/s] 45%|████▍ | 29261/65536 [5:01:01<6:34:10, 1.53it/s] 45%|████▍ | 29262/65536 [5:01:01<6:22:48, 1.58it/s] 45%|████▍ | 29263/65536 [5:01:02<6:20:02, 1.59it/s] 45%|████▍ | 29264/65536 [5:01:03<6:23:04, 1.58it/s] 45%|████▍ | 29265/65536 [5:01:03<6:13:36, 1.62it/s] 45%|████▍ | 29266/65536 [5:01:04<6:11:57, 1.63it/s] 45%|████▍ | 29267/65536 [5:01:05<6:12:51, 1.62it/s] 45%|████▍ | 29268/65536 [5:01:05<6:05:41, 1.65it/s] 45%|████▍ | 29269/65536 [5:01:06<6:18:08, 1.60it/s] 45%|████▍ | 29270/65536 [5:01:06<6:18:12, 1.60it/s] 45%|████▍ | 29271/65536 [5:01:07<6:11:38, 1.63it/s] 45%|████▍ | 29272/65536 [5:01:08<5:59:22, 1.68it/s] 45%|████▍ | 29273/65536 [5:01:08<5:52:07, 1.72it/s] 45%|████▍ | 29274/65536 [5:01:09<6:05:16, 1.65it/s] 45%|████▍ | 29275/65536 [5:01:09<5:59:07, 1.68it/s] 45%|████▍ | 29276/65536 [5:01:10<5:58:42, 1.68it/s] 45%|████▍ | 29277/65536 [5:01:10<5:55:48, 1.70it/s] 45%|████▍ | 29278/65536 [5:01:11<5:57:50, 1.69it/s] 45%|████▍ | 29279/65536 [5:01:12<6:01:47, 1.67it/s] 45%|████▍ | 29280/65536 [5:01:12<6:08:08, 1.64it/s] {'loss': 1.7249, 'learning_rate': 5.99424513285172e-07, 'epoch': 1807.41} + 45%|████▍ | 29280/65536 [5:01:12<6:08:08, 1.64it/s] 45%|████▍ | 29281/65536 [5:01:13<6:11:20, 1.63it/s] 45%|████▍ | 29282/65536 [5:01:14<6:12:13, 1.62it/s] 45%|████▍ | 29283/65536 [5:01:14<6:15:05, 1.61it/s] 45%|████▍ | 29284/65536 [5:01:15<6:09:36, 1.63it/s] 45%|████▍ | 29285/65536 [5:01:15<6:09:58, 1.63it/s] 45%|████▍ | 29286/65536 [5:01:16<6:08:03, 1.64it/s] 45%|████▍ | 29287/65536 [5:01:17<6:09:11, 1.64it/s] 45%|████▍ | 29288/65536 [5:01:17<6:10:20, 1.63it/s] 45%|████▍ | 29289/65536 [5:01:18<6:24:35, 1.57it/s] 45%|████▍ | 29290/65536 [5:01:19<6:26:48, 1.56it/s] 45%|████▍ | 29291/65536 [5:01:19<6:20:45, 1.59it/s] 45%|████▍ | 29292/65536 [5:01:20<6:07:54, 1.64it/s] 45%|████▍ | 29293/65536 [5:01:20<6:05:49, 1.65it/s] 45%|████▍ | 29294/65536 [5:01:21<6:05:36, 1.65it/s] 45%|████▍ | 29295/65536 [5:01:22<6:14:45, 1.61it/s] 45%|████▍ | 29296/65536 [5:01:22<6:25:25, 1.57it/s] 45%|████▍ | 29297/65536 [5:01:23<6:21:42, 1.58it/s] 45%|████▍ | 29298/65536 [5:01:23<6:11:04, 1.63it/s] 45%|████▍ | 29299/65536 [5:01:24<6:05:59, 1.65it/s] 45%|████▍ | 29300/65536 [5:01:25<6:16:43, 1.60it/s] {'loss': 1.6903, 'learning_rate': 5.991490143259457e-07, 'epoch': 1808.64} + 45%|████▍ | 29300/65536 [5:01:25<6:16:43, 1.60it/s] 45%|████▍ | 29301/65536 [5:01:25<6:29:20, 1.55it/s] 45%|████▍ | 29302/65536 [5:01:26<6:28:05, 1.56it/s] 45%|████▍ | 29303/65536 [5:01:27<6:19:49, 1.59it/s] 45%|████▍ | 29304/65536 [5:01:27<6:08:55, 1.64it/s] 45%|████▍ | 29305/65536 [5:01:28<5:59:55, 1.68it/s] 45%|████▍ | 29306/65536 [5:01:28<6:07:29, 1.64it/s] 45%|████▍ | 29307/65536 [5:01:29<6:03:56, 1.66it/s] 45%|████▍ | 29308/65536 [5:01:30<6:23:32, 1.57it/s] 45%|████▍ | 29309/65536 [5:01:30<6:21:15, 1.58it/s] 45%|████▍ | 29310/65536 [5:01:31<6:20:20, 1.59it/s] 45%|████▍ | 29311/65536 [5:01:32<6:15:14, 1.61it/s] 45%|████▍ | 29312/65536 [5:01:32<6:10:46, 1.63it/s] 45%|████▍ | 29313/65536 [5:01:33<6:08:39, 1.64it/s] 45%|████▍ | 29314/65536 [5:01:33<6:08:11, 1.64it/s] 45%|████▍ | 29315/65536 [5:01:34<6:08:00, 1.64it/s] 45%|████▍ | 29316/65536 [5:01:35<6:04:10, 1.66it/s] 45%|████▍ | 29317/65536 [5:01:35<5:58:00, 1.69it/s] 45%|████▍ | 29318/65536 [5:01:36<6:10:21, 1.63it/s] 45%|████▍ | 29319/65536 [5:01:36<6:13:56, 1.61it/s] 45%|████▍ | 29320/65536 [5:01:37<6:17:03, 1.60it/s] {'loss': 1.6981, 'learning_rate': 5.988735153667197e-07, 'epoch': 1809.88} + 45%|████▍ | 29320/65536 [5:01:37<6:17:03, 1.60it/s] 45%|████▍ | 29321/65536 [5:01:38<6:13:18, 1.62it/s] 45%|████▍ | 29322/65536 [5:01:38<6:15:21, 1.61it/s] 45%|████▍ | 29323/65536 [5:01:39<6:35:42, 1.53it/s] 45%|████▍ | 29324/65536 [5:01:40<6:23:31, 1.57it/s] 45%|████▍ | 29325/65536 [5:01:40<6:23:51, 1.57it/s] 45%|████▍ | 29326/65536 [5:01:41<6:23:36, 1.57it/s] 45%|████▍ | 29327/65536 [5:01:42<6:19:03, 1.59it/s] 45%|████▍ | 29328/65536 [5:01:42<6:13:33, 1.62it/s] 45%|████▍ | 29329/65536 [5:01:43<6:02:01, 1.67it/s] 45%|████▍ | 29330/65536 [5:01:43<6:07:11, 1.64it/s] 45%|████▍ | 29331/65536 [5:01:44<6:04:47, 1.65it/s] 45%|████▍ | 29332/65536 [5:01:44<6:01:12, 1.67it/s] 45%|████▍ | 29333/65536 [5:01:45<6:01:43, 1.67it/s] 45%|████▍ | 29334/65536 [5:01:46<6:07:57, 1.64it/s] 45%|████▍ | 29335/65536 [5:01:46<6:04:30, 1.66it/s] 45%|████▍ | 29336/65536 [5:01:47<6:12:23, 1.62it/s] 45%|████▍ | 29337/65536 [5:01:48<6:11:00, 1.63it/s] 45%|████▍ | 29338/65536 [5:01:48<6:07:03, 1.64it/s] 45%|████▍ | 29339/65536 [5:01:49<6:22:22, 1.58it/s] 45%|████▍ | 29340/65536 [5:01:49<6:18:45, 1.59it/s] {'loss': 1.699, 'learning_rate': 5.985980164074936e-07, 'epoch': 1811.11} + 45%|████▍ | 29340/65536 [5:01:49<6:18:45, 1.59it/s] 45%|████▍ | 29341/65536 [5:01:50<6:24:22, 1.57it/s] 45%|████▍ | 29342/65536 [5:01:51<6:22:28, 1.58it/s] 45%|████▍ | 29343/65536 [5:01:51<6:13:42, 1.61it/s] 45%|████▍ | 29344/65536 [5:01:52<6:12:56, 1.62it/s] 45%|████▍ | 29345/65536 [5:01:53<6:09:33, 1.63it/s] 45%|████▍ | 29346/65536 [5:01:53<6:02:50, 1.66it/s] 45%|████▍ | 29347/65536 [5:01:54<6:03:07, 1.66it/s] 45%|████▍ | 29348/65536 [5:01:54<6:02:19, 1.66it/s] 45%|████▍ | 29349/65536 [5:01:55<5:57:30, 1.69it/s] 45%|████▍ | 29350/65536 [5:01:56<6:05:40, 1.65it/s] 45%|████▍ | 29351/65536 [5:01:56<6:08:35, 1.64it/s] 45%|████▍ | 29352/65536 [5:01:57<6:14:32, 1.61it/s] 45%|████▍ | 29353/65536 [5:01:57<6:10:11, 1.63it/s] 45%|████▍ | 29354/65536 [5:01:58<6:08:46, 1.64it/s] 45%|████▍ | 29355/65536 [5:01:59<6:16:17, 1.60it/s] 45%|████▍ | 29356/65536 [5:01:59<6:10:46, 1.63it/s] 45%|████▍ | 29357/65536 [5:02:00<6:07:07, 1.64it/s] 45%|████▍ | 29358/65536 [5:02:00<6:08:25, 1.64it/s] 45%|████▍ | 29359/65536 [5:02:01<6:07:11, 1.64it/s] 45%|████▍ | 29360/65536 [5:02:02<6:05:36, 1.65it/s] {'loss': 1.6921, 'learning_rate': 5.983225174482674e-07, 'epoch': 1812.35} + 45%|████▍ | 29360/65536 [5:02:02<6:05:36, 1.65it/s] 45%|████▍ | 29361/65536 [5:02:02<6:10:14, 1.63it/s] 45%|████▍ | 29362/65536 [5:02:03<6:07:22, 1.64it/s] 45%|████▍ | 29363/65536 [5:02:04<6:25:33, 1.56it/s] 45%|████▍ | 29364/65536 [5:02:04<6:20:48, 1.58it/s] 45%|████▍ | 29365/65536 [5:02:05<6:23:20, 1.57it/s] 45%|████▍ | 29366/65536 [5:02:06<6:27:22, 1.56it/s] 45%|████▍ | 29367/65536 [5:02:06<6:15:58, 1.60it/s] 45%|████▍ | 29368/65536 [5:02:07<6:18:14, 1.59it/s] 45%|████▍ | 29369/65536 [5:02:07<6:07:51, 1.64it/s] 45%|████▍ | 29370/65536 [5:02:08<5:58:51, 1.68it/s] 45%|████▍ | 29371/65536 [5:02:09<6:33:40, 1.53it/s] 45%|████▍ | 29372/65536 [5:02:09<6:27:41, 1.55it/s] 45%|████▍ | 29373/65536 [5:02:10<6:13:42, 1.61it/s] 45%|████▍ | 29374/65536 [5:02:10<6:12:01, 1.62it/s] 45%|████▍ | 29375/65536 [5:02:11<6:12:45, 1.62it/s] 45%|████▍ | 29376/65536 [5:02:12<6:18:30, 1.59it/s] 45%|████▍ | 29377/65536 [5:02:12<6:25:28, 1.56it/s] 45%|████▍ | 29378/65536 [5:02:13<6:17:48, 1.60it/s] 45%|████▍ | 29379/65536 [5:02:14<6:10:44, 1.63it/s] 45%|████▍ | 29380/65536 [5:02:14<5:58:58, 1.68it/s] {'loss': 1.69, 'learning_rate': 5.980470184890413e-07, 'epoch': 1813.58} + 45%|████▍ | 29380/65536 [5:02:14<5:58:58, 1.68it/s] 45%|████▍ | 29381/65536 [5:02:15<5:57:25, 1.69it/s] 45%|████▍ | 29382/65536 [5:02:15<5:56:34, 1.69it/s] 45%|████▍ | 29383/65536 [5:02:16<6:08:13, 1.64it/s] 45%|████▍ | 29384/65536 [5:02:17<6:10:55, 1.62it/s] 45%|████▍ | 29385/65536 [5:02:17<5:59:35, 1.68it/s] 45%|████▍ | 29386/65536 [5:02:18<6:04:25, 1.65it/s] 45%|████▍ | 29387/65536 [5:02:18<6:13:31, 1.61it/s] 45%|████▍ | 29388/65536 [5:02:19<6:06:44, 1.64it/s] 45%|████▍ | 29389/65536 [5:02:20<5:57:20, 1.69it/s] 45%|████▍ | 29390/65536 [5:02:20<6:07:06, 1.64it/s] 45%|████▍ | 29391/65536 [5:02:21<6:09:03, 1.63it/s] 45%|████▍ | 29392/65536 [5:02:21<6:06:06, 1.65it/s] 45%|████▍ | 29393/65536 [5:02:22<6:08:09, 1.64it/s] 45%|████▍ | 29394/65536 [5:02:23<6:04:12, 1.65it/s] 45%|████▍ | 29395/65536 [5:02:23<6:07:05, 1.64it/s] 45%|████▍ | 29396/65536 [5:02:24<5:59:43, 1.67it/s] 45%|████▍ | 29397/65536 [5:02:24<6:01:51, 1.66it/s] 45%|████▍ | 29398/65536 [5:02:25<6:00:05, 1.67it/s] 45%|████▍ | 29399/65536 [5:02:26<6:06:27, 1.64it/s] 45%|████▍ | 29400/65536 [5:02:26<6:13:23, 1.61it/s] {'loss': 1.6803, 'learning_rate': 5.977715195298151e-07, 'epoch': 1814.81} + 45%|████▍ | 29400/65536 [5:02:26<6:13:23, 1.61it/s] 45%|████▍ | 29401/65536 [5:02:27<6:30:54, 1.54it/s] 45%|████▍ | 29402/65536 [5:02:28<6:23:04, 1.57it/s] 45%|████▍ | 29403/65536 [5:02:28<6:20:38, 1.58it/s] 45%|████▍ | 29404/65536 [5:02:29<6:25:27, 1.56it/s] 45%|████▍ | 29405/65536 [5:02:30<6:21:52, 1.58it/s] 45%|████▍ | 29406/65536 [5:02:30<6:23:27, 1.57it/s] 45%|████▍ | 29407/65536 [5:02:31<6:20:12, 1.58it/s] 45%|████▍ | 29408/65536 [5:02:31<6:12:24, 1.62it/s] 45%|████▍ | 29409/65536 [5:02:32<6:26:17, 1.56it/s] 45%|████▍ | 29410/65536 [5:02:33<6:16:59, 1.60it/s] 45%|████▍ | 29411/65536 [5:02:33<6:17:28, 1.60it/s] 45%|████▍ | 29412/65536 [5:02:34<6:19:46, 1.59it/s] 45%|████▍ | 29413/65536 [5:02:35<6:17:04, 1.60it/s] 45%|████▍ | 29414/65536 [5:02:35<6:13:49, 1.61it/s] 45%|████▍ | 29415/65536 [5:02:36<6:12:10, 1.62it/s] 45%|████▍ | 29416/65536 [5:02:36<6:08:58, 1.63it/s] 45%|████▍ | 29417/65536 [5:02:37<6:19:43, 1.59it/s] 45%|████▍ | 29418/65536 [5:02:38<6:15:26, 1.60it/s] 45%|████▍ | 29419/65536 [5:02:38<6:06:49, 1.64it/s] 45%|████▍ | 29420/65536 [5:02:39<6:20:09, 1.58it/s] {'loss': 1.7028, 'learning_rate': 5.974960205705889e-07, 'epoch': 1816.05} + 45%|████▍ | 29420/65536 [5:02:39<6:20:09, 1.58it/s] 45%|████▍ | 29421/65536 [5:02:39<6:08:32, 1.63it/s] 45%|████▍ | 29422/65536 [5:02:40<6:07:39, 1.64it/s] 45%|████▍ | 29423/65536 [5:02:41<6:19:47, 1.58it/s] 45%|████▍ | 29424/65536 [5:02:41<6:18:43, 1.59it/s] 45%|████▍ | 29425/65536 [5:02:42<6:15:31, 1.60it/s] 45%|████▍ | 29426/65536 [5:02:43<6:12:19, 1.62it/s] 45%|████▍ | 29427/65536 [5:02:43<6:08:41, 1.63it/s] 45%|████▍ | 29428/65536 [5:02:44<6:04:15, 1.65it/s] 45%|████▍ | 29429/65536 [5:02:44<6:18:14, 1.59it/s] 45%|████▍ | 29430/65536 [5:02:45<6:11:17, 1.62it/s] 45%|████▍ | 29431/65536 [5:02:46<6:14:35, 1.61it/s] 45%|████▍ | 29432/65536 [5:02:46<6:13:07, 1.61it/s] 45%|████▍ | 29433/65536 [5:02:47<6:07:56, 1.64it/s] 45%|████▍ | 29434/65536 [5:02:48<6:11:23, 1.62it/s] 45%|████▍ | 29435/65536 [5:02:48<6:04:25, 1.65it/s] 45%|████▍ | 29436/65536 [5:02:49<6:17:32, 1.59it/s] 45%|████▍ | 29437/65536 [5:02:49<6:07:47, 1.64it/s] 45%|████▍ | 29438/65536 [5:02:50<6:07:11, 1.64it/s] 45%|████▍ | 29439/65536 [5:02:51<6:06:26, 1.64it/s] 45%|████▍ | 29440/65536 [5:02:51<6:08:15, 1.63it/s] {'loss': 1.6634, 'learning_rate': 5.972205216113627e-07, 'epoch': 1817.28} + 45%|████▍ | 29440/65536 [5:02:51<6:08:15, 1.63it/s] 45%|████▍ | 29441/65536 [5:02:52<6:09:11, 1.63it/s] 45%|████▍ | 29442/65536 [5:02:52<6:14:09, 1.61it/s] 45%|████▍ | 29443/65536 [5:02:53<6:12:45, 1.61it/s] 45%|████▍ | 29444/65536 [5:02:54<6:14:56, 1.60it/s] 45%|████▍ | 29445/65536 [5:02:54<6:18:59, 1.59it/s] 45%|████▍ | 29446/65536 [5:02:55<6:18:41, 1.59it/s] 45%|████▍ | 29447/65536 [5:02:56<6:15:36, 1.60it/s] 45%|████▍ | 29448/65536 [5:02:56<6:12:16, 1.62it/s] 45%|████▍ | 29449/65536 [5:02:57<6:05:52, 1.64it/s] 45%|████▍ | 29450/65536 [5:02:57<6:12:08, 1.62it/s] 45%|████▍ | 29451/65536 [5:02:58<6:05:17, 1.65it/s] 45%|████▍ | 29452/65536 [5:02:59<6:23:24, 1.57it/s] 45%|████▍ | 29453/65536 [5:02:59<6:22:28, 1.57it/s] 45%|████▍ | 29454/65536 [5:03:00<6:16:21, 1.60it/s] 45%|████▍ | 29455/65536 [5:03:01<6:12:34, 1.61it/s] 45%|████▍ | 29456/65536 [5:03:01<6:11:32, 1.62it/s] 45%|████▍ | 29457/65536 [5:03:02<6:12:53, 1.61it/s] 45%|████▍ | 29458/65536 [5:03:02<6:03:25, 1.65it/s] 45%|████▍ | 29459/65536 [5:03:03<5:57:17, 1.68it/s] 45%|████▍ | 29460/65536 [5:03:03<5:50:43, 1.71it/s] {'loss': 1.6827, 'learning_rate': 5.969450226521366e-07, 'epoch': 1818.52} + 45%|████▍ | 29460/65536 [5:03:03<5:50:43, 1.71it/s] 45%|████▍ | 29461/65536 [5:03:04<6:08:26, 1.63it/s] 45%|████▍ | 29462/65536 [5:03:05<6:12:20, 1.61it/s] 45%|████▍ | 29463/65536 [5:03:05<6:10:21, 1.62it/s] 45%|████▍ | 29464/65536 [5:03:06<6:08:26, 1.63it/s] 45%|████▍ | 29465/65536 [5:03:07<6:26:15, 1.56it/s] 45%|████▍ | 29466/65536 [5:03:07<6:28:24, 1.55it/s] 45%|████▍ | 29467/65536 [5:03:08<6:20:13, 1.58it/s] 45%|████▍ | 29468/65536 [5:03:09<6:25:38, 1.56it/s] 45%|████▍ | 29469/65536 [5:03:09<6:26:54, 1.55it/s] 45%|████▍ | 29470/65536 [5:03:10<6:24:05, 1.56it/s] 45%|████▍ | 29471/65536 [5:03:11<6:25:27, 1.56it/s] 45%|████▍ | 29472/65536 [5:03:11<6:22:40, 1.57it/s] 45%|████▍ | 29473/65536 [5:03:12<6:22:08, 1.57it/s] 45%|████▍ | 29474/65536 [5:03:12<6:13:56, 1.61it/s] 45%|████▍ | 29475/65536 [5:03:13<6:22:33, 1.57it/s] 45%|████▍ | 29476/65536 [5:03:14<6:09:44, 1.63it/s] 45%|████▍ | 29477/65536 [5:03:14<6:09:08, 1.63it/s] 45%|████▍ | 29478/65536 [5:03:15<6:03:45, 1.65it/s] 45%|████▍ | 29479/65536 [5:03:15<6:04:30, 1.65it/s] 45%|████▍ | 29480/65536 [5:03:16<6:07:09, 1.64it/s] {'loss': 1.6808, 'learning_rate': 5.966695236929104e-07, 'epoch': 1819.75} + 45%|████▍ | 29480/65536 [5:03:16<6:07:09, 1.64it/s] 45%|████▍ | 29481/65536 [5:03:17<6:04:30, 1.65it/s] 45%|████▍ | 29482/65536 [5:03:17<5:59:26, 1.67it/s] 45%|████▍ | 29483/65536 [5:03:18<6:07:48, 1.63it/s] 45%|████▍ | 29484/65536 [5:03:19<6:13:49, 1.61it/s] 45%|████▍ | 29485/65536 [5:03:19<6:23:20, 1.57it/s] 45%|████▍ | 29486/65536 [5:03:20<6:29:22, 1.54it/s] 45%|████▍ | 29487/65536 [5:03:21<6:25:26, 1.56it/s] 45%|████▍ | 29488/65536 [5:03:21<6:24:20, 1.56it/s] 45%|████▍ | 29489/65536 [5:03:22<6:16:23, 1.60it/s] 45%|████▍ | 29490/65536 [5:03:22<6:10:01, 1.62it/s] 45%|████▍ | 29491/65536 [5:03:23<6:10:59, 1.62it/s] 45%|████▌ | 29492/65536 [5:03:24<6:12:34, 1.61it/s] 45%|████▌ | 29493/65536 [5:03:24<6:07:38, 1.63it/s] 45%|████▌ | 29494/65536 [5:03:25<6:05:40, 1.64it/s] 45%|████▌ | 29495/65536 [5:03:25<5:58:59, 1.67it/s] 45%|████▌ | 29496/65536 [5:03:26<6:04:03, 1.65it/s] 45%|████▌ | 29497/65536 [5:03:27<6:06:12, 1.64it/s] 45%|████▌ | 29498/65536 [5:03:27<6:05:58, 1.64it/s] 45%|████▌ | 29499/65536 [5:03:28<6:17:32, 1.59it/s] 45%|████▌ | 29500/65536 [5:03:28<6:09:32, 1.63it/s] {'loss': 1.6874, 'learning_rate': 5.963940247336842e-07, 'epoch': 1820.99} + 45%|████▌ | 29500/65536 [5:03:28<6:09:32, 1.63it/s] 45%|████▌ | 29501/65536 [5:03:29<6:23:06, 1.57it/s] 45%|████▌ | 29502/65536 [5:03:30<6:26:25, 1.55it/s] 45%|████▌ | 29503/65536 [5:03:30<6:29:13, 1.54it/s] 45%|████▌ | 29504/65536 [5:03:31<6:17:47, 1.59it/s] 45%|████▌ | 29505/65536 [5:03:32<6:08:15, 1.63it/s] 45%|████▌ | 29506/65536 [5:03:32<6:12:42, 1.61it/s] 45%|████▌ | 29507/65536 [5:03:33<6:15:22, 1.60it/s] 45%|████▌ | 29508/65536 [5:03:33<6:07:15, 1.64it/s] 45%|████▌ | 29509/65536 [5:03:34<6:02:28, 1.66it/s] 45%|████▌ | 29510/65536 [5:03:35<6:10:48, 1.62it/s] 45%|████▌ | 29511/65536 [5:03:35<6:10:23, 1.62it/s] 45%|████▌ | 29512/65536 [5:03:36<6:08:44, 1.63it/s] 45%|████▌ | 29513/65536 [5:03:37<6:05:22, 1.64it/s] 45%|████▌ | 29514/65536 [5:03:37<6:04:33, 1.65it/s] 45%|████▌ | 29515/65536 [5:03:38<6:16:23, 1.60it/s] 45%|████▌ | 29516/65536 [5:03:38<6:16:04, 1.60it/s] 45%|████▌ | 29517/65536 [5:03:39<6:20:47, 1.58it/s] 45%|████▌ | 29518/65536 [5:03:40<6:12:08, 1.61it/s] 45%|████▌ | 29519/65536 [5:03:40<6:12:16, 1.61it/s] 45%|████▌ | 29520/65536 [5:03:41<6:05:34, 1.64it/s] {'loss': 1.7005, 'learning_rate': 5.961185257744581e-07, 'epoch': 1822.22} + 45%|████▌ | 29520/65536 [5:03:41<6:05:34, 1.64it/s] 45%|████▌ | 29521/65536 [5:03:41<6:05:29, 1.64it/s] 45%|████▌ | 29522/65536 [5:03:42<6:04:27, 1.65it/s] 45%|████▌ | 29523/65536 [5:03:43<6:16:37, 1.59it/s] 45%|████▌ | 29524/65536 [5:03:43<6:24:58, 1.56it/s] 45%|████▌ | 29525/65536 [5:03:44<6:28:29, 1.54it/s] 45%|████▌ | 29526/65536 [5:03:45<6:29:44, 1.54it/s] 45%|████▌ | 29527/65536 [5:03:45<6:17:05, 1.59it/s] 45%|████▌ | 29528/65536 [5:03:46<6:22:35, 1.57it/s] 45%|████▌ | 29529/65536 [5:03:47<6:16:19, 1.59it/s] 45%|████▌ | 29530/65536 [5:03:47<6:08:08, 1.63it/s] 45%|████▌ | 29531/65536 [5:03:48<6:08:48, 1.63it/s] 45%|████▌ | 29532/65536 [5:03:48<6:02:49, 1.65it/s] 45%|████▌ | 29533/65536 [5:03:49<6:12:03, 1.61it/s] 45%|████▌ | 29534/65536 [5:03:50<6:06:13, 1.64it/s] 45%|████▌ | 29535/65536 [5:03:50<6:02:46, 1.65it/s] 45%|████▌ | 29536/65536 [5:03:51<6:11:50, 1.61it/s] 45%|████▌ | 29537/65536 [5:03:51<6:08:17, 1.63it/s] 45%|████▌ | 29538/65536 [5:03:52<5:58:11, 1.67it/s] 45%|████▌ | 29539/65536 [5:03:53<6:03:09, 1.65it/s] 45%|████▌ | 29540/65536 [5:03:53<5:54:26, 1.69it/s] {'loss': 1.7055, 'learning_rate': 5.958430268152319e-07, 'epoch': 1823.46} + 45%|████▌ | 29540/65536 [5:03:53<5:54:26, 1.69it/s] 45%|████▌ | 29541/65536 [5:03:54<5:58:16, 1.67it/s] 45%|████▌ | 29542/65536 [5:03:54<5:59:21, 1.67it/s] 45%|████▌ | 29543/65536 [5:03:55<6:12:57, 1.61it/s] 45%|████▌ | 29544/65536 [5:03:56<6:07:50, 1.63it/s] 45%|████▌ | 29545/65536 [5:03:56<6:05:12, 1.64it/s] 45%|████▌ | 29546/65536 [5:03:57<6:04:21, 1.65it/s] 45%|████▌ | 29547/65536 [5:03:58<6:05:14, 1.64it/s] 45%|████▌ | 29548/65536 [5:03:58<6:24:35, 1.56it/s] 45%|████▌ | 29549/65536 [5:03:59<6:38:31, 1.51it/s] 45%|████▌ | 29550/65536 [5:04:00<6:25:25, 1.56it/s] 45%|████▌ | 29551/65536 [5:04:00<6:15:01, 1.60it/s] 45%|████▌ | 29552/65536 [5:04:01<6:03:18, 1.65it/s] 45%|████▌ | 29553/65536 [5:04:01<6:14:18, 1.60it/s] 45%|████▌ | 29554/65536 [5:04:02<6:07:36, 1.63it/s] 45%|████▌ | 29555/65536 [5:04:03<6:03:24, 1.65it/s] 45%|████▌ | 29556/65536 [5:04:03<6:09:55, 1.62it/s] 45%|████▌ | 29557/65536 [5:04:04<6:16:52, 1.59it/s] 45%|████▌ | 29558/65536 [5:04:04<6:09:12, 1.62it/s] 45%|████▌ | 29559/65536 [5:04:05<6:07:36, 1.63it/s] 45%|████▌ | 29560/65536 [5:04:06<6:09:45, 1.62it/s] {'loss': 1.6762, 'learning_rate': 5.955675278560058e-07, 'epoch': 1824.69} + 45%|████▌ | 29560/65536 [5:04:06<6:09:45, 1.62it/s] 45%|████▌ | 29561/65536 [5:04:06<6:11:51, 1.61it/s] 45%|████▌ | 29562/65536 [5:04:07<6:10:24, 1.62it/s] 45%|████▌ | 29563/65536 [5:04:07<6:05:16, 1.64it/s] 45%|████▌ | 29564/65536 [5:04:08<5:59:48, 1.67it/s] 45%|████▌ | 29565/65536 [5:04:09<6:07:52, 1.63it/s] 45%|████▌ | 29566/65536 [5:04:09<6:23:45, 1.56it/s] 45%|████▌ | 29567/65536 [5:04:10<6:19:43, 1.58it/s] 45%|████▌ | 29568/65536 [5:04:11<6:13:13, 1.61it/s] 45%|████▌ | 29569/65536 [5:04:11<6:07:54, 1.63it/s] 45%|████▌ | 29570/65536 [5:04:12<6:06:04, 1.64it/s] 45%|████▌ | 29571/65536 [5:04:12<6:02:40, 1.65it/s] 45%|████▌ | 29572/65536 [5:04:13<6:02:39, 1.65it/s] 45%|████▌ | 29573/65536 [5:04:14<6:03:56, 1.65it/s] 45%|████▌ | 29574/65536 [5:04:14<6:22:56, 1.57it/s] 45%|████▌ | 29575/65536 [5:04:15<6:20:31, 1.58it/s] 45%|████▌ | 29576/65536 [5:04:16<6:17:00, 1.59it/s] 45%|████▌ | 29577/65536 [5:04:16<6:18:09, 1.58it/s] 45%|████▌ | 29578/65536 [5:04:17<6:22:10, 1.57it/s] 45%|████▌ | 29579/65536 [5:04:17<6:21:38, 1.57it/s] 45%|████▌ | 29580/65536 [5:04:18<6:14:51, 1.60it/s] {'loss': 1.6896, 'learning_rate': 5.952920288967796e-07, 'epoch': 1825.93} + 45%|████▌ | 29580/65536 [5:04:18<6:14:51, 1.60it/s] 45%|████▌ | 29581/65536 [5:04:19<6:22:16, 1.57it/s] 45%|████▌ | 29582/65536 [5:04:19<6:28:35, 1.54it/s] 45%|████▌ | 29583/65536 [5:04:20<6:30:47, 1.53it/s] 45%|████▌ | 29584/65536 [5:04:21<6:16:36, 1.59it/s] 45%|████▌ | 29585/65536 [5:04:21<6:15:49, 1.59it/s] 45%|████▌ | 29586/65536 [5:04:22<6:09:19, 1.62it/s] 45%|████▌ | 29587/65536 [5:04:23<6:19:05, 1.58it/s] 45%|████▌ | 29588/65536 [5:04:23<6:18:02, 1.58it/s] 45%|████▌ | 29589/65536 [5:04:24<6:13:39, 1.60it/s] 45%|████▌ | 29590/65536 [5:04:24<6:12:34, 1.61it/s] 45%|████▌ | 29591/65536 [5:04:25<6:09:18, 1.62it/s] 45%|████▌ | 29592/65536 [5:04:26<6:11:57, 1.61it/s] 45%|████▌ | 29593/65536 [5:04:26<6:10:08, 1.62it/s] 45%|████▌ | 29594/65536 [5:04:27<6:13:38, 1.60it/s] 45%|████▌ | 29595/65536 [5:04:27<6:06:56, 1.63it/s] 45%|████▌ | 29596/65536 [5:04:28<6:06:47, 1.63it/s] 45%|████▌ | 29597/65536 [5:04:29<6:09:54, 1.62it/s] 45%|████▌ | 29598/65536 [5:04:29<6:22:51, 1.56it/s] 45%|████▌ | 29599/65536 [5:04:30<6:20:08, 1.58it/s] 45%|████▌ | 29600/65536 [5:04:31<6:33:48, 1.52it/s] {'loss': 1.6752, 'learning_rate': 5.950165299375535e-07, 'epoch': 1827.16} + 45%|████▌ | 29600/65536 [5:04:31<6:33:48, 1.52it/s] 45%|████▌ | 29601/65536 [5:04:31<6:22:08, 1.57it/s] 45%|████▌ | 29602/65536 [5:04:32<6:19:16, 1.58it/s] 45%|████▌ | 29603/65536 [5:04:33<6:18:59, 1.58it/s] 45%|████▌ | 29604/65536 [5:04:33<6:04:04, 1.64it/s] 45%|████▌ | 29605/65536 [5:04:34<6:08:01, 1.63it/s] 45%|████▌ | 29606/65536 [5:04:34<6:07:58, 1.63it/s] 45%|████▌ | 29607/65536 [5:04:35<6:03:52, 1.65it/s] 45%|████▌ | 29608/65536 [5:04:36<6:14:10, 1.60it/s] 45%|████▌ | 29609/65536 [5:04:36<6:22:47, 1.56it/s] 45%|████▌ | 29610/65536 [5:04:37<6:19:42, 1.58it/s] 45%|████▌ | 29611/65536 [5:04:38<6:20:07, 1.58it/s] 45%|████▌ | 29612/65536 [5:04:38<6:09:33, 1.62it/s] 45%|████▌ | 29613/65536 [5:04:39<6:03:43, 1.65it/s] 45%|████▌ | 29614/65536 [5:04:39<6:20:13, 1.57it/s] 45%|████▌ | 29615/65536 [5:04:40<6:16:52, 1.59it/s] 45%|████▌ | 29616/65536 [5:04:41<6:18:42, 1.58it/s] 45%|████▌ | 29617/65536 [5:04:41<6:06:03, 1.64it/s] 45%|████▌ | 29618/65536 [5:04:42<5:58:31, 1.67it/s] 45%|████▌ | 29619/65536 [5:04:42<6:02:19, 1.65it/s] 45%|████▌ | 29620/65536 [5:04:43<6:05:38, 1.64it/s] {'loss': 1.712, 'learning_rate': 5.947410309783275e-07, 'epoch': 1828.4} + 45%|████▌ | 29620/65536 [5:04:43<6:05:38, 1.64it/s] 45%|████▌ | 29621/65536 [5:04:44<6:05:23, 1.64it/s] 45%|████▌ | 29622/65536 [5:04:44<6:16:37, 1.59it/s] 45%|████▌ | 29623/65536 [5:04:45<6:17:50, 1.58it/s] 45%|████▌ | 29624/65536 [5:04:46<6:18:43, 1.58it/s] 45%|████▌ | 29625/65536 [5:04:46<6:18:52, 1.58it/s] 45%|████▌ | 29626/65536 [5:04:47<6:23:22, 1.56it/s] 45%|████▌ | 29627/65536 [5:04:48<6:18:16, 1.58it/s] 45%|████▌ | 29628/65536 [5:04:48<6:14:01, 1.60it/s] 45%|████▌ | 29629/65536 [5:04:49<6:09:17, 1.62it/s] 45%|████▌ | 29630/65536 [5:04:49<6:27:24, 1.54it/s] 45%|████▌ | 29631/65536 [5:04:50<6:27:15, 1.55it/s] 45%|████▌ | 29632/65536 [5:04:51<6:19:19, 1.58it/s] 45%|████▌ | 29633/65536 [5:04:51<6:19:19, 1.58it/s] 45%|████▌ | 29634/65536 [5:04:52<6:14:33, 1.60it/s] 45%|████▌ | 29635/65536 [5:04:53<6:09:47, 1.62it/s] 45%|████▌ | 29636/65536 [5:04:53<6:13:39, 1.60it/s] 45%|████▌ | 29637/65536 [5:04:54<6:14:42, 1.60it/s] 45%|████▌ | 29638/65536 [5:04:54<6:15:25, 1.59it/s] 45%|████▌ | 29639/65536 [5:04:55<6:11:54, 1.61it/s] 45%|████▌ | 29640/65536 [5:04:56<6:24:02, 1.56it/s] {'loss': 1.6502, 'learning_rate': 5.944655320191013e-07, 'epoch': 1829.63} + 45%|████▌ | 29640/65536 [5:04:56<6:24:02, 1.56it/s] 45%|████▌ | 29641/65536 [5:04:56<6:26:15, 1.55it/s] 45%|████▌ | 29642/65536 [5:04:57<6:14:15, 1.60it/s] 45%|████▌ | 29643/65536 [5:04:58<6:21:22, 1.57it/s] 45%|████▌ | 29644/65536 [5:04:58<6:20:04, 1.57it/s] 45%|████▌ | 29645/65536 [5:04:59<6:15:04, 1.59it/s] 45%|████▌ | 29646/65536 [5:04:59<6:07:51, 1.63it/s] 45%|████▌ | 29647/65536 [5:05:00<6:30:01, 1.53it/s] 45%|████▌ | 29648/65536 [5:05:01<6:27:16, 1.54it/s] 45%|████▌ | 29649/65536 [5:05:01<6:26:12, 1.55it/s] 45%|████▌ | 29650/65536 [5:05:02<6:16:37, 1.59it/s] 45%|████▌ | 29651/65536 [5:05:03<6:07:21, 1.63it/s] 45%|████▌ | 29652/65536 [5:05:03<6:10:14, 1.62it/s] 45%|████▌ | 29653/65536 [5:05:04<6:05:06, 1.64it/s] 45%|████▌ | 29654/65536 [5:05:05<6:15:48, 1.59it/s] 45%|████▌ | 29655/65536 [5:05:05<6:10:34, 1.61it/s] 45%|████▌ | 29656/65536 [5:05:06<6:13:02, 1.60it/s] 45%|████▌ | 29657/65536 [5:05:06<6:13:17, 1.60it/s] 45%|████▌ | 29658/65536 [5:05:07<6:20:07, 1.57it/s] 45%|████▌ | 29659/65536 [5:05:08<6:10:40, 1.61it/s] 45%|████▌ | 29660/65536 [5:05:08<6:07:59, 1.62it/s] {'loss': 1.7099, 'learning_rate': 5.941900330598752e-07, 'epoch': 1830.86} + 45%|████▌ | 29660/65536 [5:05:08<6:07:59, 1.62it/s] 45%|████▌ | 29661/65536 [5:05:09<6:01:24, 1.65it/s] 45%|████▌ | 29662/65536 [5:05:09<6:03:06, 1.65it/s] 45%|████▌ | 29663/65536 [5:05:10<6:17:59, 1.58it/s] 45%|████▌ | 29664/65536 [5:05:11<6:18:46, 1.58it/s] 45%|████▌ | 29665/65536 [5:05:11<6:11:38, 1.61it/s] 45%|████▌ | 29666/65536 [5:05:12<6:16:23, 1.59it/s] 45%|████▌ | 29667/65536 [5:05:13<6:13:46, 1.60it/s] 45%|████▌ | 29668/65536 [5:05:13<6:14:04, 1.60it/s] 45%|████▌ | 29669/65536 [5:05:14<6:15:03, 1.59it/s] 45%|████▌ | 29670/65536 [5:05:15<6:13:45, 1.60it/s] 45%|████▌ | 29671/65536 [5:05:15<6:19:17, 1.58it/s] 45%|████▌ | 29672/65536 [5:05:16<6:18:20, 1.58it/s] 45%|████▌ | 29673/65536 [5:05:16<6:15:07, 1.59it/s] 45%|████▌ | 29674/65536 [5:05:17<6:10:26, 1.61it/s] 45%|████▌ | 29675/65536 [5:05:18<6:22:29, 1.56it/s] 45%|████▌ | 29676/65536 [5:05:18<6:13:57, 1.60it/s] 45%|████▌ | 29677/65536 [5:05:19<6:23:51, 1.56it/s] 45%|████▌ | 29678/65536 [5:05:20<6:17:57, 1.58it/s] 45%|████▌ | 29679/65536 [5:05:20<6:25:37, 1.55it/s] 45%|████▌ | 29680/65536 [5:05:21<6:18:39, 1.58it/s] {'loss': 1.6656, 'learning_rate': 5.93914534100649e-07, 'epoch': 1832.1} + 45%|████▌ | 29680/65536 [5:05:21<6:18:39, 1.58it/s] 45%|████▌ | 29681/65536 [5:05:21<6:12:39, 1.60it/s] 45%|████▌ | 29682/65536 [5:05:22<6:06:25, 1.63it/s] 45%|████▌ | 29683/65536 [5:05:23<6:15:36, 1.59it/s] 45%|████▌ | 29684/65536 [5:05:23<6:18:28, 1.58it/s] 45%|████▌ | 29685/65536 [5:05:24<6:25:16, 1.55it/s] 45%|████▌ | 29686/65536 [5:05:25<6:19:15, 1.58it/s] 45%|████▌ | 29687/65536 [5:05:25<6:12:20, 1.60it/s] 45%|████▌ | 29688/65536 [5:05:26<6:06:56, 1.63it/s] 45%|████▌ | 29689/65536 [5:05:26<6:06:58, 1.63it/s] 45%|████▌ | 29690/65536 [5:05:27<6:14:09, 1.60it/s] 45%|████▌ | 29691/65536 [5:05:28<6:32:42, 1.52it/s] 45%|████▌ | 29692/65536 [5:05:28<6:21:54, 1.56it/s] 45%|████▌ | 29693/65536 [5:05:29<6:19:40, 1.57it/s] 45%|████▌ | 29694/65536 [5:05:30<6:15:09, 1.59it/s] 45%|████▌ | 29695/65536 [5:05:30<6:29:34, 1.53it/s] 45%|████▌ | 29696/65536 [5:05:31<6:32:51, 1.52it/s] 45%|████▌ | 29697/65536 [5:05:32<6:19:57, 1.57it/s] 45%|████▌ | 29698/65536 [5:05:32<6:20:56, 1.57it/s] 45%|████▌ | 29699/65536 [5:05:33<6:16:10, 1.59it/s] 45%|████▌ | 29700/65536 [5:05:34<6:20:15, 1.57it/s] {'loss': 1.6611, 'learning_rate': 5.936390351414228e-07, 'epoch': 1833.33} + 45%|████▌ | 29700/65536 [5:05:34<6:20:15, 1.57it/s] 45%|████▌ | 29701/65536 [5:05:34<6:25:06, 1.55it/s] 45%|████▌ | 29702/65536 [5:05:35<6:17:39, 1.58it/s] 45%|████▌ | 29703/65536 [5:05:35<6:15:41, 1.59it/s] 45%|████▌ | 29704/65536 [5:05:36<6:12:36, 1.60it/s] 45%|████▌ | 29705/65536 [5:05:37<6:17:05, 1.58it/s] 45%|████▌ | 29706/65536 [5:05:37<6:10:31, 1.61it/s] 45%|████▌ | 29707/65536 [5:05:38<6:06:23, 1.63it/s] 45%|████▌ | 29708/65536 [5:05:38<6:02:27, 1.65it/s] 45%|████▌ | 29709/65536 [5:05:39<6:01:09, 1.65it/s] 45%|████▌ | 29710/65536 [5:05:40<6:00:23, 1.66it/s] 45%|████▌ | 29711/65536 [5:05:40<6:05:45, 1.63it/s] 45%|████▌ | 29712/65536 [5:05:41<6:08:57, 1.62it/s] 45%|████▌ | 29713/65536 [5:05:42<6:12:38, 1.60it/s] 45%|████▌ | 29714/65536 [5:05:42<6:06:47, 1.63it/s] 45%|████▌ | 29715/65536 [5:05:43<6:05:18, 1.63it/s] 45%|████▌ | 29716/65536 [5:05:43<6:04:22, 1.64it/s] 45%|████▌ | 29717/65536 [5:05:44<6:06:14, 1.63it/s] 45%|████▌ | 29718/65536 [5:05:45<6:04:37, 1.64it/s] 45%|████▌ | 29719/65536 [5:05:45<6:08:39, 1.62it/s] 45%|████▌ | 29720/65536 [5:05:46<6:07:08, 1.63it/s] {'loss': 1.7461, 'learning_rate': 5.933635361821967e-07, 'epoch': 1834.57} + 45%|████▌ | 29720/65536 [5:05:46<6:07:08, 1.63it/s] 45%|████▌ | 29721/65536 [5:05:46<6:07:47, 1.62it/s] 45%|████▌ | 29722/65536 [5:05:47<6:06:03, 1.63it/s] 45%|████▌ | 29723/65536 [5:05:48<6:20:44, 1.57it/s] 45%|████▌ | 29724/65536 [5:05:48<6:25:51, 1.55it/s] 45%|████▌ | 29725/65536 [5:05:49<6:17:03, 1.58it/s] 45%|████▌ | 29726/65536 [5:05:50<6:16:30, 1.59it/s] 45%|████▌ | 29727/65536 [5:05:50<6:13:54, 1.60it/s] 45%|████▌ | 29728/65536 [5:05:51<6:26:21, 1.54it/s] 45%|████▌ | 29729/65536 [5:05:52<6:15:35, 1.59it/s] 45%|████▌ | 29730/65536 [5:05:52<6:19:49, 1.57it/s] 45%|████▌ | 29731/65536 [5:05:53<6:17:53, 1.58it/s] 45%|████▌ | 29732/65536 [5:05:53<6:08:40, 1.62it/s] 45%|████▌ | 29733/65536 [5:05:54<6:06:57, 1.63it/s] 45%|████▌ | 29734/65536 [5:05:55<6:07:14, 1.62it/s] 45%|████▌ | 29735/65536 [5:05:55<6:13:43, 1.60it/s] 45%|████▌ | 29736/65536 [5:05:56<6:11:43, 1.61it/s] 45%|████▌ | 29737/65536 [5:05:57<6:15:10, 1.59it/s] 45%|████▌ | 29738/65536 [5:05:57<6:21:24, 1.56it/s] 45%|████▌ | 29739/65536 [5:05:58<6:23:14, 1.56it/s] 45%|████▌ | 29740/65536 [5:05:58<6:14:59, 1.59it/s] {'loss': 1.7029, 'learning_rate': 5.930880372229705e-07, 'epoch': 1835.8} + 45%|████▌ | 29740/65536 [5:05:58<6:14:59, 1.59it/s] 45%|████▌ | 29741/65536 [5:05:59<6:18:01, 1.58it/s] 45%|████▌ | 29742/65536 [5:06:00<6:12:15, 1.60it/s] 45%|████▌ | 29743/65536 [5:06:00<6:03:16, 1.64it/s] 45%|████▌ | 29744/65536 [5:06:01<6:14:00, 1.59it/s] 45%|████▌ | 29745/65536 [5:06:02<6:12:24, 1.60it/s] 45%|████▌ | 29746/65536 [5:06:02<6:10:39, 1.61it/s] 45%|████▌ | 29747/65536 [5:06:03<6:04:15, 1.64it/s] 45%|████▌ | 29748/65536 [5:06:03<6:05:45, 1.63it/s] 45%|████▌ | 29749/65536 [5:06:04<6:05:05, 1.63it/s] 45%|████▌ | 29750/65536 [5:06:05<6:08:16, 1.62it/s] 45%|████▌ | 29751/65536 [5:06:05<6:10:47, 1.61it/s] 45%|████▌ | 29752/65536 [5:06:06<6:11:22, 1.61it/s] 45%|████▌ | 29753/65536 [5:06:06<6:10:59, 1.61it/s] 45%|████▌ | 29754/65536 [5:06:07<6:14:48, 1.59it/s] 45%|████▌ | 29755/65536 [5:06:08<6:13:14, 1.60it/s] 45%|████▌ | 29756/65536 [5:06:08<6:24:47, 1.55it/s] 45%|████▌ | 29757/65536 [5:06:09<6:14:03, 1.59it/s] 45%|████▌ | 29758/65536 [5:06:10<6:11:59, 1.60it/s] 45%|████▌ | 29759/65536 [5:06:10<6:16:16, 1.58it/s] 45%|████▌ | 29760/65536 [5:06:11<6:24:07, 1.55it/s] {'loss': 1.6585, 'learning_rate': 5.928125382637443e-07, 'epoch': 1837.04} + 45%|████▌ | 29760/65536 [5:06:11<6:24:07, 1.55it/s] 45%|████▌ | 29761/65536 [5:06:12<6:16:41, 1.58it/s] 45%|████▌ | 29762/65536 [5:06:12<6:11:32, 1.60it/s] 45%|████▌ | 29763/65536 [5:06:13<6:10:02, 1.61it/s] 45%|████▌ | 29764/65536 [5:06:13<6:09:35, 1.61it/s] 45%|████▌ | 29765/65536 [5:06:14<6:05:48, 1.63it/s] 45%|████▌ | 29766/65536 [5:06:15<6:06:44, 1.63it/s] 45%|████▌ | 29767/65536 [5:06:15<6:04:28, 1.64it/s] 45%|████▌ | 29768/65536 [5:06:16<6:07:33, 1.62it/s] 45%|████▌ | 29769/65536 [5:06:17<6:21:29, 1.56it/s] 45%|████▌ | 29770/65536 [5:06:17<6:16:13, 1.58it/s] 45%|████▌ | 29771/65536 [5:06:18<6:18:25, 1.58it/s] 45%|████▌ | 29772/65536 [5:06:18<6:14:25, 1.59it/s] 45%|████▌ | 29773/65536 [5:06:19<6:04:06, 1.64it/s] 45%|████▌ | 29774/65536 [5:06:20<6:15:39, 1.59it/s] 45%|████▌ | 29775/65536 [5:06:20<6:15:36, 1.59it/s] 45%|████▌ | 29776/65536 [5:06:21<6:27:27, 1.54it/s] 45%|████▌ | 29777/65536 [5:06:22<6:23:19, 1.55it/s] 45%|████▌ | 29778/65536 [5:06:22<6:24:44, 1.55it/s] 45%|████▌ | 29779/65536 [5:06:23<6:27:43, 1.54it/s] 45%|████▌ | 29780/65536 [5:06:24<6:23:12, 1.56it/s] {'loss': 1.6802, 'learning_rate': 5.925370393045181e-07, 'epoch': 1838.27} + 45%|████▌ | 29780/65536 [5:06:24<6:23:12, 1.56it/s] 45%|████▌ | 29781/65536 [5:06:24<6:21:19, 1.56it/s] 45%|████▌ | 29782/65536 [5:06:25<6:13:18, 1.60it/s] 45%|████▌ | 29783/65536 [5:06:25<6:12:20, 1.60it/s] 45%|████▌ | 29784/65536 [5:06:26<6:09:17, 1.61it/s] 45%|████▌ | 29785/65536 [5:06:27<6:06:33, 1.63it/s] 45%|████▌ | 29786/65536 [5:06:27<6:07:14, 1.62it/s] 45%|████▌ | 29787/65536 [5:06:28<6:02:35, 1.64it/s] 45%|████▌ | 29788/65536 [5:06:28<6:02:36, 1.64it/s] 45%|████▌ | 29789/65536 [5:06:29<5:59:40, 1.66it/s] 45%|████▌ | 29790/65536 [5:06:30<6:03:03, 1.64it/s] 45%|████▌ | 29791/65536 [5:06:30<6:17:58, 1.58it/s] 45%|████▌ | 29792/65536 [5:06:31<6:24:18, 1.55it/s] 45%|████▌ | 29793/65536 [5:06:32<6:21:11, 1.56it/s] 45%|████▌ | 29794/65536 [5:06:32<6:19:55, 1.57it/s] 45%|████▌ | 29795/65536 [5:06:33<6:08:44, 1.62it/s] 45%|████▌ | 29796/65536 [5:06:33<6:03:52, 1.64it/s] 45%|████▌ | 29797/65536 [5:06:34<5:58:57, 1.66it/s] 45%|████▌ | 29798/65536 [5:06:35<6:02:01, 1.65it/s] 45%|████▌ | 29799/65536 [5:06:35<6:01:33, 1.65it/s] 45%|████▌ | 29800/65536 [5:06:36<6:08:40, 1.62it/s] {'loss': 1.6939, 'learning_rate': 5.92261540345292e-07, 'epoch': 1839.51} + 45%|████▌ | 29800/65536 [5:06:36<6:08:40, 1.62it/s] 45%|████▌ | 29801/65536 [5:06:37<6:14:18, 1.59it/s] 45%|████▌ | 29802/65536 [5:06:37<6:19:03, 1.57it/s] 45%|████▌ | 29803/65536 [5:06:38<6:13:45, 1.59it/s] 45%|████▌ | 29804/65536 [5:06:38<6:11:00, 1.61it/s] 45%|████▌ | 29805/65536 [5:06:39<6:13:52, 1.59it/s] 45%|████▌ | 29806/65536 [5:06:40<6:13:11, 1.60it/s] 45%|████▌ | 29807/65536 [5:06:40<6:08:34, 1.62it/s] 45%|████▌ | 29808/65536 [5:06:41<6:17:03, 1.58it/s] 45%|████▌ | 29809/65536 [5:06:42<6:22:52, 1.56it/s] 45%|████▌ | 29810/65536 [5:06:42<6:15:31, 1.59it/s] 45%|████▌ | 29811/65536 [5:06:43<6:12:17, 1.60it/s] 45%|████▌ | 29812/65536 [5:06:43<6:02:26, 1.64it/s] 45%|████▌ | 29813/65536 [5:06:44<6:17:32, 1.58it/s] 45%|████▌ | 29814/65536 [5:06:45<6:08:28, 1.62it/s] 45%|████▌ | 29815/65536 [5:06:45<6:14:27, 1.59it/s] 45%|████▌ | 29816/65536 [5:06:46<6:34:07, 1.51it/s] 45%|████▌ | 29817/65536 [5:06:47<6:21:13, 1.56it/s] 45%|████▌ | 29818/65536 [5:06:47<6:21:02, 1.56it/s] 46%|████▌ | 29819/65536 [5:06:48<6:23:32, 1.55it/s] 46%|████▌ | 29820/65536 [5:06:49<6:18:53, 1.57it/s] {'loss': 1.6721, 'learning_rate': 5.919860413860658e-07, 'epoch': 1840.74} + 46%|████▌ | 29820/65536 [5:06:49<6:18:53, 1.57it/s] 46%|████▌ | 29821/65536 [5:06:49<6:15:01, 1.59it/s] 46%|████▌ | 29822/65536 [5:06:50<6:09:04, 1.61it/s] 46%|████▌ | 29823/65536 [5:06:50<6:06:54, 1.62it/s] 46%|████▌ | 29824/65536 [5:06:51<6:04:14, 1.63it/s] 46%|████▌ | 29825/65536 [5:06:52<6:18:17, 1.57it/s] 46%|████▌ | 29826/65536 [5:06:52<6:17:46, 1.58it/s] 46%|████▌ | 29827/65536 [5:06:53<6:11:24, 1.60it/s] 46%|████▌ | 29828/65536 [5:06:54<6:12:11, 1.60it/s] 46%|████▌ | 29829/65536 [5:06:54<6:11:46, 1.60it/s] 46%|████▌ | 29830/65536 [5:06:55<6:07:37, 1.62it/s] 46%|████▌ | 29831/65536 [5:06:55<6:10:52, 1.60it/s] 46%|████▌ | 29832/65536 [5:06:56<6:10:25, 1.61it/s] 46%|████▌ | 29833/65536 [5:06:57<6:10:56, 1.60it/s] 46%|████▌ | 29834/65536 [5:06:57<6:05:22, 1.63it/s] 46%|████▌ | 29835/65536 [5:06:58<6:15:18, 1.59it/s] 46%|████▌ | 29836/65536 [5:06:59<6:20:50, 1.56it/s] 46%|████▌ | 29837/65536 [5:06:59<6:17:02, 1.58it/s] 46%|████▌ | 29838/65536 [5:07:00<6:08:01, 1.62it/s] 46%|████▌ | 29839/65536 [5:07:00<6:04:21, 1.63it/s] 46%|████▌ | 29840/65536 [5:07:01<6:10:15, 1.61it/s] {'loss': 1.6698, 'learning_rate': 5.917105424268397e-07, 'epoch': 1841.98} + 46%|████▌ | 29840/65536 [5:07:01<6:10:15, 1.61it/s] 46%|████▌ | 29841/65536 [5:07:02<6:22:29, 1.56it/s] 46%|████▌ | 29842/65536 [5:07:02<6:12:29, 1.60it/s] 46%|████▌ | 29843/65536 [5:07:03<6:07:44, 1.62it/s] 46%|████▌ | 29844/65536 [5:07:04<6:09:28, 1.61it/s] 46%|████▌ | 29845/65536 [5:07:04<6:05:37, 1.63it/s] 46%|████▌ | 29846/65536 [5:07:05<6:10:11, 1.61it/s] 46%|████▌ | 29847/65536 [5:07:05<6:14:13, 1.59it/s] 46%|████▌ | 29848/65536 [5:07:06<6:09:08, 1.61it/s] 46%|████▌ | 29849/65536 [5:07:07<6:11:10, 1.60it/s] 46%|████▌ | 29850/65536 [5:07:07<6:21:00, 1.56it/s] 46%|████▌ | 29851/65536 [5:07:08<6:28:53, 1.53it/s] 46%|████▌ | 29852/65536 [5:07:09<6:19:13, 1.57it/s] 46%|████▌ | 29853/65536 [5:07:09<6:12:34, 1.60it/s] 46%|████▌ | 29854/65536 [5:07:10<6:10:47, 1.60it/s] 46%|████▌ | 29855/65536 [5:07:10<5:59:28, 1.65it/s] 46%|████▌ | 29856/65536 [5:07:11<6:02:28, 1.64it/s] 46%|████▌ | 29857/65536 [5:07:12<6:21:37, 1.56it/s] 46%|████▌ | 29858/65536 [5:07:12<6:11:39, 1.60it/s] 46%|████▌ | 29859/65536 [5:07:13<6:07:55, 1.62it/s] 46%|████▌ | 29860/65536 [5:07:14<6:05:40, 1.63it/s] {'loss': 1.7243, 'learning_rate': 5.914350434676136e-07, 'epoch': 1843.21} + 46%|████▌ | 29860/65536 [5:07:14<6:05:40, 1.63it/s] 46%|████▌ | 29861/65536 [5:07:14<6:07:39, 1.62it/s] 46%|████▌ | 29862/65536 [5:07:15<6:03:01, 1.64it/s] 46%|████▌ | 29863/65536 [5:07:15<5:56:43, 1.67it/s] 46%|████▌ | 29864/65536 [5:07:16<6:01:04, 1.65it/s] 46%|████▌ | 29865/65536 [5:07:17<6:20:32, 1.56it/s] 46%|████▌ | 29866/65536 [5:07:17<6:28:50, 1.53it/s] 46%|████▌ | 29867/65536 [5:07:18<6:15:37, 1.58it/s] 46%|████▌ | 29868/65536 [5:07:19<6:12:06, 1.60it/s] 46%|████▌ | 29869/65536 [5:07:19<6:10:17, 1.61it/s] 46%|████▌ | 29870/65536 [5:07:20<6:15:49, 1.58it/s] 46%|████▌ | 29871/65536 [5:07:20<6:11:16, 1.60it/s] 46%|████▌ | 29872/65536 [5:07:21<6:09:17, 1.61it/s] 46%|████▌ | 29873/65536 [5:07:22<6:20:07, 1.56it/s] 46%|████▌ | 29874/65536 [5:07:22<6:11:37, 1.60it/s] 46%|████▌ | 29875/65536 [5:07:23<6:08:42, 1.61it/s] 46%|████▌ | 29876/65536 [5:07:24<6:07:17, 1.62it/s] 46%|████▌ | 29877/65536 [5:07:24<6:03:38, 1.63it/s] 46%|████▌ | 29878/65536 [5:07:25<6:05:14, 1.63it/s] 46%|████▌ | 29879/65536 [5:07:25<5:58:50, 1.66it/s] 46%|████▌ | 29880/65536 [5:07:26<5:59:53, 1.65it/s] {'loss': 1.6764, 'learning_rate': 5.911595445083874e-07, 'epoch': 1844.44} + 46%|████▌ | 29880/65536 [5:07:26<5:59:53, 1.65it/s] 46%|████▌ | 29881/65536 [5:07:27<6:10:07, 1.61it/s] 46%|████▌ | 29882/65536 [5:07:27<6:10:56, 1.60it/s] 46%|████▌ | 29883/65536 [5:07:28<6:18:11, 1.57it/s] 46%|████▌ | 29884/65536 [5:07:29<6:19:58, 1.56it/s] 46%|████▌ | 29885/65536 [5:07:29<6:11:23, 1.60it/s] 46%|████▌ | 29886/65536 [5:07:30<6:05:47, 1.62it/s] 46%|████▌ | 29887/65536 [5:07:30<6:14:39, 1.59it/s] 46%|████▌ | 29888/65536 [5:07:31<6:17:26, 1.57it/s] 46%|████▌ | 29889/65536 [5:07:32<6:14:13, 1.59it/s] 46%|████▌ | 29890/65536 [5:07:32<6:30:28, 1.52it/s] 46%|████▌ | 29891/65536 [5:07:33<6:21:51, 1.56it/s] 46%|████▌ | 29892/65536 [5:07:34<6:18:28, 1.57it/s] 46%|████▌ | 29893/65536 [5:07:34<6:22:29, 1.55it/s] 46%|████▌ | 29894/65536 [5:07:35<6:31:27, 1.52it/s] 46%|████▌ | 29895/65536 [5:07:36<6:22:40, 1.55it/s] 46%|████▌ | 29896/65536 [5:07:36<6:10:23, 1.60it/s] 46%|████▌ | 29897/65536 [5:07:37<6:01:28, 1.64it/s] 46%|████▌ | 29898/65536 [5:07:37<6:01:30, 1.64it/s] 46%|████▌ | 29899/65536 [5:07:38<6:03:29, 1.63it/s] 46%|████▌ | 29900/65536 [5:07:39<5:55:21, 1.67it/s] {'loss': 1.6299, 'learning_rate': 5.908840455491613e-07, 'epoch': 1845.68} + 46%|████▌ | 29900/65536 [5:07:39<5:55:21, 1.67it/s] 46%|████▌ | 29901/65536 [5:07:39<5:57:39, 1.66it/s] 46%|████▌ | 29902/65536 [5:07:40<5:55:42, 1.67it/s] 46%|████▌ | 29903/65536 [5:07:40<6:02:26, 1.64it/s] 46%|████▌ | 29904/65536 [5:07:41<6:03:40, 1.63it/s] 46%|████▌ | 29905/65536 [5:07:42<5:59:12, 1.65it/s] 46%|████▌ | 29906/65536 [5:07:42<6:08:18, 1.61it/s] 46%|████▌ | 29907/65536 [5:07:43<6:05:57, 1.62it/s] 46%|████▌ | 29908/65536 [5:07:43<6:06:02, 1.62it/s] 46%|████▌ | 29909/65536 [5:07:44<6:02:35, 1.64it/s] 46%|████▌ | 29910/65536 [5:07:45<6:02:08, 1.64it/s] 46%|████▌ | 29911/65536 [5:07:45<6:07:20, 1.62it/s] 46%|████▌ | 29912/65536 [5:07:46<6:07:01, 1.62it/s] 46%|████▌ | 29913/65536 [5:07:46<6:01:21, 1.64it/s] 46%|████▌ | 29914/65536 [5:07:47<6:00:47, 1.65it/s] 46%|████▌ | 29915/65536 [5:07:48<6:02:37, 1.64it/s] 46%|████▌ | 29916/65536 [5:07:48<6:10:12, 1.60it/s] 46%|████▌ | 29917/65536 [5:07:49<6:02:48, 1.64it/s] 46%|████▌ | 29918/65536 [5:07:50<6:00:41, 1.65it/s] 46%|████▌ | 29919/65536 [5:07:50<6:00:47, 1.65it/s] 46%|████▌ | 29920/65536 [5:07:51<6:07:45, 1.61it/s] {'loss': 1.6761, 'learning_rate': 5.90608546589935e-07, 'epoch': 1846.91} + 46%|████▌ | 29920/65536 [5:07:51<6:07:45, 1.61it/s] 46%|████▌ | 29921/65536 [5:07:51<6:01:37, 1.64it/s] 46%|████▌ | 29922/65536 [5:07:52<6:12:49, 1.59it/s] 46%|████▌ | 29923/65536 [5:07:53<6:15:12, 1.58it/s] 46%|████▌ | 29924/65536 [5:07:53<6:05:01, 1.63it/s] 46%|████▌ | 29925/65536 [5:07:54<6:01:04, 1.64it/s] 46%|████▌ | 29926/65536 [5:07:54<5:57:02, 1.66it/s] 46%|████▌ | 29927/65536 [5:07:55<6:02:04, 1.64it/s] 46%|████▌ | 29928/65536 [5:07:56<5:56:10, 1.67it/s] 46%|████▌ | 29929/65536 [5:07:56<5:52:03, 1.69it/s] 46%|████▌ | 29930/65536 [5:07:57<5:54:56, 1.67it/s] 46%|████▌ | 29931/65536 [5:07:57<6:04:22, 1.63it/s] 46%|████▌ | 29932/65536 [5:07:58<6:00:50, 1.64it/s] 46%|████▌ | 29933/65536 [5:07:59<6:03:56, 1.63it/s] 46%|████▌ | 29934/65536 [5:07:59<6:01:59, 1.64it/s] 46%|████▌ | 29935/65536 [5:08:00<6:01:04, 1.64it/s] 46%|████▌ | 29936/65536 [5:08:01<6:10:00, 1.60it/s] 46%|████▌ | 29937/65536 [5:08:01<6:12:11, 1.59it/s] 46%|████▌ | 29938/65536 [5:08:02<6:21:15, 1.56it/s] 46%|████▌ | 29939/65536 [5:08:03<6:15:03, 1.58it/s] 46%|████▌ | 29940/65536 [5:08:03<6:14:15, 1.59it/s] {'loss': 1.689, 'learning_rate': 5.90333047630709e-07, 'epoch': 1848.15} + 46%|████▌ | 29940/65536 [5:08:03<6:14:15, 1.59it/s] 46%|████▌ | 29941/65536 [5:08:04<6:03:34, 1.63it/s] 46%|████▌ | 29942/65536 [5:08:04<6:13:05, 1.59it/s] 46%|████▌ | 29943/65536 [5:08:05<6:11:48, 1.60it/s] 46%|████▌ | 29944/65536 [5:08:06<6:10:37, 1.60it/s] 46%|████▌ | 29945/65536 [5:08:06<6:07:33, 1.61it/s] 46%|████▌ | 29946/65536 [5:08:07<6:04:00, 1.63it/s] 46%|████▌ | 29947/65536 [5:08:07<5:59:21, 1.65it/s] 46%|████▌ | 29948/65536 [5:08:08<6:11:35, 1.60it/s] 46%|████▌ | 29949/65536 [5:08:09<6:10:57, 1.60it/s] 46%|████▌ | 29950/65536 [5:08:09<6:04:19, 1.63it/s] 46%|████▌ | 29951/65536 [5:08:10<6:03:23, 1.63it/s] 46%|████▌ | 29952/65536 [5:08:11<6:05:34, 1.62it/s] 46%|████▌ | 29953/65536 [5:08:11<5:58:06, 1.66it/s] 46%|████▌ | 29954/65536 [5:08:12<6:05:02, 1.62it/s] 46%|████▌ | 29955/65536 [5:08:12<6:11:04, 1.60it/s] 46%|████▌ | 29956/65536 [5:08:13<5:59:47, 1.65it/s] 46%|████▌ | 29957/65536 [5:08:14<6:03:36, 1.63it/s] 46%|████▌ | 29958/65536 [5:08:14<5:57:59, 1.66it/s] 46%|████▌ | 29959/65536 [5:08:15<6:01:14, 1.64it/s] 46%|████▌ | 29960/65536 [5:08:15<6:08:23, 1.61it/s] {'loss': 1.6907, 'learning_rate': 5.900575486714828e-07, 'epoch': 1849.38} + 46%|████▌ | 29960/65536 [5:08:15<6:08:23, 1.61it/s] 46%|████▌ | 29961/65536 [5:08:16<6:14:33, 1.58it/s] 46%|████▌ | 29962/65536 [5:08:17<6:06:50, 1.62it/s] 46%|████▌ | 29963/65536 [5:08:17<6:11:58, 1.59it/s] 46%|████▌ | 29964/65536 [5:08:18<6:04:08, 1.63it/s] 46%|████▌ | 29965/65536 [5:08:19<6:07:03, 1.62it/s] 46%|████▌ | 29966/65536 [5:08:19<6:10:15, 1.60it/s] 46%|████▌ | 29967/65536 [5:08:20<6:02:37, 1.63it/s] 46%|████▌ | 29968/65536 [5:08:20<5:56:51, 1.66it/s] 46%|████▌ | 29969/65536 [5:08:21<6:05:48, 1.62it/s] 46%|████▌ | 29970/65536 [5:08:22<5:58:41, 1.65it/s] 46%|████▌ | 29971/65536 [5:08:22<6:06:35, 1.62it/s] 46%|████▌ | 29972/65536 [5:08:23<5:58:50, 1.65it/s] 46%|████▌ | 29973/65536 [5:08:23<6:00:59, 1.64it/s] 46%|████▌ | 29974/65536 [5:08:24<6:08:41, 1.61it/s] 46%|████▌ | 29975/65536 [5:08:25<5:59:18, 1.65it/s] 46%|████▌ | 29976/65536 [5:08:25<6:00:04, 1.65it/s] 46%|████▌ | 29977/65536 [5:08:26<6:06:11, 1.62it/s] 46%|████▌ | 29978/65536 [5:08:26<6:04:43, 1.62it/s] 46%|████▌ | 29979/65536 [5:08:27<6:26:15, 1.53it/s] 46%|████▌ | 29980/65536 [5:08:28<6:17:25, 1.57it/s] {'loss': 1.6604, 'learning_rate': 5.897820497122566e-07, 'epoch': 1850.62} + 46%|████▌ | 29980/65536 [5:08:28<6:17:25, 1.57it/s] 46%|████▌ | 29981/65536 [5:08:28<6:07:02, 1.61it/s] 46%|████▌ | 29982/65536 [5:08:29<6:08:12, 1.61it/s] 46%|████▌ | 29983/65536 [5:08:30<6:01:18, 1.64it/s] 46%|████▌ | 29984/65536 [5:08:30<5:57:58, 1.66it/s] 46%|████▌ | 29985/65536 [5:08:31<5:58:22, 1.65it/s] 46%|████▌ | 29986/65536 [5:08:31<5:53:39, 1.68it/s] 46%|████▌ | 29987/65536 [5:08:32<6:10:21, 1.60it/s] 46%|████▌ | 29988/65536 [5:08:33<6:10:25, 1.60it/s] 46%|████▌ | 29989/65536 [5:08:33<6:12:08, 1.59it/s] 46%|████▌ | 29990/65536 [5:08:34<6:03:23, 1.63it/s] 46%|████▌ | 29991/65536 [5:08:35<6:04:22, 1.63it/s] 46%|████▌ | 29992/65536 [5:08:35<6:01:05, 1.64it/s] 46%|████▌ | 29993/65536 [5:08:36<5:56:43, 1.66it/s] 46%|████▌ | 29994/65536 [5:08:36<6:03:04, 1.63it/s] 46%|████▌ | 29995/65536 [5:08:37<6:11:34, 1.59it/s] 46%|████▌ | 29996/65536 [5:08:38<6:11:26, 1.59it/s] 46%|████▌ | 29997/65536 [5:08:38<6:14:04, 1.58it/s] 46%|████▌ | 29998/65536 [5:08:39<6:07:04, 1.61it/s] 46%|████▌ | 29999/65536 [5:08:39<5:58:26, 1.65it/s] 46%|████▌ | 30000/65536 [5:08:40<5:57:43, 1.66it/s] {'loss': 1.6979, 'learning_rate': 5.895065507530305e-07, 'epoch': 1851.85} + 46%|████▌ | 30000/65536 [5:08:40<5:57:43, 1.66it/s]Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co./docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41. +Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2} + 46%|████▌ | 30001/65536 [5:08:59<61:38:07, 6.24s/it] 46%|████▌ | 30002/65536 [5:09:00<44:54:24, 4.55s/it]/opt/conda/lib/python3.10/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock. + self.pid = os.fork() + 46%|████▌ | 30003/65536 [5:09:01<33:32:37, 3.40s/it] 46%|████▌ | 30004/65536 [5:09:01<25:19:26, 2.57s/it] 46%|████▌ | 30005/65536 [5:09:02<19:32:08, 1.98s/it] 46%|████▌ | 30006/65536 [5:09:03<15:27:53, 1.57s/it] 46%|████▌ | 30007/65536 [5:09:03<12:47:58, 1.30s/it] 46%|████▌ | 30008/65536 [5:09:04<10:41:29, 1.08s/it] 46%|████▌ | 30009/65536 [5:09:05<9:26:36, 1.05it/s] 46%|████▌ | 30010/65536 [5:09:05<8:27:03, 1.17it/s] 46%|████▌ | 30011/65536 [5:09:06<7:45:18, 1.27it/s] 46%|████▌ | 30012/65536 [5:09:06<7:14:28, 1.36it/s] 46%|████▌ | 30013/65536 [5:09:07<6:51:39, 1.44it/s] 46%|████▌ | 30014/65536 [5:09:08<6:42:40, 1.47it/s] 46%|████▌ | 30015/65536 [5:09:08<6:34:08, 1.50it/s] 46%|████▌ | 30016/65536 [5:09:09<6:34:45, 1.50it/s] 46%|████▌ | 30017/65536 [5:09:10<6:40:32, 1.48it/s] 46%|████▌ | 30018/65536 [5:09:10<6:32:11, 1.51it/s] 46%|████▌ | 30019/65536 [5:09:11<6:36:24, 1.49it/s] 46%|████▌ | 30020/65536 [5:09:12<6:22:52, 1.55it/s] {'loss': 1.6912, 'learning_rate': 5.892310517938041e-07, 'epoch': 1853.09} + 46%|████▌ | 30020/65536 [5:09:12<6:22:52, 1.55it/s] 46%|████▌ | 30021/65536 [5:09:12<6:32:04, 1.51it/s] 46%|████▌ | 30022/65536 [5:09:13<6:29:02, 1.52it/s] 46%|████▌ | 30023/65536 [5:09:14<6:22:25, 1.55it/s] 46%|████▌ | 30024/65536 [5:09:14<6:15:18, 1.58it/s] 46%|████▌ | 30025/65536 [5:09:15<6:14:51, 1.58it/s] 46%|████▌ | 30026/65536 [5:09:15<6:15:32, 1.58it/s] 46%|████▌ | 30027/65536 [5:09:16<6:12:55, 1.59it/s] 46%|████▌ | 30028/65536 [5:09:17<6:18:19, 1.56it/s] 46%|████▌ | 30029/65536 [5:09:17<6:10:31, 1.60it/s] 46%|████▌ | 30030/65536 [5:09:18<6:13:29, 1.58it/s] 46%|████▌ | 30031/65536 [5:09:19<6:13:53, 1.58it/s] 46%|████▌ | 30032/65536 [5:09:19<6:09:12, 1.60it/s] 46%|████▌ | 30033/65536 [5:09:20<6:05:24, 1.62it/s] 46%|████▌ | 30034/65536 [5:09:20<6:01:45, 1.64it/s] 46%|████▌ | 30035/65536 [5:09:21<6:16:53, 1.57it/s] 46%|████▌ | 30036/65536 [5:09:22<6:20:09, 1.56it/s] 46%|████▌ | 30037/65536 [5:09:22<6:15:34, 1.58it/s] 46%|████▌ | 30038/65536 [5:09:23<6:09:29, 1.60it/s] 46%|████▌ | 30039/65536 [5:09:23<6:00:16, 1.64it/s] 46%|████▌ | 30040/65536 [5:09:24<5:57:04, 1.66it/s] {'loss': 1.6986, 'learning_rate': 5.889555528345781e-07, 'epoch': 1854.32} + 46%|████▌ | 30040/65536 [5:09:24<5:57:04, 1.66it/s] 46%|████▌ | 30041/65536 [5:09:25<6:02:26, 1.63it/s] 46%|████▌ | 30042/65536 [5:09:25<6:08:24, 1.61it/s] 46%|████▌ | 30043/65536 [5:09:26<6:18:41, 1.56it/s] 46%|████▌ | 30044/65536 [5:09:27<6:05:53, 1.62it/s] 46%|████▌ | 30045/65536 [5:09:27<6:08:24, 1.61it/s] 46%|████▌ | 30046/65536 [5:09:28<6:16:06, 1.57it/s] 46%|████▌ | 30047/65536 [5:09:29<6:12:43, 1.59it/s] 46%|████▌ | 30048/65536 [5:09:29<6:12:28, 1.59it/s] 46%|████▌ | 30049/65536 [5:09:30<6:17:36, 1.57it/s] 46%|████▌ | 30050/65536 [5:09:30<6:20:20, 1.56it/s] 46%|████▌ | 30051/65536 [5:09:31<6:17:25, 1.57it/s] 46%|████▌ | 30052/65536 [5:09:32<6:25:16, 1.53it/s] 46%|████▌ | 30053/65536 [5:09:32<6:22:12, 1.55it/s] 46%|████▌ | 30054/65536 [5:09:33<6:23:30, 1.54it/s] 46%|████▌ | 30055/65536 [5:09:34<6:14:28, 1.58it/s] 46%|████▌ | 30056/65536 [5:09:34<6:14:10, 1.58it/s] 46%|████▌ | 30057/65536 [5:09:35<6:22:37, 1.55it/s] 46%|████▌ | 30058/65536 [5:09:36<6:13:49, 1.58it/s] 46%|████▌ | 30059/65536 [5:09:36<6:12:28, 1.59it/s] 46%|████▌ | 30060/65536 [5:09:37<6:09:39, 1.60it/s] {'loss': 1.6842, 'learning_rate': 5.886800538753519e-07, 'epoch': 1855.56} + 46%|████▌ | 30060/65536 [5:09:37<6:09:39, 1.60it/s] 46%|████▌ | 30061/65536 [5:09:37<6:16:14, 1.57it/s] 46%|████▌ | 30062/65536 [5:09:38<6:13:11, 1.58it/s] 46%|████▌ | 30063/65536 [5:09:39<6:04:58, 1.62it/s] 46%|████▌ | 30064/65536 [5:09:39<6:09:44, 1.60it/s] 46%|████▌ | 30065/65536 [5:09:40<6:13:30, 1.58it/s] \ No newline at end of file