Adam commited on
Commit
e8da1ea
β€’
1 Parent(s): 9d04693

feat: updated links

Browse files
Files changed (1) hide show
  1. training.log +78 -78
training.log CHANGED
@@ -1,29 +1,29 @@
1
- /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
2
  warnings.warn(
3
  [2023-04-14 07:44:46,752] [WARNING] [runner.py:190:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only.
4
- [2023-04-14 07:44:48,341] [INFO] [runner.py:540:main] cmd = /home/minutiae/.conda/envs/py39/bin/python -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMCwgMSwgMiwgMywgNCwgNSwgNiwgN119 --master_addr=127.0.0.1 --master_port=29500 --enable_each_rank_log=None main.py --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets openai/webgpt_comparisons stanfordnlp/SHP --data_split 2,4,4 --model_name_or_path facebook/opt-350m --num_padding_at_beginning 1 --per_device_train_batch_size 8 --per_device_eval_batch_size 8 --max_seq_len 512 --learning_rate 5e-5 --weight_decay 0.1 --num_train_epochs 1 --gradient_accumulation_steps 1 --lr_scheduler_type cosine --num_warmup_steps 0 --seed 1234 --zero_stage 0 --deepspeed --output_dir /lus/grand/projects/BNN-Scale/chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/reward-models/350m
5
- /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
6
  warnings.warn(
7
  [2023-04-14 07:45:54,441] [INFO] [launch.py:229:main] WORLD INFO DICT: {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]}
8
  [2023-04-14 07:45:54,643] [INFO] [launch.py:235:main] nnodes=1, num_local_procs=8, node_rank=0
9
  [2023-04-14 07:45:54,643] [INFO] [launch.py:246:main] global_rank_mapping=defaultdict(<class 'list'>, {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]})
10
  [2023-04-14 07:45:54,643] [INFO] [launch.py:247:main] dist_world_size=8
11
  [2023-04-14 07:45:54,643] [INFO] [launch.py:249:main] Setting CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
12
- /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
13
  warnings.warn(
14
- /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
15
  warnings.warn(
16
- /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
17
  warnings.warn(
18
- /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
19
  warnings.warn(
20
- /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
21
  warnings.warn(
22
- /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
23
  warnings.warn(
24
- /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
25
  warnings.warn(
26
- /home/minutiae/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
27
  warnings.warn(
28
  [2023-04-14 07:49:22,604] [INFO] [comm.py:586:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
29
 
@@ -32,99 +32,99 @@
32
 
33
 
34
 
35
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
36
-
37
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
 
 
38
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
39
 
40
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:00<00:00, 5.00it/s]
41
-
42
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
 
43
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
44
 
45
  0%| | 0/2 [00:00<?, ?it/s]
46
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.16s/it]
47
 
48
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:00<00:00, 2.25it/s]
49
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
50
-
51
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
 
 
52
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
53
 
54
  0%| | 0/2 [00:00<?, ?it/s]
55
 
56
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
57
 
58
  0%| | 0/2 [00:00<?, ?it/s]
59
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
60
-
61
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
 
 
62
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
63
 
64
 
65
  0%| | 0/2 [00:00<?, ?it/s]
66
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.14s/it]
67
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
68
-
69
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
 
 
70
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
71
 
72
  0%| | 0/2 [00:00<?, ?it/s]
73
 
74
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
75
 
76
  0%| | 0/2 [00:00<?, ?it/s]
77
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
78
 
79
  0%| | 0/2 [00:00<?, ?it/s]
80
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:00<00:00, 5.57it/s]
81
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
82
 
83
  0%| | 0/2 [00:00<?, ?it/s]
84
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
85
 
86
  0%| | 0/2 [00:00<?, ?it/s]
87
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.01s/it]
88
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
89
-
90
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
 
 
91
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
92
 
93
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:00<00:00, 3.38it/s]
94
  0%| | 0/2 [00:00<?, ?it/s]
95
 
96
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
97
-
98
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
 
 
99
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
100
 
101
-
102
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
 
103
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
104
 
105
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
106
 
107
  0%| | 0/1 [00:00<?, ?it/s]
108
 
109
  0%| | 0/1 [00:00<?, ?it/s]
110
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
111
 
112
  0%| | 0/1 [00:00<?, ?it/s]
113
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
114
 
115
  0%| | 0/1 [00:00<?, ?it/s]
116
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
117
-
118
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
 
 
119
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
120
 
121
 
122
  0%| | 0/1 [00:00<?, ?it/s]
123
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
124
-
125
  0%| | 0/2 [00:00<?, ?it/s]
126
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.31s/it]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
 
 
127
  0%| | 0/2 [00:00<?, ?it/s]
128
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.31s/it]Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
129
 
130
-
131
  0%| | 0/2 [00:00<?, ?it/s]
132
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:00<00:00, 3.08it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
 
133
  0%| | 0/2 [00:00<?, ?it/s]
134
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:00<00:00, 3.08it/s]Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
135
 
136
 
137
  0%| | 0/2 [00:00<?, ?it/s]
138
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
139
 
140
  0%| | 0/2 [00:00<?, ?it/s]
141
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:04<00:04, 4.27s/it]
142
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
143
 
144
  0%| | 0/2 [00:00<?, ?it/s]
145
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
146
-
147
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
 
 
148
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
149
 
150
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.74s/it]
151
  0%| | 0/2 [00:00<?, ?it/s]
152
 
153
- Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
154
 
155
  0%| | 0/2 [00:00<?, ?it/s]
156
- Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
157
-
158
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
 
 
159
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
160
 
161
 
162
  0%| | 0/1 [00:00<?, ?it/s]
163
- Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
164
-
165
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
 
 
166
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
167
 
168
 
169
  0%| | 0/1 [00:00<?, ?it/s]
170
- Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
171
-
172
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
 
 
173
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
174
 
175
 
176
  0%| | 0/1 [00:00<?, ?it/s]
177
- Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
178
- Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
179
 
180
  0%| | 0/1 [00:00<?, ?it/s]
181
  0%| | 0/1 [00:00<?, ?it/s]
182
 
183
- Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
184
-
185
  0%| | 0/3 [00:00<?, ?it/s]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
186
-
187
  0%| | 0/3 [00:00<?, ?it/s]
188
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:00<00:00, 2.07it/s]
189
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:00<00:00, 2.16it/s]
190
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:00<00:00, 4.36it/s]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
 
 
191
  0%| | 0/3 [00:00<?, ?it/s]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
 
192
  0%| | 0/3 [00:00<?, ?it/s]
193
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:00<00:00, 2.07it/s]
194
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:00<00:00, 2.16it/s]
195
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:00<00:00, 4.36it/s]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
196
 
197
 
198
-
199
  0%| | 0/3 [00:00<?, ?it/s]
200
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:07<00:15, 7.50s/it]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
 
201
  0%| | 0/3 [00:00<?, ?it/s]
202
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:07<00:15, 7.50s/it]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
203
 
204
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:14<00:07, 7.06s/it]
205
  0%| | 0/3 [00:00<?, ?it/s]
206
- Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
207
-
208
  0%| | 0/3 [00:00<?, ?it/s]
209
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:07<00:14, 7.26s/it]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
 
 
210
  0%| | 0/3 [00:00<?, ?it/s]
211
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:07<00:14, 7.26s/it]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
212
 
213
 
214
-
215
  0%| | 0/3 [00:00<?, ?it/s]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
216
-
217
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:03<00:06, 3.22s/it]
218
  0%| | 0/3 [00:00<?, ?it/s]
219
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.41s/it]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
 
220
  0%| | 0/3 [00:00<?, ?it/s]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
 
221
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:03<00:06, 3.22s/it]
222
  0%| | 0/3 [00:00<?, ?it/s]
223
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.41s/it]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
224
 
225
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:05, 2.85s/it]
226
 
227
  0%| | 0/3 [00:00<?, ?it/s]
228
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.25s/it]
229
 
@@ -186,18 +186,18 @@ To disable this warning, you can either:
186
  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
187
  Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
188
  Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
189
- Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
190
- Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
191
  Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
192
- Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
193
  Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
194
- Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
195
  Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
196
- Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
197
  Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
198
- Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
199
  Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
200
- Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
201
  huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
202
  To disable this warning, you can either:
203
  - Avoid using `tokenizers` before the fork if possible
@@ -211,7 +211,7 @@ To disable this warning, you can either:
211
  - Avoid using `tokenizers` before the fork if possible
212
  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
213
  Detected CUDA files, patching ldflags
214
- Emitting ninja build file /home/minutiae/.cache/torch_extensions/py39_cu113/fused_adam/build.ninja...
215
  Building extension module fused_adam...
216
  Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
217
  huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
@@ -242,7 +242,7 @@ To disable this warning, you can either:
242
  - Avoid using `tokenizers` before the fork if possible
243
  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
244
  Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
245
- Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
246
  huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
247
  To disable this warning, you can either:
248
  - Avoid using `tokenizers` before the fork if possible
@@ -256,7 +256,7 @@ To disable this warning, you can either:
256
  - Avoid using `tokenizers` before the fork if possible
257
  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
258
  Detected CUDA files, patching ldflags
259
- Emitting ninja build file /home/minutiae/.cache/torch_extensions/py39_cu113/fused_adam/build.ninja...
260
  Building extension module fused_adam...
261
  Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
262
  huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
@@ -273,15 +273,15 @@ Time to load fused_adam op: 15.54004192352295 seconds
273
  [2023-04-14 08:09:50,036] [INFO] [logging.py:96:log_dist] [Rank 0] Using client Optimizer as basic optimizer
274
  [2023-04-14 08:09:50,057] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam
275
  [2023-04-14 08:09:50,057] [INFO] [logging.py:96:log_dist] [Rank 0] Creating fp16 optimizer with dynamic loss scale
276
- Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
277
- Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
278
- Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
279
  [2023-04-14 08:09:50,233] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam
280
- Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
281
  [2023-04-14 08:09:50,233] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed using client LR scheduler
282
  [2023-04-14 08:09:50,233] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed LR Scheduler = <torch.optim.lr_scheduler.LambdaLR object at 0x7f281d4468b0>
283
  [2023-04-14 08:09:50,233] [INFO] [logging.py:96:log_dist] [Rank 0] step=0, skipped=0, lr=[5e-05, 5e-05], mom=[(0.9, 0.95), (0.9, 0.95)]
284
- Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
285
  [2023-04-14 08:09:50,234] [INFO] [config.py:953:print] DeepSpeedEngine configuration:
286
  [2023-04-14 08:09:50,235] [INFO] [config.py:957:print] activation_checkpointing_config {
287
  "partition_activations": false,
@@ -327,7 +327,7 @@ Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions ro
327
  [2023-04-14 08:09:50,235] [INFO] [config.py:957:print] communication_data_type ...... None
328
  [2023-04-14 08:09:50,235] [INFO] [config.py:957:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
329
  [2023-04-14 08:09:50,235] [INFO] [config.py:957:print] curriculum_enabled_legacy .... False
330
- Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...[2023-04-14 08:09:50,235] [INFO] [config.py:957:print] curriculum_params_legacy ..... False
331
 
332
  [2023-04-14 08:09:50,236] [INFO] [config.py:957:print] data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
333
  [2023-04-14 08:09:50,236] [INFO] [config.py:957:print] data_efficiency_enabled ...... False
@@ -344,7 +344,7 @@ Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions ro
344
  [2023-04-14 08:09:50,236] [INFO] [config.py:957:print] eigenvalue_tol ............... 0.01
345
  [2023-04-14 08:09:50,236] [INFO] [config.py:957:print] eigenvalue_verbose ........... False
346
  [2023-04-14 08:09:50,236] [INFO] [config.py:957:print] elasticity_enabled ........... False
347
- Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
348
  [2023-04-14 08:09:50,236] [INFO] [config.py:957:print] flops_profiler_config ........ {
349
  "enabled": false,
350
  "profile_step": 1,
@@ -429,7 +429,7 @@ Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions ro
429
  "tp_gather_partition_size": 8
430
  }
431
  }
432
- Using /home/minutiae/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
433
  huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
434
  To disable this warning, you can either:
435
  - Avoid using `tokenizers` before the fork if possible
@@ -442,7 +442,7 @@ huggingface/tokenizers: The current process just got forked, after parallelism h
442
  To disable this warning, you can either:
443
  - Avoid using `tokenizers` before the fork if possible
444
  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
445
- Emitting ninja build file /home/minutiae/.cache/torch_extensions/py39_cu113/utils/build.ninja...
446
  Building extension module utils...
447
  Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
448
  huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 
1
+ /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
2
  warnings.warn(
3
  [2023-04-14 07:44:46,752] [WARNING] [runner.py:190:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only.
4
+ [2023-04-14 07:44:48,341] [INFO] [runner.py:540:main] cmd = /home/AdamG012/.conda/envs/py39/bin/python -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMCwgMSwgMiwgMywgNCwgNSwgNiwgN119 --master_addr=127.0.0.1 --master_port=29500 --enable_each_rank_log=None main.py --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets openai/webgpt_comparisons stanfordnlp/SHP --data_split 2,4,4 --model_name_or_path facebook/opt-350m --num_padding_at_beginning 1 --per_device_train_batch_size 8 --per_device_eval_batch_size 8 --max_seq_len 512 --learning_rate 5e-5 --weight_decay 0.1 --num_train_epochs 1 --gradient_accumulation_steps 1 --lr_scheduler_type cosine --num_warmup_steps 0 --seed 1234 --zero_stage 0 --deepspeed --output_dir /chatgpt/hf_runs/DeepSpeedExamples/applications/DeepSpeed-Chat/output/reward-models/350m
5
+ /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
6
  warnings.warn(
7
  [2023-04-14 07:45:54,441] [INFO] [launch.py:229:main] WORLD INFO DICT: {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]}
8
  [2023-04-14 07:45:54,643] [INFO] [launch.py:235:main] nnodes=1, num_local_procs=8, node_rank=0
9
  [2023-04-14 07:45:54,643] [INFO] [launch.py:246:main] global_rank_mapping=defaultdict(<class 'list'>, {'localhost': [0, 1, 2, 3, 4, 5, 6, 7]})
10
  [2023-04-14 07:45:54,643] [INFO] [launch.py:247:main] dist_world_size=8
11
  [2023-04-14 07:45:54,643] [INFO] [launch.py:249:main] Setting CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
12
+ /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
13
  warnings.warn(
14
+ /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
15
  warnings.warn(
16
+ /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
17
  warnings.warn(
18
+ /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
19
  warnings.warn(
20
+ /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
21
  warnings.warn(
22
+ /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
23
  warnings.warn(
24
+ /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
25
  warnings.warn(
26
+ /home/AdamG012/.conda/envs/py39/lib/python3.9/site-packages/requests/__init__.py:109: RequestsDependencyWarning: urllib3 (1.26.9) or chardet (5.0.0)/charset_normalizer (2.0.4) doesn't match a supported version!
27
  warnings.warn(
28
  [2023-04-14 07:49:22,604] [INFO] [comm.py:586:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
29
 
 
32
 
33
 
34
 
 
 
35
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
36
+ Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
37
+
38
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
39
 
40
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:00<00:00, 5.00it/s]
 
41
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
42
+
43
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
44
 
45
  0%| | 0/2 [00:00<?, ?it/s]
46
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.16s/it]
47
 
48
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:00<00:00, 2.25it/s]
 
 
49
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
50
+ Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
51
+
52
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
53
 
54
  0%| | 0/2 [00:00<?, ?it/s]
55
 
56
+ Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
57
 
58
  0%| | 0/2 [00:00<?, ?it/s]
 
 
59
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
60
+ Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
61
+
62
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b9d2c4937d617106/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
63
 
64
 
65
  0%| | 0/2 [00:00<?, ?it/s]
66
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.14s/it]
 
 
67
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
68
+ Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
69
+
70
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
71
 
72
  0%| | 0/2 [00:00<?, ?it/s]
73
 
74
+ Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
75
 
76
  0%| | 0/2 [00:00<?, ?it/s]
77
+ Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
78
 
79
  0%| | 0/2 [00:00<?, ?it/s]
80
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:00<00:00, 5.57it/s]
81
+ Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
82
 
83
  0%| | 0/2 [00:00<?, ?it/s]
84
+ Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
85
 
86
  0%| | 0/2 [00:00<?, ?it/s]
87
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.01s/it]
 
 
88
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
89
+ Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
90
+
91
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/default-b25c081aeeca3652/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
92
 
93
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:00<00:00, 3.38it/s]
94
  0%| | 0/2 [00:00<?, ?it/s]
95
 
 
 
96
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
97
+ Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
98
+
99
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
100
 
 
101
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
102
+
103
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
104
 
105
+ Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
106
 
107
  0%| | 0/1 [00:00<?, ?it/s]
108
 
109
  0%| | 0/1 [00:00<?, ?it/s]
110
+ Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
111
 
112
  0%| | 0/1 [00:00<?, ?it/s]
113
+ Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
114
 
115
  0%| | 0/1 [00:00<?, ?it/s]
 
 
116
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
117
+ Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
118
+
119
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset parquet (/reward/Dahoas___parquet/Dahoas--synthetic-instruct-gptj-pairwise-0b2fd7bd9ea121cb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
120
 
121
 
122
  0%| | 0/1 [00:00<?, ?it/s]
 
 
123
  0%| | 0/2 [00:00<?, ?it/s]
124
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.31s/it]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
125
+ Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
126
+
127
  0%| | 0/2 [00:00<?, ?it/s]
128
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.31s/it]Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
129
 
 
130
  0%| | 0/2 [00:00<?, ?it/s]
131
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:00<00:00, 3.08it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
132
+
133
  0%| | 0/2 [00:00<?, ?it/s]
134
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:00<00:00, 3.08it/s]Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
135
 
136
 
137
  0%| | 0/2 [00:00<?, ?it/s]
138
+ Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
139
 
140
  0%| | 0/2 [00:00<?, ?it/s]
141
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:04<00:04, 4.27s/it]
142
+ Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
143
 
144
  0%| | 0/2 [00:00<?, ?it/s]
 
 
145
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/grand/projects/BNN-Scale/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
146
+ Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
147
+
148
  0%| | 0/2 [00:00<?, ?it/s]Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
149
 
150
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.74s/it]
151
  0%| | 0/2 [00:00<?, ?it/s]
152
 
153
+ Found cached dataset parquet (/reward/yitingxie___parquet/yitingxie--rlhf-reward-datasets-f2627438ff1fb9dd/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
154
 
155
  0%| | 0/2 [00:00<?, ?it/s]
 
 
156
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
157
+ Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
158
+
159
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
160
 
161
 
162
  0%| | 0/1 [00:00<?, ?it/s]
 
 
163
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
164
+ Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
165
+
166
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
167
 
168
 
169
  0%| | 0/1 [00:00<?, ?it/s]
 
 
170
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/grand/projects/BNN-Scale/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
171
+ Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
172
+
173
  0%| | 0/1 [00:00<?, ?it/s]Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
174
 
175
 
176
  0%| | 0/1 [00:00<?, ?it/s]
177
+ Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
178
+ Found cached dataset webgpt_comparisons (/reward/openai___webgpt_comparisons/default/0.0.0/8b5d5879cdc98c4c0099af6053dffe8d504588d43d3b11f1b1ec223ab1e8db0a)
179
 
180
  0%| | 0/1 [00:00<?, ?it/s]
181
  0%| | 0/1 [00:00<?, ?it/s]
182
 
 
 
183
  0%| | 0/3 [00:00<?, ?it/s]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
 
184
  0%| | 0/3 [00:00<?, ?it/s]
185
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:00<00:00, 2.07it/s]
186
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:00<00:00, 2.16it/s]
187
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:00<00:00, 4.36it/s]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
188
+ Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
189
+
190
  0%| | 0/3 [00:00<?, ?it/s]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
191
+
192
  0%| | 0/3 [00:00<?, ?it/s]
193
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:00<00:00, 2.07it/s]
194
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:00<00:00, 2.16it/s]
195
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:00<00:00, 4.36it/s]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
196
 
197
 
 
198
  0%| | 0/3 [00:00<?, ?it/s]
199
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:07<00:15, 7.50s/it]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
200
+
201
  0%| | 0/3 [00:00<?, ?it/s]
202
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:07<00:15, 7.50s/it]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
203
 
204
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:14<00:07, 7.06s/it]
205
  0%| | 0/3 [00:00<?, ?it/s]
 
 
206
  0%| | 0/3 [00:00<?, ?it/s]
207
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:07<00:14, 7.26s/it]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
208
+ Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
209
+
210
  0%| | 0/3 [00:00<?, ?it/s]
211
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:07<00:14, 7.26s/it]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
212
 
213
 
 
214
  0%| | 0/3 [00:00<?, ?it/s]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
 
215
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:03<00:06, 3.22s/it]
216
  0%| | 0/3 [00:00<?, ?it/s]
217
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.41s/it]Found cached dataset json (/grand/projects/BNN-Scale/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
218
+
219
  0%| | 0/3 [00:00<?, ?it/s]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
220
+
221
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:03<00:06, 3.22s/it]
222
  0%| | 0/3 [00:00<?, ?it/s]
223
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.41s/it]Found cached dataset json (/reward/stanfordnlp___json/stanfordnlp--SHP-10ead9e54f5a107d/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)
224
 
225
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:05, 2.85s/it]
226
 
227
  0%| | 0/3 [00:00<?, ?it/s]
228
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.25s/it]
229
 
 
186
  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
187
  Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
188
  Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
189
+ Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
190
+ Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
191
  Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
192
+ Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
193
  Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
194
+ Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
195
  Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
196
+ Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
197
  Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
198
+ Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
199
  Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
200
+ Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
201
  huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
202
  To disable this warning, you can either:
203
  - Avoid using `tokenizers` before the fork if possible
 
211
  - Avoid using `tokenizers` before the fork if possible
212
  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
213
  Detected CUDA files, patching ldflags
214
+ Emitting ninja build file /home/AdamG012/.cache/torch_extensions/py39_cu113/fused_adam/build.ninja...
215
  Building extension module fused_adam...
216
  Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
217
  huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 
242
  - Avoid using `tokenizers` before the fork if possible
243
  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
244
  Installed CUDA version 11.4 does not match the version torch was compiled with 11.3 but since the APIs are compatible, accepting this combination
245
+ Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
246
  huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
247
  To disable this warning, you can either:
248
  - Avoid using `tokenizers` before the fork if possible
 
256
  - Avoid using `tokenizers` before the fork if possible
257
  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
258
  Detected CUDA files, patching ldflags
259
+ Emitting ninja build file /home/AdamG012/.cache/torch_extensions/py39_cu113/fused_adam/build.ninja...
260
  Building extension module fused_adam...
261
  Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
262
  huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
 
273
  [2023-04-14 08:09:50,036] [INFO] [logging.py:96:log_dist] [Rank 0] Using client Optimizer as basic optimizer
274
  [2023-04-14 08:09:50,057] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam
275
  [2023-04-14 08:09:50,057] [INFO] [logging.py:96:log_dist] [Rank 0] Creating fp16 optimizer with dynamic loss scale
276
+ Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
277
+ Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
278
+ Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
279
  [2023-04-14 08:09:50,233] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam
280
+ Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
281
  [2023-04-14 08:09:50,233] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed using client LR scheduler
282
  [2023-04-14 08:09:50,233] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed LR Scheduler = <torch.optim.lr_scheduler.LambdaLR object at 0x7f281d4468b0>
283
  [2023-04-14 08:09:50,233] [INFO] [logging.py:96:log_dist] [Rank 0] step=0, skipped=0, lr=[5e-05, 5e-05], mom=[(0.9, 0.95), (0.9, 0.95)]
284
+ Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
285
  [2023-04-14 08:09:50,234] [INFO] [config.py:953:print] DeepSpeedEngine configuration:
286
  [2023-04-14 08:09:50,235] [INFO] [config.py:957:print] activation_checkpointing_config {
287
  "partition_activations": false,
 
327
  [2023-04-14 08:09:50,235] [INFO] [config.py:957:print] communication_data_type ...... None
328
  [2023-04-14 08:09:50,235] [INFO] [config.py:957:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
329
  [2023-04-14 08:09:50,235] [INFO] [config.py:957:print] curriculum_enabled_legacy .... False
330
+ Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...[2023-04-14 08:09:50,235] [INFO] [config.py:957:print] curriculum_params_legacy ..... False
331
 
332
  [2023-04-14 08:09:50,236] [INFO] [config.py:957:print] data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'curriculum_learning': {'enabled': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
333
  [2023-04-14 08:09:50,236] [INFO] [config.py:957:print] data_efficiency_enabled ...... False
 
344
  [2023-04-14 08:09:50,236] [INFO] [config.py:957:print] eigenvalue_tol ............... 0.01
345
  [2023-04-14 08:09:50,236] [INFO] [config.py:957:print] eigenvalue_verbose ........... False
346
  [2023-04-14 08:09:50,236] [INFO] [config.py:957:print] elasticity_enabled ........... False
347
+ Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
348
  [2023-04-14 08:09:50,236] [INFO] [config.py:957:print] flops_profiler_config ........ {
349
  "enabled": false,
350
  "profile_step": 1,
 
429
  "tp_gather_partition_size": 8
430
  }
431
  }
432
+ Using /home/AdamG012/.cache/torch_extensions/py39_cu113 as PyTorch extensions root...
433
  huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
434
  To disable this warning, you can either:
435
  - Avoid using `tokenizers` before the fork if possible
 
442
  To disable this warning, you can either:
443
  - Avoid using `tokenizers` before the fork if possible
444
  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
445
+ Emitting ninja build file /home/AdamG012/.cache/torch_extensions/py39_cu113/utils/build.ninja...
446
  Building extension module utils...
447
  Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
448
  huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...