picocreator committed on
Commit
7bfbdc3
1 Parent(s): 5747e1d

6d1f84dc2a6eebd464cd01c33bf56eb90d04ebf2ed9dd4ccb7c2bc2e8a0a0f68

Browse files
Files changed (1) hide show
  1. trainer-v4-unit-test/model-init.ipynb +45 -45
trainer-v4-unit-test/model-init.ipynb CHANGED
@@ -3,13 +3,13 @@
3
  {
4
  "attachments": {},
5
  "cell_type": "markdown",
6
- "id": "52b890f4",
7
  "metadata": {
8
  "papermill": {
9
- "duration": 0.002262,
10
- "end_time": "2023-08-23T10:53:59.090685",
11
  "exception": false,
12
- "start_time": "2023-08-23T10:53:59.088423",
13
  "status": "completed"
14
  },
15
  "tags": []
@@ -27,16 +27,16 @@
27
  {
28
  "attachments": {},
29
  "cell_type": "markdown",
30
- "id": "f89c248f",
31
  "metadata": {
32
  "notebookRunGroups": {
33
  "groupValue": ""
34
  },
35
  "papermill": {
36
- "duration": 0.001598,
37
- "end_time": "2023-08-23T10:53:59.094424",
38
  "exception": false,
39
- "start_time": "2023-08-23T10:53:59.092826",
40
  "status": "completed"
41
  },
42
  "tags": []
@@ -48,19 +48,19 @@
48
  {
49
  "cell_type": "code",
50
  "execution_count": 1,
51
- "id": "65ffd8d5",
52
  "metadata": {
53
  "execution": {
54
- "iopub.execute_input": "2023-08-23T10:53:59.099519Z",
55
- "iopub.status.busy": "2023-08-23T10:53:59.099000Z",
56
- "iopub.status.idle": "2023-08-23T10:53:59.849771Z",
57
- "shell.execute_reply": "2023-08-23T10:53:59.848760Z"
58
  },
59
  "papermill": {
60
- "duration": 0.756019,
61
- "end_time": "2023-08-23T10:53:59.852126",
62
  "exception": false,
63
- "start_time": "2023-08-23T10:53:59.096107",
64
  "status": "completed"
65
  },
66
  "tags": []
@@ -76,19 +76,19 @@
76
  {
77
  "cell_type": "code",
78
  "execution_count": 2,
79
- "id": "c3b4c1b4",
80
  "metadata": {
81
  "execution": {
82
- "iopub.execute_input": "2023-08-23T10:53:59.858552Z",
83
- "iopub.status.busy": "2023-08-23T10:53:59.858038Z",
84
- "iopub.status.idle": "2023-08-23T10:54:08.846245Z",
85
- "shell.execute_reply": "2023-08-23T10:54:08.844779Z"
86
  },
87
  "papermill": {
88
- "duration": 8.994708,
89
- "end_time": "2023-08-23T10:54:08.848911",
90
  "exception": false,
91
- "start_time": "2023-08-23T10:53:59.854203",
92
  "status": "completed"
93
  },
94
  "tags": []
@@ -98,14 +98,20 @@
98
  "name": "stdout",
99
  "output_type": "stream",
100
  "text": [
101
- "[2023-08-23 10:54:04,147] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
 
 
 
 
 
 
 
102
  ]
103
  },
104
  {
105
  "name": "stdout",
106
  "output_type": "stream",
107
  "text": [
108
- "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
109
  "---- Initializing model ----\r\n",
110
  "No of layers: 6\r\n",
111
  "Embedding size: 512\r\n",
@@ -129,22 +135,22 @@
129
  "output_type": "stream",
130
  "text": [
131
  "Building extension module wkv_1_bf16...\r\n",
132
- "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n",
133
- "ninja: no work to do.\r\n",
134
- "Loading extension module wkv_1_bf16...\r\n"
135
  ]
136
  },
137
  {
138
  "name": "stdout",
139
  "output_type": "stream",
140
  "text": [
141
- "[RWKV.model]: Finished initial model load\r\n"
 
142
  ]
143
  },
144
  {
145
  "name": "stdout",
146
  "output_type": "stream",
147
  "text": [
 
148
  "50277 512 -0.0001 emb.weight\r\n"
149
  ]
150
  },
@@ -207,15 +213,15 @@
207
  "text": [
208
  "512 512 0 blocks.3.att.receptance.weight\r\n",
209
  "512 512 0 blocks.3.att.output.weight\r\n",
210
- "2048 512 1.0 blocks.3.ffn.key.weight\r\n"
 
 
211
  ]
212
  },
213
  {
214
  "name": "stdout",
215
  "output_type": "stream",
216
  "text": [
217
- "512 512 0 blocks.3.ffn.receptance.weight\r\n",
218
- "512 2048 0 blocks.3.ffn.value.weight\r\n",
219
  "512 512 0 blocks.4.att.key.weight\r\n",
220
  "512 512 1.0 blocks.4.att.value.weight\r\n",
221
  "512 512 0 blocks.4.att.receptance.weight\r\n",
@@ -230,23 +236,17 @@
230
  "512 512 0 blocks.4.ffn.receptance.weight\r\n",
231
  "512 2048 0 blocks.4.ffn.value.weight\r\n",
232
  "512 512 0 blocks.5.att.key.weight\r\n",
233
- "512 512 1.0 blocks.5.att.value.weight\r\n"
234
- ]
235
- },
236
- {
237
- "name": "stdout",
238
- "output_type": "stream",
239
- "text": [
240
  "512 512 0 blocks.5.att.receptance.weight\r\n",
241
  "512 512 0 blocks.5.att.output.weight\r\n",
242
- "2048 512 1.0 blocks.5.ffn.key.weight\r\n",
243
- "512 512 0 blocks.5.ffn.receptance.weight\r\n"
244
  ]
245
  },
246
  {
247
  "name": "stdout",
248
  "output_type": "stream",
249
  "text": [
 
250
  "512 2048 0 blocks.5.ffn.value.weight\r\n",
251
  "50277 512 0.5 head.weight\r\n"
252
  ]
@@ -278,14 +278,14 @@
278
  },
279
  "papermill": {
280
  "default_parameters": {},
281
- "duration": 11.521627,
282
- "end_time": "2023-08-23T10:54:09.274570",
283
  "environment_variables": {},
284
  "exception": null,
285
  "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/trainer-v4-unit-test/model-init.ipynb",
286
  "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/trainer-v4-unit-test/model-init.ipynb",
287
  "parameters": {},
288
- "start_time": "2023-08-23T10:53:57.752943",
289
  "version": "2.4.0"
290
  }
291
  },
 
3
  {
4
  "attachments": {},
5
  "cell_type": "markdown",
6
+ "id": "ee8cfec1",
7
  "metadata": {
8
  "papermill": {
9
+ "duration": 0.001903,
10
+ "end_time": "2023-08-23T19:20:40.353029",
11
  "exception": false,
12
+ "start_time": "2023-08-23T19:20:40.351126",
13
  "status": "completed"
14
  },
15
  "tags": []
 
27
  {
28
  "attachments": {},
29
  "cell_type": "markdown",
30
+ "id": "7713f8b0",
31
  "metadata": {
32
  "notebookRunGroups": {
33
  "groupValue": ""
34
  },
35
  "papermill": {
36
+ "duration": 0.001217,
37
+ "end_time": "2023-08-23T19:20:40.355733",
38
  "exception": false,
39
+ "start_time": "2023-08-23T19:20:40.354516",
40
  "status": "completed"
41
  },
42
  "tags": []
 
48
  {
49
  "cell_type": "code",
50
  "execution_count": 1,
51
+ "id": "54684bc2",
52
  "metadata": {
53
  "execution": {
54
+ "iopub.execute_input": "2023-08-23T19:20:40.360445Z",
55
+ "iopub.status.busy": "2023-08-23T19:20:40.359491Z",
56
+ "iopub.status.idle": "2023-08-23T19:20:41.112004Z",
57
+ "shell.execute_reply": "2023-08-23T19:20:41.111011Z"
58
  },
59
  "papermill": {
60
+ "duration": 0.757092,
61
+ "end_time": "2023-08-23T19:20:41.114108",
62
  "exception": false,
63
+ "start_time": "2023-08-23T19:20:40.357016",
64
  "status": "completed"
65
  },
66
  "tags": []
 
76
  {
77
  "cell_type": "code",
78
  "execution_count": 2,
79
+ "id": "80da2afe",
80
  "metadata": {
81
  "execution": {
82
+ "iopub.execute_input": "2023-08-23T19:20:41.119086Z",
83
+ "iopub.status.busy": "2023-08-23T19:20:41.118584Z",
84
+ "iopub.status.idle": "2023-08-23T19:20:50.840612Z",
85
+ "shell.execute_reply": "2023-08-23T19:20:50.839437Z"
86
  },
87
  "papermill": {
88
+ "duration": 9.727624,
89
+ "end_time": "2023-08-23T19:20:50.843488",
90
  "exception": false,
91
+ "start_time": "2023-08-23T19:20:41.115864",
92
  "status": "completed"
93
  },
94
  "tags": []
 
98
  "name": "stdout",
99
  "output_type": "stream",
100
  "text": [
101
+ "[2023-08-23 19:20:45,786] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)\r\n"
102
+ ]
103
+ },
104
+ {
105
+ "name": "stdout",
106
+ "output_type": "stream",
107
+ "text": [
108
+ "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
109
  ]
110
  },
111
  {
112
  "name": "stdout",
113
  "output_type": "stream",
114
  "text": [
 
115
  "---- Initializing model ----\r\n",
116
  "No of layers: 6\r\n",
117
  "Embedding size: 512\r\n",
 
135
  "output_type": "stream",
136
  "text": [
137
  "Building extension module wkv_1_bf16...\r\n",
138
+ "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n"
 
 
139
  ]
140
  },
141
  {
142
  "name": "stdout",
143
  "output_type": "stream",
144
  "text": [
145
+ "ninja: no work to do.\r\n",
146
+ "Loading extension module wkv_1_bf16...\r\n"
147
  ]
148
  },
149
  {
150
  "name": "stdout",
151
  "output_type": "stream",
152
  "text": [
153
+ "[RWKV.model]: Finished initial model load\r\n",
154
  "50277 512 -0.0001 emb.weight\r\n"
155
  ]
156
  },
 
213
  "text": [
214
  "512 512 0 blocks.3.att.receptance.weight\r\n",
215
  "512 512 0 blocks.3.att.output.weight\r\n",
216
+ "2048 512 1.0 blocks.3.ffn.key.weight\r\n",
217
+ "512 512 0 blocks.3.ffn.receptance.weight\r\n",
218
+ "512 2048 0 blocks.3.ffn.value.weight\r\n"
219
  ]
220
  },
221
  {
222
  "name": "stdout",
223
  "output_type": "stream",
224
  "text": [
 
 
225
  "512 512 0 blocks.4.att.key.weight\r\n",
226
  "512 512 1.0 blocks.4.att.value.weight\r\n",
227
  "512 512 0 blocks.4.att.receptance.weight\r\n",
 
236
  "512 512 0 blocks.4.ffn.receptance.weight\r\n",
237
  "512 2048 0 blocks.4.ffn.value.weight\r\n",
238
  "512 512 0 blocks.5.att.key.weight\r\n",
239
+ "512 512 1.0 blocks.5.att.value.weight\r\n",
 
 
 
 
 
 
240
  "512 512 0 blocks.5.att.receptance.weight\r\n",
241
  "512 512 0 blocks.5.att.output.weight\r\n",
242
+ "2048 512 1.0 blocks.5.ffn.key.weight\r\n"
 
243
  ]
244
  },
245
  {
246
  "name": "stdout",
247
  "output_type": "stream",
248
  "text": [
249
+ "512 512 0 blocks.5.ffn.receptance.weight\r\n",
250
  "512 2048 0 blocks.5.ffn.value.weight\r\n",
251
  "50277 512 0.5 head.weight\r\n"
252
  ]
 
278
  },
279
  "papermill": {
280
  "default_parameters": {},
281
+ "duration": 12.198584,
282
+ "end_time": "2023-08-23T19:20:51.170107",
283
  "environment_variables": {},
284
  "exception": null,
285
  "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/trainer-v4-unit-test/model-init.ipynb",
286
  "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/trainer-v4-unit-test/model-init.ipynb",
287
  "parameters": {},
288
+ "start_time": "2023-08-23T19:20:38.971523",
289
  "version": "2.4.0"
290
  }
291
  },