Update README.md
Browse files
README.md
CHANGED
@@ -1,3 +1,29 @@
|
|
1 |
---
|
2 |
license: cc-by-nc-4.0
|
|
|
|
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
license: cc-by-nc-4.0
|
3 |
+
language:
|
4 |
+
- ja
|
5 |
---
|
6 |
+
|
7 |
+
Base Model: RWKV-4-Pile-14B-Instruct-test5-20230329-ctx4096.pth
|
8 |
+
https://huggingface.co/BlinkDL/rwkv-4-pile-14b/tree/main
|
9 |
+
|
10 |
+
Training Code:
|
11 |
+
https://github.com/Blealtan/RWKV-LM-LoRA
|
12 |
+
|
13 |
+
Training Command:
|
14 |
+
python train.py \
|
15 |
+
--load_model RWKV-4-Pile-14B-Instruct-test5-20230329-ctx4096.pth \
|
16 |
+
--proj_dir out \
|
17 |
+
--data_file "train.npy" \
|
18 |
+
--data_type "numpy" \
|
19 |
+
--vocab_size 50277 \
|
20 |
+
--ctx_len 1024 \
|
21 |
+
--epoch_save 5 \
|
22 |
+
--epoch_count 100 \
|
23 |
+
--n_layer 40 \
|
24 |
+
--n_embd 5120 \
|
25 |
+
--epoch_steps 1000 --epoch_begin 0 --micro_bsz 1 --pre_ffn 0 --head_qk 0 --lr_init 1e-5 --lr_final 1e-5 --warmup_steps 0 --beta1 0.9 --beta2 0.999 --adam_eps 1e-8 --accelerator gpu --devices 1 --precision bf16 --strategy deepspeed_stage_2 --grad_cp 0 \
|
26 |
+
--lora --lora_r 8 --lora_alpha 32 --lora_dropout 0.01
|
27 |
+
|
28 |
+
Training Environment:
|
29 |
+
1x A6000 GPU + Xeon CPU, 256 GB RAM
|