Update README.md
---
license: apache-2.0
datasets:
- TigerResearch/pretrain_zh
language:
- zh
pipeline_tag: text-generation
---
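
The `datasets` tag above names the pretraining corpus. As a quick sketch (an illustrative snippet, not part of the original card), the corpus can be streamed with the 🤗 `datasets` library to inspect its records:

```python
# Illustrative sketch: peek at the pretraining corpus named in the card metadata.
# Streaming avoids downloading the full corpus up front; the record schema is
# whatever the dataset exposes, so print one example rather than assuming fields.
from datasets import load_dataset

ds = load_dataset("TigerResearch/pretrain_zh", split="train", streaming=True)
print(next(iter(ds)))
```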

Trained with [LLaMA-Efficient-Tuning](https://github.com/hiyouga/LLaMA-Efficient-Tuning) using the command below.

```sh
# Full-parameter pre-training (--stage pt) of gpt2 on the tiger_pretrain_zh corpus
python src/train_bash.py \
    --stage pt \
    --model_name_or_path gpt2 \
    --do_train \
    --finetuning_type full \
    --dataset tiger_pretrain_zh \
    --template ziya \
    --use_fast_tokenizer \
    --preprocessing_num_workers 64 \
    --per_device_train_batch_size 8 \
    --gradient_accumulation_steps 16 \
    --output_dir .cache/gpt2-tigerResearch_pretrain_zh \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --save_steps 1000 \
    --eval_steps 500 \
    --learning_rate 5e-5 \
    --num_train_epochs 1.0 \
    --val_size 3000 \
    --evaluation_strategy steps \
    --plot_loss \
    --max_source_length 1024 \
    --bf16
```
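
For scale, these settings give an effective batch size of 8 × 16 = 128 sequences per device per optimizer step. Once training completes, the checkpoint written to `--output_dir` loads like any GPT-2 model; below is a minimal usage sketch with 🤗 `transformers` (the local path and the generation settings are assumptions, not part of the original card):

```python
# Illustrative usage sketch: load the trained checkpoint and sample a continuation.
# The path is the --output_dir from the training command; swap in the Hub repo id
# if the model has been pushed to the Hugging Face Hub.
from transformers import AutoModelForCausalLM, AutoTokenizer

path = ".cache/gpt2-tigerResearch_pretrain_zh"
tokenizer = AutoTokenizer.from_pretrained(path)
model = AutoModelForCausalLM.from_pretrained(path)

inputs = tokenizer("今天天气", return_tensors="pt")  # "Today's weather" as a Chinese prompt
outputs = model.generate(**inputs, max_new_tokens=50, do_sample=True, top_p=0.9)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```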