trollek committed on
Commit
81f22b4
1 Parent(s): 8d57034

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +60 -3
README.md CHANGED
@@ -1,3 +1,60 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: h2oai/h2o-danube2-1.8b-base
4
+ datasets:
5
+ - cgato/SlimOrcaDedupCleaned
6
+ language:
7
+ - en
8
+ library_name: transformers
9
+ ---
10
+
11
+
12
+
13
+ ```yaml
14
+ ### model
15
+ model_name_or_path: /home/trolle/Documents/Projects/trollek/danube2/base/danube2-base-chatml
16
+
17
+ ### method
18
+ stage: sft
19
+ do_train: true
20
+ finetuning_type: full
21
+ use_badam: true
22
+ badam_switch_mode: ascending
23
+ badam_switch_interval: 50
24
+ badam_verbose: 1
25
+ badam_start_block: 13
26
+ seed: 314
27
+
28
+ ### dataset
29
+ dataset: slimorca_dedup_cleaned
30
+ template: ninja_chatml
31
+ cutoff_len: 8192
32
+ overwrite_cache: false
33
+ preprocessing_num_workers: 12
34
+
35
+ ### output
36
+ output_dir: /home/trolle/Documents/Projects/trollek/danube2/base/slim-chatml-badam
37
+ logging_steps: 5
38
+ save_steps: 1
39
+ save_strategy: epoch
40
+ plot_loss: true
41
+ overwrite_output_dir: false
42
+
43
+ ### train
44
+ per_device_train_batch_size: 2
45
+ gradient_accumulation_steps: 8
46
+ learning_rate: 0.00001
47
+ num_train_epochs: 1
48
+ lr_scheduler_type: cosine
49
+ warmup_ratio: 0.01
50
+ bf16: true
51
+ flash_attn: fa2
52
+ #neftune_noise_alpha: 5
53
+
54
+ ### eval
55
+ val_size: 0.01
56
+ per_device_eval_batch_size: 1
57
+ eval_strategy: steps
58
+ eval_steps: 2000
59
+ ```
60
+