plaguss committed
Commit cbf359d
1 Parent(s): 72788aa

Upload train-config.yaml with huggingface_hub

Files changed (1)
  1. train-config.yaml +19 -13
train-config.yaml
@@ -7,6 +7,9 @@ format_args:
 model_args:
   pretrained_model_name_or_path: alignment-handbook/zephyr-7b-sft-full
   torch_dtype: float16
+  quantization_config:
+    quant_method: bitsandbytes
+    load_in_4bit: true
 
 peft_config:
   r: 16
@@ -26,41 +29,44 @@ peft_config:
 wandb_args:
   entity: argilla-io
   project: dibt-dpo
-  name: zephyr-7b-lora-dpo-dibt-v0
+  name: zephyr-7b-lora-dpo-dibt-openhermes-params-v0
 
 training_args:
-  bf16: true
+  # `trl.DPOTrainer`
   beta: 0.1
+  max_length: 1536
+  max_prompt_length: 1024
   loss_type: sigmoid
+  # `transformers.Trainer`
+  bf16: true
   do_eval: true
   do_train: true
   evaluation_strategy: steps
-  eval_steps: 15
-  gradient_accumulation_steps: 2
+  eval_steps: 20
+  gradient_accumulation_steps: 4
   gradient_checkpointing: true
-  gradient_checkpointing_kwargs:
-    use_reentrant: False
   hub_model_id: plaguss/zephyr-7b-lora-dpo-dibt-v0
   hub_model_revision: v0
   hub_strategy: every_save
   hub_private_repo: true
   push_to_hub: true
-  learning_rate: 5.0e-7
+  learning_rate: 5.0e-5
   logging_steps: 10
   lr_scheduler_type: cosine
-  max_length: 1024
-  max_prompt_length: 512
   num_train_epochs: 2
   optim: paged_adamw_32bit
   output_dir: data/zephyr-7b-sft-lora-dpo-v0
-  per_device_train_batch_size: 8
-  per_device_eval_batch_size: 8
-  save_strategy: epoch
+  load_best_model_at_end: true
+  metric_for_best_model: rewards/accuracies
+  greater_is_better: true
+  per_device_train_batch_size: 4
+  per_device_eval_batch_size: 16
+  save_strategy: steps
   save_total_limit: null
   seed: 42
   warmup_ratio: 0.1
   report_to:
   - wandb
 
-use_accelerate: true
+use_accelerate: false
 use_unsloth: false
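
For context, below is a minimal, hypothetical sketch of how the updated values could be wired up with transformers, peft and trl. It is not the training script used for this run; names such as bnb_config, peft_config and training_args are illustrative, and keys specific to the repo's own tooling (use_accelerate, use_unsloth, hub_model_revision) are omitted. The DPO-specific keys (beta, loss_type, max_length, max_prompt_length) would be forwarded to trl's DPOTrainer, whose exact signature depends on the installed trl version, so it is only referenced in a comment.

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig

model_name = "alignment-handbook/zephyr-7b-sft-full"

# model_args + quantization_config: load the SFT base model in 4-bit via bitsandbytes
bnb_config = BitsAndBytesConfig(load_in_4bit=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    quantization_config=bnb_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# peft_config: LoRA adapter with r=16 (the remaining LoRA fields sit outside this hunk)
peft_config = LoraConfig(r=16, task_type="CAUSAL_LM")

# training_args: the `transformers.Trainer` side of the config
training_args = TrainingArguments(
    output_dir="data/zephyr-7b-sft-lora-dpo-v0",
    bf16=True,
    do_train=True,
    do_eval=True,
    evaluation_strategy="steps",
    eval_steps=20,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    learning_rate=5.0e-5,
    logging_steps=10,
    lr_scheduler_type="cosine",
    num_train_epochs=2,
    optim="paged_adamw_32bit",
    load_best_model_at_end=True,
    metric_for_best_model="rewards/accuracies",
    greater_is_better=True,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=16,
    save_strategy="steps",
    seed=42,
    warmup_ratio=0.1,
    report_to=["wandb"],
    push_to_hub=True,
    hub_model_id="plaguss/zephyr-7b-lora-dpo-dibt-v0",
    hub_strategy="every_save",
    hub_private_repo=True,
)

# The `trl.DPOTrainer` keys (beta=0.1, loss_type="sigmoid", max_length=1536,
# max_prompt_length=1024) are passed to trl's DPOTrainer together with the
# model, tokenizer, peft_config and training_args built above.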