AjayP13 committed on
Commit
1bad3ba
·
verified ·
1 Parent(s): cd32c84

Pushed by DataDreamer

Browse files

Update training_args.json

Files changed (1) hide show
  1. training_args.json +10 -10
training_args.json CHANGED
@@ -1,25 +1,25 @@
1
  {
2
- "output_dir": "./output/train-styledistance-model/_checkpoints",
3
  "overwrite_output_dir": false,
4
  "do_train": false,
5
  "do_eval": true,
6
  "do_predict": false,
7
  "evaluation_strategy": "steps",
8
  "prediction_loss_only": false,
9
- "per_device_train_batch_size": 128,
10
- "per_device_eval_batch_size": 128,
11
  "per_gpu_train_batch_size": null,
12
  "per_gpu_eval_batch_size": null,
13
- "gradient_accumulation_steps": 1,
14
  "eval_accumulation_steps": 1,
15
  "eval_delay": 0,
16
- "learning_rate": 0.0001,
17
  "weight_decay": 0.01,
18
  "adam_beta1": 0.9,
19
  "adam_beta2": 0.999,
20
  "adam_epsilon": 1e-08,
21
  "max_grad_norm": 1.0,
22
- "num_train_epochs": 10,
23
  "max_steps": -1,
24
  "lr_scheduler_type": "linear",
25
  "lr_scheduler_kwargs": {},
@@ -28,13 +28,13 @@
28
  "log_level": "passive",
29
  "log_level_replica": "warning",
30
  "log_on_each_node": true,
31
- "logging_dir": "./output/train-styledistance-model/_checkpoints/runs/Jul17_08-46-22_nlpgpu04.seas.upenn.edu",
32
  "logging_strategy": "steps",
33
  "logging_first_step": false,
34
  "logging_steps": 1,
35
  "logging_nan_inf_filter": true,
36
  "save_strategy": "steps",
37
- "save_steps": 100,
38
  "save_total_limit": 1,
39
  "save_safetensors": false,
40
  "save_on_each_node": false,
@@ -59,11 +59,11 @@
59
  "tpu_metrics_debug": false,
60
  "debug": [],
61
  "dataloader_drop_last": false,
62
- "eval_steps": 100,
63
  "dataloader_num_workers": 0,
64
  "dataloader_prefetch_factor": null,
65
  "past_index": -1,
66
- "run_name": "DataDreamer - Train StyleDistance Model",
67
  "disable_tqdm": true,
68
  "remove_unused_columns": false,
69
  "label_names": null,
 
1
  {
2
+ "output_dir": "./output/train-wegmann--styledistance-model/_checkpoints",
3
  "overwrite_output_dir": false,
4
  "do_train": false,
5
  "do_eval": true,
6
  "do_predict": false,
7
  "evaluation_strategy": "steps",
8
  "prediction_loss_only": false,
9
+ "per_device_train_batch_size": 16,
10
+ "per_device_eval_batch_size": 16,
11
  "per_gpu_train_batch_size": null,
12
  "per_gpu_eval_batch_size": null,
13
+ "gradient_accumulation_steps": 8,
14
  "eval_accumulation_steps": 1,
15
  "eval_delay": 0,
16
+ "learning_rate": 0.001,
17
  "weight_decay": 0.01,
18
  "adam_beta1": 0.9,
19
  "adam_beta2": 0.999,
20
  "adam_epsilon": 1e-08,
21
  "max_grad_norm": 1.0,
22
+ "num_train_epochs": 20,
23
  "max_steps": -1,
24
  "lr_scheduler_type": "linear",
25
  "lr_scheduler_kwargs": {},
 
28
  "log_level": "passive",
29
  "log_level_replica": "warning",
30
  "log_on_each_node": true,
31
+ "logging_dir": "./output/train-wegmann--styledistance-model/_checkpoints/runs/Jul21_05-54-20_nlpgpu04.seas.upenn.edu",
32
  "logging_strategy": "steps",
33
  "logging_first_step": false,
34
  "logging_steps": 1,
35
  "logging_nan_inf_filter": true,
36
  "save_strategy": "steps",
37
+ "save_steps": 200,
38
  "save_total_limit": 1,
39
  "save_safetensors": false,
40
  "save_on_each_node": false,
 
59
  "tpu_metrics_debug": false,
60
  "debug": [],
61
  "dataloader_drop_last": false,
62
+ "eval_steps": 200,
63
  "dataloader_num_workers": 0,
64
  "dataloader_prefetch_factor": null,
65
  "past_index": -1,
66
+ "run_name": "DataDreamer - Train Wegmann + StyleDistance Model",
67
  "disable_tqdm": true,
68
  "remove_unused_columns": false,
69
  "label_names": null,