second commit

Browse files

Files changed (7)

all_results.json +8 -7
generated_predictions.jsonl +0 -0
llamaboard_config.yaml +12 -59
predict_results.json +10 -0
running_log.txt +0 -0
trainer_log.jsonl +0 -0
training_args.yaml +10 -21

all_results.json CHANGED Viewed

@@ -1,9 +1,10 @@
 {
-    "epoch": 4.98793242156074,
-    "num_input_tokens_seen": 5132288,
-    "total_flos": 2.3110461174474342e+17,
-    "train_loss": 0.14078616270634925,
-    "train_runtime": 10317.9615,
-    "train_samples_per_second": 9.634,
-    "train_steps_per_second": 0.075
 }

 {
+    "predict_bleu-4": 86.52117906250001,
+    "predict_model_preparation_time": 0.005,
+    "predict_rouge-1": 94.453125,
+    "predict_rouge-2": 0.0,
+    "predict_rouge-l": 94.453125,
+    "predict_runtime": 15.8809,
+    "predict_samples_per_second": 160.822,
+    "predict_steps_per_second": 10.075
 }

generated_predictions.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llamaboard_config.yaml CHANGED Viewed

@@ -1,5 +1,16 @@
 top.booster: auto
-top.checkpoint_path: null
 top.finetuning_type: full
 top.model_name: LLaMA3.1-8B-Chat
 top.quantization_bit: none
@@ -7,61 +18,3 @@ top.quantization_method: bitsandbytes
 top.rope_scaling: none
 top.template: llama3
 top.visual_inputs: false
-train.additional_target: ''
-train.badam_mode: layer
-train.badam_switch_interval: 50
-train.badam_switch_mode: ascending
-train.badam_update_ratio: 0.05
-train.batch_size: 2
-train.compute_type: bf16
-train.create_new_adapter: false
-train.cutoff_len: 1024
-train.dataset:
-- truth_train
-train.dataset_dir: data
-train.ds_offload: false
-train.ds_stage: '2'
-train.freeze_extra_modules: ''
-train.freeze_trainable_layers: 2
-train.freeze_trainable_modules: all
-train.galore_rank: 16
-train.galore_scale: 0.25
-train.galore_target: all
-train.galore_update_interval: 200
-train.gradient_accumulation_steps: 8
-train.learning_rate: 5e-6
-train.logging_steps: 1
-train.lora_alpha: 16
-train.lora_dropout: 0
-train.lora_rank: 8
-train.lora_target: ''
-train.loraplus_lr_ratio: 0
-train.lr_scheduler_type: cosine
-train.mask_history: false
-train.max_grad_norm: '1.0'
-train.max_samples: '100000'
-train.neat_packing: false
-train.neftune_alpha: 0
-train.num_train_epochs: '5.0'
-train.optim: adamw_torch
-train.packing: false
-train.ppo_score_norm: false
-train.ppo_whiten_rewards: false
-train.pref_beta: 0.1
-train.pref_ftx: 0
-train.pref_loss: sigmoid
-train.report_to: false
-train.resize_vocab: false
-train.reward_model: null
-train.save_steps: 5000
-train.shift_attn: false
-train.train_on_prompt: false
-train.training_stage: Supervised Fine-Tuning
-train.use_badam: false
-train.use_dora: false
-train.use_galore: false
-train.use_llama_pro: false
-train.use_pissa: false
-train.use_rslora: false
-train.val_size: 0
-train.warmup_steps: 10

+eval.batch_size: 2
+eval.cutoff_len: 1024
+eval.dataset:
+- truth_dev
+eval.dataset_dir: data
+eval.max_new_tokens: 512
+eval.max_samples: '100000'
+eval.output_dir: eval_2024-07-29-16-36-04_llama3.1_reeval
+eval.predict: true
+eval.temperature: 0.95
+eval.top_p: 0.7
 top.booster: auto
+top.checkpoint_path: train_2024-07-29-16-36-04_llama3.1_reeval
 top.finetuning_type: full
 top.model_name: LLaMA3.1-8B-Chat
 top.quantization_bit: none
 top.rope_scaling: none
 top.template: llama3
 top.visual_inputs: false

predict_results.json ADDED Viewed

@@ -0,0 +1,10 @@

+{
+    "predict_bleu-4": 86.52117906250001,
+    "predict_model_preparation_time": 0.005,
+    "predict_rouge-1": 94.453125,
+    "predict_rouge-2": 0.0,
+    "predict_rouge-l": 94.453125,
+    "predict_runtime": 15.8809,
+    "predict_samples_per_second": 160.822,
+    "predict_steps_per_second": 10.075
+}

running_log.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff

trainer_log.jsonl CHANGED Viewed

The diff for this file is too large to render. See raw diff

training_args.yaml CHANGED Viewed

@@ -1,29 +1,18 @@
-bf16: true
 cutoff_len: 1024
-dataset: truth_train
 dataset_dir: data
-ddp_timeout: 180000000
-deepspeed: cache/ds_z2_config.json
-do_train: true
 finetuning_type: full
 flash_attn: auto
-gradient_accumulation_steps: 8
-include_num_input_tokens_seen: true
-learning_rate: 5.0e-06
-logging_steps: 1
-lr_scheduler_type: cosine
-max_grad_norm: 1.0
 max_samples: 100000
-model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct
-num_train_epochs: 5.0
-optim: adamw_torch
-output_dir: saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-16-36-04_llama3.1_reeval
-packing: false
-per_device_train_batch_size: 2
-plot_loss: true
 preprocessing_num_workers: 16
-report_to: none
-save_steps: 5000
 stage: sft
 template: llama3
-warmup_steps: 10

 cutoff_len: 1024
 dataset_dir: data
+do_predict: true
+eval_dataset: truth_dev
 finetuning_type: full
 flash_attn: auto
+max_new_tokens: 512
 max_samples: 100000
+model_name_or_path: saves/LLaMA3.1-8B-Chat/full/train_2024-07-29-16-36-04_llama3.1_reeval
+output_dir: saves/LLaMA3.1-8B-Chat/full/eval_2024-07-29-16-36-04_llama3.1_reeval
+per_device_eval_batch_size: 2
+predict_with_generate: true
 preprocessing_num_workers: 16
+quantization_method: bitsandbytes
 stage: sft
+temperature: 0.95
 template: llama3
+top_p: 0.7