Update README.md
README.md (CHANGED)

The commit removes the expandable axolotl config block from the model card; the YAML front matter above it and the "Training hyperparameters" section below it are unchanged:
````diff
@@ -8,86 +8,6 @@ model-index:
   results: []
 ---
 
-[<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
-<details>
-<summary>See axolotl config</summary>
-
-axolotl version: `0.4.0`
-```yaml
-base_model: alpindale/Mistral-7B-v0.2-hf
-model_type: MistralForCausalLM
-tokenizer_type: LlamaTokenizer
-
-load_in_8bit: false
-load_in_4bit: false
-strict: false
-
-datasets:
-  - path: /data/seongyun/open-instruct-2/augmented_diverse_response/preferences_v1_responses_for_training.jsonl
-    type:
-      system_prompt: ""
-      system_format: "[INST] {system}\n"
-      field_system: system
-      field_instruction: instruction
-      field_output: output
-      format: "{instruction} [/INST]"
-      no_input_format: "{instruction} [/INST]"
-# conversation: mistral
-
-dataset_prepared_path:
-hub_model_id: kaist-ai/mpa-Mistral-7b-v0.2-hf-sft
-hub_strategy: checkpoint
-# val_set_size: 0
-output_dir: /data/suehyun/axolotl/outputs/mpa/mistral-7b-v0.2-hf
-
-sequence_len: 8192
-sample_packing: true
-pad_to_sequence_len: true
-eval_sample_packing: false
-
-wandb_project: mpa
-wandb_entity: suehyun
-wandb_watch:
-wandb_name: mpa_mistral-7b-v0.2-hf
-wandb_log_model:
-
-gradient_accumulation_steps: 4
-micro_batch_size: 2
-num_epochs: 4
-optimizer: adamw_bnb_8bit
-lr_scheduler: cosine
-learning_rate: 0.000005
-
-train_on_inputs: false
-group_by_length: false
-bf16: auto
-fp16:
-tf32: false
-
-gradient_checkpointing: true
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
-logging_steps: 1
-xformers_attention:
-flash_attention: true
-
-warmup_steps: 10
-# evals_per_epoch: 4
-eval_table_size:
-# eval_max_new_tokens: 128
-saves_per_epoch: 1
-debug:
-deepspeed:
-weight_decay: 0.0
-fsdp:
-fsdp_config:
-special_tokens:
-
-```
-
-</details><br>
-
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
````
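For readers who still want the prompt layout that the removed block documented: the config's `system_format` and `format` fields describe a Mistral-style instruct template. Below is a minimal sketch, assuming a plain string render and the config's field names (`field_system`, `field_instruction`); the exact string assembly is axolotl's, so treat this as illustrative only.

```python
# Sketch of the prompt layout implied by the removed config:
#   system_format:   "[INST] {system}\n"
#   format:          "{instruction} [/INST]"
#   no_input_format: "{instruction} [/INST]"   (identical here)
# How axolotl stitches these together is up to its prompt strategies;
# this helper only illustrates the resulting shape.

def render_prompt(instruction: str, system: str = "") -> str:
    """Render one example in the [INST] ... [/INST] layout from the config."""
    if system:
        return f"[INST] {system}\n{instruction} [/INST]"
    # With system_prompt: "" the system segment is empty, so only the
    # format part remains.
    return f"{instruction} [/INST]"


# Hypothetical record using the config's field names.
example = {"system": "You are a helpful assistant.",
           "instruction": "Summarize the article below."}
print(render_prompt(example["instruction"], example["system"]))
# [INST] You are a helpful assistant.
# Summarize the article below. [/INST]
```

Note that with `train_on_inputs: false`, only the completion (`field_output`) contributes to the loss; the rendered prompt above is masked out during training.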
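Two more details from the removed block that the shortened card no longer surfaces. First, the batch arithmetic: `gradient_accumulation_steps: 4` times `micro_batch_size: 2` gives an effective batch of 8 packed sequences per device per optimizer step (times the device count, which the config does not record). Second, the checkpoint name: the run pushed to `kaist-ai/mpa-Mistral-7b-v0.2-hf-sft`. A minimal loading sketch with `transformers`, assuming the repo is public and the standard auto classes suffice:

```python
# Minimal sketch: load the checkpoint the config published under
# hub_model_id. Standard transformers usage; not taken from the card itself.
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "kaist-ai/mpa-Mistral-7b-v0.2-hf-sft"
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo, torch_dtype="auto")

# Illustrative prompt shaped like the training template
# (see the render sketch above).
prompt = "[INST] You are a helpful assistant.\nName three uses of YAML. [/INST]"
inputs = tokenizer(prompt, return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```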