#!/usr/bin/env bash
# Launch CTC fine-tuning of wav2vec2-xls-r-300m on the Basque
# Parliament speech corpus via HF Accelerate (bnb 8-bit optimizer script).
set -euo pipefail

# --ddp_timeout DDP_TIMEOUT
#   Overrides the default timeout for distributed training
#   (value should be given in seconds). (default: 1800)
#   180000 s --> 3000 min (50 h); generous to survive long
#   dataset preprocessing before the first collective op.
accelerate launch \
  run_speech_recognition_ctc_bnb.py \
  --ddp_timeout 180000 \
  --activation_dropout="0.1" \
  --dataset_name="gttsehu/basque_parliament_1" \
  --do_train --do_eval \
  --eval_metrics wer cer \
  --eval_split_name="validation" \
  --eval_steps="4000" \
  --evaluation_strategy="steps" \
  --fp16 \
  --freeze_feature_encoder \
  --gradient_accumulation_steps="2" \
  --gradient_checkpointing \
  --group_by_length \
  --learning_rate="1e-4" \
  --length_column_name="input_length" \
  --logging_steps="4000" \
  --model_name_or_path="facebook/wav2vec2-xls-r-300m" \
  --num_train_epochs="6" \
  --output_dir="./" \
  --overwrite_output_dir \
  --preprocessing_num_workers=32 \
  --per_device_train_batch_size="4" \
  --per_device_eval_batch_size="4" \
  --save_strategy="no" \
  --text_column_name="sentence" \
  --train_split_name="train_clean" \
  --warmup_steps="1000"