# Launch run_xtreme_s.py through torch.distributed.launch with 8 processes per
# node, passing facebook/wav2vec2-xls-r-300m as the model, task "voxpopuli" and
# language "en", with --do_train, --do_eval, --do_predict and --push_to_hub set.
#
# Each backslash must be the very last character on its line: any text after
# it (even a single space) makes it escape that character instead of the
# newline, which splits the command apart.
#
# NOTE(review): the meaning of the individual flags is defined by
# run_xtreme_s.py, which is not visible here -- confirm against that script.
python -m torch.distributed.launch \
  --nproc_per_node=8 \
  run_xtreme_s.py \
  --model_name_or_path="facebook/wav2vec2-xls-r-300m" \
  --task="voxpopuli" \
  --language="en" \
  --output_dir="xtreme_s_xlsr_300m_voxpopuli_en" \
  --overwrite_output_dir \
  --num_train_epochs=10 \
  --per_device_train_batch_size=8 \
  --per_device_eval_batch_size=1 \
  --gradient_accumulation_steps=1 \
  --eval_accumulation_steps=10 \
  --learning_rate="3e-4" \
  --ctc_zero_infinity \
  --warmup_steps=2000 \
  --evaluation_strategy="steps" \
  --max_duration_in_seconds=20 \
  --preprocessing_num_workers=16 \
  --save_steps=500 \
  --eval_steps=500 \
  --logging_steps=1 \
  --layerdrop=0.0 \
  --mask_time_prob=0.05 \
  --mask_time_length=10 \
  --mask_feature_prob=0.05 \
  --mask_feature_length=64 \
  --freeze_feature_encoder \
  --gradient_checkpointing \
  --fp16 \
  --fp16_full_eval \
  --group_by_length \
  --do_train \
  --do_eval \
  --do_predict \
  --per_lang_metrics=False \
  --metric_for_best_model="wer" \
  --greater_is_better=False \
  --load_best_model_at_end \
  --push_to_hub