# Launch full-parameter supervised fine-tuning (`swift sft --train_type full`)
# of the Qwen2.5-Omni-3B checkpoint on GPU 0.
#
# The three paths below default to the original hard-coded locations and may
# be overridden from the environment without editing this file, e.g.
#   SFT_OUTPUT_DIR=/tmp/run1 sh <this-script>
SFT_MODEL_DIR="${SFT_MODEL_DIR:-/home/xj_data/jishengpeng/Code/Qwen2.5-Omni-3B}"
SFT_DATASET="${SFT_DATASET:-/home/xj_data/jishengpeng/InteractSpeech/ms-swift/dataset.json}"
SFT_OUTPUT_DIR="${SFT_OUTPUT_DIR:-/home/xj_data/jishengpeng/InteractSpeech/ms-swift/result/output_3B_fulltune_interact}"

# CUDA_VISIBLE_DEVICES is set as a command prefix so it applies to this one
# invocation only. The final option line deliberately has no trailing
# backslash, so the command never continues into the comments that follow.
CUDA_VISIBLE_DEVICES=0 swift sft \
    --model "$SFT_MODEL_DIR" \
    --dataset "$SFT_DATASET" \
    --train_type full \
    --output_dir "$SFT_OUTPUT_DIR" \
    --torch_dtype bfloat16 \
    --num_train_epochs 3 \
    --per_device_train_batch_size 2 \
    --per_device_eval_batch_size 1
# ... (when appending more options, add a trailing backslash to the line
#      above and keep the last option line free of one)

# ---------------------------------------------------------------------------
# Reference snippets (commented out, never executed).
# NOTE(review): `--output_dir output` appears twice below, and the LoRA
# options after the second `# ...` follow a `--train_type full` example, so
# this looks like two separate examples pasted together -- reconcile the
# options before uncommenting any of it.
# ---------------------------------------------------------------------------
# # 8*A100
# NPROC_PER_NODE=8 \
# CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
# swift pt \
# --model Qwen/Qwen2.5-7B \
# --dataset swift/chinese-c4 \
# --streaming true \
# --train_type full \
# --deepspeed zero2 \
# --output_dir output \
# --max_steps 10000 \
# ...
# --lora_rank 8 \
# --lora_alpha 32 \
# --target_modules all-linear \
# --gradient_accumulation_steps 16 \
# --eval_steps 50 \
# --save_steps 50 \
# --save_total_limit 2 \
# --logging_steps 5 \
# --max_length 2048 \
# --output_dir output \
# --system 'You are a helpful assistant.' \
# --warmup_ratio 0.05 \
# --dataloader_num_workers 4 \
# --model_author swift \
# --model_name swift-robot