# Launch a full-parameter (`--train_type full`) supervised fine-tuning run of
# the local Qwen2.5-Omni-3B checkpoint with the ms-swift CLI (`swift sft`).
# CUDA_VISIBLE_DEVICES=0 exposes only GPU index 0 to the process.
#
# Each option sits on its own line, joined by a trailing backslash. The
# backslash must be the LAST character on its line: a backslash followed by a
# space escapes that space instead of continuing the line, which glues a
# leading blank onto the next word (e.g. " --model") so it is no longer passed
# as the intended option name.
#
# The final active option deliberately has no trailing backslash, so the
# command ends there and does not depend on the comment lines that follow.
CUDA_VISIBLE_DEVICES=0 swift sft \
  --model /home/xj_data/jishengpeng/Code/Qwen2.5-Omni-3B \
  --dataset /home/xj_data/jishengpeng/InteractSpeech/ms-swift/dataset.json \
  --train_type full \
  --output_dir /home/xj_data/jishengpeng/InteractSpeech/ms-swift/result/output_3B_fulltune_interact \
  --torch_dtype bfloat16 \
  --num_train_epochs 3 \
  --per_device_train_batch_size 2 \
  --per_device_eval_batch_size 1
# ...

# Reference only (disabled): multi-GPU `swift pt` example.
#
# 8*A100
# NPROC_PER_NODE=8 \
# CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
# swift pt \
#     --model Qwen/Qwen2.5-7B \
#     --dataset swift/chinese-c4 \
#     --streaming true \
#     --train_type full \
#     --deepspeed zero2 \
#     --output_dir output \
#     --max_steps 10000 \
#     ...

# Disabled options kept for reference. To enable one, insert it into the
# active command above (before its last option) and keep the backslash chain
# intact.
#     --lora_rank 8 \
#     --lora_alpha 32 \
#     --target_modules all-linear \
#     --gradient_accumulation_steps 16 \
#     --eval_steps 50 \
#     --save_steps 50 \
#     --save_total_limit 2 \
#     --logging_steps 5 \
#     --max_length 2048 \
#     --output_dir output \
#     --system 'You are a helpful assistant.' \
#     --warmup_ratio 0.05 \
#     --dataloader_num_workers 4 \
#     --model_author swift \
#     --model_name swift-robot