# Launch a `swift sft` run with `--train_type full` on the Qwen2.5-Omni-3B
# checkpoint. The CUDA_VISIBLE_DEVICES=0 prefix is applied to this one command
# only, so the process sees just CUDA device index 0.
#
# Paths are collected here so that the dataset and output locations, which
# live under the same project directory, are spelled out only once.
project_root=/home/xj_data/jishengpeng/InteractSpeech/ms-swift
model_path=/home/xj_data/jishengpeng/Code/Qwen2.5-Omni-3B
dataset_path="${project_root}/dataset.json"
output_path="${project_root}/result/output_3B_fulltune_interact"

CUDA_VISIBLE_DEVICES=0 swift sft \
    --model "${model_path}" \
    --dataset "${dataset_path}" \
    --train_type full \
    --output_dir "${output_path}" \
    --torch_dtype bfloat16 \
    --num_train_epochs 3 \
    --per_device_train_batch_size 2 \
    --per_device_eval_batch_size 1
# ... (placeholder kept from the original: no further options are passed)

# Reference example (disabled): multi-GPU launch on 8 x A100
# NPROC_PER_NODE=8 \
# CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
# swift pt \
#     --model Qwen/Qwen2.5-7B \
#     --dataset swift/chinese-c4 \
#     --streaming true \
#     --train_type full \
#     --deepspeed zero2 \
#     --output_dir output \
#     --max_steps 10000 \
#     ...


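# Other options, currently disabled (presumably candidates for the
# `swift sft` command at the top of this file -- confirm before re-enabling):
    # --lora_rank 8 \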
    # --lora_alpha 32 \
    # --target_modules all-linear \
    # --gradient_accumulation_steps 16 \
    # --eval_steps 50 \
    # --save_steps 50 \
    # --save_total_limit 2 \
    # --logging_steps 5 \
    # --max_length 2048 \
    # --output_dir output \
    # --system 'You are a helpful assistant.' \
    # --warmup_ratio 0.05 \
    # --dataloader_num_workers 4 \
    # --model_author swift \
    # --model_name swift-robot