#!/bin/bash
# Startup script: optionally download the model, then launch the Gradio app
# (which internally drives vLLM.LLM).
#
# Configurable environment variables (all have defaults):
#   MODEL_REPO             HF repo to download from
#   MODEL_DIR              local directory to download into
#   PRELOAD_MODEL          "1" = ensure model exists locally before launch
#   HOST / GRADIO_PORT     bind address / port for Gradio
#   TENSOR_PARALLEL_SIZE, MAX_MODEL_LEN, GPU_MEMORY_UTILIZATION,
#   TOKENIZER_MODE, SERVED_MODEL_NAME   passed through to app.py
set -euo pipefail

MODEL_REPO="${MODEL_REPO:-stepfun-ai/Step-Audio-2-mini-Think}"
MODEL_DIR="${MODEL_DIR:-/root/models/Step-Audio-2-mini-Think}"
PRELOAD_MODEL="${PRELOAD_MODEL:-1}"
GRADIO_PORT="${GRADIO_PORT:-7860}"
HOST="${HOST:-0.0.0.0}"
TENSOR_PARALLEL_SIZE="${TENSOR_PARALLEL_SIZE:-4}"
MAX_MODEL_LEN="${MAX_MODEL_LEN:-8192}"
GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.9}"
TOKENIZER_MODE="${TOKENIZER_MODE:-step_audio_2}"
SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-step-audio-2-mini-think}"

echo "=========================================="
echo "Step Audio 2 Gradio 启动脚本"
echo "MODEL_REPO: $MODEL_REPO"
echo "MODEL_DIR : $MODEL_DIR"
echo "PRELOAD_MODEL: $PRELOAD_MODEL"
echo "HOST/PORT: $HOST:$GRADIO_PORT"
echo "TP: $TENSOR_PARALLEL_SIZE | MAX_LEN: $MAX_MODEL_LEN"
echo "=========================================="

#######################################
# Download $MODEL_REPO into $MODEL_DIR.
# Prefers the huggingface-cli binary; falls back to the
# huggingface_hub Python API when the CLI is not installed.
#######################################
download_model() {
  if command -v huggingface-cli >/dev/null 2>&1; then
    echo "[Download] 使用 huggingface-cli"
    huggingface-cli download "$MODEL_REPO" --local-dir "$MODEL_DIR" --local-dir-use-symlinks False
  else
    echo "[Download] 使用 python + huggingface_hub"
    # Pass repo/dir through the environment instead of interpolating them
    # into the Python source: a path containing a quote would otherwise
    # break (or inject code into) the generated script. The quoted 'PY'
    # delimiter keeps the heredoc literal.
    MODEL_REPO="$MODEL_REPO" MODEL_DIR="$MODEL_DIR" python3 - <<'PY'
import os
from huggingface_hub import snapshot_download

repo = os.environ["MODEL_REPO"]
target = os.environ["MODEL_DIR"]
print(f"开始下载: {repo}")
snapshot_download(repo_id=repo, local_dir=target, local_dir_use_symlinks=False)
print("下载完成")
PY
  fi
}

if [[ "$PRELOAD_MODEL" == "1" ]]; then
  # config.json is used as a readiness marker: a bare (or partially
  # downloaded) directory triggers a (re-)download.
  if [[ ! -d "$MODEL_DIR" || ! -f "$MODEL_DIR/config.json" ]]; then
    echo "模型未就绪,开始下载..."
    mkdir -p "$MODEL_DIR"
    download_model
  else
    echo "检测到本地模型: $MODEL_DIR"
  fi
  export MODEL_PATH="$MODEL_DIR"
else
  # No preload: let the app resolve the repo name itself (an explicitly
  # set MODEL_PATH still wins).
  echo "跳过预下载,直接使用仓库名称加载"
  export MODEL_PATH="${MODEL_PATH:-$MODEL_REPO}"
fi

echo "模型路径: ${MODEL_PATH}"
echo "启动 Gradio..."
# exec replaces this shell so signals (e.g. SIGTERM from a container
# runtime) reach the app directly and its exit status is propagated.
exec python app.py \
  --host "$HOST" \
  --port "$GRADIO_PORT" \
  --model "$MODEL_PATH" \
  --tensor-parallel-size "$TENSOR_PARALLEL_SIZE" \
  --max-model-len "$MAX_MODEL_LEN" \
  --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION" \
  --tokenizer-mode "$TOKENIZER_MODE" \
  --served-model-name "$SERVED_MODEL_NAME"