euIaxs22 commited on
Commit
27acc0d
·
verified ·
1 Parent(s): b5c6744

Xxxxxxxxxxxxxxxxxxxxxx

Browse files
Files changed (1) hide show
  1. builder.sh +187 -31
builder.sh CHANGED
@@ -1,10 +1,10 @@
1
  #!/usr/bin/env bash
2
  set -euo pipefail
3
 
4
- echo "🚀 Builder (Apex + Q8) — roda em runtime com GPU visível"
5
 
6
  # ===== Config e diretórios =====
7
- export SELF_HF_REPO_ID="${SELF_HF_REPO_ID:-euIaxs22/Aduc-sdr}" # Model repo no HF com wheels
8
  export HF_HOME="${HF_HOME:-/app/model_cache}"
9
  export HF_HUB_CACHE="${HF_HUB_CACHE:-$HF_HOME/hub}"
10
  export TORCH_HOME="${TORCH_HOME:-$HF_HOME/torch}"
@@ -15,6 +15,7 @@ mkdir -p /app/wheels /app/cuda_cache "$HF_HOME" "$TORCH_HOME" /app/wheels/src
15
  chmod -R 777 /app/wheels || true
16
  export CUDA_CACHE_PATH="/app/cuda_cache"
17
 
 
18
  if [ -f "/NGC-DL-CONTAINER-LICENSE" ]; then
19
  cp -f /NGC-DL-CONTAINER-LICENSE /app/wheels/NGC-DL-CONTAINER-LICENSE || true
20
  fi
@@ -44,14 +45,25 @@ PY
44
  )"
45
  echo "[env] PY_TAG=${PY_TAG} TORCH_VER=${TORCH_VER} CU_TAG=${CU_TAG}"
46
 
 
 
 
47
 
48
- git clone https://github.com/Dao-AILab/flash-attention
49
- cd flash-attention/csrc/layer_norm && MAX_JOBS=90 pip install .
50
-
51
-
 
 
 
 
 
 
 
 
 
52
 
53
- # ===== Checkers =====
54
- check_apex() {
55
  python - <<'PY'
56
  try:
57
  from apex.normalization import FusedLayerNorm, FusedRMSNorm
@@ -63,7 +75,7 @@ raise SystemExit(0 if ok else 1)
63
  PY
64
  }
65
 
66
- check_q8() {
67
  python - <<'PY'
68
  import importlib.util
69
  spec = importlib.util.find_spec("ltx_q8_kernels") or importlib.util.find_spec("q8_kernels")
@@ -71,15 +83,54 @@ raise SystemExit(0 if spec else 1)
71
  PY
72
  }
73
 
74
- # ===== Download do Hub =====
75
- install_from_hf () {
76
- local PKG="$1" # 'apex' ou 'q8_kernels'
77
- echo "[hub] Verificando wheel de ${PKG} no repositório ${SELF_HF_REPO_ID}"
78
- python - "$PKG" "$PY_TAG" "$CU_TAG" <<'PY' || exit 0
79
- import os, sys
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  from huggingface_hub import HfApi, hf_hub_download, HfFolder
81
 
82
- pkg, py_tag, cu_tag = sys.argv[1], sys.argv[2], sys.argv[3]
83
  repo = os.environ.get("SELF_HF_REPO_ID","euIaxs22/Aduc-sdr")
84
  api = HfApi(token=os.getenv("HF_TOKEN") or HfFolder.get_token())
85
  try:
@@ -87,8 +138,19 @@ try:
87
  except Exception:
88
  raise SystemExit(0)
89
 
90
- cands = [f for f in files if f.endswith(".whl") and f.rsplit("/",1)[-1].startswith(pkg+"-") and py_tag in f]
91
- pref = [f for f in cands if cu_tag and cu_tag in f] or cands
 
 
 
 
 
 
 
 
 
 
 
92
  if not pref:
93
  raise SystemExit(0)
94
 
@@ -99,7 +161,61 @@ print(path)
99
  PY
100
  }
101
 
102
- # ===== Builders =====
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  build_apex () {
104
  local SRC="/app/wheels/src/apex"
105
  echo "[build] Preparando fonte Apex em ${SRC}"
@@ -144,20 +260,21 @@ build_q8 () {
144
  fi
145
  }
146
 
147
- # ===== Pipeline genérico =====
 
 
 
148
  ensure_pkg () {
149
  local PKG="$1" # apex | q8_kernels
150
  local CHECK_FN="$2" # check_apex | check_q8
151
  local BUILD_FN="$3" # build_apex | build_q8
152
-
153
  echo "[flow] === ${PKG} ==="
154
  if ${CHECK_FN}; then
155
  echo "[flow] ${PKG}: já instalado (import OK)"
156
  return 0
157
  fi
158
-
159
  echo "[flow] ${PKG}: tentando wheel do Hub (${SELF_HF_REPO_ID})"
160
- HF_OUT="$(install_from_hf "$PKG" || true)"
161
  if [ -n "${HF_OUT:-}" ]; then
162
  WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
163
  echo "[hub] Baixado: ${WHEEL_PATH}"
@@ -171,31 +288,71 @@ ensure_pkg () {
171
  else
172
  echo "[hub] Nenhuma wheel compatível encontrada para ${PKG}"
173
  fi
174
-
175
  echo "[flow] ${PKG}: compilando (fallback)"
176
  ${BUILD_FN}
177
  if ${CHECK_FN}; then
178
  echo "[flow] ${PKG}: sucesso após compilação"
179
  return 0
180
  fi
 
 
 
181
 
182
- echo "[flow] ${PKG}: falhou após build; registrando logs e seguindo"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  return 1
184
  }
185
 
186
- # ===== Execução: Apex e Q8 =====
 
 
 
 
 
 
 
187
  ensure_pkg "apex" check_apex build_apex || true
188
- #ensure_pkg "q8_kernels" check_q8 build_q8 || true
189
 
 
 
190
 
 
191
  python - <<'PY'
192
  import os
193
  from huggingface_hub import HfApi, HfFolder
194
- repo=os.environ.get("SELF_HF_REPO_ID","euIaxs22/Aduc-sdr")
195
- token=os.getenv("HF_TOKEN") or HfFolder.get_token()
 
196
  if not token:
197
  raise SystemExit("HF_TOKEN ausente; upload desabilitado")
198
- api=HfApi(token=token)
 
199
  api.upload_folder(
200
  folder_path="/app/wheels",
201
  repo_id=repo,
@@ -206,6 +363,5 @@ api.upload_folder(
206
  print("Upload concluído (wheels + licença).")
207
  PY
208
 
209
-
210
  chmod -R 777 /app/wheels || true
211
  echo "✅ Builder finalizado."
 
1
  #!/usr/bin/env bash
2
  set -euo pipefail
3
 
4
+ echo "🚀 Builder (FlashAttn LayerNorm + Apex + Q8) — runtime com GPU visível"
5
 
6
  # ===== Config e diretórios =====
7
+ export SELF_HF_REPO_ID="${SELF_HF_REPO_ID:-euIaxs22/Aduc-sdr}" # Repo no HF para wheels
8
  export HF_HOME="${HF_HOME:-/app/model_cache}"
9
  export HF_HUB_CACHE="${HF_HUB_CACHE:-$HF_HOME/hub}"
10
  export TORCH_HOME="${TORCH_HOME:-$HF_HOME/torch}"
 
15
  chmod -R 777 /app/wheels || true
16
  export CUDA_CACHE_PATH="/app/cuda_cache"
17
 
18
+ # Preserve licença NGC em wheels (se presente no container base)
19
  if [ -f "/NGC-DL-CONTAINER-LICENSE" ]; then
20
  cp -f /NGC-DL-CONTAINER-LICENSE /app/wheels/NGC-DL-CONTAINER-LICENSE || true
21
  fi
 
45
  )"
46
  echo "[env] PY_TAG=${PY_TAG} TORCH_VER=${TORCH_VER} CU_TAG=${CU_TAG}"
47
 
48
+ # ============================================================================
49
+ # CHECKERS
50
+ # ============================================================================
51
 
52
# Returns 0 if a FlashAttention fused layer-norm extension is importable,
# 1 otherwise. Probes several historical/current module names.
check_flashln () {
  python - <<'PY'
import importlib.util, pkgutil

cands = [
    "flash_attn_layer_norm",      # recent standalone packaging
    "dropout_layer_norm",         # historical name of the C++ module
    "flash_attn.ops.layer_norm",  # path inside the flash_attn package
]

def has(name):
    # find_spec("a.b") raises ModuleNotFoundError when package "a" is not
    # installed; treat that as "not found" instead of crashing with a traceback.
    try:
        return importlib.util.find_spec(name) is not None
    except ModuleNotFoundError:
        return False

ok = any(has(n) for n in cands)
# Last resort: any top-level module whose name suggests a flash norm kernel.
ok = ok or any(("flash" in m.name and "norm" in m.name) for m in pkgutil.iter_modules())
raise SystemExit(0 if ok else 1)
PY
}
65
 
66
+ check_apex () {
 
67
  python - <<'PY'
68
  try:
69
  from apex.normalization import FusedLayerNorm, FusedRMSNorm
 
75
  PY
76
  }
77
 
78
+ check_q8 () {
79
  python - <<'PY'
80
  import importlib.util
81
  spec = importlib.util.find_spec("ltx_q8_kernels") or importlib.util.find_spec("q8_kernels")
 
83
  PY
84
  }
85
 
86
+ # ============================================================================
87
+ # DOWNLOAD DO HUB (GENÉRICO)
88
+ # ============================================================================
89
+
90
+ # Instala uma wheel do HF por padrão de nome (prefixo) e tags.
91
+ # Uso: install_from_hf_by_prefix <prefixo>
92
# Find and download a wheel named "<prefix>-*.whl" from the HF model repo.
# Stdout protocol: the LAST line printed is the local path of the downloaded
# wheel (callers take `tail -n1`); nothing path-like is printed when no
# compatible wheel exists.
# Uso: install_from_hf_by_prefix <prefixo>
install_from_hf_by_prefix () {
  local PREFIX="$1"
  echo "[hub] Procurando wheels '${PREFIX}-*.whl' em ${SELF_HF_REPO_ID} com tags ${PY_TAG}/${CU_TAG}"
  # 'return 0' (not 'exit 0'): a python failure must not abort the whole
  # builder if this function is ever invoked outside a $(...) subshell.
  python - "$PREFIX" "$PY_TAG" "$CU_TAG" <<'PY' || return 0
import os, sys
from huggingface_hub import HfApi, hf_hub_download, HfFolder

prefix, py_tag, cu_tag = sys.argv[1], sys.argv[2], sys.argv[3]
repo = os.environ.get("SELF_HF_REPO_ID", "euIaxs22/Aduc-sdr")
api = HfApi(token=os.getenv("HF_TOKEN") or HfFolder.get_token())
try:
    files = api.list_repo_files(repo_id=repo, repo_type="model")
except Exception:
    raise SystemExit(0)  # repo unreachable -> behave as "no wheel found"

# The Python tag is mandatory; the CUDA tag is only a preference (below).
cands = [
    f for f in files
    if f.endswith(".whl")
    and f.rsplit("/", 1)[-1].startswith(prefix + "-")
    and py_tag in f
]
pref = [f for f in cands if cu_tag and cu_tag in f] or cands
if not pref:
    raise SystemExit(0)

# Lexicographic max as a cheap version/compat heuristic.
target = sorted(pref, reverse=True)[0]
print(target)
path = hf_hub_download(repo_id=repo, filename=target, repo_type="model", local_dir="/app/wheels")
print(path)
PY
}
125
+
126
+ # Instala uma wheel do HF por padrão flexível para FlashAttn LayerNorm (aceita várias variantes de nome)
127
+ install_flashln_from_hf () {
128
+ echo "[hub] Procurando wheels de FlashAttention LayerNorm em ${SELF_HF_REPO_ID}"
129
+ python - "$PY_TAG" "$CU_TAG" <<'PY' || exit 0
130
+ import os, sys, re
131
  from huggingface_hub import HfApi, hf_hub_download, HfFolder
132
 
133
+ py_tag, cu_tag = sys.argv[1], sys.argv[2]
134
  repo = os.environ.get("SELF_HF_REPO_ID","euIaxs22/Aduc-sdr")
135
  api = HfApi(token=os.getenv("HF_TOKEN") or HfFolder.get_token())
136
  try:
 
138
  except Exception:
139
  raise SystemExit(0)
140
 
141
+ def ok(fn: str) -> bool:
142
+ name = fn.rsplit("/",1)[-1]
143
+ if not name.endswith(".whl"): return False
144
+ if py_tag not in name: return False
145
+ # Padrões comuns de empacotamento
146
+ pats = [
147
+ r"^flash[_-]?attn[_-]?.*layer[_-]?norm-",
148
+ r"^dropout[_-]?layer[_-]?norm-",
149
+ ]
150
+ return any(re.search(p, name, flags=re.I) for p in pats)
151
+
152
+ cands = [f for f in files if ok(f)]
153
+ pref = [f for f in cands if cu_tag and cu_tag in f] or cands
154
  if not pref:
155
  raise SystemExit(0)
156
 
 
161
  PY
162
  }
163
 
164
+ # ============================================================================
165
+ # BUILDERS
166
+ # ============================================================================
167
+
168
# Build the FlashAttention fused layer-norm extension from source and
# install it. The wheel lands in /app/wheels so it can be re-used (and
# uploaded to the HF repo) across restarts.
build_flashln () {
  local SRC="/app/wheels/src/flash-attn"
  echo "[build] Preparando fonte FlashAttention (layer_norm) em ${SRC}"

  # Sync an existing checkout, otherwise shallow-clone.
  if [ -d "$SRC/.git" ]; then
    git -C "$SRC" fetch --all -p || true
    git -C "$SRC" reset --hard origin/main || true
    git -C "$SRC" clean -fdx || true
  else
    rm -rf "$SRC"
    git clone --depth 1 https://github.com/Dao-AILab/flash-attention "$SRC"
  fi

  # Target only the active GPU's compute capability: a single CC per build
  # keeps the wheel small and machine-specific.
  export TORCH_CUDA_ARCH_LIST="$(python - <<'PY'
import torch
try:
    major, minor = torch.cuda.get_device_capability(0)
    print(f"{major}.{minor}")
except Exception:
    # Generic fallback when no GPU is visible on the build host.
    print("8.9")
PY
)"
  echo "[build] TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}"

  # Build only the csrc/layer_norm submodule into a wheel.
  pushd "$SRC/csrc/layer_norm" >/dev/null
  export MAX_JOBS="${MAX_JOBS:-90}"
  python -m pip wheel -v --no-build-isolation --no-deps . -w /app/wheels || true
  popd >/dev/null

  # Install the freshly produced wheel. Only layer-norm wheel names are
  # matched: /app/wheels also holds unrelated wheels (apex, HF downloads),
  # so a blanket "newest *.whl" fallback could install the wrong package.
  local W
  W="$(ls -t /app/wheels/*flash*attn*layer*norm*-*.whl 2>/dev/null | head -n1 || true)"
  if [ -z "${W}" ]; then
    W="$(ls -t /app/wheels/*dropout*layer*norm*-*.whl 2>/dev/null | head -n1 || true)"
  fi
  if [ -n "${W}" ]; then
    python -m pip install -v -U --no-deps "${W}" || true
    echo "[build] FlashAttention LayerNorm instalado da wheel: ${W}"
  else
    echo "[build] Nenhuma wheel gerada; instalando do source (fallback)"
    python -m pip install -v --no-build-isolation "$SRC/csrc/layer_norm" || true
  fi
}
218
+
219
  build_apex () {
220
  local SRC="/app/wheels/src/apex"
221
  echo "[build] Preparando fonte Apex em ${SRC}"
 
260
  fi
261
  }
262
 
263
+ # ============================================================================
264
+ # PIPELINES DE GARANTIA
265
+ # ============================================================================
266
+
267
  ensure_pkg () {
268
  local PKG="$1" # apex | q8_kernels
269
  local CHECK_FN="$2" # check_apex | check_q8
270
  local BUILD_FN="$3" # build_apex | build_q8
 
271
  echo "[flow] === ${PKG} ==="
272
  if ${CHECK_FN}; then
273
  echo "[flow] ${PKG}: já instalado (import OK)"
274
  return 0
275
  fi
 
276
  echo "[flow] ${PKG}: tentando wheel do Hub (${SELF_HF_REPO_ID})"
277
+ HF_OUT="$(install_from_hf_by_prefix "$PKG" || true)"
278
  if [ -n "${HF_OUT:-}" ]; then
279
  WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
280
  echo "[hub] Baixado: ${WHEEL_PATH}"
 
288
  else
289
  echo "[hub] Nenhuma wheel compatível encontrada para ${PKG}"
290
  fi
 
291
  echo "[flow] ${PKG}: compilando (fallback)"
292
  ${BUILD_FN}
293
  if ${CHECK_FN}; then
294
  echo "[flow] ${PKG}: sucesso após compilação"
295
  return 0
296
  fi
297
+ echo "[flow] ${PKG}: falhou após build; seguindo"
298
+ return 1
299
+ }
300
 
301
# Ensure the FlashAttention fused layer-norm extension is importable:
#   1) import check, 2) prebuilt wheel from the HF repo, 3) source build.
# Returns 0 on success, 1 when every strategy fails (caller tolerates
# failure via `|| true`).
ensure_flashln () {
  # Locals: avoid clobbering the HF_OUT/WHEEL_PATH globals that ensure_pkg uses.
  local hf_out wheel_path
  echo "[flow] === flash_attn_layer_norm ==="
  if check_flashln; then
    echo "[flow] FlashAttn LayerNorm: já instalado (import OK)"
    return 0
  fi
  echo "[flow] FlashAttn LayerNorm: tentando wheel do Hub (${SELF_HF_REPO_ID})"
  hf_out="$(install_flashln_from_hf || true)"
  if [ -n "${hf_out:-}" ]; then
    # install_flashln_from_hf prints the local wheel path on its last line.
    wheel_path="$(printf "%s\n" "${hf_out}" | tail -n1)"
    echo "[hub] Baixado: ${wheel_path}"
    python -m pip install -v -U --no-build-isolation "${wheel_path}" || true
    if check_flashln; then
      echo "[flow] FlashAttn LayerNorm: sucesso via Hub (${wheel_path})"
      return 0
    else
      echo "[flow] FlashAttn LayerNorm: import falhou após wheel do Hub; compilando"
    fi
  else
    echo "[hub] Nenhuma wheel compatível encontrada para FlashAttn LayerNorm"
  fi
  echo "[flow] FlashAttn LayerNorm: compilando (fallback)"
  build_flashln
  if check_flashln; then
    echo "[flow] FlashAttn LayerNorm: sucesso após compilação"
    return 0
  fi
  echo "[flow] FlashAttn LayerNorm: falhou após build; seguindo"
  return 1
}
331
 
332
+ # ============================================================================
333
+ # EXECUÇÃO
334
+ # ============================================================================
335
+
336
+ # 1) FlashAttention LayerNorm (novo cache em wheel no HF)
337
+ ensure_flashln || true
338
+
339
+ # 2) Apex (mantém pipeline existente)
340
  ensure_pkg "apex" check_apex build_apex || true
 
341
 
342
+ # 3) Q8 kernels (opcional)
343
+ # ensure_pkg "q8_kernels" check_q8 build_q8 || true
344
 
345
+ # 4) Upload de wheels produzidas para o HF (cache cross-restarts)
346
  python - <<'PY'
347
  import os
348
  from huggingface_hub import HfApi, HfFolder
349
+
350
+ repo = os.environ.get("SELF_HF_REPO_ID","euIaxs22/Aduc-sdr")
351
+ token = os.getenv("HF_TOKEN") or HfFolder.get_token()
352
  if not token:
353
  raise SystemExit("HF_TOKEN ausente; upload desabilitado")
354
+
355
+ api = HfApi(token=token)
356
  api.upload_folder(
357
  folder_path="/app/wheels",
358
  repo_id=repo,
 
363
  print("Upload concluído (wheels + licença).")
364
  PY
365
 
 
366
  chmod -R 777 /app/wheels || true
367
  echo "✅ Builder finalizado."