Spaces:

buxiangzhiren
/

GeoRemover

Running on Zero

App Files Files Community

zixinz commited on Oct 3

Commit

2f713b7

1 Parent(s): 5458ff3

chore: ignore pyc and pycache

Browse files

Files changed (3) hide show

.gitignore +42 -0
app.py +352 -49
get_assets.sh +59 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,42 @@

+# Python caches
+__pycache__/
+*.py[cod]
+*.pyo
+*.pyd
+# Build / venv
+*.egg-info/
+.dist/
+build/
+dist/
+.venv/
+venv/
+# Weights / checkpoints (download via get_assets.sh)
+code_depth/checkpoints/
+code_edit/stage1/
+code_edit/stage2/
+# Large demo assets - don't version them in Space
+code_depth/assets/
+code_edit/assets/
+code_edit/example_data/
+# Common big binaries
+*.mp4
+*.mov
+*.avi
+*.webm
+*.mkv
+*.safetensors
+*.pth
+*.pt
+*.npz
+*.exr
+*.zip
+*.tar
+*.tar.gz
+*.7z
+# Node caches (if any)
+node_modules/

app.py CHANGED Viewed

@@ -1,87 +1,390 @@
-# app.py
 import os
 import pathlib
 import subprocess
 import gradio as gr
 import spaces
 import torch
-from PIL import Image
 BASE_DIR = pathlib.Path(__file__).resolve().parent
-SCRIPT_DIR = BASE_DIR / "code_depth"
-GET_WEIGHTS_SH = SCRIPT_DIR / "get_weights.sh"
-# 让我们能 import 到 code_depth/depth_infer.py
-import sys
-if str(SCRIPT_DIR) not in sys.path:
-    sys.path.append(str(SCRIPT_DIR))
-from depth_infer import DepthModel  # noqa
 def _ensure_executable(p: pathlib.Path):
     if not p.exists():
         raise FileNotFoundError(f"Not found: {p}")
     os.chmod(p, os.stat(p).st_mode | 0o111)
-def ensure_weights():
-    """在 code_depth 目录下运行你的 get_weights.sh。"""
-    _ensure_executable(GET_WEIGHTS_SH)
     subprocess.run(
-        ["bash", str(GET_WEIGHTS_SH)],
         check=True,
-        cwd=str(SCRIPT_DIR),
         env={**os.environ, "HF_HUB_DISABLE_TELEMETRY": "1"},
     )
-    ckpt_dir = SCRIPT_DIR / "checkpoints"
-    if not ckpt_dir.exists():
-        raise RuntimeError("weights download script ran but checkpoints/ not found")
-    return str(ckpt_dir)
-# 启动时下载权重（不开持久化时，若环境重建会再次下载）
 try:
-    CKPT_DIR = ensure_weights()
-    print(f"✅ Weights ready in: {CKPT_DIR}")
 except Exception as e:
-    print(f"⚠️ Failed to prepare weights: {e}")
-# 模型缓存（按 encoder 复用）
 _MODELS: dict[str, DepthModel] = {}
 def get_model(encoder: str) -> DepthModel:
     if encoder not in _MODELS:
         _MODELS[encoder] = DepthModel(BASE_DIR, encoder=encoder)
     return _MODELS[encoder]
 @spaces.GPU
-def infer_depth(
     image: Image.Image,
-    encoder: str = "vitl",
-    max_res: int = 1280,
-    input_size: int = 518,
-    fp32: bool = False,
-    grayscale: bool = False,
-) -> Image.Image:
-    # 这里才真正触发 CUDA 设备占用
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    print(f"[infer] device={device}, encoder={encoder}, max_res={max_res}, input_size={input_size}, fp32={fp32}, gray={grayscale}")
-    model = get_model(encoder)
-    return model.infer(image, max_res=max_res, input_size=input_size, fp32=fp32, grayscale=grayscale)
 with gr.Blocks() as demo:
-    gr.Markdown("## GeoRemover · Depth Preview (Video-Depth-Anything)")
     with gr.Row():
-        with gr.Column():
-            inp = gr.Image(label="Upload image", type="pil")
-            encoder = gr.Dropdown(["vits", "vitl"], value="vitl", label="Encoder")
-            max_res = gr.Slider(512, 2048, value=1280, step=64, label="Max resolution")
-            input_size = gr.Slider(256, 1024, value=518, step=2, label="Model input_size")
-            fp32 = gr.Checkbox(False, label="Use FP32 (default FP16)")
-            gray = gr.Checkbox(False, label="Grayscale depth")
-            btn = gr.Button("Run")
-        with gr.Column():
-            out = gr.Image(label="Depth visualization")
-    btn.click(fn=infer_depth, inputs=[inp, encoder, max_res, input_size, fp32, gray], outputs=[out])
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import os
+import sys
 import pathlib
 import subprocess
+import random
+from typing import Optional, Tuple
 import gradio as gr
 import spaces
 import torch
+from PIL import Image, ImageOps
+import numpy as np
+import cv2
+# ---------------- Paths & assets ----------------
 BASE_DIR = pathlib.Path(__file__).resolve().parent
+CODE_DEPTH = BASE_DIR / "code_depth"
+CODE_EDIT = BASE_DIR / "code_edit"
+GET_ASSETS = BASE_DIR / "get_assets.sh"
+EXPECTED_ASSETS = [
+    BASE_DIR / "code_depth" / "checkpoints" / "video_depth_anything_vits.pth",
+    BASE_DIR / "code_depth" / "checkpoints" / "video_depth_anything_vitl.pth",
+    BASE_DIR / "code_edit" / "stage1" / "checkpoint-4800" / "pytorch_lora_weights.safetensors",
+    BASE_DIR / "code_edit" / "stage2" / "checkpoint-20000" / "pytorch_lora_weights.safetensors",
+]
+# import depth helper
+if str(CODE_DEPTH) not in sys.path:
+    sys.path.insert(0, str(CODE_DEPTH))
+from depth_infer import DepthModel  # noqa: E402
+# import your custom diffusers
+if str(CODE_EDIT / "diffusers") not in sys.path:
+    sys.path.insert(0, str(CODE_EDIT / "diffusers"))
+from diffusers.pipelines.flux.pipeline_flux_fill_unmasked_image_condition_version import (  # type: ignore # noqa: E402
+    FluxFillPipeline_token12_depth_only as FluxFillPipeline,
+)
+# ---------------- Assets ensure (on-demand) ----------------
+def _have_all_assets() -> bool:
+    return all(p.is_file() for p in EXPECTED_ASSETS)
 def _ensure_executable(p: pathlib.Path):
     if not p.exists():
         raise FileNotFoundError(f"Not found: {p}")
     os.chmod(p, os.stat(p).st_mode | 0o111)
+def ensure_assets_if_missing():
+    if os.getenv("SKIP_ASSET_DOWNLOAD") == "1":
+        print("↪️  SKIP_ASSET_DOWNLOAD=1 -> 跳过资产下载检查")
+        return
+    if _have_all_assets():
+        print("✅ Assets already present")
+        return
+    print("⬇️  Missing assets, running get_assets.sh ...")
+    _ensure_executable(GET_ASSETS)
     subprocess.run(
+        ["bash", str(GET_ASSETS)],
         check=True,
+        cwd=str(BASE_DIR),
         env={**os.environ, "HF_HUB_DISABLE_TELEMETRY": "1"},
     )
+    if not _have_all_assets():
+        missing = [str(p.relative_to(BASE_DIR)) for p in EXPECTED_ASSETS if not p.exists()]
+        raise RuntimeError(f"Assets missing after get_assets.sh: {missing}")
+    print("✅ Assets ready.")
 try:
+    ensure_assets_if_missing()
 except Exception as e:
+    print(f"⚠️ Asset prepare failed: {e}")
+# ---------------- Global singletons ----------------
 _MODELS: dict[str, DepthModel] = {}
+_PIPE: Optional[FluxFillPipeline] = None
 def get_model(encoder: str) -> DepthModel:
     if encoder not in _MODELS:
         _MODELS[encoder] = DepthModel(BASE_DIR, encoder=encoder)
     return _MODELS[encoder]
+def get_pipe() -> FluxFillPipeline:
+    global _PIPE
+    if _PIPE is not None:
+        return _PIPE
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    dtype = torch.bfloat16 if device == "cuda" else torch.float32
+    print(f"[pipe] load FLUX.1-Fill-dev dtype={dtype}, device={device}")
+    pipe = FluxFillPipeline.from_pretrained("black-forest-labs/FLUX.1-Fill-dev", torch_dtype=dtype).to(device)
+    # LoRA（stage1）
+    lora_dir = CODE_EDIT / "stage1" / "checkpoint-4800"
+    if lora_dir.exists():
+        try:
+            pipe.load_lora_weights(str(lora_dir))  # 需要 peft
+            print(f"[pipe] loaded LoRA from: {lora_dir}")
+        except Exception as e:
+            print(f"[pipe] load LoRA failed (continue without): {e}")
+    else:
+        print(f"[pipe] LoRA path not found: {lora_dir} (continue without)")
+    _PIPE = pipe
+    return pipe
+# ---------------- Mask helpers ----------------
+def to_grayscale_mask(im: Image.Image) -> Image.Image:
+    """
+    将任意 RGBA/RGB/L 的图转为 L。
+    输出：白=需要移除/填充区域，黑=保留。
+    """
+    if im.mode == "RGBA":
+        mask = im.split()[-1]     # alpha as mask
+    else:
+        mask = im.convert("L")
+    # 简单二值化，去噪
+    mask = mask.point(lambda p: 255 if p > 16 else 0)
+    return mask  # 不做 invert，白色=mask区域
+def dilate_mask(mask_l: Image.Image, px: int) -> Image.Image:
+    """对白色区域做膨胀；px 约等于扩大像素。"""
+    if px <= 0:
+        return mask_l
+    arr = np.array(mask_l, dtype=np.uint8)
+    kernel = np.ones((3, 3), np.uint8)
+    iters = max(1, int(px // 2))  # 经验
+    dilated = cv2.dilate(arr, kernel, iterations=iters)
+    return Image.fromarray(dilated, mode="L")
+def _mask_from_red(img: Image.Image, out_size: Tuple[int, int]) -> Image.Image:
+    """
+    从一张 RGBA/RGB 图里提取“纯红笔迹”为二值蒙版（白=画笔，黑=其他）。
+    阈值稍微宽一点以容忍压缩/插值。
+    """
+    arr = np.array(img.convert("RGBA"))
+    r, g, b, a = arr[..., 0], arr[..., 1], arr[..., 2], arr[..., 3]
+    # 条件：红高、绿低、蓝低、且 alpha>0
+    red_hit = (r >= 200) & (g <= 40) & (b <= 40) & (a > 0)
+    mask = (red_hit.astype(np.uint8) * 255)
+    m = Image.fromarray(mask, mode="L").resize(out_size, Image.NEAREST)
+    return m
+def pick_mask(
+    upload_mask: Optional[Image.Image],
+    sketch_data: Optional[dict],
+    base_image: Image.Image,
+    dilate_px: int = 0,
+) -> Optional[Image.Image]:
+    """
+    规则：
+      1) 若用户上传了 mask：直接用（白=mask）
+      2) 否则从 ImageEditor 返回里只“认红色笔迹”为 mask：
+         - 先看 sketch_data['mask']（有些版本会给）
+         - 不然遍历 sketch_data['layers'][*]['image']，合并其中的红色笔迹
+         - 若还没有，再退到 sketch_data['composite'] 里找红色笔迹
+    """
+    # 1) 上传优先
+    if isinstance(upload_mask, Image.Image):
+        m = to_grayscale_mask(upload_mask).resize(base_image.size, Image.NEAREST)
+        return dilate_mask(m, dilate_px) if dilate_px > 0 else m
+    # 2) 手绘（ImageEditor）
+    if isinstance(sketch_data, dict):
+        # 2a) 显式 mask（仍然支持）
+        m = sketch_data.get("mask")
+        if isinstance(m, Image.Image):
+            m = to_grayscale_mask(m).resize(base_image.size, Image.NEAREST)
+            return dilate_mask(m, dilate_px) if dilate_px > 0 else m
+        # 2b) 从 layers 里合并红色笔迹
+        layers = sketch_data.get("layers")
+        acc = None
+        if isinstance(layers, list) and layers:
+            acc = Image.new("L", base_image.size, 0)
+            for lyr in layers:
+                if not isinstance(lyr, dict):
+                    continue
+                li = lyr.get("image") or lyr.get("mask")
+                if isinstance(li, Image.Image):
+                    m_layer = _mask_from_red(li, base_image.size)
+                    # 合并：有任一层画过就算 mask
+                    acc = ImageOps.lighter(acc, m_layer)
+            if acc.getbbox() is not None:
+                return dilate_mask(acc, dilate_px) if dilate_px > 0 else acc
+        # 2c) 最后从 composite 里找红色笔迹
+        comp = sketch_data.get("composite")
+        if isinstance(comp, Image.Image):
+            m_comp = _mask_from_red(comp, base_image.size)
+            if m_comp.getbbox() is not None:
+                return dilate_mask(m_comp, dilate_px) if dilate_px > 0 else m_comp
+    # 3) 没拿到就返回 None（后面会提示“需要掩码”）
+    return None
+def _round_mult64(x: float, mode: str = "nearest") -> int:
+    """
+    把 x 对齐到 64 的倍数：
+      - mode="ceil"    向上取整
+      - mode="floor"   向下取整
+      - mode="nearest" 最近的倍数
+    """
+    if mode == "ceil":
+        return int((x + 63) // 64) * 64
+    elif mode == "floor":
+        return int(x // 64) * 64
+    else:  # nearest
+        return int((x + 32) // 64) * 64
+def prepare_size_for_flux(img: Image.Image, target_max: int = 1024) -> tuple[int, int]:
+    """
+    步骤：
+    1) 先把原始 w,h 向上对齐到 64 的倍数（避免小图过小）
+    2) 把长边固定为 target_max(默认1024)
+    3) 短边按比例缩放并对齐到 64 的倍数（至少 64）
+    """
+    w, h = img.size
+    # 1) 先各自向上对齐到 64 的倍数
+    w1 = max(64, _round_mult64(w, mode="ceil"))
+    h1 = max(64, _round_mult64(h, mode="ceil"))
+    # 2) 固定长边为 target_max，短边按比例
+    if w1 >= h1:
+        out_w = target_max  # 长边固定 1024
+        scaled_h = h1 * (target_max / w1)
+        out_h = max(64, _round_mult64(scaled_h, mode="nearest"))
+    else:
+        out_h = target_max
+        scaled_w = w1 * (target_max / h1)
+        out_w = max(64, _round_mult64(scaled_w, mode="nearest"))
+    return int(out_w), int(out_h)
+# ---------------- Preview depth for canvas (彩色) ----------------
+def preview_depth(image: Optional[Image.Image], encoder: str, max_res: int, input_size: int, fp32: bool):
+    if image is None:
+        return None
+    dm = get_model(encoder)
+    # 彩色可视化（RGB），严格按你之前的 colormap 风格
+    d_rgb = dm.infer(image=image, max_res=max_res, input_size=input_size, fp32=fp32, grayscale=False)
+    return d_rgb
+def prepare_canvas(image, depth_img, source):
+    base = depth_img if source == "depth" else image
+    if base is None:
+        raise gr.Error("请先上传图片（并等待深度预览出来），再点击\"Prepare canvas\"。")
+    # 对 ImageEditor 用通用的 gr.update 来设置 value
+    return gr.update(value=base)
+# ---------------- Two-stage pipeline: depth(color) -> fill ----------------
 @spaces.GPU
+def run_depth_and_fill(
     image: Image.Image,
+    mask_upload: Optional[Image.Image],
+    sketch: Optional[dict],
+    prompt: str,
+    encoder: str,
+    max_res: int,
+    input_size: int,
+    fp32: bool,
+    max_side: int,
+    mask_dilate_px: int,
+    guidance_scale: float,
+    steps: int,
+    seed: Optional[int],
+) -> Tuple[Image.Image, Image.Image]:
+    if image is None:
+        raise gr.Error("请先上传一张图片。")
+    # 1) 生成彩色深度图（RGB）
+    depth_model = get_model(encoder)
+    depth_rgb: Image.Image = depth_model.infer(
+        image=image, max_res=max_res, input_size=input_size, fp32=fp32, grayscale=False
+    ).convert("RGB")
+    print(f"[DEBUG] Depth RGB: mode={depth_rgb.mode}, size={depth_rgb.size}")
+    # 2) 提取 mask（上传 > 手绘）
+    mask_l = pick_mask(mask_upload, sketch, image, dilate_px=mask_dilate_px)
+    if (mask_l is None) or (mask_l.getbbox() is None):
+        raise gr.Error("没有检测到有效的 mask：请确认已在画布上涂抹或上传 mask 图片。")
+    print(f"[DEBUG] Mask: mode={mask_l.mode}, size={mask_l.size}, bbox={mask_l.getbbox()}")
+    # 3) 确定输出尺寸
+    width, height = prepare_size_for_flux(depth_rgb, target_max=max_side)
+    orig_w, orig_h = image.size
+    print(f"[DEBUG] FLUX size: {width}x{height}, original: {orig_w}x{orig_h}")
+    # 4) 运行 FLUX pipeline
+    # 关键修复：image 参数应该传入 depth_rgb 而不是原图
+    pipe = get_pipe()
+    generator = torch.Generator("cpu").manual_seed(int(seed)) if (seed is not None and seed >= 0) else torch.Generator("cpu").manual_seed(random.randint(0, 2**31 - 1))
+    result = pipe(
+        prompt=prompt,
+        image=depth_rgb,           # 修复：传入彩色深度图而不是原图
+        mask_image=mask_l,
+        width=width,
+        height=height,
+        guidance_scale=float(guidance_scale),
+        num_inference_steps=int(steps),
+        max_sequence_length=512,
+        generator=generator,
+        depth=depth_rgb,           # depth 参数也传入彩色深度图
+    ).images[0]
+    final_result = result.resize((orig_w, orig_h), Image.BICUBIC)
+    # 返回结果和 mask 预览
+    mask_preview = mask_l.resize((orig_w, orig_h), Image.NEAREST).convert("RGB")
+    return final_result, mask_preview
+# ---------------- UI ----------------
 with gr.Blocks() as demo:
+    gr.Markdown("## GeoRemover · Depth Removal (Depth(color) → FLUX Fill)")
     with gr.Row():
+        with gr.Column(scale=1):
+            # 输入图
+            img = gr.Image(label="Upload image", type="pil")
+            # Mask 两种方式：上传 or 画
+            with gr.Tab("Upload mask"):
+                mask_upload = gr.Image(label="Mask (optional)", type="pil")
+            with gr.Tab("Draw mask"):
+                draw_source = gr.Radio(["image", "depth"], value="image", label="Draw on")
+                prepare_btn = gr.Button("Prepare canvas")
+                sketch = gr.ImageEditor(
+                    label="Sketch mask (draw with brush)",
+                    type="pil",
+                    # 画笔只给纯红，方便我们精确提取笔迹
+                    brush=gr.Brush(colors=["#FF0000"], default_size=24)
+                )
+            # prompt
+            prompt = gr.Textbox(label="Prompt", value="A beautiful scene")
+            # 可调参数
+            with gr.Accordion("Advanced (Depth & FLUX)", open=False):
+                encoder = gr.Dropdown(["vits", "vitl"], value="vitl", label="Depth encoder")
+                max_res = gr.Slider(512, 2048, value=1280, step=64, label="Depth: max_res")
+                input_size = gr.Slider(256, 1024, value=518, step=2, label="Depth: input_size")
+                fp32 = gr.Checkbox(False, label="Depth: use FP32 (default FP16)")
+                max_side = gr.Slider(512, 1536, value=1024, step=64, label="FLUX: max side (px)")
+                mask_dilate_px = gr.Slider(0, 128, value=0, step=1, label="Mask dilation (px)")
+                guidance_scale = gr.Slider(0, 50, value=30, step=0.5, label="FLUX: guidance_scale")
+                steps = gr.Slider(10, 75, value=50, step=1, label="FLUX: steps")
+                seed = gr.Number(value=0, precision=0, label="Seed (>=0 固定；留空随机)")
+            run_btn = gr.Button("Run", variant="primary")
+        with gr.Column(scale=1):
+            depth_preview = gr.Image(label="Depth preview (colored)", interactive=False)
+            mask_preview = gr.Image(label="Mask preview (what will be removed)", interactive=False)
+            out = gr.Image(label="Output")
+    # 事件：上传图片后生成"彩色深度预览"
+    img.change(
+        fn=preview_depth,
+        inputs=[img, encoder, max_res, input_size, fp32],
+        outputs=[depth_preview],
+    )
+    # 准备画布：把原图或"彩色深度图"放进 ImageEditor
+    prepare_btn.click(
+        fn=prepare_canvas,
+        inputs=[img, depth_preview, draw_source],
+        outputs=[sketch],
+    )
+    # 运行
+    run_btn.click(
+        fn=run_depth_and_fill,
+        inputs=[img, mask_upload, sketch, prompt, encoder, max_res, input_size, fp32,
+                max_side, mask_dilate_px, guidance_scale, steps, seed],
+        outputs=[out, mask_preview],
+        api_name="run",
+    )
 if __name__ == "__main__":
+    os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")
+    demo.launch(server_name="0.0.0.0", server_port=7860)

get_assets.sh ADDED Viewed

	@@ -0,0 +1,59 @@

+#!/usr/bin/env bash
+set -euo pipefail
+# 必要目录
+mkdir -p code_depth/checkpoints \
+         code_edit/stage1/checkpoint-4800 \
+         code_edit/stage2/checkpoint-20000
+# 通用下载函数：优先 curl，退回 wget；内建重试和断点续传
+fetch() {
+  local url="$1"
+  local out="$2"
+  # 已存在就跳过
+  if [ -s "$out" ]; then
+    echo "✔ Exists: $out"
+    return 0
+  fi
+  echo "↓ Fetch: $url -> $out"
+  if command -v curl >/dev/null 2>&1; then
+    # --retry 对网络/5xx/超时都重试；-C - 断点续传；-f 让 4xx/5xx 变为非0退出
+    curl -fL --retry 5 --retry-all-errors --connect-timeout 20 -C - -o "$out" "${url}?download=1"
+  else
+    # wget 也加 tries 与 continue
+    wget --tries=5 -c -O "$out" "${url}?download=1"
+  fi
+}
+# 1) VDA 权重
+fetch "https://huggingface.co/depth-anything/Video-Depth-Anything-Small/resolve/main/video_depth_anything_vits.pth" \
+      "code_depth/checkpoints/video_depth_anything_vits.pth"
+fetch "https://huggingface.co/depth-anything/Video-Depth-Anything-Large/resolve/main/video_depth_anything_vitl.pth" \
+      "code_depth/checkpoints/video_depth_anything_vitl.pth"
+# 2) 你的 stage1 / stage2 两个 safetensors
+fetch "https://huggingface.co/buxiangzhiren/GeoRemover/resolve/main/stage1/checkpoint-4800/pytorch_lora_weights.safetensors" \
+      "code_edit/stage1/checkpoint-4800/pytorch_lora_weights.safetensors"
+fetch "https://huggingface.co/buxiangzhiren/GeoRemover/resolve/main/stage2/checkpoint-20000/pytorch_lora_weights.safetensors" \
+      "code_edit/stage2/checkpoint-20000/pytorch_lora_weights.safetensors"
+# 最终校验：缺哪个报名字
+missing=()
+need=(
+  "code_depth/checkpoints/video_depth_anything_vits.pth"
+  "code_depth/checkpoints/video_depth_anything_vitl.pth"
+  "code_edit/stage1/checkpoint-4800/pytorch_lora_weights.safetensors"
+  "code_edit/stage2/checkpoint-20000/pytorch_lora_weights.safetensors"
+)
+for f in "${need[@]}"; do
+  [ -s "$f" ] || missing+=("$f")
+done
+if [ ${#missing[@]} -ne 0 ]; then
+  echo "❌ Missing after download:" >&2
+  printf ' - %s\n' "${missing[@]}" >&2
+  exit 1
+fi
+echo "✅ All assets ready."

chore: ignore pyc and __pycache__

chore: ignore pyc and pycache