Spaces: Running on Zero

zixinz committed · Commit a01e858 · Parent(s): 134053b
chore: ignore pyc and __pycache__

app.py CHANGED
@@ -4,6 +4,7 @@ import sys, pathlib
 BASE_DIR = pathlib.Path(__file__).resolve().parent
 LOCAL_DIFFUSERS_SRC = BASE_DIR / "code_edit" / "diffusers" / "src"
 
+# Ensure local diffusers is importable
 if (LOCAL_DIFFUSERS_SRC / "diffusers").exists():
     sys.path.insert(0, str(LOCAL_DIFFUSERS_SRC))
 else:
@@ -20,11 +21,9 @@ from diffusers.pipelines.flux.pipeline_flux_fill_unmasked_image_condition_versio
 # ===========================================================================
 
 import os
-import sys
-import pathlib
 import subprocess
 import random
-from typing import Optional, Tuple
+from typing import Optional, Tuple, Dict, Any
 
 import torch
 from PIL import Image, ImageOps
@@ -44,19 +43,19 @@ EXPECTED_ASSETS = [
     BASE_DIR / "code_edit" / "stage2" / "checkpoint-20000" / "pytorch_lora_weights.safetensors",
 ]
 
-#
+# Import depth helper
 if str(CODE_DEPTH) not in sys.path:
     sys.path.insert(0, str(CODE_DEPTH))
 from depth_infer import DepthModel  # noqa: E402
 
-#
+# Import your custom diffusers (local fork)
 if str(CODE_EDIT / "diffusers") not in sys.path:
     sys.path.insert(0, str(CODE_EDIT / "diffusers"))
 from diffusers.pipelines.flux.pipeline_flux_fill_unmasked_image_condition_version import (  # type: ignore # noqa: E402
     FluxFillPipeline_token12_depth_only as FluxFillPipeline,
 )
 
-# ----------------
+# ---------------- Asset preparation (on-demand) ----------------
 def _have_all_assets() -> bool:
     return all(p.is_file() for p in EXPECTED_ASSETS)
 
@@ -66,6 +65,10 @@ def _ensure_executable(p: pathlib.Path):
     os.chmod(p, os.stat(p).st_mode | 0o111)
 
 def ensure_assets_if_missing():
+    """
+    If SKIP_ASSET_DOWNLOAD=1 -> skip checks.
+    Otherwise ensure checkpoints/LoRAs exist; if missing, run get_assets.sh.
+    """
     if os.getenv("SKIP_ASSET_DOWNLOAD") == "1":
         print("↪️ SKIP_ASSET_DOWNLOAD=1 -> skip asset download check")
         return
@@ -91,7 +94,7 @@ except Exception as e:
     print(f"⚠️ Asset prepare failed: {e}")
 
 # ---------------- Global singletons ----------------
-_MODELS:
+_MODELS: Dict[str, DepthModel] = {}
 _PIPE: Optional[FluxFillPipeline] = None
 # ==== STAGE-2 ONLY ADDED: singleton ====
 _PIPE_STAGE2: Optional[FluxFillPipelineStage2] = None
@@ -103,6 +106,9 @@ def get_model(encoder: str) -> DepthModel:
     return _MODELS[encoder]
 
 def get_pipe() -> FluxFillPipeline:
+    """
+    Load Stage-1 pipeline (FluxFillPipeline_token12_depth_only) and mount Stage-1 LoRA if present.
+    """
     global _PIPE
     if _PIPE is not None:
         return _PIPE
@@ -124,11 +130,8 @@ def get_pipe() -> FluxFillPipeline:
     print(f"[pipe] loading FLUX.1-Fill-dev (dtype={dtype}, device={device}, local={use_local})")
     try:
         if use_local:
-            pipe = FluxFillPipeline.from_pretrained(
-                local_flux, torch_dtype=dtype
-            ).to(device)
+            pipe = FluxFillPipeline.from_pretrained(local_flux, torch_dtype=dtype).to(device)
         else:
-            # Fetch online (requires gated access + token)
             pipe = FluxFillPipeline.from_pretrained(
                 "black-forest-labs/FLUX.1-Fill-dev",
                 torch_dtype=dtype,
@@ -137,31 +140,28 @@ def get_pipe() -> FluxFillPipeline:
     except Exception as e:
         raise RuntimeError(
             "Failed to load FLUX.1-Fill-dev. "
-            "
-            "or pre-download to a local cache directory."
+            "Ensure gated access and HF_TOKEN; or pre-download to local cache."
         ) from e
 
-    # -------- LoRA (
+    # -------- LoRA (Stage-1) --------
    lora_dir = CODE_EDIT / "stage1" / "checkpoint-4800"
-    lora_file = "pytorch_lora_weights.safetensors"
+    lora_file = "pytorch_lora_weights.safetensors"
     adapter_name = "stage1"
 
     if lora_dir.exists():
         try:
-            import peft  # assert backend
+            import peft  # assert backend presence
             print(f"[pipe] loading LoRA from: {lora_dir}/{lora_file}")
             pipe.load_lora_weights(
                 str(lora_dir),
-                weight_name=lora_file,
-                adapter_name=adapter_name
+                weight_name=lora_file,
+                adapter_name=adapter_name,
             )
-            # Newer diffusers prefer set_adapters
             try:
                 pipe.set_adapters(adapter_name, scale=1.0)
-                print(f"[pipe] set_adapters('{adapter_name}',
+                print(f"[pipe] set_adapters('{adapter_name}', 1.0)")
             except Exception as e_set:
                 print(f"[pipe] set_adapters not available ({e_set}); trying fuse_lora()")
-                # Older / pipelines without set_adapters: fuse LoRA
                 try:
                     pipe.fuse_lora(lora_scale=1.0)
                     print("[pipe] fuse_lora(lora_scale=1.0) done")
@@ -169,7 +169,7 @@ def get_pipe() -> FluxFillPipeline:
                     print(f"[pipe] fuse_lora failed: {e_fuse}")
             print("[pipe] LoRA ready ✅")
         except ImportError:
-            print("[pipe] peft not installed; LoRA
+            print("[pipe] peft not installed; LoRA skipped (add `peft>=0.11`).")
         except Exception as e:
             print(f"[pipe] load_lora_weights failed (continue without): {e}")
     else:
@@ -181,7 +181,7 @@ def get_pipe() -> FluxFillPipeline:
 # ==== STAGE-2 ONLY ADDED: Stage-2 loader (no change to Stage-1 logic) ====
 def get_pipe_stage2() -> FluxFillPipelineStage2:
     """
-    Load Stage-2 FluxFillPipeline_token12_depth and mount
+    Load Stage-2 FluxFillPipeline_token12_depth and mount Stage-2 LoRA.
     """
     global _PIPE_STAGE2
     if _PIPE_STAGE2 is not None:
@@ -230,16 +230,13 @@ def get_pipe_stage2() -> FluxFillPipelineStage2:
         raise RuntimeError(f"Stage-2 LoRA dir not found: {lora_dir2}")
     if weight_name is None:
         raise RuntimeError(
-            f"Stage-2 LoRA weight not found under {lora_dir2}. "
-            f"Tried: {candidate_names}"
+            f"Stage-2 LoRA weight not found under {lora_dir2}. Tried: {candidate_names}"
         )
 
     try:
         import peft  # noqa: F401
     except Exception as e:
-        raise RuntimeError(
-            "peft is not installed (requires peft>=0.11 to load LoRA)."
-        ) from e
+        raise RuntimeError("peft is not installed (requires peft>=0.11).") from e
 
     try:
         print(f"[stage2] loading LoRA: {lora_dir2}/{weight_name}")
@@ -272,15 +269,15 @@ def to_grayscale_mask(im: Image.Image) -> Image.Image:
     Output: white = region to remove/fill, black = keep.
     """
     if im.mode == "RGBA":
-        mask = im.split()[-1]
+        mask = im.split()[-1]  # alpha as mask
     else:
         mask = im.convert("L")
-    #
+    # Simple binarization & denoise
     mask = mask.point(lambda p: 255 if p > 16 else 0)
-    return mask  #
+    return mask  # Do not invert; white = mask region
 
 def dilate_mask(mask_l: Image.Image, px: int) -> Image.Image:
-    """Dilate white region by ~px pixels."""
+    """Dilate the white region by ~px pixels."""
     if px <= 0:
         return mask_l
     arr = np.array(mask_l, dtype=np.uint8)
@@ -291,15 +288,12 @@ def dilate_mask(mask_l: Image.Image, px: int) -> Image.Image:
 
 def _mask_from_red(img: Image.Image, out_size: Tuple[int, int]) -> Image.Image:
     """
-    Extract "pure red strokes" as a binary mask (white=brush, black=others) from
-    Thresholds are
+    Extract "pure red strokes" as a binary mask (white=brush, black=others) from RGBA/RGB.
+    Thresholds are lenient to tolerate compression/resampling.
     """
     arr = np.array(img.convert("RGBA"))
     r, g, b, a = arr[..., 0], arr[..., 1], arr[..., 2], arr[..., 3]
-
-    # condition: high red, low green/blue, and alpha>0
     red_hit = (r >= 200) & (g <= 40) & (b <= 40) & (a > 0)
-
     mask = (red_hit.astype(np.uint8) * 255)
     m = Image.fromarray(mask, mode="L").resize(out_size, Image.NEAREST)
     return m
@@ -311,9 +305,9 @@ def pick_mask(
     dilate_px: int = 0,
 ) -> Optional[Image.Image]:
     """
-
-    1) If
-    2)
+    Selection rules:
+    1) If a mask is uploaded: use it directly (white=mask)
+    2) Else from ImageEditor output, only red strokes are recognized as mask:
        - Try sketch_data['mask'] first (some versions provide it)
        - Else merge red strokes from sketch_data['layers'][*]['image']
        - If still none, try sketch_data['composite'] for red strokes
@@ -342,8 +336,7 @@ def pick_mask(
                 li = lyr.get("image") or lyr.get("mask")
                 if isinstance(li, Image.Image):
                     m_layer = _mask_from_red(li, base_image.size)
-
-                    acc = ImageOps.lighter(acc, m_layer)
+                    acc = ImageOps.lighter(acc, m_layer)  # union
         if acc.getbbox() is not None:
             return dilate_mask(acc, dilate_px) if dilate_px > 0 else acc
 
@@ -354,10 +347,9 @@ def pick_mask(
     if m_comp.getbbox() is not None:
         return dilate_mask(m_comp, dilate_px) if dilate_px > 0 else m_comp
 
-    # 3)
+    # 3) No valid mask
     return None
 
-
 def _round_mult64(x: float, mode: str = "nearest") -> int:
     """
     Align x to a multiple of 64:
@@ -375,17 +367,14 @@ def _round_mult64(x: float, mode: str = "nearest") -> int:
 def prepare_size_for_flux(img: Image.Image, target_max: int = 1024) -> tuple[int, int]:
     """
     Steps:
-    1)
+    1) Round w,h up to multiples of 64 (avoid too-small sizes)
     2) Fix the long side to target_max (default 1024)
-    3) Scale the short side proportionally and align to a multiple of 64 (
+    3) Scale the short side proportionally and align to a multiple of 64 (>= 64)
     """
     w, h = img.size
-
-    # 1) round each up to multiple of 64
     w1 = max(64, _round_mult64(w, mode="ceil"))
     h1 = max(64, _round_mult64(h, mode="ceil"))
 
-    # 2) fix long side to target_max; scale short side
     if w1 >= h1:
         out_w = target_max
         scaled_h = h1 * (target_max / w1)
@@ -403,7 +392,6 @@ def preview_depth(image: Optional[Image.Image], encoder: str, max_res: int, inpu
     if image is None:
         return None
     dm = get_model(encoder)
-    # colored visualization (RGB), consistent with your previous colormap style
     d_rgb = dm.infer(image=image, max_res=max_res, input_size=input_size, fp32=fp32, grayscale=False)
     return d_rgb
 
@@ -411,10 +399,9 @@ def prepare_canvas(image, depth_img, source):
     base = depth_img if source == "depth" else image
     if base is None:
         raise gr.Error('Please upload an image (and wait for the depth preview), then click "Prepare canvas".')
-    # Use a generic gr.update to set ImageEditor value
     return gr.update(value=base)
 
-# ----------------
+# ---------------- Stage-1: depth(color) -> fill ----------------
 @spaces.GPU
 def run_depth_and_fill(
     image: Image.Image,
@@ -439,14 +426,14 @@ def run_depth_and_fill(
     depth_rgb: Image.Image = depth_model.infer(
         image=image, max_res=max_res, input_size=input_size, fp32=fp32, grayscale=False
     ).convert("RGB")
-
+
     print(f"[DEBUG] Depth RGB: mode={depth_rgb.mode}, size={depth_rgb.size}")
 
     # 2) extract mask (uploaded > drawn)
     mask_l = pick_mask(mask_upload, sketch, image, dilate_px=mask_dilate_px)
     if (mask_l is None) or (mask_l.getbbox() is None):
-        raise gr.Error("No valid mask detected: please draw
-
+        raise gr.Error("No valid mask detected: please draw with the red brush or upload a binary mask.")
+
     print(f"[DEBUG] Mask: mode={mask_l.mode}, size={mask_l.size}, bbox={mask_l.getbbox()}")
 
     # 3) decide output size
@@ -454,14 +441,17 @@ def run_depth_and_fill(
     orig_w, orig_h = image.size
     print(f"[DEBUG] FLUX size: {width}x{height}, original: {orig_w}x{orig_h}")
 
-    # 4) run FLUX pipeline
-    # Key fix: pass depth_rgb as `image` instead of the original image
+    # 4) run FLUX pipeline (key: use depth_rgb as both image and depth input)
     pipe = get_pipe()
-    generator =
+    generator = (
+        torch.Generator("cpu").manual_seed(int(seed))
+        if (seed is not None and seed >= 0)
+        else torch.Generator("cpu").manual_seed(random.randint(0, 2**31 - 1))
+    )
 
     result = pipe(
         prompt=prompt,
-        image=depth_rgb,  #
+        image=depth_rgb,  # use the colored depth map instead of original image
         mask_image=mask_l,
         width=width,
         height=height,
@@ -469,11 +459,11 @@ def run_depth_and_fill(
         num_inference_steps=int(steps),
         max_sequence_length=512,
         generator=generator,
-        depth=depth_rgb,  #
+        depth=depth_rgb,  # feed depth (colored)
     ).images[0]
 
     final_result = result.resize((orig_w, orig_h), Image.BICUBIC)
-
+
     # return result and mask preview
     mask_preview = mask_l.resize((orig_w, orig_h), Image.NEAREST).convert("RGB")
     return final_result, mask_preview
@@ -482,21 +472,20 @@ def _to_pil_rgb(img_like) -> Image.Image:
     """Normalize input to PIL RGB. Supports PIL/L/RGBA/np.array."""
     if isinstance(img_like, Image.Image):
         return img_like.convert("RGB")
-    # numpy array -> PIL
     try:
         arr = np.array(img_like)
-        if arr.ndim == 2:
+        if arr.ndim == 2:
             arr = np.stack([arr, arr, arr], axis=-1)
         return Image.fromarray(arr.astype(np.uint8), mode="RGB")
     except Exception:
-        raise gr.Error("Stage-2: `depth` / `depth_image` is not a valid image
+        raise gr.Error("Stage-2: `depth` / `depth_image` is not a valid image object.")
 
-#
+# ---------------- Stage-2: REQUIRED refine/render ----------------
 @spaces.GPU
 def run_stage2_refine(
     image: Image.Image,                          # original image (RGB)
     stage1_out: Image.Image,                     # output from Stage-1
-    depth_img_from_stage1_input: Image.Image,  #
+    depth_img_from_stage1_input: Image.Image,    # Stage-1 depth preview (from UI)
     mask_upload: Optional[Image.Image],
     sketch: Optional[dict],
     prompt: str,
@@ -510,34 +499,38 @@ def run_stage2_refine(
     seed: Optional[int],
 ) -> Image.Image:
     if image is None or stage1_out is None:
-        raise gr.Error("Please complete Stage-1
+        raise gr.Error("Please complete Stage-1 first (needs original image and Stage-1 output).")
 
-    #
+    # Allow refine without mask (use all-black)
     mask_l = pick_mask(mask_upload, sketch, image, dilate_px=0)
     if (mask_l is None) or (mask_l.getbbox() is None):
         mask_l = Image.new("L", image.size, 0)
 
-    #
+    # Unify sizes
     width, height = prepare_size_for_flux(image, target_max=max_side)
     orig_w, orig_h = image.size
 
     pipe2 = get_pipe_stage2()
-    g2 =
+    g2 = (
+        torch.Generator("cpu").manual_seed(int(seed))
+        if (seed is not None and seed >= 0)
         else torch.Generator("cpu").manual_seed(random.randint(0, 2**31 - 1))
+    )
     depth_pil = _to_pil_rgb(stage1_out)  # for `depth`
     depth_image_pil = _to_pil_rgb(depth_img_from_stage1_input)  # for `depth_image`
-    image_rgb = _to_pil_rgb(image)
+    image_rgb = _to_pil_rgb(image)
 
-    #
+    # Resize to (width, height)
     depth_pil = depth_pil.resize((width, height), Image.BICUBIC)
     depth_image_pil = depth_image_pil.resize((width, height), Image.BICUBIC)
-
-    #
-    #
-    #
+
+    # Mapping:
+    #   image       = original RGB
+    #   depth       = Stage-1 output (updated geometry)
+    #   depth_image = Stage-1 input depth (UI depth preview)
     out2 = pipe2(
         prompt=prompt,
-        image=image,
+        image=image,  # original image
         mask_image=mask_l,
         width=width,
         height=height,
@@ -545,99 +538,207 @@ def run_stage2_refine(
         num_inference_steps=int(steps),
         max_sequence_length=512,
         generator=g2,
-        depth=depth_pil,
-        depth_image=depth_image_pil,
+        depth=depth_pil,
+        depth_image=depth_image_pil,
     ).images[0]
 
-    out2 = out2.resize((orig_w * 3, orig_h), Image.BICUBIC)  #
+    out2 = out2.resize((orig_w * 3, orig_h), Image.BICUBIC)  # keep your 3× showcase layout
     return out2
 
-# ===================================================================
-
 # ---------------- UI ----------------
 with gr.Blocks() as demo:
-    gr.Markdown(
+    gr.Markdown(
+        """
+        # GeoRemover · Depth-Guided Object Removal (Two-Stage, Stage-2 REQUIRED)
+
+        **Pipeline overview**
+        1) Compute a **colored depth map** from your input image.
+        2) You create a **removal mask** (red brush or upload).
+        3) **Stage-1** runs FLUX Fill with depth guidance to get a first pass.
+        4) **Stage-2 (REQUIRED)** renders the final result from depth → image using Stage-1 output and the original depth.
+
+        > ⚠️ **Stage-2 is required.** Always click **Run Stage-2 (Render)** *after* Stage-1 finishes. Stage-1 alone is not the final output.
+
+        ---
+
+        ### Quick start
+        1. **Upload image** (left). Wait for **Depth preview (colored)** (right).
+        2. In **Draw mask**, pick **Draw on: _image_** or **_depth_**, then click **Prepare canvas**.
+        3. Paint the region to remove using the **red brush** (**red = remove**).
+        4. Optionally adjust **Mask dilation** for thin edges.
+        5. Enter a concise **Prompt** describing the fill content.
+        6. Click **Run** → produces **Stage-1** (first pass).
+        7. Click **Run Stage-2 (Render)** → produces the **final** result.
+
+        ---
+
+        ### Mask rules & tips
+        - Only **red strokes** are treated as mask (**white = remove, black = keep** internally).
+        - Paint **slightly larger** than the object boundary to avoid seams/halos.
+        - If you have a binary mask already, use **Upload mask**.
+        - **Mask dilation (px)** expands the mask to cover thin borders.
+        """
+    )
 
     with gr.Row():
         with gr.Column(scale=1):
-            #
-            img = gr.Image(
+            # Input image
+            img = gr.Image(
+                label="Upload image",
+                type="pil",
+            )
 
             # Mask: upload or draw
             with gr.Tab("Upload mask"):
-                mask_upload = gr.Image(
+                mask_upload = gr.Image(
+                    label="Mask (optional)",
+                    type="pil",
+                )
 
             with gr.Tab("Draw mask"):
-                draw_source = gr.Radio(
-
+                draw_source = gr.Radio(
+                    ["image", "depth"],
+                    value="image",
+                    label="Draw on",
+                )
+                prepare_btn = gr.Button("Prepare canvas", variant="secondary")
+                gr.Markdown(
+                    """
+                    **Canvas usage**
+                    - Click **Prepare canvas** after selecting *image* or *depth*.
+                    - Use the **red brush** only—red strokes are extracted as the removal mask.
+                    - Switch tabs anytime if you prefer uploading a ready-made mask.
+                    """
+                )
                 sketch = gr.ImageEditor(
-                    label="Sketch mask (
+                    label="Sketch mask (red = remove)",
                     type="pil",
-                    #
-                    brush=gr.Brush(colors=["#FF0000"], default_size=24)
+                    brush=gr.Brush(colors=["#FF0000"], default_size=24),
                 )
 
-            #
-            prompt = gr.Textbox(
+            # Prompt
+            prompt = gr.Textbox(
+                label="Prompt",
+                value="A beautiful scene",
+                placeholder="don't change it",
+            )
 
-            #
+            # Tunables
             with gr.Accordion("Advanced (Depth & FLUX)", open=False):
-                encoder = gr.Dropdown(
-
-
-
-
-
-
-
-
+                encoder = gr.Dropdown(
+                    ["vits", "vitl"],
+                    value="vitl",
+                    label="Depth encoder",
+                )
+                max_res = gr.Slider(
+                    512, 2048, value=1280, step=64,
+                    label="Depth: max_res",
+                )
+                input_size = gr.Slider(
+                    256, 1024, value=518, step=2,
+                    label="Depth: input_size",
+                )
+                fp32 = gr.Checkbox(
+                    False,
+                    label="Depth: use FP32 (default FP16)",
+                )
+                max_side = gr.Slider(
+                    512, 1536, value=1024, step=64,
+                    label="FLUX: max side (px)",
+                )
+                mask_dilate_px = gr.Slider(
+                    0, 128, value=0, step=1,
+                    label="Mask dilation (px)",
+                )
+                guidance_scale = gr.Slider(
+                    0, 50, value=30, step=0.5,
+                    label="FLUX: guidance_scale",
+                )
+                steps = gr.Slider(
+                    10, 75, value=50, step=1,
+                    label="FLUX: steps",
+                )
+                seed = gr.Number(
+                    value=0, precision=0,
+                    label="Seed (>=0 = fixed; empty = random)",
+                )
 
             run_btn = gr.Button("Run", variant="primary")
-            #
-            run_btn_stage2 = gr.Button("Run Stage-2 (
-            # =================================================
+            # Stage-2 is REQUIRED: keep disabled until Stage-1 finishes
+            run_btn_stage2 = gr.Button("Run Stage-2 (Render)", variant="secondary", interactive=False)
 
         with gr.Column(scale=1):
-            depth_preview = gr.Image(
-
-
-
-
-
-
-
+            depth_preview = gr.Image(
+                label="Depth preview (colored)",
+                interactive=False,
+            )
+            mask_preview = gr.Image(
+                label="Mask preview (areas to remove)",
+                interactive=False,
+            )
+            out = gr.Image(
+                label="Output (Stage-1 first pass)",
+            )
+            out_stage2 = gr.Image(
+                label="Final Output (Stage-2)",
+            )
+
+            gr.Markdown(
+                """
+                ### Why Stage-2 is required
+                Stage-1 provides a depth-guided fill that is *not final*. **Stage-2 renders** the definitive image by leveraging:
+                - **Stage-1 output** as updated geometry hints, and
+                - **Original colored depth** as `depth_image` guidance.
+                Skipping Stage-2 will leave the process incomplete.
+
+                ### Troubleshooting
+                - **“No valid mask detected”**: Either upload a binary mask (white=remove) **or** draw with **red brush** after clicking **Prepare canvas**.
+                - **Seams/halos**: Increase **Mask dilation (px)** (e.g., 8–16) and re-run both stages.
+                - **Prompt not followed**: Lower **guidance_scale** (e.g., 18–24) and make the prompt more concrete.
+                - **Depth looks noisy**: Use **vitl**, increase **Depth: max_res**, or enable **FP32**.
+                """
+            )
+
+    # ===== Helpers to toggle Stage-2 button =====
+    def _enable_button():
+        return gr.update(interactive=True)
+
+    # Auto depth preview on image change
     img.change(
         fn=preview_depth,
         inputs=[img, encoder, max_res, input_size, fp32],
         outputs=[depth_preview],
     )
 
-    # Prepare canvas
+    # Prepare canvas for drawing on image or depth
     prepare_btn.click(
         fn=prepare_canvas,
        inputs=[img, depth_preview, draw_source],
        outputs=[sketch],
    )
 
-    #
+    # Stage-1
     run_btn.click(
         fn=run_depth_and_fill,
         inputs=[img, mask_upload, sketch, prompt, encoder, max_res, input_size, fp32,
                 max_side, mask_dilate_px, guidance_scale, steps, seed],
         outputs=[out, mask_preview],
         api_name="run",
+    ).then(  # Enable Stage-2 only after Stage-1 completes
+        fn=_enable_button,
+        inputs=[],
+        outputs=[run_btn_stage2],
     )
 
-    #
+    # Stage-2 (REQUIRED; unlocked after Stage-1)
     run_btn_stage2.click(
         fn=run_stage2_refine,
-        inputs=[img, out, depth_preview,
+        inputs=[img, out, depth_preview,
                 mask_upload, sketch, prompt, encoder, max_res, input_size, fp32,
                 max_side, guidance_scale, steps, seed],
         outputs=[out_stage2],
         api_name="run_stage2",
     )
-# ====================================================================
 
 if __name__ == "__main__":
     os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")
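
The sizing contract described in the `prepare_size_for_flux` docstring above (round both sides up to multiples of 64, pin the long side to `target_max`, then rescale and re-align the short side) can be sanity-checked standalone. The sketch below is reconstructed from that docstring for illustration only; it is not the committed implementation, and the committed `_round_mult64` may differ in rounding details.

import math

def round_mult64(x: float, mode: str = "nearest") -> int:
    # Align x to a multiple of 64 ("nearest" or "ceil"), never below 64.
    if mode == "ceil":
        return 64 * math.ceil(x / 64)
    return 64 * max(1, round(x / 64))

def prepare_size(w: int, h: int, target_max: int = 1024) -> tuple[int, int]:
    # 1) Round each side up to a multiple of 64 (avoid too-small sizes).
    w1 = max(64, round_mult64(w, "ceil"))
    h1 = max(64, round_mult64(h, "ceil"))
    # 2) Fix the long side to target_max; 3) rescale and re-align the short side.
    if w1 >= h1:
        return target_max, max(64, round_mult64(h1 * target_max / w1))
    return max(64, round_mult64(w1 * target_max / h1)), target_max

# Worked example: 1200x800 -> ceil to 1216x832 -> long side fixed at 1024,
# short side 832 * 1024 / 1216 ≈ 700.6 -> nearest multiple of 64 -> 704.
assert prepare_size(1200, 800) == (1024, 704)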
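
Since the commit exposes the two stages as named endpoints (`api_name="run"` and `api_name="run_stage2"`), the flow can also be scripted. The sketch below is hypothetical and not part of the commit: the Space id `zixinz/GeoRemover`, the input file names, and the `/preview_depth` endpoint (assumed to be auto-derived from the `preview_depth` function name, since `img.change` sets no explicit `api_name`) all need checking against the live Space. Argument order follows the `inputs=[...]` lists wired up in the diff.

from gradio_client import Client, handle_file  # assumes gradio_client >= 1.0

client = Client("zixinz/GeoRemover")  # hypothetical Space id

# Stage-1 fill: inputs mirror run_btn.click(...) -> [img, mask_upload, sketch,
# prompt, encoder, max_res, input_size, fp32, max_side, mask_dilate_px,
# guidance_scale, steps, seed]; outputs are [out, mask_preview].
stage1_out, mask_preview = client.predict(
    handle_file("input.png"),   # img
    handle_file("mask.png"),    # mask_upload (white = remove)
    None,                       # sketch (skip the drawn mask over the API)
    "A beautiful scene",        # prompt
    "vitl", 1280, 518, False,   # encoder, max_res, input_size, fp32
    1024, 8, 30, 50, 0,         # max_side, mask_dilate_px, guidance_scale, steps, seed
    api_name="/run",
)

# Depth preview (hypothetical endpoint name; img.change sets no api_name).
depth_preview = client.predict(
    handle_file("input.png"), "vitl", 1280, 518, False,
    api_name="/preview_depth",
)

# Stage-2 render (REQUIRED): inputs mirror run_btn_stage2.click(...).
final = client.predict(
    handle_file("input.png"),     # img
    handle_file(stage1_out),      # out (Stage-1 result, local file path)
    handle_file(depth_preview),   # depth_preview
    handle_file("mask.png"),      # mask_upload
    None,                         # sketch
    "A beautiful scene",          # prompt
    "vitl", 1280, 518, False,     # encoder, max_res, input_size, fp32
    1024, 30, 50, 0,              # max_side, guidance_scale, steps, seed
    api_name="/run_stage2",
)
print(final)

Note that the `.then(...)` chain in the diff only gates the UI button; an API caller must run `/run` before `/run_stage2` on its own.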