Spaces:

oxkitsune
/

rerun-ml-depth-pro

Sleeping

App Files Community

oxkitsune committed on Oct 17, 2024

Commit

ac1916a

1 Parent(s): 69d2040

improve

Browse files

Files changed (4) hide show

app.py +68 -45
pyproject.toml +3 -2
requirements.txt +1 -1
uv.lock +10 -14

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ from gradio_rerun import Rerun
 import spaces
 from PIL import Image
 import tempfile
 # Run the script to get pretrained models
 if not os.path.exists("./checkpoints/depth_pro.pt"):
@@ -25,8 +26,8 @@ model = model.to(device)
 model.eval()
-def resize_image(image_path, max_size=1536):
-    with Image.open(image_path) as img:
         # Calculate the new size while maintaining aspect ratio
         ratio = max_size / max(img.size)
         new_size = tuple([int(x * ratio) for x in img.size])
@@ -73,7 +74,7 @@ def predict_depth(input_image):
 @rr.thread_local_stream("rerun_example_ml_depth_pro")
-def run_rerun(path_to_image):
     stream = rr.binary_stream()
     blueprint = rrb.Blueprint(
@@ -81,9 +82,9 @@ def run_rerun(path_to_image):
             rrb.Spatial3DView(origin="/"),
             rrb.Horizontal(
                 rrb.Spatial2DView(
-                    origin="/world/camera/depth",
                 ),
-                rrb.Spatial2DView(origin="/world/camera/image"),
             ),
         ),
         collapse_panels=True,
@@ -92,40 +93,62 @@ def run_rerun(path_to_image):
     rr.send_blueprint(blueprint)
     yield stream.read()
-    temp_file = None
-    try:
-        temp_file = resize_image(path_to_image)
-        rr.log("world/camera/image", rr.EncodedImage(path=temp_file))
-        yield stream.read()
-        depth, focal_length = predict_depth(temp_file)
-        rr.log(
-            "world/camera/depth",
-            rr.DepthImage(depth, meter=1),
-        )
-        rr.log(
-            "world/camera",
-            rr.Pinhole(
-                focal_length=focal_length,
-                width=depth.shape[1],
-                height=depth.shape[0],
-                principal_point=(depth.shape[1] / 2, depth.shape[0] / 2),
-                camera_xyz=rr.ViewCoordinates.FLU,
-                image_plane_distance=depth.max(),
-            ),
-        )
-    except Exception as e:
-        rr.log(
-            "error",
-            rr.TextLog(f"An error has occurred: {e}", level=rr.TextLogLevel.ERROR),
-        )
-    finally:
-        # Clean up the temporary file
-        if temp_file and os.path.exists(temp_file):
-            os.remove(temp_file)
     yield stream.read()
@@ -147,18 +170,18 @@ with gr.Blocks() as interface:
     )
     with gr.Row():
         with gr.Column(variant="compact"):
-            image = gr.Image(type="filepath", interactive=True, label="Image")
             visualize = gr.Button("Visualize ML Depth Pro")
-            examples = gr.Examples(
-                example_images,
-                label="Example Images",
-                inputs=[image],
-            )
         with gr.Column():
             viewer = Rerun(
                 streaming=True,
             )
-        visualize.click(run_rerun, inputs=[image], outputs=[viewer])
 if __name__ == "__main__":

 import spaces
 from PIL import Image
 import tempfile
+import cv2
 # Run the script to get pretrained models
 if not os.path.exists("./checkpoints/depth_pro.pt"):
 model.eval()
+def resize_image(image_buffer, max_size=128):
+    with Image.fromarray(image_buffer) as img:
         # Calculate the new size while maintaining aspect ratio
         ratio = max_size / max(img.size)
         new_size = tuple([int(x * ratio) for x in img.size])
 @rr.thread_local_stream("rerun_example_ml_depth_pro")
+def run_rerun(path_to_video):
     stream = rr.binary_stream()
     blueprint = rrb.Blueprint(
             rrb.Spatial3DView(origin="/"),
             rrb.Horizontal(
                 rrb.Spatial2DView(
+                    origin="/world/depth",
                 ),
+                rrb.Spatial2DView(origin="/world/video"),
             ),
         ),
         collapse_panels=True,
     rr.send_blueprint(blueprint)
     yield stream.read()
+    video_asset = rr.AssetVideo(path=path_to_video)
+    rr.log("world/video", video_asset, static=True)
+    # Send automatically determined video frame timestamps.
+    frame_timestamps_ns = video_asset.read_frame_timestamps_ns()
+    # load the video using opencv
+    cap = cv2.VideoCapture(path_to_video)
+    # loop through the video and log the frames using the video timestamps
+    for i in range(len(frame_timestamps_ns)):
+        ret, frame = cap.read()
+        if not ret:
+            break
+        temp_file = None
+        try:
+            temp_file = resize_image(frame, max_size=128)
+            depth, focal_length = predict_depth(temp_file)
+            rr.set_time_nanos("video_time", frame_timestamps_ns[i])
+            rr.log(
+                "world/depth",
+                rr.DepthImage(depth, meter=1),
+            )
+            rr.log(
+                "world/frame",
+                rr.VideoFrameReference(
+                    timestamp=rr.components.VideoTimestamp(seconds=1.0),
+                    video_reference="world/video",
+                ),
+            )
+            rr.log(
+                "world/camera",
+                rr.Pinhole(
+                    focal_length=focal_length,
+                    width=depth.shape[1],
+                    height=depth.shape[0],
+                    principal_point=(depth.shape[1] / 2, depth.shape[0] / 2),
+                    camera_xyz=rr.ViewCoordinates.FLU,
+                    image_plane_distance=depth.max(),
+                ),
+            )
+            yield stream.read()
+        except Exception as e:
+            rr.log(
+                "error",
+                rr.TextLog(f"An error has occurred: {e}", level=rr.TextLogLevel.ERROR),
+            )
+        finally:
+            # Clean up the temporary file
+            if temp_file and os.path.exists(temp_file):
+                os.remove(temp_file)
     yield stream.read()
     )
     with gr.Row():
         with gr.Column(variant="compact"):
+            video = gr.Video(format="mp4", interactive=True, label="Video")
             visualize = gr.Button("Visualize ML Depth Pro")
+            # examples = gr.Examples(
+            #     example_images,
+            #     label="Example Images",
+            #     inputs=[image],
+            # )
         with gr.Column():
             viewer = Rerun(
                 streaming=True,
             )
+        visualize.click(run_rerun, inputs=[video], outputs=[viewer])
 if __name__ == "__main__":

pyproject.toml CHANGED Viewed

@@ -8,8 +8,8 @@ dependencies = [
     "attrs>=24.2.0",
     "depth-pro",
     "gradio>=4.44.1",
-    "gradio-rerun>=0.0.6",
-    "rerun-sdk==0.18.2",
     "spaces>=0.30.3",
 ]
@@ -24,3 +24,4 @@ dev-dependencies = [
 [tool.uv.sources]
 depth-pro = { git = "https://github.com/apple/ml-depth-pro" }

     "attrs>=24.2.0",
     "depth-pro",
     "gradio>=4.44.1",
+    "gradio-rerun",
+    "rerun-sdk==0.19.0",
     "spaces>=0.30.3",
 ]
 [tool.uv.sources]
 depth-pro = { git = "https://github.com/apple/ml-depth-pro" }
+gradio-rerun = { git = "https://github.com/oxkitsune/gradio-rerun-viewer", branch = "gijs/rerun-0.19" }

requirements.txt CHANGED Viewed

@@ -72,7 +72,7 @@ pytz==2024.2
 pyyaml==6.0.2
 pyzmq==26.2.0
 requests==2.32.3
-rerun-sdk==0.18.2
 rich==13.9.2
 ruff==0.6.9
 safetensors==0.4.5

 pyyaml==6.0.2
 pyzmq==26.2.0
 requests==2.32.3
+rerun-sdk==0.19.0
 rich==13.9.2
 ruff==0.6.9
 safetensors==0.4.5

uv.lock CHANGED Viewed

@@ -500,17 +500,13 @@ wheels = [
 [[package]]
 name = "gradio-rerun"
-version = "0.0.6"
-source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "gradio" },
     { name = "opencv-python" },
     { name = "rerun-sdk" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/4f/78/0e70e572f159ca4060ded3a49f8bbb25fb4dca57ab7b0c2b0ca759e57c57/gradio_rerun-0.0.6.tar.gz", hash = "sha256:291f5c827f6db1ce01eeed1ef461ac252a4209cdaaf2792b40d7f06e8ffccba3", size = 52014801 }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/fc/d4/90fb4f54a7707b2beec2fb0ef5c97659eeb6776b45ff5d46193bea7c2f7b/gradio_rerun-0.0.6-py3-none-any.whl", hash = "sha256:5619aed9580c79f4eae6871602ca7409db2d67cf5cc35cfe4a6083be26a75fa2", size = 10842538 },
-]
 [[package]]
 name = "h11"
@@ -1730,8 +1726,8 @@ requires-dist = [
     { name = "attrs", specifier = ">=24.2.0" },
     { name = "depth-pro", git = "https://github.com/apple/ml-depth-pro" },
     { name = "gradio", specifier = ">=4.44.1" },
-    { name = "gradio-rerun", specifier = ">=0.0.6" },
-    { name = "rerun-sdk", specifier = "==0.18.2" },
     { name = "spaces", specifier = ">=0.30.3" },
 ]
@@ -1746,7 +1742,7 @@ dev = [
 [[package]]
 name = "rerun-sdk"
-version = "0.18.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "attrs" },
@@ -1756,11 +1752,11 @@ dependencies = [
     { name = "typing-extensions" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b8/57/28eca6dd15bb4f22d8db6b10107b0150f926bfee00d9921ee3666d94ea21/rerun_sdk-0.18.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:bc4e73275f428e4e9feb8e85f88db7a9fd18b997b1570de62f949a926978f1b2", size = 33042864 },
-    { url = "https://files.pythonhosted.org/packages/97/6c/712f82dcf774f1b9cd7bc4e9a7dc190f2088de22e33ad66486c8058c5455/rerun_sdk-0.18.2-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:efbba40a59710ae83607cb0dc140398a35979c2d2acf5190c9def2ac4697f6a8", size = 32104282 },
-    { url = "https://files.pythonhosted.org/packages/83/f9/fe349c911cd61df85167fccffdafedcc923dd2eed055f811b957c8b738b5/rerun_sdk-0.18.2-cp38-abi3-manylinux_2_31_aarch64.whl", hash = "sha256:2a5e3b618b6d1bfde09bd5614a898995f3c318cc69d8f6d569924a2cd41536ce", size = 38459061 },
-    { url = "https://files.pythonhosted.org/packages/c9/88/ee72d28bdf8e1821fe6cd3e1de85ca7931706452b4892cb4a90256d78cf0/rerun_sdk-0.18.2-cp38-abi3-manylinux_2_31_x86_64.whl", hash = "sha256:8fdfc4c51ef2e75cb68d39e56f0d7c196eff250cb9a0260c07d5e2d6736e31b0", size = 38761761 },
-    { url = "https://files.pythonhosted.org/packages/01/36/f0675261e1e620141515b837f65b015cda05a493153c59158b0ac1b80a3c/rerun_sdk-0.18.2-cp38-abi3-win_amd64.whl", hash = "sha256:c929ade91d3be301b26671b25e70fb529524ced915523d266641c6fc667a1eb5", size = 29534437 },
 ]
 [[package]]

 [[package]]
 name = "gradio-rerun"
+version = "0.0.7"
+source = { git = "https://github.com/oxkitsune/gradio-rerun-viewer?branch=gijs%2Frerun-0.19#19b28a2045047f0ae4480d0a450811a075c79056" }
 dependencies = [
     { name = "gradio" },
     { name = "opencv-python" },
     { name = "rerun-sdk" },
 ]
 [[package]]
 name = "h11"
     { name = "attrs", specifier = ">=24.2.0" },
     { name = "depth-pro", git = "https://github.com/apple/ml-depth-pro" },
     { name = "gradio", specifier = ">=4.44.1" },
+    { name = "gradio-rerun", git = "https://github.com/oxkitsune/gradio-rerun-viewer?branch=gijs%2Frerun-0.19" },
+    { name = "rerun-sdk", specifier = "==0.19.0" },
     { name = "spaces", specifier = ">=0.30.3" },
 ]
 [[package]]
 name = "rerun-sdk"
+version = "0.19.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "attrs" },
     { name = "typing-extensions" },
 ]
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/c4/59/cfea9527a2f56652c9f2e54151c8a0d2b572b7f1255ca9bc6ea8ad2fd7ce/rerun_sdk-0.19.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:49a48d6d5d7de662ef3e83dc262e65705fa719726f6bc6deefad27c4b6d34e98", size = 37415314 },
+    { url = "https://files.pythonhosted.org/packages/d9/16/0d7099d537bf2f73988ac93f5075d4fd717e96c25697b3ea16af8bcc2cda/rerun_sdk-0.19.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:c8012b4e517a911a782472dc97ebbdc6ed4261b44bfdd0e0a0c64496f17ddc91", size = 35743438 },
+    { url = "https://files.pythonhosted.org/packages/cf/35/eaabd19deaa2bbb121df3a6949206f02ad6cff122d8ad6ba0fcdeeb972c6/rerun_sdk-0.19.0-cp38-abi3-manylinux_2_31_aarch64.whl", hash = "sha256:65bb8ddf9611827c31d5502f9d1ee997c1facbbdf5dce9a268f0aa6bcaea5439", size = 39794278 },
+    { url = "https://files.pythonhosted.org/packages/8b/2c/1e06376a531431855c4ea12865aba0d83a1a1d0537a544191d09d3b44eea/rerun_sdk-0.19.0-cp38-abi3-manylinux_2_31_x86_64.whl", hash = "sha256:6a49fde1c9d0691166402707ec3b317bf0d82fb1b4412b98e02ccbf499d9b92d", size = 41301646 },
+    { url = "https://files.pythonhosted.org/packages/b2/57/948b518f3db30b8dd27d1dc0280acc6510289a79675dcb523ccfcced39d6/rerun_sdk-0.19.0-cp38-abi3-win_amd64.whl", hash = "sha256:da304927485cb4e6afe25ea8ed84c0cb7e63f3ba8ce2c72a1034ae1ffc69a6c0", size = 33573220 },
 ]
 [[package]]