Initial commit

Browse files

Files changed (10) hide show

assets/cat_dog.jpg +0 -0
flagged/img ndarray/0.jpg +0 -0
flagged/img ndarray/1.jpg +0 -0
flagged/log.csv +3 -0
flagged/output/0.png +0 -0
flagged/output/1.png +0 -0
gradcam/__pycache__/utils.cpython-38.pyc +0 -0
gradcam/app.py +61 -0
gradcam/utils.py +100 -0
requirements.txt +6 -0

assets/cat_dog.jpg ADDED Viewed

flagged/img ndarray/0.jpg ADDED Viewed

flagged/img ndarray/1.jpg ADDED Viewed

flagged/log.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+'text','img ndarray','output','timestamp'
+'big ship','img ndarray/0.jpg','output/0.png','2022-04-16 19:37:48.314750'
+'microphone','img ndarray/1.jpg','output/1.png','2022-04-16 21:45:35.413185'

flagged/output/0.png ADDED Viewed

flagged/output/1.png ADDED Viewed

gradcam/__pycache__/utils.cpython-38.pyc ADDED Viewed

Binary file (2.77 kB). View file

gradcam/app.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import gradio as gr
+import clip
+import torch
+import utils
+# CLIP variant handed to clip.load() below.
+clip_model = "RN50x4"
+# Run on the GPU when torch reports that CUDA is available, else on the CPU.
+if torch.cuda.is_available():
+    device = "cuda"
+else:
+    device = "cpu"
+# `model` is the CLIP network; `preprocess` is the callable applied to input
+# images before they are fed to it (see grad_cam_fn).
+model, preprocess = clip.load(clip_model, device=device, jit=False)
+# Switch the model to evaluation mode.
+model.eval()
+def grad_cam_fn(text, img, saliency_layer):
+    """Return ``img`` overlaid with a Grad-CAM heat map for ``text``.
+
+    Args:
+        text: Target text; tokenized and encoded with the CLIP text encoder.
+        img: Input PIL image (the interface is configured with ``type='pil'``).
+        saliency_layer: Name of the attribute of ``model.visual`` whose output
+            is used to build the saliency map, e.g. ``"layer4"``.
+
+    Returns:
+        The image returned by ``utils.getAttMap`` for the resized input image
+        and the computed attention map.
+
+    Raises:
+        ValueError: If ``img`` is ``None`` (no image was supplied).
+        AttributeError: If ``model.visual`` has no attribute named
+            ``saliency_layer``.
+    """
+    # Fail with a readable message instead of an AttributeError on `.resize`.
+    if img is None:
+        raise ValueError("An input image is required.")
+    # Resize to the square resolution the visual encoder reports.
+    resize = model.visual.input_resolution
+    img = img.resize((resize, resize))
+    # The text embedding is only handed to gradCAM as `target`, where it seeds
+    # `output.backward`; nothing is differentiated with respect to the text
+    # encoder, so do not record an autograd graph for it.
+    with torch.no_grad():
+        text_input = clip.tokenize([text]).to(device)
+        text_feature = model.encode_text(text_input).float()
+    # Add a leading batch dimension of size one.
+    image_input = preprocess(img).unsqueeze(0).to(device)
+    attn_map = utils.gradCAM(
+        model.visual,
+        image_input,
+        text_feature,
+        getattr(model.visual, saliency_layer)
+    )
+    # Drop the singleton dimensions and move the map to a NumPy array.
+    attn_map = attn_map.squeeze().detach().cpu().numpy()
+    attn_map = utils.getAttMap(img, attn_map)
+    return attn_map
+if __name__ == '__main__':
+    # Input widgets, listed in the order grad_cam_fn takes its arguments.
+    text_box = gr.inputs.Textbox(label="Target Text", lines=1)
+    image_box = gr.inputs.Image(
+        label='Input Image',
+        image_mode="RGB",
+        type='pil',
+        shape=(512, 512))
+    layer_menu = gr.inputs.Dropdown(
+        ["layer4", "layer3", "layer2", "layer1"],
+        default="layer4",
+        label="Saliency Layer")
+    # Output widget showing the image returned by grad_cam_fn.
+    result_view = gr.outputs.Image(type="pil", label="Attention Map")
+    # Example rows: (text, image path, saliency layer).
+    sample_rows = [
+        ['a cat lying on the floor', 'assets/cat_dog.jpg', 'layer4'],
+        ['a dog sitting', 'assets/cat_dog.jpg', 'layer4']
+    ]
+    interface = gr.Interface(
+        fn=grad_cam_fn,
+        inputs=[text_box, image_box, layer_menu],
+        outputs=result_view,
+        examples=sample_rows,
+        description="OpenAI CLIP Grad CAM")
+    # Serve on 0.0.0.0:7861 with sharing turned off.
+    interface.launch(server_name='0.0.0.0', server_port=7861, share=False)

gradcam/utils.py ADDED Viewed

	@@ -0,0 +1,100 @@

+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import matplotlib.cm
+from PIL import Image
+class Hook:
+    """Context manager that captures a module's output and that output's gradient.
+
+    A forward hook is registered on ``module`` when the object is created and
+    removed when the ``with`` block exits. After a forward pass ``activation``
+    is the module's output; ``gradient`` is that output's ``.grad`` attribute.
+    """
+    def __init__(self, module: nn.Module):
+        # Last output seen by the forward hook; stays None until one has run.
+        self.data = None
+        # Handle from register_forward_hook, kept so the hook can be removed.
+        self.hook = module.register_forward_hook(self.save_grad)
+    def __enter__(self):
+        return self
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        # Unregister the hook; returns None, so exceptions are not suppressed.
+        self.hook.remove()
+    def save_grad(self, module, input, output):
+        """Forward-hook callback: store ``output`` and ask autograd to keep its grad."""
+        self.data = output
+        # requires_grad_ returns the tensor itself, so the calls can be chained.
+        output.requires_grad_(True).retain_grad()
+    @property
+    def activation(self) -> torch.Tensor:
+        """The stored module output (``None`` before any forward pass)."""
+        return self.data
+    @property
+    def gradient(self) -> torch.Tensor:
+        """The ``.grad`` attribute of the stored module output."""
+        return self.data.grad
+# Reference: https://arxiv.org/abs/1610.02391
+def gradCAM(
+    model: nn.Module,
+    input: torch.Tensor,
+    target: torch.Tensor,
+    layer: nn.Module
+) -> torch.Tensor:
+    """Compute a Grad-CAM map for ``input`` from the output of ``layer``.
+
+    The parameters of ``model`` are frozen for the duration of the call and
+    their previous ``requires_grad`` settings are restored afterwards, also
+    when the forward or backward pass raises.
+
+    Args:
+        model: Module run on ``input``; its output is back-propagated.
+        input: Input tensor; ``input.shape[2:]`` is used as the size of the
+            returned map (assumes a batched layout such as (N, C, H, W) --
+            TODO confirm against callers).
+        target: Tensor passed as the ``gradient`` argument of
+            ``output.backward``.
+        layer: Sub-module of ``model`` whose output and gradient are hooked.
+
+    Returns:
+        A single-channel map (channel dimension kept) resized to
+        ``input.shape[2:]`` with bicubic interpolation.
+
+    Raises:
+        AssertionError: If ``layer`` is not an ``nn.Module``.
+    """
+    # Validate before touching any state so a bad `layer` cannot leave the
+    # model's parameters frozen.
+    assert isinstance(layer, nn.Module)
+    # Zero out any gradients at the input.
+    if input.grad is not None:
+        input.grad.data.zero_()
+    # Freeze the parameters, remembering each one's previous setting.
+    saved_flags = [(param, param.requires_grad) for param in model.parameters()]
+    for param, _ in saved_flags:
+        param.requires_grad_(False)
+    try:
+        # Attach a hook to the model at the desired layer.
+        with Hook(layer) as hook:
+            # Do a forward and backward pass.
+            output = model(input)
+            output.backward(target)
+            grad = hook.gradient.float()
+            act = hook.activation.float()
+            # Global average pool gradient across spatial dimension
+            # to obtain importance weights.
+            alpha = grad.mean(dim=(2, 3), keepdim=True)
+            # Weighted combination of activation maps over channel
+            # dimension.
+            gradcam = torch.sum(act * alpha, dim=1, keepdim=True)
+            # We only want neurons with positive influence so we
+            # clamp any negative ones.
+            gradcam = torch.clamp(gradcam, min=0)
+        # Resize gradcam to input resolution.
+        return F.interpolate(
+            gradcam,
+            input.shape[2:],
+            mode='bicubic',
+            align_corners=False)
+    finally:
+        # Restore gradient settings, even if the passes above failed.
+        for param, flag in saved_flags:
+            param.requires_grad_(flag)
+# Modified from: https://github.com/salesforce/ALBEF/blob/main/visualization.ipynb
+def getAttMap(img, attn_map, *, size=(256, 256), alpha=0.4):
+    """Blend a jet-colored heat map of ``attn_map`` over ``img``.
+
+    Args:
+        img: PIL image to overlay. Converted to RGB when it is in another
+            mode, because ``Image.blend`` requires both images to share a
+            mode and the heat map is always RGB.
+        attn_map: 2-D NumPy array of attention values; it is shifted and
+            scaled to [0, 1] before coloring.
+        size: ``(width, height)`` both images are resized to before blending.
+        alpha: Weight of the heat map in the blend; the image gets
+            ``1 - alpha``.
+
+    Returns:
+        A PIL image of the given ``size``.
+    """
+    # Normalize attention map
+    attn_map = attn_map - attn_map.min()
+    peak = attn_map.max()
+    # A constant map has a zero peak after shifting; leave it at zero.
+    if peak > 0:
+        attn_map = attn_map / peak
+    # The colormap returns RGBA floats; scale to bytes and drop the alpha channel.
+    heat = matplotlib.cm.jet(attn_map)
+    heat = (heat * 255).astype(np.uint8)[:, :, :3]
+    img_heatmap = Image.fromarray(heat).resize(size)
+    base = img if img.mode == "RGB" else img.convert("RGB")
+    return Image.blend(base.resize(size), img_heatmap, alpha)

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio>=2.9.0,<2.10.0
+torch>=1.10.0,<1.11.0
+git+https://github.com/openai/CLIP.git
+Pillow
+matplotlib
+numpy