Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import os | |
| import skimage | |
| import matplotlib.pyplot as plt | |
| from PIL import Image | |
| import numpy as np | |
| from collections import OrderedDict | |
| import torch | |
| from imagebind import data | |
| from imagebind.models import imagebind_model | |
| from imagebind.models.imagebind_model import ModalityType | |
| import torch.nn as nn | |
| import pickle | |
# Inference device. Hard-coded to the CPU; the earlier auto-detection
# expression (`"cuda:0" if torch.cuda.is_available() else "cpu"`) is disabled.
device = "cpu"

# Build the pretrained ImageBind model once at import time, switch it to
# evaluation mode and move it to the selected device.
model = imagebind_model.imagebind_huge(pretrained=True)
model.eval()
model.to(device)

# Precomputed gallery data consumed by generate_image().
# NOTE(review): pickle can execute arbitrary code while loading -- these files
# must only ever be assets shipped with the app, never user-supplied data.
# Presumably image_features holds L2-normalised image embeddings (going by the
# file name) and image_paths is index-aligned with it -- confirm against the
# script that produced the pickles.
with open("./assets/image_features_norm_2.pkl", "rb") as features_file:
    image_features = pickle.load(features_file)
with open("./assets/image_paths.pkl", "rb") as paths_file:
    image_paths = pickle.load(paths_file)
def generate_image(text):
    """Return the gallery image whose embedding best matches *text*.

    The text is embedded with the module-level ImageBind ``model``, scaled to
    unit length and scored against the precomputed ``image_features`` with a
    dot product. The highest-scoring entry of ``image_paths`` is then opened
    from ``./assets/images`` and returned.

    Args:
        text: Query string entered by the user.

    Returns:
        The best-matching image as a PIL image converted to RGB.
    """
    inputs = {
        ModalityType.TEXT: data.load_and_transform_text([text], device),
    }
    with torch.no_grad():
        embeddings = model(inputs)

    text_features = embeddings[ModalityType.TEXT]
    text_features = text_features / text_features.norm(dim=-1, keepdim=True)

    # Dot product of the unit-length text vector with every gallery vector.
    # This is a cosine similarity only if image_features is itself
    # unit-normalised -- presumably so, given the asset's file name; confirm.
    similarity = text_features.cpu().numpy() @ image_features.cpu().numpy().T

    # A single query is scored, so the flattened argmax is the gallery index.
    best_index = int(np.argmax(similarity))

    # Only the file name of the stored path is used; the image itself is
    # always read from the local assets directory.
    img_name = os.path.basename(image_paths[best_index])
    with Image.open(f"./assets/images/{img_name}") as source:
        # convert() returns a new, fully loaded image, so the source file can
        # be closed as soon as the conversion is done.
        return source.convert("RGB")
# Example prompts offered below the input box, each paired with the asset it
# is expected to retrieve. The second column is kept for reference only: the
# interface has a single input component, so only the prompt is passed on.
_EXAMPLES = [
    ("a page of text about segmentation", "assets/images/page.png"),
    ("a facial photo of a tabby cat", "assets/images/chelsea.png"),
    ("a portrait of an astronaut with the American flag", "assets/images/astronaut.png"),
    ("a rocket standing on a launchpad", "assets/images/rocket.png"),
    ("a red motorcycle standing in a garage", "assets/images/motorcycle_right.png"),
    ("a person looking at a camera on a tripod", "assets/images/camera.png"),
    ("a black-and-white silhouette of a horse", "assets/images/horse.png"),
    ("a cup of coffee on a saucer", "assets/images/coffee.png"),
]

# Text-in / image-out UI around generate_image().
iface = gr.Interface(
    fn=generate_image,
    inputs="text",
    outputs="image",
    # One value per input component in every example row.
    examples=[[prompt] for prompt, _expected_image in _EXAMPLES],
    title="Find the image most similar to the given text",
    description='''<p>
Welcome to a straightforward demonstration of ImageBind.
This simple demo is designed to find the image most similar to a given text
using cosine similarity. For a comprehensive
understanding of its capabilities, we encourage you to explore the original research <a href='https://arxiv.org/abs/2305.05665' target='_blank'>paper</a>
and visit the <a href='https://github.com/facebookresearch/ImageBind' target='_blank'>repository</a>
for more in-depth information.</p>
''',
)

# Start the web server as soon as the module is executed.
iface.launch()