{ "model_type": "clip2mt5-crossattention", "library": "pytorch", "architectures": ["CLIP2MT5_CrossAttention"], "pipeline_tag": "image-text-to-text", "description": "CLIP + mT5 visual question answering (VQA) model using cross-attention.", "author": "MUERIS" }