{
  "_name_or_path": "/mnt/amlfs-01/home/seonghyeony/checkpoints/groot_s_idm_24P_300",
  "action_dim": 32,
  "action_head_cfg": {
    "_convert_": "object",
    "_target_": "gr00t.model.action_head.flow_matching_action_head_idm.FlowMatchingActionHeadIDM",
    "config": {
      "_recursive_": false,
      "_target_": "gr00t.model.action_head.flow_matching_action_head_idm.FlowMatchingActionHeadIDMConfig",
      "action_dim": 32,
      "action_horizon": 16,
      "add_pos_embed": true,
      "add_seperator_token": true,
      "add_view_embed": true,
      "backbone_features_projector_cfg": null,
      "diffusion_model_cfg": {
        "_target_": "gr00t.model.action_head.cross_attention_dit.DiT",
        "attention_head_dim": 64,
        "dropout": 0.2,
        "final_dropout": true,
        "interleave_self_attention": true,
        "norm_type": "ada_norm",
        "num_attention_heads": 16,
        "num_layers": 8,
        "output_dim": 1024,
        "positional_embeddings": null
      },
      "hidden_size": 1024,
      "max_action_dim": 32,
      "max_num_views": 6,
      "max_state_dim": 44,
      "mm_projector_cfg": {
        "_convert_": "object",
        "_target_": "gr00t.model.action_head.multimodal_projector.MultimodalProjector",
        "config": {
          "_target_": "gr00t.model.action_head.multimodal_projector.MultimodalProjectorConfig",
          "hidden_size": 1024,
          "mm_hidden_size": 1024,
          "mm_projector_type": "mlp_doubledownsample"
        }
      },
      "mm_vision_select_layer": -2,
      "model_dtype": "float32",
      "noise_beta_alpha": 1.5,
      "noise_beta_beta": 1.0,
      "noise_s": 0.999,
      "num_inference_timesteps": 16,
      "num_timestep_buckets": 1000,
      "siglip_hidden_size": 1024,
      "siglip_model_cfg": {
        "_convert_": "object",
        "_target_": "gr00t.model.action_head.siglip.SiglipModel.from_pretrained",
        "pretrained_model_name_or_path": "google/siglip2-large-patch16-256"
      },
      "tune_vision_tower": true,
      "vl_self_attention_cfg": {
        "_target_": "gr00t.model.action_head.cross_attention_dit.SelfAttentionTransformer",
        "attention_head_dim": 64,
        "dropout": 0.2,
        "final_dropout": true,
        "num_attention_heads": 16,
        "num_layers": 4,
        "positional_embeddings": null
      }
    }
  },
  "action_horizon": 16,
  "architectures": [
    "DualBrain"
  ],
  "backbone_cfg": {
    "_target_": "gr00t.model.backbone.IdentityBackbone"
  },
  "hidden_size": 0,
  "model_dtype": "float32",
  "model_type": "dual_brain",
  "resume_path": "/mnt/amlfs-01/home/seonghyeony/checkpoints/groot_s_idm_24P_300",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.45.2"
}