| model_name: molmo | |
| llm: | |
| d_model: 3584 | |
| n_heads: 28 | |
| n_kv_heads: 4 | |
| head_dim: null | |
| qkv_bias: true | |
| clip_qkv: null | |
| n_layers: 28 | |
| mlp_ratio: 4 | |
| mlp_hidden_size: 37888 | |
| activation_type: swiglu | |
| block_type: sequential | |
| rope: true | |
| rope_full_precision: true | |
| rope_theta: 1000000.0 | |
| rope_type: default | |
| rope_factor: null | |
| rope_high_freq_factor: null | |
| rope_low_freq_factor: null | |
| rope_original_max_position_embeddings: null | |
| attention_type: sdpa | |
| float32_attention: true | |
| attention_dropout: 0.0 | |
| attention_layer_norm: false | |
| attention_layer_norm_type: olmo | |
| residual_dropout: 0.1 | |
| response_residual_dropout: 0.0 | |
| layer_norm_type: rms | |
| layer_norm_with_affine: true | |
| layer_norm_eps: 1.0e-06 | |
| attention_layer_norm_with_affine: true | |
| max_sequence_length: 4096 | |
| max_position_embeddings: null | |
| include_bias: false | |
| bias_for_layer_norm: null | |
| norm_after: false | |
| moe_num_experts: 8 | |
| moe_top_k: 2 | |
| moe_mlp_impl: sparse | |
| moe_log_expert_assignment: false | |
| moe_shared_expert: false | |
| moe_lbl_in_fp32: false | |
| moe_interleave: false | |
| moe_loss_weight: 0.1 | |
| moe_zloss_weight: null | |
| moe_dropless: true | |
| moe_capacity_factor: 1.25 | |
| embedding_dropout: 0.0 | |
| scale_logits: false | |
| vocab_size: 152064 | |
| additional_vocab_size: 128 | |
| weight_tying: false | |
| embedding_size: 152064 | |
| use_position_ids: true | |
| tokenizer: | |
| identifier: Qwen/Qwen2.5-7B | |
| tokenizer_dir: null | |
| depth_tokens: true | |
| init_path: gs://mm-olmo/pretrained_llms/qwen2.5-7b.pt | |
| init_incremental: null | |
| new_embedding_init_range: 0.02 | |
| initializer_range: 0.02 | |
| normalize_input_embeds: false | |
| activation_checkpoint: whole_layer | |
| compile: blocks | |
| fix_pad_tokenizer: false | |
| resize_vocab: false | |
| init_std: 0.02 | |
| init_fn: normal | |
| init_cutoff_factor: null | |
| vision_backbone: | |
| vit: | |
| image_model_type: siglip | |
| image_default_input_size: | |
| - 378 | |
| - 378 | |
| image_patch_size: 14 | |
| image_pos_patch_size: 14 | |
| image_emb_dim: 1152 | |
| image_num_heads: 16 | |
| image_num_key_value_heads: 16 | |
| image_num_layers: 27 | |
| image_head_dim: 72 | |
| image_mlp_dim: 4304 | |
| image_mlp_activations: gelu_pytorch_tanh | |
| image_dropout_rate: 0.0 | |
| image_num_pos: 729 | |
| image_norm_eps: 1.0e-06 | |
| attention_dropout: 0.0 | |
| residual_dropout: 0.0 | |
| initializer_range: 0.02 | |
| float32_attention: true | |
| attention_type: sdpa | |
| activation_checkpointing: true | |
| init_path: gs://mm-olmo/pretrained_image_encoders/siglip2-so400m-14-384.pt | |
| resize_mode: siglip | |
| pad_value: 0.0 | |
| normalize: siglip | |
| image_pooling_2d: attention_meanq | |
| pooling_attention_mask: false | |
| image_projector: mlp | |
| image_padding_embed: null | |
| vit_layers: | |
| - -3 | |
| - -9 | |
| skip_unused_layers: true | |
| image_feature_dropout: 0.0 | |
| connector_activation_checkpointing: true | |
| compile_vit: blocks | |
| data_formatter: | |
| prompt_templates: uber_model | |
| message_format: role | |
| system_prompt: demo_or_style | |
| always_start_with_space: false | |
| default_inference_len: 65 | |
| select_answer: best | |
| debug: false | |
| image_last: false | |
| format_message_list: null | |
| p_one_message: 0.0 | |
| mm_preprocessor: | |
| crop_mode: overlap-and-resize-c2 | |
| max_crops: 8 | |
| max_images: 2 | |
| max_multi_image_crops: 8 | |
| pooling_w: 2 | |
| pooling_h: 2 | |
| overlap_margins: | |
| - 4 | |
| - 4 | |
| use_col_tokens: true | |
| loss_token_weighting: root_subsegments | |
| legacy_image_mask: false | |
| max_answer_len: null | |
| img_aug: true | |
| bi_directional_attn: null | |
| lora_enable: true | |
| lora_rank: 32 | |
| lora_alpha: 16 | |
| lora_dropout: 0.0 | |
| lora_bias: none | |
| n_action_bins: 256 | |
| norm_stats: | |
| libero_spatial_no_noops_modified: | |
| action: | |
| mean: | |
| - 0.15312479436397552 | |
| - 0.13707277178764343 | |
| - -0.15526802837848663 | |
| - -0.005176450591534376 | |
| - -0.01120874285697937 | |
| - -0.020194264128804207 | |
| - 0.4578818082809448 | |
| std: | |
| - 0.41272708773612976 | |
| - 0.34724321961402893 | |
| - 0.50869220495224 | |
| - 0.037266165018081665 | |
| - 0.07244449853897095 | |
| - 0.05762382969260216 | |
| - 0.49827873706817627 | |
| max: | |
| - 0.9375 | |
| - 0.9375 | |
| - 0.9375 | |
| - 0.1971428543329239 | |
| - 0.33642858266830444 | |
| - 0.375 | |
| - 1.0 | |
| min: | |
| - -0.9375 | |
| - -0.9375 | |
| - -0.9375 | |
| - -0.1875 | |
| - -0.3675000071525574 | |
| - -0.36000001430511475 | |
| - 0.0 | |
| q01: | |
| - -0.7454732114076613 | |
| - -0.6616071462631226 | |
| - -0.9375 | |
| - -0.1071428582072258 | |
| - -0.20678570866584778 | |
| - -0.1842857152223587 | |
| - 0.0 | |
| q99: | |
| - 0.9375 | |
| - 0.8758928775787354 | |
| - 0.9321428537368774 | |
| - 0.1039285734295845 | |
| - 0.17678570747375488 | |
| - 0.14571428298950195 | |
| - 1.0 | |
| proprio: | |
| mean: | |
| - -0.024462558329105377 | |
| - 0.106529600918293 | |
| - 1.0580483675003052 | |
| - 3.0628468990325928 | |
| - -0.10464039444923401 | |
| - 0.08307311683893204 | |
| - 0.0 | |
| - 0.01995457336306572 | |
| - -0.020162804052233696 | |
| std: | |
| - 0.1101478561758995 | |
| - 0.13784688711166382 | |
| - 0.1044282391667366 | |
| - 0.10451053828001022 | |
| - 0.4112098217010498 | |
| - 0.2176690548658371 | |
| - 0.0 | |
| - 0.017260896041989326 | |
| - 0.0171116404235363 | |
| max: | |
| - 0.1759040206670761 | |
| - 0.3904820382595062 | |
| - 1.3290715217590332 | |
| - 3.4566118717193604 | |
| - 1.2268599271774292 | |
| - 1.0429412126541138 | |
| - 0.0 | |
| - 0.041053611785173416 | |
| - 0.000775813648942858 | |
| min: | |
| - -0.3095473051071167 | |
| - -0.29250794649124146 | |
| - 0.9095591306686401 | |
| - 2.497488260269165 | |
| - -1.8006486892700195 | |
| - -0.7207611203193665 | |
| - 0.0 | |
| - -0.0004703797458205372 | |
| - -0.041536275297403336 | |
| q01: | |
| - -0.2727657300233841 | |
| - -0.23721413239836692 | |
| - 0.9160063165426254 | |
| - 2.77949666261673 | |
| - -1.3187511622905732 | |
| - -0.41989982962608335 | |
| - 0.0 | |
| - 0.001503719249740243 | |
| - -0.03989770736545324 | |
| q99: | |
| - 0.13529365032911292 | |
| - 0.3629165390133857 | |
| - 1.2862326657772063 | |
| - 3.2829698753356933 | |
| - 0.9332760351896285 | |
| - 0.6325724506378171 | |
| - 0.0 | |
| - 0.039933966137468815 | |
| - -0.001671919699292631 | |
| num_transitions: 52970 | |
| num_trajectories: 432 | |

