---
# Configuration for a Molmo-based vision-language-action checkpoint:
# Qwen2.5-7B language model + SigLIP vision encoder, LoRA fine-tuning,
# and normalization statistics for the LIBERO-Spatial dataset.
#
# NOTE(review): this file arrived with its indentation stripped. The nesting
# below was reconstructed from the key semantics (duplicate keys such as
# attention_dropout / init_path / initializer_range only make sense on
# separate levels). Verify against the producing config schema before use.
model_name: molmo

# --- Language model -------------------------------------------------------
llm:
  d_model: 3584
  n_heads: 28
  n_kv_heads: 4
  head_dim: null
  qkv_bias: true
  clip_qkv: null
  n_layers: 28
  mlp_ratio: 4
  mlp_hidden_size: 37888
  activation_type: swiglu
  block_type: sequential
  rope: true
  rope_full_precision: true
  rope_theta: 1000000.0
  rope_type: default
  rope_factor: null
  rope_high_freq_factor: null
  rope_low_freq_factor: null
  rope_original_max_position_embeddings: null
  attention_type: sdpa
  float32_attention: true
  attention_dropout: 0.0
  attention_layer_norm: false
  attention_layer_norm_type: olmo
  residual_dropout: 0.1
  response_residual_dropout: 0.0
  layer_norm_type: rms
  layer_norm_with_affine: true
  layer_norm_eps: 1.0e-06
  attention_layer_norm_with_affine: true
  max_sequence_length: 4096
  max_position_embeddings: null
  include_bias: false
  bias_for_layer_norm: null
  norm_after: false
  # Mixture-of-experts settings (presumably inactive with
  # block_type: sequential — confirm against the model code).
  moe_num_experts: 8
  moe_top_k: 2
  moe_mlp_impl: sparse
  moe_log_expert_assignment: false
  moe_shared_expert: false
  moe_lbl_in_fp32: false
  moe_interleave: false
  moe_loss_weight: 0.1
  moe_zloss_weight: null
  moe_dropless: true
  moe_capacity_factor: 1.25
  embedding_dropout: 0.0
  scale_logits: false
  vocab_size: 152064
  additional_vocab_size: 128
  weight_tying: false
  embedding_size: 152064
  use_position_ids: true
  tokenizer:
    identifier: Qwen/Qwen2.5-7B
    tokenizer_dir: null
    depth_tokens: true
  # Pretrained LLM weights to initialize from.
  init_path: gs://mm-olmo/pretrained_llms/qwen2.5-7b.pt
  init_incremental: null
  new_embedding_init_range: 0.02
  initializer_range: 0.02
  normalize_input_embeds: false
  activation_checkpoint: whole_layer
  compile: blocks
  fix_pad_tokenizer: false
  resize_vocab: false
  init_std: 0.02
  init_fn: normal
  init_cutoff_factor: null

# --- Vision backbone ------------------------------------------------------
vision_backbone:
  vit:
    image_model_type: siglip
    image_default_input_size:
      - 378
      - 378
    image_patch_size: 14
    image_pos_patch_size: 14
    image_emb_dim: 1152
    image_num_heads: 16
    image_num_key_value_heads: 16
    image_num_layers: 27
    image_head_dim: 72
    image_mlp_dim: 4304
    image_mlp_activations: gelu_pytorch_tanh
    image_dropout_rate: 0.0
    image_num_pos: 729
    image_norm_eps: 1.0e-06
    attention_dropout: 0.0
    residual_dropout: 0.0
    initializer_range: 0.02
    float32_attention: true
    attention_type: sdpa
    activation_checkpointing: true
    # Pretrained image-encoder weights to initialize from.
    init_path: gs://mm-olmo/pretrained_image_encoders/siglip2-so400m-14-384.pt
    resize_mode: siglip
    pad_value: 0.0
    normalize: siglip
  # Vision-to-LLM connector settings.
  image_pooling_2d: attention_meanq
  pooling_attention_mask: false
  image_projector: mlp
  image_padding_embed: null
  # ViT layers whose features feed the connector (negative = from the end).
  vit_layers:
    - -3
    - -9
  skip_unused_layers: true
  image_feature_dropout: 0.0
  connector_activation_checkpointing: true
  compile_vit: blocks

# --- Prompt / message formatting ------------------------------------------
data_formatter:
  prompt_templates: uber_model
  message_format: role
  system_prompt: demo_or_style
  always_start_with_space: false
  default_inference_len: 65
  select_answer: best
  debug: false
  image_last: false
  format_message_list: null
  p_one_message: 0.0

# --- Multimodal preprocessing ---------------------------------------------
mm_preprocessor:
  crop_mode: overlap-and-resize-c2
  max_crops: 8
  max_images: 2
  max_multi_image_crops: 8
  pooling_w: 2
  pooling_h: 2
  overlap_margins:
    - 4
    - 4
  use_col_tokens: true
  loss_token_weighting: root_subsegments
  legacy_image_mask: false
  max_answer_len: null
  img_aug: true

bi_directional_attn: null

# --- LoRA fine-tuning -----------------------------------------------------
lora_enable: true
lora_rank: 32
lora_alpha: 16
lora_dropout: 0.0
# "none" is a string (bias mode), not YAML null.
lora_bias: none

# Number of discrete bins used to tokenize continuous actions.
n_action_bins: 256

# --- Dataset normalization statistics -------------------------------------
# Per-dimension stats: action is 7-D (xyz delta, rpy delta, gripper),
# proprio is 9-D — TODO confirm dimension meanings against the data loader.
norm_stats:
  libero_spatial_no_noops_modified:
    action:
      mean:
        - 0.15312479436397552
        - 0.13707277178764343
        - -0.15526802837848663
        - -0.005176450591534376
        - -0.01120874285697937
        - -0.020194264128804207
        - 0.4578818082809448
      std:
        - 0.41272708773612976
        - 0.34724321961402893
        - 0.50869220495224
        - 0.037266165018081665
        - 0.07244449853897095
        - 0.05762382969260216
        - 0.49827873706817627
      max:
        - 0.9375
        - 0.9375
        - 0.9375
        - 0.1971428543329239
        - 0.33642858266830444
        - 0.375
        - 1.0
      min:
        - -0.9375
        - -0.9375
        - -0.9375
        - -0.1875
        - -0.3675000071525574
        - -0.36000001430511475
        - 0.0
      q01:
        - -0.7454732114076613
        - -0.6616071462631226
        - -0.9375
        - -0.1071428582072258
        - -0.20678570866584778
        - -0.1842857152223587
        - 0.0
      q99:
        - 0.9375
        - 0.8758928775787354
        - 0.9321428537368774
        - 0.1039285734295845
        - 0.17678570747375488
        - 0.14571428298950195
        - 1.0
    proprio:
      mean:
        - -0.024462558329105377
        - 0.106529600918293
        - 1.0580483675003052
        - 3.0628468990325928
        - -0.10464039444923401
        - 0.08307311683893204
        - 0.0
        - 0.01995457336306572
        - -0.020162804052233696
      std:
        - 0.1101478561758995
        - 0.13784688711166382
        - 0.1044282391667366
        - 0.10451053828001022
        - 0.4112098217010498
        - 0.2176690548658371
        - 0.0
        - 0.017260896041989326
        - 0.0171116404235363
      max:
        - 0.1759040206670761
        - 0.3904820382595062
        - 1.3290715217590332
        - 3.4566118717193604
        - 1.2268599271774292
        - 1.0429412126541138
        - 0.0
        - 0.041053611785173416
        - 0.000775813648942858
      min:
        - -0.3095473051071167
        - -0.29250794649124146
        - 0.9095591306686401
        - 2.497488260269165
        - -1.8006486892700195
        - -0.7207611203193665
        - 0.0
        - -0.0004703797458205372
        - -0.041536275297403336
      q01:
        - -0.2727657300233841
        - -0.23721413239836692
        - 0.9160063165426254
        - 2.77949666261673
        - -1.3187511622905732
        - -0.41989982962608335
        - 0.0
        - 0.001503719249740243
        - -0.03989770736545324
      q99:
        - 0.13529365032911292
        - 0.3629165390133857
        - 1.2862326657772063
        - 3.2829698753356933
        - 0.9332760351896285
        - 0.6325724506378171
        - 0.0
        - 0.039933966137468815
        - -0.001671919699292631
    num_transitions: 52970
    num_trajectories: 432