---
# Molmo-based model configuration: Qwen2.5-7B LLM backbone, SigLIP
# vision backbone, data formatter / multimodal preprocessor settings,
# LoRA fine-tuning parameters, and per-dataset action/proprio
# normalization statistics.
model_name: molmo
llm:
  d_model: 3584
  n_heads: 28
  n_kv_heads: 4
  head_dim: null
  qkv_bias: true
  clip_qkv: null
  n_layers: 28
  mlp_ratio: 4
  mlp_hidden_size: 37888
  activation_type: swiglu
  block_type: sequential
  rope: true
  rope_full_precision: true
  rope_theta: 1000000.0
  rope_type: default
  rope_factor: null
  rope_high_freq_factor: null
  rope_low_freq_factor: null
  rope_original_max_position_embeddings: null
  attention_type: sdpa
  float32_attention: true
  attention_dropout: 0.0
  attention_layer_norm: false
  attention_layer_norm_type: olmo
  residual_dropout: 0.1
  response_residual_dropout: 0.0
  layer_norm_type: rms
  layer_norm_with_affine: true
  layer_norm_eps: 1.0e-06
  attention_layer_norm_with_affine: true
  max_sequence_length: 4096
  max_position_embeddings: null
  include_bias: false
  bias_for_layer_norm: null
  norm_after: false
  moe_num_experts: 8
  moe_top_k: 2
  moe_mlp_impl: sparse
  moe_log_expert_assignment: false
  moe_shared_expert: false
  moe_lbl_in_fp32: false
  moe_interleave: false
  moe_loss_weight: 0.1
  moe_zloss_weight: null
  moe_dropless: true
  moe_capacity_factor: 1.25
  embedding_dropout: 0.0
  scale_logits: false
  vocab_size: 152064
  additional_vocab_size: 128
  weight_tying: false
  embedding_size: 152064
  use_position_ids: true
  tokenizer:
    identifier: Qwen/Qwen2.5-7B
    tokenizer_dir: null
    depth_tokens: true
  init_path: gs://mm-olmo/pretrained_llms/qwen2.5-7b.pt
  init_incremental: null
  new_embedding_init_range: 0.02
  initializer_range: 0.02
  normalize_input_embeds: false
  activation_checkpoint: whole_layer
  compile: blocks
  fix_pad_tokenizer: false
  resize_vocab: false
  init_std: 0.02
  init_fn: normal
  init_cutoff_factor: null
vision_backbone:
  vit:
    image_model_type: siglip
    image_default_input_size:
    - 378
    - 378
    image_patch_size: 14
    image_pos_patch_size: 14
    image_emb_dim: 1152
    image_num_heads: 16
    image_num_key_value_heads: 16
    image_num_layers: 27
    image_head_dim: 72
    image_mlp_dim: 4304
    image_mlp_activations: gelu_pytorch_tanh
    image_dropout_rate: 0.0
    image_num_pos: 729
    image_norm_eps: 1.0e-06
    attention_dropout: 0.0
    residual_dropout: 0.0
    initializer_range: 0.02
    float32_attention: true
    attention_type: sdpa
    activation_checkpointing: true
    init_path: gs://mm-olmo/pretrained_image_encoders/siglip2-so400m-14-384.pt
    resize_mode: siglip
    pad_value: 0.0
    normalize: siglip
  image_pooling_2d: attention_meanq
  pooling_attention_mask: false
  image_projector: mlp
  image_padding_embed: null
  vit_layers:
  - -3
  - -9
  skip_unused_layers: true
  image_feature_dropout: 0.0
  connector_activation_checkpointing: true
  compile_vit: blocks
data_formatter:
  prompt_templates: uber_model
  message_format: role
  system_prompt: demo_or_style
  always_start_with_space: false
  default_inference_len: 65
  select_answer: best
  debug: false
  image_last: false
  format_message_list: null
  p_one_message: 0.0
mm_preprocessor:
  crop_mode: overlap-and-resize-c2
  max_crops: 8
  max_images: 2
  max_multi_image_crops: 8
  pooling_w: 2
  pooling_h: 2
  overlap_margins:
  - 4
  - 4
  use_col_tokens: true
  loss_token_weighting: root_subsegments
  legacy_image_mask: false
  max_answer_len: null
  img_aug: true
bi_directional_attn: null
lora_enable: true
lora_rank: 32
lora_alpha: 16
lora_dropout: 0.0
lora_bias: none
n_action_bins: 256
norm_stats:
  libero_spatial_no_noops_modified:
    action:
      mean:
      - 0.15312479436397552
      - 0.13707277178764343
      - -0.15526802837848663
      - -0.005176450591534376
      - -0.01120874285697937
      - -0.020194264128804207
      - 0.4578818082809448
      std:
      - 0.41272708773612976
      - 0.34724321961402893
      - 0.50869220495224
      - 0.037266165018081665
      - 0.07244449853897095
      - 0.05762382969260216
      - 0.49827873706817627
      max:
      - 0.9375
      - 0.9375
      - 0.9375
      - 0.1971428543329239
      - 0.33642858266830444
      - 0.375
      - 1.0
      min:
      - -0.9375
      - -0.9375
      - -0.9375
      - -0.1875
      - -0.3675000071525574
      - -0.36000001430511475
      - 0.0
      q01:
      - -0.7454732114076613
      - -0.6616071462631226
      - -0.9375
      - -0.1071428582072258
      - -0.20678570866584778
      - -0.1842857152223587
      - 0.0
      q99:
      - 0.9375
      - 0.8758928775787354
      - 0.9321428537368774
      - 0.1039285734295845
      - 0.17678570747375488
      - 0.14571428298950195
      - 1.0
    proprio:
      mean:
      - -0.024462558329105377
      - 0.106529600918293
      - 1.0580483675003052
      - 3.0628468990325928
      - -0.10464039444923401
      - 0.08307311683893204
      - 0.0
      - 0.01995457336306572
      - -0.020162804052233696
      std:
      - 0.1101478561758995
      - 0.13784688711166382
      - 0.1044282391667366
      - 0.10451053828001022
      - 0.4112098217010498
      - 0.2176690548658371
      - 0.0
      - 0.017260896041989326
      - 0.0171116404235363
      max:
      - 0.1759040206670761
      - 0.3904820382595062
      - 1.3290715217590332
      - 3.4566118717193604
      - 1.2268599271774292
      - 1.0429412126541138
      - 0.0
      - 0.041053611785173416
      - 0.000775813648942858
      min:
      - -0.3095473051071167
      - -0.29250794649124146
      - 0.9095591306686401
      - 2.497488260269165
      - -1.8006486892700195
      - -0.7207611203193665
      - 0.0
      - -0.0004703797458205372
      - -0.041536275297403336
      q01:
      - -0.2727657300233841
      - -0.23721413239836692
      - 0.9160063165426254
      - 2.77949666261673
      - -1.3187511622905732
      - -0.41989982962608335
      - 0.0
      - 0.001503719249740243
      - -0.03989770736545324
      q99:
      - 0.13529365032911292
      - 0.3629165390133857
      - 1.2862326657772063
      - 3.2829698753356933
      - 0.9332760351896285
      - 0.6325724506378171
      - 0.0
      - 0.039933966137468815
      - -0.001671919699292631
    num_transitions: 52970
    num_trajectories: 432