| { | |
| "metadata": { | |
| "ParamSize": 313, | |
| "ParamBytes": 3927297024.0, | |
| "BitsPerParam": 4.12551973205239 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 272498688, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_weight", | |
| "shape": [ | |
| 448, | |
| 152064 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272498688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f9bd383897bb7eea85d7cfea6849bcc5" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 272498688, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_weight", | |
| "shape": [ | |
| 152064, | |
| 448 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272498688, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b27c48fd877f7bca8a100ead45a7640b" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6feda4765ff9fd9fb1d66499b53d4d07" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5e45ded6746d67dce9598dcdc9f6d5de" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28752896, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_scale", | |
| "shape": [ | |
| 28, | |
| 152064 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8515584, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.embed_tokens.q_scale", | |
| "shape": [ | |
| 152064, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8515584, | |
| "byteOffset": 8515584 | |
| }, | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 17031168 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 17038336 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 18099200 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 20220928 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 20228096 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 20237312 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 28494848 | |
| } | |
| ], | |
| "md5sum": "a4b1415a9f4b8773c88e2a4b3ab9bcca" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d826fe0a5b3ff72590cd778c03bd12e2" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "829b257f4bc0f5d00ebb8e1c22090d35" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7e3e8a3f82d56e47f46c16133991631d" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c9cd9f1a0d72c0b07d6d35daee42b504" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28174336, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 6422528 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 6623232 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 6630400 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 7691264 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 9812992 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 9820160 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 9829376 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 18086912 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 24767488 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 24968192 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 24975360 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 26036224 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 28157952 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 28165120 | |
| } | |
| ], | |
| "md5sum": "5b35816109a61511ea81828e16521566" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9b9c1aaad714e6d3ff8c783af4fd2025" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4f0e1f23ac8c3624abba909f7bd8b947" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d921143499904282d6830fef22529054" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33490944, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 8257536 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 8515584 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 14938112 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 15138816 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 15145984 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 16206848 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18328576 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 18335744 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 26602496 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 26860544 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 33283072 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 33483776 | |
| } | |
| ], | |
| "md5sum": "72de12bfc7359342c6e165567e3691fe" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "66ef3840aedcf0aa570381269152637d" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ffea5f438b126bf628c3b30dd775c450" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "401c4add3c1a9ebe686c41d5dd8c6540" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30059520, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 1060864 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3182592 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 3189760 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 3198976 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 11456512 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 11714560 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 18137088 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18337792 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 19405824 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 21527552 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 21534720 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 21543936 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 29801472 | |
| } | |
| ], | |
| "md5sum": "c3bb2f170a5d14f6c3b2d38e2dbe6f36" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0bd49f3a21e7e0d06fd4ff7b60897089" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "20727165ba28ca6b1b3c0066ceaeb5d6" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "09a132e792b2a3d0e96efa4396b3c4bc" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ac49722f4d09c2ae0ac223877cbf368f" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28174336, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 6422528 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 6623232 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 6630400 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 7691264 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 9812992 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 9820160 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 9829376 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 18086912 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 24767488 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 24968192 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 24975360 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 26036224 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 28157952 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 28165120 | |
| } | |
| ], | |
| "md5sum": "66eca43a365cdb8f8d67836861995d4c" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b164bac716f752125c1127036c9e579e" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ed136ea7d8027d44f2f904db3dd5bf72" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33492992, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 8257536 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 8515584 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 14938112 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 15138816 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 15148032 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 23405568 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 23663616 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 30086144 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 30286848 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 30294016 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 31354880 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 33476608 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 33483776 | |
| } | |
| ], | |
| "md5sum": "2670a4f0dae00df2c74b2c76ae78d34d" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1c60a2249dfdf692838d7cd948e75da3" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "44c4b129a78968922ba1e582169e751b" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f705c3e5c408e0f9a19639d4316ffc30" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33490944, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 8257536 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 8515584 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 14938112 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 15138816 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 15145984 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 16206848 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18328576 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 18335744 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 26602496 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 26860544 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 33283072 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 33483776 | |
| } | |
| ], | |
| "md5sum": "a12b8a05ed2372a66ef82a4489208d8f" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e486edb0b884313dc64f6129193b35bb" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "539faca9a243330fe604085bff3d7716" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5aa65d5afa94c522af0f3d06ce6f0184" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30059520, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 1060864 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3182592 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 3189760 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 3198976 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 11456512 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 11714560 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 18137088 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18337792 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 19405824 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 21527552 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 21534720 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 21543936 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 29801472 | |
| } | |
| ], | |
| "md5sum": "251e79cf8c4e386c9b76077e90f321e3" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "42360c0fe5bed71782a3ed007f8e97db" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b9e182c37ceece10fd03b50eeb0cc7c8" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "09d8450e9582ade0c1edf88a1bc9da7a" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cf85273d81dafebea496fd9f3c9e8659" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28174336, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 6422528 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 6623232 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 6630400 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 7691264 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 9812992 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 9820160 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 9829376 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 18086912 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 24767488 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 24968192 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 24975360 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 26036224 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 28157952 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 28165120 | |
| } | |
| ], | |
| "md5sum": "4a316fc1d528ab1823fa7f2e72ebc088" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f1455ef8b8714d53391e4770df42c88c" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "06a5af79cbabfc408177ab3a54232e71" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f7778cace53c7a7edc59421ed0215dd6" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33490944, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 8257536 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 8515584 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 14938112 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 15138816 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 15145984 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 16206848 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18328576 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 18335744 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 26602496 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 26860544 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 33283072 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 33483776 | |
| } | |
| ], | |
| "md5sum": "6b620bde7664f04b9fe969d7590672b0" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0febe39d94881452614fc70866c00b80" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f8399f2b1d82d86c14fbec3b84b49da1" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28984320, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 1060864 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3182592 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 3189760 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 3198976 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 11456512 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 11714560 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 18137088 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 18337792 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 20459520 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 20468736 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 28726272 | |
| } | |
| ], | |
| "md5sum": "6efbcf4542166f2bb375fdd5d28de2a0" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5668ddac10050f7800b4fa057856248b" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1e8b62d05f96a7919b4608dda31c066a" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6f2e8a0700164b6e2e506efd281a7935" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1c19e71884e65e12dd971a8a2bb3e1d8" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0a459534c2f8ec4785cfc911a6081b67" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b5ab69d743cd0d0315c18c0920883625" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "996205208b5429d0aa41dae09cbd8880" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32446464, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 6422528 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 6623232 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 6630400 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 7691264 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 9812992 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 9820160 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 9827328 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 10888192 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 13009920 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 13017088 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 13026304 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 21283840 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 21541888 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 27964416 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 28165120 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 28172288 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 29233152 | |
| }, | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 29240320 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 29247488 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 30308352 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 32430080 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 32437248 | |
| } | |
| ], | |
| "md5sum": "52642ca274fa29e5b85218465bd8d919" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "79fbaa6cfe21ed13ba27a832beef77df" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0032a13f00e80d570fc237020c1dc3f7" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c787baa124d95cb11de94dcd8a92a562" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33490944, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 8257536 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 8515584 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 14938112 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 15138816 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 15145984 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 16206848 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18328576 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 18335744 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 26602496 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 26860544 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 33283072 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 33483776 | |
| } | |
| ], | |
| "md5sum": "2992b63d3610a100d75708d36b7ee6ec" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6eed8c1d08c164c532caa77c3b4b7f13" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "99e1313416bc83965947615e357bf574" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fa22ed4dfd1c7a57818856af85af8b20" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30059520, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 1060864 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3182592 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 3189760 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 3198976 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 11456512 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 11714560 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 18137088 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18337792 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 19405824 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 21527552 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 21534720 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 21543936 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 29801472 | |
| } | |
| ], | |
| "md5sum": "4a47b4874967c5da56fb6d7cd6759b14" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "867346aceebb3f3a3721d832f03b8079" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f2513130d5fb64fbb6ece0815695fd79" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9395fca4f544cdf017c6efbfa046a770" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1e0771bb7c86761130b4281df1f52e5f" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28174336, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 6422528 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 6623232 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 6630400 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 7691264 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 9812992 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 9820160 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 9829376 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 18086912 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 24767488 | |
| }, | |
| { | |
| "name": "model.layers.24.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 24968192 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 24975360 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 26036224 | |
| }, | |
| { | |
| "name": "model.layers.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 28157952 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 28165120 | |
| } | |
| ], | |
| "md5sum": "11ea69fd300538bf749cd2e3aa42e097" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5b11a99d2daf4de4410961c94dfb3cef" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "40423205646f168562f726a4aceca36c" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d2b31f364d3140a929c0898625a9131a" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33490944, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 8257536 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 8515584 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 14938112 | |
| }, | |
| { | |
| "name": "model.layers.25.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 15138816 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 15145984 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 16206848 | |
| }, | |
| { | |
| "name": "model.layers.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18328576 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 18335744 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 26602496 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 26860544 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 33283072 | |
| }, | |
| { | |
| "name": "model.layers.26.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 33483776 | |
| } | |
| ], | |
| "md5sum": "1287248303c529bf1cfa0556584f98c8" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6cd6b8fbfb4a7c588279149f46d89776" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33947648, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 2368, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 33947648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d927eb03e7f009a632d0ec7bdc7688eb" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 67895296, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 37888 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 67895296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "705864cb30124f698f5ff4bd18d92ed2" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30059520, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 1060864 | |
| }, | |
| { | |
| "name": "model.layers.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 3182592 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 3189760 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 3198976 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 11456512 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 11714560 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 18137088 | |
| }, | |
| { | |
| "name": "model.layers.27.input_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 18337792 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 148, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1060864, | |
| "byteOffset": 18344960 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 37888 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2121728, | |
| "byteOffset": 19405824 | |
| }, | |
| { | |
| "name": "model.layers.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 21527552 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.bias", | |
| "shape": [ | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 9216, | |
| "byteOffset": 21534720 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 448, | |
| 4608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8257536, | |
| "byteOffset": 21543936 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 28, | |
| 4608 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 258048, | |
| "byteOffset": 29801472 | |
| } | |
| ], | |
| "md5sum": "0b26589c4878e736df66563f8a8bbb44" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 6630400, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 448, | |
| 3584 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6422528, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 28, | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 200704, | |
| "byteOffset": 6422528 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 3584 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 7168, | |
| "byteOffset": 6623232 | |
| } | |
| ], | |
| "md5sum": "073ee2ef79ca8518d6a9f0e1b0ccd8cf" | |
| } | |
| ] | |
| } |