{ "metadata": { "ParamSize": 565, "ParamBytes": 13906894848.0, "BitsPerParam": 5.0008425902732885 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "lm_head.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1370e6c5a1e9acab516aaa50de3d8712" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1fe2ae240432d349b60d5e083cdf86f6" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "97e0591d439e953e58fa137d2f3f5b46" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5e79ce8a40a2d88fd3db9fdfe9c516f8" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "lm_head.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 12582912 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 25165824 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 25178112 } ], "md5sum": "ebca18603db146d2ee9297a240ef66ca" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9e0861b036704262f48a668f686a3db6" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ec6f446be90a130849560476bbd0bbff" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5183041dd6efa731ff0454d3fb710215" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1d6ff9d8281c6896965644752b50d10a" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "5d80127cd407320a2ce5a84e905d5aba" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a5e87873064114493ca841cbf653a1af" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "6c9330f811d47f1939b432b94683858f" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "fd580a9fd4268d516600b5a4b0799bc3" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8ab3d2efe155105083d25b31b8d15838" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "1810a73237cc61e5cbf56f6544ddb94c" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "011398c03f8f10cc6f2f05f8f7e7a5ca" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d76a8338b4d19511015a9236151eb537" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "70091c43d785d3d388bc4d5a3141cf6d" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "aef73e7d5cfcc80654e79ad0e4b21f09" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "fc197ecc7c36ad6b14fb734fcf657cf5" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "cb99303b40b725a47a6a56a53c526ab5" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "886d0b99eda2de84f50d304c6a152652" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "68bd05e5b12da77ba3fbe5a2e29e9dfd" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b693b52acfe87ac35ebdc4fa91c87ba3" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "f6b80d8eaf173f69403769c99ce9ccb0" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bf4452c742889b7202dadf28d7a8208b" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "649a2b7a41d5d47043adf3fb83a5887c" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9052bd332ca433a00ffcbd79d1132f27" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e8bed673e63113456c08cb2b4a1ecdef" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "7c3379f5fe62336bd0bb3678d1b664eb" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "005d4122082f2eac097d1222920b6bed" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "fde86f9e90f9e1b73914ffbe4aec72f1" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ea30bb02bb7bfb00828be9ada09978cb" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8dbc9173963286c4ba8dcf452c482721" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "73b49939876b946040963953104df9a8" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "21c35d8de8a66ccb761e1e4085b7ce79" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0f645812e54ca5ee4b5d2094ca8c91e8" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "046477c31b6c266565aeb864ec37b37b" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "794b372db3f2b5ce6859df4c84bcad8f" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "5af2db5cc81ac40679ea3585051fd146" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4c5581341228a900540fe836a20091d8" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e899c7dc5a95c146b212e587a56a9eea" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9ac5d1b4f5b92c1c1c5450ea5f9a0c25" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "aecd30d07cad95f2a3ef63466034f2cc" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "5513599ca1a6230380dbba00388b52ef" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "36346e08dd13588ec8cb941b2f71ea50" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2fd25510c856de3f59468ac57498ae05" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "da49a8d97e2588b2fe77da5e4eace3dc" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "672e476e57d95f47b6fde22086914269" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "a87feee8a796e0ebe4c0223da2f38fd5" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a8a86257ffebfdee5f23cf128822c6dd" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "63006087d5ad8a1ef1426add7c800ba9" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3a0a452b2fd6f3d0311d1e3cbf80cce8" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7d6d3edcd08950b0796d0c95735d6f15" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "e9659d9a611352a3208f0b21f45bd24f" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "25c558a7f67bcabbd74bd1d505b59329" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "18e1c0a195ed899207da0f1bdcfbab97" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "da5ba0c917f1e9e75a3df1cc781493bb" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "9027cce383cff1891bb382fb62b3024f" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "4c998f8785de26d380ef5fc6ad3d1f59" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f1e01f57d90628342c21e010d933cc79" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5cf168269748a29673039902e3d45a17" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7ab649de6bd782b2536080d980d59cd0" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "909410bda6ea9f7d3ee7aebbc4dee7db" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "e469e9fea668e010cd1c29575080ff6e" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "20aeeb06c5be68cd22ab8ea558933699" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "624adcfdb61d2e86b3893e6790320215" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "bc5fa993793fe7d27239a6ba53597ed3" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "eb2f7aedcbf063853f6e120b3c864cf5" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "a393905cbb8ecc4fc627c22e7ab78d4d" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9c3e6664d04475f8dd16c4970a87febb" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "6369a9a10a70f3be93cbc1fc9337eda4" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "012c6d88e765b5bc751671ec62df0d14" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c5a38b433f6410926b19db07a565c643" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "89d576ae18090e68366ebf9ed510889a" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "314bbd0bf1e80d94c6b63fac18abc045" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "860470e16f7a8ca2553e426c3ef8c156" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "968b80016abee8824308b6dc852eaef6" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "73619cfa5eb5c0eb9e2f1d98485c3b17" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "4d27a6734b69465d237f6a0fe682b6af" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6683fe6294afff278779fb5f2dc1dc6b" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b9767edb7db03186d6c5e7439734811f" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "660818888af74fc0e0bcb2b9a8929eb3" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "18d1d57b36a110c9c966afdb7503ee18" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "7b0e2c5f48c41d80b340e911bbc6b949" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "43df192c902040d0a8b4e7fe55d3334d" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "50d0c346fa49c8e33b1c3395effc5292" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "afab4a9c39e6110b49d28b584ff1352b" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4b5f329b724ea0a98d1e7e60bec75d74" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "0b3f24244e9b946847c848e18f9aac2c" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ae28f9d407e27e856e3485b57ea445f1" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "6d8a683af16e3106365bd2125ad504d7" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "75ade20f9dbf0a5782f035cca6a58276" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a7aded3e7108abff090fb00dbe09f637" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "633615d1d26fc6e3fadcf20644dd5e40" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4f941fe4d3c87c27b2cbb0640971dff9" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "91d8b02b3aaa277b5b04133f0b0f926a" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ae2add09f38f00bae548d13a81214f22" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fafb0111714cbd5149f9e9a59d24091f" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "107718f10a3e402ab547c7a3ecb9ad1a" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d1bd8d9e3693a3e9546dc8dbfe11c28c" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ea486b0850a4956d43d8af6df986978c" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ee990942926d9dea2718d7b52607bc1e" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a158d1215de2ab4094de1c95fc7e45b7" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "7ccf52db220c4dfb9068d9672f7f6821" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b462a3cca5e9dc90233875f3705026a2" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "050768a1ea48eaae5f2e59c0b848cb8b" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ae459e833f6484ff13a2d7ce5717c499" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "05c7afc3d91bc0ae752a58cad8315f2e" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "62faa449143e8c5ebf7340cbc968e3f7" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fb398d0a8c29dce1110f6c1bc2033cba" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f0631b33db9d9e0cda5bafafdf78ec01" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a4697494d5d077c65d5e76b5b512bddf" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "06b74b6397f4512a1c5f1b836a8d1a15" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "2a1915e30004c7bae4d23413358804c8" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "12f47c004bdc0eb314973b88ab54f8c2" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "95f598a3f23b420578717f81ce9973b1" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "bd7095c2d7bdcd5678f7cc5a87cd5ae2" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b76d25eeb1ccf143f390805072dd5114" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "8d8de32a5739a0d93cc1fd8b80f02dfd" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "17a1d8965177f912a8a7a834fb571528" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "cbb8217a35a58066a5b7949737928aef" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "1775b6761813ba3fb913271d0ac93b6e" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6847f689ad531f74c6dc78f448f4f38f" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "b8c69f639121466acc7df019cc391378" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "91fec9f5dd5d445bbc15563efd48f866" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b3569779038c3686aa668d2bf6197029" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "647c7aad76073901cf5599b4765fb6e3" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4316029f45ee6a9713f76b23088f3142" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "47c0bed9f12719d766cac86f110f904c" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c5c5024d7afda81168f3f11d44e30559" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "173c98b68d33e911cbebed349cb61fc3" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ba6943fb919cb6ea56d4ca07680ad692" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1350a2fd961acde791376693942aee41" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "0374c30ed16ac9b98c4837398df475e2" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "df7e10b63604edf6abd5966685d9a424" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "979b755379086738bd7690743e886f8a" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "22f2159e452e531af95fcdf30b68d5a7" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1b4a6e2b0a9a6b91321277d93988283a" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "3e9e48470a75588df74d9931a31141e1" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c49e73b402539b4cd586e0927820b3ee" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "cd89fb68ec8b9bc11d3cd913c74f3f9c" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0cb6cf549c9a532ed1f7699b7a339f2f" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c90d049b7c0b1369ab931887ce015db9" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "785492e68edbc3b7df586b427c0b97b3" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1ef30d22a0136efed961c4fff154ad6c" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "6d0351496694a5eb3220bfe63a38784c" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5b3e7e2425613952f95f36c7d697664a" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3a489575b0a31cfeec35e7782d707b68" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "7970521d49f028b406fc33e81b582b49" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6a5b0a841d4dd650adb80c02626f4e0a" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "50a47cf0da77d98674f47238c9baddf3" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "cdbd2155622df84c8aa0e4153ea1df4c" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "37fd25fa54599da20e8aa037187a5f23" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "f9e24c1531c9b2ecf561c8855610f1e2" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7170d59eb0c9d44e92989bf3ef0b12cf" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "00d3d20ecd2517fbb77878f2bb689009" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "476f103a96351bf39ccfee7714a531c7" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "866f0d8f51a6b0948eebaaddae8c1f65" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "e2d697339a319dc315121ff37ac79f9d" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d0cec945df4afa00820a953fd6c93287" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "6925b6f1c0a3b132e271edcc153bea11" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "826a3d368c8c10b8ac6f93ae80d7252d" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b29b5de4a43efcdb5bb619008ae20373" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "9a378952c05ced018fa8caf4796397e1" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bd8136c52d8fda7fc90da758f38f3342" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "31b99b800e968f96cc7534f2e399f274" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8184df997d56162b886516f50f0f0417" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3a42a60f347b10cddb3771440c8e6588" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "1916ceba1d01c5ecff4380634d65a4a1" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "024f6e5da7aaf2a1dfeeae6daa13a935" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "53b20759356f64194c95380ff17aa70d" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3e8b1546b8deba76ef3937ce131dd684" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1e1af4c106ce4e5cc1696ae4199a7e6d" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "6faf18a687778b94e67808157990b032" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f1814de9e00cfdaf5e586b93830453c6" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e9f23ff53481ac27713b330062c2c104" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7e4530a8a5776fe06e3a5ea086ff27be" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4b7c4297c8c520187c8a9a9afda6c71c" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "25d5b655884492cf22c933a02fd112b3" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c067a4f36059b4db4bf7396608883f2a" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8c086480a256460cb72eb56a3c0c91a8" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "c69df993857017b389b3d9fea0b1f049" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "71128a9124553f6a259a680a3c3db499" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "185de13decef08bd95a776f813dc8155" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3bda6e5fbdecbacaa840fc3e1f547fda" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2be46291c780ef89a1d598df81f0fbdb" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "20c506f0e01ad21443d5d336a2a4baa9" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "71c18044e5e12165998d57c36900ff37" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "ac8e6e70422aef72dde80d13c67f65fe" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "73eff313d6f611cf5649c667f371295a" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "7c95fdf4fa85c0725ff79b92a184c088" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "e6007323f675e2d7fba558db07a6aa06" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6ec89587b631800e31646bfdb5bc5f88" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "c125a1fcc96c578cf9d4dc6a7d2864d5" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1c8528b6e55fce688a4e595dcc0160de" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b94dfae9bfa09d52aece901225117ce0" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "97751805df27d56619820c9922c7b785" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "77bc36fd565e1b86d6a57b9a4d59d053" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "51d94b2220ab155861b8b86908ea0f7a" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2af60692e73619b4e5f9abcf783f8f4e" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "7c57be45e4e0296e00bf39ca7efac6c7" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7dcf8c87d377906a1917610c481178a6" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "87e1add878e39f66450cacaaedfb3251" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "1dbb46e7696970d40f49238b7b840a00" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "dcc00338c7f25a3b9564d48473f7baf2" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5ba60a6ff0e3cf0780d5803bcb82f430" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "84c6dfe25eaea2a6373dd75c51f6a620" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0489123e6f6137ab50ce9708a0272ee4" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "f148b790c2bbf44466f05888bd9de9dc" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f6b79da558269ec912fe4dd4bf66856a" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a29d810daecc8d96a77432c68304066b" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "66d56539b996e0491acc154fcb0af05b" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "24ffbaa9f87cf242f84329068fd2ccb4" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "70ed327a220d5668c11fb16841264041" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1901c888e7870b1eba4741b8512ce22c" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2fb1a436a110fb9339de0c4b0fc93d27" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ed99fe2fd8a847e425d6e05bef0c4706" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2927ba96dfaf7df7f3e4ccbca8715abc" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "eccad5d584d3b73654472a622ca716e3" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bdbe1ba4106cfb3035dc61cb612e2704" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b66ca018234250fd89f26bf944a9b53b" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0b4c365b0443f8166b71fb66cef2919d" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c8923e728c436b8ff088e65cd72a4f10" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "09b833d4a8277bc153baff2f690edca3" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "da0b4f3db78f5a1eefb6a6942f42d4ff" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a074d2570fe56fbfe17de74954050662" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ba4db3a4acf8b972a3888b86083c46e7" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "13564399a7f928a545dfb1349ac3f7ea" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "e95d646a172cea5c58c4776ab3bc1209" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "736087971484e8cec5222c09755ae017" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d2ceabe734f24001520e0a8acb4e2a8b" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3456f293776a36418ac08f5bd0e5b45b" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f3c882134391b0c59720c2fc5871d682" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "f6804fa285916e6d3c9d29286f04fffc" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a3c0fae6971de6bce6b9b4e1623bac77" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e58dc01df563123be3e98624153023c7" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8570829eccaadb2dae4d177c33f2d1db" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "86f0c3d82274b74d3eb4602dc5342682" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "d317a861a9161bb556e33e9aea01cc38" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a330ac4d9d2cec1bd192512e185fdd5a" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5a1b0c40f56b5df3c4bc401187451413" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0865512b35a9e52933bd858094ff46cb" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "66432a7b41fd59a8e8f0f8f7eca6206b" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "6fdd3533be45defa5e75c256f907381e" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "397a03451b647218e9710358c5877057" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "afee7d795fafeeae991838c70b665647" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0f9098e9f7b52bd16941ed1bf9f5ca28" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d0c7519835bd1c9ef0412766d0d1ad19" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "3c1a74ed9bca0856c7dd2f252e70d3da" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "95c4d299ff22d3500dec3983a07c39b4" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f7debda3a1d31a083339f6432b1ee1dc" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "e9bdd1217c5add834fb6c572c3c66d28" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6b2b5a28c3502102cd5e3448c77a24d9" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "ce1ae6ae0365737e9f96acc5347520be" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0141110c95d0ecaa94f21a3a6592a026" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f77fbf1445a49a25a9a84ac325a664f7" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8357d437c1aaa044f2af458c9ddb8248" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7c55ca2ec1e47937d8b981f7524882ae" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "2d36d22941478f2f4802d08e8fbbac81" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c5d90519ea4592ee6356270e42ef972b" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "97f4cb0513be05f795c54bcadc08aefc" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f9900e531cd38100bdb084395bb2b901" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1928f5094562e4aa706bb3f15e09f216" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "f7e8a225edef6708529e22ee76c9463a" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b76af6f02dceea9aa6bd311ec3617ab0" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "6ff51b497172ddcd509dc666818e903b" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "189e7edbfcfde3d1aa71adeffd730e40" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1c49f906ad8b33ab8202b5ee293ea0ea" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "c53507210822247ac5df57112f39a4c8" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bda00dfc7464596c67ca7e22c33fec99" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "90ce6dcbc0cb48053e91e71c10b6a9f5" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f2264adf18f7cc9f7d1f4eb3c7574763" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d4a8481257f27911ea9cadf20d4edb6b" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "2252c899ac5571a80e1adb554aa53659" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a773ed76106a2b58a07c0cfd4d9fd037" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "aaa59d82b486a79558e99ab3534df498" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "de14101fbab2aaf691d969407faa918d" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0e3e0b76c22437ecfe6c232397af3389" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "976c41cb5be2bcc399d11a7efb4c5dc4" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "73aa63921e433ea312c7b7d984b6d33a" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f7d9eec915fe8c0d9924e47acaf9322c" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 18112512, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18100224 } ], "md5sum": "f6e976af9cf3451b065b45d9fcb2bde7" } ] }