{ "metadata": { "ParamSize": 565, "ParamBytes": 12515094528.0, "BitsPerParam": 4.5003588810423265 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "lm_head.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ce0c068ef2f48f37f5966e18b1360610" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f08bdb06d4f1ed9eaf799049f008553f" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2363ef422c4b0ab0d67dbebfc5150a68" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5a9c97bb581a522dd3632139d65634b1" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "lm_head.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25165824 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25178112 } ], "md5sum": "298fb5bacb741b17510fe3880b7515a5" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d91be4d193dd78605fe3357abbb86c1c" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "30e20c0c84a8776e630b9b0facfb3710" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "bc493af6ca65264e77f53da8b51e06ba" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4eca35b1237dad2aabe41d7af5639f6d" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "428e386456ee7d8090eb8a943f0bd6ae" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ef2cb1fccb245192622e82e0437aa3b0" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "073014927467369c2e46b22f38d6bcb9" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "d6026d3f021801690c8ff5d3f607d2b1" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "953ea9f59ed817e38bd73476df4029f7" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "2bb768c9cd529f97858d28a5056c642e" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "26057d45d4f9022a59a0217383298b75" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e1a0f9abbcbcaf8fc9859504175618e2" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "730199342ebd71d245e55103ca02d59e" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "33a2f352e0305994d23ee18751b73888" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "2267852523194782b9bd940535f4df8f" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "cdbe491cb5c54dcb81d3626b1ce0e34c" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d2a6f4cb11ab05f0a8b9c628b3276ab5" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "db1abf9eb17ca251ae7085de7a2a0eb5" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "792848156707fb2dbb7ea67b06ced820" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "fdf44153075d8464abed27afb647fed0" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3470e2999d19c7a7af9bb71bf4ef2f5d" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "9fc1d12cd17fe70d7c61313c30dbce0f" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8674a30e873d9dae6867ad873c8a139a" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a0cef349d667e873f4997d65e32d849d" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "478184f2af13076b5ca1c7e8b08f026d" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "519459c853c0b0d3af6fb4c905583bfb" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4c1e8bcde6e589f2b8f41b38ca4eed39" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b7ee0ed30a899e52df71d8ca722b8480" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a495d3c682255416eea2d413fcd80550" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "d36cf07e8655bfaa4682ca7ee42dd43b" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "93c34659cf4ca5114ba50ad2eb9e7e7c" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "547d7117202ea436f39691bef94d4b0a" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b8665d9ec0be3d8764c9622c9aee8677" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a0b9afec3ef8e0fc1e05f64a94a85b5e" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "aaed173d4127459ffb283651fd2cc0ea" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7722668ebe84a227bab880731b5fdf46" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "04524c370f0145c5b7c769cfa9d79813" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f6a4f8cd3aa3f7d7626bf43cd26996c2" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e4b92e158d0a78423d2304d568291815" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "b7b4e0b720a218128fb2b332d4cdd593" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "23112645b6a973a32acff20cfa9d4dc0" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a26c907decae588d98bcdd6f681861de" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9a0ab807d5ac04250cb93e72b480070e" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5b4110db9ae4144cd1010f082856a7bb" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "050afeca78e418c968b3fb516e3e9432" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c451e3b28e53dd0aa1803adf4ae61838" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "09445620e96e45b097726abfd05574ba" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "c070ae5c1e8bb68c8540ca7ef3b66306" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3115995ac0820e8a9b3748be0469b2fe" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "e41f018c18eb42411fcac02fd39d0f01" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "97c1d9c1cde30ec9b70de1a0b371b118" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "6a669784caa2286121ea80f7bf334fce" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "693af78147f319d52cb3776f82a0f026" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2e027efd1df11a32312dc0972500cc45" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "b0419486864eb109b59823188e6e9e2e" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "199a360952eb7d06290043e038566d82" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8f5e2121ff0ed7a06028c0f3087cd7f6" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b3447a3a0f479a39fd91e26af34fb443" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ea401ba90059696cbb31695efc0c5160" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "da489a07de15d354682b419b3f09ac5a" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4bb614981624920d7e6eb74fd66af7ac" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "97a962ed0ca198493ab322962ed50473" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0b8024b95329a7813830656380e925e5" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e0221a69cc1e1d99ce4f4bce41fbf63a" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "dce3874c16784eb7079853527159b3bd" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f0ab85846135b3ad7d58a9463089f4f4" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "63f2a2ae114445048d6775955efa2b8a" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7197f98ffd3a13f1da85051e55eddc52" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e40f8af7b8b417950120a873e54ed7b3" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "5208502b4223f903c8709636bb2ae715" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2f9bff20a8cc0f113f160c65765c3ccd" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0218aaacc47b58de16d6f290ca2a7fcc" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7c204867f11b04bcbab18352b72f0ea0" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4eb6c59370be4e22f6ff0ab2b2225f9d" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "c51826002c4b9fd983fbb744ddf70ba3" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2eb3fc1f11a19566fc0c800300d8ccee" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ef19d9dba0e663df245b75f4b1ffd6d4" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ba8dba27ffecfbf45cc19b4f894c4f7a" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b0fd60ba504daee4fdc37213f5401a99" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "b8a9fd862757f34551e89cc98b51eb34" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8181aa304720f56e2b1f5d39ebee3678" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "07475fe12ff53b8f974e7e9802f9060e" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "418e46722cd9ee75a57359d80f87ee70" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fe338881b64bba40b1f6e271317b3d8a" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "9665950c949fc934983d67a9a35f7838" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "61db4fa27bc4455d0657233b6f0fcc8f" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ee7f8a8db918a4ba737dea1a8a277440" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "1b7f15edca0e079db2c93fc1d74f7f43" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5ab225b0926018711d2b518c270fc904" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "3f1e285e2fb3421706d88138fb47a9d7" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "125efb494d6c8a8c072e5bb9bfca0746" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f1c0b826564c66494939887d730d1c08" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "1a5a944c142f776dba0c0d4d42e91e20" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e5b20d10ab9e79fd88b76bc24573796e" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "11e4cd1c2528dd659025463a69e33497" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9cfa9ff9fcc0758752558418ff463e03" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4ea75083f3a02c97c426aded4e4087a3" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a118235867b11006e147ade73623af86" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "29ca6df71a704841ec24f4539513ad50" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "c2baaf30138ee607746e9f206aa1a001" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fbd1f8b6042f7ab6e31cc8a17d549b79" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e22b50b789c66edaa03321583f699e62" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7ff723cc80f40f55f8b302a4c3ab8d6b" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f10f8439972c3cc4b34028459dcdb471" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "f2ba81678293733dc1c28233badd05d4" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "db02380dade4b8241282e46c7c808ffe" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "da2977d7fade1cf47cf4531b689b831a" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8248008196abea2ccf3ca6aecb9e0074" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d9c474bffde0d5cca9dd109ca6627e26" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "35c3fe2904495350a337d7790119f541" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "76817e0d5635e123b798a8ca19c82cbf" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b40e24ba4f2ce28697ae3c565bdea6b5" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "785ad5c0d834fa11f7dbc79bd8f6f66f" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1b189a3ddb5bc9518b0da4627c1dca56" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "7acf2d54df708f139ae6ab1e2ea838b2" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "82c7d90eb2729ed10e1c7cfd5cae4106" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "45950028c99eb919b26ae4aeeb1958c5" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "710e7f7a2b342e1fa1e4befc20081ebf" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "603a40fa2b7f6bc99ad8a5a253e644bc" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "de4f7e8c3bf3a806c9da5a2ea9bc18d4" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c9355f0190ace6e7b0d702ef793fabb1" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "68c776d8a7a8205b470763d600e51ae5" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "219c1784f737a12e46bd42694310eada" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7a05b703bd6b86b5a7aa4f35e74851c9" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "06f05dc326f6598db38fedf569d9bf25" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7b1a803fb815281c433dd1674adddc55" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "50c1735622dfe6be795696a2f16cb827" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ea3526a5f819a2b0e59f1bb8ec85d4fa" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "827fa55fb53be18fbabcdff91a30c9ad" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "44b277e9501876c227de24e11e611e85" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "db9d45faef78417c65bb9d8960904e79" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "11deab4cca55880c653c8ec62bc10339" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "e16f7fda1400cd3336db9fc3835b0e89" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "bf4efb05232d18bfd48111ff0b3fe738" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "887465646eb2e99316793bd43972a539" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5bb4fa09f7c94c21c30e45b743cf40fd" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d6b1cf5b346ddb9e7ee8bbe94cca5d96" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8eaab1743e293e90a5a316ea97add52b" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "37619efbd382a27974d1a64154b5758c" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "52de9a81c715e40fbbd443bf209dedfa" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3e380163f1b639cd26bd8975487ae9a8" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "1397ea91fdbaf6b06636fc0cf8a613da" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "33806f5e52e4afedb20a41d91f610149" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5931472ab3a7d911322ccd66e0d1a531" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "ef860fcdd8797847b11562b0990d8d5a" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "11478e5857e79685b5067261bf979bae" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "95ae4704090ffe593898e9bb7a7d2d3c" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b3396f36050e36ae7a52e8ede009008c" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3b2688cd2564d57f946338f970a61daf" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "bf768e6b993bed17abe2ca61eafb693f" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c22a227f304183fc26bbba99ec7b7227" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "83ea6c4d79a43c3d26ac795be702bc78" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2436a6046e890cd8e480e6adaa7ef132" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ccab2268d4695731cc1e3d131f293be6" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "e4a282a3a99f855a95d59d108f989326" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2a35edc630d271535ec651b51b547d76" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "430963aad55ebe53acd062d4cc568de0" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "18a0eef99916817f0bc5b4de974b4560" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ceef9c47348e36fa7c3a1da134113aff" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "6d4b51668be19dd96317078ccf393743" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "06b743c0206156bfe76b9f53a19c528b" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "13772e7f5295e7b4422c4ba37b221f47" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "79773deab149196da1d6979dabcdd929" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "33b4801c3d0d6201b454d76f0b5b72d7" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "fe2adf06e414fcde594902db0e5eb22a" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "73a001b50d59cf95f29b4005aa94b788" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "dfa758c38ba5434e2390e5489f86e478" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8bd4f8f6713b04fdd365bf0644f3497d" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a7d2c6c5ad793bc1256df4f115587069" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "6fd8dedbb494afd062e689dcc739b175" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e2fd8ca0b2b76064efacdc16944058ed" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "1dcbf225d6435aa0a187caf7b44d71ab" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "44ad8b2f7ee8f76b06de953e34ca4bbf" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d89d33acef9225e9ea3e19b84c0f17ef" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "373d97424adbf029453c38e22a485a54" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0b9ad61ee5c11518c559490c2823147c" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "eea28fef41fefa9b07a630d3f7de9b51" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "57f0c675639c6cbb33131f278e72f024" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8452ba168743ad1a48b8217d6fb6af2b" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "f542c90a3fe68b59552f6be127d3d836" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "462ca986d2b77f8694879064e64595fe" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4e0060c903fc068eb551d9bdb3077825" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ce049bbc69344abf64df1980cadf8dbd" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1d6c91a1df28b361861d91ee69c2be6b" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "5f646c0ada599495c16ac1d418f50816" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fb5c6461c4b586871b91bf913d9db36e" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "784f50a21a0405ed13b9fc84fd6b8aa0" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "e562a925ff64aac0a675972e1b5b1f5d" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ff7963e549805112d350f31b124bd9be" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "21606fac9ef6aa9de89d4573f0008f5c" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6dfbb7f3716f063d4a03c6ad9822c940" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "466aaeb860ee2faeea782f5487966621" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5ce91ecaff72d49e3a09be4d693b8464" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ae859226d0e42fe50c0aaef03d19d9d7" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "dfee214849c90a25354c53d68585e1dd" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "063aa6129f14ae8e89ff042796ef9aee" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "434591e5ed0e01f0c37f9365d183349d" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "c0737898fd40dfa687283df92fe6b142" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "07bb34f5a1bc6600ae4f902342a6bcca" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "b12660b1a90ddfdef482a6fef0fb72b1" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d6b39300eb60ea6d0c48eaa024b06c48" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0e4da05ccb2346ab3fdf3c79ca7611e5" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "82c7cb906b89e56e2f3d191bbe9cb256" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e154116e104f9494128cf8ffc69f2192" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "8a608085700fb4587e7f23854c170d0a" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fe2edae86c9bc5ce3ad7716db1b2899a" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "89976cbef29360b169caa9493bf998bf" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "d67fd5a42e840ac560f135a70e4130ec" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "842ca6ff6e8674d1c463b18d72d52b97" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "91f43497a53647a9a3540557a2cacad5" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "18240ede9985517d86630c042575ece8" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f9532e5433c372ec70c49881ca4af0b3" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "d5a587a41c0fecb0b773a6c69cf8a717" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1fd1451063e8bab2ba30bb42faee0f7b" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "c0d9ba902a160e8a4eef39df9e8ff4db" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "610d2fc4fad2fced713dc29686925020" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "bae5510e8532f459c8b5c729ff465607" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5024ce4e2421cfb3fbfe671032ca45af" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "cd30f73d15f34c71ad3a43531315548e" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "c0247afcdd059cfe57c520556f434c0f" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f2a61fd8f3726ffd669237abf92df8a4" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2b74fc6a24b1790f408c96f8ecbd0c28" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "112245a372e3e7e7d41ed8c4975ae521" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0e38d624552f28544c65aa545e52dcde" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "152e67b9b76d29bace72ac8a5ca95a99" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "594be5ede0f63b0b6b4120bd101eb21d" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "3f1a35abfff0a92769caab73556f88f3" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7af591277bfe2cdd5aa27a47c89342e6" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6d55f7589988a9e6b0f57ef95268be16" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "938bc5a7a1f8bca8a2af74b7458e2ed0" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3b0199f0932b4fe3006936537240d939" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e25a3e05eca77b805f811f7e7b4b8abe" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "e34529eb01a43a952e4848b90e0aa40d" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ed406e20c4a4d8a3d5218e80a586775c" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "f281f40ad164a4b1a6996701ae21457d" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "959c0f2bd065609a8338a8c3ed941cf3" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e911c97de27df773d8848adc97c669fd" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "512289fe17bcac3dbfa6b6fd0fbd2f11" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d077beddf73062e27d26cf7b5ffbb0e6" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "4b53840f892a09d63bed5f0b128564de" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3e51ce212a0694f7bccf8693622b9681" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "285ac4974bb285a315b4e9b912229be2" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ec06f2c56b7e47355ec7d48c81e2c7b4" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7425ad931ca8f8be5c1ec3b0984683e4" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "d58a8703e2a93240069510db2a617135" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5e8c39df2c10260cfda9066b150a2c6b" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "cd5c8134fa7df334ce234e17bb6ae720" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ed390bd827ba657d153a09fcea7dae0e" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e9c95a7fb7bb6567120525d07303b7a9" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "bf7d21733cbccad934f5f65f06cae5c8" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6c1d70d41f99e3162f0291e5513810f3" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "9fef4f482143369864a70fe390e379f3" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "400cb4b22443525832fae8a4ece3d8a6" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "652a4dc172fbfb300327cca7e29acee9" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "6da2d806737fc14e130e80b29e55ca03" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bac4159355d333ea87a1028a4d5c770d" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "75a03fe2c5f826991c128faccec0c9b9" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ac8524480e386871993bba5979521b87" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "66d24249d3fa745a39c4eab99f3e0338" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "9c1d79b2e47f392d712caf7bb4a9889a" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "472fc0c8d19f3dfe09bd7e883dc46b15" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f6a3df5057d6339adfd66b6c996a2424" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "878f71e5085958a6d4c23de29d3d9fff" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4220ecd5b4201d376eda959b13d23f04" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "78fdce46be7af421cf96a90b0e37ba9e" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1ae588b500aadb5e79c39073bbd2715d" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "25dff87c6ca9e5b3c07a6ee62ad636bd" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "6a5d6706e99f3bbf39482ab45183f321" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7456d1c192bf7b6335a9b729405e4376" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "fbbb363394d763d68f0d6603ccf5bd8c" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c7a54ce3da6bc1767379aabb7d28de88" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0652b4b9b2904b1a09dc91dbe1e15955" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8f3fe8aa6ef44f87a8571252a3d49e9f" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "cdcaa055adffb5c3c4af043d25344071" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 6144, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18112512 } ], "md5sum": "4a4111e561ac4796111b89d51719e6fd" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5d7543e26b883f178df927dc3d221b9f" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ef3b0959f521af2d83a88519d88ef3dc" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 18112512, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12595200 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15740928 }, { "name": "model.norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18100224 } ], "md5sum": "25db8b50be17b793eb9cd385fbad12c2" } ] }