| { | |
| "metadata": { | |
| "ParamSize": 269, | |
| "ParamBytes": 1033572352.0, | |
| "BitsPerParam": 4.501551474039708 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 155582464, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_weight", | |
| "shape": [ | |
| 151936, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 155582464, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5f0930c47ad64e377619c89161b663aa" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28901376, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_scale", | |
| "shape": [ | |
| 151936, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19447808, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19447808 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19460096 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25751552 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26537984 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28635136 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28897280 | |
| } | |
| ], | |
| "md5sum": "5371b7a6a7e287b08a0c195f974cc13f" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "37d71f502de2d720ccbf13e3b371015d" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "4f3692401592b8e8a5fbde1bef304564" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "7980eef9472808e7b64af041116f1f1d" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "579a592fb66cadb1444be6548237ed13" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "3c10f26729dd19846343e9f21e55ae2c" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "5420912553ba88ad446a4b835d41f475" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "4617b1c4f7ca83e64c71f10aa2e85eec" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "6d1a5c51473dd393bcb2fd80c2aca882" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "f362dcc35078d3f4c867f5e1ea09e301" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28483584, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.ln_f.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19025920 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19038208 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25329664 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26116096 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28213248 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28479488 | |
| } | |
| ], | |
| "md5sum": "024a22e34e5ec7b319d8083a87b60f7f" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "953d1ef771511ead595bc8c669fc0564" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "9e55c1655d55defc08e03f1f53f99fb5" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "e486572ccf92b5d21ce3b5679f0ba5cd" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "ae6db91f0845607cd55e78bd554bd3e2" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "31bfe020f99696bb11083543b8f0251f" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28483584, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19025920 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19038208 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25329664 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26116096 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28213248 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28479488 | |
| } | |
| ], | |
| "md5sum": "e381c9c430b79af07c3ba8809f56e1d6" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "9d9905aaef1bd4351d6f986c41ba696f" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "95279dd22fa9df3b0dcb955f056c2ab3" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "7d417f7af22d7185993fb6331921932f" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "1bbd8c6682ce7ed6130d8453521b60d8" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "832a89b950981b1d2c809302e89a601d" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "e70feecba6d72e6b763400d92dcb6927" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28479488, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_attn.bias", | |
| "shape": [ | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12288, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_attn.q_weight", | |
| "shape": [ | |
| 6144, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_attn.q_scale", | |
| "shape": [ | |
| 6144, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25325568 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 26112000 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 28209152 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_1.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28471296 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_2.weight", | |
| "shape": [ | |
| 2048 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4096, | |
| "byteOffset": 28475392 | |
| } | |
| ], | |
| "md5sum": "8aabd29f06ca59ad232b8581feafcd69" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 155582464, | |
| "records": [ | |
| { | |
| "name": "transformer.wte.q_weight", | |
| "shape": [ | |
| 151936, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 155582464, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ad0fb00af46897d301560f79b827841f" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19447808, | |
| "records": [ | |
| { | |
| "name": "transformer.wte.q_scale", | |
| "shape": [ | |
| 151936, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 19447808, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0773ce91c9bc2eda490f9005b9ba398b" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19021824, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mlp.c_proj.q_weight", | |
| "shape": [ | |
| 2048, | |
| 688 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_proj.q_scale", | |
| "shape": [ | |
| 2048, | |
| 172 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 704512, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 11008, | |
| 256 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 11272192, | |
| "byteOffset": 6340608 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 11008, | |
| 64 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1409024, | |
| "byteOffset": 17612800 | |
| } | |
| ], | |
| "md5sum": "a714d816d7b0350d4c375aaacbb392c1" | |
| } | |
| ] | |
| } |