| { | |
| "metadata": { | |
| "ParamSize": 293, | |
| "ParamBytes": 812572672.0, | |
| "BitsPerParam": 16.0 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 102926336, | |
| "records": [ | |
| { | |
| "name": "lm_head.weight", | |
| "shape": [ | |
| 50257, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 102926336, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eec32f50280ffcd1a236c7e7dbd83cb6" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 102926336, | |
| "records": [ | |
| { | |
| "name": "transformer.wte.weight", | |
| "shape": [ | |
| 50257, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 102926336, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eec32f50280ffcd1a236c7e7dbd83cb6" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27293696, | |
| "records": [ | |
| { | |
| "name": "transformer.wpe.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 2097152 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 2099200 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 2101248 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 8392704 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 8398848 | |
| }, | |
| { | |
| "name": "transformer.h.0.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 10496000 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 10498048 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 10500096 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 10502144 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 18890752 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.0.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 27287552 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 27289600 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 27291648 | |
| } | |
| ], | |
| "md5sum": "112a664a1110ab79703ac38f87b1fe8f" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31490048, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 6291456 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 6297600 | |
| }, | |
| { | |
| "name": "transformer.h.1.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 8394752 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 8398848 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8400896 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 16797696 | |
| }, | |
| { | |
| "name": "transformer.h.1.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 25192448 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 31483904 | |
| } | |
| ], | |
| "md5sum": "c8425e929b9cf76cb3643623b218aae3" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 27295744, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.2.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 2097152 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 2099200 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 2101248 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 2103296 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 10491904 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 10500096 | |
| }, | |
| { | |
| "name": "transformer.h.2.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 18888704 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 18890752 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 18892800 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 18894848 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 25192448 | |
| }, | |
| { | |
| "name": "transformer.h.3.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 27289600 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 27291648 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 27293696 | |
| } | |
| ], | |
| "md5sum": "116731931ee1771b82ec10524a88cf3c" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.3.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.4.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "fb5c75c0b282fef20b386c538b4c7c3c" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.4.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.5.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "ea4bcbd38e010438180664d3501567da" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.5.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.6.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "ef7889578a63dab6c66b4c070159d182" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.6.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.7.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "7d3b89d1dccc117d0b7ad85fc5edd64c" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.7.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.8.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "52a567337e48aae8179a77c111464d1d" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.8.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.9.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "ef47f0009672c223059f783172e0f815" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.9.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.10.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "fdf1efa1d84efed4dc338323e5693d04" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.10.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.11.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "7926ce1b2972cc186b5d2c1470acfd37" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.11.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.12.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "3b05c244b0d40fb843b1de4fffbf15dd" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.12.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.13.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "bcb2f7f911485a1452aa000e2e2c0e9b" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.13.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.14.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "4722a67d2e51a955ef61a5d03c3bcf94" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.14.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.15.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "35ff9c86676c975dbf63b9912ab81f33" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.15.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.16.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "01df1b9923d63e9d0f9a560804782440" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.16.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.17.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "92f106c3b756a5a5e926e43fdb5741f1" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.17.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.18.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "983dbeeab7f0141aa067be28562ea043" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.18.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.19.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "1057439dfb17bfcc20ba72d11a21434f" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.19.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.20.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "e45f78377a8fdd0c1f43506acd580d6e" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.20.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.21.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.21.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "ba1df963ea81fc211f28da7fee9a9165" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.21.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.22.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "e25144d1411e5b0a29c5c8d9c35dd029" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25192448, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.22.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln_1.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln_1.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_attn.weight", | |
| "shape": [ | |
| 3072, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6291456, | |
| "byteOffset": 16791552 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_attn.bias", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 23083008 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2097152, | |
| "byteOffset": 23089152 | |
| }, | |
| { | |
| "name": "transformer.h.23.attn.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25186304 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln_2.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25188352 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln_2.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 25190400 | |
| } | |
| ], | |
| "md5sum": "9c68c3d6b3ce48e1f4b5bebd16de3f13" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 16791552, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.c_fc.weight", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.c_fc.bias", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.c_proj.weight", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 8396800 | |
| }, | |
| { | |
| "name": "transformer.h.23.mlp.c_proj.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16785408 | |
| }, | |
| { | |
| "name": "transformer.ln_f.weight", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16787456 | |
| }, | |
| { | |
| "name": "transformer.ln_f.bias", | |
| "shape": [ | |
| 1024 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2048, | |
| "byteOffset": 16789504 | |
| } | |
| ], | |
| "md5sum": "5529160aa774a49a742e440f2113dbd1" | |
| } | |
| ] | |
| } |