{ "attn_dropout_p": 0.0, "d_model": 832, "ff_dim": 2048, "ffn_dropout_p": 0.2, "learn_te": true, "n_heads": 16, "n_layers": 12, "resid_dropout_p": 0.2, "s1_bits": 10, "s2_bits": 10, "token_dropout_p": 0.0 }