{
  "model_type": "Qwen3",
  "architecture": "Qwen3-0.6B",
  "training_dataset": "TinyStories",
  "vocab_size": 151936,
  "context_length": 40960,
  "emb_dim": 1024,
  "n_heads": 16,
  "n_layers": 28,
  "hidden_dim": 3072,
  "head_dim": 128,
  "qk_norm": true,
  "n_kv_groups": 8,
  "rope_base": 1000000.0,
  "max_iters": 200000,
  "tokenizer": "gpt2"
}