{
  "H_cycles": 1,
  "H_layers": 8,
  "L_cycles": 1,
  "L_layers": 2,
  "act_epsilon": 0.01,
  "act_threshold": 0.9,
  "architectures": [
    "TRM"
  ],
  "depth_H": 2,
  "depth_L": 2,
  "dropout": 0.1,
  "dtype": "float32",
  "expansion": 4,
  "halt_epsilon": 0.01,
  "halt_max_steps": 4,
  "hidden_size": 32,
  "model_type": "trm",
  "num_heads": 4,
  "pad_token_id": 0,
  "seq_len": 4096,
  "transformers_version": "4.57.0",
  "vocab_size": 1183855
}