| { |
| "module": "keras_hub.src.models.smollm3.smollm3_causal_lm", |
| "class_name": "SmolLM3CausalLM", |
| "config": { |
| "backbone": { |
| "module": "keras_hub.src.models.smollm3.smollm3_backbone", |
| "class_name": "SmolLM3Backbone", |
| "config": { |
| "name": "smol_lm3_backbone", |
| "trainable": true, |
| "dtype": { |
| "module": "keras", |
| "class_name": "DTypePolicy", |
| "config": { |
| "name": "float32" |
| }, |
| "registered_name": null |
| }, |
| "vocabulary_size": 128256, |
| "hidden_dim": 2048, |
| "intermediate_dim": 11008, |
| "num_layers": 36, |
| "num_attention_heads": 16, |
| "num_key_value_heads": 4, |
| "attention_bias": false, |
| "attention_dropout": 0.0, |
| "rope_layer_enabled_list": [ |
| true, |
| true, |
| true, |
| false, |
| true, |
| true, |
| true, |
| false, |
| true, |
| true, |
| true, |
| false, |
| true, |
| true, |
| true, |
| false, |
| true, |
| true, |
| true, |
| false, |
| true, |
| true, |
| true, |
| false, |
| true, |
| true, |
| true, |
| false, |
| true, |
| true, |
| true, |
| false, |
| true, |
| true, |
| true, |
| false |
| ], |
| "layer_types": [ |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention", |
| "full_attention" |
| ], |
| "mlp_bias": false, |
| "layer_norm_epsilon": 1e-06, |
| "max_position_embeddings": 65536, |
| "rope_theta": 5000000.0, |
| "partial_rotary_factor": 1.0 |
| }, |
| "registered_name": "keras_hub>SmolLM3Backbone" |
| }, |
| "preprocessor": { |
| "module": "keras_hub.src.models.smollm3.smollm3_causal_lm_preprocessor", |
| "class_name": "SmolLM3CausalLMPreprocessor", |
| "config": { |
      "name": "smol_lm3_causal_lm_preprocessor",
| "trainable": true, |
| "dtype": { |
| "module": "keras", |
| "class_name": "DTypePolicy", |
| "config": { |
| "name": "float32" |
| }, |
| "registered_name": null |
| }, |
| "tokenizer": { |
| "module": "keras_hub.src.models.smollm3.smollm3_tokenizer", |
| "class_name": "SmolLM3Tokenizer", |
| "config": { |
| "name": "smol_lm3_tokenizer", |
| "trainable": true, |
| "dtype": { |
| "module": "keras", |
| "class_name": "DTypePolicy", |
| "config": { |
| "name": "int32" |
| }, |
| "registered_name": null |
| }, |
| "config_file": "tokenizer.json", |
| "sequence_length": null, |
| "add_prefix_space": false, |
| "unsplittable_tokens": [ |
| "<think>", |
| "<tool_call>", |
| "<|end_header_id|>", |
| "<|eot_id|>", |
| "<|im_end|>", |
| "<|end_of_text|>", |
| "</think>", |
| "<tool_response>", |
| "<|im_start|>", |
| "</tool_response>", |
| "</tool_call>", |
| "<|start_header_id|>", |
| "<|begin_of_text|>", |
| "<|eom_id|>", |
| "<code>", |
| "</code>", |
| "<|python_tag|>", |
| "<|finetune_right_pad_id|>" |
| ] |
| }, |
| "registered_name": "keras_hub>SmolLM3Tokenizer" |
| }, |
| "config_file": "preprocessor.json", |
| "sequence_length": 1024, |
| "add_start_token": true, |
| "add_end_token": true |
| }, |
| "registered_name": "keras_hub>SmolLM3CausalLMPreprocessor" |
| }, |
| "name": "smol_lm3_causal_lm" |
| }, |
| "registered_name": "keras_hub>SmolLM3CausalLM" |
| } |