{ "model_type": "phi", "architecture": "MiniMax-M2", "vocab_size": 51200, "max_position_embeddings": 32768, "num_attention_heads": 16, "num_key_value_heads": 2, "num_hidden_layers": 36, "intermediate_size": 8192, "hidden_size": 2048, "rms_norm_epsilon": 1e-6, "rope_theta": 10000.0, "pad_token_id": 50256, "eos_token_id": 50256, "bos_token_id": 50256, "torch_dtype": "float16", "model_specifics": { "total_parameters": 3090000000, "non_embedding_parameters": 2770000000, "embedding_parameters": 320000000, "parameter_percentage": { "embedding_layer": 0.104, "transformer_layers": 0.793, "layer_norm": 0.003 } }, "optimization_config": { "quantization": { "supported_formats": ["fp32", "fp16", "int8", "int4"], "recommended": { "memory_optimized": "int8", "performance_optimized": "fp16", "memory_constrained": "int4" } }, "memory_requirements": { "fp32": 12.0, "fp16": 6.0, "int8": 3.5, "int4": 2.0, "runtime_activation": 0.5 }, "inference_optimization": { "flash_attention": true, "gradient_checkpointing": true, "mixed_precision": true, "dynamic_batching": false } }, "training_config": { "base_model": "microsoft/phi-2", "context_length": 32768, "batch_size": { "train": 8, "eval": 8, "gradient_accumulation": 4 }, "learning_rate": 1e-4, "num_epochs": 3, "warmup_steps": 1000, "max_grad_norm": 1.0, "weight_decay": 0.01, "logging_steps": 100, "save_steps": 1000, "eval_steps": 1000 }, "specialization": { "primary_languages": ["javascript", "typescript", "xml", "html", "css", "mdx"], "domain_focus": "web_development", "on_device_ready": true, "memory_optimized": true, "context_extended": true }, "evaluation_targets": { "mmlu_code_score": ">60%", "humaneval": ">40%", "codebleu": ">0.65", "syntax_validity": ">95%", "semantic_coherence": ">0.80" }, "tokenization": { "base_tokenizer": "microsoft/codebert-base", "tokenizer_max_length": 8192, "special_tokens": { "javascript": ["", "", "", "", "", ""], "xml": ["", "", "", "", "", ""], "mdx": ["", "", "", "", "", ""] } }, "dataset_distribution": { "total_training_tokens": "500B", "language_distribution": { "javascript_typescript": 0.35, "xml_html": 0.25, "mdx_markdown": 0.15, "css_scss": 0.10, "other_languages": 0.15 }, "task_distribution": { "code_completion": 0.40, "instruction_following": 0.25, "code_explanation": 0.20, "generation": 0.10, "debugging": 0.05 } }, "quality_metrics": { "data_quality_threshold": 0.85, "duplication_rate_max": 0.05, "language_accuracy": 0.95, "syntax_validity_min": 0.90, "semantic_coherence_min": 0.75 }, "deployment_config": { "target_memory_gb": "6-12", "quantization_strategies": { "mobile": "int8", "edge": "int8", "desktop": "fp16", "server": "fp16" }, "inference_time_target": { "512_tokens": "<100ms", "1024_tokens": "<200ms", "2048_tokens": "<400ms" } } }