| { | |
| "training_metadata": { | |
| "timestamp": "20251016_174948", | |
| "training_date": "2025-10-17", | |
| "training_time": "00:42:52", | |
| "final_epoch": 3.0382470119521914, | |
| "total_steps": 1431, | |
| "status": "completed" | |
| }, | |
| "model_config": { | |
| "base_model": "Qwen/Qwen2.5-Coder-32B-Instruct", | |
| "model_type": "causal_lm", | |
| "architecture": "Qwen2ForCausalLM" | |
| }, | |
| "lora_config": { | |
| "r": 64, | |
| "lora_alpha": 128, | |
| "lora_dropout": 0.05, | |
| "target_modules": [ | |
| "q_proj", | |
| "k_proj", | |
| "v_proj", | |
| "o_proj", | |
| "gate_proj", | |
| "up_proj", | |
| "down_proj" | |
| ] | |
| }, | |
| "training_config": { | |
| "num_epochs": 5, | |
| "per_device_train_batch_size": 2, | |
| "per_device_eval_batch_size": 1, | |
| "gradient_accumulation_steps": 8, | |
| "effective_batch_size": 32, | |
| "learning_rate": 5e-05, | |
| "lr_scheduler_type": "cosine", | |
| "warmup_ratio": 0.02, | |
| "weight_decay": 0.1, | |
| "max_grad_norm": 0.5, | |
| "bf16": true, | |
| "gradient_checkpointing": true, | |
| "optim": "adamw_torch", | |
| "logging_steps": 10, | |
| "save_steps": 50, | |
| "eval_steps": 25 | |
| }, | |
| "dataset_info": { | |
| "train_samples": 15057, | |
| "eval_samples": 1674, | |
| "max_seq_length": 8192, | |
| "sample_packing": false | |
| }, | |
| "hardware_config": { | |
| "num_gpus": 2, | |
| "gpu_model": "Unknown", | |
| "distributed_strategy": "DeepSpeed ZeRO-2", | |
| "flash_attention": "2.8.3" | |
| }, | |
| "performance_metrics": { | |
| "final_train_loss": 0.3949, | |
| "final_eval_loss": 0.4636613428592682, | |
| "final_train_perplexity": 1.4842357599234954, | |
| "final_eval_perplexity": 1.5898844535357601, | |
| "final_token_accuracy": 0.8872479304671288, | |
| "initial_loss": 1.724, | |
| "initial_perplexity": 5.606911313988792, | |
| "initial_accuracy": 0.5987553134560585 | |
| }, | |
| "framework_versions": { | |
| "torch": "2.4.1+cu124", | |
| "transformers": "4.57.1", | |
| "peft": "0.17.1", | |
| "trl": "0.23.1", | |
| "deepspeed": "0.18.0", | |
| "flash_attn": "2.8.3", | |
| "python": "3.12.3" | |
| }, | |
| "special_features": { | |
| "flash_attention_2": true, | |
| "gradient_checkpointing": true, | |
| "bf16_training": true, | |
| "sample_packing": false, | |
| "deepspeed_zero2": true, | |
| "distributed_training": true | |
| } | |
| } |