AdityaNarayan's picture
added training_info.json
c03dbc6 verified
{
"training_metadata": {
"timestamp": "20251016_174948",
"training_date": "2025-10-17",
"training_time": "00:42:52",
"final_epoch": 3.0382470119521914,
"total_steps": 1431,
"status": "completed"
},
"model_config": {
"base_model": "Qwen/Qwen2.5-Coder-32B-Instruct",
"model_type": "causal_lm",
"architecture": "Qwen2ForCausalLM"
},
"lora_config": {
"r": 64,
"lora_alpha": 128,
"lora_dropout": 0.05,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
]
},
"training_config": {
"num_epochs": 5,
"per_device_train_batch_size": 2,
"per_device_eval_batch_size": 1,
"gradient_accumulation_steps": 8,
"effective_batch_size": 32,
"learning_rate": 5e-05,
"lr_scheduler_type": "cosine",
"warmup_ratio": 0.02,
"weight_decay": 0.1,
"max_grad_norm": 0.5,
"bf16": true,
"gradient_checkpointing": true,
"optim": "adamw_torch",
"logging_steps": 10,
"save_steps": 50,
"eval_steps": 25
},
"dataset_info": {
"train_samples": 15057,
"eval_samples": 1674,
"max_seq_length": 8192,
"sample_packing": false
},
"hardware_config": {
"num_gpus": 2,
"gpu_model": "Unknown",
"distributed_strategy": "DeepSpeed ZeRO-2",
"flash_attention": "2.8.3"
},
"performance_metrics": {
"final_train_loss": 0.3949,
"final_eval_loss": 0.4636613428592682,
"final_train_perplexity": 1.4842357599234954,
"final_eval_perplexity": 1.5898844535357601,
"final_token_accuracy": 0.8872479304671288,
"initial_loss": 1.724,
"initial_perplexity": 5.606911313988792,
"initial_accuracy": 0.5987553134560585
},
"framework_versions": {
"torch": "2.4.1+cu124",
"transformers": "4.57.1",
"peft": "0.17.1",
"trl": "0.23.1",
"deepspeed": "0.18.0",
"flash_attn": "2.8.3",
"python": "3.12.3"
},
"special_features": {
"flash_attention_2": true,
"gradient_checkpointing": true,
"bf16_training": true,
"sample_packing": false,
"deepspeed_zero2": true,
"distributed_training": true
}
}