AdityaNarayan committed on
Commit
c03dbc6
·
verified ·
1 Parent(s): 27ac0d4

added training_info.json

Browse files
Files changed (1) hide show
  1. training_info.json +86 -0
training_info.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "training_metadata": {
3
+ "timestamp": "20251016_174948",
4
+ "training_date": "2025-10-17",
5
+ "training_time": "00:42:52",
6
+ "final_epoch": 3.0382470119521914,
7
+ "total_steps": 1431,
8
+ "status": "completed"
9
+ },
10
+ "model_config": {
11
+ "base_model": "Qwen/Qwen2.5-Coder-32B-Instruct",
12
+ "model_type": "causal_lm",
13
+ "architecture": "Qwen2ForCausalLM"
14
+ },
15
+ "lora_config": {
16
+ "r": 64,
17
+ "lora_alpha": 128,
18
+ "lora_dropout": 0.05,
19
+ "target_modules": [
20
+ "q_proj",
21
+ "k_proj",
22
+ "v_proj",
23
+ "o_proj",
24
+ "gate_proj",
25
+ "up_proj",
26
+ "down_proj"
27
+ ]
28
+ },
29
+ "training_config": {
30
+ "num_epochs": 5,
31
+ "per_device_train_batch_size": 2,
32
+ "per_device_eval_batch_size": 1,
33
+ "gradient_accumulation_steps": 8,
34
+ "effective_batch_size": 32,
35
+ "learning_rate": 5e-05,
36
+ "lr_scheduler_type": "cosine",
37
+ "warmup_ratio": 0.02,
38
+ "weight_decay": 0.1,
39
+ "max_grad_norm": 0.5,
40
+ "bf16": true,
41
+ "gradient_checkpointing": true,
42
+ "optim": "adamw_torch",
43
+ "logging_steps": 10,
44
+ "save_steps": 50,
45
+ "eval_steps": 25
46
+ },
47
+ "dataset_info": {
48
+ "train_samples": 15057,
49
+ "eval_samples": 1674,
50
+ "max_seq_length": 8192,
51
+ "sample_packing": false
52
+ },
53
+ "hardware_config": {
54
+ "num_gpus": 2,
55
+ "gpu_model": "Unknown",
56
+ "distributed_strategy": "DeepSpeed ZeRO-2",
57
+ "flash_attention": "2.8.3"
58
+ },
59
+ "performance_metrics": {
60
+ "final_train_loss": 0.3949,
61
+ "final_eval_loss": 0.4636613428592682,
62
+ "final_train_perplexity": 1.4842357599234954,
63
+ "final_eval_perplexity": 1.5898844535357601,
64
+ "final_token_accuracy": 0.8872479304671288,
65
+ "initial_loss": 1.724,
66
+ "initial_perplexity": 5.606911313988792,
67
+ "initial_accuracy": 0.5987553134560585
68
+ },
69
+ "framework_versions": {
70
+ "torch": "2.4.1+cu124",
71
+ "transformers": "4.57.1",
72
+ "peft": "0.17.1",
73
+ "trl": "0.23.1",
74
+ "deepspeed": "0.18.0",
75
+ "flash_attn": "2.8.3",
76
+ "python": "3.12.3"
77
+ },
78
+ "special_features": {
79
+ "flash_attention_2": true,
80
+ "gradient_checkpointing": true,
81
+ "bf16_training": true,
82
+ "sample_packing": false,
83
+ "deepspeed_zero2": true,
84
+ "distributed_training": true
85
+ }
86
+ }