Qwen-2.5-7B-GRPO-NoBaseline-Adam-FisherMaskToken-1e-5-HessianMaskToken-0.01-v2_5923
/
train_results.json
| { | |
| "total_flos": 0.0, | |
| "train_loss": -0.6797916829586029, | |
| "train_runtime": 15592.1045, | |
| "train_samples": 7500, | |
| "train_samples_per_second": 0.616, | |
| "train_steps_per_second": 0.006 | |
| } |