{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 60.0,
  "global_step": 6660,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9,
      "learning_rate": 7.275e-06,
      "loss": 13.5875,
      "step": 100
    },
    {
      "epoch": 1.8,
      "learning_rate": 1.4775e-05,
      "loss": 5.5556,
      "step": 200
    },
    {
      "epoch": 2.7,
      "learning_rate": 2.2274999999999996e-05,
      "loss": 4.0171,
      "step": 300
    },
    {
      "epoch": 3.6,
      "learning_rate": 2.9775e-05,
      "loss": 3.4219,
      "step": 400
    },
    {
      "epoch": 3.6,
      "eval_loss": 3.3127081394195557,
      "eval_runtime": 73.7584,
      "eval_samples_per_second": 22.031,
      "eval_steps_per_second": 1.383,
      "eval_wer": 1.0,
      "step": 400
    },
    {
      "epoch": 4.5,
      "learning_rate": 3.7275e-05,
      "loss": 3.2104,
      "step": 500
    },
    {
      "epoch": 5.41,
      "learning_rate": 4.4775e-05,
      "loss": 3.152,
      "step": 600
    },
    {
      "epoch": 6.31,
      "learning_rate": 5.227499999999999e-05,
      "loss": 3.0987,
      "step": 700
    },
    {
      "epoch": 7.21,
      "learning_rate": 5.9774999999999996e-05,
      "loss": 3.0399,
      "step": 800
    },
    {
      "epoch": 7.21,
      "eval_loss": 3.0330417156219482,
      "eval_runtime": 71.6796,
      "eval_samples_per_second": 22.67,
      "eval_steps_per_second": 1.423,
      "eval_wer": 1.0,
      "step": 800
    },
    {
      "epoch": 8.11,
      "learning_rate": 6.7275e-05,
      "loss": 3.0035,
      "step": 900
    },
    {
      "epoch": 9.01,
      "learning_rate": 7.477499999999999e-05,
      "loss": 2.7063,
      "step": 1000
    },
    {
      "epoch": 9.91,
      "learning_rate": 7.371466431095406e-05,
      "loss": 2.0137,
      "step": 1100
    },
    {
      "epoch": 10.81,
      "learning_rate": 7.238957597173144e-05,
      "loss": 1.5756,
      "step": 1200
    },
    {
      "epoch": 10.81,
      "eval_loss": 0.6108289361000061,
      "eval_runtime": 70.7969,
      "eval_samples_per_second": 22.953,
      "eval_steps_per_second": 1.441,
      "eval_wer": 0.572425678586816,
      "step": 1200
    },
    {
      "epoch": 11.71,
      "learning_rate": 7.106448763250883e-05,
      "loss": 1.3527,
      "step": 1300
    },
    {
      "epoch": 12.61,
      "learning_rate": 6.973939929328621e-05,
      "loss": 1.2067,
      "step": 1400
    },
    {
      "epoch": 13.51,
      "learning_rate": 6.841431095406359e-05,
      "loss": 1.1508,
      "step": 1500
    },
    {
      "epoch": 14.41,
      "learning_rate": 6.708922261484098e-05,
      "loss": 1.0995,
      "step": 1600
    },
    {
      "epoch": 14.41,
      "eval_loss": 0.30910709500312805,
      "eval_runtime": 70.2841,
      "eval_samples_per_second": 23.12,
      "eval_steps_per_second": 1.451,
      "eval_wer": 0.31538130116329166,
      "step": 1600
    },
    {
      "epoch": 15.32,
      "learning_rate": 6.576413427561838e-05,
      "loss": 1.0495,
      "step": 1700
    },
    {
      "epoch": 16.22,
      "learning_rate": 6.443904593639576e-05,
      "loss": 1.0183,
      "step": 1800
    },
    {
      "epoch": 17.12,
      "learning_rate": 6.311395759717314e-05,
      "loss": 0.9867,
      "step": 1900
    },
    {
      "epoch": 18.02,
      "learning_rate": 6.178886925795053e-05,
      "loss": 0.9639,
      "step": 2000
    },
    {
      "epoch": 18.02,
      "eval_loss": 0.2596471905708313,
      "eval_runtime": 70.5333,
      "eval_samples_per_second": 23.039,
      "eval_steps_per_second": 1.446,
      "eval_wer": 0.2841016803102111,
      "step": 2000
    },
    {
      "epoch": 18.92,
      "learning_rate": 6.046378091872791e-05,
      "loss": 0.9383,
      "step": 2100
    },
    {
      "epoch": 19.82,
      "learning_rate": 5.913869257950529e-05,
      "loss": 0.9041,
      "step": 2200
    },
    {
      "epoch": 20.72,
      "learning_rate": 5.781360424028268e-05,
      "loss": 0.8936,
      "step": 2300
    },
    {
      "epoch": 21.62,
      "learning_rate": 5.648851590106007e-05,
      "loss": 0.9032,
      "step": 2400
    },
    {
      "epoch": 21.62,
      "eval_loss": 0.22702418267726898,
      "eval_runtime": 71.1235,
      "eval_samples_per_second": 22.848,
      "eval_steps_per_second": 1.434,
      "eval_wer": 0.2513571736320552,
      "step": 2400
    },
    {
      "epoch": 22.52,
      "learning_rate": 5.516342756183745e-05,
      "loss": 0.8739,
      "step": 2500
    },
    {
      "epoch": 23.42,
      "learning_rate": 5.383833922261484e-05,
      "loss": 0.8602,
      "step": 2600
    },
    {
      "epoch": 24.32,
      "learning_rate": 5.2513250883392223e-05,
      "loss": 0.8483,
      "step": 2700
    },
    {
      "epoch": 25.23,
      "learning_rate": 5.11881625441696e-05,
      "loss": 0.8145,
      "step": 2800
    },
    {
      "epoch": 25.23,
      "eval_loss": 0.21715673804283142,
      "eval_runtime": 70.4062,
      "eval_samples_per_second": 23.08,
      "eval_steps_per_second": 1.449,
      "eval_wer": 0.24834123222748816,
      "step": 2800
    },
    {
      "epoch": 26.13,
      "learning_rate": 4.986307420494699e-05,
      "loss": 0.8245,
      "step": 2900
    },
    {
      "epoch": 27.03,
      "learning_rate": 4.853798586572438e-05,
      "loss": 0.8085,
      "step": 3000
    },
    {
      "epoch": 27.93,
      "learning_rate": 4.721289752650177e-05,
      "loss": 0.8101,
      "step": 3100
    },
    {
      "epoch": 28.83,
      "learning_rate": 4.588780918727915e-05,
      "loss": 0.7845,
      "step": 3200
    },
    {
      "epoch": 28.83,
      "eval_loss": 0.2083793580532074,
      "eval_runtime": 70.1439,
      "eval_samples_per_second": 23.167,
      "eval_steps_per_second": 1.454,
      "eval_wer": 0.23326152520465315,
      "step": 3200
    },
    {
      "epoch": 29.73,
      "learning_rate": 4.4562720848056537e-05,
      "loss": 0.7816,
      "step": 3300
    },
    {
      "epoch": 30.63,
      "learning_rate": 4.3237632508833916e-05,
      "loss": 0.7655,
      "step": 3400
    },
    {
      "epoch": 31.53,
      "learning_rate": 4.19125441696113e-05,
      "loss": 0.762,
      "step": 3500
    },
    {
      "epoch": 32.43,
      "learning_rate": 4.058745583038869e-05,
      "loss": 0.7694,
      "step": 3600
    },
    {
      "epoch": 32.43,
      "eval_loss": 0.19744575023651123,
      "eval_runtime": 70.5594,
      "eval_samples_per_second": 23.03,
      "eval_steps_per_second": 1.446,
      "eval_wer": 0.22343817320120637,
      "step": 3600
    },
    {
      "epoch": 33.33,
      "learning_rate": 3.926236749116607e-05,
      "loss": 0.7517,
      "step": 3700
    },
    {
      "epoch": 34.23,
      "learning_rate": 3.7937279151943456e-05,
      "loss": 0.7401,
      "step": 3800
    },
    {
      "epoch": 35.14,
      "learning_rate": 3.661219081272084e-05,
      "loss": 0.7196,
      "step": 3900
    },
    {
      "epoch": 36.04,
      "learning_rate": 3.528710247349823e-05,
      "loss": 0.7333,
      "step": 4000
    },
    {
      "epoch": 36.04,
      "eval_loss": 0.20204411447048187,
      "eval_runtime": 69.6934,
      "eval_samples_per_second": 23.316,
      "eval_steps_per_second": 1.464,
      "eval_wer": 0.21852649719948297,
      "step": 4000
    },
    {
      "epoch": 36.94,
      "learning_rate": 3.3962014134275616e-05,
      "loss": 0.7251,
      "step": 4100
    },
    {
      "epoch": 37.84,
      "learning_rate": 3.2636925795053e-05,
      "loss": 0.7116,
      "step": 4200
    },
    {
      "epoch": 38.74,
      "learning_rate": 3.131183745583038e-05,
      "loss": 0.7236,
      "step": 4300
    },
    {
      "epoch": 39.64,
      "learning_rate": 2.998674911660777e-05,
      "loss": 0.693,
      "step": 4400
    },
    {
      "epoch": 39.64,
      "eval_loss": 0.194662407040596,
      "eval_runtime": 69.8883,
      "eval_samples_per_second": 23.251,
      "eval_steps_per_second": 1.459,
      "eval_wer": 0.21482119775958639,
      "step": 4400
    },
    {
      "epoch": 40.54,
      "learning_rate": 2.866166077738516e-05,
      "loss": 0.6943,
      "step": 4500
    },
    {
      "epoch": 41.44,
      "learning_rate": 2.7336572438162543e-05,
      "loss": 0.6938,
      "step": 4600
    },
    {
      "epoch": 42.34,
      "learning_rate": 2.6011484098939926e-05,
      "loss": 0.6831,
      "step": 4700
    },
    {
      "epoch": 43.24,
      "learning_rate": 2.469964664310954e-05,
      "loss": 0.6802,
      "step": 4800
    },
    {
      "epoch": 43.24,
      "eval_loss": 0.19601202011108398,
      "eval_runtime": 69.3985,
      "eval_samples_per_second": 23.416,
      "eval_steps_per_second": 1.47,
      "eval_wer": 0.2101680310211116,
      "step": 4800
    },
    {
      "epoch": 44.14,
      "learning_rate": 2.3374558303886924e-05,
      "loss": 0.6756,
      "step": 4900
    },
    {
      "epoch": 45.05,
      "learning_rate": 2.2049469964664307e-05,
      "loss": 0.67,
      "step": 5000
    },
    {
      "epoch": 45.95,
      "learning_rate": 2.0724381625441694e-05,
      "loss": 0.6753,
      "step": 5100
    },
    {
      "epoch": 46.85,
      "learning_rate": 1.939929328621908e-05,
      "loss": 0.667,
      "step": 5200
    },
    {
      "epoch": 46.85,
      "eval_loss": 0.19041335582733154,
      "eval_runtime": 70.1706,
      "eval_samples_per_second": 23.158,
      "eval_steps_per_second": 1.454,
      "eval_wer": 0.2072382593709608,
      "step": 5200
    },
    {
      "epoch": 47.75,
      "learning_rate": 1.8074204946996464e-05,
      "loss": 0.6562,
      "step": 5300
    },
    {
      "epoch": 48.65,
      "learning_rate": 1.674911660777385e-05,
      "loss": 0.666,
      "step": 5400
    },
    {
      "epoch": 49.55,
      "learning_rate": 1.5424028268551237e-05,
      "loss": 0.6572,
      "step": 5500
    },
    {
      "epoch": 50.45,
      "learning_rate": 1.409893992932862e-05,
      "loss": 0.6486,
      "step": 5600
    },
    {
      "epoch": 50.45,
      "eval_loss": 0.18806982040405273,
      "eval_runtime": 70.1099,
      "eval_samples_per_second": 23.178,
      "eval_steps_per_second": 1.455,
      "eval_wer": 0.2009478672985782,
      "step": 5600
    },
    {
      "epoch": 51.35,
      "learning_rate": 1.2773851590106007e-05,
      "loss": 0.6484,
      "step": 5700
    },
    {
      "epoch": 52.25,
      "learning_rate": 1.1448763250883392e-05,
      "loss": 0.6549,
      "step": 5800
    },
    {
      "epoch": 53.15,
      "learning_rate": 1.0123674911660777e-05,
      "loss": 0.6322,
      "step": 5900
    },
    {
      "epoch": 54.05,
      "learning_rate": 8.811837455830388e-06,
      "loss": 0.6339,
      "step": 6000
    },
    {
      "epoch": 54.05,
      "eval_loss": 0.1877446174621582,
      "eval_runtime": 70.1417,
      "eval_samples_per_second": 23.167,
      "eval_steps_per_second": 1.454,
      "eval_wer": 0.1988797931925894,
      "step": 6000
    },
    {
      "epoch": 54.95,
      "learning_rate": 7.486749116607773e-06,
      "loss": 0.6413,
      "step": 6100
    },
    {
      "epoch": 55.86,
      "learning_rate": 6.161660777385159e-06,
      "loss": 0.6392,
      "step": 6200
    },
    {
      "epoch": 56.76,
      "learning_rate": 4.836572438162544e-06,
      "loss": 0.6363,
      "step": 6300
    },
    {
      "epoch": 57.66,
      "learning_rate": 3.511484098939929e-06,
      "loss": 0.6254,
      "step": 6400
    },
    {
      "epoch": 57.66,
      "eval_loss": 0.18933533132076263,
      "eval_runtime": 69.6111,
      "eval_samples_per_second": 23.344,
      "eval_steps_per_second": 1.465,
      "eval_wer": 0.2002585092632486,
      "step": 6400
    },
    {
      "epoch": 58.56,
      "learning_rate": 2.1863957597173144e-06,
      "loss": 0.625,
      "step": 6500
    },
    {
      "epoch": 59.46,
      "learning_rate": 8.613074204946996e-07,
      "loss": 0.6379,
      "step": 6600
    },
    {
      "epoch": 60.0,
      "step": 6660,
      "total_flos": 3.0269424793140363e+19,
      "train_loss": 1.3688410950852585,
      "train_runtime": 13291.4016,
      "train_samples_per_second": 16.016,
      "train_steps_per_second": 0.501
    }
  ],
  "max_steps": 6660,
  "num_train_epochs": 60,
  "total_flos": 3.0269424793140363e+19,
  "trial_name": null,
  "trial_params": null
}