{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 3906, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "embedding_loss": 0.1733, "epoch": 0.0002560163850486431, "grad_norm": 1.3411569595336914, "learning_rate": 0.0, "step": 1 }, { "embedding_loss": 0.1425, "epoch": 0.012800819252432157, "grad_norm": 1.5893903970718384, "learning_rate": 2.5063938618925833e-06, "step": 50 }, { "embedding_loss": 0.0954, "epoch": 0.025601638504864313, "grad_norm": 1.3638615608215332, "learning_rate": 5.0639386189258325e-06, "step": 100 }, { "embedding_loss": 0.0483, "epoch": 0.03840245775729647, "grad_norm": 1.0745915174484253, "learning_rate": 7.62148337595908e-06, "step": 150 }, { "embedding_loss": 0.0297, "epoch": 0.051203277009728626, "grad_norm": 0.8514853715896606, "learning_rate": 1.0179028132992328e-05, "step": 200 }, { "embedding_loss": 0.0189, "epoch": 0.06400409626216078, "grad_norm": 0.7511357069015503, "learning_rate": 1.2736572890025576e-05, "step": 250 }, { "embedding_loss": 0.0122, "epoch": 0.07680491551459294, "grad_norm": 0.525897204875946, "learning_rate": 1.5294117647058822e-05, "step": 300 }, { "embedding_loss": 0.0099, "epoch": 0.08960573476702509, "grad_norm": 0.5095636248588562, "learning_rate": 1.7851662404092073e-05, "step": 350 }, { "embedding_loss": 0.0085, "epoch": 0.10240655401945725, "grad_norm": 1.5715042352676392, "learning_rate": 1.9954480796586062e-05, "step": 400 }, { "embedding_loss": 0.0067, "epoch": 0.1152073732718894, "grad_norm": 0.27569064497947693, "learning_rate": 1.9669985775248933e-05, "step": 450 }, { "embedding_loss": 0.0059, "epoch": 0.12800819252432155, "grad_norm": 0.09678972512483597, "learning_rate": 1.9385490753911807e-05, "step": 500 }, { "embedding_loss": 0.0076, "epoch": 0.1408090117767537, "grad_norm": 0.15479978919029236, "learning_rate": 1.910099573257468e-05, "step": 550 }, { "embedding_loss": 0.0061, "epoch": 0.15360983102918588, "grad_norm": 0.4873831272125244, "learning_rate": 1.8816500711237555e-05, "step": 600 }, { "embedding_loss": 0.0049, "epoch": 0.16641065028161803, "grad_norm": 0.09139434248209, "learning_rate": 1.8532005689900426e-05, "step": 650 }, { "embedding_loss": 0.003, "epoch": 0.17921146953405018, "grad_norm": 0.05168338865041733, "learning_rate": 1.8247510668563304e-05, "step": 700 }, { "embedding_loss": 0.0023, "epoch": 0.19201228878648233, "grad_norm": 0.05378415808081627, "learning_rate": 1.7963015647226174e-05, "step": 750 }, { "embedding_loss": 0.002, "epoch": 0.2048131080389145, "grad_norm": 0.026590052992105484, "learning_rate": 1.7678520625889048e-05, "step": 800 }, { "embedding_loss": 0.0037, "epoch": 0.21761392729134665, "grad_norm": 0.03566845878958702, "learning_rate": 1.739402560455192e-05, "step": 850 }, { "embedding_loss": 0.0007, "epoch": 0.2304147465437788, "grad_norm": 0.061990801244974136, "learning_rate": 1.7109530583214796e-05, "step": 900 }, { "embedding_loss": 0.0015, "epoch": 0.24321556579621095, "grad_norm": 0.027544036507606506, "learning_rate": 1.682503556187767e-05, "step": 950 }, { "embedding_loss": 0.0017, "epoch": 0.2560163850486431, "grad_norm": 0.04399234429001808, "learning_rate": 1.654054054054054e-05, "step": 1000 }, { "embedding_loss": 0.0014, "epoch": 0.26881720430107525, "grad_norm": 0.01828560046851635, "learning_rate": 1.6256045519203415e-05, "step": 1050 }, { "embedding_loss": 0.0003, "epoch": 0.2816180235535074, "grad_norm": 0.018775783479213715, "learning_rate": 1.597155049786629e-05, "step": 1100 }, { "embedding_loss": 0.0006, "epoch": 0.2944188428059396, "grad_norm": 0.01576610654592514, "learning_rate": 1.5687055476529163e-05, "step": 1150 }, { "embedding_loss": 0.001, "epoch": 0.30721966205837176, "grad_norm": 0.14026065170764923, "learning_rate": 1.5402560455192034e-05, "step": 1200 }, { "embedding_loss": 0.0007, "epoch": 0.3200204813108039, "grad_norm": 0.01965928263962269, "learning_rate": 1.511806543385491e-05, "step": 1250 }, { "embedding_loss": 0.0002, "epoch": 0.33282130056323606, "grad_norm": 0.014394218102097511, "learning_rate": 1.4833570412517782e-05, "step": 1300 }, { "embedding_loss": 0.0012, "epoch": 0.3456221198156682, "grad_norm": 0.018288280814886093, "learning_rate": 1.4549075391180656e-05, "step": 1350 }, { "embedding_loss": 0.0006, "epoch": 0.35842293906810035, "grad_norm": 0.017536135390400887, "learning_rate": 1.4264580369843529e-05, "step": 1400 }, { "embedding_loss": 0.0003, "epoch": 0.3712237583205325, "grad_norm": 0.012779198586940765, "learning_rate": 1.3980085348506403e-05, "step": 1450 }, { "embedding_loss": 0.0005, "epoch": 0.38402457757296465, "grad_norm": 0.19666947424411774, "learning_rate": 1.3695590327169275e-05, "step": 1500 }, { "embedding_loss": 0.0002, "epoch": 0.3968253968253968, "grad_norm": 0.008438820950686932, "learning_rate": 1.341109530583215e-05, "step": 1550 }, { "embedding_loss": 0.0004, "epoch": 0.409626216077829, "grad_norm": 0.009661088697612286, "learning_rate": 1.3126600284495022e-05, "step": 1600 }, { "embedding_loss": 0.0009, "epoch": 0.42242703533026116, "grad_norm": 0.02346787601709366, "learning_rate": 1.2842105263157896e-05, "step": 1650 }, { "embedding_loss": 0.0007, "epoch": 0.4352278545826933, "grad_norm": 0.0062308646738529205, "learning_rate": 1.255761024182077e-05, "step": 1700 }, { "embedding_loss": 0.0003, "epoch": 0.44802867383512546, "grad_norm": 0.013097619637846947, "learning_rate": 1.2273115220483642e-05, "step": 1750 }, { "embedding_loss": 0.0001, "epoch": 0.4608294930875576, "grad_norm": 0.008484157733619213, "learning_rate": 1.1988620199146516e-05, "step": 1800 }, { "embedding_loss": 0.0006, "epoch": 0.47363031233998976, "grad_norm": 0.010278033092617989, "learning_rate": 1.1704125177809389e-05, "step": 1850 }, { "embedding_loss": 0.0004, "epoch": 0.4864311315924219, "grad_norm": 0.012740347534418106, "learning_rate": 1.1419630156472263e-05, "step": 1900 }, { "embedding_loss": 0.0004, "epoch": 0.49923195084485406, "grad_norm": 0.022875774651765823, "learning_rate": 1.1135135135135135e-05, "step": 1950 }, { "embedding_loss": 0.0001, "epoch": 0.5120327700972862, "grad_norm": 0.007295957300812006, "learning_rate": 1.085064011379801e-05, "step": 2000 }, { "embedding_loss": 0.0004, "epoch": 0.5248335893497184, "grad_norm": 0.008994905278086662, "learning_rate": 1.0566145092460882e-05, "step": 2050 }, { "embedding_loss": 0.0003, "epoch": 0.5376344086021505, "grad_norm": 0.01373240165412426, "learning_rate": 1.0281650071123756e-05, "step": 2100 }, { "embedding_loss": 0.0012, "epoch": 0.5504352278545827, "grad_norm": 0.017664149403572083, "learning_rate": 9.99715504978663e-06, "step": 2150 }, { "embedding_loss": 0.0006, "epoch": 0.5632360471070148, "grad_norm": 0.026641126722097397, "learning_rate": 9.712660028449504e-06, "step": 2200 }, { "embedding_loss": 0.0001, "epoch": 0.576036866359447, "grad_norm": 0.015163728035986423, "learning_rate": 9.428165007112376e-06, "step": 2250 }, { "embedding_loss": 0.0003, "epoch": 0.5888376856118792, "grad_norm": 0.014809815213084221, "learning_rate": 9.14366998577525e-06, "step": 2300 }, { "embedding_loss": 0.0007, "epoch": 0.6016385048643114, "grad_norm": 0.007227804511785507, "learning_rate": 8.859174964438123e-06, "step": 2350 }, { "embedding_loss": 0.0003, "epoch": 0.6144393241167435, "grad_norm": 0.010935621336102486, "learning_rate": 8.574679943100997e-06, "step": 2400 }, { "embedding_loss": 0.0004, "epoch": 0.6272401433691757, "grad_norm": 0.0073186722584068775, "learning_rate": 8.29018492176387e-06, "step": 2450 }, { "embedding_loss": 0.0006, "epoch": 0.6400409626216078, "grad_norm": 0.006618503015488386, "learning_rate": 8.005689900426743e-06, "step": 2500 }, { "embedding_loss": 0.0001, "epoch": 0.65284178187404, "grad_norm": 0.008351747877895832, "learning_rate": 7.721194879089616e-06, "step": 2550 }, { "embedding_loss": 0.0003, "epoch": 0.6656426011264721, "grad_norm": 0.008504342287778854, "learning_rate": 7.43669985775249e-06, "step": 2600 }, { "embedding_loss": 0.0006, "epoch": 0.6784434203789043, "grad_norm": 0.015397731214761734, "learning_rate": 7.152204836415363e-06, "step": 2650 }, { "embedding_loss": 0.001, "epoch": 0.6912442396313364, "grad_norm": 0.004343625158071518, "learning_rate": 6.867709815078236e-06, "step": 2700 }, { "embedding_loss": 0.0004, "epoch": 0.7040450588837686, "grad_norm": 0.010210598818957806, "learning_rate": 6.58321479374111e-06, "step": 2750 }, { "embedding_loss": 0.0008, "epoch": 0.7168458781362007, "grad_norm": 0.021265419200062752, "learning_rate": 6.2987197724039836e-06, "step": 2800 }, { "embedding_loss": 0.0003, "epoch": 0.7296466973886329, "grad_norm": 0.006350652314722538, "learning_rate": 6.014224751066858e-06, "step": 2850 }, { "embedding_loss": 0.0007, "epoch": 0.742447516641065, "grad_norm": 0.042495131492614746, "learning_rate": 5.729729729729731e-06, "step": 2900 }, { "embedding_loss": 0.0007, "epoch": 0.7552483358934972, "grad_norm": 0.003976090345531702, "learning_rate": 5.445234708392604e-06, "step": 2950 }, { "embedding_loss": 0.0007, "epoch": 0.7680491551459293, "grad_norm": 0.008590229786932468, "learning_rate": 5.160739687055477e-06, "step": 3000 }, { "embedding_loss": 0.0003, "epoch": 0.7808499743983615, "grad_norm": 0.006639127153903246, "learning_rate": 4.8762446657183506e-06, "step": 3050 }, { "embedding_loss": 0.0003, "epoch": 0.7936507936507936, "grad_norm": 0.006947138346731663, "learning_rate": 4.591749644381224e-06, "step": 3100 }, { "embedding_loss": 0.0003, "epoch": 0.8064516129032258, "grad_norm": 0.00519227422773838, "learning_rate": 4.307254623044097e-06, "step": 3150 }, { "embedding_loss": 0.0007, "epoch": 0.819252432155658, "grad_norm": 0.013285805471241474, "learning_rate": 4.02275960170697e-06, "step": 3200 }, { "embedding_loss": 0.0001, "epoch": 0.8320532514080902, "grad_norm": 0.010509872809052467, "learning_rate": 3.7382645803698435e-06, "step": 3250 }, { "embedding_loss": 0.0006, "epoch": 0.8448540706605223, "grad_norm": 0.0032197178807109594, "learning_rate": 3.4537695590327167e-06, "step": 3300 }, { "embedding_loss": 0.0007, "epoch": 0.8576548899129545, "grad_norm": 0.007785377558320761, "learning_rate": 3.169274537695591e-06, "step": 3350 }, { "embedding_loss": 0.0003, "epoch": 0.8704557091653866, "grad_norm": 0.0052650910802185535, "learning_rate": 2.884779516358464e-06, "step": 3400 }, { "embedding_loss": 0.0009, "epoch": 0.8832565284178188, "grad_norm": 0.020432407036423683, "learning_rate": 2.6002844950213373e-06, "step": 3450 }, { "embedding_loss": 0.0003, "epoch": 0.8960573476702509, "grad_norm": 0.009411387145519257, "learning_rate": 2.3157894736842105e-06, "step": 3500 }, { "embedding_loss": 0.0004, "epoch": 0.9088581669226831, "grad_norm": 0.007589740678668022, "learning_rate": 2.031294452347084e-06, "step": 3550 }, { "embedding_loss": 0.0004, "epoch": 0.9216589861751152, "grad_norm": 0.12826202809810638, "learning_rate": 1.7467994310099576e-06, "step": 3600 }, { "embedding_loss": 0.0001, "epoch": 0.9344598054275474, "grad_norm": 0.012935018166899681, "learning_rate": 1.4623044096728308e-06, "step": 3650 }, { "embedding_loss": 0.0001, "epoch": 0.9472606246799795, "grad_norm": 0.008784984238445759, "learning_rate": 1.1778093883357043e-06, "step": 3700 }, { "embedding_loss": 0.0007, "epoch": 0.9600614439324117, "grad_norm": 0.004482260439544916, "learning_rate": 8.933143669985776e-07, "step": 3750 }, { "embedding_loss": 0.0006, "epoch": 0.9728622631848438, "grad_norm": 0.004357804544270039, "learning_rate": 6.08819345661451e-07, "step": 3800 }, { "embedding_loss": 0.0005, "epoch": 0.985663082437276, "grad_norm": 0.014645060524344444, "learning_rate": 3.2432432432432436e-07, "step": 3850 }, { "embedding_loss": 0.0003, "epoch": 0.9984639016897081, "grad_norm": 0.005299082491546869, "learning_rate": 3.9829302987197725e-08, "step": 3900 } ], "logging_steps": 50, "max_steps": 3906, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }