rbelanec commited on
Commit
88d377b
verified
1 Parent(s): 4cd9db7

Training in progress, step 39600

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +44 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47cc67fffccd0b103f91ba2bdfc02072b90265020750c18d979785ad90865db2
3
  size 460928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1d337989dbca085df5e7daea308213ce8a71bb9dacc8f7b6a55660632a1a092
3
  size 460928
trainer_log.jsonl CHANGED
@@ -8075,3 +8075,47 @@
8075
  {"current_steps": 39395, "total_steps": 40000, "loss": 0.0707, "lr": 0.00016986515083774467, "epoch": 1.9248772384139936, "percentage": 98.49, "elapsed_time": "1 day, 2:12:03", "remaining_time": "0:24:08", "throughput": 541.47, "total_tokens": 51073696}
8076
  {"current_steps": 39400, "total_steps": 40000, "loss": 0.0749, "lr": 0.00016707417762611975, "epoch": 1.9251215400776878, "percentage": 98.5, "elapsed_time": "1 day, 2:12:05", "remaining_time": "0:23:56", "throughput": 541.54, "total_tokens": 51080832}
8077
  {"current_steps": 39400, "total_steps": 40000, "eval_loss": 0.083449587225914, "epoch": 1.9251215400776878, "percentage": 98.5, "elapsed_time": "1 day, 2:18:48", "remaining_time": "0:24:02", "throughput": 539.23, "total_tokens": 51080832}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8075
  {"current_steps": 39395, "total_steps": 40000, "loss": 0.0707, "lr": 0.00016986515083774467, "epoch": 1.9248772384139936, "percentage": 98.49, "elapsed_time": "1 day, 2:12:03", "remaining_time": "0:24:08", "throughput": 541.47, "total_tokens": 51073696}
8076
  {"current_steps": 39400, "total_steps": 40000, "loss": 0.0749, "lr": 0.00016707417762611975, "epoch": 1.9251215400776878, "percentage": 98.5, "elapsed_time": "1 day, 2:12:05", "remaining_time": "0:23:56", "throughput": 541.54, "total_tokens": 51080832}
8077
  {"current_steps": 39400, "total_steps": 40000, "eval_loss": 0.083449587225914, "epoch": 1.9251215400776878, "percentage": 98.5, "elapsed_time": "1 day, 2:18:48", "remaining_time": "0:24:02", "throughput": 539.23, "total_tokens": 51080832}
8078
+ {"current_steps": 39405, "total_steps": 40000, "loss": 0.0787, "lr": 0.00016430631053459543, "epoch": 1.9253658417413821, "percentage": 98.51, "elapsed_time": "1 day, 2:18:52", "remaining_time": "0:23:50", "throughput": 539.28, "total_tokens": 51087200}
8079
+ {"current_steps": 39410, "total_steps": 40000, "loss": 0.0587, "lr": 0.0001615615499899803, "epoch": 1.9256101434050765, "percentage": 98.52, "elapsed_time": "1 day, 2:18:54", "remaining_time": "0:23:38", "throughput": 539.34, "total_tokens": 51093600}
8080
+ {"current_steps": 39415, "total_steps": 40000, "loss": 0.0755, "lr": 0.00015883989641556905, "epoch": 1.925854445068771, "percentage": 98.54, "elapsed_time": "1 day, 2:18:56", "remaining_time": "0:23:26", "throughput": 539.39, "total_tokens": 51099936}
8081
+ {"current_steps": 39420, "total_steps": 40000, "loss": 0.0738, "lr": 0.00015614135023105934, "epoch": 1.9260987467324653, "percentage": 98.55, "elapsed_time": "1 day, 2:18:58", "remaining_time": "0:23:13", "throughput": 539.45, "total_tokens": 51106336}
8082
+ {"current_steps": 39425, "total_steps": 40000, "loss": 0.0869, "lr": 0.00015346591185261827, "epoch": 1.9263430483961597, "percentage": 98.56, "elapsed_time": "1 day, 2:19:00", "remaining_time": "0:23:01", "throughput": 539.51, "total_tokens": 51112864}
8083
+ {"current_steps": 39430, "total_steps": 40000, "loss": 0.083, "lr": 0.00015081358169281576, "epoch": 1.926587350059854, "percentage": 98.58, "elapsed_time": "1 day, 2:19:01", "remaining_time": "0:22:49", "throughput": 539.57, "total_tokens": 51119552}
8084
+ {"current_steps": 39435, "total_steps": 40000, "loss": 0.0721, "lr": 0.00014818436016069135, "epoch": 1.9268316517235482, "percentage": 98.59, "elapsed_time": "1 day, 2:19:03", "remaining_time": "0:22:37", "throughput": 539.62, "total_tokens": 51125696}
8085
+ {"current_steps": 39440, "total_steps": 40000, "loss": 0.0897, "lr": 0.00014557824766168735, "epoch": 1.9270759533872426, "percentage": 98.6, "elapsed_time": "1 day, 2:19:05", "remaining_time": "0:22:25", "throughput": 539.68, "total_tokens": 51132192}
8086
+ {"current_steps": 39445, "total_steps": 40000, "loss": 0.0683, "lr": 0.00014299524459769896, "epoch": 1.9273202550509367, "percentage": 98.61, "elapsed_time": "1 day, 2:19:07", "remaining_time": "0:22:13", "throughput": 539.74, "total_tokens": 51138880}
8087
+ {"current_steps": 39450, "total_steps": 40000, "loss": 0.1051, "lr": 0.0001404353513670742, "epoch": 1.9275645567146311, "percentage": 98.62, "elapsed_time": "1 day, 2:19:09", "remaining_time": "0:22:00", "throughput": 539.79, "total_tokens": 51145312}
8088
+ {"current_steps": 39455, "total_steps": 40000, "loss": 0.0995, "lr": 0.0001378985683645806, "epoch": 1.9278088583783255, "percentage": 98.64, "elapsed_time": "1 day, 2:19:11", "remaining_time": "0:21:48", "throughput": 539.85, "total_tokens": 51151808}
8089
+ {"current_steps": 39460, "total_steps": 40000, "loss": 0.0815, "lr": 0.0001353848959813886, "epoch": 1.9280531600420199, "percentage": 98.65, "elapsed_time": "1 day, 2:19:13", "remaining_time": "0:21:36", "throughput": 539.91, "total_tokens": 51158592}
8090
+ {"current_steps": 39465, "total_steps": 40000, "loss": 0.0722, "lr": 0.00013289433460517142, "epoch": 1.9282974617057143, "percentage": 98.66, "elapsed_time": "1 day, 2:19:15", "remaining_time": "0:21:24", "throughput": 539.97, "total_tokens": 51165024}
8091
+ {"current_steps": 39470, "total_steps": 40000, "loss": 0.0695, "lr": 0.00013042688462000518, "epoch": 1.9285417633694086, "percentage": 98.67, "elapsed_time": "1 day, 2:19:17", "remaining_time": "0:21:12", "throughput": 540.03, "total_tokens": 51171680}
8092
+ {"current_steps": 39475, "total_steps": 40000, "loss": 0.0858, "lr": 0.0001279825464063855, "epoch": 1.928786065033103, "percentage": 98.69, "elapsed_time": "1 day, 2:19:19", "remaining_time": "0:21:00", "throughput": 540.09, "total_tokens": 51178176}
8093
+ {"current_steps": 39480, "total_steps": 40000, "loss": 0.0635, "lr": 0.00012556132034126087, "epoch": 1.9290303666967972, "percentage": 98.7, "elapsed_time": "1 day, 2:19:21", "remaining_time": "0:20:48", "throughput": 540.15, "total_tokens": 51184736}
8094
+ {"current_steps": 39485, "total_steps": 40000, "loss": 0.0641, "lr": 0.0001231632067980326, "epoch": 1.9292746683604916, "percentage": 98.71, "elapsed_time": "1 day, 2:19:23", "remaining_time": "0:20:35", "throughput": 540.2, "total_tokens": 51191328}
8095
+ {"current_steps": 39490, "total_steps": 40000, "loss": 0.0923, "lr": 0.00012078820614650486, "epoch": 1.9295189700241857, "percentage": 98.72, "elapsed_time": "1 day, 2:19:24", "remaining_time": "0:20:23", "throughput": 540.26, "total_tokens": 51197792}
8096
+ {"current_steps": 39495, "total_steps": 40000, "loss": 0.1039, "lr": 0.00011843631875291804, "epoch": 1.92976327168788, "percentage": 98.74, "elapsed_time": "1 day, 2:19:26", "remaining_time": "0:20:11", "throughput": 540.32, "total_tokens": 51204416}
8097
+ {"current_steps": 39500, "total_steps": 40000, "loss": 0.1051, "lr": 0.00011610754497999863, "epoch": 1.9300075733515745, "percentage": 98.75, "elapsed_time": "1 day, 2:19:28", "remaining_time": "0:19:59", "throughput": 540.38, "total_tokens": 51210976}
8098
+ {"current_steps": 39505, "total_steps": 40000, "loss": 0.0867, "lr": 0.0001138018851868594, "epoch": 1.9302518750152688, "percentage": 98.76, "elapsed_time": "1 day, 2:19:30", "remaining_time": "0:19:47", "throughput": 540.43, "total_tokens": 51217120}
8099
+ {"current_steps": 39510, "total_steps": 40000, "loss": 0.095, "lr": 0.0001115193397290326, "epoch": 1.9304961766789632, "percentage": 98.78, "elapsed_time": "1 day, 2:19:32", "remaining_time": "0:19:35", "throughput": 540.49, "total_tokens": 51223488}
8100
+ {"current_steps": 39515, "total_steps": 40000, "loss": 0.0655, "lr": 0.00010925990895856996, "epoch": 1.9307404783426576, "percentage": 98.79, "elapsed_time": "1 day, 2:19:34", "remaining_time": "0:19:23", "throughput": 540.54, "total_tokens": 51229504}
8101
+ {"current_steps": 39520, "total_steps": 40000, "loss": 0.1092, "lr": 0.00010702359322385946, "epoch": 1.930984780006352, "percentage": 98.8, "elapsed_time": "1 day, 2:19:36", "remaining_time": "0:19:11", "throughput": 540.6, "total_tokens": 51235904}
8102
+ {"current_steps": 39525, "total_steps": 40000, "loss": 0.0749, "lr": 0.00010481039286977523, "epoch": 1.9312290816700461, "percentage": 98.81, "elapsed_time": "1 day, 2:19:38", "remaining_time": "0:18:59", "throughput": 540.66, "total_tokens": 51242528}
8103
+ {"current_steps": 39530, "total_steps": 40000, "loss": 0.0732, "lr": 0.00010262030823764423, "epoch": 1.9314733833337405, "percentage": 98.83, "elapsed_time": "1 day, 2:19:40", "remaining_time": "0:18:46", "throughput": 540.72, "total_tokens": 51249536}
8104
+ {"current_steps": 39535, "total_steps": 40000, "loss": 0.0743, "lr": 0.00010045333966517966, "epoch": 1.9317176849974347, "percentage": 98.84, "elapsed_time": "1 day, 2:19:42", "remaining_time": "0:18:34", "throughput": 540.78, "total_tokens": 51255936}
8105
+ {"current_steps": 39540, "total_steps": 40000, "loss": 0.0872, "lr": 9.83094874865642e-05, "epoch": 1.931961986661129, "percentage": 98.85, "elapsed_time": "1 day, 2:19:44", "remaining_time": "0:18:22", "throughput": 540.84, "total_tokens": 51262624}
8106
+ {"current_steps": 39545, "total_steps": 40000, "loss": 0.0598, "lr": 9.618875203241672e-05, "epoch": 1.9322062883248234, "percentage": 98.86, "elapsed_time": "1 day, 2:19:46", "remaining_time": "0:18:10", "throughput": 540.9, "total_tokens": 51269568}
8107
+ {"current_steps": 39550, "total_steps": 40000, "loss": 0.0875, "lr": 9.409113362977561e-05, "epoch": 1.9324505899885178, "percentage": 98.88, "elapsed_time": "1 day, 2:19:47", "remaining_time": "0:17:58", "throughput": 540.95, "total_tokens": 51275616}
8108
+ {"current_steps": 39555, "total_steps": 40000, "loss": 0.0558, "lr": 9.20166326020988e-05, "epoch": 1.9326948916522122, "percentage": 98.89, "elapsed_time": "1 day, 2:19:49", "remaining_time": "0:17:46", "throughput": 541.01, "total_tokens": 51282080}
8109
+ {"current_steps": 39560, "total_steps": 40000, "loss": 0.0857, "lr": 8.996524926933035e-05, "epoch": 1.9329391933159066, "percentage": 98.9, "elapsed_time": "1 day, 2:19:51", "remaining_time": "0:17:34", "throughput": 541.07, "total_tokens": 51288928}
8110
+ {"current_steps": 39565, "total_steps": 40000, "loss": 0.0635, "lr": 8.793698394781723e-05, "epoch": 1.933183494979601, "percentage": 98.91, "elapsed_time": "1 day, 2:19:53", "remaining_time": "0:17:22", "throughput": 541.12, "total_tokens": 51295168}
8111
+ {"current_steps": 39570, "total_steps": 40000, "loss": 0.0905, "lr": 8.593183695030926e-05, "epoch": 1.9334277966432951, "percentage": 98.92, "elapsed_time": "1 day, 2:19:55", "remaining_time": "0:17:10", "throughput": 541.18, "total_tokens": 51301568}
8112
+ {"current_steps": 39575, "total_steps": 40000, "loss": 0.0816, "lr": 8.39498085860757e-05, "epoch": 1.9336720983069895, "percentage": 98.94, "elapsed_time": "1 day, 2:19:57", "remaining_time": "0:16:58", "throughput": 541.24, "total_tokens": 51308000}
8113
+ {"current_steps": 39580, "total_steps": 40000, "loss": 0.077, "lr": 8.199089916072211e-05, "epoch": 1.9339163999706837, "percentage": 98.95, "elapsed_time": "1 day, 2:19:59", "remaining_time": "0:16:45", "throughput": 541.29, "total_tokens": 51314208}
8114
+ {"current_steps": 39585, "total_steps": 40000, "loss": 0.1108, "lr": 8.005510897637346e-05, "epoch": 1.934160701634378, "percentage": 98.96, "elapsed_time": "1 day, 2:20:01", "remaining_time": "0:16:33", "throughput": 541.35, "total_tokens": 51320384}
8115
+ {"current_steps": 39590, "total_steps": 40000, "loss": 0.089, "lr": 7.8142438331541e-05, "epoch": 1.9344050032980724, "percentage": 98.98, "elapsed_time": "1 day, 2:20:03", "remaining_time": "0:16:21", "throughput": 541.4, "total_tokens": 51326816}
8116
+ {"current_steps": 39595, "total_steps": 40000, "loss": 0.0605, "lr": 7.625288752117209e-05, "epoch": 1.9346493049617668, "percentage": 98.99, "elapsed_time": "1 day, 2:20:05", "remaining_time": "0:16:09", "throughput": 541.46, "total_tokens": 51332960}
8117
+ {"current_steps": 39600, "total_steps": 40000, "loss": 0.0717, "lr": 7.4386456836667e-05, "epoch": 1.9348936066254612, "percentage": 99.0, "elapsed_time": "1 day, 2:20:07", "remaining_time": "0:15:57", "throughput": 541.51, "total_tokens": 51339424}
8118
+ {"current_steps": 39600, "total_steps": 40000, "eval_loss": 0.08348038792610168, "epoch": 1.9348936066254612, "percentage": 99.0, "elapsed_time": "1 day, 2:26:49", "remaining_time": "0:16:01", "throughput": 539.22, "total_tokens": 51339424}
8119
+ {"current_steps": 39605, "total_steps": 40000, "loss": 0.0884, "lr": 7.254314656586214e-05, "epoch": 1.9351379082891556, "percentage": 99.01, "elapsed_time": "1 day, 2:26:57", "remaining_time": "0:15:49", "throughput": 539.25, "total_tokens": 51346080}
8120
+ {"current_steps": 39610, "total_steps": 40000, "loss": 0.0526, "lr": 7.07229569929968e-05, "epoch": 1.93538220995285, "percentage": 99.02, "elapsed_time": "1 day, 2:26:58", "remaining_time": "0:15:37", "throughput": 539.31, "total_tokens": 51352640}
8121
+ {"current_steps": 39615, "total_steps": 40000, "loss": 0.084, "lr": 6.892588839879643e-05, "epoch": 1.935626511616544, "percentage": 99.04, "elapsed_time": "1 day, 2:27:00", "remaining_time": "0:15:25", "throughput": 539.37, "total_tokens": 51358912}