rbelanec commited on
Commit
167c8c0
verified
1 Parent(s): ef89899

Training in progress, step 40000

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +42 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb097045d48da2b207cd2a3390fd3df957c91fdb58c597a5e731af42193705ac
3
  size 1638528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92a7bed36ceea64716be294d009dd461ffbf6dc97c3d7d7bc443e9d8a0f14ed8
3
  size 1638528
trainer_log.jsonl CHANGED
@@ -8157,3 +8157,45 @@
8157
  {"current_steps": 39795, "total_steps": 40000, "loss": 0.2722, "lr": 1.9632065447422463e-05, "epoch": 82.73388773388774, "percentage": 99.49, "elapsed_time": "6:44:37", "remaining_time": "0:02:05", "throughput": 1250.23, "total_tokens": 30352568}
8158
  {"current_steps": 39800, "total_steps": 40000, "loss": 0.2449, "lr": 1.8690637803880916e-05, "epoch": 82.74428274428274, "percentage": 99.5, "elapsed_time": "6:44:40", "remaining_time": "0:02:02", "throughput": 1250.25, "total_tokens": 30356408}
8159
  {"current_steps": 39800, "total_steps": 40000, "eval_loss": 0.2485324889421463, "epoch": 82.74428274428274, "percentage": 99.5, "elapsed_time": "6:44:53", "remaining_time": "0:02:02", "throughput": 1249.57, "total_tokens": 30356408}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8157
  {"current_steps": 39795, "total_steps": 40000, "loss": 0.2722, "lr": 1.9632065447422463e-05, "epoch": 82.73388773388774, "percentage": 99.49, "elapsed_time": "6:44:37", "remaining_time": "0:02:05", "throughput": 1250.23, "total_tokens": 30352568}
8158
  {"current_steps": 39800, "total_steps": 40000, "loss": 0.2449, "lr": 1.8690637803880916e-05, "epoch": 82.74428274428274, "percentage": 99.5, "elapsed_time": "6:44:40", "remaining_time": "0:02:02", "throughput": 1250.25, "total_tokens": 30356408}
8159
  {"current_steps": 39800, "total_steps": 40000, "eval_loss": 0.2485324889421463, "epoch": 82.74428274428274, "percentage": 99.5, "elapsed_time": "6:44:53", "remaining_time": "0:02:02", "throughput": 1249.57, "total_tokens": 30356408}
8160
+ {"current_steps": 39805, "total_steps": 40000, "loss": 0.2541, "lr": 1.7772339163019123e-05, "epoch": 82.75467775467776, "percentage": 99.51, "elapsed_time": "6:44:57", "remaining_time": "0:01:59", "throughput": 1249.53, "total_tokens": 30360216}
8161
+ {"current_steps": 39810, "total_steps": 40000, "loss": 0.2616, "lr": 1.6877169666457138e-05, "epoch": 82.76507276507276, "percentage": 99.52, "elapsed_time": "6:44:59", "remaining_time": "0:01:55", "throughput": 1249.55, "total_tokens": 30363896}
8162
+ {"current_steps": 39815, "total_steps": 40000, "loss": 0.2592, "lr": 1.6005129452234532e-05, "epoch": 82.77546777546777, "percentage": 99.54, "elapsed_time": "6:45:02", "remaining_time": "0:01:52", "throughput": 1249.56, "total_tokens": 30367672}
8163
+ {"current_steps": 39820, "total_steps": 40000, "loss": 0.2667, "lr": 1.5156218654843733e-05, "epoch": 82.78586278586279, "percentage": 99.55, "elapsed_time": "6:45:05", "remaining_time": "0:01:49", "throughput": 1249.58, "total_tokens": 30371480}
8164
+ {"current_steps": 39825, "total_steps": 40000, "loss": 0.2775, "lr": 1.4330437405196683e-05, "epoch": 82.79625779625779, "percentage": 99.56, "elapsed_time": "6:45:07", "remaining_time": "0:01:46", "throughput": 1249.6, "total_tokens": 30375256}
8165
+ {"current_steps": 39830, "total_steps": 40000, "loss": 0.2972, "lr": 1.352778583062486e-05, "epoch": 82.8066528066528, "percentage": 99.58, "elapsed_time": "6:45:10", "remaining_time": "0:01:43", "throughput": 1249.62, "total_tokens": 30379160}
8166
+ {"current_steps": 39835, "total_steps": 40000, "loss": 0.2532, "lr": 1.2748264054929237e-05, "epoch": 82.81704781704782, "percentage": 99.59, "elapsed_time": "6:45:13", "remaining_time": "0:01:40", "throughput": 1249.65, "total_tokens": 30383064}
8167
+ {"current_steps": 39840, "total_steps": 40000, "loss": 0.2205, "lr": 1.1991872198297004e-05, "epoch": 82.82744282744282, "percentage": 99.6, "elapsed_time": "6:45:16", "remaining_time": "0:01:37", "throughput": 1249.66, "total_tokens": 30386872}
8168
+ {"current_steps": 39845, "total_steps": 40000, "loss": 0.2508, "lr": 1.1258610377384847e-05, "epoch": 82.83783783783784, "percentage": 99.61, "elapsed_time": "6:45:18", "remaining_time": "0:01:34", "throughput": 1249.69, "total_tokens": 30390808}
8169
+ {"current_steps": 39850, "total_steps": 40000, "loss": 0.2721, "lr": 1.0548478705268982e-05, "epoch": 82.84823284823285, "percentage": 99.62, "elapsed_time": "6:45:21", "remaining_time": "0:01:31", "throughput": 1249.7, "total_tokens": 30394488}
8170
+ {"current_steps": 39855, "total_steps": 40000, "loss": 0.2369, "lr": 9.86147729147846e-06, "epoch": 82.85862785862786, "percentage": 99.64, "elapsed_time": "6:45:24", "remaining_time": "0:01:28", "throughput": 1249.72, "total_tokens": 30398168}
8171
+ {"current_steps": 39860, "total_steps": 40000, "loss": 0.2426, "lr": 9.197606241928557e-06, "epoch": 82.86902286902287, "percentage": 99.65, "elapsed_time": "6:45:26", "remaining_time": "0:01:25", "throughput": 1249.74, "total_tokens": 30402072}
8172
+ {"current_steps": 39865, "total_steps": 40000, "loss": 0.2922, "lr": 8.556865659004042e-06, "epoch": 82.87941787941789, "percentage": 99.66, "elapsed_time": "6:45:29", "remaining_time": "0:01:22", "throughput": 1249.76, "total_tokens": 30405944}
8173
+ {"current_steps": 39870, "total_steps": 40000, "loss": 0.2534, "lr": 7.939255641525867e-06, "epoch": 82.88981288981289, "percentage": 99.67, "elapsed_time": "6:45:32", "remaining_time": "0:01:19", "throughput": 1249.77, "total_tokens": 30409592}
8174
+ {"current_steps": 39875, "total_steps": 40000, "loss": 0.2646, "lr": 7.344776284751164e-06, "epoch": 82.9002079002079, "percentage": 99.69, "elapsed_time": "6:45:34", "remaining_time": "0:01:16", "throughput": 1249.79, "total_tokens": 30413272}
8175
+ {"current_steps": 39880, "total_steps": 40000, "loss": 0.2522, "lr": 6.773427680323296e-06, "epoch": 82.9106029106029, "percentage": 99.7, "elapsed_time": "6:45:37", "remaining_time": "0:01:13", "throughput": 1249.81, "total_tokens": 30417016}
8176
+ {"current_steps": 39885, "total_steps": 40000, "loss": 0.2504, "lr": 6.225209916355112e-06, "epoch": 82.92099792099792, "percentage": 99.71, "elapsed_time": "6:45:40", "remaining_time": "0:01:10", "throughput": 1249.82, "total_tokens": 30420824}
8177
+ {"current_steps": 39890, "total_steps": 40000, "loss": 0.274, "lr": 5.7001230774123e-06, "epoch": 82.93139293139293, "percentage": 99.72, "elapsed_time": "6:45:42", "remaining_time": "0:01:07", "throughput": 1249.84, "total_tokens": 30424664}
8178
+ {"current_steps": 39895, "total_steps": 40000, "loss": 0.2699, "lr": 5.198167244446772e-06, "epoch": 82.94178794178794, "percentage": 99.74, "elapsed_time": "6:45:45", "remaining_time": "0:01:04", "throughput": 1249.87, "total_tokens": 30428632}
8179
+ {"current_steps": 39900, "total_steps": 40000, "loss": 0.2715, "lr": 4.71934249487993e-06, "epoch": 82.95218295218295, "percentage": 99.75, "elapsed_time": "6:45:48", "remaining_time": "0:01:01", "throughput": 1249.88, "total_tokens": 30432344}
8180
+ {"current_steps": 39905, "total_steps": 40000, "loss": 0.2503, "lr": 4.2636489025527075e-06, "epoch": 82.96257796257797, "percentage": 99.76, "elapsed_time": "6:45:50", "remaining_time": "0:00:57", "throughput": 1249.89, "total_tokens": 30435928}
8181
+ {"current_steps": 39910, "total_steps": 40000, "loss": 0.27, "lr": 3.831086537742223e-06, "epoch": 82.97297297297297, "percentage": 99.78, "elapsed_time": "6:45:53", "remaining_time": "0:00:54", "throughput": 1249.92, "total_tokens": 30439832}
8182
+ {"current_steps": 39915, "total_steps": 40000, "loss": 0.2518, "lr": 3.4216554671451236e-06, "epoch": 82.98336798336798, "percentage": 99.79, "elapsed_time": "6:45:56", "remaining_time": "0:00:51", "throughput": 1249.94, "total_tokens": 30443736}
8183
+ {"current_steps": 39920, "total_steps": 40000, "loss": 0.2552, "lr": 3.035355753894242e-06, "epoch": 82.993762993763, "percentage": 99.8, "elapsed_time": "6:45:58", "remaining_time": "0:00:48", "throughput": 1249.96, "total_tokens": 30447576}
8184
+ {"current_steps": 39925, "total_steps": 40000, "loss": 0.2584, "lr": 2.6721874575752477e-06, "epoch": 83.004158004158, "percentage": 99.81, "elapsed_time": "6:46:01", "remaining_time": "0:00:45", "throughput": 1249.96, "total_tokens": 30451280}
8185
+ {"current_steps": 39930, "total_steps": 40000, "loss": 0.2492, "lr": 2.3321506341933418e-06, "epoch": 83.01455301455302, "percentage": 99.83, "elapsed_time": "6:46:04", "remaining_time": "0:00:42", "throughput": 1249.97, "total_tokens": 30455088}
8186
+ {"current_steps": 39935, "total_steps": 40000, "loss": 0.2897, "lr": 2.0152453361732546e-06, "epoch": 83.02494802494803, "percentage": 99.84, "elapsed_time": "6:46:07", "remaining_time": "0:00:39", "throughput": 1250.0, "total_tokens": 30458960}
8187
+ {"current_steps": 39940, "total_steps": 40000, "loss": 0.2632, "lr": 1.7214716123925554e-06, "epoch": 83.03534303534303, "percentage": 99.85, "elapsed_time": "6:46:09", "remaining_time": "0:00:36", "throughput": 1250.01, "total_tokens": 30462640}
8188
+ {"current_steps": 39945, "total_steps": 40000, "loss": 0.2362, "lr": 1.4508295081649968e-06, "epoch": 83.04573804573805, "percentage": 99.86, "elapsed_time": "6:46:12", "remaining_time": "0:00:33", "throughput": 1250.03, "total_tokens": 30466512}
8189
+ {"current_steps": 39950, "total_steps": 40000, "loss": 0.2562, "lr": 1.2033190652238623e-06, "epoch": 83.05613305613305, "percentage": 99.88, "elapsed_time": "6:46:15", "remaining_time": "0:00:30", "throughput": 1250.05, "total_tokens": 30470224}
8190
+ {"current_steps": 39955, "total_steps": 40000, "loss": 0.2591, "lr": 9.78940321721966e-07, "epoch": 83.06652806652806, "percentage": 99.89, "elapsed_time": "6:46:17", "remaining_time": "0:00:27", "throughput": 1250.06, "total_tokens": 30473872}
8191
+ {"current_steps": 39960, "total_steps": 40000, "loss": 0.2608, "lr": 7.776933122816132e-07, "epoch": 83.07692307692308, "percentage": 99.9, "elapsed_time": "6:46:20", "remaining_time": "0:00:24", "throughput": 1250.08, "total_tokens": 30477680}
8192
+ {"current_steps": 39965, "total_steps": 40000, "loss": 0.2651, "lr": 5.99578067927986e-07, "epoch": 83.08731808731808, "percentage": 99.91, "elapsed_time": "6:46:23", "remaining_time": "0:00:21", "throughput": 1250.1, "total_tokens": 30481456}
8193
+ {"current_steps": 39970, "total_steps": 40000, "loss": 0.2543, "lr": 4.445946161224512e-07, "epoch": 83.0977130977131, "percentage": 99.92, "elapsed_time": "6:46:25", "remaining_time": "0:00:18", "throughput": 1250.12, "total_tokens": 30485360}
8194
+ {"current_steps": 39975, "total_steps": 40000, "loss": 0.2502, "lr": 3.127429807792126e-07, "epoch": 83.10810810810811, "percentage": 99.94, "elapsed_time": "6:46:28", "remaining_time": "0:00:15", "throughput": 1250.14, "total_tokens": 30489200}
8195
+ {"current_steps": 39980, "total_steps": 40000, "loss": 0.2554, "lr": 2.040231822320049e-07, "epoch": 83.11850311850311, "percentage": 99.95, "elapsed_time": "6:46:31", "remaining_time": "0:00:12", "throughput": 1250.16, "total_tokens": 30493136}
8196
+ {"current_steps": 39985, "total_steps": 40000, "loss": 0.2593, "lr": 1.1843523723409354e-07, "epoch": 83.12889812889813, "percentage": 99.96, "elapsed_time": "6:46:34", "remaining_time": "0:00:09", "throughput": 1250.18, "total_tokens": 30496880}
8197
+ {"current_steps": 39990, "total_steps": 40000, "loss": 0.2623, "lr": 5.597915897492811e-08, "epoch": 83.13929313929314, "percentage": 99.98, "elapsed_time": "6:46:36", "remaining_time": "0:00:06", "throughput": 1250.2, "total_tokens": 30500656}
8198
+ {"current_steps": 39995, "total_steps": 40000, "loss": 0.2839, "lr": 1.6654957113448885e-08, "epoch": 83.14968814968815, "percentage": 99.99, "elapsed_time": "6:46:39", "remaining_time": "0:00:03", "throughput": 1250.22, "total_tokens": 30504496}
8199
+ {"current_steps": 40000, "total_steps": 40000, "loss": 0.2824, "lr": 4.626377114735902e-10, "epoch": 83.16008316008316, "percentage": 100.0, "elapsed_time": "6:46:42", "remaining_time": "0:00:00", "throughput": 1250.23, "total_tokens": 30508240}
8200
+ {"current_steps": 40000, "total_steps": 40000, "eval_loss": 0.2492801398038864, "epoch": 83.16008316008316, "percentage": 100.0, "elapsed_time": "6:46:55", "remaining_time": "0:00:00", "throughput": 1249.55, "total_tokens": 30508240}
8201
+ {"current_steps": 40000, "total_steps": 40000, "epoch": 83.16008316008316, "percentage": 100.0, "elapsed_time": "6:46:56", "remaining_time": "0:00:00", "throughput": 1249.49, "total_tokens": 30508240}