Training in progress, step 3000
Browse files
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4976698672
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e3e23b8244888af0e7b13c4d2d1d5867473f352635cb055dd529e90a721f5d1
|
| 3 |
size 4976698672
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4999802720
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea2b464260613d87fcab353c6413bb836afec9b6a9cb4de742f9075981c7c715
|
| 3 |
size 4999802720
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4915916176
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c51574b302d2d24ce33e7e76bf6d0e74592775fc8d6e6f9aa4d7c7cedcbd3303
|
| 3 |
size 4915916176
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1168138808
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f1594b536c444e5181286d1af6b74da9b788c07989a708b7d02ed2c1f6f1f27
|
| 3 |
size 1168138808
|
trainer_log.jsonl
CHANGED
|
@@ -298,3 +298,63 @@
|
|
| 298 |
{"current_steps": 2490, "total_steps": 3751, "loss": 0.2306, "lr": 3.0667134524766173e-06, "epoch": 0.6638119199560124, "percentage": 66.38, "elapsed_time": "13:16:39", "remaining_time": "6:43:26"}
|
| 299 |
{"current_steps": 2500, "total_steps": 3751, "loss": 0.2304, "lr": 3.023875598978419e-06, "epoch": 0.666477831281137, "percentage": 66.65, "elapsed_time": "13:18:59", "remaining_time": "6:39:48"}
|
| 300 |
{"current_steps": 2500, "total_steps": 3751, "eval_loss": 0.22111880779266357, "epoch": 0.666477831281137, "percentage": 66.65, "elapsed_time": "13:23:22", "remaining_time": "6:42:00"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
{"current_steps": 2490, "total_steps": 3751, "loss": 0.2306, "lr": 3.0667134524766173e-06, "epoch": 0.6638119199560124, "percentage": 66.38, "elapsed_time": "13:16:39", "remaining_time": "6:43:26"}
|
| 299 |
{"current_steps": 2500, "total_steps": 3751, "loss": 0.2304, "lr": 3.023875598978419e-06, "epoch": 0.666477831281137, "percentage": 66.65, "elapsed_time": "13:18:59", "remaining_time": "6:39:48"}
|
| 300 |
{"current_steps": 2500, "total_steps": 3751, "eval_loss": 0.22111880779266357, "epoch": 0.666477831281137, "percentage": 66.65, "elapsed_time": "13:23:22", "remaining_time": "6:42:00"}
|
| 301 |
+
{"current_steps": 2510, "total_steps": 3751, "loss": 0.2308, "lr": 2.981208968966721e-06, "epoch": 0.6691437426062615, "percentage": 66.92, "elapsed_time": "13:27:00", "remaining_time": "6:38:59"}
|
| 302 |
+
{"current_steps": 2520, "total_steps": 3751, "loss": 0.2279, "lr": 2.9387172593389148e-06, "epoch": 0.6718096539313861, "percentage": 67.18, "elapsed_time": "13:29:18", "remaining_time": "6:35:20"}
|
| 303 |
+
{"current_steps": 2530, "total_steps": 3751, "loss": 0.2269, "lr": 2.896404151836227e-06, "epoch": 0.6744755652565106, "percentage": 67.45, "elapsed_time": "13:31:39", "remaining_time": "6:31:43"}
|
| 304 |
+
{"current_steps": 2540, "total_steps": 3751, "loss": 0.2302, "lr": 2.8542733127247024e-06, "epoch": 0.6771414765816351, "percentage": 67.72, "elapsed_time": "13:33:58", "remaining_time": "6:28:04"}
|
| 305 |
+
{"current_steps": 2550, "total_steps": 3751, "loss": 0.2139, "lr": 2.812328392477536e-06, "epoch": 0.6798073879067598, "percentage": 67.98, "elapsed_time": "13:36:16", "remaining_time": "6:24:27"}
|
| 306 |
+
{"current_steps": 2550, "total_steps": 3751, "eval_loss": 0.2199471890926361, "epoch": 0.6798073879067598, "percentage": 67.98, "elapsed_time": "13:40:39", "remaining_time": "6:26:31"}
|
| 307 |
+
{"current_steps": 2560, "total_steps": 3751, "loss": 0.2237, "lr": 2.7705730254587802e-06, "epoch": 0.6824732992318843, "percentage": 68.25, "elapsed_time": "13:43:00", "remaining_time": "6:22:53"}
|
| 308 |
+
{"current_steps": 2570, "total_steps": 3751, "loss": 0.2166, "lr": 2.729010829608442e-06, "epoch": 0.6851392105570089, "percentage": 68.52, "elapsed_time": "13:45:17", "remaining_time": "6:19:14"}
|
| 309 |
+
{"current_steps": 2580, "total_steps": 3751, "loss": 0.2291, "lr": 2.6876454061289892e-06, "epoch": 0.6878051218821334, "percentage": 68.78, "elapsed_time": "13:47:36", "remaining_time": "6:15:37"}
|
| 310 |
+
{"current_steps": 2590, "total_steps": 3751, "loss": 0.2226, "lr": 2.646480339173337e-06, "epoch": 0.6904710332072579, "percentage": 69.05, "elapsed_time": "13:49:59", "remaining_time": "6:12:03"}
|
| 311 |
+
{"current_steps": 2600, "total_steps": 3751, "loss": 0.2186, "lr": 2.6055191955342886e-06, "epoch": 0.6931369445323825, "percentage": 69.31, "elapsed_time": "13:52:24", "remaining_time": "6:08:30"}
|
| 312 |
+
{"current_steps": 2600, "total_steps": 3751, "eval_loss": 0.219247967004776, "epoch": 0.6931369445323825, "percentage": 69.31, "elapsed_time": "13:56:47", "remaining_time": "6:10:26"}
|
| 313 |
+
{"current_steps": 2610, "total_steps": 3751, "loss": 0.2249, "lr": 2.564765524335478e-06, "epoch": 0.695802855857507, "percentage": 69.58, "elapsed_time": "13:59:05", "remaining_time": "6:06:49"}
|
| 314 |
+
{"current_steps": 2620, "total_steps": 3751, "loss": 0.2244, "lr": 2.524222856723869e-06, "epoch": 0.6984687671826316, "percentage": 69.85, "elapsed_time": "14:01:27", "remaining_time": "6:03:14"}
|
| 315 |
+
{"current_steps": 2630, "total_steps": 3751, "loss": 0.2168, "lr": 2.483894705563778e-06, "epoch": 0.7011346785077561, "percentage": 70.11, "elapsed_time": "14:03:45", "remaining_time": "5:59:38"}
|
| 316 |
+
{"current_steps": 2640, "total_steps": 3751, "loss": 0.2175, "lr": 2.4437845651325116e-06, "epoch": 0.7038005898328806, "percentage": 70.38, "elapsed_time": "14:06:05", "remaining_time": "5:56:03"}
|
| 317 |
+
{"current_steps": 2650, "total_steps": 3751, "loss": 0.2156, "lr": 2.403895910817593e-06, "epoch": 0.7064665011580052, "percentage": 70.65, "elapsed_time": "14:08:29", "remaining_time": "5:52:31"}
|
| 318 |
+
{"current_steps": 2650, "total_steps": 3751, "eval_loss": 0.21830175817012787, "epoch": 0.7064665011580052, "percentage": 70.65, "elapsed_time": "14:12:52", "remaining_time": "5:54:20"}
|
| 319 |
+
{"current_steps": 2660, "total_steps": 3751, "loss": 0.2111, "lr": 2.364232198815638e-06, "epoch": 0.7091324124831297, "percentage": 70.91, "elapsed_time": "14:15:09", "remaining_time": "5:50:44"}
|
| 320 |
+
{"current_steps": 2670, "total_steps": 3751, "loss": 0.2224, "lr": 2.3247968658328825e-06, "epoch": 0.7117983238082544, "percentage": 71.18, "elapsed_time": "14:17:28", "remaining_time": "5:47:09"}
|
| 321 |
+
{"current_steps": 2680, "total_steps": 3751, "loss": 0.213, "lr": 2.285593328787414e-06, "epoch": 0.7144642351333789, "percentage": 71.45, "elapsed_time": "14:19:47", "remaining_time": "5:43:35"}
|
| 322 |
+
{"current_steps": 2690, "total_steps": 3751, "loss": 0.2283, "lr": 2.246624984513099e-06, "epoch": 0.7171301464585034, "percentage": 71.71, "elapsed_time": "14:22:03", "remaining_time": "5:40:00"}
|
| 323 |
+
{"current_steps": 2700, "total_steps": 3751, "loss": 0.2187, "lr": 2.2078952094652705e-06, "epoch": 0.719796057783628, "percentage": 71.98, "elapsed_time": "14:24:19", "remaining_time": "5:36:26"}
|
| 324 |
+
{"current_steps": 2700, "total_steps": 3751, "eval_loss": 0.21585261821746826, "epoch": 0.719796057783628, "percentage": 71.98, "elapsed_time": "14:28:42", "remaining_time": "5:38:09"}
|
| 325 |
+
{"current_steps": 2710, "total_steps": 3751, "loss": 0.2221, "lr": 2.1694073594281663e-06, "epoch": 0.7224619691087525, "percentage": 72.25, "elapsed_time": "14:31:02", "remaining_time": "5:34:35"}
|
| 326 |
+
{"current_steps": 2720, "total_steps": 3751, "loss": 0.2192, "lr": 2.131164769224164e-06, "epoch": 0.7251278804338771, "percentage": 72.51, "elapsed_time": "14:33:19", "remaining_time": "5:31:01"}
|
| 327 |
+
{"current_steps": 2730, "total_steps": 3751, "loss": 0.2249, "lr": 2.0931707524248268e-06, "epoch": 0.7277937917590016, "percentage": 72.78, "elapsed_time": "14:35:36", "remaining_time": "5:27:28"}
|
| 328 |
+
{"current_steps": 2740, "total_steps": 3751, "loss": 0.2112, "lr": 2.0554286010638076e-06, "epoch": 0.7304597030841261, "percentage": 73.05, "elapsed_time": "14:37:56", "remaining_time": "5:23:56"}
|
| 329 |
+
{"current_steps": 2750, "total_steps": 3751, "loss": 0.222, "lr": 2.017941585351591e-06, "epoch": 0.7331256144092507, "percentage": 73.31, "elapsed_time": "14:40:14", "remaining_time": "5:20:24"}
|
| 330 |
+
{"current_steps": 2750, "total_steps": 3751, "eval_loss": 0.21738137304782867, "epoch": 0.7331256144092507, "percentage": 73.31, "elapsed_time": "14:44:37", "remaining_time": "5:22:00"}
|
| 331 |
+
{"current_steps": 2760, "total_steps": 3751, "loss": 0.2091, "lr": 1.98071295339216e-06, "epoch": 0.7357915257343752, "percentage": 73.58, "elapsed_time": "14:47:00", "remaining_time": "5:18:29"}
|
| 332 |
+
{"current_steps": 2770, "total_steps": 3751, "loss": 0.2133, "lr": 1.9437459309015426e-06, "epoch": 0.7384574370594998, "percentage": 73.85, "elapsed_time": "14:49:22", "remaining_time": "5:14:58"}
|
| 333 |
+
{"current_steps": 2780, "total_steps": 3751, "loss": 0.2102, "lr": 1.9070437209283304e-06, "epoch": 0.7411233483846243, "percentage": 74.11, "elapsed_time": "14:51:43", "remaining_time": "5:11:27"}
|
| 334 |
+
{"current_steps": 2790, "total_steps": 3751, "loss": 0.2138, "lr": 1.8706095035761418e-06, "epoch": 0.7437892597097489, "percentage": 74.38, "elapsed_time": "14:54:02", "remaining_time": "5:07:56"}
|
| 335 |
+
{"current_steps": 2800, "total_steps": 3751, "loss": 0.2162, "lr": 1.8344464357280722e-06, "epoch": 0.7464551710348735, "percentage": 74.65, "elapsed_time": "14:56:27", "remaining_time": "5:04:28"}
|
| 336 |
+
{"current_steps": 2800, "total_steps": 3751, "eval_loss": 0.21530871093273163, "epoch": 0.7464551710348735, "percentage": 74.65, "elapsed_time": "15:00:50", "remaining_time": "5:05:57"}
|
| 337 |
+
{"current_steps": 2810, "total_steps": 3751, "loss": 0.2245, "lr": 1.7985576507731744e-06, "epoch": 0.749121082359998, "percentage": 74.91, "elapsed_time": "15:03:08", "remaining_time": "5:02:26"}
|
| 338 |
+
{"current_steps": 2820, "total_steps": 3751, "loss": 0.2196, "lr": 1.762946258334951e-06, "epoch": 0.7517869936851226, "percentage": 75.18, "elapsed_time": "15:05:25", "remaining_time": "4:58:55"}
|
| 339 |
+
{"current_steps": 2830, "total_steps": 3751, "loss": 0.2119, "lr": 1.727615344001926e-06, "epoch": 0.7544529050102471, "percentage": 75.45, "elapsed_time": "15:07:44", "remaining_time": "4:55:25"}
|
| 340 |
+
{"current_steps": 2840, "total_steps": 3751, "loss": 0.2106, "lr": 1.6925679690602876e-06, "epoch": 0.7571188163353716, "percentage": 75.71, "elapsed_time": "15:10:02", "remaining_time": "4:51:55"}
|
| 341 |
+
{"current_steps": 2850, "total_steps": 3751, "loss": 0.2253, "lr": 1.6578071702286396e-06, "epoch": 0.7597847276604962, "percentage": 75.98, "elapsed_time": "15:12:25", "remaining_time": "4:48:27"}
|
| 342 |
+
{"current_steps": 2850, "total_steps": 3751, "eval_loss": 0.21321707963943481, "epoch": 0.7597847276604962, "percentage": 75.98, "elapsed_time": "15:16:49", "remaining_time": "4:49:50"}
|
| 343 |
+
{"current_steps": 2860, "total_steps": 3751, "loss": 0.22, "lr": 1.6233359593948777e-06, "epoch": 0.7624506389856207, "percentage": 76.25, "elapsed_time": "15:19:06", "remaining_time": "4:46:20"}
|
| 344 |
+
{"current_steps": 2870, "total_steps": 3751, "loss": 0.2133, "lr": 1.5891573233552315e-06, "epoch": 0.7651165503107453, "percentage": 76.51, "elapsed_time": "15:21:22", "remaining_time": "4:42:50"}
|
| 345 |
+
{"current_steps": 2880, "total_steps": 3751, "loss": 0.2161, "lr": 1.5552742235554551e-06, "epoch": 0.7677824616358698, "percentage": 76.78, "elapsed_time": "15:23:39", "remaining_time": "4:39:20"}
|
| 346 |
+
{"current_steps": 2890, "total_steps": 3751, "loss": 0.2129, "lr": 1.521689595834246e-06, "epoch": 0.7704483729609943, "percentage": 77.05, "elapsed_time": "15:25:57", "remaining_time": "4:35:51"}
|
| 347 |
+
{"current_steps": 2900, "total_steps": 3751, "loss": 0.2066, "lr": 1.4884063501688539e-06, "epoch": 0.773114284286119, "percentage": 77.31, "elapsed_time": "15:28:19", "remaining_time": "4:32:24"}
|
| 348 |
+
{"current_steps": 2900, "total_steps": 3751, "eval_loss": 0.21342259645462036, "epoch": 0.773114284286119, "percentage": 77.31, "elapsed_time": "15:32:42", "remaining_time": "4:33:42"}
|
| 349 |
+
{"current_steps": 2910, "total_steps": 3751, "loss": 0.2244, "lr": 1.4554273704229494e-06, "epoch": 0.7757801956112435, "percentage": 77.58, "elapsed_time": "15:35:02", "remaining_time": "4:30:13"}
|
| 350 |
+
{"current_steps": 2920, "total_steps": 3751, "loss": 0.2156, "lr": 1.4227555140967402e-06, "epoch": 0.7784461069363681, "percentage": 77.85, "elapsed_time": "15:37:17", "remaining_time": "4:26:44"}
|
| 351 |
+
{"current_steps": 2930, "total_steps": 3751, "loss": 0.2192, "lr": 1.3903936120793926e-06, "epoch": 0.7811120182614926, "percentage": 78.11, "elapsed_time": "15:39:38", "remaining_time": "4:23:17"}
|
| 352 |
+
{"current_steps": 2940, "total_steps": 3751, "loss": 0.2156, "lr": 1.3583444684037312e-06, "epoch": 0.7837779295866171, "percentage": 78.38, "elapsed_time": "15:41:59", "remaining_time": "4:19:50"}
|
| 353 |
+
{"current_steps": 2950, "total_steps": 3751, "loss": 0.2113, "lr": 1.3266108600032928e-06, "epoch": 0.7864438409117417, "percentage": 78.65, "elapsed_time": "15:44:17", "remaining_time": "4:16:23"}
|
| 354 |
+
{"current_steps": 2950, "total_steps": 3751, "eval_loss": 0.2107125222682953, "epoch": 0.7864438409117417, "percentage": 78.65, "elapsed_time": "15:48:40", "remaining_time": "4:17:35"}
|
| 355 |
+
{"current_steps": 2960, "total_steps": 3751, "loss": 0.2143, "lr": 1.2951955364717116e-06, "epoch": 0.7891097522368662, "percentage": 78.91, "elapsed_time": "15:50:59", "remaining_time": "4:14:08"}
|
| 356 |
+
{"current_steps": 2970, "total_steps": 3751, "loss": 0.2065, "lr": 1.2641012198244718e-06, "epoch": 0.7917756635619908, "percentage": 79.18, "elapsed_time": "15:53:18", "remaining_time": "4:10:41"}
|
| 357 |
+
{"current_steps": 2980, "total_steps": 3751, "loss": 0.2201, "lr": 1.2333306042630672e-06, "epoch": 0.7944415748871153, "percentage": 79.45, "elapsed_time": "15:55:38", "remaining_time": "4:07:14"}
|
| 358 |
+
{"current_steps": 2990, "total_steps": 3751, "loss": 0.2097, "lr": 1.202886355941546e-06, "epoch": 0.7971074862122398, "percentage": 79.71, "elapsed_time": "15:57:58", "remaining_time": "4:03:49"}
|
| 359 |
+
{"current_steps": 3000, "total_steps": 3751, "loss": 0.2107, "lr": 1.1727711127355118e-06, "epoch": 0.7997733975373644, "percentage": 79.98, "elapsed_time": "16:00:17", "remaining_time": "4:00:23"}
|
| 360 |
+
{"current_steps": 3000, "total_steps": 3751, "eval_loss": 0.20849083364009857, "epoch": 0.7997733975373644, "percentage": 79.98, "elapsed_time": "16:04:39", "remaining_time": "4:01:29"}
|