Training in progress, step 400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf4065b86318076a5a3bf07fbb9394e80c4967d0cc2d0b00d9081e01d6255844
 size 289452128

 version https://git-lfs.github.com/spec/v1
+oid sha256:13575dc492c38144c843898acb618c0d1c5c3cce098049617b226fa6e5a9bf1f
 size 289452128

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:deed5070fbdcbadb6bebbac4b41994775d56b22dc55821d48674673ded86dea1
 size 147360212

 version https://git-lfs.github.com/spec/v1
+oid sha256:35d9c12828b58b5d7a740be66bce3b8fc4bebe3371b71434c5376512444b12f8
 size 147360212

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f19ecba1a9b42fe6be94318bc786fb251ad59183916dae322684bac89df8ad1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d248970e0bc892afe0a4bc9030ea20f7786073778fbe970ebb95fa58d9dc75c8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9096f15f02bac6b0fc27aa7aa4986f85d87d53fca310a75657e0015357af5c5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:705cabf5cbc3a6ab0feb67c77b9b453d59efcc939ce90d310af96e621810f990
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.0824118852615356,
-  "best_model_checkpoint": "miner_id_24/checkpoint-350",
-  "epoch": 0.023463949317869475,
   "eval_steps": 50,
-  "global_step": 350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2521,6 +2521,364 @@
       "eval_samples_per_second": 24.289,
       "eval_steps_per_second": 6.073,
       "step": 350
     }
   ],
   "logging_steps": 1,
@@ -2544,12 +2902,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.158164036714496e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.0815964937210083,
+  "best_model_checkpoint": "miner_id_24/checkpoint-400",
+  "epoch": 0.026815942077565113,
   "eval_steps": 50,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 24.289,
       "eval_steps_per_second": 6.073,
       "step": 350
+    },
+    {
+      "epoch": 0.023530989173063385,
+      "grad_norm": 0.288426011800766,
+      "learning_rate": 3.844650207332562e-06,
+      "loss": 1.1302,
+      "step": 351
+    },
+    {
+      "epoch": 0.0235980290282573,
+      "grad_norm": 0.28361886739730835,
+      "learning_rate": 3.691267552111183e-06,
+      "loss": 1.0888,
+      "step": 352
+    },
+    {
+      "epoch": 0.023665068883451212,
+      "grad_norm": 0.28806284070014954,
+      "learning_rate": 3.54088980417534e-06,
+      "loss": 1.0634,
+      "step": 353
+    },
+    {
+      "epoch": 0.023732108738645126,
+      "grad_norm": 0.2982894480228424,
+      "learning_rate": 3.393526721321616e-06,
+      "loss": 1.0926,
+      "step": 354
+    },
+    {
+      "epoch": 0.023799148593839036,
+      "grad_norm": 0.29122960567474365,
+      "learning_rate": 3.249187865729264e-06,
+      "loss": 1.0786,
+      "step": 355
+    },
+    {
+      "epoch": 0.02386618844903295,
+      "grad_norm": 0.30477002263069153,
+      "learning_rate": 3.1078826033397843e-06,
+      "loss": 1.0664,
+      "step": 356
+    },
+    {
+      "epoch": 0.023933228304226863,
+      "grad_norm": 0.2922193706035614,
+      "learning_rate": 2.9696201032491434e-06,
+      "loss": 1.1082,
+      "step": 357
+    },
+    {
+      "epoch": 0.024000268159420777,
+      "grad_norm": 0.2954815626144409,
+      "learning_rate": 2.8344093371128424e-06,
+      "loss": 1.1143,
+      "step": 358
+    },
+    {
+      "epoch": 0.024067308014614687,
+      "grad_norm": 0.28901568055152893,
+      "learning_rate": 2.70225907856374e-06,
+      "loss": 1.0572,
+      "step": 359
+    },
+    {
+      "epoch": 0.0241343478698086,
+      "grad_norm": 0.2976773679256439,
+      "learning_rate": 2.573177902642726e-06,
+      "loss": 1.0953,
+      "step": 360
+    },
+    {
+      "epoch": 0.024201387725002514,
+      "grad_norm": 0.298807829618454,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.059,
+      "step": 361
+    },
+    {
+      "epoch": 0.024268427580196428,
+      "grad_norm": 0.3115805983543396,
+      "learning_rate": 2.324256102563188e-06,
+      "loss": 1.0684,
+      "step": 362
+    },
+    {
+      "epoch": 0.024335467435390338,
+      "grad_norm": 0.3207703232765198,
+      "learning_rate": 2.204431630583548e-06,
+      "loss": 1.0746,
+      "step": 363
+    },
+    {
+      "epoch": 0.02440250729058425,
+      "grad_norm": 0.2976981997489929,
+      "learning_rate": 2.087708544541689e-06,
+      "loss": 1.0713,
+      "step": 364
+    },
+    {
+      "epoch": 0.024469547145778165,
+      "grad_norm": 0.3164154291152954,
+      "learning_rate": 1.974094418431388e-06,
+      "loss": 1.1538,
+      "step": 365
+    },
+    {
+      "epoch": 0.02453658700097208,
+      "grad_norm": 0.3003906011581421,
+      "learning_rate": 1.8635966245104664e-06,
+      "loss": 1.0924,
+      "step": 366
+    },
+    {
+      "epoch": 0.024603626856165992,
+      "grad_norm": 0.30181068181991577,
+      "learning_rate": 1.7562223328224325e-06,
+      "loss": 1.0568,
+      "step": 367
+    },
+    {
+      "epoch": 0.024670666711359902,
+      "grad_norm": 0.3205750584602356,
+      "learning_rate": 1.6519785107311891e-06,
+      "loss": 1.0588,
+      "step": 368
+    },
+    {
+      "epoch": 0.024737706566553816,
+      "grad_norm": 0.3129303753376007,
+      "learning_rate": 1.5508719224689717e-06,
+      "loss": 1.0774,
+      "step": 369
+    },
+    {
+      "epoch": 0.02480474642174773,
+      "grad_norm": 0.31309744715690613,
+      "learning_rate": 1.4529091286973995e-06,
+      "loss": 1.1387,
+      "step": 370
+    },
+    {
+      "epoch": 0.024871786276941643,
+      "grad_norm": 0.31708234548568726,
+      "learning_rate": 1.358096486081778e-06,
+      "loss": 1.0754,
+      "step": 371
+    },
+    {
+      "epoch": 0.024938826132135553,
+      "grad_norm": 0.31589797139167786,
+      "learning_rate": 1.2664401468786114e-06,
+      "loss": 1.1179,
+      "step": 372
+    },
+    {
+      "epoch": 0.025005865987329467,
+      "grad_norm": 0.326985627412796,
+      "learning_rate": 1.1779460585363944e-06,
+      "loss": 1.0237,
+      "step": 373
+    },
+    {
+      "epoch": 0.02507290584252338,
+      "grad_norm": 0.337021142244339,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 1.0416,
+      "step": 374
+    },
+    {
+      "epoch": 0.025139945697717294,
+      "grad_norm": 0.3299597501754761,
+      "learning_rate": 1.0104673978866164e-06,
+      "loss": 1.0428,
+      "step": 375
+    },
+    {
+      "epoch": 0.025206985552911204,
+      "grad_norm": 0.31924912333488464,
+      "learning_rate": 9.314936930293283e-07,
+      "loss": 0.9773,
+      "step": 376
+    },
+    {
+      "epoch": 0.025274025408105118,
+      "grad_norm": 0.33366861939430237,
+      "learning_rate": 8.557039732283944e-07,
+      "loss": 1.1019,
+      "step": 377
+    },
+    {
+      "epoch": 0.02534106526329903,
+      "grad_norm": 0.3544885516166687,
+      "learning_rate": 7.83103156370113e-07,
+      "loss": 1.0851,
+      "step": 378
+    },
+    {
+      "epoch": 0.025408105118492945,
+      "grad_norm": 0.3410674035549164,
+      "learning_rate": 7.136959534174592e-07,
+      "loss": 1.1716,
+      "step": 379
+    },
+    {
+      "epoch": 0.02547514497368686,
+      "grad_norm": 0.3247200548648834,
+      "learning_rate": 6.474868681043578e-07,
+      "loss": 1.0356,
+      "step": 380
+    },
+    {
+      "epoch": 0.02554218482888077,
+      "grad_norm": 0.3541271686553955,
+      "learning_rate": 5.844801966434832e-07,
+      "loss": 1.1073,
+      "step": 381
+    },
+    {
+      "epoch": 0.025609224684074682,
+      "grad_norm": 0.36653196811676025,
+      "learning_rate": 5.246800274474439e-07,
+      "loss": 1.0629,
+      "step": 382
+    },
+    {
+      "epoch": 0.025676264539268596,
+      "grad_norm": 0.39403557777404785,
+      "learning_rate": 4.680902408635335e-07,
+      "loss": 1.1672,
+      "step": 383
+    },
+    {
+      "epoch": 0.02574330439446251,
+      "grad_norm": 0.3814004957675934,
+      "learning_rate": 4.1471450892189846e-07,
+      "loss": 1.1773,
+      "step": 384
+    },
+    {
+      "epoch": 0.02581034424965642,
+      "grad_norm": 0.36794358491897583,
+      "learning_rate": 3.6455629509730136e-07,
+      "loss": 1.0303,
+      "step": 385
+    },
+    {
+      "epoch": 0.025877384104850333,
+      "grad_norm": 0.39013010263442993,
+      "learning_rate": 3.1761885408435054e-07,
+      "loss": 1.0535,
+      "step": 386
+    },
+    {
+      "epoch": 0.025944423960044247,
+      "grad_norm": 0.3895156681537628,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 1.1058,
+      "step": 387
+    },
+    {
+      "epoch": 0.02601146381523816,
+      "grad_norm": 0.4038650393486023,
+      "learning_rate": 2.334182641175686e-07,
+      "loss": 1.09,
+      "step": 388
+    },
+    {
+      "epoch": 0.02607850367043207,
+      "grad_norm": 0.4419088363647461,
+      "learning_rate": 1.9616057881935436e-07,
+      "loss": 1.0795,
+      "step": 389
+    },
+    {
+      "epoch": 0.026145543525625984,
+      "grad_norm": 0.4492166340351105,
+      "learning_rate": 1.6213459328950352e-07,
+      "loss": 1.0993,
+      "step": 390
+    },
+    {
+      "epoch": 0.026212583380819898,
+      "grad_norm": 0.44956785440444946,
+      "learning_rate": 1.3134251542544774e-07,
+      "loss": 0.9423,
+      "step": 391
+    },
+    {
+      "epoch": 0.02627962323601381,
+      "grad_norm": 0.47182002663612366,
+      "learning_rate": 1.0378634328099269e-07,
+      "loss": 1.0153,
+      "step": 392
+    },
+    {
+      "epoch": 0.02634666309120772,
+      "grad_norm": 0.4756089746952057,
+      "learning_rate": 7.946786493666647e-08,
+      "loss": 1.0133,
+      "step": 393
+    },
+    {
+      "epoch": 0.026413702946401635,
+      "grad_norm": 0.4827803671360016,
+      "learning_rate": 5.838865838366792e-08,
+      "loss": 0.9773,
+      "step": 394
+    },
+    {
+      "epoch": 0.02648074280159555,
+      "grad_norm": 0.5431700944900513,
+      "learning_rate": 4.055009142152067e-08,
+      "loss": 0.9337,
+      "step": 395
+    },
+    {
+      "epoch": 0.026547782656789462,
+      "grad_norm": 0.5491564273834229,
+      "learning_rate": 2.595332156925534e-08,
+      "loss": 1.0198,
+      "step": 396
+    },
+    {
+      "epoch": 0.026614822511983376,
+      "grad_norm": 0.618582010269165,
+      "learning_rate": 1.4599295990352924e-08,
+      "loss": 1.0385,
+      "step": 397
+    },
+    {
+      "epoch": 0.026681862367177286,
+      "grad_norm": 0.6139496564865112,
+      "learning_rate": 6.488751431266149e-09,
+      "loss": 1.0359,
+      "step": 398
+    },
+    {
+      "epoch": 0.0267489022223712,
+      "grad_norm": 0.8561084270477295,
+      "learning_rate": 1.622214173602199e-09,
+      "loss": 1.2018,
+      "step": 399
+    },
+    {
+      "epoch": 0.026815942077565113,
+      "grad_norm": 0.8448209762573242,
+      "learning_rate": 0.0,
+      "loss": 1.2637,
+      "step": 400
+    },
+    {
+      "epoch": 0.026815942077565113,
+      "eval_loss": 1.0815964937210083,
+      "eval_runtime": 1034.6144,
+      "eval_samples_per_second": 24.282,
+      "eval_steps_per_second": 6.071,
+      "step": 400
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.323616041959424e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null