Training in progress, step 1500, checkpoint

Browse files

Files changed (2) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/trainer_state.json +529 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b6b7f89ce2dbc9d7698526683ddd6fa0a782bf452e52e57948bdc0f1bfe7dd6
 size 3443585096

 version https://git-lfs.github.com/spec/v1
+oid sha256:71868c70f9ff065ed3d46aa5c75810e7cd3261588f6b4897569bc4ada4740a06
 size 3443585096

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.29862792574657,
   "eval_steps": 500,
-  "global_step": 1425,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -9983,6 +9983,531 @@
       "learning_rate": 0.0002,
       "loss": 1.4128,
       "step": 1425
     }
   ],
   "logging_steps": 1,
@@ -9997,12 +10522,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.368691379141607e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.419693301049233,
   "eval_steps": 500,
+  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0002,
       "loss": 1.4128,
       "step": 1425
+    },
+    {
+      "epoch": 2.3002421307506054,
+      "grad_norm": 0.5141810178756714,
+      "learning_rate": 0.0002,
+      "loss": 1.3929,
+      "step": 1426
+    },
+    {
+      "epoch": 2.301856335754641,
+      "grad_norm": 0.4419632852077484,
+      "learning_rate": 0.0002,
+      "loss": 1.3551,
+      "step": 1427
+    },
+    {
+      "epoch": 2.3034705407586764,
+      "grad_norm": 0.4233209490776062,
+      "learning_rate": 0.0002,
+      "loss": 1.2845,
+      "step": 1428
+    },
+    {
+      "epoch": 2.305084745762712,
+      "grad_norm": 0.41972845792770386,
+      "learning_rate": 0.0002,
+      "loss": 1.3905,
+      "step": 1429
+    },
+    {
+      "epoch": 2.3066989507667475,
+      "grad_norm": 0.40918785333633423,
+      "learning_rate": 0.0002,
+      "loss": 1.4356,
+      "step": 1430
+    },
+    {
+      "epoch": 2.308313155770783,
+      "grad_norm": 0.4390774071216583,
+      "learning_rate": 0.0002,
+      "loss": 1.2642,
+      "step": 1431
+    },
+    {
+      "epoch": 2.3099273607748185,
+      "grad_norm": 0.42124879360198975,
+      "learning_rate": 0.0002,
+      "loss": 1.3974,
+      "step": 1432
+    },
+    {
+      "epoch": 2.311541565778854,
+      "grad_norm": 0.418536514043808,
+      "learning_rate": 0.0002,
+      "loss": 1.3645,
+      "step": 1433
+    },
+    {
+      "epoch": 2.3131557707828896,
+      "grad_norm": 9.660236358642578,
+      "learning_rate": 0.0002,
+      "loss": 1.3999,
+      "step": 1434
+    },
+    {
+      "epoch": 2.314769975786925,
+      "grad_norm": 0.45394179224967957,
+      "learning_rate": 0.0002,
+      "loss": 1.4067,
+      "step": 1435
+    },
+    {
+      "epoch": 2.3163841807909606,
+      "grad_norm": 0.47176432609558105,
+      "learning_rate": 0.0002,
+      "loss": 1.3179,
+      "step": 1436
+    },
+    {
+      "epoch": 2.317998385794996,
+      "grad_norm": 0.4437849819660187,
+      "learning_rate": 0.0002,
+      "loss": 1.2497,
+      "step": 1437
+    },
+    {
+      "epoch": 2.3196125907990313,
+      "grad_norm": 0.43986475467681885,
+      "learning_rate": 0.0002,
+      "loss": 1.4395,
+      "step": 1438
+    },
+    {
+      "epoch": 2.3212267958030672,
+      "grad_norm": 0.6889885067939758,
+      "learning_rate": 0.0002,
+      "loss": 1.359,
+      "step": 1439
+    },
+    {
+      "epoch": 2.3228410008071023,
+      "grad_norm": 0.42875397205352783,
+      "learning_rate": 0.0002,
+      "loss": 1.3374,
+      "step": 1440
+    },
+    {
+      "epoch": 2.324455205811138,
+      "grad_norm": 0.48654845356941223,
+      "learning_rate": 0.0002,
+      "loss": 1.3067,
+      "step": 1441
+    },
+    {
+      "epoch": 2.3260694108151734,
+      "grad_norm": 0.44666141271591187,
+      "learning_rate": 0.0002,
+      "loss": 1.2824,
+      "step": 1442
+    },
+    {
+      "epoch": 2.327683615819209,
+      "grad_norm": 0.4244503676891327,
+      "learning_rate": 0.0002,
+      "loss": 1.365,
+      "step": 1443
+    },
+    {
+      "epoch": 2.3292978208232444,
+      "grad_norm": 0.41780194640159607,
+      "learning_rate": 0.0002,
+      "loss": 1.3565,
+      "step": 1444
+    },
+    {
+      "epoch": 2.33091202582728,
+      "grad_norm": 0.4327908754348755,
+      "learning_rate": 0.0002,
+      "loss": 1.2449,
+      "step": 1445
+    },
+    {
+      "epoch": 2.3325262308313155,
+      "grad_norm": 0.427132785320282,
+      "learning_rate": 0.0002,
+      "loss": 1.3742,
+      "step": 1446
+    },
+    {
+      "epoch": 2.334140435835351,
+      "grad_norm": 0.9838108420372009,
+      "learning_rate": 0.0002,
+      "loss": 1.3473,
+      "step": 1447
+    },
+    {
+      "epoch": 2.3357546408393866,
+      "grad_norm": 0.4349774718284607,
+      "learning_rate": 0.0002,
+      "loss": 1.3301,
+      "step": 1448
+    },
+    {
+      "epoch": 2.337368845843422,
+      "grad_norm": 0.4829374849796295,
+      "learning_rate": 0.0002,
+      "loss": 1.293,
+      "step": 1449
+    },
+    {
+      "epoch": 2.3389830508474576,
+      "grad_norm": 0.4401623606681824,
+      "learning_rate": 0.0002,
+      "loss": 1.2349,
+      "step": 1450
+    },
+    {
+      "epoch": 2.340597255851493,
+      "grad_norm": 0.5126479864120483,
+      "learning_rate": 0.0002,
+      "loss": 1.2948,
+      "step": 1451
+    },
+    {
+      "epoch": 2.3422114608555287,
+      "grad_norm": 0.42908668518066406,
+      "learning_rate": 0.0002,
+      "loss": 1.2782,
+      "step": 1452
+    },
+    {
+      "epoch": 2.343825665859564,
+      "grad_norm": 0.4352446496486664,
+      "learning_rate": 0.0002,
+      "loss": 1.1775,
+      "step": 1453
+    },
+    {
+      "epoch": 2.3454398708635997,
+      "grad_norm": 0.47312191128730774,
+      "learning_rate": 0.0002,
+      "loss": 1.3177,
+      "step": 1454
+    },
+    {
+      "epoch": 2.3470540758676353,
+      "grad_norm": 0.42954206466674805,
+      "learning_rate": 0.0002,
+      "loss": 1.2397,
+      "step": 1455
+    },
+    {
+      "epoch": 2.348668280871671,
+      "grad_norm": 0.46389418840408325,
+      "learning_rate": 0.0002,
+      "loss": 1.3343,
+      "step": 1456
+    },
+    {
+      "epoch": 2.3502824858757063,
+      "grad_norm": 0.43719297647476196,
+      "learning_rate": 0.0002,
+      "loss": 1.2942,
+      "step": 1457
+    },
+    {
+      "epoch": 2.351896690879742,
+      "grad_norm": 0.5461978912353516,
+      "learning_rate": 0.0002,
+      "loss": 1.3442,
+      "step": 1458
+    },
+    {
+      "epoch": 2.3535108958837774,
+      "grad_norm": 0.43839031457901,
+      "learning_rate": 0.0002,
+      "loss": 1.258,
+      "step": 1459
+    },
+    {
+      "epoch": 2.355125100887813,
+      "grad_norm": 0.4903876483440399,
+      "learning_rate": 0.0002,
+      "loss": 1.465,
+      "step": 1460
+    },
+    {
+      "epoch": 2.3567393058918484,
+      "grad_norm": 0.42305469512939453,
+      "learning_rate": 0.0002,
+      "loss": 1.3441,
+      "step": 1461
+    },
+    {
+      "epoch": 2.358353510895884,
+      "grad_norm": 0.4420433044433594,
+      "learning_rate": 0.0002,
+      "loss": 1.3811,
+      "step": 1462
+    },
+    {
+      "epoch": 2.359967715899919,
+      "grad_norm": 0.46115559339523315,
+      "learning_rate": 0.0002,
+      "loss": 1.2991,
+      "step": 1463
+    },
+    {
+      "epoch": 2.361581920903955,
+      "grad_norm": 0.4190042018890381,
+      "learning_rate": 0.0002,
+      "loss": 1.3862,
+      "step": 1464
+    },
+    {
+      "epoch": 2.36319612590799,
+      "grad_norm": 0.41592875123023987,
+      "learning_rate": 0.0002,
+      "loss": 1.2438,
+      "step": 1465
+    },
+    {
+      "epoch": 2.3648103309120256,
+      "grad_norm": 0.4431193768978119,
+      "learning_rate": 0.0002,
+      "loss": 1.3611,
+      "step": 1466
+    },
+    {
+      "epoch": 2.366424535916061,
+      "grad_norm": 0.4248901307582855,
+      "learning_rate": 0.0002,
+      "loss": 1.1827,
+      "step": 1467
+    },
+    {
+      "epoch": 2.3680387409200967,
+      "grad_norm": 0.49995511770248413,
+      "learning_rate": 0.0002,
+      "loss": 1.3848,
+      "step": 1468
+    },
+    {
+      "epoch": 2.3696529459241322,
+      "grad_norm": 0.4702857732772827,
+      "learning_rate": 0.0002,
+      "loss": 1.3926,
+      "step": 1469
+    },
+    {
+      "epoch": 2.3712671509281678,
+      "grad_norm": 0.5258844494819641,
+      "learning_rate": 0.0002,
+      "loss": 1.3391,
+      "step": 1470
+    },
+    {
+      "epoch": 2.3728813559322033,
+      "grad_norm": 0.5130214095115662,
+      "learning_rate": 0.0002,
+      "loss": 1.4088,
+      "step": 1471
+    },
+    {
+      "epoch": 2.374495560936239,
+      "grad_norm": 0.7444900274276733,
+      "learning_rate": 0.0002,
+      "loss": 1.2021,
+      "step": 1472
+    },
+    {
+      "epoch": 2.3761097659402743,
+      "grad_norm": 0.48592880368232727,
+      "learning_rate": 0.0002,
+      "loss": 1.4223,
+      "step": 1473
+    },
+    {
+      "epoch": 2.37772397094431,
+      "grad_norm": 0.6075024008750916,
+      "learning_rate": 0.0002,
+      "loss": 1.2963,
+      "step": 1474
+    },
+    {
+      "epoch": 2.3793381759483454,
+      "grad_norm": 0.434675931930542,
+      "learning_rate": 0.0002,
+      "loss": 1.3272,
+      "step": 1475
+    },
+    {
+      "epoch": 2.380952380952381,
+      "grad_norm": 0.4828976094722748,
+      "learning_rate": 0.0002,
+      "loss": 1.2819,
+      "step": 1476
+    },
+    {
+      "epoch": 2.3825665859564165,
+      "grad_norm": 0.513092041015625,
+      "learning_rate": 0.0002,
+      "loss": 1.3674,
+      "step": 1477
+    },
+    {
+      "epoch": 2.384180790960452,
+      "grad_norm": 0.42832380533218384,
+      "learning_rate": 0.0002,
+      "loss": 1.2564,
+      "step": 1478
+    },
+    {
+      "epoch": 2.3857949959644875,
+      "grad_norm": 0.4438645541667938,
+      "learning_rate": 0.0002,
+      "loss": 1.3712,
+      "step": 1479
+    },
+    {
+      "epoch": 2.387409200968523,
+      "grad_norm": 0.42463281750679016,
+      "learning_rate": 0.0002,
+      "loss": 1.2953,
+      "step": 1480
+    },
+    {
+      "epoch": 2.3890234059725586,
+      "grad_norm": 0.42697665095329285,
+      "learning_rate": 0.0002,
+      "loss": 1.313,
+      "step": 1481
+    },
+    {
+      "epoch": 2.390637610976594,
+      "grad_norm": 0.43315592408180237,
+      "learning_rate": 0.0002,
+      "loss": 1.322,
+      "step": 1482
+    },
+    {
+      "epoch": 2.3922518159806296,
+      "grad_norm": 0.4209153354167938,
+      "learning_rate": 0.0002,
+      "loss": 1.3051,
+      "step": 1483
+    },
+    {
+      "epoch": 2.393866020984665,
+      "grad_norm": 0.43778765201568604,
+      "learning_rate": 0.0002,
+      "loss": 1.4258,
+      "step": 1484
+    },
+    {
+      "epoch": 2.3954802259887007,
+      "grad_norm": 0.41469642519950867,
+      "learning_rate": 0.0002,
+      "loss": 1.398,
+      "step": 1485
+    },
+    {
+      "epoch": 2.3970944309927362,
+      "grad_norm": 0.41460326313972473,
+      "learning_rate": 0.0002,
+      "loss": 1.2997,
+      "step": 1486
+    },
+    {
+      "epoch": 2.3987086359967718,
+      "grad_norm": 0.43409091234207153,
+      "learning_rate": 0.0002,
+      "loss": 1.3761,
+      "step": 1487
+    },
+    {
+      "epoch": 2.4003228410008073,
+      "grad_norm": 0.43002137541770935,
+      "learning_rate": 0.0002,
+      "loss": 1.3879,
+      "step": 1488
+    },
+    {
+      "epoch": 2.401937046004843,
+      "grad_norm": 0.4376080632209778,
+      "learning_rate": 0.0002,
+      "loss": 1.3356,
+      "step": 1489
+    },
+    {
+      "epoch": 2.403551251008878,
+      "grad_norm": 0.4308399260044098,
+      "learning_rate": 0.0002,
+      "loss": 1.3483,
+      "step": 1490
+    },
+    {
+      "epoch": 2.405165456012914,
+      "grad_norm": 0.4664413034915924,
+      "learning_rate": 0.0002,
+      "loss": 1.356,
+      "step": 1491
+    },
+    {
+      "epoch": 2.406779661016949,
+      "grad_norm": 0.5452325940132141,
+      "learning_rate": 0.0002,
+      "loss": 1.5682,
+      "step": 1492
+    },
+    {
+      "epoch": 2.4083938660209845,
+      "grad_norm": 0.4430229365825653,
+      "learning_rate": 0.0002,
+      "loss": 1.3031,
+      "step": 1493
+    },
+    {
+      "epoch": 2.41000807102502,
+      "grad_norm": 0.429807186126709,
+      "learning_rate": 0.0002,
+      "loss": 1.339,
+      "step": 1494
+    },
+    {
+      "epoch": 2.4116222760290555,
+      "grad_norm": 0.42216193675994873,
+      "learning_rate": 0.0002,
+      "loss": 1.3242,
+      "step": 1495
+    },
+    {
+      "epoch": 2.413236481033091,
+      "grad_norm": 0.4356923997402191,
+      "learning_rate": 0.0002,
+      "loss": 1.3524,
+      "step": 1496
+    },
+    {
+      "epoch": 2.4148506860371266,
+      "grad_norm": 0.43242383003234863,
+      "learning_rate": 0.0002,
+      "loss": 1.4138,
+      "step": 1497
+    },
+    {
+      "epoch": 2.416464891041162,
+      "grad_norm": 0.4492044448852539,
+      "learning_rate": 0.0002,
+      "loss": 1.3394,
+      "step": 1498
+    },
+    {
+      "epoch": 2.4180790960451977,
+      "grad_norm": 0.40164169669151306,
+      "learning_rate": 0.0002,
+      "loss": 1.2786,
+      "step": 1499
+    },
+    {
+      "epoch": 2.419693301049233,
+      "grad_norm": 0.4147217869758606,
+      "learning_rate": 0.0002,
+      "loss": 1.2922,
+      "step": 1500
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 9.86212666947561e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null