Training in progress, epoch 1, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +640 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95c516fe4d7ba1b8df18d4e76c625fa01d2ce34ffe0a062438bec237a014244b
 size 203456160

 version https://git-lfs.github.com/spec/v1
+oid sha256:4054fc3341df3ec62ad23eb40df504bf559ae3fe109c9358a6fe2fbe13c2b5db
 size 203456160

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:888abf7621e98b6dff03f8f532216dec008c8ba5cbf838bb0ff7a81c8f50bbd6
 size 407127126

 version https://git-lfs.github.com/spec/v1
+oid sha256:f7de5b56bb44d80a59d57ae0d74443cf71e7c3a04fcea2547755d8ae3f6ab98b
 size 407127126

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1ba92e1c99bef2498a249fd40a486215006468f687654840f88285270cef3c8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d4d2e3c9c2c299605f839dd9681034a9a825180b34e15be15b76272f4fb00418
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:30d4acc946a913917ad083137cc06e69b7e71ebdbeee53948832810de27f25fc
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:6dfefdd350a9c50a5ce8f17f77222cc43ade97bd1bc4dfe20825b1649c6776a6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9994088669950739,
   "eval_steps": 500,
-  "global_step": 634,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -637,6 +637,643 @@
       "learning_rate": 6.861468292009727e-05,
       "loss": 0.7384,
       "step": 630
     }
   ],
   "logging_steps": 7,
@@ -656,7 +1293,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.20931500638208e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.9988177339901478,
   "eval_steps": 500,
+  "global_step": 1268,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6.861468292009727e-05,
       "loss": 0.7384,
       "step": 630
+    },
+    {
+      "epoch": 1.0041379310344827,
+      "grad_norm": 0.28151699900627136,
+      "learning_rate": 6.788347664153447e-05,
+      "loss": 0.7612,
+      "step": 637
+    },
+    {
+      "epoch": 1.0151724137931035,
+      "grad_norm": 0.2590833604335785,
+      "learning_rate": 6.714785788270658e-05,
+      "loss": 0.6778,
+      "step": 644
+    },
+    {
+      "epoch": 1.0262068965517241,
+      "grad_norm": 0.268964558839798,
+      "learning_rate": 6.640800814653503e-05,
+      "loss": 0.6337,
+      "step": 651
+    },
+    {
+      "epoch": 1.0372413793103448,
+      "grad_norm": 0.26795223355293274,
+      "learning_rate": 6.566410997987163e-05,
+      "loss": 0.6548,
+      "step": 658
+    },
+    {
+      "epoch": 1.0482758620689656,
+      "grad_norm": 0.25789910554885864,
+      "learning_rate": 6.49163469284578e-05,
+      "loss": 0.6494,
+      "step": 665
+    },
+    {
+      "epoch": 1.0593103448275862,
+      "grad_norm": 0.27773144841194153,
+      "learning_rate": 6.416490349163748e-05,
+      "loss": 0.6541,
+      "step": 672
+    },
+    {
+      "epoch": 1.0703448275862069,
+      "grad_norm": 0.28650370240211487,
+      "learning_rate": 6.340996507683458e-05,
+      "loss": 0.6677,
+      "step": 679
+    },
+    {
+      "epoch": 1.0813793103448275,
+      "grad_norm": 0.2758565843105316,
+      "learning_rate": 6.265171795380659e-05,
+      "loss": 0.6299,
+      "step": 686
+    },
+    {
+      "epoch": 1.0924137931034483,
+      "grad_norm": 0.2588457465171814,
+      "learning_rate": 6.189034920868522e-05,
+      "loss": 0.6391,
+      "step": 693
+    },
+    {
+      "epoch": 1.103448275862069,
+      "grad_norm": 0.2821789085865021,
+      "learning_rate": 6.112604669781572e-05,
+      "loss": 0.6239,
+      "step": 700
+    },
+    {
+      "epoch": 1.1144827586206896,
+      "grad_norm": 0.2735387086868286,
+      "learning_rate": 6.0358999001406156e-05,
+      "loss": 0.6581,
+      "step": 707
+    },
+    {
+      "epoch": 1.1255172413793104,
+      "grad_norm": 0.2622285485267639,
+      "learning_rate": 5.9589395376998e-05,
+      "loss": 0.6396,
+      "step": 714
+    },
+    {
+      "epoch": 1.136551724137931,
+      "grad_norm": 0.27206316590309143,
+      "learning_rate": 5.8817425712769794e-05,
+      "loss": 0.6305,
+      "step": 721
+    },
+    {
+      "epoch": 1.1475862068965517,
+      "grad_norm": 0.2703116536140442,
+      "learning_rate": 5.804328048068492e-05,
+      "loss": 0.6498,
+      "step": 728
+    },
+    {
+      "epoch": 1.1586206896551725,
+      "grad_norm": 0.2688596844673157,
+      "learning_rate": 5.7267150689495644e-05,
+      "loss": 0.6453,
+      "step": 735
+    },
+    {
+      "epoch": 1.1696551724137931,
+      "grad_norm": 0.27926501631736755,
+      "learning_rate": 5.648922783761443e-05,
+      "loss": 0.6715,
+      "step": 742
+    },
+    {
+      "epoch": 1.1806896551724138,
+      "grad_norm": 0.2714243531227112,
+      "learning_rate": 5.570970386586469e-05,
+      "loss": 0.6502,
+      "step": 749
+    },
+    {
+      "epoch": 1.1917241379310344,
+      "grad_norm": 0.27098771929740906,
+      "learning_rate": 5.492877111012218e-05,
+      "loss": 0.6382,
+      "step": 756
+    },
+    {
+      "epoch": 1.2027586206896552,
+      "grad_norm": 0.28337958455085754,
+      "learning_rate": 5.414662225385903e-05,
+      "loss": 0.6383,
+      "step": 763
+    },
+    {
+      "epoch": 1.2137931034482758,
+      "grad_norm": 0.28135356307029724,
+      "learning_rate": 5.336345028060199e-05,
+      "loss": 0.6433,
+      "step": 770
+    },
+    {
+      "epoch": 1.2248275862068965,
+      "grad_norm": 0.268600195646286,
+      "learning_rate": 5.257944842631658e-05,
+      "loss": 0.6503,
+      "step": 777
+    },
+    {
+      "epoch": 1.2358620689655173,
+      "grad_norm": 0.268028199672699,
+      "learning_rate": 5.179481013172912e-05,
+      "loss": 0.64,
+      "step": 784
+    },
+    {
+      "epoch": 1.246896551724138,
+      "grad_norm": 0.26292359828948975,
+      "learning_rate": 5.100972899459796e-05,
+      "loss": 0.6211,
+      "step": 791
+    },
+    {
+      "epoch": 1.2579310344827586,
+      "grad_norm": 0.2784179151058197,
+      "learning_rate": 5.022439872194629e-05,
+      "loss": 0.6466,
+      "step": 798
+    },
+    {
+      "epoch": 1.2689655172413792,
+      "grad_norm": 0.28252890706062317,
+      "learning_rate": 4.943901308226771e-05,
+      "loss": 0.6368,
+      "step": 805
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 0.2834304869174957,
+      "learning_rate": 4.865376585771687e-05,
+      "loss": 0.6336,
+      "step": 812
+    },
+    {
+      "epoch": 1.2910344827586206,
+      "grad_norm": 0.2688741981983185,
+      "learning_rate": 4.7868850796296495e-05,
+      "loss": 0.653,
+      "step": 819
+    },
+    {
+      "epoch": 1.3020689655172415,
+      "grad_norm": 0.2756083905696869,
+      "learning_rate": 4.708446156405307e-05,
+      "loss": 0.6581,
+      "step": 826
+    },
+    {
+      "epoch": 1.3131034482758621,
+      "grad_norm": 0.2754676640033722,
+      "learning_rate": 4.630079169729257e-05,
+      "loss": 0.6393,
+      "step": 833
+    },
+    {
+      "epoch": 1.3241379310344827,
+      "grad_norm": 0.26543307304382324,
+      "learning_rate": 4.551803455482833e-05,
+      "loss": 0.6264,
+      "step": 840
+    },
+    {
+      "epoch": 1.3351724137931034,
+      "grad_norm": 0.2671962380409241,
+      "learning_rate": 4.473638327027259e-05,
+      "loss": 0.6266,
+      "step": 847
+    },
+    {
+      "epoch": 1.3462068965517242,
+      "grad_norm": 0.2819383442401886,
+      "learning_rate": 4.395603070438373e-05,
+      "loss": 0.6426,
+      "step": 854
+    },
+    {
+      "epoch": 1.3572413793103448,
+      "grad_norm": 0.2790848910808563,
+      "learning_rate": 4.31771693974807e-05,
+      "loss": 0.633,
+      "step": 861
+    },
+    {
+      "epoch": 1.3682758620689655,
+      "grad_norm": 0.2653134763240814,
+      "learning_rate": 4.239999152193664e-05,
+      "loss": 0.637,
+      "step": 868
+    },
+    {
+      "epoch": 1.3793103448275863,
+      "grad_norm": 0.27828529477119446,
+      "learning_rate": 4.162468883476319e-05,
+      "loss": 0.6273,
+      "step": 875
+    },
+    {
+      "epoch": 1.390344827586207,
+      "grad_norm": 0.27933961153030396,
+      "learning_rate": 4.085145263029726e-05,
+      "loss": 0.6477,
+      "step": 882
+    },
+    {
+      "epoch": 1.4013793103448275,
+      "grad_norm": 0.2893773317337036,
+      "learning_rate": 4.008047369300218e-05,
+      "loss": 0.6374,
+      "step": 889
+    },
+    {
+      "epoch": 1.4124137931034482,
+      "grad_norm": 0.29266366362571716,
+      "learning_rate": 3.9311942250394276e-05,
+      "loss": 0.6051,
+      "step": 896
+    },
+    {
+      "epoch": 1.423448275862069,
+      "grad_norm": 0.28573766350746155,
+      "learning_rate": 3.8546047926107256e-05,
+      "loss": 0.6371,
+      "step": 903
+    },
+    {
+      "epoch": 1.4344827586206896,
+      "grad_norm": 0.2910100817680359,
+      "learning_rate": 3.778297969310529e-05,
+      "loss": 0.6491,
+      "step": 910
+    },
+    {
+      "epoch": 1.4455172413793105,
+      "grad_norm": 0.2896603047847748,
+      "learning_rate": 3.7022925827056884e-05,
+      "loss": 0.6332,
+      "step": 917
+    },
+    {
+      "epoch": 1.456551724137931,
+      "grad_norm": 0.26403993368148804,
+      "learning_rate": 3.62660738598805e-05,
+      "loss": 0.6387,
+      "step": 924
+    },
+    {
+      "epoch": 1.4675862068965517,
+      "grad_norm": 0.27510949969291687,
+      "learning_rate": 3.551261053347404e-05,
+      "loss": 0.6217,
+      "step": 931
+    },
+    {
+      "epoch": 1.4786206896551723,
+      "grad_norm": 0.28636762499809265,
+      "learning_rate": 3.4762721753638995e-05,
+      "loss": 0.6186,
+      "step": 938
+    },
+    {
+      "epoch": 1.489655172413793,
+      "grad_norm": 0.2784745991230011,
+      "learning_rate": 3.401659254421094e-05,
+      "loss": 0.6392,
+      "step": 945
+    },
+    {
+      "epoch": 1.5006896551724138,
+      "grad_norm": 0.2818538546562195,
+      "learning_rate": 3.3274407001407735e-05,
+      "loss": 0.6311,
+      "step": 952
+    },
+    {
+      "epoch": 1.5117241379310344,
+      "grad_norm": 0.2834908068180084,
+      "learning_rate": 3.2536348248406534e-05,
+      "loss": 0.6205,
+      "step": 959
+    },
+    {
+      "epoch": 1.5227586206896553,
+      "grad_norm": 0.27675577998161316,
+      "learning_rate": 3.1802598390160784e-05,
+      "loss": 0.6191,
+      "step": 966
+    },
+    {
+      "epoch": 1.533793103448276,
+      "grad_norm": 0.27893996238708496,
+      "learning_rate": 3.107333846846872e-05,
+      "loss": 0.6263,
+      "step": 973
+    },
+    {
+      "epoch": 1.5448275862068965,
+      "grad_norm": 0.2641468048095703,
+      "learning_rate": 3.0348748417303823e-05,
+      "loss": 0.6163,
+      "step": 980
+    },
+    {
+      "epoch": 1.5558620689655172,
+      "grad_norm": 0.2818867862224579,
+      "learning_rate": 2.9629007018418985e-05,
+      "loss": 0.6201,
+      "step": 987
+    },
+    {
+      "epoch": 1.5668965517241378,
+      "grad_norm": 0.28501781821250916,
+      "learning_rate": 2.8914291857234636e-05,
+      "loss": 0.6308,
+      "step": 994
+    },
+    {
+      "epoch": 1.5779310344827586,
+      "grad_norm": 0.2769615352153778,
+      "learning_rate": 2.8204779279022276e-05,
+      "loss": 0.6063,
+      "step": 1001
+    },
+    {
+      "epoch": 1.5889655172413795,
+      "grad_norm": 0.2790084481239319,
+      "learning_rate": 2.7500644345393943e-05,
+      "loss": 0.6325,
+      "step": 1008
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 0.2709619402885437,
+      "learning_rate": 2.68020607911083e-05,
+      "loss": 0.5982,
+      "step": 1015
+    },
+    {
+      "epoch": 1.6110344827586207,
+      "grad_norm": 0.2806256115436554,
+      "learning_rate": 2.610920098120424e-05,
+      "loss": 0.6453,
+      "step": 1022
+    },
+    {
+      "epoch": 1.6220689655172413,
+      "grad_norm": 0.2840186059474945,
+      "learning_rate": 2.5422235868472345e-05,
+      "loss": 0.6243,
+      "step": 1029
+    },
+    {
+      "epoch": 1.633103448275862,
+      "grad_norm": 0.2787083089351654,
+      "learning_rate": 2.4741334951274947e-05,
+      "loss": 0.6097,
+      "step": 1036
+    },
+    {
+      "epoch": 1.6441379310344828,
+      "grad_norm": 0.27798426151275635,
+      "learning_rate": 2.40666662317248e-05,
+      "loss": 0.616,
+      "step": 1043
+    },
+    {
+      "epoch": 1.6551724137931034,
+      "grad_norm": 0.27529022097587585,
+      "learning_rate": 2.3398396174233178e-05,
+      "loss": 0.6207,
+      "step": 1050
+    },
+    {
+      "epoch": 1.6662068965517243,
+      "grad_norm": 0.2800416946411133,
+      "learning_rate": 2.2736689664437217e-05,
+      "loss": 0.6273,
+      "step": 1057
+    },
+    {
+      "epoch": 1.677241379310345,
+      "grad_norm": 0.2721683979034424,
+      "learning_rate": 2.2081709968516866e-05,
+      "loss": 0.6,
+      "step": 1064
+    },
+    {
+      "epoch": 1.6882758620689655,
+      "grad_norm": 0.29215288162231445,
+      "learning_rate": 2.1433618692911467e-05,
+      "loss": 0.6105,
+      "step": 1071
+    },
+    {
+      "epoch": 1.6993103448275861,
+      "grad_norm": 0.2790670692920685,
+      "learning_rate": 2.0792575744445653e-05,
+      "loss": 0.6028,
+      "step": 1078
+    },
+    {
+      "epoch": 1.7103448275862068,
+      "grad_norm": 0.28281012177467346,
+      "learning_rate": 2.015873929087482e-05,
+      "loss": 0.6168,
+      "step": 1085
+    },
+    {
+      "epoch": 1.7213793103448276,
+      "grad_norm": 0.2883938252925873,
+      "learning_rate": 1.95322657218596e-05,
+      "loss": 0.6045,
+      "step": 1092
+    },
+    {
+      "epoch": 1.7324137931034482,
+      "grad_norm": 0.27805426716804504,
+      "learning_rate": 1.8913309610379015e-05,
+      "loss": 0.6154,
+      "step": 1099
+    },
+    {
+      "epoch": 1.743448275862069,
+      "grad_norm": 0.27731558680534363,
+      "learning_rate": 1.8302023674591935e-05,
+      "loss": 0.6098,
+      "step": 1106
+    },
+    {
+      "epoch": 1.7544827586206897,
+      "grad_norm": 0.2910196781158447,
+      "learning_rate": 1.7698558740156135e-05,
+      "loss": 0.6106,
+      "step": 1113
+    },
+    {
+      "epoch": 1.7655172413793103,
+      "grad_norm": 0.27517473697662354,
+      "learning_rate": 1.7103063703014372e-05,
+      "loss": 0.6239,
+      "step": 1120
+    },
+    {
+      "epoch": 1.776551724137931,
+      "grad_norm": 0.26991525292396545,
+      "learning_rate": 1.6515685492656467e-05,
+      "loss": 0.602,
+      "step": 1127
+    },
+    {
+      "epoch": 1.7875862068965516,
+      "grad_norm": 0.28398916125297546,
+      "learning_rate": 1.59365690358667e-05,
+      "loss": 0.6138,
+      "step": 1134
+    },
+    {
+      "epoch": 1.7986206896551724,
+      "grad_norm": 0.2737048268318176,
+      "learning_rate": 1.5365857220965275e-05,
+      "loss": 0.6034,
+      "step": 1141
+    },
+    {
+      "epoch": 1.8096551724137933,
+      "grad_norm": 0.27919578552246094,
+      "learning_rate": 1.4803690862552755e-05,
+      "loss": 0.6126,
+      "step": 1148
+    },
+    {
+      "epoch": 1.8206896551724139,
+      "grad_norm": 0.2809952199459076,
+      "learning_rate": 1.4250208666766235e-05,
+      "loss": 0.607,
+      "step": 1155
+    },
+    {
+      "epoch": 1.8317241379310345,
+      "grad_norm": 0.28192582726478577,
+      "learning_rate": 1.3705547197055584e-05,
+      "loss": 0.6064,
+      "step": 1162
+    },
+    {
+      "epoch": 1.8427586206896551,
+      "grad_norm": 0.2873040735721588,
+      "learning_rate": 1.3169840840488501e-05,
+      "loss": 0.6173,
+      "step": 1169
+    },
+    {
+      "epoch": 1.8537931034482757,
+      "grad_norm": 0.2937031388282776,
+      "learning_rate": 1.2643221774592518e-05,
+      "loss": 0.6148,
+      "step": 1176
+    },
+    {
+      "epoch": 1.8648275862068966,
+      "grad_norm": 0.28401997685432434,
+      "learning_rate": 1.2125819934742188e-05,
+      "loss": 0.6027,
+      "step": 1183
+    },
+    {
+      "epoch": 1.8758620689655172,
+      "grad_norm": 0.2690780758857727,
+      "learning_rate": 1.1617762982099446e-05,
+      "loss": 0.6089,
+      "step": 1190
+    },
+    {
+      "epoch": 1.886896551724138,
+      "grad_norm": 0.28273099660873413,
+      "learning_rate": 1.1119176272115128e-05,
+      "loss": 0.6017,
+      "step": 1197
+    },
+    {
+      "epoch": 1.8979310344827587,
+      "grad_norm": 0.2761295437812805,
+      "learning_rate": 1.0630182823599399e-05,
+      "loss": 0.6156,
+      "step": 1204
+    },
+    {
+      "epoch": 1.9089655172413793,
+      "grad_norm": 0.2870751917362213,
+      "learning_rate": 1.0150903288368741e-05,
+      "loss": 0.6038,
+      "step": 1211
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 0.2764483690261841,
+      "learning_rate": 9.681455921476839e-06,
+      "loss": 0.603,
+      "step": 1218
+    },
+    {
+      "epoch": 1.9310344827586206,
+      "grad_norm": 0.2793201208114624,
+      "learning_rate": 9.221956552036992e-06,
+      "loss": 0.6194,
+      "step": 1225
+    },
+    {
+      "epoch": 1.9420689655172414,
+      "grad_norm": 0.27977249026298523,
+      "learning_rate": 8.772518554642973e-06,
+      "loss": 0.622,
+      "step": 1232
+    },
+    {
+      "epoch": 1.953103448275862,
+      "grad_norm": 0.27671122550964355,
+      "learning_rate": 8.333252821395526e-06,
+      "loss": 0.6083,
+      "step": 1239
+    },
+    {
+      "epoch": 1.9641379310344829,
+      "grad_norm": 0.2808462679386139,
+      "learning_rate": 7.904267734541498e-06,
+      "loss": 0.6155,
+      "step": 1246
+    },
+    {
+      "epoch": 1.9751724137931035,
+      "grad_norm": 0.2745875418186188,
+      "learning_rate": 7.485669139732004e-06,
+      "loss": 0.5934,
+      "step": 1253
+    },
+    {
+      "epoch": 1.986206896551724,
+      "grad_norm": 0.2853251099586487,
+      "learning_rate": 7.077560319906695e-06,
+      "loss": 0.5992,
+      "step": 1260
+    },
+    {
+      "epoch": 1.9972413793103447,
+      "grad_norm": 0.2884569764137268,
+      "learning_rate": 6.680041969810203e-06,
+      "loss": 0.5927,
+      "step": 1267
     }
   ],
   "logging_steps": 7,
       "attributes": {}
     }
   },
+  "total_flos": 8.41863001276416e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null