Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37db4b5c5859f8f3f1ee09458a3530068602cf34bca3274413ac2a1c3d86eb4c
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d86e193d1e30d861bdabe4afba56d402c0a2de3845ef2098a4c01abdc73a909
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4981f763ea989718f43b36782ff779355555ae547395f430a9a5a089d2fb0adb
 size 1342555602

 version https://git-lfs.github.com/spec/v1
+oid sha256:d6f632b512cc29b2053813c812de19182897a3e5bf64cc46fc11620f49209fea
 size 1342555602

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:314476803f1494174f81db3c10fd6eb76aa1dad6f3434f1f74552c5f4b48e698
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0de0e06e42bcebf883316de8930d271e6249504b354a3aa92c301aa73061972e
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d90116c540b4ff0066495fbccc9c914a568905fb44c6564f227952cc4231b00
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:03ad66011cfc1fc727a51190602a41adc332b48eeef62a5ee87c2ca9f9b90b2b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.5323200225830078,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.0013505118439888718,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 9.008,
       "eval_steps_per_second": 2.252,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.621042343070925e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.4810292720794678,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.0027010236879777437,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.008,
       "eval_steps_per_second": 2.252,
       "step": 50
+    },
+    {
+      "epoch": 0.0013775220808686492,
+      "grad_norm": 2.0075578689575195,
+      "learning_rate": 2.3816778784387097e-05,
+      "loss": 1.6417,
+      "step": 51
+    },
+    {
+      "epoch": 0.0014045323177484267,
+      "grad_norm": 2.1036274433135986,
+      "learning_rate": 2.3263454721781537e-05,
+      "loss": 1.4913,
+      "step": 52
+    },
+    {
+      "epoch": 0.001431542554628204,
+      "grad_norm": 2.0413341522216797,
+      "learning_rate": 2.2693489161088592e-05,
+      "loss": 1.6212,
+      "step": 53
+    },
+    {
+      "epoch": 0.0014585527915079814,
+      "grad_norm": 1.942448616027832,
+      "learning_rate": 2.210802993709498e-05,
+      "loss": 1.6439,
+      "step": 54
+    },
+    {
+      "epoch": 0.001485563028387759,
+      "grad_norm": 2.1481826305389404,
+      "learning_rate": 2.1508256086763372e-05,
+      "loss": 1.7214,
+      "step": 55
+    },
+    {
+      "epoch": 0.0015125732652675364,
+      "grad_norm": 1.8801171779632568,
+      "learning_rate": 2.0895375474808857e-05,
+      "loss": 1.5555,
+      "step": 56
+    },
+    {
+      "epoch": 0.001539583502147314,
+      "grad_norm": 2.44754958152771,
+      "learning_rate": 2.0270622361220143e-05,
+      "loss": 1.8116,
+      "step": 57
+    },
+    {
+      "epoch": 0.0015665937390270913,
+      "grad_norm": 2.611750364303589,
+      "learning_rate": 1.963525491562421e-05,
+      "loss": 2.0997,
+      "step": 58
+    },
+    {
+      "epoch": 0.0015936039759068686,
+      "grad_norm": 2.4594104290008545,
+      "learning_rate": 1.8990552683500128e-05,
+      "loss": 1.7715,
+      "step": 59
+    },
+    {
+      "epoch": 0.0016206142127866462,
+      "grad_norm": 2.877983808517456,
+      "learning_rate": 1.8337814009344716e-05,
+      "loss": 1.7933,
+      "step": 60
+    },
+    {
+      "epoch": 0.0016476244496664235,
+      "grad_norm": 2.124140739440918,
+      "learning_rate": 1.767835342197955e-05,
+      "loss": 1.4663,
+      "step": 61
+    },
+    {
+      "epoch": 0.0016746346865462011,
+      "grad_norm": 2.2420833110809326,
+      "learning_rate": 1.7013498987264832e-05,
+      "loss": 1.7904,
+      "step": 62
+    },
+    {
+      "epoch": 0.0017016449234259785,
+      "grad_norm": 2.717524290084839,
+      "learning_rate": 1.6344589633551502e-05,
+      "loss": 1.7165,
+      "step": 63
+    },
+    {
+      "epoch": 0.0017286551603057558,
+      "grad_norm": 2.5003247261047363,
+      "learning_rate": 1.5672972455257726e-05,
+      "loss": 1.7485,
+      "step": 64
+    },
+    {
+      "epoch": 0.0017556653971855334,
+      "grad_norm": 2.5034983158111572,
+      "learning_rate": 1.5e-05,
+      "loss": 1.5087,
+      "step": 65
+    },
+    {
+      "epoch": 0.0017826756340653107,
+      "grad_norm": 3.265855312347412,
+      "learning_rate": 1.4327027544742281e-05,
+      "loss": 1.8923,
+      "step": 66
+    },
+    {
+      "epoch": 0.001809685870945088,
+      "grad_norm": 3.260542392730713,
+      "learning_rate": 1.36554103664485e-05,
+      "loss": 1.5948,
+      "step": 67
+    },
+    {
+      "epoch": 0.0018366961078248656,
+      "grad_norm": 3.1845810413360596,
+      "learning_rate": 1.2986501012735174e-05,
+      "loss": 1.7641,
+      "step": 68
+    },
+    {
+      "epoch": 0.001863706344704643,
+      "grad_norm": 2.402411699295044,
+      "learning_rate": 1.2321646578020452e-05,
+      "loss": 1.485,
+      "step": 69
+    },
+    {
+      "epoch": 0.0018907165815844206,
+      "grad_norm": 3.055525541305542,
+      "learning_rate": 1.1662185990655285e-05,
+      "loss": 1.4272,
+      "step": 70
+    },
+    {
+      "epoch": 0.001917726818464198,
+      "grad_norm": 2.2134780883789062,
+      "learning_rate": 1.1009447316499875e-05,
+      "loss": 1.2956,
+      "step": 71
+    },
+    {
+      "epoch": 0.0019447370553439753,
+      "grad_norm": 2.314101219177246,
+      "learning_rate": 1.036474508437579e-05,
+      "loss": 1.4229,
+      "step": 72
+    },
+    {
+      "epoch": 0.001971747292223753,
+      "grad_norm": 2.550278425216675,
+      "learning_rate": 9.729377638779859e-06,
+      "loss": 1.5253,
+      "step": 73
+    },
+    {
+      "epoch": 0.0019987575291035304,
+      "grad_norm": 1.8057441711425781,
+      "learning_rate": 9.104624525191147e-06,
+      "loss": 1.1277,
+      "step": 74
+    },
+    {
+      "epoch": 0.0020257677659833075,
+      "grad_norm": 2.356515884399414,
+      "learning_rate": 8.491743913236629e-06,
+      "loss": 1.5744,
+      "step": 75
+    },
+    {
+      "epoch": 0.002052778002863085,
+      "grad_norm": 1.8322139978408813,
+      "learning_rate": 7.89197006290502e-06,
+      "loss": 1.22,
+      "step": 76
+    },
+    {
+      "epoch": 0.0020797882397428627,
+      "grad_norm": 2.382089853286743,
+      "learning_rate": 7.30651083891141e-06,
+      "loss": 1.3513,
+      "step": 77
+    },
+    {
+      "epoch": 0.00210679847662264,
+      "grad_norm": 2.511622667312622,
+      "learning_rate": 6.736545278218464e-06,
+      "loss": 1.4673,
+      "step": 78
+    },
+    {
+      "epoch": 0.0021338087135024174,
+      "grad_norm": 2.1049392223358154,
+      "learning_rate": 6.1832212156129045e-06,
+      "loss": 1.3462,
+      "step": 79
+    },
+    {
+      "epoch": 0.002160818950382195,
+      "grad_norm": 1.8335689306259155,
+      "learning_rate": 5.647652972118998e-06,
+      "loss": 1.2784,
+      "step": 80
+    },
+    {
+      "epoch": 0.0021878291872619725,
+      "grad_norm": 2.07208251953125,
+      "learning_rate": 5.130919110904311e-06,
+      "loss": 1.3297,
+      "step": 81
+    },
+    {
+      "epoch": 0.0022148394241417496,
+      "grad_norm": 2.2498092651367188,
+      "learning_rate": 4.6340602651970304e-06,
+      "loss": 1.385,
+      "step": 82
+    },
+    {
+      "epoch": 0.002241849661021527,
+      "grad_norm": 2.364032030105591,
+      "learning_rate": 4.158077042589129e-06,
+      "loss": 1.2464,
+      "step": 83
+    },
+    {
+      "epoch": 0.0022688598979013048,
+      "grad_norm": 1.9826806783676147,
+      "learning_rate": 3.7039280099458373e-06,
+      "loss": 1.5284,
+      "step": 84
+    },
+    {
+      "epoch": 0.002295870134781082,
+      "grad_norm": 2.1717441082000732,
+      "learning_rate": 3.272527762979553e-06,
+      "loss": 1.4557,
+      "step": 85
+    },
+    {
+      "epoch": 0.0023228803716608595,
+      "grad_norm": 2.2217328548431396,
+      "learning_rate": 2.86474508437579e-06,
+      "loss": 1.4088,
+      "step": 86
+    },
+    {
+      "epoch": 0.002349890608540637,
+      "grad_norm": 2.221723794937134,
+      "learning_rate": 2.4814011941804603e-06,
+      "loss": 1.3783,
+      "step": 87
+    },
+    {
+      "epoch": 0.002376900845420414,
+      "grad_norm": 1.8951011896133423,
+      "learning_rate": 2.1232680959720085e-06,
+      "loss": 1.3405,
+      "step": 88
+    },
+    {
+      "epoch": 0.0024039110823001917,
+      "grad_norm": 2.2864327430725098,
+      "learning_rate": 1.79106702214893e-06,
+      "loss": 1.3055,
+      "step": 89
+    },
+    {
+      "epoch": 0.0024309213191799693,
+      "grad_norm": 2.2295827865600586,
+      "learning_rate": 1.4854669814637145e-06,
+      "loss": 1.4331,
+      "step": 90
+    },
+    {
+      "epoch": 0.0024579315560597464,
+      "grad_norm": 2.1161422729492188,
+      "learning_rate": 1.2070834117282414e-06,
+      "loss": 1.6016,
+      "step": 91
+    },
+    {
+      "epoch": 0.002484941792939524,
+      "grad_norm": 2.2798967361450195,
+      "learning_rate": 9.56476940403942e-07,
+      "loss": 1.34,
+      "step": 92
+    },
+    {
+      "epoch": 0.0025119520298193016,
+      "grad_norm": 1.9478846788406372,
+      "learning_rate": 7.341522555726971e-07,
+      "loss": 1.1043,
+      "step": 93
+    },
+    {
+      "epoch": 0.002538962266699079,
+      "grad_norm": 2.527766227722168,
+      "learning_rate": 5.405570895622014e-07,
+      "loss": 1.507,
+      "step": 94
+    },
+    {
+      "epoch": 0.0025659725035788563,
+      "grad_norm": 2.620147228240967,
+      "learning_rate": 3.760813172726457e-07,
+      "loss": 1.5329,
+      "step": 95
+    },
+    {
+      "epoch": 0.002592982740458634,
+      "grad_norm": 2.2767059803009033,
+      "learning_rate": 2.41056171020555e-07,
+      "loss": 1.438,
+      "step": 96
+    },
+    {
+      "epoch": 0.0026199929773384114,
+      "grad_norm": 1.911629319190979,
+      "learning_rate": 1.357535734809795e-07,
+      "loss": 1.0212,
+      "step": 97
+    },
+    {
+      "epoch": 0.0026470032142181885,
+      "grad_norm": 2.204927682876587,
+      "learning_rate": 6.038559007141397e-08,
+      "loss": 1.1526,
+      "step": 98
+    },
+    {
+      "epoch": 0.002674013451097966,
+      "grad_norm": 2.275649309158325,
+      "learning_rate": 1.510400188028116e-08,
+      "loss": 1.3758,
+      "step": 99
+    },
+    {
+      "epoch": 0.0027010236879777437,
+      "grad_norm": 3.0716145038604736,
+      "learning_rate": 0.0,
+      "loss": 1.7359,
+      "step": 100
+    },
+    {
+      "epoch": 0.0027010236879777437,
+      "eval_loss": 1.4810292720794678,
+      "eval_runtime": 1731.0852,
+      "eval_samples_per_second": 9.005,
+      "eval_steps_per_second": 2.252,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.204371846673203e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null