Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:907ab6a0ae89db772ae066fb2052a5616b7b71563fa71453ebf2c8e2f0fcd5b2
 size 295488936

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2431c21492adadf8d386f6d157cb6ddd19a4deff9958c3e71856170845abbed
 size 295488936

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:74815f4c6901145dc008341c2caf282cceddf39d2dc5ef73927e806423c85a5e
 size 591203178

 version https://git-lfs.github.com/spec/v1
+oid sha256:844b967ee6b8024991820293e92eabb8ce9d75b32d0036cda6e2ba82ae487514
 size 591203178

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2cf2b2d7ca16a98ed374235967d4dea68bc581943625ddd333bc11a0a503cc75
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:df98dcc31574f5bb83e66e0bca03b277b55813524adea9b669dde0b1561d3713
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e0244c146d76bf610ae39789eea36d0bff336b81d211db008e020e66921060c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4c9c807f0681c8b7e53ada9b6ec3dba530d303de7da0d0a0562a3d8d0bbba08
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.6289290189743042,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.09854644000985464,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,364 @@
       "eval_samples_per_second": 25.733,
       "eval_steps_per_second": 12.874,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -759,7 +1117,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.09564631154688e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.6193792223930359,
+  "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.14781966001478197,
   "eval_steps": 50,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 25.733,
       "eval_steps_per_second": 12.874,
       "step": 100
+    },
+    {
+      "epoch": 0.09953190440995319,
+      "grad_norm": 0.06508608162403107,
+      "learning_rate": 5.782172325201155e-05,
+      "loss": 0.0293,
+      "step": 101
+    },
+    {
+      "epoch": 0.10051736881005174,
+      "grad_norm": 0.10537426918745041,
+      "learning_rate": 5.695865504800327e-05,
+      "loss": 0.1911,
+      "step": 102
+    },
+    {
+      "epoch": 0.10150283321015029,
+      "grad_norm": 0.10470440238714218,
+      "learning_rate": 5.6093467170257374e-05,
+      "loss": 0.2062,
+      "step": 103
+    },
+    {
+      "epoch": 0.10248829761024883,
+      "grad_norm": 0.1329120248556137,
+      "learning_rate": 5.522642316338268e-05,
+      "loss": 0.2705,
+      "step": 104
+    },
+    {
+      "epoch": 0.10347376201034737,
+      "grad_norm": 0.1280706822872162,
+      "learning_rate": 5.435778713738292e-05,
+      "loss": 0.2274,
+      "step": 105
+    },
+    {
+      "epoch": 0.10445922641044593,
+      "grad_norm": 0.16757312417030334,
+      "learning_rate": 5.348782368720626e-05,
+      "loss": 0.4932,
+      "step": 106
+    },
+    {
+      "epoch": 0.10544469081054447,
+      "grad_norm": 0.1751958429813385,
+      "learning_rate": 5.26167978121472e-05,
+      "loss": 0.5401,
+      "step": 107
+    },
+    {
+      "epoch": 0.10643015521064302,
+      "grad_norm": 0.1717756688594818,
+      "learning_rate": 5.174497483512506e-05,
+      "loss": 0.6055,
+      "step": 108
+    },
+    {
+      "epoch": 0.10741561961074156,
+      "grad_norm": 0.19734135270118713,
+      "learning_rate": 5.0872620321864185e-05,
+      "loss": 0.5538,
+      "step": 109
+    },
+    {
+      "epoch": 0.10840108401084012,
+      "grad_norm": 0.18675287067890167,
+      "learning_rate": 5e-05,
+      "loss": 0.5751,
+      "step": 110
+    },
+    {
+      "epoch": 0.10938654841093866,
+      "grad_norm": 0.19595959782600403,
+      "learning_rate": 4.912737967813583e-05,
+      "loss": 0.5783,
+      "step": 111
+    },
+    {
+      "epoch": 0.1103720128110372,
+      "grad_norm": 0.20387478172779083,
+      "learning_rate": 4.825502516487497e-05,
+      "loss": 0.6778,
+      "step": 112
+    },
+    {
+      "epoch": 0.11135747721113574,
+      "grad_norm": 0.19632427394390106,
+      "learning_rate": 4.738320218785281e-05,
+      "loss": 0.7262,
+      "step": 113
+    },
+    {
+      "epoch": 0.1123429416112343,
+      "grad_norm": 0.22888943552970886,
+      "learning_rate": 4.6512176312793736e-05,
+      "loss": 0.6576,
+      "step": 114
+    },
+    {
+      "epoch": 0.11332840601133284,
+      "grad_norm": 0.21430076658725739,
+      "learning_rate": 4.564221286261709e-05,
+      "loss": 0.6493,
+      "step": 115
+    },
+    {
+      "epoch": 0.11431387041143139,
+      "grad_norm": 0.20526853203773499,
+      "learning_rate": 4.477357683661734e-05,
+      "loss": 0.66,
+      "step": 116
+    },
+    {
+      "epoch": 0.11529933481152993,
+      "grad_norm": 0.2236650139093399,
+      "learning_rate": 4.390653282974264e-05,
+      "loss": 0.6334,
+      "step": 117
+    },
+    {
+      "epoch": 0.11628479921162849,
+      "grad_norm": 0.22159314155578613,
+      "learning_rate": 4.3041344951996746e-05,
+      "loss": 0.7894,
+      "step": 118
+    },
+    {
+      "epoch": 0.11727026361172703,
+      "grad_norm": 0.22929464280605316,
+      "learning_rate": 4.2178276747988446e-05,
+      "loss": 0.6049,
+      "step": 119
+    },
+    {
+      "epoch": 0.11825572801182557,
+      "grad_norm": 0.2338849902153015,
+      "learning_rate": 4.131759111665349e-05,
+      "loss": 0.685,
+      "step": 120
+    },
+    {
+      "epoch": 0.11924119241192412,
+      "grad_norm": 0.2361065298318863,
+      "learning_rate": 4.045955023117276e-05,
+      "loss": 0.7811,
+      "step": 121
+    },
+    {
+      "epoch": 0.12022665681202267,
+      "grad_norm": 0.23754455149173737,
+      "learning_rate": 3.960441545911204e-05,
+      "loss": 0.7872,
+      "step": 122
+    },
+    {
+      "epoch": 0.12121212121212122,
+      "grad_norm": 0.2533068358898163,
+      "learning_rate": 3.875244728280676e-05,
+      "loss": 0.7897,
+      "step": 123
+    },
+    {
+      "epoch": 0.12219758561221976,
+      "grad_norm": 0.25761741399765015,
+      "learning_rate": 3.790390522001662e-05,
+      "loss": 0.7821,
+      "step": 124
+    },
+    {
+      "epoch": 0.1231830500123183,
+      "grad_norm": 0.262465238571167,
+      "learning_rate": 3.705904774487396e-05,
+      "loss": 0.8736,
+      "step": 125
+    },
+    {
+      "epoch": 0.12416851441241686,
+      "grad_norm": 0.28506287932395935,
+      "learning_rate": 3.6218132209150045e-05,
+      "loss": 0.7855,
+      "step": 126
+    },
+    {
+      "epoch": 0.1251539788125154,
+      "grad_norm": 0.2701311707496643,
+      "learning_rate": 3.5381414763863166e-05,
+      "loss": 0.6851,
+      "step": 127
+    },
+    {
+      "epoch": 0.12613944321261394,
+      "grad_norm": 0.2602234184741974,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 0.6349,
+      "step": 128
+    },
+    {
+      "epoch": 0.1271249076127125,
+      "grad_norm": 0.26517555117607117,
+      "learning_rate": 3.372159227714218e-05,
+      "loss": 0.6741,
+      "step": 129
+    },
+    {
+      "epoch": 0.12811037201281103,
+      "grad_norm": 0.2652198076248169,
+      "learning_rate": 3.289899283371657e-05,
+      "loss": 0.569,
+      "step": 130
+    },
+    {
+      "epoch": 0.1290958364129096,
+      "grad_norm": 0.27912837266921997,
+      "learning_rate": 3.2081602522734986e-05,
+      "loss": 0.7768,
+      "step": 131
+    },
+    {
+      "epoch": 0.13008130081300814,
+      "grad_norm": 0.28829455375671387,
+      "learning_rate": 3.12696703292044e-05,
+      "loss": 0.6819,
+      "step": 132
+    },
+    {
+      "epoch": 0.13106676521310667,
+      "grad_norm": 0.2873208522796631,
+      "learning_rate": 3.046344357553632e-05,
+      "loss": 0.8448,
+      "step": 133
+    },
+    {
+      "epoch": 0.13205222961320523,
+      "grad_norm": 0.3321005702018738,
+      "learning_rate": 2.9663167846209998e-05,
+      "loss": 0.8613,
+      "step": 134
+    },
+    {
+      "epoch": 0.13303769401330376,
+      "grad_norm": 0.3113601505756378,
+      "learning_rate": 2.886908691296504e-05,
+      "loss": 0.757,
+      "step": 135
+    },
+    {
+      "epoch": 0.13402315841340232,
+      "grad_norm": 0.3224503993988037,
+      "learning_rate": 2.8081442660546125e-05,
+      "loss": 0.8078,
+      "step": 136
+    },
+    {
+      "epoch": 0.13500862281350087,
+      "grad_norm": 0.3039480745792389,
+      "learning_rate": 2.7300475013022663e-05,
+      "loss": 0.5903,
+      "step": 137
+    },
+    {
+      "epoch": 0.1359940872135994,
+      "grad_norm": 0.3399895429611206,
+      "learning_rate": 2.6526421860705473e-05,
+      "loss": 0.681,
+      "step": 138
+    },
+    {
+      "epoch": 0.13697955161369796,
+      "grad_norm": 0.32148513197898865,
+      "learning_rate": 2.575951898768315e-05,
+      "loss": 0.6537,
+      "step": 139
+    },
+    {
+      "epoch": 0.13796501601379652,
+      "grad_norm": 0.3466744124889374,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 0.7301,
+      "step": 140
+    },
+    {
+      "epoch": 0.13895048041389504,
+      "grad_norm": 0.35640135407447815,
+      "learning_rate": 2.4248096254497288e-05,
+      "loss": 0.7172,
+      "step": 141
+    },
+    {
+      "epoch": 0.1399359448139936,
+      "grad_norm": 0.38685786724090576,
+      "learning_rate": 2.350403678833976e-05,
+      "loss": 0.7447,
+      "step": 142
+    },
+    {
+      "epoch": 0.14092140921409213,
+      "grad_norm": 0.3715401291847229,
+      "learning_rate": 2.2768048249248648e-05,
+      "loss": 0.6587,
+      "step": 143
+    },
+    {
+      "epoch": 0.1419068736141907,
+      "grad_norm": 0.41979506611824036,
+      "learning_rate": 2.2040354826462668e-05,
+      "loss": 0.7954,
+      "step": 144
+    },
+    {
+      "epoch": 0.14289233801428924,
+      "grad_norm": 0.4046609103679657,
+      "learning_rate": 2.132117818244771e-05,
+      "loss": 0.7503,
+      "step": 145
+    },
+    {
+      "epoch": 0.14387780241438777,
+      "grad_norm": 0.4440525472164154,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 0.8386,
+      "step": 146
+    },
+    {
+      "epoch": 0.14486326681448633,
+      "grad_norm": 0.4576321542263031,
+      "learning_rate": 1.9909248842397584e-05,
+      "loss": 0.6455,
+      "step": 147
+    },
+    {
+      "epoch": 0.1458487312145849,
+      "grad_norm": 0.5070066452026367,
+      "learning_rate": 1.9216926233717085e-05,
+      "loss": 0.8399,
+      "step": 148
+    },
+    {
+      "epoch": 0.14683419561468342,
+      "grad_norm": 0.5966192483901978,
+      "learning_rate": 1.8533980447508137e-05,
+      "loss": 0.7003,
+      "step": 149
+    },
+    {
+      "epoch": 0.14781966001478197,
+      "grad_norm": 0.7911564707756042,
+      "learning_rate": 1.7860619515673033e-05,
+      "loss": 0.6801,
+      "step": 150
+    },
+    {
+      "epoch": 0.14781966001478197,
+      "eval_loss": 0.6193792223930359,
+      "eval_runtime": 66.3669,
+      "eval_samples_per_second": 25.751,
+      "eval_steps_per_second": 12.883,
+      "step": 150
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.640067658186752e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null