Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83f367fa7dcee21db995bb0076a1f1bcbf4f460bd317b2c75873e494ea129706
 size 406863720

 version https://git-lfs.github.com/spec/v1
+oid sha256:a9d5bbd3213e6430b7bd596627c1ce88a247e50e9754125122417c3af644cfdc
 size 406863720

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63bfd0060cea9bc6d8c0bac7e2949fe528ec84db091c1cf14f9f8961b431480b
 size 207013892

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa9ec085507c5e6e013494ce81a6ab42fee48888afaafec9fabf506633d6b99d
 size 207013892

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:845d44bea51ae8c852bae051b817d8a4aa017c8030dafad8ac128fab7fa69003
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1f8e11a89fbf6be88c29777fdbbfe4d4259c26b65e372ad0b0265d65afb8afb8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d359eb5d29e75fb2bbe5b7026981da69b95b8ad1fea469302d13cde104f7e8a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0ddb9588ea654e56e83effcf81a2bc03480954babcf6415cb44d41d3bfb8039f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.10318926721811295,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.013686443577636351,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 23.373,
       "eval_steps_per_second": 5.846,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -401,7 +759,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.384514477817856e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.0689723864197731,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.027372887155272703,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 23.373,
       "eval_steps_per_second": 5.846,
       "step": 50
+    },
+    {
+      "epoch": 0.013960172449189078,
+      "grad_norm": 1.7688506841659546,
+      "learning_rate": 8.894386393810563e-05,
+      "loss": 0.4599,
+      "step": 51
+    },
+    {
+      "epoch": 0.014233901320741805,
+      "grad_norm": 0.6852141618728638,
+      "learning_rate": 8.842005554284296e-05,
+      "loss": 0.1715,
+      "step": 52
+    },
+    {
+      "epoch": 0.014507630192294532,
+      "grad_norm": 0.64682537317276,
+      "learning_rate": 8.788574348801675e-05,
+      "loss": 0.1896,
+      "step": 53
+    },
+    {
+      "epoch": 0.01478135906384726,
+      "grad_norm": 0.4129149615764618,
+      "learning_rate": 8.73410738492077e-05,
+      "loss": 0.1405,
+      "step": 54
+    },
+    {
+      "epoch": 0.015055087935399987,
+      "grad_norm": 0.39000850915908813,
+      "learning_rate": 8.678619553365659e-05,
+      "loss": 0.0792,
+      "step": 55
+    },
+    {
+      "epoch": 0.015328816806952713,
+      "grad_norm": 0.7193068265914917,
+      "learning_rate": 8.622126023955446e-05,
+      "loss": 0.1492,
+      "step": 56
+    },
+    {
+      "epoch": 0.01560254567850544,
+      "grad_norm": 0.2947554588317871,
+      "learning_rate": 8.564642241456986e-05,
+      "loss": 0.0963,
+      "step": 57
+    },
+    {
+      "epoch": 0.01587627455005817,
+      "grad_norm": 0.28656435012817383,
+      "learning_rate": 8.506183921362443e-05,
+      "loss": 0.0775,
+      "step": 58
+    },
+    {
+      "epoch": 0.016150003421610895,
+      "grad_norm": 0.28851979970932007,
+      "learning_rate": 8.44676704559283e-05,
+      "loss": 0.1041,
+      "step": 59
+    },
+    {
+      "epoch": 0.016423732293163622,
+      "grad_norm": 0.5464861989021301,
+      "learning_rate": 8.386407858128706e-05,
+      "loss": 0.0947,
+      "step": 60
+    },
+    {
+      "epoch": 0.01669746116471635,
+      "grad_norm": 0.39874088764190674,
+      "learning_rate": 8.32512286056924e-05,
+      "loss": 0.1459,
+      "step": 61
+    },
+    {
+      "epoch": 0.016971190036269075,
+      "grad_norm": 0.39730069041252136,
+      "learning_rate": 8.262928807620843e-05,
+      "loss": 0.0872,
+      "step": 62
+    },
+    {
+      "epoch": 0.017244918907821802,
+      "grad_norm": 0.3217560052871704,
+      "learning_rate": 8.199842702516583e-05,
+      "loss": 0.1013,
+      "step": 63
+    },
+    {
+      "epoch": 0.01751864777937453,
+      "grad_norm": 0.36995869874954224,
+      "learning_rate": 8.135881792367686e-05,
+      "loss": 0.0794,
+      "step": 64
+    },
+    {
+      "epoch": 0.017792376650927255,
+      "grad_norm": 0.29235830903053284,
+      "learning_rate": 8.07106356344834e-05,
+      "loss": 0.0753,
+      "step": 65
+    },
+    {
+      "epoch": 0.018066105522479982,
+      "grad_norm": 0.40605628490448,
+      "learning_rate": 8.005405736415126e-05,
+      "loss": 0.1249,
+      "step": 66
+    },
+    {
+      "epoch": 0.018339834394032712,
+      "grad_norm": 0.24858100712299347,
+      "learning_rate": 7.938926261462366e-05,
+      "loss": 0.0819,
+      "step": 67
+    },
+    {
+      "epoch": 0.01861356326558544,
+      "grad_norm": 0.30691301822662354,
+      "learning_rate": 7.871643313414718e-05,
+      "loss": 0.0559,
+      "step": 68
+    },
+    {
+      "epoch": 0.018887292137138165,
+      "grad_norm": 0.27972522377967834,
+      "learning_rate": 7.803575286758364e-05,
+      "loss": 0.0858,
+      "step": 69
+    },
+    {
+      "epoch": 0.019161021008690892,
+      "grad_norm": 0.28502631187438965,
+      "learning_rate": 7.734740790612136e-05,
+      "loss": 0.0782,
+      "step": 70
+    },
+    {
+      "epoch": 0.01943474988024362,
+      "grad_norm": 0.27517464756965637,
+      "learning_rate": 7.66515864363997e-05,
+      "loss": 0.1164,
+      "step": 71
+    },
+    {
+      "epoch": 0.019708478751796345,
+      "grad_norm": 0.2742948830127716,
+      "learning_rate": 7.594847868906076e-05,
+      "loss": 0.0812,
+      "step": 72
+    },
+    {
+      "epoch": 0.019982207623349072,
+      "grad_norm": 0.2698785364627838,
+      "learning_rate": 7.52382768867422e-05,
+      "loss": 0.0526,
+      "step": 73
+    },
+    {
+      "epoch": 0.0202559364949018,
+      "grad_norm": 0.1924898773431778,
+      "learning_rate": 7.452117519152542e-05,
+      "loss": 0.0653,
+      "step": 74
+    },
+    {
+      "epoch": 0.020529665366454525,
+      "grad_norm": 0.24038903415203094,
+      "learning_rate": 7.379736965185368e-05,
+      "loss": 0.0571,
+      "step": 75
+    },
+    {
+      "epoch": 0.020803394238007252,
+      "grad_norm": 0.2771959602832794,
+      "learning_rate": 7.30670581489344e-05,
+      "loss": 0.0678,
+      "step": 76
+    },
+    {
+      "epoch": 0.021077123109559982,
+      "grad_norm": 0.3027803599834442,
+      "learning_rate": 7.233044034264034e-05,
+      "loss": 0.0824,
+      "step": 77
+    },
+    {
+      "epoch": 0.02135085198111271,
+      "grad_norm": 0.34813180565834045,
+      "learning_rate": 7.158771761692464e-05,
+      "loss": 0.0428,
+      "step": 78
+    },
+    {
+      "epoch": 0.021624580852665436,
+      "grad_norm": 0.18736878037452698,
+      "learning_rate": 7.083909302476453e-05,
+      "loss": 0.0164,
+      "step": 79
+    },
+    {
+      "epoch": 0.021898309724218162,
+      "grad_norm": 0.1618126630783081,
+      "learning_rate": 7.008477123264848e-05,
+      "loss": 0.0252,
+      "step": 80
+    },
+    {
+      "epoch": 0.02217203859577089,
+      "grad_norm": 0.18529681861400604,
+      "learning_rate": 6.932495846462261e-05,
+      "loss": 0.0256,
+      "step": 81
+    },
+    {
+      "epoch": 0.022445767467323616,
+      "grad_norm": 0.5240387916564941,
+      "learning_rate": 6.855986244591104e-05,
+      "loss": 0.0771,
+      "step": 82
+    },
+    {
+      "epoch": 0.022719496338876342,
+      "grad_norm": 0.3223040699958801,
+      "learning_rate": 6.778969234612584e-05,
+      "loss": 0.1034,
+      "step": 83
+    },
+    {
+      "epoch": 0.02299322521042907,
+      "grad_norm": 0.342072993516922,
+      "learning_rate": 6.701465872208216e-05,
+      "loss": 0.0586,
+      "step": 84
+    },
+    {
+      "epoch": 0.023266954081981796,
+      "grad_norm": 0.2215188443660736,
+      "learning_rate": 6.623497346023418e-05,
+      "loss": 0.0384,
+      "step": 85
+    },
+    {
+      "epoch": 0.023540682953534522,
+      "grad_norm": 0.1570575088262558,
+      "learning_rate": 6.545084971874738e-05,
+      "loss": 0.0218,
+      "step": 86
+    },
+    {
+      "epoch": 0.023814411825087253,
+      "grad_norm": 0.19102266430854797,
+      "learning_rate": 6.466250186922325e-05,
+      "loss": 0.0424,
+      "step": 87
+    },
+    {
+      "epoch": 0.02408814069663998,
+      "grad_norm": 0.1055581122636795,
+      "learning_rate": 6.387014543809223e-05,
+      "loss": 0.0121,
+      "step": 88
+    },
+    {
+      "epoch": 0.024361869568192706,
+      "grad_norm": 0.3091917037963867,
+      "learning_rate": 6.307399704769099e-05,
+      "loss": 0.079,
+      "step": 89
+    },
+    {
+      "epoch": 0.024635598439745433,
+      "grad_norm": 0.1684788465499878,
+      "learning_rate": 6.227427435703997e-05,
+      "loss": 0.0372,
+      "step": 90
+    },
+    {
+      "epoch": 0.02490932731129816,
+      "grad_norm": 0.25582513213157654,
+      "learning_rate": 6.147119600233758e-05,
+      "loss": 0.0406,
+      "step": 91
+    },
+    {
+      "epoch": 0.025183056182850886,
+      "grad_norm": 0.10428531467914581,
+      "learning_rate": 6.066498153718735e-05,
+      "loss": 0.0058,
+      "step": 92
+    },
+    {
+      "epoch": 0.025456785054403613,
+      "grad_norm": 0.25681793689727783,
+      "learning_rate": 5.985585137257401e-05,
+      "loss": 0.0333,
+      "step": 93
+    },
+    {
+      "epoch": 0.02573051392595634,
+      "grad_norm": 0.09947719424962997,
+      "learning_rate": 5.90440267166055e-05,
+      "loss": 0.0055,
+      "step": 94
+    },
+    {
+      "epoch": 0.026004242797509066,
+      "grad_norm": 0.05656822770833969,
+      "learning_rate": 5.8229729514036705e-05,
+      "loss": 0.0031,
+      "step": 95
+    },
+    {
+      "epoch": 0.026277971669061793,
+      "grad_norm": 0.10895723849534988,
+      "learning_rate": 5.74131823855921e-05,
+      "loss": 0.0061,
+      "step": 96
+    },
+    {
+      "epoch": 0.026551700540614523,
+      "grad_norm": 0.30041539669036865,
+      "learning_rate": 5.6594608567103456e-05,
+      "loss": 0.0417,
+      "step": 97
+    },
+    {
+      "epoch": 0.02682542941216725,
+      "grad_norm": 0.20105572044849396,
+      "learning_rate": 5.577423184847932e-05,
+      "loss": 0.0128,
+      "step": 98
+    },
+    {
+      "epoch": 0.027099158283719976,
+      "grad_norm": 0.09207609295845032,
+      "learning_rate": 5.495227651252315e-05,
+      "loss": 0.005,
+      "step": 99
+    },
+    {
+      "epoch": 0.027372887155272703,
+      "grad_norm": 0.07330376654863358,
+      "learning_rate": 5.4128967273616625e-05,
+      "loss": 0.0041,
+      "step": 100
+    },
+    {
+      "epoch": 0.027372887155272703,
+      "eval_loss": 0.0689723864197731,
+      "eval_runtime": 263.1747,
+      "eval_samples_per_second": 23.38,
+      "eval_steps_per_second": 5.848,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 6.752190575149056e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null