Training in progress, step 2000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3c07d9f82d0dc1cd318f9c20b0b1529213bd8a4dff1faf8ce688ecb58571b31
 size 69527352

 version https://git-lfs.github.com/spec/v1
+oid sha256:04f0ffe434929d301fdbb4a477e60354c9ebe32f51f7b66c3b76af3239107135
 size 69527352

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8c6e3eb9856154789133b5419a7189f12fe843ca44f1cdff35733d6104dd62c3
 size 35778900

 version https://git-lfs.github.com/spec/v1
+oid sha256:fdbfaab9c4b638c5be7b9b85b9ef2e77afadf2b9fc84a8c66872d06993947042
 size 35778900

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d0b81074e1bc1527d9374bce86bdfa8d8c27dcb2ca9fd9115819f321defb1fa
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:65a6bdf298e0592c7069487ab9f49a4212493c8c923f2cd738adc0345d5bd504
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cf0b4281d39a881ce1af5e046e19d073ff2816738244a5102a35ebec9a11074
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ff620fa731932bfad032c9b2869fcd08718601bc76c1b44cf37971591fc72fbd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.3732219636440277,
   "best_model_checkpoint": "miner_id_24/checkpoint-1850",
-  "epoch": 0.2677559987642031,
   "eval_steps": 50,
-  "global_step": 1950,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -13977,6 +13977,364 @@
       "eval_samples_per_second": 13.321,
       "eval_steps_per_second": 6.66,
       "step": 1950
     }
   ],
   "logging_steps": 1,
@@ -13991,7 +14349,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -14000,12 +14358,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.049408924844032e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.3732219636440277,
   "best_model_checkpoint": "miner_id_24/checkpoint-1850",
+  "epoch": 0.27462153719405447,
   "eval_steps": 50,
+  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 13.321,
       "eval_steps_per_second": 6.66,
       "step": 1950
+    },
+    {
+      "epoch": 0.2678933095328001,
+      "grad_norm": 0.0776594951748848,
+      "learning_rate": 2.990468147050729e-07,
+      "loss": 0.315,
+      "step": 1951
+    },
+    {
+      "epoch": 0.2680306203013971,
+      "grad_norm": 0.07933443784713745,
+      "learning_rate": 2.869711549758014e-07,
+      "loss": 0.3611,
+      "step": 1952
+    },
+    {
+      "epoch": 0.26816793106999415,
+      "grad_norm": 0.07569830119609833,
+      "learning_rate": 2.7514400612855815e-07,
+      "loss": 0.3146,
+      "step": 1953
+    },
+    {
+      "epoch": 0.26830524183859117,
+      "grad_norm": 0.08394397795200348,
+      "learning_rate": 2.635653976396979e-07,
+      "loss": 0.3673,
+      "step": 1954
+    },
+    {
+      "epoch": 0.26844255260718825,
+      "grad_norm": 0.07972821593284607,
+      "learning_rate": 2.522353583661263e-07,
+      "loss": 0.358,
+      "step": 1955
+    },
+    {
+      "epoch": 0.26857986337578527,
+      "grad_norm": 0.08286363631486893,
+      "learning_rate": 2.4115391654524477e-07,
+      "loss": 0.3535,
+      "step": 1956
+    },
+    {
+      "epoch": 0.2687171741443823,
+      "grad_norm": 0.0780869573354721,
+      "learning_rate": 2.303210997949168e-07,
+      "loss": 0.3211,
+      "step": 1957
+    },
+    {
+      "epoch": 0.2688544849129793,
+      "grad_norm": 0.0882067084312439,
+      "learning_rate": 2.1973693511334604e-07,
+      "loss": 0.339,
+      "step": 1958
+    },
+    {
+      "epoch": 0.26899179568157633,
+      "grad_norm": 0.07985836267471313,
+      "learning_rate": 2.094014488790097e-07,
+      "loss": 0.3112,
+      "step": 1959
+    },
+    {
+      "epoch": 0.26912910645017335,
+      "grad_norm": 0.09253552556037903,
+      "learning_rate": 1.9931466685065847e-07,
+      "loss": 0.3242,
+      "step": 1960
+    },
+    {
+      "epoch": 0.26926641721877037,
+      "grad_norm": 0.08512873947620392,
+      "learning_rate": 1.894766141671833e-07,
+      "loss": 0.2778,
+      "step": 1961
+    },
+    {
+      "epoch": 0.2694037279873674,
+      "grad_norm": 0.09439520537853241,
+      "learning_rate": 1.7988731534757108e-07,
+      "loss": 0.3481,
+      "step": 1962
+    },
+    {
+      "epoch": 0.2695410387559644,
+      "grad_norm": 0.0956578180193901,
+      "learning_rate": 1.7054679429086007e-07,
+      "loss": 0.3387,
+      "step": 1963
+    },
+    {
+      "epoch": 0.2696783495245615,
+      "grad_norm": 0.10661093145608902,
+      "learning_rate": 1.6145507427606232e-07,
+      "loss": 0.3995,
+      "step": 1964
+    },
+    {
+      "epoch": 0.2698156602931585,
+      "grad_norm": 0.10120601952075958,
+      "learning_rate": 1.5261217796211923e-07,
+      "loss": 0.3887,
+      "step": 1965
+    },
+    {
+      "epoch": 0.26995297106175553,
+      "grad_norm": 0.11400753259658813,
+      "learning_rate": 1.4401812738782383e-07,
+      "loss": 0.38,
+      "step": 1966
+    },
+    {
+      "epoch": 0.27009028183035255,
+      "grad_norm": 0.1119614839553833,
+      "learning_rate": 1.3567294397180964e-07,
+      "loss": 0.3617,
+      "step": 1967
+    },
+    {
+      "epoch": 0.2702275925989496,
+      "grad_norm": 0.1144985556602478,
+      "learning_rate": 1.2757664851245078e-07,
+      "loss": 0.3941,
+      "step": 1968
+    },
+    {
+      "epoch": 0.2703649033675466,
+      "grad_norm": 0.12001071870326996,
+      "learning_rate": 1.1972926118780647e-07,
+      "loss": 0.3585,
+      "step": 1969
+    },
+    {
+      "epoch": 0.2705022141361436,
+      "grad_norm": 0.10658982396125793,
+      "learning_rate": 1.1213080155564326e-07,
+      "loss": 0.3269,
+      "step": 1970
+    },
+    {
+      "epoch": 0.27063952490474064,
+      "grad_norm": 0.1165112853050232,
+      "learning_rate": 1.0478128855327952e-07,
+      "loss": 0.4069,
+      "step": 1971
+    },
+    {
+      "epoch": 0.27077683567333766,
+      "grad_norm": 0.12376672774553299,
+      "learning_rate": 9.768074049762988e-08,
+      "loss": 0.4226,
+      "step": 1972
+    },
+    {
+      "epoch": 0.27091414644193473,
+      "grad_norm": 0.13154427707195282,
+      "learning_rate": 9.082917508510536e-08,
+      "loss": 0.4798,
+      "step": 1973
+    },
+    {
+      "epoch": 0.27105145721053175,
+      "grad_norm": 0.1435077041387558,
+      "learning_rate": 8.42266093916022e-08,
+      "loss": 0.4423,
+      "step": 1974
+    },
+    {
+      "epoch": 0.2711887679791288,
+      "grad_norm": 0.13770116865634918,
+      "learning_rate": 7.787305987243532e-08,
+      "loss": 0.3837,
+      "step": 1975
+    },
+    {
+      "epoch": 0.2713260787477258,
+      "grad_norm": 0.11175678670406342,
+      "learning_rate": 7.1768542362316e-08,
+      "loss": 0.3307,
+      "step": 1976
+    },
+    {
+      "epoch": 0.2714633895163228,
+      "grad_norm": 0.13395845890045166,
+      "learning_rate": 6.591307207527431e-08,
+      "loss": 0.3738,
+      "step": 1977
+    },
+    {
+      "epoch": 0.27160070028491984,
+      "grad_norm": 0.1312989890575409,
+      "learning_rate": 6.030666360469228e-08,
+      "loss": 0.4427,
+      "step": 1978
+    },
+    {
+      "epoch": 0.27173801105351686,
+      "grad_norm": 0.14491136372089386,
+      "learning_rate": 5.494933092318189e-08,
+      "loss": 0.4382,
+      "step": 1979
+    },
+    {
+      "epoch": 0.2718753218221139,
+      "grad_norm": 0.12479250878095627,
+      "learning_rate": 4.9841087382618276e-08,
+      "loss": 0.3451,
+      "step": 1980
+    },
+    {
+      "epoch": 0.2720126325907109,
+      "grad_norm": 0.12496712803840637,
+      "learning_rate": 4.498194571409542e-08,
+      "loss": 0.3951,
+      "step": 1981
+    },
+    {
+      "epoch": 0.272149943359308,
+      "grad_norm": 0.1415296047925949,
+      "learning_rate": 4.037191802783724e-08,
+      "loss": 0.407,
+      "step": 1982
+    },
+    {
+      "epoch": 0.272287254127905,
+      "grad_norm": 0.12588736414909363,
+      "learning_rate": 3.6011015813253166e-08,
+      "loss": 0.3288,
+      "step": 1983
+    },
+    {
+      "epoch": 0.272424564896502,
+      "grad_norm": 0.1493213176727295,
+      "learning_rate": 3.18992499388493e-08,
+      "loss": 0.437,
+      "step": 1984
+    },
+    {
+      "epoch": 0.27256187566509904,
+      "grad_norm": 0.15246812999248505,
+      "learning_rate": 2.8036630652206187e-08,
+      "loss": 0.3758,
+      "step": 1985
+    },
+    {
+      "epoch": 0.27269918643369606,
+      "grad_norm": 0.15464863181114197,
+      "learning_rate": 2.4423167579978868e-08,
+      "loss": 0.4871,
+      "step": 1986
+    },
+    {
+      "epoch": 0.2728364972022931,
+      "grad_norm": 0.15227794647216797,
+      "learning_rate": 2.105886972787463e-08,
+      "loss": 0.4728,
+      "step": 1987
+    },
+    {
+      "epoch": 0.2729738079708901,
+      "grad_norm": 0.15416234731674194,
+      "learning_rate": 1.7943745480586417e-08,
+      "loss": 0.4987,
+      "step": 1988
+    },
+    {
+      "epoch": 0.2731111187394871,
+      "grad_norm": 0.15620464086532593,
+      "learning_rate": 1.5077802601826118e-08,
+      "loss": 0.4204,
+      "step": 1989
+    },
+    {
+      "epoch": 0.27324842950808415,
+      "grad_norm": 0.14840669929981232,
+      "learning_rate": 1.2461048234269079e-08,
+      "loss": 0.3661,
+      "step": 1990
+    },
+    {
+      "epoch": 0.2733857402766812,
+      "grad_norm": 0.1491561383008957,
+      "learning_rate": 1.0093488899554082e-08,
+      "loss": 0.3791,
+      "step": 1991
+    },
+    {
+      "epoch": 0.27352305104527824,
+      "grad_norm": 0.13692405819892883,
+      "learning_rate": 7.975130498238948e-09,
+      "loss": 0.3049,
+      "step": 1992
+    },
+    {
+      "epoch": 0.27366036181387526,
+      "grad_norm": 0.19245244562625885,
+      "learning_rate": 6.105978309856042e-09,
+      "loss": 0.4476,
+      "step": 1993
+    },
+    {
+      "epoch": 0.2737976725824723,
+      "grad_norm": 0.19619667530059814,
+      "learning_rate": 4.486036992790155e-09,
+      "loss": 0.4232,
+      "step": 1994
+    },
+    {
+      "epoch": 0.2739349833510693,
+      "grad_norm": 0.1624627262353897,
+      "learning_rate": 3.115310584367315e-09,
+      "loss": 0.3799,
+      "step": 1995
+    },
+    {
+      "epoch": 0.2740722941196663,
+      "grad_norm": 0.14835092425346375,
+      "learning_rate": 1.99380250079928e-09,
+      "loss": 0.2921,
+      "step": 1996
+    },
+    {
+      "epoch": 0.27420960488826335,
+      "grad_norm": 0.16958799958229065,
+      "learning_rate": 1.1215155371835373e-09,
+      "loss": 0.3982,
+      "step": 1997
+    },
+    {
+      "epoch": 0.27434691565686037,
+      "grad_norm": 0.17734107375144958,
+      "learning_rate": 4.984518674699956e-10,
+      "loss": 0.415,
+      "step": 1998
+    },
+    {
+      "epoch": 0.2744842264254574,
+      "grad_norm": 0.18290874361991882,
+      "learning_rate": 1.2461304450539502e-10,
+      "loss": 0.4189,
+      "step": 1999
+    },
+    {
+      "epoch": 0.27462153719405447,
+      "grad_norm": 0.18318958580493927,
+      "learning_rate": 0.0,
+      "loss": 0.3441,
+      "step": 2000
+    },
+    {
+      "epoch": 0.27462153719405447,
+      "eval_loss": 0.37363573908805847,
+      "eval_runtime": 89.1948,
+      "eval_samples_per_second": 13.342,
+      "eval_steps_per_second": 6.671,
+      "step": 2000
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.09999650455552e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null