PJMixers-Dev
/

Gemma-3-Starshine-Earthen-v0.4-12B-QLoRA

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d45b5952e88114a6552c77c415d93bccbdb91c0d5b4e0002e1cd41c4383b7f82
 size 1047628488

 version https://git-lfs.github.com/spec/v1
+oid sha256:eef2a76f8c9c355a2831ca4dce9743f1165d7eec991513bcb577ef444d71f7a6
 size 1047628488

trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0017791220032913758,
   "eval_steps": 10,
-  "global_step": 40,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -328,6 +328,474 @@
       "eval_samples_per_second": 0.273,
       "eval_steps_per_second": 0.068,
       "step": 40
     }
   ],
   "logging_steps": 1,
@@ -347,7 +815,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.994902180364288e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.00444780500822844,
   "eval_steps": 10,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 0.273,
       "eval_steps_per_second": 0.068,
       "step": 40
+    },
+    {
+      "epoch": 0.0018236000533736602,
+      "grad_norm": 0.45847609639167786,
+      "learning_rate": 9.998264731957889e-07,
+      "loss": 1.8686,
+      "step": 41
+    },
+    {
+      "epoch": 0.0018680781034559446,
+      "grad_norm": 0.33932119607925415,
+      "learning_rate": 9.998222337069841e-07,
+      "loss": 2.2302,
+      "step": 42
+    },
+    {
+      "epoch": 0.001912556153538229,
+      "grad_norm": 1.3315902948379517,
+      "learning_rate": 9.998179938781784e-07,
+      "loss": 1.8721,
+      "step": 43
+    },
+    {
+      "epoch": 0.001957034203620513,
+      "grad_norm": 1.2163060903549194,
+      "learning_rate": 9.99813753709331e-07,
+      "loss": 2.1642,
+      "step": 44
+    },
+    {
+      "epoch": 0.0020015122537027975,
+      "grad_norm": 0.4775317907333374,
+      "learning_rate": 9.99809513200401e-07,
+      "loss": 2.4106,
+      "step": 45
+    },
+    {
+      "epoch": 0.002045990303785082,
+      "grad_norm": 0.28207871317863464,
+      "learning_rate": 9.998052723513476e-07,
+      "loss": 2.058,
+      "step": 46
+    },
+    {
+      "epoch": 0.0020904683538673662,
+      "grad_norm": 0.5639588236808777,
+      "learning_rate": 9.998010311621295e-07,
+      "loss": 2.2522,
+      "step": 47
+    },
+    {
+      "epoch": 0.002134946403949651,
+      "grad_norm": 0.45129290223121643,
+      "learning_rate": 9.997967896327061e-07,
+      "loss": 2.3281,
+      "step": 48
+    },
+    {
+      "epoch": 0.0021794244540319354,
+      "grad_norm": 0.4590243101119995,
+      "learning_rate": 9.997925477630364e-07,
+      "loss": 2.1991,
+      "step": 49
+    },
+    {
+      "epoch": 0.00222390250411422,
+      "grad_norm": 0.3881557881832123,
+      "learning_rate": 9.997883055530797e-07,
+      "loss": 2.2259,
+      "step": 50
+    },
+    {
+      "epoch": 0.00222390250411422,
+      "eval_loss": 1.9868642091751099,
+      "eval_runtime": 233.6732,
+      "eval_samples_per_second": 0.274,
+      "eval_steps_per_second": 0.068,
+      "step": 50
+    },
+    {
+      "epoch": 0.002268380554196504,
+      "grad_norm": 0.4352044463157654,
+      "learning_rate": 9.997840630027944e-07,
+      "loss": 2.2623,
+      "step": 51
+    },
+    {
+      "epoch": 0.0023128586042787886,
+      "grad_norm": 0.43397316336631775,
+      "learning_rate": 9.997798201121402e-07,
+      "loss": 1.8368,
+      "step": 52
+    },
+    {
+      "epoch": 0.002357336654361073,
+      "grad_norm": 0.38807961344718933,
+      "learning_rate": 9.99775576881076e-07,
+      "loss": 2.0101,
+      "step": 53
+    },
+    {
+      "epoch": 0.0024018147044433573,
+      "grad_norm": 0.6237647533416748,
+      "learning_rate": 9.997713333095603e-07,
+      "loss": 2.0238,
+      "step": 54
+    },
+    {
+      "epoch": 0.0024462927545256417,
+      "grad_norm": 0.34039369225502014,
+      "learning_rate": 9.997670893975529e-07,
+      "loss": 2.1,
+      "step": 55
+    },
+    {
+      "epoch": 0.002490770804607926,
+      "grad_norm": 0.4221118986606598,
+      "learning_rate": 9.997628451450122e-07,
+      "loss": 2.1649,
+      "step": 56
+    },
+    {
+      "epoch": 0.0025352488546902105,
+      "grad_norm": 0.38328638672828674,
+      "learning_rate": 9.997586005518976e-07,
+      "loss": 2.1151,
+      "step": 57
+    },
+    {
+      "epoch": 0.002579726904772495,
+      "grad_norm": 0.3308090567588806,
+      "learning_rate": 9.997543556181679e-07,
+      "loss": 2.1499,
+      "step": 58
+    },
+    {
+      "epoch": 0.0026242049548547792,
+      "grad_norm": 0.3598516881465912,
+      "learning_rate": 9.99750110343782e-07,
+      "loss": 2.1146,
+      "step": 59
+    },
+    {
+      "epoch": 0.0026686830049370636,
+      "grad_norm": 0.38582974672317505,
+      "learning_rate": 9.997458647286993e-07,
+      "loss": 1.8236,
+      "step": 60
+    },
+    {
+      "epoch": 0.0026686830049370636,
+      "eval_loss": 1.9794503450393677,
+      "eval_runtime": 312.5608,
+      "eval_samples_per_second": 0.205,
+      "eval_steps_per_second": 0.051,
+      "step": 60
+    },
+    {
+      "epoch": 0.002713161055019348,
+      "grad_norm": 0.5031485557556152,
+      "learning_rate": 9.997416187728787e-07,
+      "loss": 2.3825,
+      "step": 61
+    },
+    {
+      "epoch": 0.0027576391051016324,
+      "grad_norm": 0.40436115860939026,
+      "learning_rate": 9.997373724762788e-07,
+      "loss": 1.9051,
+      "step": 62
+    },
+    {
+      "epoch": 0.0028021171551839167,
+      "grad_norm": 0.3216610252857208,
+      "learning_rate": 9.997331258388588e-07,
+      "loss": 1.7655,
+      "step": 63
+    },
+    {
+      "epoch": 0.002846595205266201,
+      "grad_norm": 0.30226317048072815,
+      "learning_rate": 9.997288788605777e-07,
+      "loss": 2.215,
+      "step": 64
+    },
+    {
+      "epoch": 0.0028910732553484855,
+      "grad_norm": 0.34857413172721863,
+      "learning_rate": 9.997246315413945e-07,
+      "loss": 2.1704,
+      "step": 65
+    },
+    {
+      "epoch": 0.00293555130543077,
+      "grad_norm": 0.4939625859260559,
+      "learning_rate": 9.99720383881268e-07,
+      "loss": 2.1591,
+      "step": 66
+    },
+    {
+      "epoch": 0.0029800293555130542,
+      "grad_norm": 0.6396478414535522,
+      "learning_rate": 9.997161358801571e-07,
+      "loss": 2.4183,
+      "step": 67
+    },
+    {
+      "epoch": 0.0030245074055953386,
+      "grad_norm": 0.3547438681125641,
+      "learning_rate": 9.99711887538021e-07,
+      "loss": 2.3338,
+      "step": 68
+    },
+    {
+      "epoch": 0.003068985455677623,
+      "grad_norm": 0.455522745847702,
+      "learning_rate": 9.997076388548186e-07,
+      "loss": 2.2559,
+      "step": 69
+    },
+    {
+      "epoch": 0.0031134635057599074,
+      "grad_norm": 0.5139729976654053,
+      "learning_rate": 9.997033898305084e-07,
+      "loss": 2.4271,
+      "step": 70
+    },
+    {
+      "epoch": 0.0031134635057599074,
+      "eval_loss": 1.974100947380066,
+      "eval_runtime": 231.1208,
+      "eval_samples_per_second": 0.277,
+      "eval_steps_per_second": 0.069,
+      "step": 70
+    },
+    {
+      "epoch": 0.0031579415558421918,
+      "grad_norm": 0.3868389427661896,
+      "learning_rate": 9.996991404650499e-07,
+      "loss": 1.8754,
+      "step": 71
+    },
+    {
+      "epoch": 0.003202419605924476,
+      "grad_norm": 0.4664241373538971,
+      "learning_rate": 9.996948907584016e-07,
+      "loss": 1.9934,
+      "step": 72
+    },
+    {
+      "epoch": 0.0032468976560067605,
+      "grad_norm": 0.3952767848968506,
+      "learning_rate": 9.996906407105226e-07,
+      "loss": 2.0883,
+      "step": 73
+    },
+    {
+      "epoch": 0.003291375706089045,
+      "grad_norm": 0.4785691797733307,
+      "learning_rate": 9.996863903213718e-07,
+      "loss": 2.3203,
+      "step": 74
+    },
+    {
+      "epoch": 0.0033358537561713293,
+      "grad_norm": 0.4103385806083679,
+      "learning_rate": 9.99682139590908e-07,
+      "loss": 2.0406,
+      "step": 75
+    },
+    {
+      "epoch": 0.0033803318062536137,
+      "grad_norm": 0.45813262462615967,
+      "learning_rate": 9.996778885190904e-07,
+      "loss": 2.1745,
+      "step": 76
+    },
+    {
+      "epoch": 0.003424809856335898,
+      "grad_norm": 0.34197866916656494,
+      "learning_rate": 9.996736371058771e-07,
+      "loss": 2.0839,
+      "step": 77
+    },
+    {
+      "epoch": 0.0034692879064181824,
+      "grad_norm": 0.4917107820510864,
+      "learning_rate": 9.996693853512279e-07,
+      "loss": 1.9646,
+      "step": 78
+    },
+    {
+      "epoch": 0.0035137659565004672,
+      "grad_norm": 0.570755124092102,
+      "learning_rate": 9.99665133255101e-07,
+      "loss": 2.4128,
+      "step": 79
+    },
+    {
+      "epoch": 0.0035582440065827516,
+      "grad_norm": 0.6334550380706787,
+      "learning_rate": 9.996608808174557e-07,
+      "loss": 2.3972,
+      "step": 80
+    },
+    {
+      "epoch": 0.0035582440065827516,
+      "eval_loss": 1.9703996181488037,
+      "eval_runtime": 236.153,
+      "eval_samples_per_second": 0.271,
+      "eval_steps_per_second": 0.068,
+      "step": 80
+    },
+    {
+      "epoch": 0.003602722056665036,
+      "grad_norm": 0.44049328565597534,
+      "learning_rate": 9.996566280382507e-07,
+      "loss": 2.389,
+      "step": 81
+    },
+    {
+      "epoch": 0.0036472001067473204,
+      "grad_norm": 0.5198694467544556,
+      "learning_rate": 9.996523749174444e-07,
+      "loss": 1.8092,
+      "step": 82
+    },
+    {
+      "epoch": 0.0036916781568296047,
+      "grad_norm": 0.4297351837158203,
+      "learning_rate": 9.996481214549966e-07,
+      "loss": 1.9158,
+      "step": 83
+    },
+    {
+      "epoch": 0.003736156206911889,
+      "grad_norm": 0.5207564234733582,
+      "learning_rate": 9.996438676508653e-07,
+      "loss": 2.3368,
+      "step": 84
+    },
+    {
+      "epoch": 0.0037806342569941735,
+      "grad_norm": 3.4639275074005127,
+      "learning_rate": 9.996396135050097e-07,
+      "loss": 2.0157,
+      "step": 85
+    },
+    {
+      "epoch": 0.003825112307076458,
+      "grad_norm": 0.5132240056991577,
+      "learning_rate": 9.996353590173885e-07,
+      "loss": 2.1738,
+      "step": 86
+    },
+    {
+      "epoch": 0.0038695903571587423,
+      "grad_norm": 0.559355616569519,
+      "learning_rate": 9.996311041879605e-07,
+      "loss": 2.2668,
+      "step": 87
+    },
+    {
+      "epoch": 0.003914068407241026,
+      "grad_norm": 0.40078872442245483,
+      "learning_rate": 9.996268490166847e-07,
+      "loss": 2.0339,
+      "step": 88
+    },
+    {
+      "epoch": 0.003958546457323311,
+      "grad_norm": 0.43362948298454285,
+      "learning_rate": 9.996225935035196e-07,
+      "loss": 2.3476,
+      "step": 89
+    },
+    {
+      "epoch": 0.004003024507405595,
+      "grad_norm": 0.6087605953216553,
+      "learning_rate": 9.99618337648424e-07,
+      "loss": 2.3268,
+      "step": 90
+    },
+    {
+      "epoch": 0.004003024507405595,
+      "eval_loss": 1.9674957990646362,
+      "eval_runtime": 241.976,
+      "eval_samples_per_second": 0.264,
+      "eval_steps_per_second": 0.066,
+      "step": 90
+    },
+    {
+      "epoch": 0.004047502557487879,
+      "grad_norm": 3.6922800540924072,
+      "learning_rate": 9.996140814513573e-07,
+      "loss": 2.2244,
+      "step": 91
+    },
+    {
+      "epoch": 0.004091980607570164,
+      "grad_norm": 0.6901090145111084,
+      "learning_rate": 9.996098249122776e-07,
+      "loss": 2.127,
+      "step": 92
+    },
+    {
+      "epoch": 0.004136458657652448,
+      "grad_norm": 0.42567893862724304,
+      "learning_rate": 9.99605568031144e-07,
+      "loss": 1.99,
+      "step": 93
+    },
+    {
+      "epoch": 0.0041809367077347325,
+      "grad_norm": 0.37529805302619934,
+      "learning_rate": 9.996013108079149e-07,
+      "loss": 2.1369,
+      "step": 94
+    },
+    {
+      "epoch": 0.004225414757817018,
+      "grad_norm": 0.5616635084152222,
+      "learning_rate": 9.995970532425493e-07,
+      "loss": 1.8421,
+      "step": 95
+    },
+    {
+      "epoch": 0.004269892807899302,
+      "grad_norm": 0.3917858898639679,
+      "learning_rate": 9.995927953350061e-07,
+      "loss": 2.0905,
+      "step": 96
+    },
+    {
+      "epoch": 0.0043143708579815865,
+      "grad_norm": 0.3693113923072815,
+      "learning_rate": 9.99588537085244e-07,
+      "loss": 2.326,
+      "step": 97
+    },
+    {
+      "epoch": 0.004358848908063871,
+      "grad_norm": 0.4595705270767212,
+      "learning_rate": 9.995842784932216e-07,
+      "loss": 1.8433,
+      "step": 98
+    },
+    {
+      "epoch": 0.004403326958146155,
+      "grad_norm": 0.46681633591651917,
+      "learning_rate": 9.995800195588977e-07,
+      "loss": 2.21,
+      "step": 99
+    },
+    {
+      "epoch": 0.00444780500822844,
+      "grad_norm": 0.5430231690406799,
+      "learning_rate": 9.99575760282231e-07,
+      "loss": 2.4849,
+      "step": 100
+    },
+    {
+      "epoch": 0.00444780500822844,
+      "eval_loss": 1.964964509010315,
+      "eval_runtime": 231.4209,
+      "eval_samples_per_second": 0.277,
+      "eval_steps_per_second": 0.069,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.248725545091072e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null