Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

adapter_config.json +5 -5
adapter_model.safetensors +1 -1
optimizer.pt +1 -1
rng_state.pth +1 -1
trainer_state.json +122 -122
training_args.bin +1 -1

adapter_config.json CHANGED Viewed

@@ -29,13 +29,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "down_proj",
-    "q_proj",
-    "up_proj",
     "v_proj",
-    "k_proj",
     "o_proj",
-    "gate_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "v_proj",
+    "gate_proj",
+    "up_proj",
+    "down_proj",
     "o_proj",
+    "k_proj",
+    "q_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83080fb8e776556dccbd8566a149036b04b680b1f7df6595087306d141fdf23b
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:a2b45559552d06fe1c78ca0665c4d8888961c58b9cf95a2b6b5d1c38613465d3
 size 167832240

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61f579989cf726fb56ac8be5726f4f2a782044d114d09e570f66648d859e60f2
 size 85733206

 version https://git-lfs.github.com/spec/v1
+oid sha256:448d48b2fd3f7e8ac497ceb48185550a3df0e7bbe24f8e58e2540b35dac37fa9
 size 85733206

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ebd8221843f67894d180101c2a23325b58062bbda224ad38cb35fd657d1b50d6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:29cc7ab668b5b91e4d0d31bfeb0a9ac01024cb93ee27cea394110ef2ae77f6b5
 size 14244

trainer_state.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.112139052425007,
   "eval_steps": 179,
   "global_step": 100,
   "is_hyper_param_search": false,
@@ -10,203 +10,203 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "entropy": 1.696909672021866,
-      "epoch": 0.005606952621250351,
-      "grad_norm": 0.7663310170173645,
       "learning_rate": 0.00016,
-      "loss": 2.1596,
-      "mean_token_accuracy": 0.5812449663877487,
-      "num_tokens": 8218.0,
       "step": 5
     },
     {
-      "entropy": 1.875484037399292,
-      "epoch": 0.011213905242500702,
-      "grad_norm": 0.8613530397415161,
       "learning_rate": 0.00019909808342728297,
-      "loss": 1.6298,
-      "mean_token_accuracy": 0.6346893429756164,
-      "num_tokens": 19584.0,
       "step": 10
     },
     {
-      "entropy": 1.6492740571498872,
-      "epoch": 0.01682085786375105,
-      "grad_norm": 0.8438306450843811,
       "learning_rate": 0.0001979706877113867,
-      "loss": 1.448,
-      "mean_token_accuracy": 0.667498791217804,
-      "num_tokens": 28392.0,
       "step": 15
     },
     {
-      "entropy": 1.5424207150936127,
-      "epoch": 0.022427810485001403,
-      "grad_norm": 0.5755366086959839,
       "learning_rate": 0.00019684329199549043,
-      "loss": 1.5714,
-      "mean_token_accuracy": 0.6525402277708053,
-      "num_tokens": 40558.0,
       "step": 20
     },
     {
-      "entropy": 1.6563467800617218,
-      "epoch": 0.02803476310625175,
-      "grad_norm": 0.640796422958374,
       "learning_rate": 0.00019571589627959414,
-      "loss": 1.4843,
-      "mean_token_accuracy": 0.6737909287214279,
-      "num_tokens": 50005.0,
       "step": 25
     },
     {
-      "entropy": 1.477371919155121,
-      "epoch": 0.0336417157275021,
-      "grad_norm": 0.7493678331375122,
       "learning_rate": 0.00019458850056369787,
-      "loss": 1.3474,
-      "mean_token_accuracy": 0.6935703039169312,
-      "num_tokens": 58738.0,
       "step": 30
     },
     {
-      "entropy": 1.464887660741806,
-      "epoch": 0.03924866834875245,
-      "grad_norm": 0.6396933794021606,
       "learning_rate": 0.00019346110484780158,
-      "loss": 1.3963,
-      "mean_token_accuracy": 0.6689792603254319,
-      "num_tokens": 68528.0,
       "step": 35
     },
     {
-      "entropy": 1.4757700502872466,
-      "epoch": 0.044855620970002806,
-      "grad_norm": 0.5516763925552368,
       "learning_rate": 0.0001923337091319053,
-      "loss": 1.4236,
-      "mean_token_accuracy": 0.6675747632980347,
-      "num_tokens": 77986.0,
       "step": 40
     },
     {
-      "entropy": 1.4118095993995667,
-      "epoch": 0.050462573591253154,
-      "grad_norm": 0.6395580172538757,
       "learning_rate": 0.00019120631341600902,
-      "loss": 1.2766,
-      "mean_token_accuracy": 0.6935016334056854,
-      "num_tokens": 86893.0,
       "step": 45
     },
     {
-      "entropy": 1.4825987100601197,
-      "epoch": 0.0560695262125035,
-      "grad_norm": 0.7649742960929871,
       "learning_rate": 0.00019007891770011275,
-      "loss": 1.4255,
-      "mean_token_accuracy": 0.6750761657953263,
-      "num_tokens": 95692.0,
       "step": 50
     },
     {
-      "entropy": 1.3713403642177582,
-      "epoch": 0.06167647883375386,
-      "grad_norm": 0.6055657863616943,
       "learning_rate": 0.00018895152198421646,
-      "loss": 1.2919,
-      "mean_token_accuracy": 0.6923940628767014,
-      "num_tokens": 104810.0,
       "step": 55
     },
     {
-      "entropy": 1.3107981920242309,
-      "epoch": 0.0672834314550042,
-      "grad_norm": 0.932307243347168,
       "learning_rate": 0.0001878241262683202,
-      "loss": 1.2072,
-      "mean_token_accuracy": 0.7068106323480606,
-      "num_tokens": 112767.0,
       "step": 60
     },
     {
-      "entropy": 1.290432232618332,
-      "epoch": 0.07289038407625456,
-      "grad_norm": 0.657538115978241,
       "learning_rate": 0.00018669673055242392,
-      "loss": 1.1683,
-      "mean_token_accuracy": 0.714971786737442,
-      "num_tokens": 122104.0,
       "step": 65
     },
     {
-      "entropy": 1.3310194253921508,
-      "epoch": 0.0784973366975049,
-      "grad_norm": 0.5447025299072266,
       "learning_rate": 0.00018556933483652763,
-      "loss": 1.3566,
-      "mean_token_accuracy": 0.6949202805757523,
-      "num_tokens": 132347.0,
       "step": 70
     },
     {
-      "entropy": 1.3567017048597336,
-      "epoch": 0.08410428931875526,
-      "grad_norm": 0.6126067042350769,
       "learning_rate": 0.00018444193912063134,
-      "loss": 1.2616,
-      "mean_token_accuracy": 0.6886427521705627,
-      "num_tokens": 140294.0,
       "step": 75
     },
     {
-      "entropy": 1.3231679052114487,
-      "epoch": 0.08971124194000561,
-      "grad_norm": 0.5827459096908569,
       "learning_rate": 0.00018331454340473507,
-      "loss": 1.2312,
-      "mean_token_accuracy": 0.6998110383749008,
-      "num_tokens": 149796.0,
       "step": 80
     },
     {
-      "entropy": 1.3795920431613922,
-      "epoch": 0.09531819456125595,
-      "grad_norm": 0.6522558331489563,
       "learning_rate": 0.0001821871476888388,
-      "loss": 1.3163,
-      "mean_token_accuracy": 0.6797463029623032,
-      "num_tokens": 160094.0,
       "step": 85
     },
     {
-      "entropy": 1.4354715049266815,
-      "epoch": 0.10092514718250631,
-      "grad_norm": 0.5437538623809814,
       "learning_rate": 0.0001810597519729425,
-      "loss": 1.4219,
-      "mean_token_accuracy": 0.6741667121648789,
-      "num_tokens": 167520.0,
       "step": 90
     },
     {
-      "entropy": 1.3719047516584397,
-      "epoch": 0.10653209980375666,
-      "grad_norm": 0.6490810513496399,
       "learning_rate": 0.00017993235625704624,
-      "loss": 1.3259,
-      "mean_token_accuracy": 0.69256811439991,
-      "num_tokens": 177631.0,
       "step": 95
     },
     {
-      "entropy": 1.381699651479721,
-      "epoch": 0.112139052425007,
-      "grad_norm": 0.6738480925559998,
       "learning_rate": 0.00017880496054114995,
-      "loss": 1.3181,
-      "mean_token_accuracy": 0.696441325545311,
-      "num_tokens": 186948.0,
       "step": 100
     }
   ],
@@ -227,7 +227,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8465230950137856.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.11217049915872125,
   "eval_steps": 179,
   "global_step": 100,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "entropy": 1.5857114374637604,
+      "epoch": 0.005608524957936063,
+      "grad_norm": 0.6155414581298828,
       "learning_rate": 0.00016,
+      "loss": 1.9706,
+      "mean_token_accuracy": 0.6006834208965302,
+      "num_tokens": 8800.0,
       "step": 5
     },
     {
+      "entropy": 1.9297356605529785,
+      "epoch": 0.011217049915872126,
+      "grad_norm": 0.7395716905593872,
       "learning_rate": 0.00019909808342728297,
+      "loss": 1.7721,
+      "mean_token_accuracy": 0.6421857982873916,
+      "num_tokens": 17002.0,
       "step": 10
     },
     {
+      "entropy": 1.619718110561371,
+      "epoch": 0.01682557487380819,
+      "grad_norm": 0.6860098838806152,
       "learning_rate": 0.0001979706877113867,
+      "loss": 1.4967,
+      "mean_token_accuracy": 0.6729415714740753,
+      "num_tokens": 25283.0,
       "step": 15
     },
     {
+      "entropy": 1.5023219525814056,
+      "epoch": 0.022434099831744252,
+      "grad_norm": 0.5842112898826599,
       "learning_rate": 0.00019684329199549043,
+      "loss": 1.4969,
+      "mean_token_accuracy": 0.6710207283496856,
+      "num_tokens": 33505.0,
       "step": 20
     },
     {
+      "entropy": 1.6429471731185914,
+      "epoch": 0.028042624789680313,
+      "grad_norm": 0.5911830067634583,
       "learning_rate": 0.00019571589627959414,
+      "loss": 1.5174,
+      "mean_token_accuracy": 0.6597635358572006,
+      "num_tokens": 43397.0,
       "step": 25
     },
     {
+      "entropy": 1.6084780812263488,
+      "epoch": 0.03365114974761638,
+      "grad_norm": 0.7406187653541565,
       "learning_rate": 0.00019458850056369787,
+      "loss": 1.4757,
+      "mean_token_accuracy": 0.6691249191761017,
+      "num_tokens": 52556.0,
       "step": 30
     },
     {
+      "entropy": 1.4238544702529907,
+      "epoch": 0.03925967470555244,
+      "grad_norm": 0.611213743686676,
       "learning_rate": 0.00019346110484780158,
+      "loss": 1.4085,
+      "mean_token_accuracy": 0.692791685461998,
+      "num_tokens": 61579.0,
       "step": 35
     },
     {
+      "entropy": 1.3825733065605164,
+      "epoch": 0.044868199663488505,
+      "grad_norm": 0.6608020663261414,
       "learning_rate": 0.0001923337091319053,
+      "loss": 1.3955,
+      "mean_token_accuracy": 0.6937497437000275,
+      "num_tokens": 68479.0,
       "step": 40
     },
     {
+      "entropy": 1.4096888184547425,
+      "epoch": 0.050476724621424565,
+      "grad_norm": 0.5221259593963623,
       "learning_rate": 0.00019120631341600902,
+      "loss": 1.2979,
+      "mean_token_accuracy": 0.6925529271364212,
+      "num_tokens": 77911.0,
       "step": 45
     },
     {
+      "entropy": 1.3391252905130386,
+      "epoch": 0.056085249579360626,
+      "grad_norm": 0.6178808212280273,
       "learning_rate": 0.00019007891770011275,
+      "loss": 1.3092,
+      "mean_token_accuracy": 0.704131829738617,
+      "num_tokens": 86382.0,
       "step": 50
     },
     {
+      "entropy": 1.3084194093942643,
+      "epoch": 0.06169377453729669,
+      "grad_norm": 0.570563554763794,
       "learning_rate": 0.00018895152198421646,
+      "loss": 1.228,
+      "mean_token_accuracy": 0.6969290852546692,
+      "num_tokens": 94306.0,
       "step": 55
     },
     {
+      "entropy": 1.418030035495758,
+      "epoch": 0.06730229949523275,
+      "grad_norm": 0.6073914766311646,
       "learning_rate": 0.0001878241262683202,
+      "loss": 1.3252,
+      "mean_token_accuracy": 0.6798107504844666,
+      "num_tokens": 103567.0,
       "step": 60
     },
     {
+      "entropy": 1.5420262813568115,
+      "epoch": 0.07291082445316882,
+      "grad_norm": 0.4949992001056671,
       "learning_rate": 0.00018669673055242392,
+      "loss": 1.4262,
+      "mean_token_accuracy": 0.6678558409214019,
+      "num_tokens": 113923.0,
       "step": 65
     },
     {
+      "entropy": 1.3928685992956162,
+      "epoch": 0.07851934941110487,
+      "grad_norm": 0.5758721828460693,
       "learning_rate": 0.00018556933483652763,
+      "loss": 1.3512,
+      "mean_token_accuracy": 0.6777496755123138,
+      "num_tokens": 126199.0,
       "step": 70
     },
     {
+      "entropy": 1.336549162864685,
+      "epoch": 0.08412787436904094,
+      "grad_norm": 0.678063154220581,
       "learning_rate": 0.00018444193912063134,
+      "loss": 1.2387,
+      "mean_token_accuracy": 0.6992105931043625,
+      "num_tokens": 135737.0,
       "step": 75
     },
     {
+      "entropy": 1.5632237881422042,
+      "epoch": 0.08973639932697701,
+      "grad_norm": 0.5325204730033875,
       "learning_rate": 0.00018331454340473507,
+      "loss": 1.479,
+      "mean_token_accuracy": 0.6516371637582778,
+      "num_tokens": 146847.0,
       "step": 80
     },
     {
+      "entropy": 1.4212194442749024,
+      "epoch": 0.09534492428491306,
+      "grad_norm": 0.8020451664924622,
       "learning_rate": 0.0001821871476888388,
+      "loss": 1.3261,
+      "mean_token_accuracy": 0.6801748961210251,
+      "num_tokens": 154755.0,
       "step": 85
     },
     {
+      "entropy": 1.2850608110427857,
+      "epoch": 0.10095344924284913,
+      "grad_norm": 0.9955788254737854,
       "learning_rate": 0.0001810597519729425,
+      "loss": 1.1832,
+      "mean_token_accuracy": 0.7192482769489288,
+      "num_tokens": 162857.0,
       "step": 90
     },
     {
+      "entropy": 1.24569151699543,
+      "epoch": 0.1065619742007852,
+      "grad_norm": 0.6132731437683105,
       "learning_rate": 0.00017993235625704624,
+      "loss": 1.1905,
+      "mean_token_accuracy": 0.7139606773853302,
+      "num_tokens": 171381.0,
       "step": 95
     },
     {
+      "entropy": 1.3500551611185074,
+      "epoch": 0.11217049915872125,
+      "grad_norm": 0.604263186454773,
       "learning_rate": 0.00017880496054114995,
+      "loss": 1.3683,
+      "mean_token_accuracy": 0.6883647471666337,
+      "num_tokens": 179672.0,
       "step": 100
     }
   ],
       "attributes": {}
     }
   },
+  "total_flos": 8135764893302784.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79d41fa02013525705cb7a82d4f608a53737fdbc7baa1d76305c242ebd4e870e
 size 5816

 version https://git-lfs.github.com/spec/v1
+oid sha256:75cf331c13a33e1598e45e9013486c4013f3af1f377da7304e21a0d1c22d72cb
 size 5816