Upload folder using huggingface_hub

Browse files

Files changed (8) hide show

adapter_config.json +5 -5
adapter_model.safetensors +1 -1
optimizer.pt +2 -2
rng_state.pth +1 -1
scaler.pt +3 -0
scheduler.pt +1 -1
trainer_state.json +143 -143
training_args.bin +1 -1

adapter_config.json CHANGED Viewed

@@ -29,13 +29,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "gate_proj",
-    "up_proj",
     "down_proj",
-    "o_proj",
     "k_proj",
-    "q_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "down_proj",
+    "q_proj",
+    "up_proj",
+    "v_proj",
     "k_proj",
+    "o_proj",
+    "gate_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e39c5190b449964d6320622caa9cedc9fbf478ef850d1b88a909ea119aebf579
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:83080fb8e776556dccbd8566a149036b04b680b1f7df6595087306d141fdf23b
 size 167832240

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:872ae2fbee06480034ccbfa235079202342578802cf8fff7f3dd693c59d9bba0
-size 335812922

 version https://git-lfs.github.com/spec/v1
+oid sha256:61f579989cf726fb56ac8be5726f4f2a782044d114d09e570f66648d859e60f2
+size 85733206

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cebaa66b205ddfb8b31f5eb5a32b25b2bf5b2b20793922b5ca0b959d1b26d3ec
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ebd8221843f67894d180101c2a23325b58062bbda224ad38cb35fd657d1b50d6
 size 14244

scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ac4a558c5b93581a5c41e2922404490319bcd15a10296c927a72a41012ff7f27
+size 988

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dce1dba4991ed6328501072e92b13c1d75d1f773bde8a373c913b4ccf6b6c264
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e0587f905a8c9b2be59055a995476e795d7c0ee0915ef354e4295be6940ca474
 size 1064

trainer_state.json CHANGED Viewed

@@ -2,216 +2,216 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0560695262125035,
-  "eval_steps": 150,
   "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "entropy": 1.590625,
-      "epoch": 0.0028034763106251754,
-      "grad_norm": 0.9896485805511475,
       "learning_rate": 0.00016,
-      "loss": 2.0673,
-      "mean_token_accuracy": 0.5922202587127685,
-      "num_tokens": 4193.0,
       "step": 5
     },
     {
-      "entropy": 2.00361328125,
-      "epoch": 0.005606952621250351,
-      "grad_norm": 0.9075812101364136,
-      "learning_rate": 0.0001989261744966443,
-      "loss": 1.9308,
-      "mean_token_accuracy": 0.6079224228858948,
-      "num_tokens": 8218.0,
       "step": 10
     },
     {
-      "entropy": 1.76513671875,
-      "epoch": 0.008410428931875526,
-      "grad_norm": 0.9663995504379272,
-      "learning_rate": 0.00019758389261744966,
-      "loss": 1.482,
-      "mean_token_accuracy": 0.6631967008113862,
-      "num_tokens": 14014.0,
       "step": 15
     },
     {
-      "entropy": 1.4521484375,
-      "epoch": 0.011213905242500702,
-      "grad_norm": 0.6758019328117371,
-      "learning_rate": 0.00019624161073825505,
-      "loss": 1.4712,
-      "mean_token_accuracy": 0.6659617304801941,
-      "num_tokens": 19584.0,
       "step": 20
     },
     {
-      "entropy": 1.56689453125,
-      "epoch": 0.014017381553125876,
-      "grad_norm": 0.9318982362747192,
-      "learning_rate": 0.0001948993288590604,
-      "loss": 1.3829,
-      "mean_token_accuracy": 0.6853179931640625,
-      "num_tokens": 24194.0,
       "step": 25
     },
     {
-      "entropy": 1.5326171875,
-      "epoch": 0.01682085786375105,
-      "grad_norm": 0.934631884098053,
-      "learning_rate": 0.0001935570469798658,
-      "loss": 1.4176,
-      "mean_token_accuracy": 0.6775492548942565,
-      "num_tokens": 28392.0,
       "step": 30
     },
     {
-      "entropy": 1.56787109375,
-      "epoch": 0.019624334174376225,
-      "grad_norm": 1.0512644052505493,
-      "learning_rate": 0.00019221476510067115,
-      "loss": 1.4598,
-      "mean_token_accuracy": 0.6650450527667999,
-      "num_tokens": 34574.0,
       "step": 35
     },
     {
-      "entropy": 1.63740234375,
-      "epoch": 0.022427810485001403,
-      "grad_norm": 0.8699678182601929,
-      "learning_rate": 0.00019087248322147653,
-      "loss": 1.515,
-      "mean_token_accuracy": 0.6554247856140136,
-      "num_tokens": 40558.0,
       "step": 40
     },
     {
-      "entropy": 1.60029296875,
-      "epoch": 0.025231286795626577,
-      "grad_norm": 0.7725480198860168,
-      "learning_rate": 0.0001895302013422819,
-      "loss": 1.4544,
-      "mean_token_accuracy": 0.6661459267139435,
-      "num_tokens": 46416.0,
       "step": 45
     },
     {
-      "entropy": 1.53515625,
-      "epoch": 0.02803476310625175,
-      "grad_norm": 1.1531834602355957,
-      "learning_rate": 0.00018818791946308724,
-      "loss": 1.3171,
-      "mean_token_accuracy": 0.6909306168556213,
-      "num_tokens": 50005.0,
       "step": 50
     },
     {
-      "entropy": 1.40400390625,
-      "epoch": 0.03083823941687693,
-      "grad_norm": 0.8206179738044739,
-      "learning_rate": 0.00018684563758389263,
-      "loss": 1.3265,
-      "mean_token_accuracy": 0.6994331538677215,
-      "num_tokens": 54550.0,
       "step": 55
     },
     {
-      "entropy": 1.37587890625,
-      "epoch": 0.0336417157275021,
-      "grad_norm": 0.9528921246528625,
-      "learning_rate": 0.00018550335570469799,
-      "loss": 1.2327,
-      "mean_token_accuracy": 0.6945347368717194,
-      "num_tokens": 58738.0,
       "step": 60
     },
     {
-      "entropy": 1.498828125,
-      "epoch": 0.03644519203812728,
-      "grad_norm": 0.7587556838989258,
-      "learning_rate": 0.00018416107382550337,
-      "loss": 1.3617,
-      "mean_token_accuracy": 0.6782361149787903,
-      "num_tokens": 62775.0,
       "step": 65
     },
     {
-      "entropy": 1.47353515625,
-      "epoch": 0.03924866834875245,
-      "grad_norm": 0.817040741443634,
-      "learning_rate": 0.00018281879194630873,
-      "loss": 1.3694,
-      "mean_token_accuracy": 0.6701848864555359,
-      "num_tokens": 68528.0,
       "step": 70
     },
     {
-      "entropy": 1.48798828125,
-      "epoch": 0.04205214465937763,
-      "grad_norm": 0.9585862159729004,
-      "learning_rate": 0.0001814765100671141,
-      "loss": 1.3025,
-      "mean_token_accuracy": 0.6777489185333252,
-      "num_tokens": 72845.0,
       "step": 75
     },
     {
-      "entropy": 1.509765625,
-      "epoch": 0.044855620970002806,
-      "grad_norm": 0.9537568688392639,
-      "learning_rate": 0.00018013422818791947,
-      "loss": 1.4538,
-      "mean_token_accuracy": 0.6687214255332947,
-      "num_tokens": 77986.0,
       "step": 80
     },
     {
-      "entropy": 1.43916015625,
-      "epoch": 0.04765909728062798,
-      "grad_norm": 0.6577709913253784,
-      "learning_rate": 0.00017879194630872485,
-      "loss": 1.2956,
-      "mean_token_accuracy": 0.6882057845592499,
-      "num_tokens": 83043.0,
       "step": 85
     },
     {
-      "entropy": 1.31279296875,
-      "epoch": 0.050462573591253154,
-      "grad_norm": 1.0970784425735474,
-      "learning_rate": 0.0001774496644295302,
-      "loss": 1.2447,
-      "mean_token_accuracy": 0.709147822856903,
-      "num_tokens": 86893.0,
       "step": 90
     },
     {
-      "entropy": 1.53671875,
-      "epoch": 0.05326604990187833,
-      "grad_norm": 0.8757184147834778,
-      "learning_rate": 0.0001761073825503356,
-      "loss": 1.4292,
-      "mean_token_accuracy": 0.6783790171146393,
-      "num_tokens": 91659.0,
       "step": 95
     },
     {
-      "entropy": 1.52294921875,
-      "epoch": 0.0560695262125035,
-      "grad_norm": 0.858314037322998,
-      "learning_rate": 0.00017476510067114095,
-      "loss": 1.4,
-      "mean_token_accuracy": 0.6753414094448089,
-      "num_tokens": 95692.0,
       "step": 100
     }
   ],
   "logging_steps": 5,
-  "max_steps": 750,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,
@@ -227,7 +227,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4333049190580224.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.112139052425007,
+  "eval_steps": 179,
   "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "entropy": 1.696909672021866,
+      "epoch": 0.005606952621250351,
+      "grad_norm": 0.7663310170173645,
       "learning_rate": 0.00016,
+      "loss": 2.1596,
+      "mean_token_accuracy": 0.5812449663877487,
+      "num_tokens": 8218.0,
       "step": 5
     },
     {
+      "entropy": 1.875484037399292,
+      "epoch": 0.011213905242500702,
+      "grad_norm": 0.8613530397415161,
+      "learning_rate": 0.00019909808342728297,
+      "loss": 1.6298,
+      "mean_token_accuracy": 0.6346893429756164,
+      "num_tokens": 19584.0,
       "step": 10
     },
     {
+      "entropy": 1.6492740571498872,
+      "epoch": 0.01682085786375105,
+      "grad_norm": 0.8438306450843811,
+      "learning_rate": 0.0001979706877113867,
+      "loss": 1.448,
+      "mean_token_accuracy": 0.667498791217804,
+      "num_tokens": 28392.0,
       "step": 15
     },
     {
+      "entropy": 1.5424207150936127,
+      "epoch": 0.022427810485001403,
+      "grad_norm": 0.5755366086959839,
+      "learning_rate": 0.00019684329199549043,
+      "loss": 1.5714,
+      "mean_token_accuracy": 0.6525402277708053,
+      "num_tokens": 40558.0,
       "step": 20
     },
     {
+      "entropy": 1.6563467800617218,
+      "epoch": 0.02803476310625175,
+      "grad_norm": 0.640796422958374,
+      "learning_rate": 0.00019571589627959414,
+      "loss": 1.4843,
+      "mean_token_accuracy": 0.6737909287214279,
+      "num_tokens": 50005.0,
       "step": 25
     },
     {
+      "entropy": 1.477371919155121,
+      "epoch": 0.0336417157275021,
+      "grad_norm": 0.7493678331375122,
+      "learning_rate": 0.00019458850056369787,
+      "loss": 1.3474,
+      "mean_token_accuracy": 0.6935703039169312,
+      "num_tokens": 58738.0,
       "step": 30
     },
     {
+      "entropy": 1.464887660741806,
+      "epoch": 0.03924866834875245,
+      "grad_norm": 0.6396933794021606,
+      "learning_rate": 0.00019346110484780158,
+      "loss": 1.3963,
+      "mean_token_accuracy": 0.6689792603254319,
+      "num_tokens": 68528.0,
       "step": 35
     },
     {
+      "entropy": 1.4757700502872466,
+      "epoch": 0.044855620970002806,
+      "grad_norm": 0.5516763925552368,
+      "learning_rate": 0.0001923337091319053,
+      "loss": 1.4236,
+      "mean_token_accuracy": 0.6675747632980347,
+      "num_tokens": 77986.0,
       "step": 40
     },
     {
+      "entropy": 1.4118095993995667,
+      "epoch": 0.050462573591253154,
+      "grad_norm": 0.6395580172538757,
+      "learning_rate": 0.00019120631341600902,
+      "loss": 1.2766,
+      "mean_token_accuracy": 0.6935016334056854,
+      "num_tokens": 86893.0,
       "step": 45
     },
     {
+      "entropy": 1.4825987100601197,
+      "epoch": 0.0560695262125035,
+      "grad_norm": 0.7649742960929871,
+      "learning_rate": 0.00019007891770011275,
+      "loss": 1.4255,
+      "mean_token_accuracy": 0.6750761657953263,
+      "num_tokens": 95692.0,
       "step": 50
     },
     {
+      "entropy": 1.3713403642177582,
+      "epoch": 0.06167647883375386,
+      "grad_norm": 0.6055657863616943,
+      "learning_rate": 0.00018895152198421646,
+      "loss": 1.2919,
+      "mean_token_accuracy": 0.6923940628767014,
+      "num_tokens": 104810.0,
       "step": 55
     },
     {
+      "entropy": 1.3107981920242309,
+      "epoch": 0.0672834314550042,
+      "grad_norm": 0.932307243347168,
+      "learning_rate": 0.0001878241262683202,
+      "loss": 1.2072,
+      "mean_token_accuracy": 0.7068106323480606,
+      "num_tokens": 112767.0,
       "step": 60
     },
     {
+      "entropy": 1.290432232618332,
+      "epoch": 0.07289038407625456,
+      "grad_norm": 0.657538115978241,
+      "learning_rate": 0.00018669673055242392,
+      "loss": 1.1683,
+      "mean_token_accuracy": 0.714971786737442,
+      "num_tokens": 122104.0,
       "step": 65
     },
     {
+      "entropy": 1.3310194253921508,
+      "epoch": 0.0784973366975049,
+      "grad_norm": 0.5447025299072266,
+      "learning_rate": 0.00018556933483652763,
+      "loss": 1.3566,
+      "mean_token_accuracy": 0.6949202805757523,
+      "num_tokens": 132347.0,
       "step": 70
     },
     {
+      "entropy": 1.3567017048597336,
+      "epoch": 0.08410428931875526,
+      "grad_norm": 0.6126067042350769,
+      "learning_rate": 0.00018444193912063134,
+      "loss": 1.2616,
+      "mean_token_accuracy": 0.6886427521705627,
+      "num_tokens": 140294.0,
       "step": 75
     },
     {
+      "entropy": 1.3231679052114487,
+      "epoch": 0.08971124194000561,
+      "grad_norm": 0.5827459096908569,
+      "learning_rate": 0.00018331454340473507,
+      "loss": 1.2312,
+      "mean_token_accuracy": 0.6998110383749008,
+      "num_tokens": 149796.0,
       "step": 80
     },
     {
+      "entropy": 1.3795920431613922,
+      "epoch": 0.09531819456125595,
+      "grad_norm": 0.6522558331489563,
+      "learning_rate": 0.0001821871476888388,
+      "loss": 1.3163,
+      "mean_token_accuracy": 0.6797463029623032,
+      "num_tokens": 160094.0,
       "step": 85
     },
     {
+      "entropy": 1.4354715049266815,
+      "epoch": 0.10092514718250631,
+      "grad_norm": 0.5437538623809814,
+      "learning_rate": 0.0001810597519729425,
+      "loss": 1.4219,
+      "mean_token_accuracy": 0.6741667121648789,
+      "num_tokens": 167520.0,
       "step": 90
     },
     {
+      "entropy": 1.3719047516584397,
+      "epoch": 0.10653209980375666,
+      "grad_norm": 0.6490810513496399,
+      "learning_rate": 0.00017993235625704624,
+      "loss": 1.3259,
+      "mean_token_accuracy": 0.69256811439991,
+      "num_tokens": 177631.0,
       "step": 95
     },
     {
+      "entropy": 1.381699651479721,
+      "epoch": 0.112139052425007,
+      "grad_norm": 0.6738480925559998,
+      "learning_rate": 0.00017880496054114995,
+      "loss": 1.3181,
+      "mean_token_accuracy": 0.696441325545311,
+      "num_tokens": 186948.0,
       "step": 100
     }
   ],
   "logging_steps": 5,
+  "max_steps": 892,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 8465230950137856.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8792b8c492ea3ce975f08186d26490bce1a0e166ad9cf7331c22520bc661bf75
 size 5816

 version https://git-lfs.github.com/spec/v1
+oid sha256:79d41fa02013525705cb7a82d4f608a53737fdbc7baa1d76305c242ebd4e870e
 size 5816