Training in progress, step 110

Browse files

Files changed (10) hide show

adapter_config.json +4 -4
adapter_model.safetensors +2 -2
config.json +2 -1
dynamic_sampling_log.txt +80 -0
generation_config.json +4 -1
model-00001-of-00004.safetensors +1 -1
model-00002-of-00004.safetensors +1 -1
model-00003-of-00004.safetensors +1 -1
tokenizer.json +2 -2
training_args.bin +1 -1

adapter_config.json CHANGED Viewed

@@ -29,13 +29,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "gate_proj",
     "v_proj",
-    "k_proj",
     "o_proj",
     "up_proj",
-    "down_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "v_proj",
+    "down_proj",
     "o_proj",
     "up_proj",
+    "q_proj",
+    "k_proj",
+    "gate_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60d95b10b6e140a9626a7058d5038528f2ff80148dc4569b881db56052046509
-size 40

 version https://git-lfs.github.com/spec/v1
+oid sha256:e29f20fde548c0097042b6cdacfabace5e93fd257e1e6b96007e508248fc830a
+size 323014560

config.json CHANGED Viewed

@@ -3,7 +3,7 @@
     "Qwen2ForCausalLM"
   ],
   "attention_dropout": 0.0,
-  "bos_token_id": 151643,
   "dtype": "bfloat16",
   "eos_token_id": 151643,
   "hidden_act": "silu",
@@ -46,6 +46,7 @@
   "num_attention_heads": 28,
   "num_hidden_layers": 28,
   "num_key_value_heads": 4,
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
   "rope_theta": 10000,

     "Qwen2ForCausalLM"
   ],
   "attention_dropout": 0.0,
+  "bos_token_id": 151646,
   "dtype": "bfloat16",
   "eos_token_id": 151643,
   "hidden_act": "silu",
   "num_attention_heads": 28,
   "num_hidden_layers": 28,
   "num_key_value_heads": 4,
+  "pad_token_id": 151643,
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
   "rope_theta": 10000,

dynamic_sampling_log.txt CHANGED Viewed

@@ -803,3 +803,83 @@ Step     Attempts   Total Prompts   Valid Prompts   Efficiency
 99       3          18              6               33.3        %
 99       2          12              6               50.0        %
 99       3          18              6               33.3        %

 99       3          18              6               33.3        %
 99       2          12              6               50.0        %
 99       3          18              6               33.3        %
+100      4          24              6               25.0        %
+100      3          18              6               33.3        %
+100      4          24              6               25.0        %
+100      2          12              6               50.0        %
+100      3          18              6               33.3        %
+100      7          42              6               14.3        %
+100      3          18              6               33.3        %
+100      3          18              6               33.3        %
+101      5          30              6               20.0        %
+101      2          12              6               50.0        %
+101      2          12              6               50.0        %
+101      3          18              6               33.3        %
+101      4          24              6               25.0        %
+101      4          24              6               25.0        %
+101      3          18              6               33.3        %
+101      8          48              6               12.5        %
+102      4          24              6               25.0        %
+102      5          30              6               20.0        %
+102      4          24              6               25.0        %
+102      3          18              6               33.3        %
+102      3          18              6               33.3        %
+102      2          12              6               50.0        %
+102      5          30              6               20.0        %
+102      4          24              6               25.0        %
+103      1          6               6               100.0       %
+103      4          24              6               25.0        %
+103      3          18              6               33.3        %
+103      2          12              6               50.0        %
+103      2          12              6               50.0        %
+103      9          54              6               11.1        %
+103      6          36              6               16.7        %
+103      2          12              6               50.0        %
+104      2          12              6               50.0        %
+104      1          6               6               100.0       %
+104      8          48              6               12.5        %
+104      4          24              6               25.0        %
+104      6          36              6               16.7        %
+104      2          12              6               50.0        %
+104      2          12              6               50.0        %
+104      8          48              6               12.5        %
+105      4          24              6               25.0        %
+105      3          18              6               33.3        %
+105      3          18              6               33.3        %
+105      7          42              6               14.3        %
+105      1          6               6               100.0       %
+105      2          12              6               50.0        %
+105      1          6               6               100.0       %
+105      8          48              6               12.5        %
+106      2          12              6               50.0        %
+106      3          18              6               33.3        %
+106      1          6               6               100.0       %
+106      2          12              6               50.0        %
+106      2          12              6               50.0        %
+106      3          18              6               33.3        %
+106      7          42              6               14.3        %
+106      3          18              6               33.3        %
+107      1          6               6               100.0       %
+107      2          12              6               50.0        %
+107      6          36              6               16.7        %
+107      2          12              6               50.0        %
+107      3          18              6               33.3        %
+107      2          12              6               50.0        %
+107      3          18              6               33.3        %
+107      2          12              6               50.0        %
+108      1          6               6               100.0       %
+108      5          30              6               20.0        %
+108      4          24              6               25.0        %
+108      2          12              6               50.0        %
+108      2          12              6               50.0        %
+108      8          48              6               12.5        %
+108      2          12              6               50.0        %
+108      2          12              6               50.0        %
+109      1          6               6               100.0       %
+109      3          18              6               33.3        %
+109      6          36              6               16.7        %
+109      4          24              6               25.0        %
+109      3          18              6               33.3        %
+109      3          18              6               33.3        %
+109      2          12              6               50.0        %
+109      6          36              6               16.7        %

generation_config.json CHANGED Viewed

@@ -2,7 +2,10 @@
   "_from_model_config": true,
   "bos_token_id": 151646,
   "do_sample": true,
-  "eos_token_id": 151643,
   "temperature": 0.6,
   "top_p": 0.95,
   "transformers_version": "4.57.1"

   "_from_model_config": true,
   "bos_token_id": 151646,
   "do_sample": true,
+  "eos_token_id": [
+    151643
+  ],
+  "pad_token_id": 151643,
   "temperature": 0.6,
   "top_p": 0.95,
   "transformers_version": "4.57.1"

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2547acd2ed4221452815cc1e43cfe6d8fe5eb93d6b0a17746940f62042edb8fc
 size 4877660776

 version https://git-lfs.github.com/spec/v1
+oid sha256:7acf113b5afb31f6c395014a5891df9213109fa76ea978fd940f523c445b39c0
 size 4877660776

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e053f6e3d3b18529c8951cf7ec083ae92e37d8a9f6e7ec486c091092321d906
 size 4932751008

 version https://git-lfs.github.com/spec/v1
+oid sha256:57e38c04cca6a418ed413c932e303eec1886922e960cd068410e821338ad7178
 size 4932751008

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:09055605bc0ed847bfb863ab09528796f3a086ba145fb9d4f6d92ad8310d6646
 size 4330865200

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2ac7350e0ef280872dc11072e1a5ab14b8123b0347c603b525523a92b2091be
 size 4330865200

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
-size 11422778

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4256422650d141f228fe954acee98679da412984c29a569877eefd3af69315a
+size 11422959

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:92df2953de292a8a4d447867c90e350e8357338da5214d2a17070cb10ce845a7
 size 8760

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d047cbc55cda44b752f416d36e3a69e56abc0bd3f8f2902b298452f531a4525
 size 8760