Training in progress, step 30, checkpoint
Browse files- last-checkpoint/adapter_config.json +2 -2
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +94 -59
- last-checkpoint/training_args.bin +1 -1
last-checkpoint/adapter_config.json
CHANGED
|
@@ -21,12 +21,12 @@
|
|
| 21 |
"revision": null,
|
| 22 |
"target_modules": [
|
| 23 |
"up_proj",
|
|
|
|
| 24 |
"v_proj",
|
| 25 |
"down_proj",
|
| 26 |
"o_proj",
|
| 27 |
"q_proj",
|
| 28 |
-
"k_proj"
|
| 29 |
-
"gate_proj"
|
| 30 |
],
|
| 31 |
"task_type": "CAUSAL_LM",
|
| 32 |
"use_dora": false,
|
|
|
|
| 21 |
"revision": null,
|
| 22 |
"target_modules": [
|
| 23 |
"up_proj",
|
| 24 |
+
"gate_proj",
|
| 25 |
"v_proj",
|
| 26 |
"down_proj",
|
| 27 |
"o_proj",
|
| 28 |
"q_proj",
|
| 29 |
+
"k_proj"
|
|
|
|
| 30 |
],
|
| 31 |
"task_type": "CAUSAL_LM",
|
| 32 |
"use_dora": false,
|
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 203456160
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56d434d598901331d000d69f27bf2d6a200a08071d7cddb9fcc74d618b8fdcb0
|
| 3 |
size 203456160
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 407121750
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:807c2c215b63a89b54f864694bcb59b5d66e08d35c2d3cb7b85205e700e1b415
|
| 3 |
size 407121750
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0017d054f399ae02d66bec2e7c0b81e4c469b5e125926b0e041136c01e7debf8
|
| 3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:409b87eefbbfc5af04df48b8cccf0428548d3efea659f10ffd25c2e2bc3de30b
|
| 3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2a3eaa55c6d5537c27e4456768a8f475ec9b8f739daf816e638d7ac888011bb
|
| 3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9ed5c5b37f0763e77790b53d0f88a4fc69d787befcb63013221803055bb03c9
|
| 3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0e9cc72c20ddd925ef39b6005e82a4d8730b1dde32cfcd070d74c83a8a3564a
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,16 +1,16 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 1.
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-25",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 25,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 0.10191082802547771,
|
| 13 |
-
"grad_norm": 0.
|
| 14 |
"learning_rate": 5e-05,
|
| 15 |
"loss": 1.2924,
|
| 16 |
"step": 1
|
|
@@ -18,186 +18,221 @@
|
|
| 18 |
{
|
| 19 |
"epoch": 0.10191082802547771,
|
| 20 |
"eval_loss": 1.4096554517745972,
|
| 21 |
-
"eval_runtime": 2.
|
| 22 |
-
"eval_samples_per_second":
|
| 23 |
-
"eval_steps_per_second": 3.
|
| 24 |
"step": 1
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"epoch": 0.20382165605095542,
|
| 28 |
-
"grad_norm": 0.
|
| 29 |
"learning_rate": 0.0001,
|
| 30 |
"loss": 1.3123,
|
| 31 |
"step": 2
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"epoch": 0.3057324840764331,
|
| 35 |
-
"grad_norm": 0.
|
| 36 |
"learning_rate": 9.968561049466214e-05,
|
| 37 |
-
"loss": 1.
|
| 38 |
"step": 3
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"epoch": 0.40764331210191085,
|
| 42 |
-
"grad_norm": 0.
|
| 43 |
"learning_rate": 9.874639560909117e-05,
|
| 44 |
-
"loss": 1.
|
| 45 |
"step": 4
|
| 46 |
},
|
| 47 |
{
|
| 48 |
"epoch": 0.5095541401273885,
|
| 49 |
-
"grad_norm": 0.
|
| 50 |
"learning_rate": 9.719416651541839e-05,
|
| 51 |
-
"loss": 1.
|
| 52 |
"step": 5
|
| 53 |
},
|
| 54 |
{
|
| 55 |
"epoch": 0.6114649681528662,
|
| 56 |
-
"grad_norm": 0.
|
| 57 |
"learning_rate": 9.504844339512095e-05,
|
| 58 |
-
"loss": 1.
|
| 59 |
"step": 6
|
| 60 |
},
|
| 61 |
{
|
| 62 |
"epoch": 0.7133757961783439,
|
| 63 |
-
"grad_norm": 0.
|
| 64 |
"learning_rate": 9.233620996141421e-05,
|
| 65 |
-
"loss": 1.
|
| 66 |
"step": 7
|
| 67 |
},
|
| 68 |
{
|
| 69 |
"epoch": 0.8152866242038217,
|
| 70 |
-
"grad_norm": 0.
|
| 71 |
"learning_rate": 8.90915741234015e-05,
|
| 72 |
-
"loss": 1.
|
| 73 |
"step": 8
|
| 74 |
},
|
| 75 |
{
|
| 76 |
"epoch": 0.9171974522292994,
|
| 77 |
-
"grad_norm": 0.
|
| 78 |
"learning_rate": 8.535533905932738e-05,
|
| 79 |
-
"loss": 1.
|
| 80 |
"step": 9
|
| 81 |
},
|
| 82 |
{
|
| 83 |
"epoch": 1.0636942675159236,
|
| 84 |
-
"grad_norm": 0.
|
| 85 |
"learning_rate": 8.117449009293668e-05,
|
| 86 |
-
"loss": 1.
|
| 87 |
"step": 10
|
| 88 |
},
|
| 89 |
{
|
| 90 |
"epoch": 1.1656050955414012,
|
| 91 |
-
"grad_norm": 0.
|
| 92 |
"learning_rate": 7.660160382576683e-05,
|
| 93 |
-
"loss": 0.
|
| 94 |
"step": 11
|
| 95 |
},
|
| 96 |
{
|
| 97 |
"epoch": 1.267515923566879,
|
| 98 |
-
"grad_norm": 0.
|
| 99 |
"learning_rate": 7.169418695587791e-05,
|
| 100 |
-
"loss": 1.
|
| 101 |
"step": 12
|
| 102 |
},
|
| 103 |
{
|
| 104 |
"epoch": 1.3694267515923566,
|
| 105 |
-
"grad_norm": 0.
|
| 106 |
"learning_rate": 6.651395309775837e-05,
|
| 107 |
-
"loss": 1.
|
| 108 |
"step": 13
|
| 109 |
},
|
| 110 |
{
|
| 111 |
"epoch": 1.4713375796178343,
|
| 112 |
-
"grad_norm": 0.
|
| 113 |
"learning_rate": 6.112604669781572e-05,
|
| 114 |
-
"loss": 1.
|
| 115 |
"step": 14
|
| 116 |
},
|
| 117 |
{
|
| 118 |
"epoch": 1.573248407643312,
|
| 119 |
-
"grad_norm": 0.
|
| 120 |
"learning_rate": 5.559822380516539e-05,
|
| 121 |
-
"loss": 1.
|
| 122 |
"step": 15
|
| 123 |
},
|
| 124 |
{
|
| 125 |
"epoch": 1.6751592356687897,
|
| 126 |
-
"grad_norm": 0.
|
| 127 |
"learning_rate": 5e-05,
|
| 128 |
-
"loss": 1.
|
| 129 |
"step": 16
|
| 130 |
},
|
| 131 |
{
|
| 132 |
"epoch": 1.7770700636942676,
|
| 133 |
-
"grad_norm": 0.
|
| 134 |
"learning_rate": 4.4401776194834613e-05,
|
| 135 |
-
"loss": 1.
|
| 136 |
"step": 17
|
| 137 |
},
|
| 138 |
{
|
| 139 |
"epoch": 1.8789808917197452,
|
| 140 |
-
"grad_norm": 0.
|
| 141 |
"learning_rate": 3.887395330218429e-05,
|
| 142 |
-
"loss": 0.
|
| 143 |
"step": 18
|
| 144 |
},
|
| 145 |
{
|
| 146 |
"epoch": 2.0254777070063694,
|
| 147 |
-
"grad_norm": 0.
|
| 148 |
"learning_rate": 3.3486046902241664e-05,
|
| 149 |
-
"loss": 1.
|
| 150 |
"step": 19
|
| 151 |
},
|
| 152 |
{
|
| 153 |
"epoch": 2.127388535031847,
|
| 154 |
-
"grad_norm": 0.
|
| 155 |
"learning_rate": 2.8305813044122097e-05,
|
| 156 |
-
"loss": 0.
|
| 157 |
"step": 20
|
| 158 |
},
|
| 159 |
{
|
| 160 |
"epoch": 2.229299363057325,
|
| 161 |
-
"grad_norm": 0.
|
| 162 |
"learning_rate": 2.3398396174233178e-05,
|
| 163 |
"loss": 0.925,
|
| 164 |
"step": 21
|
| 165 |
},
|
| 166 |
{
|
| 167 |
"epoch": 2.3312101910828025,
|
| 168 |
-
"grad_norm": 0.
|
| 169 |
"learning_rate": 1.8825509907063327e-05,
|
| 170 |
-
"loss": 1.
|
| 171 |
"step": 22
|
| 172 |
},
|
| 173 |
{
|
| 174 |
"epoch": 2.43312101910828,
|
| 175 |
-
"grad_norm": 0.
|
| 176 |
"learning_rate": 1.4644660940672627e-05,
|
| 177 |
-
"loss": 0.
|
| 178 |
"step": 23
|
| 179 |
},
|
| 180 |
{
|
| 181 |
"epoch": 2.535031847133758,
|
| 182 |
-
"grad_norm": 0.
|
| 183 |
"learning_rate": 1.090842587659851e-05,
|
| 184 |
-
"loss": 0.
|
| 185 |
"step": 24
|
| 186 |
},
|
| 187 |
{
|
| 188 |
"epoch": 2.6369426751592355,
|
| 189 |
-
"grad_norm": 0.
|
| 190 |
"learning_rate": 7.663790038585793e-06,
|
| 191 |
-
"loss": 0.
|
| 192 |
"step": 25
|
| 193 |
},
|
| 194 |
{
|
| 195 |
"epoch": 2.6369426751592355,
|
| 196 |
-
"eval_loss": 1.
|
| 197 |
-
"eval_runtime": 2.
|
| 198 |
-
"eval_samples_per_second":
|
| 199 |
-
"eval_steps_per_second": 3.
|
| 200 |
"step": 25
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
}
|
| 202 |
],
|
| 203 |
"logging_steps": 1,
|
|
@@ -221,12 +256,12 @@
|
|
| 221 |
"should_evaluate": false,
|
| 222 |
"should_log": false,
|
| 223 |
"should_save": true,
|
| 224 |
-
"should_training_stop":
|
| 225 |
},
|
| 226 |
"attributes": {}
|
| 227 |
}
|
| 228 |
},
|
| 229 |
-
"total_flos": 1.
|
| 230 |
"train_batch_size": 2,
|
| 231 |
"trial_name": null,
|
| 232 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 1.0476467609405518,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-25",
|
| 4 |
+
"epoch": 3.1910828025477707,
|
| 5 |
"eval_steps": 25,
|
| 6 |
+
"global_step": 30,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 0.10191082802547771,
|
| 13 |
+
"grad_norm": 0.3121834695339203,
|
| 14 |
"learning_rate": 5e-05,
|
| 15 |
"loss": 1.2924,
|
| 16 |
"step": 1
|
|
|
|
| 18 |
{
|
| 19 |
"epoch": 0.10191082802547771,
|
| 20 |
"eval_loss": 1.4096554517745972,
|
| 21 |
+
"eval_runtime": 2.2711,
|
| 22 |
+
"eval_samples_per_second": 29.061,
|
| 23 |
+
"eval_steps_per_second": 3.963,
|
| 24 |
"step": 1
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"epoch": 0.20382165605095542,
|
| 28 |
+
"grad_norm": 0.2996855676174164,
|
| 29 |
"learning_rate": 0.0001,
|
| 30 |
"loss": 1.3123,
|
| 31 |
"step": 2
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"epoch": 0.3057324840764331,
|
| 35 |
+
"grad_norm": 0.3492814004421234,
|
| 36 |
"learning_rate": 9.968561049466214e-05,
|
| 37 |
+
"loss": 1.4114,
|
| 38 |
"step": 3
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"epoch": 0.40764331210191085,
|
| 42 |
+
"grad_norm": 0.2742891311645508,
|
| 43 |
"learning_rate": 9.874639560909117e-05,
|
| 44 |
+
"loss": 1.1823,
|
| 45 |
"step": 4
|
| 46 |
},
|
| 47 |
{
|
| 48 |
"epoch": 0.5095541401273885,
|
| 49 |
+
"grad_norm": 0.3076689541339874,
|
| 50 |
"learning_rate": 9.719416651541839e-05,
|
| 51 |
+
"loss": 1.3595,
|
| 52 |
"step": 5
|
| 53 |
},
|
| 54 |
{
|
| 55 |
"epoch": 0.6114649681528662,
|
| 56 |
+
"grad_norm": 0.3100813329219818,
|
| 57 |
"learning_rate": 9.504844339512095e-05,
|
| 58 |
+
"loss": 1.2393,
|
| 59 |
"step": 6
|
| 60 |
},
|
| 61 |
{
|
| 62 |
"epoch": 0.7133757961783439,
|
| 63 |
+
"grad_norm": 0.24378710985183716,
|
| 64 |
"learning_rate": 9.233620996141421e-05,
|
| 65 |
+
"loss": 1.2006,
|
| 66 |
"step": 7
|
| 67 |
},
|
| 68 |
{
|
| 69 |
"epoch": 0.8152866242038217,
|
| 70 |
+
"grad_norm": 0.30991846323013306,
|
| 71 |
"learning_rate": 8.90915741234015e-05,
|
| 72 |
+
"loss": 1.1693,
|
| 73 |
"step": 8
|
| 74 |
},
|
| 75 |
{
|
| 76 |
"epoch": 0.9171974522292994,
|
| 77 |
+
"grad_norm": 0.298220694065094,
|
| 78 |
"learning_rate": 8.535533905932738e-05,
|
| 79 |
+
"loss": 1.1559,
|
| 80 |
"step": 9
|
| 81 |
},
|
| 82 |
{
|
| 83 |
"epoch": 1.0636942675159236,
|
| 84 |
+
"grad_norm": 0.5674415826797485,
|
| 85 |
"learning_rate": 8.117449009293668e-05,
|
| 86 |
+
"loss": 1.7409,
|
| 87 |
"step": 10
|
| 88 |
},
|
| 89 |
{
|
| 90 |
"epoch": 1.1656050955414012,
|
| 91 |
+
"grad_norm": 0.28200918436050415,
|
| 92 |
"learning_rate": 7.660160382576683e-05,
|
| 93 |
+
"loss": 0.9992,
|
| 94 |
"step": 11
|
| 95 |
},
|
| 96 |
{
|
| 97 |
"epoch": 1.267515923566879,
|
| 98 |
+
"grad_norm": 0.25404325127601624,
|
| 99 |
"learning_rate": 7.169418695587791e-05,
|
| 100 |
+
"loss": 1.0336,
|
| 101 |
"step": 12
|
| 102 |
},
|
| 103 |
{
|
| 104 |
"epoch": 1.3694267515923566,
|
| 105 |
+
"grad_norm": 0.2391156703233719,
|
| 106 |
"learning_rate": 6.651395309775837e-05,
|
| 107 |
+
"loss": 1.0757,
|
| 108 |
"step": 13
|
| 109 |
},
|
| 110 |
{
|
| 111 |
"epoch": 1.4713375796178343,
|
| 112 |
+
"grad_norm": 0.2868117690086365,
|
| 113 |
"learning_rate": 6.112604669781572e-05,
|
| 114 |
+
"loss": 1.0221,
|
| 115 |
"step": 14
|
| 116 |
},
|
| 117 |
{
|
| 118 |
"epoch": 1.573248407643312,
|
| 119 |
+
"grad_norm": 0.26096099615097046,
|
| 120 |
"learning_rate": 5.559822380516539e-05,
|
| 121 |
+
"loss": 1.1211,
|
| 122 |
"step": 15
|
| 123 |
},
|
| 124 |
{
|
| 125 |
"epoch": 1.6751592356687897,
|
| 126 |
+
"grad_norm": 0.2496553361415863,
|
| 127 |
"learning_rate": 5e-05,
|
| 128 |
+
"loss": 1.0423,
|
| 129 |
"step": 16
|
| 130 |
},
|
| 131 |
{
|
| 132 |
"epoch": 1.7770700636942676,
|
| 133 |
+
"grad_norm": 0.26016390323638916,
|
| 134 |
"learning_rate": 4.4401776194834613e-05,
|
| 135 |
+
"loss": 1.0078,
|
| 136 |
"step": 17
|
| 137 |
},
|
| 138 |
{
|
| 139 |
"epoch": 1.8789808917197452,
|
| 140 |
+
"grad_norm": 0.23476140201091766,
|
| 141 |
"learning_rate": 3.887395330218429e-05,
|
| 142 |
+
"loss": 0.9879,
|
| 143 |
"step": 18
|
| 144 |
},
|
| 145 |
{
|
| 146 |
"epoch": 2.0254777070063694,
|
| 147 |
+
"grad_norm": 0.3431682288646698,
|
| 148 |
"learning_rate": 3.3486046902241664e-05,
|
| 149 |
+
"loss": 1.6151,
|
| 150 |
"step": 19
|
| 151 |
},
|
| 152 |
{
|
| 153 |
"epoch": 2.127388535031847,
|
| 154 |
+
"grad_norm": 0.21315336227416992,
|
| 155 |
"learning_rate": 2.8305813044122097e-05,
|
| 156 |
+
"loss": 0.9605,
|
| 157 |
"step": 20
|
| 158 |
},
|
| 159 |
{
|
| 160 |
"epoch": 2.229299363057325,
|
| 161 |
+
"grad_norm": 0.20435573160648346,
|
| 162 |
"learning_rate": 2.3398396174233178e-05,
|
| 163 |
"loss": 0.925,
|
| 164 |
"step": 21
|
| 165 |
},
|
| 166 |
{
|
| 167 |
"epoch": 2.3312101910828025,
|
| 168 |
+
"grad_norm": 0.2289644032716751,
|
| 169 |
"learning_rate": 1.8825509907063327e-05,
|
| 170 |
+
"loss": 1.032,
|
| 171 |
"step": 22
|
| 172 |
},
|
| 173 |
{
|
| 174 |
"epoch": 2.43312101910828,
|
| 175 |
+
"grad_norm": 0.22667603194713593,
|
| 176 |
"learning_rate": 1.4644660940672627e-05,
|
| 177 |
+
"loss": 0.944,
|
| 178 |
"step": 23
|
| 179 |
},
|
| 180 |
{
|
| 181 |
"epoch": 2.535031847133758,
|
| 182 |
+
"grad_norm": 0.21269646286964417,
|
| 183 |
"learning_rate": 1.090842587659851e-05,
|
| 184 |
+
"loss": 0.9952,
|
| 185 |
"step": 24
|
| 186 |
},
|
| 187 |
{
|
| 188 |
"epoch": 2.6369426751592355,
|
| 189 |
+
"grad_norm": 0.21037527918815613,
|
| 190 |
"learning_rate": 7.663790038585793e-06,
|
| 191 |
+
"loss": 0.9544,
|
| 192 |
"step": 25
|
| 193 |
},
|
| 194 |
{
|
| 195 |
"epoch": 2.6369426751592355,
|
| 196 |
+
"eval_loss": 1.0476467609405518,
|
| 197 |
+
"eval_runtime": 2.2679,
|
| 198 |
+
"eval_samples_per_second": 29.102,
|
| 199 |
+
"eval_steps_per_second": 3.968,
|
| 200 |
"step": 25
|
| 201 |
+
},
|
| 202 |
+
{
|
| 203 |
+
"epoch": 2.738853503184713,
|
| 204 |
+
"grad_norm": 0.2182152420282364,
|
| 205 |
+
"learning_rate": 4.951556604879048e-06,
|
| 206 |
+
"loss": 0.9549,
|
| 207 |
+
"step": 26
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"epoch": 2.840764331210191,
|
| 211 |
+
"grad_norm": 0.20187175273895264,
|
| 212 |
+
"learning_rate": 2.8058334845816213e-06,
|
| 213 |
+
"loss": 0.9287,
|
| 214 |
+
"step": 27
|
| 215 |
+
},
|
| 216 |
+
{
|
| 217 |
+
"epoch": 2.9426751592356686,
|
| 218 |
+
"grad_norm": 0.4659386873245239,
|
| 219 |
+
"learning_rate": 1.2536043909088191e-06,
|
| 220 |
+
"loss": 1.4175,
|
| 221 |
+
"step": 28
|
| 222 |
+
},
|
| 223 |
+
{
|
| 224 |
+
"epoch": 3.089171974522293,
|
| 225 |
+
"grad_norm": 0.2993531823158264,
|
| 226 |
+
"learning_rate": 3.143895053378698e-07,
|
| 227 |
+
"loss": 1.082,
|
| 228 |
+
"step": 29
|
| 229 |
+
},
|
| 230 |
+
{
|
| 231 |
+
"epoch": 3.1910828025477707,
|
| 232 |
+
"grad_norm": 0.25595200061798096,
|
| 233 |
+
"learning_rate": 0.0,
|
| 234 |
+
"loss": 0.8877,
|
| 235 |
+
"step": 30
|
| 236 |
}
|
| 237 |
],
|
| 238 |
"logging_steps": 1,
|
|
|
|
| 256 |
"should_evaluate": false,
|
| 257 |
"should_log": false,
|
| 258 |
"should_save": true,
|
| 259 |
+
"should_training_stop": true
|
| 260 |
},
|
| 261 |
"attributes": {}
|
| 262 |
}
|
| 263 |
},
|
| 264 |
+
"total_flos": 1.68704369229824e+17,
|
| 265 |
"train_batch_size": 2,
|
| 266 |
"trial_name": null,
|
| 267 |
"trial_params": null
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6776
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9feb44dd06351a4ebb3399d9273df0fe7c81cda65ffedc861c36f176fba73ef5
|
| 3 |
size 6776
|