mamung committed (verified)
Commit 52654f5 · Parent(s): f90f043

Training in progress, step 27, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4067b0eaec418c2f628fb51d5a070c4a712a9f1b4002e70aad6ba6e83016b712
+oid sha256:91270bfed96898597e98a0dc329e749214ec79fc85295ed43e1076f41803c84a
 size 100966336
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b2306a49040dac8712d96023099d7ac11c0252b0a8f841164afd269598ad2c9
+oid sha256:22e6c4ca044bb438e090f58ead16e15cbf7d5f67d276507a6456b85d0f6b1746
 size 202110330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96541d88a7cf43aafce1f16ea1e16556284f71e18b276294cc3dc81a783e005f
+oid sha256:04b005073ce3487d1a9dc5d2831d6dd25abdac86b29562322c49da0afe478a92
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4b679dfcad285902b67d18379c2cc1ca0aebfc7646cb33fa82a3fb8ed15c820
+oid sha256:f1f9e237c4e244cd6a21b3069d52ab1ce3e784c965dcb77abb8266616185916c
 size 1064
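
Each file above is tracked with Git LFS, so the repository stores only a three-line pointer (version, oid, size); this commit swaps the sha256 oid in each pointer as the step-27 checkpoint overwrites the previous one. A minimal sketch, assuming the checkpoint files have been fetched locally (e.g. via `git lfs pull`), of checking a downloaded file against its pointer:

```python
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file and return the hex digest Git LFS records as the oid."""
    h = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# oid and size copied from the new adapter_model pointer in this commit
expected_oid = "91270bfed96898597e98a0dc329e749214ec79fc85295ed43e1076f41803c84a"
path = Path("last-checkpoint/adapter_model.safetensors")

assert path.stat().st_size == 100966336, "size line of the pointer should match"
assert sha256_of(path) == expected_oid, "oid line of the pointer should match"
```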
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.24252631578947367,
+  "epoch": 0.36378947368421055,
   "eval_steps": 50,
-  "global_step": 18,
+  "global_step": 27,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -141,6 +141,69 @@
       "learning_rate": 0.000135,
       "loss": 1.6469,
       "step": 18
+    },
+    {
+      "epoch": 0.256,
+      "grad_norm": 1.5747898817062378,
+      "learning_rate": 0.0001425,
+      "loss": 1.6895,
+      "step": 19
+    },
+    {
+      "epoch": 0.2694736842105263,
+      "grad_norm": 0.9997685551643372,
+      "learning_rate": 0.00015,
+      "loss": 1.5248,
+      "step": 20
+    },
+    {
+      "epoch": 0.2829473684210526,
+      "grad_norm": 1.195119857788086,
+      "learning_rate": 0.00014994217771805422,
+      "loss": 1.5649,
+      "step": 21
+    },
+    {
+      "epoch": 0.296421052631579,
+      "grad_norm": 0.8751718401908875,
+      "learning_rate": 0.00014976880002998458,
+      "loss": 1.5405,
+      "step": 22
+    },
+    {
+      "epoch": 0.3098947368421053,
+      "grad_norm": 0.8566117882728577,
+      "learning_rate": 0.00014948013427161947,
+      "loss": 1.5504,
+      "step": 23
+    },
+    {
+      "epoch": 0.3233684210526316,
+      "grad_norm": 0.7322584390640259,
+      "learning_rate": 0.00014907662554463532,
+      "loss": 1.5034,
+      "step": 24
+    },
+    {
+      "epoch": 0.3368421052631579,
+      "grad_norm": 0.9539948105812073,
+      "learning_rate": 0.00014855889603024227,
+      "loss": 1.4513,
+      "step": 25
+    },
+    {
+      "epoch": 0.3503157894736842,
+      "grad_norm": 0.7042058110237122,
+      "learning_rate": 0.00014792774402982574,
+      "loss": 1.5281,
+      "step": 26
+    },
+    {
+      "epoch": 0.36378947368421055,
+      "grad_norm": 0.6478146910667419,
+      "learning_rate": 0.0001471841427340235,
+      "loss": 1.5117,
+      "step": 27
     }
   ],
   "logging_steps": 1,
@@ -160,7 +223,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.5001495057465344e+16,
+  "total_flos": 2.2502242586198016e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null