mamung committed
Commit ea2b08a (verified) · 1 Parent(s): 098ea41

Training in progress, step 36, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91270bfed96898597e98a0dc329e749214ec79fc85295ed43e1076f41803c84a
+oid sha256:3e2a11bd08b0ede80777f9f8f3c7437a225c58391a66b01506eb3cece79f6024
 size 100966336
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:22e6c4ca044bb438e090f58ead16e15cbf7d5f67d276507a6456b85d0f6b1746
+oid sha256:27e07579212c22b474078f9b9c9bfd9ca5219d64cd55eabce4922a6cbd24d85f
 size 202110330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04b005073ce3487d1a9dc5d2831d6dd25abdac86b29562322c49da0afe478a92
+oid sha256:73fcedd9bdb45aa888b5c9fc0ef207fe6bded7b834698fcaf3cd7d681d79886f
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1f9e237c4e244cd6a21b3069d52ab1ce3e784c965dcb77abb8266616185916c
+oid sha256:e9573e9d18ad7803441c819253e1705231d5811c0e4f29747c0574440f588f00
 size 1064
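
Each of the four checkpoint binaries above is tracked with Git LFS, so the repository only stores a small pointer file (version, oid sha256:..., size); this commit swaps the old object hashes for new ones while the file sizes stay the same. A minimal sketch of how one could check a locally downloaded object against its pointer, with hypothetical local paths:

# Sketch: verify a downloaded checkpoint file against its Git LFS pointer.
# Paths below are placeholders; adjust to wherever the pointer and the
# actual object live locally.
import hashlib

def read_pointer(pointer_path):
    # A Git LFS pointer is a few "key value" lines: version, oid, size.
    fields = {}
    with open(pointer_path, "r") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def verify(pointer_path, object_path):
    fields = read_pointer(pointer_path)
    expected_oid = fields["oid"].split(":", 1)[1]   # drop the "sha256:" prefix
    expected_size = int(fields["size"])

    sha = hashlib.sha256()
    size = 0
    with open(object_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
            size += len(chunk)

    return sha.hexdigest() == expected_oid and size == expected_size

# Example (hypothetical paths):
# verify("adapter_model.safetensors.pointer", "last-checkpoint/adapter_model.safetensors")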
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.36378947368421055,
+  "epoch": 0.48505263157894735,
   "eval_steps": 50,
-  "global_step": 27,
+  "global_step": 36,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -204,6 +204,69 @@
       "learning_rate": 0.0001471841427340235,
       "loss": 1.5117,
       "step": 27
+    },
+    {
+      "epoch": 0.37726315789473686,
+      "grad_norm": 0.6267299652099609,
+      "learning_rate": 0.00014632923872213652,
+      "loss": 1.383,
+      "step": 28
+    },
+    {
+      "epoch": 0.39073684210526316,
+      "grad_norm": 0.7713648676872253,
+      "learning_rate": 0.0001453643501941863,
+      "loss": 1.4844,
+      "step": 29
+    },
+    {
+      "epoch": 0.40421052631578946,
+      "grad_norm": 0.6838952898979187,
+      "learning_rate": 0.0001442909649383465,
+      "loss": 1.4825,
+      "step": 30
+    },
+    {
+      "epoch": 0.41768421052631577,
+      "grad_norm": 0.71690434217453,
+      "learning_rate": 0.0001431107380368811,
+      "loss": 1.4357,
+      "step": 31
+    },
+    {
+      "epoch": 0.43115789473684213,
+      "grad_norm": 0.6745509505271912,
+      "learning_rate": 0.00014182548931412757,
+      "loss": 1.4733,
+      "step": 32
+    },
+    {
+      "epoch": 0.44463157894736843,
+      "grad_norm": 0.7103040814399719,
+      "learning_rate": 0.0001404372005304598,
+      "loss": 1.3857,
+      "step": 33
+    },
+    {
+      "epoch": 0.45810526315789474,
+      "grad_norm": 0.6221896409988403,
+      "learning_rate": 0.0001389480123265569,
+      "loss": 1.2527,
+      "step": 34
+    },
+    {
+      "epoch": 0.47157894736842104,
+      "grad_norm": 0.562971293926239,
+      "learning_rate": 0.0001373602209226909,
+      "loss": 1.4486,
+      "step": 35
+    },
+    {
+      "epoch": 0.48505263157894735,
+      "grad_norm": 0.5778741240501404,
+      "learning_rate": 0.00013567627457812106,
+      "loss": 1.4134,
+      "step": 36
+    }
   ],
   "logging_steps": 1,
@@ -223,7 +286,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.2502242586198016e+16,
+  "total_flos": 3.000299011493069e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null