mamung commited on
Commit
afc6718
·
verified ·
1 Parent(s): d2416d8

Training in progress, step 63, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbc6981a9a3b464a92ca4893919b3ce592883f8cb80e5c66b94bd99f24ab4f8c
3
  size 100966336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:198b80ec28ce5ce0b3d455500f182b56ea364aa1528ced46756d7be144210032
3
  size 100966336
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca03c1d13032d2c82a90bfbcf54d219887e9cb321beff9e40593f7d0a5370d1e
3
  size 202110330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce776e8b16137f8ecdb7f2407056410613034570db744a54ed5a4598b6eed8d5
3
  size 202110330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de49c048068e4ff056f002888046f6d50e4b4c2a5714da6751173a978b5ccfc4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa8e202d1e724bd48d211480607ebff7d50e6294a8b7441e06c81ba075040699
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f68b4a4ba4603d307a8be653dd70e693a9f3506ea1af5f24d2d241dc54cfcb1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d85eac2daddfe8e3f78a5d6ef1e9ba13c04651694635a9ed76369a20726389db
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7275789473684211,
5
  "eval_steps": 50,
6
- "global_step": 54,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -401,6 +401,69 @@
401
  "learning_rate": 9.25084022891929e-05,
402
  "loss": 1.3016,
403
  "step": 54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
404
  }
405
  ],
406
  "logging_steps": 1,
@@ -420,7 +483,7 @@
420
  "attributes": {}
421
  }
422
  },
423
- "total_flos": 4.500448517239603e+16,
424
  "train_batch_size": 2,
425
  "trial_name": null,
426
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8488421052631578,
5
  "eval_steps": 50,
6
+ "global_step": 63,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
401
  "learning_rate": 9.25084022891929e-05,
402
  "loss": 1.3016,
403
  "step": 54
404
+ },
405
+ {
406
+ "epoch": 0.7410526315789474,
407
+ "grad_norm": 0.6199703812599182,
408
+ "learning_rate": 8.963177415120962e-05,
409
+ "loss": 1.5274,
410
+ "step": 55
411
+ },
412
+ {
413
+ "epoch": 0.7545263157894737,
414
+ "grad_norm": 0.6474433541297913,
415
+ "learning_rate": 8.673258487801731e-05,
416
+ "loss": 1.3812,
417
+ "step": 56
418
+ },
419
+ {
420
+ "epoch": 0.768,
421
+ "grad_norm": 0.5689646601676941,
422
+ "learning_rate": 8.381530480933783e-05,
423
+ "loss": 1.4005,
424
+ "step": 57
425
+ },
426
+ {
427
+ "epoch": 0.7814736842105263,
428
+ "grad_norm": 0.5995835661888123,
429
+ "learning_rate": 8.088443217958837e-05,
430
+ "loss": 1.2694,
431
+ "step": 58
432
+ },
433
+ {
434
+ "epoch": 0.7949473684210526,
435
+ "grad_norm": 0.5566097497940063,
436
+ "learning_rate": 7.794448618193015e-05,
437
+ "loss": 1.4341,
438
+ "step": 59
439
+ },
440
+ {
441
+ "epoch": 0.8084210526315789,
442
+ "grad_norm": 0.6118280291557312,
443
+ "learning_rate": 7.5e-05,
444
+ "loss": 1.378,
445
+ "step": 60
446
+ },
447
+ {
448
+ "epoch": 0.8218947368421052,
449
+ "grad_norm": 0.5662732124328613,
450
+ "learning_rate": 7.205551381806987e-05,
451
+ "loss": 1.4138,
452
+ "step": 61
453
+ },
454
+ {
455
+ "epoch": 0.8353684210526315,
456
+ "grad_norm": 0.5312877893447876,
457
+ "learning_rate": 6.911556782041163e-05,
458
+ "loss": 1.4356,
459
+ "step": 62
460
+ },
461
+ {
462
+ "epoch": 0.8488421052631578,
463
+ "grad_norm": 0.5712493062019348,
464
+ "learning_rate": 6.618469519066217e-05,
465
+ "loss": 1.4667,
466
+ "step": 63
467
  }
468
  ],
469
  "logging_steps": 1,
 
483
  "attributes": {}
484
  }
485
  },
486
+ "total_flos": 5.25052327011287e+16,
487
  "train_batch_size": 2,
488
  "trial_name": null,
489
  "trial_params": null