mamung committed · verified
Commit 51386f7 · Parent(s): 4faeb63

Training in progress, step 72, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:198b80ec28ce5ce0b3d455500f182b56ea364aa1528ced46756d7be144210032
+oid sha256:b6e702b14829d11f50416692b7314645eed8edb9ef55004a134630ab89f21564
 size 100966336
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce776e8b16137f8ecdb7f2407056410613034570db744a54ed5a4598b6eed8d5
+oid sha256:51a6dc0f4490063fd77dbc7abd15e069a4ceba0555b0127116aa94a284c08b7f
 size 202110330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa8e202d1e724bd48d211480607ebff7d50e6294a8b7441e06c81ba075040699
+oid sha256:9df98b952a993c028712d01917c33ffa810e4469add5fd029bc7022e9ce56793
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d85eac2daddfe8e3f78a5d6ef1e9ba13c04651694635a9ed76369a20726389db
+oid sha256:8d43bd90ad476e419738deb9472ad85fd5991005a147e1627aa99867bdfc5655
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8488421052631578,
+  "epoch": 0.9701052631578947,
   "eval_steps": 50,
-  "global_step": 63,
+  "global_step": 72,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -464,6 +464,69 @@
       "learning_rate": 6.618469519066217e-05,
       "loss": 1.4667,
       "step": 63
+    },
+    {
+      "epoch": 0.8623157894736843,
+      "grad_norm": 0.5462284684181213,
+      "learning_rate": 6.326741512198266e-05,
+      "loss": 1.4505,
+      "step": 64
+    },
+    {
+      "epoch": 0.8757894736842106,
+      "grad_norm": 0.6210593581199646,
+      "learning_rate": 6.036822584879038e-05,
+      "loss": 1.2947,
+      "step": 65
+    },
+    {
+      "epoch": 0.8892631578947369,
+      "grad_norm": 0.6453770399093628,
+      "learning_rate": 5.7491597710807114e-05,
+      "loss": 1.3575,
+      "step": 66
+    },
+    {
+      "epoch": 0.9027368421052632,
+      "grad_norm": 0.6173303127288818,
+      "learning_rate": 5.464196626011943e-05,
+      "loss": 1.3685,
+      "step": 67
+    },
+    {
+      "epoch": 0.9162105263157895,
+      "grad_norm": 0.6161783933639526,
+      "learning_rate": 5.182372542187895e-05,
+      "loss": 1.5084,
+      "step": 68
+    },
+    {
+      "epoch": 0.9296842105263158,
+      "grad_norm": 0.5926702618598938,
+      "learning_rate": 4.904122071918801e-05,
+      "loss": 1.5106,
+      "step": 69
+    },
+    {
+      "epoch": 0.9431578947368421,
+      "grad_norm": 0.7311588525772095,
+      "learning_rate": 4.6298742572618266e-05,
+      "loss": 1.3789,
+      "step": 70
+    },
+    {
+      "epoch": 0.9566315789473684,
+      "grad_norm": 0.5569392442703247,
+      "learning_rate": 4.360051968469291e-05,
+      "loss": 1.2037,
+      "step": 71
+    },
+    {
+      "epoch": 0.9701052631578947,
+      "grad_norm": 0.49740126729011536,
+      "learning_rate": 4.095071251953399e-05,
+      "loss": 1.3472,
+      "step": 72
     }
   ],
   "logging_steps": 1,
@@ -483,7 +546,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.25052327011287e+16,
+  "total_flos": 6.000598022986138e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null