mamung committed
Commit ddcb544 · verified · 1 parent: 271cfbd

Training in progress, step 81, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6e702b14829d11f50416692b7314645eed8edb9ef55004a134630ab89f21564
+oid sha256:703111afd8de5da31434bbffcf1751ae8c1729bfa301e3e14ad6aaec1d7b1465
 size 100966336
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51a6dc0f4490063fd77dbc7abd15e069a4ceba0555b0127116aa94a284c08b7f
+oid sha256:03db6c7c5ee7f73fa9f854f673d04717d0ae9a0bf00b4dbd1c0ad0b07dbff2a0
 size 202110330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9df98b952a993c028712d01917c33ffa810e4469add5fd029bc7022e9ce56793
+oid sha256:09fbf705d0b0068815c843aaa33a8af71fae90cfe1af355c27f9a01b44f5b835
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8d43bd90ad476e419738deb9472ad85fd5991005a147e1627aa99867bdfc5655
+oid sha256:09ab64d5ace66796aa9c2fa2fc4e0206d69a1eb4ef03f574f85ee8eb16a64b71
 size 1064
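
The four binary checkpoint files above are tracked with Git LFS, so each diff only rewrites the small pointer file (version, oid sha256, size); the payloads themselves are swapped out in the LFS object store. As a rough sketch of how one might confirm that a locally downloaded file matches its pointer (the local path below is an assumption, not part of this commit):

```python
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file so large checkpoint shards need not fit in memory."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Values copied from the new adapter_model.safetensors pointer in this commit.
expected_oid = "703111afd8de5da31434bbffcf1751ae8c1729bfa301e3e14ad6aaec1d7b1465"
expected_size = 100_966_336

local = Path("last-checkpoint/adapter_model.safetensors")  # assumed path in a local clone
assert local.stat().st_size == expected_size, "size does not match the LFS pointer"
assert sha256_of(local) == expected_oid, "sha256 does not match the LFS pointer"
print("adapter_model.safetensors matches its LFS pointer")
```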
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9701052631578947,
+  "epoch": 1.0913684210526315,
   "eval_steps": 50,
-  "global_step": 72,
+  "global_step": 81,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -527,6 +527,69 @@
       "learning_rate": 4.095071251953399e-05,
       "loss": 1.3472,
       "step": 72
+    },
+    {
+      "epoch": 0.983578947368421,
+      "grad_norm": 0.5706843733787537,
+      "learning_rate": 3.83534068877284e-05,
+      "loss": 1.4041,
+      "step": 73
+    },
+    {
+      "epoch": 0.9970526315789474,
+      "grad_norm": 0.5967234373092651,
+      "learning_rate": 3.5812607646303834e-05,
+      "loss": 1.31,
+      "step": 74
+    },
+    {
+      "epoch": 1.0105263157894737,
+      "grad_norm": 1.052331566810608,
+      "learning_rate": 3.333223252352985e-05,
+      "loss": 2.0664,
+      "step": 75
+    },
+    {
+      "epoch": 1.024,
+      "grad_norm": 0.6153193712234497,
+      "learning_rate": 3.091610607806452e-05,
+      "loss": 1.516,
+      "step": 76
+    },
+    {
+      "epoch": 1.0374736842105263,
+      "grad_norm": 0.5821354389190674,
+      "learning_rate": 2.856795380176244e-05,
+      "loss": 1.2732,
+      "step": 77
+    },
+    {
+      "epoch": 1.0509473684210526,
+      "grad_norm": 0.6261878609657288,
+      "learning_rate": 2.6291396375236232e-05,
+      "loss": 1.2817,
+      "step": 78
+    },
+    {
+      "epoch": 1.064421052631579,
+      "grad_norm": 0.5795064568519592,
+      "learning_rate": 2.4089944085029363e-05,
+      "loss": 1.3216,
+      "step": 79
+    },
+    {
+      "epoch": 1.0778947368421052,
+      "grad_norm": 0.5134410262107849,
+      "learning_rate": 2.1966991411008938e-05,
+      "loss": 1.2917,
+      "step": 80
+    },
+    {
+      "epoch": 1.0913684210526315,
+      "grad_norm": 0.6312588453292847,
+      "learning_rate": 1.99258117923236e-05,
+      "loss": 1.1945,
+      "step": 81
     }
   ],
   "logging_steps": 1,
@@ -546,7 +609,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.000598022986138e+16,
+  "total_flos": 6.750672775859405e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null