aleegis commited on
Commit
b97bae9
·
verified ·
1 Parent(s): 86c5b89

Training in progress, step 900, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11d6aca27259c04e48ff1e6833110c9ce7f7cb359ca11513b3eb5c3401694577
3
  size 101752088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6451841ef2b8c92c8971b888cb431015469f38e2b5cec1d959730dc0e1c42e7
3
  size 101752088
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:110f815b2e61a6607ab28d0131cd25dcd3134a1d6d7e0a35eabcde2387a38bb9
3
  size 203719079
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fa8802dc8d44f42b89ec24821a6c0e450a89c27ef01f720e2c0102ae33bb5fb
3
  size 203719079
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bc83dc2d6c811943f930285a433310949280eb049ff76a77b592b75863af96c
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dec92e4cc6b814795e4f84a0ca7417301ac04559d3e6f54a353841c3dcd3b8d2
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13170990a11005b433d8dae9c4d2d14d2d8b2818aeb5b8e3b1626f654dee20a1
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a402f7671070d601bcd659808589d9caf0f10398950fa5d29b8d3b20f1d6b390
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.09504197687311897,
6
  "eval_steps": 500,
7
- "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -428,6 +428,216 @@
428
  "learning_rate": 9.700449715497961e-05,
429
  "loss": 1.5359,
430
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
  }
432
  ],
433
  "logging_steps": 10,
@@ -447,7 +657,7 @@
447
  "attributes": {}
448
  }
449
  },
450
- "total_flos": 1.97561520488448e+17,
451
  "train_batch_size": 16,
452
  "trial_name": null,
453
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.14256296530967844,
6
  "eval_steps": 500,
7
+ "global_step": 900,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
428
  "learning_rate": 9.700449715497961e-05,
429
  "loss": 1.5359,
430
  "step": 600
431
+ },
432
+ {
433
+ "epoch": 0.09662600982100428,
434
+ "grad_norm": 0.7383710741996765,
435
+ "learning_rate": 9.680297678694867e-05,
436
+ "loss": 1.5258,
437
+ "step": 610
438
+ },
439
+ {
440
+ "epoch": 0.09821004276888959,
441
+ "grad_norm": 0.6048529148101807,
442
+ "learning_rate": 9.659512004506057e-05,
443
+ "loss": 1.3593,
444
+ "step": 620
445
+ },
446
+ {
447
+ "epoch": 0.09979407571677491,
448
+ "grad_norm": 0.6163527369499207,
449
+ "learning_rate": 9.63809550697909e-05,
450
+ "loss": 1.3932,
451
+ "step": 630
452
+ },
453
+ {
454
+ "epoch": 0.10137810866466022,
455
+ "grad_norm": 0.6468575596809387,
456
+ "learning_rate": 9.616051085564906e-05,
457
+ "loss": 1.4886,
458
+ "step": 640
459
+ },
460
+ {
461
+ "epoch": 0.10296214161254554,
462
+ "grad_norm": 0.642622709274292,
463
+ "learning_rate": 9.593381724725285e-05,
464
+ "loss": 1.412,
465
+ "step": 650
466
+ },
467
+ {
468
+ "epoch": 0.10454617456043086,
469
+ "grad_norm": 0.6709442138671875,
470
+ "learning_rate": 9.570090493528809e-05,
471
+ "loss": 1.4161,
472
+ "step": 660
473
+ },
474
+ {
475
+ "epoch": 0.10613020750831617,
476
+ "grad_norm": 0.6280019283294678,
477
+ "learning_rate": 9.546180545235344e-05,
478
+ "loss": 1.4344,
479
+ "step": 670
480
+ },
481
+ {
482
+ "epoch": 0.10771424045620148,
483
+ "grad_norm": 0.5947321057319641,
484
+ "learning_rate": 9.52165511686915e-05,
485
+ "loss": 1.3838,
486
+ "step": 680
487
+ },
488
+ {
489
+ "epoch": 0.10929827340408681,
490
+ "grad_norm": 0.7332488894462585,
491
+ "learning_rate": 9.496517528780637e-05,
492
+ "loss": 1.3477,
493
+ "step": 690
494
+ },
495
+ {
496
+ "epoch": 0.11088230635197212,
497
+ "grad_norm": 0.6739678978919983,
498
+ "learning_rate": 9.47077118419684e-05,
499
+ "loss": 1.487,
500
+ "step": 700
501
+ },
502
+ {
503
+ "epoch": 0.11246633929985744,
504
+ "grad_norm": 0.6523484587669373,
505
+ "learning_rate": 9.444419568760684e-05,
506
+ "loss": 1.484,
507
+ "step": 710
508
+ },
509
+ {
510
+ "epoch": 0.11405037224774275,
511
+ "grad_norm": 0.6200110912322998,
512
+ "learning_rate": 9.417466250059073e-05,
513
+ "loss": 1.3793,
514
+ "step": 720
515
+ },
516
+ {
517
+ "epoch": 0.11563440519562806,
518
+ "grad_norm": 0.6055252552032471,
519
+ "learning_rate": 9.389914877139903e-05,
520
+ "loss": 1.3878,
521
+ "step": 730
522
+ },
523
+ {
524
+ "epoch": 0.11721843814351339,
525
+ "grad_norm": 0.620250940322876,
526
+ "learning_rate": 9.361769180018038e-05,
527
+ "loss": 1.3316,
528
+ "step": 740
529
+ },
530
+ {
531
+ "epoch": 0.1188024710913987,
532
+ "grad_norm": 0.590551495552063,
533
+ "learning_rate": 9.333032969170326e-05,
534
+ "loss": 1.3479,
535
+ "step": 750
536
+ },
537
+ {
538
+ "epoch": 0.12038650403928401,
539
+ "grad_norm": 0.6573076844215393,
540
+ "learning_rate": 9.30371013501972e-05,
541
+ "loss": 1.3434,
542
+ "step": 760
543
+ },
544
+ {
545
+ "epoch": 0.12197053698716934,
546
+ "grad_norm": 0.6856533288955688,
547
+ "learning_rate": 9.273804647408575e-05,
548
+ "loss": 1.3815,
549
+ "step": 770
550
+ },
551
+ {
552
+ "epoch": 0.12355456993505465,
553
+ "grad_norm": 0.6879425644874573,
554
+ "learning_rate": 9.243320555061205e-05,
555
+ "loss": 1.3747,
556
+ "step": 780
557
+ },
558
+ {
559
+ "epoch": 0.12513860288293996,
560
+ "grad_norm": 0.5395861864089966,
561
+ "learning_rate": 9.212261985035739e-05,
562
+ "loss": 1.4633,
563
+ "step": 790
564
+ },
565
+ {
566
+ "epoch": 0.12672263583082527,
567
+ "grad_norm": 0.66850346326828,
568
+ "learning_rate": 9.180633142165384e-05,
569
+ "loss": 1.4978,
570
+ "step": 800
571
+ },
572
+ {
573
+ "epoch": 0.1283066687787106,
574
+ "grad_norm": 0.6203956007957458,
575
+ "learning_rate": 9.148438308489168e-05,
576
+ "loss": 1.3428,
577
+ "step": 810
578
+ },
579
+ {
580
+ "epoch": 0.12989070172659592,
581
+ "grad_norm": 0.8913874626159668,
582
+ "learning_rate": 9.11568184267221e-05,
583
+ "loss": 1.4052,
584
+ "step": 820
585
+ },
586
+ {
587
+ "epoch": 0.13147473467448123,
588
+ "grad_norm": 0.745405375957489,
589
+ "learning_rate": 9.082368179415632e-05,
590
+ "loss": 1.3781,
591
+ "step": 830
592
+ },
593
+ {
594
+ "epoch": 0.13305876762236654,
595
+ "grad_norm": 0.7052398324012756,
596
+ "learning_rate": 9.04850182885617e-05,
597
+ "loss": 1.378,
598
+ "step": 840
599
+ },
600
+ {
601
+ "epoch": 0.13464280057025185,
602
+ "grad_norm": 0.7111234664916992,
603
+ "learning_rate": 9.014087375955573e-05,
604
+ "loss": 1.4304,
605
+ "step": 850
606
+ },
607
+ {
608
+ "epoch": 0.13622683351813716,
609
+ "grad_norm": 0.620119571685791,
610
+ "learning_rate": 8.979129479879873e-05,
611
+ "loss": 1.3285,
612
+ "step": 860
613
+ },
614
+ {
615
+ "epoch": 0.1378108664660225,
616
+ "grad_norm": 0.7514825463294983,
617
+ "learning_rate": 8.943632873368611e-05,
618
+ "loss": 1.3782,
619
+ "step": 870
620
+ },
621
+ {
622
+ "epoch": 0.1393948994139078,
623
+ "grad_norm": 0.6254695057868958,
624
+ "learning_rate": 8.907602362094094e-05,
625
+ "loss": 1.4062,
626
+ "step": 880
627
+ },
628
+ {
629
+ "epoch": 0.14097893236179312,
630
+ "grad_norm": 0.6469830870628357,
631
+ "learning_rate": 8.871042824010791e-05,
632
+ "loss": 1.2769,
633
+ "step": 890
634
+ },
635
+ {
636
+ "epoch": 0.14256296530967844,
637
+ "grad_norm": 0.7647883296012878,
638
+ "learning_rate": 8.833959208694929e-05,
639
+ "loss": 1.3646,
640
+ "step": 900
641
  }
642
  ],
643
  "logging_steps": 10,
 
657
  "attributes": {}
658
  }
659
  },
660
+ "total_flos": 2.96342280732672e+17,
661
  "train_batch_size": 16,
662
  "trial_name": null,
663
  "trial_params": null