robertou2 committed
Commit 09af9dc · verified · 1 Parent(s): 51ba8c3

Upload folder using huggingface_hub

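The commit message points to the `huggingface_hub` client. A minimal sketch of how such a folder upload is typically issued is shown below; the local folder name and the target `repo_id` are placeholders, not values taken from this commit.

```python
# Hedged sketch: push a local checkpoint folder to the Hub.
# The folder name and repo_id below are assumed placeholders, not values from this commit.
from huggingface_hub import HfApi

api = HfApi()  # picks up the locally saved Hub token (e.g. from `huggingface-cli login`)
api.upload_folder(
    folder_path="checkpoint-100",       # local folder holding the files listed in this diff
    repo_id="robertou2/adapter-repo",   # placeholder repo id
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```
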
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c6f665aea08d472cc3f5d0f6e71d5e883bc0279c9d062195f885f34c2bb67b48
+ oid sha256:f67fb93d73ef689e29fced3646888b9acac71ded7ce8bdc2e47a329b3d916111
  size 957942768
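
The binary files in this commit are stored as Git LFS pointers: the repository tracks only the `version`, `oid sha256:<digest>`, and `size` fields shown in the diff above, while the actual payload lives in LFS storage. A small sketch, assuming a local copy of the adapter file, for checking that a download matches the updated pointer:

```python
# Check a downloaded file against the sha256 oid recorded in its Git LFS pointer.
import hashlib

def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
    """Return the hex SHA-256 digest that an LFS pointer stores as `oid sha256:<digest>`."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# oid from the updated adapter_model.safetensors pointer above
expected = "f67fb93d73ef689e29fced3646888b9acac71ded7ce8bdc2e47a329b3d916111"
print(lfs_oid("adapter_model.safetensors") == expected)
```
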
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9a2f762093b999ed3fc8a8354947768bf9687041ad4dc1b3381d3beccc4d1ed3
+ oid sha256:74d8c8563d3fd92da4fa183b9c5a3bef0b8fabc91f3062232d31df923404a061
  size 1916174411
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2534e434cd5abbb8f7668d3eab0549db0ef95d6a797a3efa86b712e8e32266a7
+ oid sha256:e5b517d1b8e2b0f837c8b00170b154961d4d989feba4326ac25583df7a55c57a
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2b81e6637930ee4c266e39d1098c46fbcb467cca865ee0c4abd8c44486bdac38
+ oid sha256:5c4e44404b58ce3af1b46c3d4a85a59edbbc386f340c476e894715a1199e1aed
  size 1465
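
`optimizer.pt`, `scheduler.pt`, and `rng_state.pth` are the state files the Transformers `Trainer` saves next to the weights so a run can be resumed deterministically. A hedged sketch of resuming from a local copy of this checkpoint follows; `build_model()` and `build_dataset()` are hypothetical helpers standing in for whatever produced the original run, and the folder name is assumed.

```python
# Hedged sketch: resume training from a local copy of this checkpoint folder.
# build_model() and build_dataset() are hypothetical stand-ins, not part of this repository.
from transformers import Trainer, TrainingArguments

def resume(checkpoint_dir: str = "checkpoint-100") -> None:
    args = TrainingArguments(
        output_dir="outputs",
        per_device_train_batch_size=2,  # matches "train_batch_size": 2 in trainer_state.json
        num_train_epochs=5,             # matches the final "epoch": 5.0
        logging_steps=1,                # matches "logging_steps": 1
    )
    trainer = Trainer(model=build_model(), args=args, train_dataset=build_dataset())
    # Restores optimizer, LR scheduler, and RNG state from the checkpoint before continuing.
    trainer.train(resume_from_checkpoint=checkpoint_dir)
```
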
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 3.0,
+ "epoch": 5.0,
  "eval_steps": 500,
- "global_step": 60,
+ "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -428,6 +428,286 @@
  "learning_rate": 2.361471312710075e-05,
  "loss": 0.5706,
  "step": 60
+ },
+ {
+ "epoch": 3.050632911392405,
+ "grad_norm": 5.2546257972717285,
+ "learning_rate": 2.2693291013417453e-05,
+ "loss": 0.6983,
+ "step": 61
+ },
+ {
+ "epoch": 3.1012658227848102,
+ "grad_norm": 5.815437316894531,
+ "learning_rate": 2.1775019586744923e-05,
+ "loss": 0.9768,
+ "step": 62
+ },
+ {
+ "epoch": 3.151898734177215,
+ "grad_norm": 5.194660186767578,
+ "learning_rate": 2.0861153095396748e-05,
+ "loss": 0.6243,
+ "step": 63
+ },
+ {
+ "epoch": 3.2025316455696204,
+ "grad_norm": 4.012391567230225,
+ "learning_rate": 1.995293977107475e-05,
+ "loss": 0.469,
+ "step": 64
+ },
+ {
+ "epoch": 3.2531645569620253,
+ "grad_norm": 5.675468444824219,
+ "learning_rate": 1.9051620123934537e-05,
+ "loss": 0.6084,
+ "step": 65
+ },
+ {
+ "epoch": 3.3037974683544302,
+ "grad_norm": 5.8908209800720215,
+ "learning_rate": 1.815842524819793e-05,
+ "loss": 0.648,
+ "step": 66
+ },
+ {
+ "epoch": 3.3544303797468356,
+ "grad_norm": 7.725429534912109,
+ "learning_rate": 1.7274575140626318e-05,
+ "loss": 0.6949,
+ "step": 67
+ },
+ {
+ "epoch": 3.4050632911392404,
+ "grad_norm": 6.168173313140869,
+ "learning_rate": 1.6401277034151798e-05,
+ "loss": 0.9213,
+ "step": 68
+ },
+ {
+ "epoch": 3.4556962025316453,
+ "grad_norm": 6.947693347930908,
+ "learning_rate": 1.5539723748942245e-05,
+ "loss": 0.7397,
+ "step": 69
+ },
+ {
+ "epoch": 3.5063291139240507,
+ "grad_norm": 5.9794206619262695,
+ "learning_rate": 1.4691092063152417e-05,
+ "loss": 0.5009,
+ "step": 70
+ },
+ {
+ "epoch": 3.5569620253164556,
+ "grad_norm": 5.66774320602417,
+ "learning_rate": 1.3856541105586545e-05,
+ "loss": 0.5204,
+ "step": 71
+ },
+ {
+ "epoch": 3.607594936708861,
+ "grad_norm": 8.234807014465332,
+ "learning_rate": 1.303721077246784e-05,
+ "loss": 0.8793,
+ "step": 72
+ },
+ {
+ "epoch": 3.6582278481012658,
+ "grad_norm": 8.785400390625,
+ "learning_rate": 1.223422017047733e-05,
+ "loss": 0.6229,
+ "step": 73
+ },
+ {
+ "epoch": 3.708860759493671,
+ "grad_norm": 6.376526832580566,
+ "learning_rate": 1.1448666088188764e-05,
+ "loss": 0.6154,
+ "step": 74
+ },
+ {
+ "epoch": 3.759493670886076,
+ "grad_norm": 7.004448413848877,
+ "learning_rate": 1.068162149798737e-05,
+ "loss": 0.7203,
+ "step": 75
+ },
+ {
+ "epoch": 3.810126582278481,
+ "grad_norm": 5.858279705047607,
+ "learning_rate": 9.934134090518593e-06,
+ "loss": 0.5153,
+ "step": 76
+ },
+ {
+ "epoch": 3.8607594936708862,
+ "grad_norm": 7.578220844268799,
+ "learning_rate": 9.207224843668732e-06,
+ "loss": 0.7153,
+ "step": 77
+ },
+ {
+ "epoch": 3.911392405063291,
+ "grad_norm": 7.869601249694824,
+ "learning_rate": 8.50188662803194e-06,
+ "loss": 0.6988,
+ "step": 78
+ },
+ {
+ "epoch": 3.962025316455696,
+ "grad_norm": 6.777385234832764,
+ "learning_rate": 7.819082850768434e-06,
+ "loss": 0.6007,
+ "step": 79
+ },
+ {
+ "epoch": 4.0,
+ "grad_norm": 6.161752223968506,
+ "learning_rate": 7.159746139706194e-06,
+ "loss": 0.4779,
+ "step": 80
+ },
+ {
+ "epoch": 4.050632911392405,
+ "grad_norm": 5.206139087677002,
+ "learning_rate": 6.524777069483526e-06,
+ "loss": 0.4173,
+ "step": 81
+ },
+ {
+ "epoch": 4.10126582278481,
+ "grad_norm": 4.832441329956055,
+ "learning_rate": 5.915042931472425e-06,
+ "loss": 0.4491,
+ "step": 82
+ },
+ {
+ "epoch": 4.151898734177215,
+ "grad_norm": 4.783233165740967,
+ "learning_rate": 5.33137654916292e-06,
+ "loss": 0.3311,
+ "step": 83
+ },
+ {
+ "epoch": 4.2025316455696204,
+ "grad_norm": 3.099482536315918,
+ "learning_rate": 4.7745751406263165e-06,
+ "loss": 0.2116,
+ "step": 84
+ },
+ {
+ "epoch": 4.253164556962025,
+ "grad_norm": 5.326932907104492,
+ "learning_rate": 4.245399229611238e-06,
+ "loss": 0.3897,
+ "step": 85
+ },
+ {
+ "epoch": 4.30379746835443,
+ "grad_norm": 4.431222915649414,
+ "learning_rate": 3.7445716067596503e-06,
+ "loss": 0.4973,
+ "step": 86
+ },
+ {
+ "epoch": 4.3544303797468356,
+ "grad_norm": 4.217422008514404,
+ "learning_rate": 3.2727763423617913e-06,
+ "loss": 0.182,
+ "step": 87
+ },
+ {
+ "epoch": 4.405063291139241,
+ "grad_norm": 5.346303462982178,
+ "learning_rate": 2.8306578519984527e-06,
+ "loss": 0.5239,
+ "step": 88
+ },
+ {
+ "epoch": 4.455696202531645,
+ "grad_norm": 8.100042343139648,
+ "learning_rate": 2.418820016346779e-06,
+ "loss": 0.2284,
+ "step": 89
+ },
+ {
+ "epoch": 4.506329113924051,
+ "grad_norm": 4.507992267608643,
+ "learning_rate": 2.0378253563519247e-06,
+ "loss": 0.3344,
+ "step": 90
+ },
+ {
+ "epoch": 4.556962025316456,
+ "grad_norm": 4.841477394104004,
+ "learning_rate": 1.6881942648911076e-06,
+ "loss": 0.2872,
+ "step": 91
+ },
+ {
+ "epoch": 4.6075949367088604,
+ "grad_norm": 4.839809417724609,
+ "learning_rate": 1.3704042959795132e-06,
+ "loss": 0.5436,
+ "step": 92
+ },
+ {
+ "epoch": 4.658227848101266,
+ "grad_norm": 3.7410666942596436,
+ "learning_rate": 1.0848895124889818e-06,
+ "loss": 0.3488,
+ "step": 93
+ },
+ {
+ "epoch": 4.708860759493671,
+ "grad_norm": 5.837460041046143,
+ "learning_rate": 8.320398932703144e-07,
+ "loss": 0.458,
+ "step": 94
+ },
+ {
+ "epoch": 4.759493670886076,
+ "grad_norm": 5.102079391479492,
+ "learning_rate": 6.122008004890851e-07,
+ "loss": 0.2965,
+ "step": 95
+ },
+ {
+ "epoch": 4.810126582278481,
+ "grad_norm": 4.543964385986328,
+ "learning_rate": 4.256725079024554e-07,
+ "loss": 0.2352,
+ "step": 96
+ },
+ {
+ "epoch": 4.860759493670886,
+ "grad_norm": 4.671619415283203,
+ "learning_rate": 2.7270979072135104e-07,
+ "loss": 0.3958,
+ "step": 97
+ },
+ {
+ "epoch": 4.911392405063291,
+ "grad_norm": 5.004724979400635,
+ "learning_rate": 1.5352157761815977e-07,
+ "loss": 0.3075,
+ "step": 98
+ },
+ {
+ "epoch": 4.962025316455696,
+ "grad_norm": 4.00545597076416,
+ "learning_rate": 6.827066535529946e-08,
+ "loss": 0.3152,
+ "step": 99
+ },
+ {
+ "epoch": 5.0,
+ "grad_norm": 5.139050483703613,
+ "learning_rate": 1.7073496424427348e-08,
+ "loss": 0.1744,
+ "step": 100
  }
  ],
  "logging_steps": 1,
@@ -442,12 +722,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 1.837409766703104e+16,
+ "total_flos": 3.0794717131554816e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null