xzuyn committed
Commit 6809a40 · verified · 1 Parent(s): 38a1331

Upload Step 100/22483

Files changed (2)
  1. adapter_model.safetensors +1 -1
  2. trainer_state.json +471 -3
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d45b5952e88114a6552c77c415d93bccbdb91c0d5b4e0002e1cd41c4383b7f82
+ oid sha256:eef2a76f8c9c355a2831ca4dce9743f1165d7eec991513bcb577ef444d71f7a6
  size 1047628488
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.0017791220032913758,
+ "epoch": 0.00444780500822844,
  "eval_steps": 10,
- "global_step": 40,
+ "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -328,6 +328,474 @@
  "eval_samples_per_second": 0.273,
  "eval_steps_per_second": 0.068,
  "step": 40
+ },
+ {
+ "epoch": 0.0018236000533736602,
+ "grad_norm": 0.45847609639167786,
+ "learning_rate": 9.998264731957889e-07,
+ "loss": 1.8686,
+ "step": 41
+ },
+ {
+ "epoch": 0.0018680781034559446,
+ "grad_norm": 0.33932119607925415,
+ "learning_rate": 9.998222337069841e-07,
+ "loss": 2.2302,
+ "step": 42
+ },
+ {
+ "epoch": 0.001912556153538229,
+ "grad_norm": 1.3315902948379517,
+ "learning_rate": 9.998179938781784e-07,
+ "loss": 1.8721,
+ "step": 43
+ },
+ {
+ "epoch": 0.001957034203620513,
+ "grad_norm": 1.2163060903549194,
+ "learning_rate": 9.99813753709331e-07,
+ "loss": 2.1642,
+ "step": 44
+ },
+ {
+ "epoch": 0.0020015122537027975,
+ "grad_norm": 0.4775317907333374,
+ "learning_rate": 9.99809513200401e-07,
+ "loss": 2.4106,
+ "step": 45
+ },
+ {
+ "epoch": 0.002045990303785082,
+ "grad_norm": 0.28207871317863464,
+ "learning_rate": 9.998052723513476e-07,
+ "loss": 2.058,
+ "step": 46
+ },
+ {
+ "epoch": 0.0020904683538673662,
+ "grad_norm": 0.5639588236808777,
+ "learning_rate": 9.998010311621295e-07,
+ "loss": 2.2522,
+ "step": 47
+ },
+ {
+ "epoch": 0.002134946403949651,
+ "grad_norm": 0.45129290223121643,
+ "learning_rate": 9.997967896327061e-07,
+ "loss": 2.3281,
+ "step": 48
+ },
+ {
+ "epoch": 0.0021794244540319354,
+ "grad_norm": 0.4590243101119995,
+ "learning_rate": 9.997925477630364e-07,
+ "loss": 2.1991,
+ "step": 49
+ },
+ {
+ "epoch": 0.00222390250411422,
+ "grad_norm": 0.3881557881832123,
+ "learning_rate": 9.997883055530797e-07,
+ "loss": 2.2259,
+ "step": 50
+ },
+ {
+ "epoch": 0.00222390250411422,
+ "eval_loss": 1.9868642091751099,
+ "eval_runtime": 233.6732,
+ "eval_samples_per_second": 0.274,
+ "eval_steps_per_second": 0.068,
+ "step": 50
+ },
+ {
+ "epoch": 0.002268380554196504,
+ "grad_norm": 0.4352044463157654,
+ "learning_rate": 9.997840630027944e-07,
+ "loss": 2.2623,
+ "step": 51
+ },
+ {
+ "epoch": 0.0023128586042787886,
+ "grad_norm": 0.43397316336631775,
+ "learning_rate": 9.997798201121402e-07,
+ "loss": 1.8368,
+ "step": 52
+ },
+ {
+ "epoch": 0.002357336654361073,
+ "grad_norm": 0.38807961344718933,
+ "learning_rate": 9.99775576881076e-07,
+ "loss": 2.0101,
+ "step": 53
+ },
+ {
+ "epoch": 0.0024018147044433573,
+ "grad_norm": 0.6237647533416748,
+ "learning_rate": 9.997713333095603e-07,
+ "loss": 2.0238,
+ "step": 54
+ },
+ {
+ "epoch": 0.0024462927545256417,
+ "grad_norm": 0.34039369225502014,
+ "learning_rate": 9.997670893975529e-07,
+ "loss": 2.1,
+ "step": 55
+ },
+ {
+ "epoch": 0.002490770804607926,
+ "grad_norm": 0.4221118986606598,
+ "learning_rate": 9.997628451450122e-07,
+ "loss": 2.1649,
+ "step": 56
+ },
+ {
+ "epoch": 0.0025352488546902105,
+ "grad_norm": 0.38328638672828674,
+ "learning_rate": 9.997586005518976e-07,
+ "loss": 2.1151,
+ "step": 57
+ },
+ {
+ "epoch": 0.002579726904772495,
+ "grad_norm": 0.3308090567588806,
+ "learning_rate": 9.997543556181679e-07,
+ "loss": 2.1499,
+ "step": 58
+ },
+ {
+ "epoch": 0.0026242049548547792,
+ "grad_norm": 0.3598516881465912,
+ "learning_rate": 9.99750110343782e-07,
+ "loss": 2.1146,
+ "step": 59
+ },
+ {
+ "epoch": 0.0026686830049370636,
+ "grad_norm": 0.38582974672317505,
+ "learning_rate": 9.997458647286993e-07,
+ "loss": 1.8236,
+ "step": 60
+ },
+ {
+ "epoch": 0.0026686830049370636,
+ "eval_loss": 1.9794503450393677,
+ "eval_runtime": 312.5608,
+ "eval_samples_per_second": 0.205,
+ "eval_steps_per_second": 0.051,
+ "step": 60
+ },
+ {
+ "epoch": 0.002713161055019348,
+ "grad_norm": 0.5031485557556152,
+ "learning_rate": 9.997416187728787e-07,
+ "loss": 2.3825,
+ "step": 61
+ },
+ {
+ "epoch": 0.0027576391051016324,
+ "grad_norm": 0.40436115860939026,
+ "learning_rate": 9.997373724762788e-07,
+ "loss": 1.9051,
+ "step": 62
+ },
+ {
+ "epoch": 0.0028021171551839167,
+ "grad_norm": 0.3216610252857208,
+ "learning_rate": 9.997331258388588e-07,
+ "loss": 1.7655,
+ "step": 63
+ },
+ {
+ "epoch": 0.002846595205266201,
+ "grad_norm": 0.30226317048072815,
+ "learning_rate": 9.997288788605777e-07,
+ "loss": 2.215,
+ "step": 64
+ },
+ {
+ "epoch": 0.0028910732553484855,
+ "grad_norm": 0.34857413172721863,
+ "learning_rate": 9.997246315413945e-07,
+ "loss": 2.1704,
+ "step": 65
+ },
+ {
+ "epoch": 0.00293555130543077,
+ "grad_norm": 0.4939625859260559,
+ "learning_rate": 9.99720383881268e-07,
+ "loss": 2.1591,
+ "step": 66
+ },
+ {
+ "epoch": 0.0029800293555130542,
+ "grad_norm": 0.6396478414535522,
+ "learning_rate": 9.997161358801571e-07,
+ "loss": 2.4183,
+ "step": 67
+ },
+ {
+ "epoch": 0.0030245074055953386,
+ "grad_norm": 0.3547438681125641,
+ "learning_rate": 9.99711887538021e-07,
+ "loss": 2.3338,
+ "step": 68
+ },
+ {
+ "epoch": 0.003068985455677623,
+ "grad_norm": 0.455522745847702,
+ "learning_rate": 9.997076388548186e-07,
+ "loss": 2.2559,
+ "step": 69
+ },
+ {
+ "epoch": 0.0031134635057599074,
+ "grad_norm": 0.5139729976654053,
+ "learning_rate": 9.997033898305084e-07,
+ "loss": 2.4271,
+ "step": 70
+ },
+ {
+ "epoch": 0.0031134635057599074,
+ "eval_loss": 1.974100947380066,
+ "eval_runtime": 231.1208,
+ "eval_samples_per_second": 0.277,
+ "eval_steps_per_second": 0.069,
+ "step": 70
+ },
+ {
+ "epoch": 0.0031579415558421918,
+ "grad_norm": 0.3868389427661896,
+ "learning_rate": 9.996991404650499e-07,
+ "loss": 1.8754,
+ "step": 71
+ },
+ {
+ "epoch": 0.003202419605924476,
+ "grad_norm": 0.4664241373538971,
+ "learning_rate": 9.996948907584016e-07,
+ "loss": 1.9934,
+ "step": 72
+ },
+ {
+ "epoch": 0.0032468976560067605,
+ "grad_norm": 0.3952767848968506,
+ "learning_rate": 9.996906407105226e-07,
+ "loss": 2.0883,
+ "step": 73
+ },
+ {
+ "epoch": 0.003291375706089045,
+ "grad_norm": 0.4785691797733307,
+ "learning_rate": 9.996863903213718e-07,
+ "loss": 2.3203,
+ "step": 74
+ },
+ {
+ "epoch": 0.0033358537561713293,
+ "grad_norm": 0.4103385806083679,
+ "learning_rate": 9.99682139590908e-07,
+ "loss": 2.0406,
+ "step": 75
+ },
+ {
+ "epoch": 0.0033803318062536137,
+ "grad_norm": 0.45813262462615967,
+ "learning_rate": 9.996778885190904e-07,
+ "loss": 2.1745,
+ "step": 76
+ },
+ {
+ "epoch": 0.003424809856335898,
+ "grad_norm": 0.34197866916656494,
+ "learning_rate": 9.996736371058771e-07,
+ "loss": 2.0839,
+ "step": 77
+ },
+ {
+ "epoch": 0.0034692879064181824,
+ "grad_norm": 0.4917107820510864,
+ "learning_rate": 9.996693853512279e-07,
+ "loss": 1.9646,
+ "step": 78
+ },
+ {
+ "epoch": 0.0035137659565004672,
+ "grad_norm": 0.570755124092102,
+ "learning_rate": 9.99665133255101e-07,
+ "loss": 2.4128,
+ "step": 79
+ },
+ {
+ "epoch": 0.0035582440065827516,
+ "grad_norm": 0.6334550380706787,
+ "learning_rate": 9.996608808174557e-07,
+ "loss": 2.3972,
+ "step": 80
+ },
+ {
+ "epoch": 0.0035582440065827516,
+ "eval_loss": 1.9703996181488037,
+ "eval_runtime": 236.153,
+ "eval_samples_per_second": 0.271,
+ "eval_steps_per_second": 0.068,
+ "step": 80
+ },
+ {
+ "epoch": 0.003602722056665036,
+ "grad_norm": 0.44049328565597534,
+ "learning_rate": 9.996566280382507e-07,
+ "loss": 2.389,
+ "step": 81
+ },
+ {
+ "epoch": 0.0036472001067473204,
+ "grad_norm": 0.5198694467544556,
+ "learning_rate": 9.996523749174444e-07,
+ "loss": 1.8092,
+ "step": 82
+ },
+ {
+ "epoch": 0.0036916781568296047,
+ "grad_norm": 0.4297351837158203,
+ "learning_rate": 9.996481214549966e-07,
+ "loss": 1.9158,
+ "step": 83
+ },
+ {
+ "epoch": 0.003736156206911889,
+ "grad_norm": 0.5207564234733582,
+ "learning_rate": 9.996438676508653e-07,
+ "loss": 2.3368,
+ "step": 84
+ },
+ {
+ "epoch": 0.0037806342569941735,
+ "grad_norm": 3.4639275074005127,
+ "learning_rate": 9.996396135050097e-07,
+ "loss": 2.0157,
+ "step": 85
+ },
+ {
+ "epoch": 0.003825112307076458,
+ "grad_norm": 0.5132240056991577,
+ "learning_rate": 9.996353590173885e-07,
+ "loss": 2.1738,
+ "step": 86
+ },
+ {
+ "epoch": 0.0038695903571587423,
+ "grad_norm": 0.559355616569519,
+ "learning_rate": 9.996311041879605e-07,
+ "loss": 2.2668,
+ "step": 87
+ },
+ {
+ "epoch": 0.003914068407241026,
+ "grad_norm": 0.40078872442245483,
+ "learning_rate": 9.996268490166847e-07,
+ "loss": 2.0339,
+ "step": 88
+ },
+ {
+ "epoch": 0.003958546457323311,
+ "grad_norm": 0.43362948298454285,
+ "learning_rate": 9.996225935035196e-07,
+ "loss": 2.3476,
+ "step": 89
+ },
+ {
+ "epoch": 0.004003024507405595,
+ "grad_norm": 0.6087605953216553,
+ "learning_rate": 9.99618337648424e-07,
+ "loss": 2.3268,
+ "step": 90
+ },
+ {
+ "epoch": 0.004003024507405595,
+ "eval_loss": 1.9674957990646362,
+ "eval_runtime": 241.976,
+ "eval_samples_per_second": 0.264,
+ "eval_steps_per_second": 0.066,
+ "step": 90
+ },
+ {
+ "epoch": 0.004047502557487879,
+ "grad_norm": 3.6922800540924072,
+ "learning_rate": 9.996140814513573e-07,
+ "loss": 2.2244,
+ "step": 91
+ },
+ {
+ "epoch": 0.004091980607570164,
+ "grad_norm": 0.6901090145111084,
+ "learning_rate": 9.996098249122776e-07,
+ "loss": 2.127,
+ "step": 92
+ },
+ {
+ "epoch": 0.004136458657652448,
+ "grad_norm": 0.42567893862724304,
+ "learning_rate": 9.99605568031144e-07,
+ "loss": 1.99,
+ "step": 93
+ },
+ {
+ "epoch": 0.0041809367077347325,
+ "grad_norm": 0.37529805302619934,
+ "learning_rate": 9.996013108079149e-07,
+ "loss": 2.1369,
+ "step": 94
+ },
+ {
+ "epoch": 0.004225414757817018,
+ "grad_norm": 0.5616635084152222,
+ "learning_rate": 9.995970532425493e-07,
+ "loss": 1.8421,
+ "step": 95
+ },
+ {
+ "epoch": 0.004269892807899302,
+ "grad_norm": 0.3917858898639679,
+ "learning_rate": 9.995927953350061e-07,
+ "loss": 2.0905,
+ "step": 96
+ },
+ {
+ "epoch": 0.0043143708579815865,
+ "grad_norm": 0.3693113923072815,
+ "learning_rate": 9.99588537085244e-07,
+ "loss": 2.326,
+ "step": 97
+ },
+ {
+ "epoch": 0.004358848908063871,
+ "grad_norm": 0.4595705270767212,
+ "learning_rate": 9.995842784932216e-07,
+ "loss": 1.8433,
+ "step": 98
+ },
+ {
+ "epoch": 0.004403326958146155,
+ "grad_norm": 0.46681633591651917,
+ "learning_rate": 9.995800195588977e-07,
+ "loss": 2.21,
+ "step": 99
+ },
+ {
+ "epoch": 0.00444780500822844,
+ "grad_norm": 0.5430231690406799,
+ "learning_rate": 9.99575760282231e-07,
+ "loss": 2.4849,
+ "step": 100
+ },
+ {
+ "epoch": 0.00444780500822844,
+ "eval_loss": 1.964964509010315,
+ "eval_runtime": 231.4209,
+ "eval_samples_per_second": 0.277,
+ "eval_steps_per_second": 0.069,
+ "step": 100
  }
  ],
  "logging_steps": 1,
@@ -347,7 +815,7 @@
  "attributes": {}
  }
  },
- "total_flos": 8.994902180364288e+16,
+ "total_flos": 2.248725545091072e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null