qingy2024 committed on
Commit
9b0bbc3
·
verified ·
1 Parent(s): 74b9597

Upload checkpoint 600

Browse files
Files changed (6) hide show
  1. README.md +5 -28
  2. adapter_config.json +1 -1
  3. loss.png +2 -2
  4. optimizer.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +703 -3
README.md CHANGED
@@ -2,7 +2,7 @@
2
  base_model: Qwen/Qwen2.5-7B-Instruct
3
  library_name: peft
4
  ---
5
- # Gradience T1 7B (Step 500 Checkpoint)
6
 
7
  > [!NOTE]
8
  > Training in progress...
@@ -11,38 +11,15 @@ library_name: peft
11
  <html lang="en">
12
  <head>
13
  <meta charset="UTF-8">
14
- <title>Progress Bar Example</title>
15
- <style>
16
- .progress-container {
17
- width: 100%;
18
- background-color: #e0e0e0;
19
- border-radius: 25px;
20
- overflow: hidden;
21
- margin: 20px 0;
22
- }
23
- .progress-bar {
24
- height: 30px;
25
- width: 0;
26
- background-color: #44965a;
27
- text-align: center;
28
- line-height: 30px;
29
- color: white;
30
- border-radius: 25px 0 0 25px;
31
- }
32
- .progress-text {
33
- margin-top: 10px;
34
- font-size: 16px;
35
- font-family: Arial, sans-serif;
36
- }
37
- </style>
38
  </head>
39
  <body>
40
  <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
41
- <div style="height: 30px; width: 10.17%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
42
- <!-- 10.17% -->
43
  </div>
44
  </div>
45
- <p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 500 out of 4918 steps</p>
46
  </body>
47
  </html>
48
 
 
2
  base_model: Qwen/Qwen2.5-7B-Instruct
3
  library_name: peft
4
  ---
5
+ # Gradience T1 7B (Step 600 Checkpoint)
6
 
7
  > [!NOTE]
8
  > Training in progress...
 
11
  <html lang="en">
12
  <head>
13
  <meta charset="UTF-8">
14
+ <title>Progress Bar</title>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  </head>
16
  <body>
17
  <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
18
+ <div style="height: 30px; width: 12.20%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
19
+ <!-- 12.20% -->
20
  </div>
21
  </div>
22
+ <p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 600 out of 4918 steps</p>
23
  </body>
24
  </html>
25
 
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "./Qwen-2.5-7B-Instruct",
5
  "bias": "none",
6
  "eva_config": null,
7
  "exclude_modules": null,
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
5
  "bias": "none",
6
  "eva_config": null,
7
  "exclude_modules": null,
loss.png CHANGED

Git LFS Details

  • SHA256: 37445386f1b6c39db8d196daef02425fcaf1b834fccea3e0507b3c9f5c7ccba1
  • Pointer size: 131 Bytes
  • Size of remote file: 172 kB

Git LFS Details

  • SHA256: 696ed8c45d9aa3c00fcd86909bdb3ff567e3bd10bee05a6c658a2d08c670ea7a
  • Pointer size: 131 Bytes
  • Size of remote file: 172 kB
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93e2360a952b23953330ac855c3123cb594108db5b2578b0a23ede55ab321afa
3
  size 82461044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26571ef1b19c681cb39c0a2f1990922bf6b675e94079e41f21dd9577fef323d1
3
  size 82461044
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fe90944217b87f2db5382971d3c067633cfbdffc5e253607df747929a34a722
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cad174af49d220e06a73da64e877b11980706b182f379142714c6fa5747b447
3
  size 1064
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.20329335230737955,
6
  "eval_steps": 500,
7
- "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3508,6 +3508,706 @@
3508
  "learning_rate": 0.00017989008752289845,
3509
  "loss": 1.085,
3510
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3511
  }
3512
  ],
3513
  "logging_steps": 1,
@@ -3527,7 +4227,7 @@
3527
  "attributes": {}
3528
  }
3529
  },
3530
- "total_flos": 1.5824065174102671e+18,
3531
  "train_batch_size": 16,
3532
  "trial_name": null,
3533
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.24395202276885547,
6
  "eval_steps": 500,
7
+ "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3508
  "learning_rate": 0.00017989008752289845,
3509
  "loss": 1.085,
3510
  "step": 500
3511
+ },
3512
+ {
3513
+ "epoch": 0.20369993901199432,
3514
+ "grad_norm": 0.08499190211296082,
3515
+ "learning_rate": 0.000179849379198046,
3516
+ "loss": 0.9235,
3517
+ "step": 501
3518
+ },
3519
+ {
3520
+ "epoch": 0.20410652571660906,
3521
+ "grad_norm": 0.09169955551624298,
3522
+ "learning_rate": 0.00017980867087319358,
3523
+ "loss": 0.9836,
3524
+ "step": 502
3525
+ },
3526
+ {
3527
+ "epoch": 0.20451311242122383,
3528
+ "grad_norm": 0.10331466048955917,
3529
+ "learning_rate": 0.00017976796254834114,
3530
+ "loss": 1.0255,
3531
+ "step": 503
3532
+ },
3533
+ {
3534
+ "epoch": 0.20491969912583857,
3535
+ "grad_norm": 0.0900363028049469,
3536
+ "learning_rate": 0.00017972725422348872,
3537
+ "loss": 0.9691,
3538
+ "step": 504
3539
+ },
3540
+ {
3541
+ "epoch": 0.20532628583045334,
3542
+ "grad_norm": 0.10095544904470444,
3543
+ "learning_rate": 0.00017968654589863627,
3544
+ "loss": 1.0289,
3545
+ "step": 505
3546
+ },
3547
+ {
3548
+ "epoch": 0.2057328725350681,
3549
+ "grad_norm": 0.0992627814412117,
3550
+ "learning_rate": 0.00017964583757378383,
3551
+ "loss": 0.9785,
3552
+ "step": 506
3553
+ },
3554
+ {
3555
+ "epoch": 0.20613945923968285,
3556
+ "grad_norm": 0.0954422652721405,
3557
+ "learning_rate": 0.00017960512924893144,
3558
+ "loss": 1.0105,
3559
+ "step": 507
3560
+ },
3561
+ {
3562
+ "epoch": 0.20654604594429762,
3563
+ "grad_norm": 0.0994410440325737,
3564
+ "learning_rate": 0.000179564420924079,
3565
+ "loss": 1.0894,
3566
+ "step": 508
3567
+ },
3568
+ {
3569
+ "epoch": 0.2069526326489124,
3570
+ "grad_norm": 0.08866444230079651,
3571
+ "learning_rate": 0.00017952371259922654,
3572
+ "loss": 0.9725,
3573
+ "step": 509
3574
+ },
3575
+ {
3576
+ "epoch": 0.20735921935352714,
3577
+ "grad_norm": 0.09361348301172256,
3578
+ "learning_rate": 0.00017948300427437412,
3579
+ "loss": 1.0441,
3580
+ "step": 510
3581
+ },
3582
+ {
3583
+ "epoch": 0.2077658060581419,
3584
+ "grad_norm": 0.08215323090553284,
3585
+ "learning_rate": 0.00017944229594952168,
3586
+ "loss": 0.9214,
3587
+ "step": 511
3588
+ },
3589
+ {
3590
+ "epoch": 0.20817239276275665,
3591
+ "grad_norm": 0.09752262383699417,
3592
+ "learning_rate": 0.00017940158762466926,
3593
+ "loss": 0.9456,
3594
+ "step": 512
3595
+ },
3596
+ {
3597
+ "epoch": 0.20857897946737142,
3598
+ "grad_norm": 0.10021419823169708,
3599
+ "learning_rate": 0.00017936087929981681,
3600
+ "loss": 1.1158,
3601
+ "step": 513
3602
+ },
3603
+ {
3604
+ "epoch": 0.2089855661719862,
3605
+ "grad_norm": 0.09550227969884872,
3606
+ "learning_rate": 0.0001793201709749644,
3607
+ "loss": 0.9789,
3608
+ "step": 514
3609
+ },
3610
+ {
3611
+ "epoch": 0.20939215287660093,
3612
+ "grad_norm": 0.09059977531433105,
3613
+ "learning_rate": 0.00017927946265011195,
3614
+ "loss": 0.9649,
3615
+ "step": 515
3616
+ },
3617
+ {
3618
+ "epoch": 0.2097987395812157,
3619
+ "grad_norm": 0.09227627515792847,
3620
+ "learning_rate": 0.00017923875432525953,
3621
+ "loss": 0.9779,
3622
+ "step": 516
3623
+ },
3624
+ {
3625
+ "epoch": 0.21020532628583044,
3626
+ "grad_norm": 0.09919798374176025,
3627
+ "learning_rate": 0.00017919804600040708,
3628
+ "loss": 1.0155,
3629
+ "step": 517
3630
+ },
3631
+ {
3632
+ "epoch": 0.2106119129904452,
3633
+ "grad_norm": 0.09044051915407181,
3634
+ "learning_rate": 0.00017915733767555464,
3635
+ "loss": 0.9428,
3636
+ "step": 518
3637
+ },
3638
+ {
3639
+ "epoch": 0.21101849969505998,
3640
+ "grad_norm": 0.09017504006624222,
3641
+ "learning_rate": 0.00017911662935070225,
3642
+ "loss": 0.9244,
3643
+ "step": 519
3644
+ },
3645
+ {
3646
+ "epoch": 0.21142508639967472,
3647
+ "grad_norm": 0.09257036447525024,
3648
+ "learning_rate": 0.0001790759210258498,
3649
+ "loss": 1.0168,
3650
+ "step": 520
3651
+ },
3652
+ {
3653
+ "epoch": 0.2118316731042895,
3654
+ "grad_norm": 0.0926235020160675,
3655
+ "learning_rate": 0.00017903521270099735,
3656
+ "loss": 0.9363,
3657
+ "step": 521
3658
+ },
3659
+ {
3660
+ "epoch": 0.21223825980890426,
3661
+ "grad_norm": 0.08785069733858109,
3662
+ "learning_rate": 0.00017899450437614494,
3663
+ "loss": 0.9428,
3664
+ "step": 522
3665
+ },
3666
+ {
3667
+ "epoch": 0.212644846513519,
3668
+ "grad_norm": 0.09824348986148834,
3669
+ "learning_rate": 0.0001789537960512925,
3670
+ "loss": 1.0378,
3671
+ "step": 523
3672
+ },
3673
+ {
3674
+ "epoch": 0.21305143321813377,
3675
+ "grad_norm": 0.0915142148733139,
3676
+ "learning_rate": 0.00017891308772644007,
3677
+ "loss": 0.9603,
3678
+ "step": 524
3679
+ },
3680
+ {
3681
+ "epoch": 0.21345801992274852,
3682
+ "grad_norm": 0.09466978907585144,
3683
+ "learning_rate": 0.00017887237940158763,
3684
+ "loss": 1.013,
3685
+ "step": 525
3686
+ },
3687
+ {
3688
+ "epoch": 0.21386460662736329,
3689
+ "grad_norm": 0.09305880963802338,
3690
+ "learning_rate": 0.0001788316710767352,
3691
+ "loss": 0.9386,
3692
+ "step": 526
3693
+ },
3694
+ {
3695
+ "epoch": 0.21427119333197805,
3696
+ "grad_norm": 0.09210691601037979,
3697
+ "learning_rate": 0.00017879096275188276,
3698
+ "loss": 0.9797,
3699
+ "step": 527
3700
+ },
3701
+ {
3702
+ "epoch": 0.2146777800365928,
3703
+ "grad_norm": 0.10415366291999817,
3704
+ "learning_rate": 0.00017875025442703031,
3705
+ "loss": 1.0125,
3706
+ "step": 528
3707
+ },
3708
+ {
3709
+ "epoch": 0.21508436674120757,
3710
+ "grad_norm": 0.10259640216827393,
3711
+ "learning_rate": 0.0001787095461021779,
3712
+ "loss": 1.0473,
3713
+ "step": 529
3714
+ },
3715
+ {
3716
+ "epoch": 0.2154909534458223,
3717
+ "grad_norm": 0.09523239731788635,
3718
+ "learning_rate": 0.00017866883777732548,
3719
+ "loss": 0.9603,
3720
+ "step": 530
3721
+ },
3722
+ {
3723
+ "epoch": 0.21589754015043708,
3724
+ "grad_norm": 0.10005185008049011,
3725
+ "learning_rate": 0.00017862812945247306,
3726
+ "loss": 1.0768,
3727
+ "step": 531
3728
+ },
3729
+ {
3730
+ "epoch": 0.21630412685505185,
3731
+ "grad_norm": 0.09643250703811646,
3732
+ "learning_rate": 0.0001785874211276206,
3733
+ "loss": 1.0799,
3734
+ "step": 532
3735
+ },
3736
+ {
3737
+ "epoch": 0.2167107135596666,
3738
+ "grad_norm": 0.09473159909248352,
3739
+ "learning_rate": 0.00017854671280276817,
3740
+ "loss": 1.0657,
3741
+ "step": 533
3742
+ },
3743
+ {
3744
+ "epoch": 0.21711730026428136,
3745
+ "grad_norm": 0.09550385922193527,
3746
+ "learning_rate": 0.00017850600447791575,
3747
+ "loss": 1.0389,
3748
+ "step": 534
3749
+ },
3750
+ {
3751
+ "epoch": 0.21752388696889613,
3752
+ "grad_norm": 0.09414463490247726,
3753
+ "learning_rate": 0.0001784652961530633,
3754
+ "loss": 1.0317,
3755
+ "step": 535
3756
+ },
3757
+ {
3758
+ "epoch": 0.21793047367351087,
3759
+ "grad_norm": 0.090250164270401,
3760
+ "learning_rate": 0.00017842458782821088,
3761
+ "loss": 1.0212,
3762
+ "step": 536
3763
+ },
3764
+ {
3765
+ "epoch": 0.21833706037812564,
3766
+ "grad_norm": 0.09635050594806671,
3767
+ "learning_rate": 0.00017838387950335844,
3768
+ "loss": 0.9473,
3769
+ "step": 537
3770
+ },
3771
+ {
3772
+ "epoch": 0.21874364708274038,
3773
+ "grad_norm": 0.0985347330570221,
3774
+ "learning_rate": 0.00017834317117850602,
3775
+ "loss": 1.1372,
3776
+ "step": 538
3777
+ },
3778
+ {
3779
+ "epoch": 0.21915023378735515,
3780
+ "grad_norm": 0.09789203107357025,
3781
+ "learning_rate": 0.00017830246285365357,
3782
+ "loss": 1.0369,
3783
+ "step": 539
3784
+ },
3785
+ {
3786
+ "epoch": 0.21955682049196992,
3787
+ "grad_norm": 0.09777568280696869,
3788
+ "learning_rate": 0.00017826175452880113,
3789
+ "loss": 1.0746,
3790
+ "step": 540
3791
+ },
3792
+ {
3793
+ "epoch": 0.21996340719658466,
3794
+ "grad_norm": 0.09013503789901733,
3795
+ "learning_rate": 0.0001782210462039487,
3796
+ "loss": 1.0124,
3797
+ "step": 541
3798
+ },
3799
+ {
3800
+ "epoch": 0.22036999390119943,
3801
+ "grad_norm": 0.10604355484247208,
3802
+ "learning_rate": 0.0001781803378790963,
3803
+ "loss": 1.0158,
3804
+ "step": 542
3805
+ },
3806
+ {
3807
+ "epoch": 0.22077658060581418,
3808
+ "grad_norm": 0.09194648265838623,
3809
+ "learning_rate": 0.00017813962955424387,
3810
+ "loss": 0.9544,
3811
+ "step": 543
3812
+ },
3813
+ {
3814
+ "epoch": 0.22118316731042895,
3815
+ "grad_norm": 0.09223110228776932,
3816
+ "learning_rate": 0.00017809892122939142,
3817
+ "loss": 1.0094,
3818
+ "step": 544
3819
+ },
3820
+ {
3821
+ "epoch": 0.22158975401504372,
3822
+ "grad_norm": 0.09049870073795319,
3823
+ "learning_rate": 0.00017805821290453898,
3824
+ "loss": 0.8829,
3825
+ "step": 545
3826
+ },
3827
+ {
3828
+ "epoch": 0.22199634071965846,
3829
+ "grad_norm": 0.10157813131809235,
3830
+ "learning_rate": 0.00017801750457968656,
3831
+ "loss": 1.0904,
3832
+ "step": 546
3833
+ },
3834
+ {
3835
+ "epoch": 0.22240292742427323,
3836
+ "grad_norm": 0.09934356063604355,
3837
+ "learning_rate": 0.0001779767962548341,
3838
+ "loss": 1.0708,
3839
+ "step": 547
3840
+ },
3841
+ {
3842
+ "epoch": 0.222809514128888,
3843
+ "grad_norm": 0.09037156403064728,
3844
+ "learning_rate": 0.0001779360879299817,
3845
+ "loss": 0.916,
3846
+ "step": 548
3847
+ },
3848
+ {
3849
+ "epoch": 0.22321610083350274,
3850
+ "grad_norm": 0.09347829967737198,
3851
+ "learning_rate": 0.00017789537960512925,
3852
+ "loss": 1.0328,
3853
+ "step": 549
3854
+ },
3855
+ {
3856
+ "epoch": 0.2236226875381175,
3857
+ "grad_norm": 0.087796151638031,
3858
+ "learning_rate": 0.00017785467128027683,
3859
+ "loss": 0.9961,
3860
+ "step": 550
3861
+ },
3862
+ {
3863
+ "epoch": 0.22402927424273225,
3864
+ "grad_norm": 0.09518422931432724,
3865
+ "learning_rate": 0.00017781396295542438,
3866
+ "loss": 0.9855,
3867
+ "step": 551
3868
+ },
3869
+ {
3870
+ "epoch": 0.22443586094734702,
3871
+ "grad_norm": 0.09606748074293137,
3872
+ "learning_rate": 0.00017777325463057194,
3873
+ "loss": 0.954,
3874
+ "step": 552
3875
+ },
3876
+ {
3877
+ "epoch": 0.2248424476519618,
3878
+ "grad_norm": 0.09338165074586868,
3879
+ "learning_rate": 0.00017773254630571955,
3880
+ "loss": 1.0876,
3881
+ "step": 553
3882
+ },
3883
+ {
3884
+ "epoch": 0.22524903435657653,
3885
+ "grad_norm": 0.09242440015077591,
3886
+ "learning_rate": 0.0001776918379808671,
3887
+ "loss": 0.9418,
3888
+ "step": 554
3889
+ },
3890
+ {
3891
+ "epoch": 0.2256556210611913,
3892
+ "grad_norm": 0.0990302637219429,
3893
+ "learning_rate": 0.00017765112965601468,
3894
+ "loss": 1.0641,
3895
+ "step": 555
3896
+ },
3897
+ {
3898
+ "epoch": 0.22606220776580604,
3899
+ "grad_norm": 0.09444238990545273,
3900
+ "learning_rate": 0.00017761042133116224,
3901
+ "loss": 1.0315,
3902
+ "step": 556
3903
+ },
3904
+ {
3905
+ "epoch": 0.22646879447042081,
3906
+ "grad_norm": 0.08771083503961563,
3907
+ "learning_rate": 0.0001775697130063098,
3908
+ "loss": 0.9898,
3909
+ "step": 557
3910
+ },
3911
+ {
3912
+ "epoch": 0.22687538117503558,
3913
+ "grad_norm": 0.10041147470474243,
3914
+ "learning_rate": 0.00017752900468145737,
3915
+ "loss": 1.0478,
3916
+ "step": 558
3917
+ },
3918
+ {
3919
+ "epoch": 0.22728196787965033,
3920
+ "grad_norm": 0.0933571383357048,
3921
+ "learning_rate": 0.00017748829635660492,
3922
+ "loss": 1.0002,
3923
+ "step": 559
3924
+ },
3925
+ {
3926
+ "epoch": 0.2276885545842651,
3927
+ "grad_norm": 0.0912991389632225,
3928
+ "learning_rate": 0.0001774475880317525,
3929
+ "loss": 1.0807,
3930
+ "step": 560
3931
+ },
3932
+ {
3933
+ "epoch": 0.22809514128887987,
3934
+ "grad_norm": 0.09350984543561935,
3935
+ "learning_rate": 0.00017740687970690006,
3936
+ "loss": 0.8962,
3937
+ "step": 561
3938
+ },
3939
+ {
3940
+ "epoch": 0.2285017279934946,
3941
+ "grad_norm": 0.0978541299700737,
3942
+ "learning_rate": 0.00017736617138204764,
3943
+ "loss": 1.0339,
3944
+ "step": 562
3945
+ },
3946
+ {
3947
+ "epoch": 0.22890831469810938,
3948
+ "grad_norm": 0.08964958041906357,
3949
+ "learning_rate": 0.0001773254630571952,
3950
+ "loss": 1.051,
3951
+ "step": 563
3952
+ },
3953
+ {
3954
+ "epoch": 0.22931490140272412,
3955
+ "grad_norm": 0.09241898357868195,
3956
+ "learning_rate": 0.00017728475473234275,
3957
+ "loss": 0.903,
3958
+ "step": 564
3959
+ },
3960
+ {
3961
+ "epoch": 0.2297214881073389,
3962
+ "grad_norm": 0.09366483986377716,
3963
+ "learning_rate": 0.00017724404640749036,
3964
+ "loss": 1.0055,
3965
+ "step": 565
3966
+ },
3967
+ {
3968
+ "epoch": 0.23012807481195366,
3969
+ "grad_norm": 0.10184673964977264,
3970
+ "learning_rate": 0.0001772033380826379,
3971
+ "loss": 1.004,
3972
+ "step": 566
3973
+ },
3974
+ {
3975
+ "epoch": 0.2305346615165684,
3976
+ "grad_norm": 0.09287306666374207,
3977
+ "learning_rate": 0.0001771626297577855,
3978
+ "loss": 0.9667,
3979
+ "step": 567
3980
+ },
3981
+ {
3982
+ "epoch": 0.23094124822118317,
3983
+ "grad_norm": 0.08905091136693954,
3984
+ "learning_rate": 0.00017712192143293305,
3985
+ "loss": 0.9295,
3986
+ "step": 568
3987
+ },
3988
+ {
3989
+ "epoch": 0.2313478349257979,
3990
+ "grad_norm": 0.0908786877989769,
3991
+ "learning_rate": 0.0001770812131080806,
3992
+ "loss": 0.8957,
3993
+ "step": 569
3994
+ },
3995
+ {
3996
+ "epoch": 0.23175442163041268,
3997
+ "grad_norm": 0.10284281522035599,
3998
+ "learning_rate": 0.00017704050478322818,
3999
+ "loss": 1.1311,
4000
+ "step": 570
4001
+ },
4002
+ {
4003
+ "epoch": 0.23216100833502745,
4004
+ "grad_norm": 0.09007006883621216,
4005
+ "learning_rate": 0.00017699979645837574,
4006
+ "loss": 0.9919,
4007
+ "step": 571
4008
+ },
4009
+ {
4010
+ "epoch": 0.2325675950396422,
4011
+ "grad_norm": 0.09025272727012634,
4012
+ "learning_rate": 0.00017695908813352332,
4013
+ "loss": 0.9057,
4014
+ "step": 572
4015
+ },
4016
+ {
4017
+ "epoch": 0.23297418174425696,
4018
+ "grad_norm": 0.0994710698723793,
4019
+ "learning_rate": 0.00017691837980867087,
4020
+ "loss": 1.1472,
4021
+ "step": 573
4022
+ },
4023
+ {
4024
+ "epoch": 0.23338076844887173,
4025
+ "grad_norm": 0.09117428958415985,
4026
+ "learning_rate": 0.00017687767148381845,
4027
+ "loss": 0.9665,
4028
+ "step": 574
4029
+ },
4030
+ {
4031
+ "epoch": 0.23378735515348648,
4032
+ "grad_norm": 0.0893009826540947,
4033
+ "learning_rate": 0.000176836963158966,
4034
+ "loss": 0.951,
4035
+ "step": 575
4036
+ },
4037
+ {
4038
+ "epoch": 0.23419394185810125,
4039
+ "grad_norm": 0.08649599552154541,
4040
+ "learning_rate": 0.0001767962548341136,
4041
+ "loss": 0.925,
4042
+ "step": 576
4043
+ },
4044
+ {
4045
+ "epoch": 0.234600528562716,
4046
+ "grad_norm": 0.0928448736667633,
4047
+ "learning_rate": 0.00017675554650926117,
4048
+ "loss": 0.9253,
4049
+ "step": 577
4050
+ },
4051
+ {
4052
+ "epoch": 0.23500711526733076,
4053
+ "grad_norm": 0.10335158556699753,
4054
+ "learning_rate": 0.00017671483818440872,
4055
+ "loss": 1.1171,
4056
+ "step": 578
4057
+ },
4058
+ {
4059
+ "epoch": 0.23541370197194553,
4060
+ "grad_norm": 0.09889842569828033,
4061
+ "learning_rate": 0.0001766741298595563,
4062
+ "loss": 1.0005,
4063
+ "step": 579
4064
+ },
4065
+ {
4066
+ "epoch": 0.23582028867656027,
4067
+ "grad_norm": 0.09655506163835526,
4068
+ "learning_rate": 0.00017663342153470386,
4069
+ "loss": 1.0273,
4070
+ "step": 580
4071
+ },
4072
+ {
4073
+ "epoch": 0.23622687538117504,
4074
+ "grad_norm": 0.09516560286283493,
4075
+ "learning_rate": 0.0001765927132098514,
4076
+ "loss": 1.024,
4077
+ "step": 581
4078
+ },
4079
+ {
4080
+ "epoch": 0.23663346208578978,
4081
+ "grad_norm": 0.10024843364953995,
4082
+ "learning_rate": 0.000176552004884999,
4083
+ "loss": 1.0299,
4084
+ "step": 582
4085
+ },
4086
+ {
4087
+ "epoch": 0.23704004879040455,
4088
+ "grad_norm": 0.10152596235275269,
4089
+ "learning_rate": 0.00017651129656014655,
4090
+ "loss": 0.9658,
4091
+ "step": 583
4092
+ },
4093
+ {
4094
+ "epoch": 0.23744663549501932,
4095
+ "grad_norm": 0.09654249995946884,
4096
+ "learning_rate": 0.00017647058823529413,
4097
+ "loss": 1.0722,
4098
+ "step": 584
4099
+ },
4100
+ {
4101
+ "epoch": 0.23785322219963406,
4102
+ "grad_norm": 0.09112072736024857,
4103
+ "learning_rate": 0.00017642987991044168,
4104
+ "loss": 0.9846,
4105
+ "step": 585
4106
+ },
4107
+ {
4108
+ "epoch": 0.23825980890424883,
4109
+ "grad_norm": 0.09640034288167953,
4110
+ "learning_rate": 0.00017638917158558926,
4111
+ "loss": 1.0501,
4112
+ "step": 586
4113
+ },
4114
+ {
4115
+ "epoch": 0.2386663956088636,
4116
+ "grad_norm": 0.09564584493637085,
4117
+ "learning_rate": 0.00017634846326073682,
4118
+ "loss": 0.955,
4119
+ "step": 587
4120
+ },
4121
+ {
4122
+ "epoch": 0.23907298231347834,
4123
+ "grad_norm": 0.10815359652042389,
4124
+ "learning_rate": 0.0001763077549358844,
4125
+ "loss": 1.203,
4126
+ "step": 588
4127
+ },
4128
+ {
4129
+ "epoch": 0.2394795690180931,
4130
+ "grad_norm": 0.09078256040811539,
4131
+ "learning_rate": 0.00017626704661103198,
4132
+ "loss": 0.9881,
4133
+ "step": 589
4134
+ },
4135
+ {
4136
+ "epoch": 0.23988615572270786,
4137
+ "grad_norm": 0.09075487405061722,
4138
+ "learning_rate": 0.00017622633828617954,
4139
+ "loss": 0.984,
4140
+ "step": 590
4141
+ },
4142
+ {
4143
+ "epoch": 0.24029274242732263,
4144
+ "grad_norm": 0.09048381447792053,
4145
+ "learning_rate": 0.00017618562996132712,
4146
+ "loss": 1.0235,
4147
+ "step": 591
4148
+ },
4149
+ {
4150
+ "epoch": 0.2406993291319374,
4151
+ "grad_norm": 0.09820905327796936,
4152
+ "learning_rate": 0.00017614492163647467,
4153
+ "loss": 0.9763,
4154
+ "step": 592
4155
+ },
4156
+ {
4157
+ "epoch": 0.24110591583655214,
4158
+ "grad_norm": 0.0961097925901413,
4159
+ "learning_rate": 0.00017610421331162222,
4160
+ "loss": 1.1035,
4161
+ "step": 593
4162
+ },
4163
+ {
4164
+ "epoch": 0.2415125025411669,
4165
+ "grad_norm": 0.0877358540892601,
4166
+ "learning_rate": 0.0001760635049867698,
4167
+ "loss": 0.8962,
4168
+ "step": 594
4169
+ },
4170
+ {
4171
+ "epoch": 0.24191908924578168,
4172
+ "grad_norm": 0.09730017930269241,
4173
+ "learning_rate": 0.00017602279666191736,
4174
+ "loss": 1.1232,
4175
+ "step": 595
4176
+ },
4177
+ {
4178
+ "epoch": 0.24232567595039642,
4179
+ "grad_norm": 0.09486240148544312,
4180
+ "learning_rate": 0.00017598208833706494,
4181
+ "loss": 1.0566,
4182
+ "step": 596
4183
+ },
4184
+ {
4185
+ "epoch": 0.2427322626550112,
4186
+ "grad_norm": 0.09367606788873672,
4187
+ "learning_rate": 0.0001759413800122125,
4188
+ "loss": 0.9934,
4189
+ "step": 597
4190
+ },
4191
+ {
4192
+ "epoch": 0.24313884935962593,
4193
+ "grad_norm": 0.09046703577041626,
4194
+ "learning_rate": 0.00017590067168736008,
4195
+ "loss": 0.9137,
4196
+ "step": 598
4197
+ },
4198
+ {
4199
+ "epoch": 0.2435454360642407,
4200
+ "grad_norm": 0.09512536972761154,
4201
+ "learning_rate": 0.00017585996336250766,
4202
+ "loss": 0.9733,
4203
+ "step": 599
4204
+ },
4205
+ {
4206
+ "epoch": 0.24395202276885547,
4207
+ "grad_norm": 0.08619649708271027,
4208
+ "learning_rate": 0.0001758192550376552,
4209
+ "loss": 0.8777,
4210
+ "step": 600
4211
  }
4212
  ],
4213
  "logging_steps": 1,
 
4227
  "attributes": {}
4228
  }
4229
  },
4230
+ "total_flos": 1.906257354398122e+18,
4231
  "train_batch_size": 16,
4232
  "trial_name": null,
4233
  "trial_params": null