diaenra commited on
Commit
5bbd2b6
·
verified ·
1 Parent(s): 219bef6

Training in progress, step 13145, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:136be8ce064f9a44b3844da635ccb65a4156593ca5a9a3525c030f5d3f3a3673
3
  size 98447936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f883b4f265b6b8064b67009ea7bfe448f26ae43c44e65e959b3de74a0b473f8
3
  size 98447936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52356d446e159e9dbc7effbd79e4681f101ebe757027c7fea425ddeb0a2b3991
3
  size 196978810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15f89a104fdd62216eac28fe99f857669937aa8b4fed6aa1a2b53d8e286f81f9
3
  size 196978810
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87f507471b1e3c2bfec42c45613534c08e05b68efec15506cc4696ee5b2dacbd
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19e7284245a01704e7547fac857930f61337e742c5987dcc39fdf9c642d77c9e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bdba567ab352cb596cc221bd95ba24bffb3d4d63cb6da21b4755e2a3a1a8040
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5928a4230aa28b0327c505e24d3c41016e7b6d560dcb0f1569e7770b3922423
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9651871517780354,
5
  "eval_steps": 500,
6
- "global_step": 12906,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -90349,6 +90349,1679 @@
90349
  "learning_rate": 3.0262150381350494e-07,
90350
  "loss": 0.0439,
90351
  "step": 12906
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90352
  }
90353
  ],
90354
  "logging_steps": 1,
@@ -90368,7 +92041,7 @@
90368
  "attributes": {}
90369
  }
90370
  },
90371
- "total_flos": 8.886261680544154e+16,
90372
  "train_batch_size": 4,
90373
  "trial_name": null,
90374
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9830609879220731,
5
  "eval_steps": 500,
6
+ "global_step": 13145,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
90349
  "learning_rate": 3.0262150381350494e-07,
90350
  "loss": 0.0439,
90351
  "step": 12906
90352
+ },
90353
+ {
90354
+ "epoch": 0.9652619377033242,
90355
+ "grad_norm": 0.4368337094783783,
90356
+ "learning_rate": 3.0132261302588395e-07,
90357
+ "loss": 0.0399,
90358
+ "step": 12907
90359
+ },
90360
+ {
90361
+ "epoch": 0.9653367236286131,
90362
+ "grad_norm": 0.6395098567008972,
90363
+ "learning_rate": 3.000265073157038e-07,
90364
+ "loss": 0.0591,
90365
+ "step": 12908
90366
+ },
90367
+ {
90368
+ "epoch": 0.965411509553902,
90369
+ "grad_norm": 0.43370211124420166,
90370
+ "learning_rate": 2.987331867556009e-07,
90371
+ "loss": 0.0339,
90372
+ "step": 12909
90373
+ },
90374
+ {
90375
+ "epoch": 0.9654862954791908,
90376
+ "grad_norm": 0.5491053462028503,
90377
+ "learning_rate": 2.974426514180506e-07,
90378
+ "loss": 0.0459,
90379
+ "step": 12910
90380
+ },
90381
+ {
90382
+ "epoch": 0.9655610814044797,
90383
+ "grad_norm": 0.48986148834228516,
90384
+ "learning_rate": 2.961549013753784e-07,
90385
+ "loss": 0.04,
90386
+ "step": 12911
90387
+ },
90388
+ {
90389
+ "epoch": 0.9656358673297686,
90390
+ "grad_norm": 0.8874034881591797,
90391
+ "learning_rate": 2.9486993669974315e-07,
90392
+ "loss": 0.0708,
90393
+ "step": 12912
90394
+ },
90395
+ {
90396
+ "epoch": 0.9657106532550574,
90397
+ "grad_norm": 0.7711852192878723,
90398
+ "learning_rate": 2.9358775746315956e-07,
90399
+ "loss": 0.101,
90400
+ "step": 12913
90401
+ },
90402
+ {
90403
+ "epoch": 0.9657854391803462,
90404
+ "grad_norm": 0.5177712440490723,
90405
+ "learning_rate": 2.9230836373747574e-07,
90406
+ "loss": 0.0518,
90407
+ "step": 12914
90408
+ },
90409
+ {
90410
+ "epoch": 0.9658602251056351,
90411
+ "grad_norm": 0.8569120168685913,
90412
+ "learning_rate": 2.910317555943898e-07,
90413
+ "loss": 0.085,
90414
+ "step": 12915
90415
+ },
90416
+ {
90417
+ "epoch": 0.9659350110309239,
90418
+ "grad_norm": 0.6278529167175293,
90419
+ "learning_rate": 2.8975793310543897e-07,
90420
+ "loss": 0.0411,
90421
+ "step": 12916
90422
+ },
90423
+ {
90424
+ "epoch": 0.9660097969562128,
90425
+ "grad_norm": 1.1338576078414917,
90426
+ "learning_rate": 2.884868963420162e-07,
90427
+ "loss": 0.0452,
90428
+ "step": 12917
90429
+ },
90430
+ {
90431
+ "epoch": 0.9660845828815017,
90432
+ "grad_norm": 0.792792022228241,
90433
+ "learning_rate": 2.8721864537533116e-07,
90434
+ "loss": 0.0578,
90435
+ "step": 12918
90436
+ },
90437
+ {
90438
+ "epoch": 0.9661593688067905,
90439
+ "grad_norm": 0.4867582321166992,
90440
+ "learning_rate": 2.85953180276477e-07,
90441
+ "loss": 0.034,
90442
+ "step": 12919
90443
+ },
90444
+ {
90445
+ "epoch": 0.9662341547320794,
90446
+ "grad_norm": 0.5390796661376953,
90447
+ "learning_rate": 2.8469050111635253e-07,
90448
+ "loss": 0.0326,
90449
+ "step": 12920
90450
+ },
90451
+ {
90452
+ "epoch": 0.9663089406573683,
90453
+ "grad_norm": 0.4971838593482971,
90454
+ "learning_rate": 2.8343060796572897e-07,
90455
+ "loss": 0.0411,
90456
+ "step": 12921
90457
+ },
90458
+ {
90459
+ "epoch": 0.9663837265826571,
90460
+ "grad_norm": 0.8017480969429016,
90461
+ "learning_rate": 2.8217350089520533e-07,
90462
+ "loss": 0.0638,
90463
+ "step": 12922
90464
+ },
90465
+ {
90466
+ "epoch": 0.966458512507946,
90467
+ "grad_norm": 0.6261386275291443,
90468
+ "learning_rate": 2.809191799752309e-07,
90469
+ "loss": 0.0423,
90470
+ "step": 12923
90471
+ },
90472
+ {
90473
+ "epoch": 0.9665332984332349,
90474
+ "grad_norm": 0.5139853358268738,
90475
+ "learning_rate": 2.7966764527608827e-07,
90476
+ "loss": 0.0428,
90477
+ "step": 12924
90478
+ },
90479
+ {
90480
+ "epoch": 0.9666080843585237,
90481
+ "grad_norm": 0.8327328562736511,
90482
+ "learning_rate": 2.7841889686792686e-07,
90483
+ "loss": 0.0712,
90484
+ "step": 12925
90485
+ },
90486
+ {
90487
+ "epoch": 0.9666828702838126,
90488
+ "grad_norm": 0.9425419569015503,
90489
+ "learning_rate": 2.7717293482071303e-07,
90490
+ "loss": 0.0712,
90491
+ "step": 12926
90492
+ },
90493
+ {
90494
+ "epoch": 0.9667576562091015,
90495
+ "grad_norm": 0.999728262424469,
90496
+ "learning_rate": 2.759297592042742e-07,
90497
+ "loss": 0.1424,
90498
+ "step": 12927
90499
+ },
90500
+ {
90501
+ "epoch": 0.9668324421343903,
90502
+ "grad_norm": 0.6338492035865784,
90503
+ "learning_rate": 2.7468937008828246e-07,
90504
+ "loss": 0.0392,
90505
+ "step": 12928
90506
+ },
90507
+ {
90508
+ "epoch": 0.9669072280596792,
90509
+ "grad_norm": 0.7793694734573364,
90510
+ "learning_rate": 2.734517675422377e-07,
90511
+ "loss": 0.0659,
90512
+ "step": 12929
90513
+ },
90514
+ {
90515
+ "epoch": 0.9669820139849681,
90516
+ "grad_norm": 0.9696511030197144,
90517
+ "learning_rate": 2.7221695163550107e-07,
90518
+ "loss": 0.1005,
90519
+ "step": 12930
90520
+ },
90521
+ {
90522
+ "epoch": 0.9670567999102568,
90523
+ "grad_norm": 1.540802001953125,
90524
+ "learning_rate": 2.709849224372729e-07,
90525
+ "loss": 0.1015,
90526
+ "step": 12931
90527
+ },
90528
+ {
90529
+ "epoch": 0.9671315858355457,
90530
+ "grad_norm": 0.5976976156234741,
90531
+ "learning_rate": 2.697556800165868e-07,
90532
+ "loss": 0.0303,
90533
+ "step": 12932
90534
+ },
90535
+ {
90536
+ "epoch": 0.9672063717608346,
90537
+ "grad_norm": 0.8872543573379517,
90538
+ "learning_rate": 2.685292244423376e-07,
90539
+ "loss": 0.0686,
90540
+ "step": 12933
90541
+ },
90542
+ {
90543
+ "epoch": 0.9672811576861234,
90544
+ "grad_norm": 0.710059642791748,
90545
+ "learning_rate": 2.673055557832538e-07,
90546
+ "loss": 0.054,
90547
+ "step": 12934
90548
+ },
90549
+ {
90550
+ "epoch": 0.9673559436114123,
90551
+ "grad_norm": 1.1238468885421753,
90552
+ "learning_rate": 2.660846741079026e-07,
90553
+ "loss": 0.1244,
90554
+ "step": 12935
90555
+ },
90556
+ {
90557
+ "epoch": 0.9674307295367012,
90558
+ "grad_norm": 0.9026004672050476,
90559
+ "learning_rate": 2.6486657948470163e-07,
90560
+ "loss": 0.0706,
90561
+ "step": 12936
90562
+ },
90563
+ {
90564
+ "epoch": 0.96750551546199,
90565
+ "grad_norm": 0.9717168807983398,
90566
+ "learning_rate": 2.636512719819184e-07,
90567
+ "loss": 0.1016,
90568
+ "step": 12937
90569
+ },
90570
+ {
90571
+ "epoch": 0.9675803013872789,
90572
+ "grad_norm": 0.7005811929702759,
90573
+ "learning_rate": 2.6243875166765406e-07,
90574
+ "loss": 0.0552,
90575
+ "step": 12938
90576
+ },
90577
+ {
90578
+ "epoch": 0.9676550873125678,
90579
+ "grad_norm": 0.8264267444610596,
90580
+ "learning_rate": 2.6122901860985425e-07,
90581
+ "loss": 0.0389,
90582
+ "step": 12939
90583
+ },
90584
+ {
90585
+ "epoch": 0.9677298732378566,
90586
+ "grad_norm": 0.7877691984176636,
90587
+ "learning_rate": 2.600220728763203e-07,
90588
+ "loss": 0.0707,
90589
+ "step": 12940
90590
+ },
90591
+ {
90592
+ "epoch": 0.9678046591631455,
90593
+ "grad_norm": 0.8559983968734741,
90594
+ "learning_rate": 2.588179145346814e-07,
90595
+ "loss": 0.0521,
90596
+ "step": 12941
90597
+ },
90598
+ {
90599
+ "epoch": 0.9678794450884344,
90600
+ "grad_norm": 1.7342501878738403,
90601
+ "learning_rate": 2.576165436524225e-07,
90602
+ "loss": 0.1444,
90603
+ "step": 12942
90604
+ },
90605
+ {
90606
+ "epoch": 0.9679542310137232,
90607
+ "grad_norm": 1.3305034637451172,
90608
+ "learning_rate": 2.5641796029686194e-07,
90609
+ "loss": 0.0944,
90610
+ "step": 12943
90611
+ },
90612
+ {
90613
+ "epoch": 0.9680290169390121,
90614
+ "grad_norm": 0.9448521137237549,
90615
+ "learning_rate": 2.552221645351738e-07,
90616
+ "loss": 0.0652,
90617
+ "step": 12944
90618
+ },
90619
+ {
90620
+ "epoch": 0.968103802864301,
90621
+ "grad_norm": 1.5735996961593628,
90622
+ "learning_rate": 2.5402915643436e-07,
90623
+ "loss": 0.1673,
90624
+ "step": 12945
90625
+ },
90626
+ {
90627
+ "epoch": 0.9681785887895898,
90628
+ "grad_norm": 0.888843834400177,
90629
+ "learning_rate": 2.528389360612837e-07,
90630
+ "loss": 0.1212,
90631
+ "step": 12946
90632
+ },
90633
+ {
90634
+ "epoch": 0.9682533747148787,
90635
+ "grad_norm": 0.892540454864502,
90636
+ "learning_rate": 2.516515034826472e-07,
90637
+ "loss": 0.0598,
90638
+ "step": 12947
90639
+ },
90640
+ {
90641
+ "epoch": 0.9683281606401675,
90642
+ "grad_norm": 0.6834811568260193,
90643
+ "learning_rate": 2.504668587649805e-07,
90644
+ "loss": 0.0483,
90645
+ "step": 12948
90646
+ },
90647
+ {
90648
+ "epoch": 0.9684029465654563,
90649
+ "grad_norm": 0.7159473299980164,
90650
+ "learning_rate": 2.4928500197468616e-07,
90651
+ "loss": 0.0455,
90652
+ "step": 12949
90653
+ },
90654
+ {
90655
+ "epoch": 0.9684777324907452,
90656
+ "grad_norm": 1.5077083110809326,
90657
+ "learning_rate": 2.4810593317798336e-07,
90658
+ "loss": 0.1202,
90659
+ "step": 12950
90660
+ },
90661
+ {
90662
+ "epoch": 0.9685525184160341,
90663
+ "grad_norm": 0.5121647715568542,
90664
+ "learning_rate": 2.46929652440947e-07,
90665
+ "loss": 0.067,
90666
+ "step": 12951
90667
+ },
90668
+ {
90669
+ "epoch": 0.9686273043413229,
90670
+ "grad_norm": 0.5928946137428284,
90671
+ "learning_rate": 2.457561598294966e-07,
90672
+ "loss": 0.0578,
90673
+ "step": 12952
90674
+ },
90675
+ {
90676
+ "epoch": 0.9687020902666118,
90677
+ "grad_norm": 0.5399084687232971,
90678
+ "learning_rate": 2.445854554094018e-07,
90679
+ "loss": 0.0413,
90680
+ "step": 12953
90681
+ },
90682
+ {
90683
+ "epoch": 0.9687768761919007,
90684
+ "grad_norm": 0.8208631277084351,
90685
+ "learning_rate": 2.4341753924625457e-07,
90686
+ "loss": 0.0741,
90687
+ "step": 12954
90688
+ },
90689
+ {
90690
+ "epoch": 0.9688516621171895,
90691
+ "grad_norm": 0.7083703279495239,
90692
+ "learning_rate": 2.422524114055136e-07,
90693
+ "loss": 0.0987,
90694
+ "step": 12955
90695
+ },
90696
+ {
90697
+ "epoch": 0.9689264480424784,
90698
+ "grad_norm": 0.6513927578926086,
90699
+ "learning_rate": 2.4109007195246556e-07,
90700
+ "loss": 0.0743,
90701
+ "step": 12956
90702
+ },
90703
+ {
90704
+ "epoch": 0.9690012339677673,
90705
+ "grad_norm": 0.8623580932617188,
90706
+ "learning_rate": 2.399305209522529e-07,
90707
+ "loss": 0.0642,
90708
+ "step": 12957
90709
+ },
90710
+ {
90711
+ "epoch": 0.9690760198930561,
90712
+ "grad_norm": 0.591946005821228,
90713
+ "learning_rate": 2.387737584698513e-07,
90714
+ "loss": 0.0366,
90715
+ "step": 12958
90716
+ },
90717
+ {
90718
+ "epoch": 0.969150805818345,
90719
+ "grad_norm": 0.7606440186500549,
90720
+ "learning_rate": 2.376197845700867e-07,
90721
+ "loss": 0.0733,
90722
+ "step": 12959
90723
+ },
90724
+ {
90725
+ "epoch": 0.9692255917436339,
90726
+ "grad_norm": 0.3158939480781555,
90727
+ "learning_rate": 2.3646859931762965e-07,
90728
+ "loss": 0.0171,
90729
+ "step": 12960
90730
+ },
90731
+ {
90732
+ "epoch": 0.9693003776689227,
90733
+ "grad_norm": 0.47718873620033264,
90734
+ "learning_rate": 2.3532020277698407e-07,
90735
+ "loss": 0.0427,
90736
+ "step": 12961
90737
+ },
90738
+ {
90739
+ "epoch": 0.9693751635942116,
90740
+ "grad_norm": 0.6722490787506104,
90741
+ "learning_rate": 2.3417459501251515e-07,
90742
+ "loss": 0.0535,
90743
+ "step": 12962
90744
+ },
90745
+ {
90746
+ "epoch": 0.9694499495195005,
90747
+ "grad_norm": 0.55564284324646,
90748
+ "learning_rate": 2.3303177608841597e-07,
90749
+ "loss": 0.0618,
90750
+ "step": 12963
90751
+ },
90752
+ {
90753
+ "epoch": 0.9695247354447893,
90754
+ "grad_norm": 0.6552045345306396,
90755
+ "learning_rate": 2.3189174606872976e-07,
90756
+ "loss": 0.0825,
90757
+ "step": 12964
90758
+ },
90759
+ {
90760
+ "epoch": 0.9695995213700781,
90761
+ "grad_norm": 0.6629922986030579,
90762
+ "learning_rate": 2.307545050173443e-07,
90763
+ "loss": 0.0459,
90764
+ "step": 12965
90765
+ },
90766
+ {
90767
+ "epoch": 0.969674307295367,
90768
+ "grad_norm": 0.851646363735199,
90769
+ "learning_rate": 2.2962005299798639e-07,
90770
+ "loss": 0.0874,
90771
+ "step": 12966
90772
+ },
90773
+ {
90774
+ "epoch": 0.9697490932206558,
90775
+ "grad_norm": 0.9766066670417786,
90776
+ "learning_rate": 2.2848839007423295e-07,
90777
+ "loss": 0.1098,
90778
+ "step": 12967
90779
+ },
90780
+ {
90781
+ "epoch": 0.9698238791459447,
90782
+ "grad_norm": 1.0904663801193237,
90783
+ "learning_rate": 2.2735951630950548e-07,
90784
+ "loss": 0.1515,
90785
+ "step": 12968
90786
+ },
90787
+ {
90788
+ "epoch": 0.9698986650712336,
90789
+ "grad_norm": 0.7357199192047119,
90790
+ "learning_rate": 2.2623343176705892e-07,
90791
+ "loss": 0.062,
90792
+ "step": 12969
90793
+ },
90794
+ {
90795
+ "epoch": 0.9699734509965224,
90796
+ "grad_norm": 0.513715386390686,
90797
+ "learning_rate": 2.2511013650999835e-07,
90798
+ "loss": 0.0408,
90799
+ "step": 12970
90800
+ },
90801
+ {
90802
+ "epoch": 0.9700482369218113,
90803
+ "grad_norm": 0.8974568247795105,
90804
+ "learning_rate": 2.2398963060127342e-07,
90805
+ "loss": 0.0687,
90806
+ "step": 12971
90807
+ },
90808
+ {
90809
+ "epoch": 0.9701230228471002,
90810
+ "grad_norm": 0.4329313039779663,
90811
+ "learning_rate": 2.2287191410367836e-07,
90812
+ "loss": 0.0332,
90813
+ "step": 12972
90814
+ },
90815
+ {
90816
+ "epoch": 0.970197808772389,
90817
+ "grad_norm": 0.751781165599823,
90818
+ "learning_rate": 2.2175698707984638e-07,
90819
+ "loss": 0.0691,
90820
+ "step": 12973
90821
+ },
90822
+ {
90823
+ "epoch": 0.9702725946976779,
90824
+ "grad_norm": 0.5423529148101807,
90825
+ "learning_rate": 2.2064484959226083e-07,
90826
+ "loss": 0.0449,
90827
+ "step": 12974
90828
+ },
90829
+ {
90830
+ "epoch": 0.9703473806229668,
90831
+ "grad_norm": 0.8365032076835632,
90832
+ "learning_rate": 2.195355017032441e-07,
90833
+ "loss": 0.0968,
90834
+ "step": 12975
90835
+ },
90836
+ {
90837
+ "epoch": 0.9704221665482556,
90838
+ "grad_norm": 0.9210798144340515,
90839
+ "learning_rate": 2.1842894347496312e-07,
90840
+ "loss": 0.1159,
90841
+ "step": 12976
90842
+ },
90843
+ {
90844
+ "epoch": 0.9704969524735445,
90845
+ "grad_norm": 0.7737799286842346,
90846
+ "learning_rate": 2.1732517496942383e-07,
90847
+ "loss": 0.0661,
90848
+ "step": 12977
90849
+ },
90850
+ {
90851
+ "epoch": 0.9705717383988334,
90852
+ "grad_norm": 0.7045473456382751,
90853
+ "learning_rate": 2.1622419624848788e-07,
90854
+ "loss": 0.0383,
90855
+ "step": 12978
90856
+ },
90857
+ {
90858
+ "epoch": 0.9706465243241222,
90859
+ "grad_norm": 1.0195949077606201,
90860
+ "learning_rate": 2.1512600737385037e-07,
90861
+ "loss": 0.0988,
90862
+ "step": 12979
90863
+ },
90864
+ {
90865
+ "epoch": 0.9707213102494111,
90866
+ "grad_norm": 0.8737218379974365,
90867
+ "learning_rate": 2.1403060840704536e-07,
90868
+ "loss": 0.0793,
90869
+ "step": 12980
90870
+ },
90871
+ {
90872
+ "epoch": 0.9707960961747,
90873
+ "grad_norm": 0.531203031539917,
90874
+ "learning_rate": 2.129379994094738e-07,
90875
+ "loss": 0.0303,
90876
+ "step": 12981
90877
+ },
90878
+ {
90879
+ "epoch": 0.9708708820999887,
90880
+ "grad_norm": 1.0328282117843628,
90881
+ "learning_rate": 2.1184818044235332e-07,
90882
+ "loss": 0.0792,
90883
+ "step": 12982
90884
+ },
90885
+ {
90886
+ "epoch": 0.9709456680252776,
90887
+ "grad_norm": 0.734826922416687,
90888
+ "learning_rate": 2.1076115156675735e-07,
90889
+ "loss": 0.0536,
90890
+ "step": 12983
90891
+ },
90892
+ {
90893
+ "epoch": 0.9710204539505665,
90894
+ "grad_norm": 0.5005146861076355,
90895
+ "learning_rate": 2.0967691284360935e-07,
90896
+ "loss": 0.0325,
90897
+ "step": 12984
90898
+ },
90899
+ {
90900
+ "epoch": 0.9710952398758553,
90901
+ "grad_norm": 1.020372748374939,
90902
+ "learning_rate": 2.0859546433366072e-07,
90903
+ "loss": 0.1083,
90904
+ "step": 12985
90905
+ },
90906
+ {
90907
+ "epoch": 0.9711700258011442,
90908
+ "grad_norm": 0.5662137866020203,
90909
+ "learning_rate": 2.0751680609751856e-07,
90910
+ "loss": 0.0426,
90911
+ "step": 12986
90912
+ },
90913
+ {
90914
+ "epoch": 0.9712448117264331,
90915
+ "grad_norm": 0.6404293179512024,
90916
+ "learning_rate": 2.064409381956345e-07,
90917
+ "loss": 0.0384,
90918
+ "step": 12987
90919
+ },
90920
+ {
90921
+ "epoch": 0.9713195976517219,
90922
+ "grad_norm": 0.6235434412956238,
90923
+ "learning_rate": 2.0536786068828805e-07,
90924
+ "loss": 0.0293,
90925
+ "step": 12988
90926
+ },
90927
+ {
90928
+ "epoch": 0.9713943835770108,
90929
+ "grad_norm": 1.5746077299118042,
90930
+ "learning_rate": 2.0429757363562563e-07,
90931
+ "loss": 0.1573,
90932
+ "step": 12989
90933
+ },
90934
+ {
90935
+ "epoch": 0.9714691695022997,
90936
+ "grad_norm": 0.35104164481163025,
90937
+ "learning_rate": 2.0323007709762144e-07,
90938
+ "loss": 0.0212,
90939
+ "step": 12990
90940
+ },
90941
+ {
90942
+ "epoch": 0.9715439554275885,
90943
+ "grad_norm": 0.8248312473297119,
90944
+ "learning_rate": 2.021653711340943e-07,
90945
+ "loss": 0.1061,
90946
+ "step": 12991
90947
+ },
90948
+ {
90949
+ "epoch": 0.9716187413528774,
90950
+ "grad_norm": 1.168135404586792,
90951
+ "learning_rate": 2.0110345580470756e-07,
90952
+ "loss": 0.1196,
90953
+ "step": 12992
90954
+ },
90955
+ {
90956
+ "epoch": 0.9716935272781663,
90957
+ "grad_norm": 0.6298313736915588,
90958
+ "learning_rate": 2.0004433116898037e-07,
90959
+ "loss": 0.039,
90960
+ "step": 12993
90961
+ },
90962
+ {
90963
+ "epoch": 0.9717683132034551,
90964
+ "grad_norm": 2.1503021717071533,
90965
+ "learning_rate": 1.9898799728625407e-07,
90966
+ "loss": 0.3274,
90967
+ "step": 12994
90968
+ },
90969
+ {
90970
+ "epoch": 0.971843099128744,
90971
+ "grad_norm": 2.1687769889831543,
90972
+ "learning_rate": 1.979344542157313e-07,
90973
+ "loss": 0.0948,
90974
+ "step": 12995
90975
+ },
90976
+ {
90977
+ "epoch": 0.9719178850540329,
90978
+ "grad_norm": 1.4140548706054688,
90979
+ "learning_rate": 1.9688370201644822e-07,
90980
+ "loss": 0.1153,
90981
+ "step": 12996
90982
+ },
90983
+ {
90984
+ "epoch": 0.9719926709793217,
90985
+ "grad_norm": 1.139455795288086,
90986
+ "learning_rate": 1.958357407472966e-07,
90987
+ "loss": 0.0983,
90988
+ "step": 12997
90989
+ },
90990
+ {
90991
+ "epoch": 0.9720674569046106,
90992
+ "grad_norm": 2.854573965072632,
90993
+ "learning_rate": 1.9479057046699056e-07,
90994
+ "loss": 0.6571,
90995
+ "step": 12998
90996
+ },
90997
+ {
90998
+ "epoch": 0.9721422428298994,
90999
+ "grad_norm": 1.1940664052963257,
91000
+ "learning_rate": 1.9374819123411104e-07,
91001
+ "loss": 0.0986,
91002
+ "step": 12999
91003
+ },
91004
+ {
91005
+ "epoch": 0.9722170287551882,
91006
+ "grad_norm": 1.5254565477371216,
91007
+ "learning_rate": 1.927086031070724e-07,
91008
+ "loss": 0.1172,
91009
+ "step": 13000
91010
+ },
91011
+ {
91012
+ "epoch": 0.9722918146804771,
91013
+ "grad_norm": 0.5440202355384827,
91014
+ "learning_rate": 1.9167180614411695e-07,
91015
+ "loss": 0.0491,
91016
+ "step": 13001
91017
+ },
91018
+ {
91019
+ "epoch": 0.972366600605766,
91020
+ "grad_norm": 0.7353929281234741,
91021
+ "learning_rate": 1.9063780040336488e-07,
91022
+ "loss": 0.0792,
91023
+ "step": 13002
91024
+ },
91025
+ {
91026
+ "epoch": 0.9724413865310548,
91027
+ "grad_norm": 0.67140793800354,
91028
+ "learning_rate": 1.8960658594275316e-07,
91029
+ "loss": 0.0683,
91030
+ "step": 13003
91031
+ },
91032
+ {
91033
+ "epoch": 0.9725161724563437,
91034
+ "grad_norm": 0.8194896578788757,
91035
+ "learning_rate": 1.885781628200689e-07,
91036
+ "loss": 0.0734,
91037
+ "step": 13004
91038
+ },
91039
+ {
91040
+ "epoch": 0.9725909583816326,
91041
+ "grad_norm": 0.714746356010437,
91042
+ "learning_rate": 1.8755253109294935e-07,
91043
+ "loss": 0.0611,
91044
+ "step": 13005
91045
+ },
91046
+ {
91047
+ "epoch": 0.9726657443069214,
91048
+ "grad_norm": 0.7146202921867371,
91049
+ "learning_rate": 1.865296908188652e-07,
91050
+ "loss": 0.0714,
91051
+ "step": 13006
91052
+ },
91053
+ {
91054
+ "epoch": 0.9727405302322103,
91055
+ "grad_norm": 0.4795819818973541,
91056
+ "learning_rate": 1.855096420551372e-07,
91057
+ "loss": 0.0331,
91058
+ "step": 13007
91059
+ },
91060
+ {
91061
+ "epoch": 0.9728153161574992,
91062
+ "grad_norm": 0.5267304182052612,
91063
+ "learning_rate": 1.844923848589253e-07,
91064
+ "loss": 0.0694,
91065
+ "step": 13008
91066
+ },
91067
+ {
91068
+ "epoch": 0.972890102082788,
91069
+ "grad_norm": 0.9844954013824463,
91070
+ "learning_rate": 1.8347791928724488e-07,
91071
+ "loss": 0.0921,
91072
+ "step": 13009
91073
+ },
91074
+ {
91075
+ "epoch": 0.9729648880080769,
91076
+ "grad_norm": 0.6365022659301758,
91077
+ "learning_rate": 1.824662453969339e-07,
91078
+ "loss": 0.068,
91079
+ "step": 13010
91080
+ },
91081
+ {
91082
+ "epoch": 0.9730396739333658,
91083
+ "grad_norm": 0.43110954761505127,
91084
+ "learning_rate": 1.8145736324469697e-07,
91085
+ "loss": 0.026,
91086
+ "step": 13011
91087
+ },
91088
+ {
91089
+ "epoch": 0.9731144598586546,
91090
+ "grad_norm": 0.550247073173523,
91091
+ "learning_rate": 1.804512728870611e-07,
91092
+ "loss": 0.0515,
91093
+ "step": 13012
91094
+ },
91095
+ {
91096
+ "epoch": 0.9731892457839435,
91097
+ "grad_norm": 0.4430747330188751,
91098
+ "learning_rate": 1.794479743804145e-07,
91099
+ "loss": 0.0328,
91100
+ "step": 13013
91101
+ },
91102
+ {
91103
+ "epoch": 0.9732640317092324,
91104
+ "grad_norm": 0.5010110139846802,
91105
+ "learning_rate": 1.7844746778097887e-07,
91106
+ "loss": 0.0228,
91107
+ "step": 13014
91108
+ },
91109
+ {
91110
+ "epoch": 0.9733388176345212,
91111
+ "grad_norm": 0.831089973449707,
91112
+ "learning_rate": 1.7744975314482047e-07,
91113
+ "loss": 0.0817,
91114
+ "step": 13015
91115
+ },
91116
+ {
91117
+ "epoch": 0.97341360355981,
91118
+ "grad_norm": 0.4925210773944855,
91119
+ "learning_rate": 1.7645483052785017e-07,
91120
+ "loss": 0.0318,
91121
+ "step": 13016
91122
+ },
91123
+ {
91124
+ "epoch": 0.9734883894850989,
91125
+ "grad_norm": 0.5358295440673828,
91126
+ "learning_rate": 1.7546269998582333e-07,
91127
+ "loss": 0.0459,
91128
+ "step": 13017
91129
+ },
91130
+ {
91131
+ "epoch": 0.9735631754103877,
91132
+ "grad_norm": 0.6370837688446045,
91133
+ "learning_rate": 1.7447336157434545e-07,
91134
+ "loss": 0.0613,
91135
+ "step": 13018
91136
+ },
91137
+ {
91138
+ "epoch": 0.9736379613356766,
91139
+ "grad_norm": 0.5662913918495178,
91140
+ "learning_rate": 1.7348681534884447e-07,
91141
+ "loss": 0.0432,
91142
+ "step": 13019
91143
+ },
91144
+ {
91145
+ "epoch": 0.9737127472609655,
91146
+ "grad_norm": 0.6075406074523926,
91147
+ "learning_rate": 1.7250306136461502e-07,
91148
+ "loss": 0.0367,
91149
+ "step": 13020
91150
+ },
91151
+ {
91152
+ "epoch": 0.9737875331862543,
91153
+ "grad_norm": 0.4553796947002411,
91154
+ "learning_rate": 1.7152209967678523e-07,
91155
+ "loss": 0.0303,
91156
+ "step": 13021
91157
+ },
91158
+ {
91159
+ "epoch": 0.9738623191115432,
91160
+ "grad_norm": 1.0038201808929443,
91161
+ "learning_rate": 1.7054393034032779e-07,
91162
+ "loss": 0.0675,
91163
+ "step": 13022
91164
+ },
91165
+ {
91166
+ "epoch": 0.9739371050368321,
91167
+ "grad_norm": 0.8288964629173279,
91168
+ "learning_rate": 1.695685534100544e-07,
91169
+ "loss": 0.0465,
91170
+ "step": 13023
91171
+ },
91172
+ {
91173
+ "epoch": 0.9740118909621209,
91174
+ "grad_norm": 0.7483683824539185,
91175
+ "learning_rate": 1.685959689406269e-07,
91176
+ "loss": 0.0743,
91177
+ "step": 13024
91178
+ },
91179
+ {
91180
+ "epoch": 0.9740866768874098,
91181
+ "grad_norm": 0.5443363189697266,
91182
+ "learning_rate": 1.6762617698654615e-07,
91183
+ "loss": 0.0478,
91184
+ "step": 13025
91185
+ },
91186
+ {
91187
+ "epoch": 0.9741614628126987,
91188
+ "grad_norm": 0.699834406375885,
91189
+ "learning_rate": 1.666591776021631e-07,
91190
+ "loss": 0.0734,
91191
+ "step": 13026
91192
+ },
91193
+ {
91194
+ "epoch": 0.9742362487379875,
91195
+ "grad_norm": 0.8187717795372009,
91196
+ "learning_rate": 1.6569497084166773e-07,
91197
+ "loss": 0.0655,
91198
+ "step": 13027
91199
+ },
91200
+ {
91201
+ "epoch": 0.9743110346632764,
91202
+ "grad_norm": 0.4898703992366791,
91203
+ "learning_rate": 1.647335567590891e-07,
91204
+ "loss": 0.0679,
91205
+ "step": 13028
91206
+ },
91207
+ {
91208
+ "epoch": 0.9743858205885653,
91209
+ "grad_norm": 0.5239977836608887,
91210
+ "learning_rate": 1.6377493540830625e-07,
91211
+ "loss": 0.0406,
91212
+ "step": 13029
91213
+ },
91214
+ {
91215
+ "epoch": 0.9744606065138541,
91216
+ "grad_norm": 0.7604244351387024,
91217
+ "learning_rate": 1.6281910684303737e-07,
91218
+ "loss": 0.0753,
91219
+ "step": 13030
91220
+ },
91221
+ {
91222
+ "epoch": 0.974535392439143,
91223
+ "grad_norm": 0.842095136642456,
91224
+ "learning_rate": 1.618660711168507e-07,
91225
+ "loss": 0.0494,
91226
+ "step": 13031
91227
+ },
91228
+ {
91229
+ "epoch": 0.9746101783644319,
91230
+ "grad_norm": 0.8381233215332031,
91231
+ "learning_rate": 1.6091582828315355e-07,
91232
+ "loss": 0.0549,
91233
+ "step": 13032
91234
+ },
91235
+ {
91236
+ "epoch": 0.9746849642897206,
91237
+ "grad_norm": 0.5621647238731384,
91238
+ "learning_rate": 1.5996837839519218e-07,
91239
+ "loss": 0.0483,
91240
+ "step": 13033
91241
+ },
91242
+ {
91243
+ "epoch": 0.9747597502150095,
91244
+ "grad_norm": 0.987895667552948,
91245
+ "learning_rate": 1.5902372150606304e-07,
91246
+ "loss": 0.0699,
91247
+ "step": 13034
91248
+ },
91249
+ {
91250
+ "epoch": 0.9748345361402984,
91251
+ "grad_norm": 0.7235648036003113,
91252
+ "learning_rate": 1.5808185766870153e-07,
91253
+ "loss": 0.0347,
91254
+ "step": 13035
91255
+ },
91256
+ {
91257
+ "epoch": 0.9749093220655872,
91258
+ "grad_norm": 0.6615068316459656,
91259
+ "learning_rate": 1.5714278693589878e-07,
91260
+ "loss": 0.0481,
91261
+ "step": 13036
91262
+ },
91263
+ {
91264
+ "epoch": 0.9749841079908761,
91265
+ "grad_norm": 0.6945041418075562,
91266
+ "learning_rate": 1.562065093602738e-07,
91267
+ "loss": 0.0366,
91268
+ "step": 13037
91269
+ },
91270
+ {
91271
+ "epoch": 0.975058893916165,
91272
+ "grad_norm": 0.8700776100158691,
91273
+ "learning_rate": 1.5527302499428465e-07,
91274
+ "loss": 0.0893,
91275
+ "step": 13038
91276
+ },
91277
+ {
91278
+ "epoch": 0.9751336798414538,
91279
+ "grad_norm": 0.7300289273262024,
91280
+ "learning_rate": 1.5434233389026165e-07,
91281
+ "loss": 0.0467,
91282
+ "step": 13039
91283
+ },
91284
+ {
91285
+ "epoch": 0.9752084657667427,
91286
+ "grad_norm": 0.6579194664955139,
91287
+ "learning_rate": 1.5341443610034645e-07,
91288
+ "loss": 0.0633,
91289
+ "step": 13040
91290
+ },
91291
+ {
91292
+ "epoch": 0.9752832516920316,
91293
+ "grad_norm": 1.026484727859497,
91294
+ "learning_rate": 1.5248933167654188e-07,
91295
+ "loss": 0.0938,
91296
+ "step": 13041
91297
+ },
91298
+ {
91299
+ "epoch": 0.9753580376173204,
91300
+ "grad_norm": 0.8569180369377136,
91301
+ "learning_rate": 1.515670206706954e-07,
91302
+ "loss": 0.0779,
91303
+ "step": 13042
91304
+ },
91305
+ {
91306
+ "epoch": 0.9754328235426093,
91307
+ "grad_norm": 1.164454698562622,
91308
+ "learning_rate": 1.506475031344823e-07,
91309
+ "loss": 0.1158,
91310
+ "step": 13043
91311
+ },
91312
+ {
91313
+ "epoch": 0.9755076094678982,
91314
+ "grad_norm": 0.6989597082138062,
91315
+ "learning_rate": 1.497307791194391e-07,
91316
+ "loss": 0.057,
91317
+ "step": 13044
91318
+ },
91319
+ {
91320
+ "epoch": 0.975582395393187,
91321
+ "grad_norm": 0.9949011206626892,
91322
+ "learning_rate": 1.488168486769359e-07,
91323
+ "loss": 0.0714,
91324
+ "step": 13045
91325
+ },
91326
+ {
91327
+ "epoch": 0.9756571813184759,
91328
+ "grad_norm": 1.5810455083847046,
91329
+ "learning_rate": 1.4790571185818724e-07,
91330
+ "loss": 0.1696,
91331
+ "step": 13046
91332
+ },
91333
+ {
91334
+ "epoch": 0.9757319672437648,
91335
+ "grad_norm": 1.3241075277328491,
91336
+ "learning_rate": 1.469973687142523e-07,
91337
+ "loss": 0.1038,
91338
+ "step": 13047
91339
+ },
91340
+ {
91341
+ "epoch": 0.9758067531690536,
91342
+ "grad_norm": 0.8140578269958496,
91343
+ "learning_rate": 1.4609181929604033e-07,
91344
+ "loss": 0.0522,
91345
+ "step": 13048
91346
+ },
91347
+ {
91348
+ "epoch": 0.9758815390943425,
91349
+ "grad_norm": 1.0757066011428833,
91350
+ "learning_rate": 1.4518906365429408e-07,
91351
+ "loss": 0.0717,
91352
+ "step": 13049
91353
+ },
91354
+ {
91355
+ "epoch": 0.9759563250196313,
91356
+ "grad_norm": 1.9058736562728882,
91357
+ "learning_rate": 1.4428910183960087e-07,
91358
+ "loss": 0.1612,
91359
+ "step": 13050
91360
+ },
91361
+ {
91362
+ "epoch": 0.9760311109449201,
91363
+ "grad_norm": 0.6265395879745483,
91364
+ "learning_rate": 1.4339193390239813e-07,
91365
+ "loss": 0.0873,
91366
+ "step": 13051
91367
+ },
91368
+ {
91369
+ "epoch": 0.976105896870209,
91370
+ "grad_norm": 1.08207106590271,
91371
+ "learning_rate": 1.4249755989295677e-07,
91372
+ "loss": 0.0839,
91373
+ "step": 13052
91374
+ },
91375
+ {
91376
+ "epoch": 0.9761806827954979,
91377
+ "grad_norm": 0.6696078777313232,
91378
+ "learning_rate": 1.4160597986140335e-07,
91379
+ "loss": 0.0655,
91380
+ "step": 13053
91381
+ },
91382
+ {
91383
+ "epoch": 0.9762554687207867,
91384
+ "grad_norm": 0.5708719491958618,
91385
+ "learning_rate": 1.4071719385769234e-07,
91386
+ "loss": 0.075,
91387
+ "step": 13054
91388
+ },
91389
+ {
91390
+ "epoch": 0.9763302546460756,
91391
+ "grad_norm": 0.39571017026901245,
91392
+ "learning_rate": 1.3983120193163946e-07,
91393
+ "loss": 0.0286,
91394
+ "step": 13055
91395
+ },
91396
+ {
91397
+ "epoch": 0.9764050405713645,
91398
+ "grad_norm": 1.0295779705047607,
91399
+ "learning_rate": 1.3894800413289388e-07,
91400
+ "loss": 0.1031,
91401
+ "step": 13056
91402
+ },
91403
+ {
91404
+ "epoch": 0.9764798264966533,
91405
+ "grad_norm": 0.42557287216186523,
91406
+ "learning_rate": 1.3806760051094935e-07,
91407
+ "loss": 0.0271,
91408
+ "step": 13057
91409
+ },
91410
+ {
91411
+ "epoch": 0.9765546124219422,
91412
+ "grad_norm": 0.8260351419448853,
91413
+ "learning_rate": 1.3718999111513308e-07,
91414
+ "loss": 0.0546,
91415
+ "step": 13058
91416
+ },
91417
+ {
91418
+ "epoch": 0.9766293983472311,
91419
+ "grad_norm": 0.7049921154975891,
91420
+ "learning_rate": 1.3631517599463907e-07,
91421
+ "loss": 0.0798,
91422
+ "step": 13059
91423
+ },
91424
+ {
91425
+ "epoch": 0.9767041842725199,
91426
+ "grad_norm": 0.5924288034439087,
91427
+ "learning_rate": 1.3544315519848916e-07,
91428
+ "loss": 0.0518,
91429
+ "step": 13060
91430
+ },
91431
+ {
91432
+ "epoch": 0.9767789701978088,
91433
+ "grad_norm": 1.14959716796875,
91434
+ "learning_rate": 1.3457392877553876e-07,
91435
+ "loss": 0.0876,
91436
+ "step": 13061
91437
+ },
91438
+ {
91439
+ "epoch": 0.9768537561230977,
91440
+ "grad_norm": 0.4802398085594177,
91441
+ "learning_rate": 1.3370749677451554e-07,
91442
+ "loss": 0.0389,
91443
+ "step": 13062
91444
+ },
91445
+ {
91446
+ "epoch": 0.9769285420483865,
91447
+ "grad_norm": 0.6689320802688599,
91448
+ "learning_rate": 1.3284385924396403e-07,
91449
+ "loss": 0.0361,
91450
+ "step": 13063
91451
+ },
91452
+ {
91453
+ "epoch": 0.9770033279736754,
91454
+ "grad_norm": 0.6408694386482239,
91455
+ "learning_rate": 1.3198301623227883e-07,
91456
+ "loss": 0.0544,
91457
+ "step": 13064
91458
+ },
91459
+ {
91460
+ "epoch": 0.9770781138989643,
91461
+ "grad_norm": 0.6966913342475891,
91462
+ "learning_rate": 1.3112496778771022e-07,
91463
+ "loss": 0.0607,
91464
+ "step": 13065
91465
+ },
91466
+ {
91467
+ "epoch": 0.9771528998242531,
91468
+ "grad_norm": 0.8057048916816711,
91469
+ "learning_rate": 1.3026971395833642e-07,
91470
+ "loss": 0.0675,
91471
+ "step": 13066
91472
+ },
91473
+ {
91474
+ "epoch": 0.9772276857495419,
91475
+ "grad_norm": 0.645700216293335,
91476
+ "learning_rate": 1.2941725479208578e-07,
91477
+ "loss": 0.0397,
91478
+ "step": 13067
91479
+ },
91480
+ {
91481
+ "epoch": 0.9773024716748308,
91482
+ "grad_norm": 0.812199056148529,
91483
+ "learning_rate": 1.2856759033673116e-07,
91484
+ "loss": 0.0548,
91485
+ "step": 13068
91486
+ },
91487
+ {
91488
+ "epoch": 0.9773772576001196,
91489
+ "grad_norm": 1.3543890714645386,
91490
+ "learning_rate": 1.2772072063989003e-07,
91491
+ "loss": 0.0872,
91492
+ "step": 13069
91493
+ },
91494
+ {
91495
+ "epoch": 0.9774520435254085,
91496
+ "grad_norm": 0.48335230350494385,
91497
+ "learning_rate": 1.2687664574901336e-07,
91498
+ "loss": 0.0335,
91499
+ "step": 13070
91500
+ },
91501
+ {
91502
+ "epoch": 0.9775268294506974,
91503
+ "grad_norm": 0.7605381011962891,
91504
+ "learning_rate": 1.2603536571140773e-07,
91505
+ "loss": 0.0757,
91506
+ "step": 13071
91507
+ },
91508
+ {
91509
+ "epoch": 0.9776016153759862,
91510
+ "grad_norm": 0.8594883680343628,
91511
+ "learning_rate": 1.2519688057421318e-07,
91512
+ "loss": 0.0716,
91513
+ "step": 13072
91514
+ },
91515
+ {
91516
+ "epoch": 0.9776764013012751,
91517
+ "grad_norm": 0.8642313480377197,
91518
+ "learning_rate": 1.2436119038441996e-07,
91519
+ "loss": 0.0716,
91520
+ "step": 13073
91521
+ },
91522
+ {
91523
+ "epoch": 0.977751187226564,
91524
+ "grad_norm": 0.7008442282676697,
91525
+ "learning_rate": 1.2352829518886831e-07,
91526
+ "loss": 0.0606,
91527
+ "step": 13074
91528
+ },
91529
+ {
91530
+ "epoch": 0.9778259731518528,
91531
+ "grad_norm": 0.6041141748428345,
91532
+ "learning_rate": 1.2269819503421542e-07,
91533
+ "loss": 0.032,
91534
+ "step": 13075
91535
+ },
91536
+ {
91537
+ "epoch": 0.9779007590771417,
91538
+ "grad_norm": 0.73857182264328,
91539
+ "learning_rate": 1.2187088996699625e-07,
91540
+ "loss": 0.0572,
91541
+ "step": 13076
91542
+ },
91543
+ {
91544
+ "epoch": 0.9779755450024306,
91545
+ "grad_norm": 0.7224127650260925,
91546
+ "learning_rate": 1.2104638003355705e-07,
91547
+ "loss": 0.0564,
91548
+ "step": 13077
91549
+ },
91550
+ {
91551
+ "epoch": 0.9780503309277194,
91552
+ "grad_norm": 0.49023354053497314,
91553
+ "learning_rate": 1.2022466528011645e-07,
91554
+ "loss": 0.0395,
91555
+ "step": 13078
91556
+ },
91557
+ {
91558
+ "epoch": 0.9781251168530083,
91559
+ "grad_norm": 0.6345604658126831,
91560
+ "learning_rate": 1.194057457527209e-07,
91561
+ "loss": 0.0457,
91562
+ "step": 13079
91563
+ },
91564
+ {
91565
+ "epoch": 0.9781999027782972,
91566
+ "grad_norm": 0.7459271550178528,
91567
+ "learning_rate": 1.185896214972504e-07,
91568
+ "loss": 0.0605,
91569
+ "step": 13080
91570
+ },
91571
+ {
91572
+ "epoch": 0.978274688703586,
91573
+ "grad_norm": 0.9838876724243164,
91574
+ "learning_rate": 1.1777629255945167e-07,
91575
+ "loss": 0.0737,
91576
+ "step": 13081
91577
+ },
91578
+ {
91579
+ "epoch": 0.9783494746288749,
91580
+ "grad_norm": 0.6713008880615234,
91581
+ "learning_rate": 1.1696575898489936e-07,
91582
+ "loss": 0.0688,
91583
+ "step": 13082
91584
+ },
91585
+ {
91586
+ "epoch": 0.9784242605541638,
91587
+ "grad_norm": 0.8351396918296814,
91588
+ "learning_rate": 1.1615802081901272e-07,
91589
+ "loss": 0.0405,
91590
+ "step": 13083
91591
+ },
91592
+ {
91593
+ "epoch": 0.9784990464794525,
91594
+ "grad_norm": 0.7137233018875122,
91595
+ "learning_rate": 1.1535307810706108e-07,
91596
+ "loss": 0.0456,
91597
+ "step": 13084
91598
+ },
91599
+ {
91600
+ "epoch": 0.9785738324047414,
91601
+ "grad_norm": 0.57951420545578,
91602
+ "learning_rate": 1.1455093089415281e-07,
91603
+ "loss": 0.031,
91604
+ "step": 13085
91605
+ },
91606
+ {
91607
+ "epoch": 0.9786486183300303,
91608
+ "grad_norm": 0.938587486743927,
91609
+ "learning_rate": 1.1375157922523527e-07,
91610
+ "loss": 0.1027,
91611
+ "step": 13086
91612
+ },
91613
+ {
91614
+ "epoch": 0.9787234042553191,
91615
+ "grad_norm": 0.7072553038597107,
91616
+ "learning_rate": 1.1295502314510598e-07,
91617
+ "loss": 0.0458,
91618
+ "step": 13087
91619
+ },
91620
+ {
91621
+ "epoch": 0.978798190180608,
91622
+ "grad_norm": 0.5360969305038452,
91623
+ "learning_rate": 1.1216126269840699e-07,
91624
+ "loss": 0.0417,
91625
+ "step": 13088
91626
+ },
91627
+ {
91628
+ "epoch": 0.9788729761058969,
91629
+ "grad_norm": 0.8541834950447083,
91630
+ "learning_rate": 1.1137029792961384e-07,
91631
+ "loss": 0.0572,
91632
+ "step": 13089
91633
+ },
91634
+ {
91635
+ "epoch": 0.9789477620311857,
91636
+ "grad_norm": 0.8962938189506531,
91637
+ "learning_rate": 1.105821288830522e-07,
91638
+ "loss": 0.0871,
91639
+ "step": 13090
91640
+ },
91641
+ {
91642
+ "epoch": 0.9790225479564746,
91643
+ "grad_norm": 1.4392637014389038,
91644
+ "learning_rate": 1.0979675560289781e-07,
91645
+ "loss": 0.1042,
91646
+ "step": 13091
91647
+ },
91648
+ {
91649
+ "epoch": 0.9790973338817635,
91650
+ "grad_norm": 1.2927100658416748,
91651
+ "learning_rate": 1.090141781331544e-07,
91652
+ "loss": 0.1091,
91653
+ "step": 13092
91654
+ },
91655
+ {
91656
+ "epoch": 0.9791721198070523,
91657
+ "grad_norm": 1.6105395555496216,
91658
+ "learning_rate": 1.0823439651767575e-07,
91659
+ "loss": 0.1397,
91660
+ "step": 13093
91661
+ },
91662
+ {
91663
+ "epoch": 0.9792469057323412,
91664
+ "grad_norm": 1.2756192684173584,
91665
+ "learning_rate": 1.0745741080017135e-07,
91666
+ "loss": 0.0866,
91667
+ "step": 13094
91668
+ },
91669
+ {
91670
+ "epoch": 0.9793216916576301,
91671
+ "grad_norm": 1.3140145540237427,
91672
+ "learning_rate": 1.0668322102417305e-07,
91673
+ "loss": 0.1108,
91674
+ "step": 13095
91675
+ },
91676
+ {
91677
+ "epoch": 0.9793964775829189,
91678
+ "grad_norm": 1.489345908164978,
91679
+ "learning_rate": 1.0591182723306836e-07,
91680
+ "loss": 0.1239,
91681
+ "step": 13096
91682
+ },
91683
+ {
91684
+ "epoch": 0.9794712635082078,
91685
+ "grad_norm": 0.8814570903778076,
91686
+ "learning_rate": 1.0514322947008382e-07,
91687
+ "loss": 0.0919,
91688
+ "step": 13097
91689
+ },
91690
+ {
91691
+ "epoch": 0.9795460494334967,
91692
+ "grad_norm": 1.595694661140442,
91693
+ "learning_rate": 1.0437742777829606e-07,
91694
+ "loss": 0.1223,
91695
+ "step": 13098
91696
+ },
91697
+ {
91698
+ "epoch": 0.9796208353587855,
91699
+ "grad_norm": 1.1279810667037964,
91700
+ "learning_rate": 1.0361442220061524e-07,
91701
+ "loss": 0.0767,
91702
+ "step": 13099
91703
+ },
91704
+ {
91705
+ "epoch": 0.9796956212840744,
91706
+ "grad_norm": 2.32450270652771,
91707
+ "learning_rate": 1.0285421277980156e-07,
91708
+ "loss": 0.2051,
91709
+ "step": 13100
91710
+ },
91711
+ {
91712
+ "epoch": 0.9797704072093631,
91713
+ "grad_norm": 0.7000548243522644,
91714
+ "learning_rate": 1.0209679955845985e-07,
91715
+ "loss": 0.0823,
91716
+ "step": 13101
91717
+ },
91718
+ {
91719
+ "epoch": 0.979845193134652,
91720
+ "grad_norm": 0.5720806121826172,
91721
+ "learning_rate": 1.013421825790284e-07,
91722
+ "loss": 0.0701,
91723
+ "step": 13102
91724
+ },
91725
+ {
91726
+ "epoch": 0.9799199790599409,
91727
+ "grad_norm": 0.9151096343994141,
91728
+ "learning_rate": 1.0059036188380111e-07,
91729
+ "loss": 0.0914,
91730
+ "step": 13103
91731
+ },
91732
+ {
91733
+ "epoch": 0.9799947649852297,
91734
+ "grad_norm": 0.5501378774642944,
91735
+ "learning_rate": 9.984133751490543e-08,
91736
+ "loss": 0.05,
91737
+ "step": 13104
91738
+ },
91739
+ {
91740
+ "epoch": 0.9800695509105186,
91741
+ "grad_norm": 0.6816980838775635,
91742
+ "learning_rate": 9.909510951431888e-08,
91743
+ "loss": 0.041,
91744
+ "step": 13105
91745
+ },
91746
+ {
91747
+ "epoch": 0.9801443368358075,
91748
+ "grad_norm": 0.47302597761154175,
91749
+ "learning_rate": 9.835167792385803e-08,
91750
+ "loss": 0.0259,
91751
+ "step": 13106
91752
+ },
91753
+ {
91754
+ "epoch": 0.9802191227610964,
91755
+ "grad_norm": 0.5860419273376465,
91756
+ "learning_rate": 9.761104278518396e-08,
91757
+ "loss": 0.0405,
91758
+ "step": 13107
91759
+ },
91760
+ {
91761
+ "epoch": 0.9802939086863852,
91762
+ "grad_norm": 0.7843829989433289,
91763
+ "learning_rate": 9.687320413980239e-08,
91764
+ "loss": 0.0765,
91765
+ "step": 13108
91766
+ },
91767
+ {
91768
+ "epoch": 0.9803686946116741,
91769
+ "grad_norm": 0.48027125000953674,
91770
+ "learning_rate": 9.613816202906356e-08,
91771
+ "loss": 0.0365,
91772
+ "step": 13109
91773
+ },
91774
+ {
91775
+ "epoch": 0.980443480536963,
91776
+ "grad_norm": 1.1443341970443726,
91777
+ "learning_rate": 9.54059164941512e-08,
91778
+ "loss": 0.08,
91779
+ "step": 13110
91780
+ },
91781
+ {
91782
+ "epoch": 0.9805182664622518,
91783
+ "grad_norm": 0.6189633011817932,
91784
+ "learning_rate": 9.467646757611026e-08,
91785
+ "loss": 0.0579,
91786
+ "step": 13111
91787
+ },
91788
+ {
91789
+ "epoch": 0.9805930523875407,
91790
+ "grad_norm": 0.576843798160553,
91791
+ "learning_rate": 9.394981531580804e-08,
91792
+ "loss": 0.0628,
91793
+ "step": 13112
91794
+ },
91795
+ {
91796
+ "epoch": 0.9806678383128296,
91797
+ "grad_norm": 0.6445755362510681,
91798
+ "learning_rate": 9.322595975397308e-08,
91799
+ "loss": 0.1089,
91800
+ "step": 13113
91801
+ },
91802
+ {
91803
+ "epoch": 0.9807426242381184,
91804
+ "grad_norm": 0.6944069862365723,
91805
+ "learning_rate": 9.250490093116737e-08,
91806
+ "loss": 0.0606,
91807
+ "step": 13114
91808
+ },
91809
+ {
91810
+ "epoch": 0.9808174101634073,
91811
+ "grad_norm": 0.5240738987922668,
91812
+ "learning_rate": 9.178663888779748e-08,
91813
+ "loss": 0.0372,
91814
+ "step": 13115
91815
+ },
91816
+ {
91817
+ "epoch": 0.9808921960886962,
91818
+ "grad_norm": 0.5983749628067017,
91819
+ "learning_rate": 9.107117366411456e-08,
91820
+ "loss": 0.0414,
91821
+ "step": 13116
91822
+ },
91823
+ {
91824
+ "epoch": 0.980966982013985,
91825
+ "grad_norm": 0.6702715158462524,
91826
+ "learning_rate": 9.035850530021428e-08,
91827
+ "loss": 0.0489,
91828
+ "step": 13117
91829
+ },
91830
+ {
91831
+ "epoch": 0.9810417679392738,
91832
+ "grad_norm": 0.44082406163215637,
91833
+ "learning_rate": 8.96486338360314e-08,
91834
+ "loss": 0.0266,
91835
+ "step": 13118
91836
+ },
91837
+ {
91838
+ "epoch": 0.9811165538645626,
91839
+ "grad_norm": 0.904138445854187,
91840
+ "learning_rate": 8.894155931135073e-08,
91841
+ "loss": 0.0778,
91842
+ "step": 13119
91843
+ },
91844
+ {
91845
+ "epoch": 0.9811913397898515,
91846
+ "grad_norm": 0.7605939507484436,
91847
+ "learning_rate": 8.82372817657906e-08,
91848
+ "loss": 0.1153,
91849
+ "step": 13120
91850
+ },
91851
+ {
91852
+ "epoch": 0.9812661257151404,
91853
+ "grad_norm": 0.9398657083511353,
91854
+ "learning_rate": 8.753580123882499e-08,
91855
+ "loss": 0.0912,
91856
+ "step": 13121
91857
+ },
91858
+ {
91859
+ "epoch": 0.9813409116404292,
91860
+ "grad_norm": 0.44631102681159973,
91861
+ "learning_rate": 8.683711776976133e-08,
91862
+ "loss": 0.0276,
91863
+ "step": 13122
91864
+ },
91865
+ {
91866
+ "epoch": 0.9814156975657181,
91867
+ "grad_norm": 0.7926178574562073,
91868
+ "learning_rate": 8.61412313977461e-08,
91869
+ "loss": 0.0831,
91870
+ "step": 13123
91871
+ },
91872
+ {
91873
+ "epoch": 0.981490483491007,
91874
+ "grad_norm": 0.8973842859268188,
91875
+ "learning_rate": 8.544814216179253e-08,
91876
+ "loss": 0.0874,
91877
+ "step": 13124
91878
+ },
91879
+ {
91880
+ "epoch": 0.9815652694162958,
91881
+ "grad_norm": 0.8152311444282532,
91882
+ "learning_rate": 8.475785010072512e-08,
91883
+ "loss": 0.0522,
91884
+ "step": 13125
91885
+ },
91886
+ {
91887
+ "epoch": 0.9816400553415847,
91888
+ "grad_norm": 0.8099016547203064,
91889
+ "learning_rate": 8.40703552532296e-08,
91890
+ "loss": 0.0697,
91891
+ "step": 13126
91892
+ },
91893
+ {
91894
+ "epoch": 0.9817148412668736,
91895
+ "grad_norm": 1.021374225616455,
91896
+ "learning_rate": 8.338565765784179e-08,
91897
+ "loss": 0.1113,
91898
+ "step": 13127
91899
+ },
91900
+ {
91901
+ "epoch": 0.9817896271921625,
91902
+ "grad_norm": 1.1013946533203125,
91903
+ "learning_rate": 8.270375735292547e-08,
91904
+ "loss": 0.1259,
91905
+ "step": 13128
91906
+ },
91907
+ {
91908
+ "epoch": 0.9818644131174513,
91909
+ "grad_norm": 0.6119487881660461,
91910
+ "learning_rate": 8.202465437669449e-08,
91911
+ "loss": 0.0635,
91912
+ "step": 13129
91913
+ },
91914
+ {
91915
+ "epoch": 0.9819391990427402,
91916
+ "grad_norm": 0.6561717391014099,
91917
+ "learning_rate": 8.134834876720732e-08,
91918
+ "loss": 0.0358,
91919
+ "step": 13130
91920
+ },
91921
+ {
91922
+ "epoch": 0.982013984968029,
91923
+ "grad_norm": 0.8759846687316895,
91924
+ "learning_rate": 8.067484056235586e-08,
91925
+ "loss": 0.0563,
91926
+ "step": 13131
91927
+ },
91928
+ {
91929
+ "epoch": 0.9820887708933179,
91930
+ "grad_norm": 0.6102621555328369,
91931
+ "learning_rate": 8.000412979989325e-08,
91932
+ "loss": 0.0394,
91933
+ "step": 13132
91934
+ },
91935
+ {
91936
+ "epoch": 0.9821635568186068,
91937
+ "grad_norm": 0.9649103283882141,
91938
+ "learning_rate": 7.933621651740053e-08,
91939
+ "loss": 0.1023,
91940
+ "step": 13133
91941
+ },
91942
+ {
91943
+ "epoch": 0.9822383427438957,
91944
+ "grad_norm": 1.011826753616333,
91945
+ "learning_rate": 7.867110075231444e-08,
91946
+ "loss": 0.0957,
91947
+ "step": 13134
91948
+ },
91949
+ {
91950
+ "epoch": 0.9823131286691844,
91951
+ "grad_norm": 0.8232734799385071,
91952
+ "learning_rate": 7.800878254188848e-08,
91953
+ "loss": 0.0801,
91954
+ "step": 13135
91955
+ },
91956
+ {
91957
+ "epoch": 0.9823879145944733,
91958
+ "grad_norm": 0.6169044971466064,
91959
+ "learning_rate": 7.734926192325965e-08,
91960
+ "loss": 0.0352,
91961
+ "step": 13136
91962
+ },
91963
+ {
91964
+ "epoch": 0.9824627005197621,
91965
+ "grad_norm": 0.6818606853485107,
91966
+ "learning_rate": 7.669253893337614e-08,
91967
+ "loss": 0.0494,
91968
+ "step": 13137
91969
+ },
91970
+ {
91971
+ "epoch": 0.982537486445051,
91972
+ "grad_norm": 1.4228134155273438,
91973
+ "learning_rate": 7.603861360904186e-08,
91974
+ "loss": 0.3075,
91975
+ "step": 13138
91976
+ },
91977
+ {
91978
+ "epoch": 0.9826122723703399,
91979
+ "grad_norm": 0.8413154482841492,
91980
+ "learning_rate": 7.538748598690526e-08,
91981
+ "loss": 0.0662,
91982
+ "step": 13139
91983
+ },
91984
+ {
91985
+ "epoch": 0.9826870582956287,
91986
+ "grad_norm": 0.8035567402839661,
91987
+ "learning_rate": 7.473915610344828e-08,
91988
+ "loss": 0.0769,
91989
+ "step": 13140
91990
+ },
91991
+ {
91992
+ "epoch": 0.9827618442209176,
91993
+ "grad_norm": 0.9257685542106628,
91994
+ "learning_rate": 7.409362399501407e-08,
91995
+ "loss": 0.0857,
91996
+ "step": 13141
91997
+ },
91998
+ {
91999
+ "epoch": 0.9828366301462065,
92000
+ "grad_norm": 2.2359378337860107,
92001
+ "learning_rate": 7.345088969776259e-08,
92002
+ "loss": 0.2031,
92003
+ "step": 13142
92004
+ },
92005
+ {
92006
+ "epoch": 0.9829114160714953,
92007
+ "grad_norm": 0.49772417545318604,
92008
+ "learning_rate": 7.281095324772613e-08,
92009
+ "loss": 0.0257,
92010
+ "step": 13143
92011
+ },
92012
+ {
92013
+ "epoch": 0.9829862019967842,
92014
+ "grad_norm": 0.7169822454452515,
92015
+ "learning_rate": 7.217381468075934e-08,
92016
+ "loss": 0.0399,
92017
+ "step": 13144
92018
+ },
92019
+ {
92020
+ "epoch": 0.9830609879220731,
92021
+ "grad_norm": 1.068713903427124,
92022
+ "learning_rate": 7.1539474032567e-08,
92023
+ "loss": 0.0722,
92024
+ "step": 13145
92025
  }
92026
  ],
92027
  "logging_steps": 1,
 
92041
  "attributes": {}
92042
  }
92043
  },
92044
+ "total_flos": 9.048973682147328e+16,
92045
  "train_batch_size": 4,
92046
  "trial_name": null,
92047
  "trial_params": null