cimol commited on
Commit
e7a8046
·
verified ·
1 Parent(s): 749808f

Training in progress, step 81, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6915d42db371058ef32156ea0decca77d5b1559401f47183d68360b6c2a6b80e
3
  size 645975704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b8846489eb25da7868d33d3c2fb9373b953ff5157c7f5136e444dd20cf1b9a9
3
  size 645975704
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55365e54885b298af3274c60138d308d632fda42a05db76b0bcefb67fcba0abc
3
  size 328468404
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8009d9e5c44c7201a70ed9b7d64a2f80a641db42b0fd02cdd43bc700fc14840
3
  size 328468404
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5f5c2c2a16e860375be0e9f837ec1869b4e6d5aae0646458ea20bbc2be252ab
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a8b1a975fb264be0975b1b22a3134f349edb51d892d5238d951b2fb9cf37b99
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5933e6699ee8c37d0d6cf7c18bfade48c78d2e4195e4053ab5ac56f6ab72ca12
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15c83c4658d3ed3264e77fd045cb34ce040430ae8e59f34b160be9f9a7916ece
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.7665985822677612,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
- "epoch": 1.8518518518518519,
5
  "eval_steps": 50,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -373,6 +373,223 @@
373
  "eval_samples_per_second": 13.64,
374
  "eval_steps_per_second": 3.558,
375
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  }
377
  ],
378
  "logging_steps": 1,
@@ -396,12 +613,12 @@
396
  "should_evaluate": false,
397
  "should_log": false,
398
  "should_save": true,
399
- "should_training_stop": false
400
  },
401
  "attributes": {}
402
  }
403
  },
404
- "total_flos": 7.460472195907584e+16,
405
  "train_batch_size": 8,
406
  "trial_name": null,
407
  "trial_params": null
 
1
  {
2
  "best_metric": 1.7665985822677612,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
+ "epoch": 3.0,
5
  "eval_steps": 50,
6
+ "global_step": 81,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
373
  "eval_samples_per_second": 13.64,
374
  "eval_steps_per_second": 3.558,
375
  "step": 50
376
+ },
377
+ {
378
+ "epoch": 1.8888888888888888,
379
+ "grad_norm": 1.49898099899292,
380
+ "learning_rate": 2.6566130414018495e-05,
381
+ "loss": 1.427,
382
+ "step": 51
383
+ },
384
+ {
385
+ "epoch": 1.925925925925926,
386
+ "grad_norm": 1.2517292499542236,
387
+ "learning_rate": 2.5071837758299613e-05,
388
+ "loss": 1.124,
389
+ "step": 52
390
+ },
391
+ {
392
+ "epoch": 1.9629629629629628,
393
+ "grad_norm": 1.6222271919250488,
394
+ "learning_rate": 2.359697994639589e-05,
395
+ "loss": 1.318,
396
+ "step": 53
397
+ },
398
+ {
399
+ "epoch": 2.0,
400
+ "grad_norm": 1.829454779624939,
401
+ "learning_rate": 2.2144444081733517e-05,
402
+ "loss": 1.7384,
403
+ "step": 54
404
+ },
405
+ {
406
+ "epoch": 2.037037037037037,
407
+ "grad_norm": 1.2274819612503052,
408
+ "learning_rate": 2.071707357147872e-05,
409
+ "loss": 0.9252,
410
+ "step": 55
411
+ },
412
+ {
413
+ "epoch": 2.074074074074074,
414
+ "grad_norm": 1.5283606052398682,
415
+ "learning_rate": 1.931766256043475e-05,
416
+ "loss": 1.0436,
417
+ "step": 56
418
+ },
419
+ {
420
+ "epoch": 2.111111111111111,
421
+ "grad_norm": 1.5208336114883423,
422
+ "learning_rate": 1.7948950461372128e-05,
423
+ "loss": 1.2807,
424
+ "step": 57
425
+ },
426
+ {
427
+ "epoch": 2.148148148148148,
428
+ "grad_norm": 1.5189377069473267,
429
+ "learning_rate": 1.6613616592499547e-05,
430
+ "loss": 0.9818,
431
+ "step": 58
432
+ },
433
+ {
434
+ "epoch": 2.185185185185185,
435
+ "grad_norm": 1.881172776222229,
436
+ "learning_rate": 1.5314274932572676e-05,
437
+ "loss": 1.1691,
438
+ "step": 59
439
+ },
440
+ {
441
+ "epoch": 2.2222222222222223,
442
+ "grad_norm": 1.8424291610717773,
443
+ "learning_rate": 1.4053469003907992e-05,
444
+ "loss": 1.246,
445
+ "step": 60
446
+ },
447
+ {
448
+ "epoch": 2.259259259259259,
449
+ "grad_norm": 1.9025232791900635,
450
+ "learning_rate": 1.2833666893318349e-05,
451
+ "loss": 1.0384,
452
+ "step": 61
453
+ },
454
+ {
455
+ "epoch": 2.2962962962962963,
456
+ "grad_norm": 1.7848095893859863,
457
+ "learning_rate": 1.165725642071722e-05,
458
+ "loss": 1.0459,
459
+ "step": 62
460
+ },
461
+ {
462
+ "epoch": 2.3333333333333335,
463
+ "grad_norm": 1.7614284753799438,
464
+ "learning_rate": 1.0526540464849008e-05,
465
+ "loss": 1.1616,
466
+ "step": 63
467
+ },
468
+ {
469
+ "epoch": 2.3703703703703702,
470
+ "grad_norm": 2.083524465560913,
471
+ "learning_rate": 9.443732455295803e-06,
472
+ "loss": 1.2212,
473
+ "step": 64
474
+ },
475
+ {
476
+ "epoch": 2.4074074074074074,
477
+ "grad_norm": 2.120060682296753,
478
+ "learning_rate": 8.410952039585034e-06,
479
+ "loss": 1.2193,
480
+ "step": 65
481
+ },
482
+ {
483
+ "epoch": 2.4444444444444446,
484
+ "grad_norm": 1.8416856527328491,
485
+ "learning_rate": 7.430220933879868e-06,
486
+ "loss": 1.0275,
487
+ "step": 66
488
+ },
489
+ {
490
+ "epoch": 2.4814814814814814,
491
+ "grad_norm": 1.6970349550247192,
492
+ "learning_rate": 6.503458965374907e-06,
493
+ "loss": 0.9866,
494
+ "step": 67
495
+ },
496
+ {
497
+ "epoch": 2.5185185185185186,
498
+ "grad_norm": 1.7311978340148926,
499
+ "learning_rate": 5.632480314144302e-06,
500
+ "loss": 1.1222,
501
+ "step": 68
502
+ },
503
+ {
504
+ "epoch": 2.5555555555555554,
505
+ "grad_norm": 1.8647996187210083,
506
+ "learning_rate": 4.818989961799024e-06,
507
+ "loss": 1.1971,
508
+ "step": 69
509
+ },
510
+ {
511
+ "epoch": 2.5925925925925926,
512
+ "grad_norm": 1.8562637567520142,
513
+ "learning_rate": 4.064580353905361e-06,
514
+ "loss": 1.0966,
515
+ "step": 70
516
+ },
517
+ {
518
+ "epoch": 2.6296296296296298,
519
+ "grad_norm": 2.1647939682006836,
520
+ "learning_rate": 3.3707282826978684e-06,
521
+ "loss": 1.2084,
522
+ "step": 71
523
+ },
524
+ {
525
+ "epoch": 2.6666666666666665,
526
+ "grad_norm": 2.267409563064575,
527
+ "learning_rate": 2.7387919961892603e-06,
528
+ "loss": 1.2536,
529
+ "step": 72
530
+ },
531
+ {
532
+ "epoch": 2.7037037037037037,
533
+ "grad_norm": 1.875108003616333,
534
+ "learning_rate": 2.170008539336139e-06,
535
+ "loss": 1.0385,
536
+ "step": 73
537
+ },
538
+ {
539
+ "epoch": 2.7407407407407405,
540
+ "grad_norm": 1.690350890159607,
541
+ "learning_rate": 1.665491332465404e-06,
542
+ "loss": 0.9002,
543
+ "step": 74
544
+ },
545
+ {
546
+ "epoch": 2.7777777777777777,
547
+ "grad_norm": 1.8636929988861084,
548
+ "learning_rate": 1.2262279917016548e-06,
549
+ "loss": 1.1853,
550
+ "step": 75
551
+ },
552
+ {
553
+ "epoch": 2.814814814814815,
554
+ "grad_norm": 1.7024513483047485,
555
+ "learning_rate": 8.530783956622628e-07,
556
+ "loss": 1.1387,
557
+ "step": 76
558
+ },
559
+ {
560
+ "epoch": 2.851851851851852,
561
+ "grad_norm": 1.7570838928222656,
562
+ "learning_rate": 5.467730022046046e-07,
563
+ "loss": 0.9747,
564
+ "step": 77
565
+ },
566
+ {
567
+ "epoch": 2.888888888888889,
568
+ "grad_norm": 1.8747435808181763,
569
+ "learning_rate": 3.0791141852049006e-07,
570
+ "loss": 1.1896,
571
+ "step": 78
572
+ },
573
+ {
574
+ "epoch": 2.925925925925926,
575
+ "grad_norm": 1.7294906377792358,
576
+ "learning_rate": 1.369612273769316e-07,
577
+ "loss": 0.9973,
578
+ "step": 79
579
+ },
580
+ {
581
+ "epoch": 2.962962962962963,
582
+ "grad_norm": 1.958431363105774,
583
+ "learning_rate": 3.4257071800923855e-08,
584
+ "loss": 1.2993,
585
+ "step": 80
586
+ },
587
+ {
588
+ "epoch": 3.0,
589
+ "grad_norm": 1.917493462562561,
590
+ "learning_rate": 0.0,
591
+ "loss": 1.2325,
592
+ "step": 81
593
  }
594
  ],
595
  "logging_steps": 1,
 
613
  "should_evaluate": false,
614
  "should_log": false,
615
  "should_save": true,
616
+ "should_training_stop": true
617
  },
618
  "attributes": {}
619
  }
620
  },
621
+ "total_flos": 1.2072723618988032e+17,
622
  "train_batch_size": 8,
623
  "trial_name": null,
624
  "trial_params": null