nttx committed (verified)
Commit d7ceb43 · 1 Parent(s): 5820b17

Training in progress, step 2850, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66cf16cd354c25c44c4affc7ad3953ecc9f969c500d44e5a55a98a3083ff528c
+oid sha256:7e09c2a5dcf2e7fc1395c6f500fd04f6e9ed509fdd50cd012a60a68626bc519e
 size 39131224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4331c4dc32baee3c4394c03eab5c75389c7b5fc5069e5147279ab8c9ae82e89d
+oid sha256:489e57bb05f8b7b131b825404e7f5532f91305b731eb0b5d4894936cbda854c0
 size 78510334
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4aa67eb9bf7ad79dc7716ba58f0306c8677d78abe85ca7f2754cd79281aaf012
+oid sha256:e1c26ee950e9a69b9213b7e74235a077ae09cbcb03643e4b3877a05d223c05e8
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:126ec9e64208d79536bffb656694fa4cf727027deefd223caf2c667dc5c78bc8
+oid sha256:5f76a102ac755d5bd19e6f0e55ad50e49a03a307409b32eeac7fb5a2a13752ba
 size 1064
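
Each of the four files above is stored through Git LFS, so the commit only rewrites the three-line pointer (version, sha256 oid, size) rather than the binary blob itself. The following is a minimal sketch, assuming you have a downloaded blob and its pointer text locally, of checking the recorded oid and size against the actual file; the paths and helper names are illustrative and not part of this repository.

# Minimal sketch: verify a downloaded checkpoint blob against the sha256 oid
# and size recorded in its Git LFS pointer (the three-line format shown above).
# File and pointer paths below are assumptions for illustration.
import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_text: str) -> dict:
    """Parse the pointer lines: version / oid sha256:<hex> / size <bytes>."""
    fields = {}
    for line in pointer_text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify(blob_path: str, pointer_path: str) -> bool:
    fields = parse_lfs_pointer(Path(pointer_path).read_text())
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    data = Path(blob_path).read_bytes()
    return (
        len(data) == expected_size
        and hashlib.sha256(data).hexdigest() == expected_oid
    )

# Example with hypothetical paths:
# verify("adapter_model.safetensors", "adapter_model.safetensors.pointer")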
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.7818862795829773,
   "best_model_checkpoint": "miner_id_24/checkpoint-2700",
-  "epoch": 4.007419773696903,
+  "epoch": 4.230012984603969,
   "eval_steps": 150,
-  "global_step": 2700,
+  "global_step": 2850,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -537,6 +537,35 @@
       "eval_samples_per_second": 40.417,
       "eval_steps_per_second": 20.209,
       "step": 2700
+    },
+    {
+      "epoch": 4.081617510665924,
+      "grad_norm": 0.18808983266353607,
+      "learning_rate": 3.5232131185484076e-06,
+      "loss": 0.7197,
+      "step": 2750
+    },
+    {
+      "epoch": 4.155815247634947,
+      "grad_norm": 0.1956692934036255,
+      "learning_rate": 2.259661018213333e-06,
+      "loss": 0.7152,
+      "step": 2800
+    },
+    {
+      "epoch": 4.230012984603969,
+      "grad_norm": 0.1999446451663971,
+      "learning_rate": 1.2731645278655445e-06,
+      "loss": 0.7127,
+      "step": 2850
+    },
+    {
+      "epoch": 4.230012984603969,
+      "eval_loss": 0.7822389006614685,
+      "eval_runtime": 14.2177,
+      "eval_samples_per_second": 39.95,
+      "eval_steps_per_second": 19.975,
+      "step": 2850
     }
   ],
   "logging_steps": 50,
@@ -551,7 +580,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -565,7 +594,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.07814914326528e+16,
+  "total_flos": 3.24915742900224e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null