JoshMe1 commited on
Commit
12738d4
·
verified ·
1 Parent(s): d4ba22d

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9922e1874360c5d3ded2c9ed2fd99b63837902088c92eda9829f69ff472cd492
3
  size 406863720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fde924ed5b2854fc50c7f4413752cdc2607456bd0589555fb2e0283347f9026
3
  size 406863720
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2bae67899bf5f6de8b4b2c4478b5f9b8531b9f6eea93f8911f3fd1ed75119f9
3
  size 813846202
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3a07fc0c34fe9f0a0ec5c8add79702d0ca9bbcdc4720dfec2a75113c11ba395
3
  size 813846202
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd500a52508235aa1058024023c5d8d67f7eeca6132f7f072b4968a268b130f2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c908825aa92f46140b6d696f8d61661b3051ae819a433addd295d1cfeb1a5121
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ad14be3cfc4bdd4617df51db7c5a619cc49827d154811e9ceac7d05200dbc79
3
  size 1192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e20bc151958b8024ee9b8d86cd744a0bdd0c11aca692c6be0481a19a04fd29d6
3
  size 1192
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.1825116872787476,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-400",
4
- "epoch": 0.03181199113240747,
5
  "eval_steps": 100,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -327,6 +327,84 @@
327
  "eval_samples_per_second": 1.991,
328
  "eval_steps_per_second": 0.996,
329
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
  }
331
  ],
332
  "logging_steps": 10,
@@ -350,12 +428,12 @@
350
  "should_evaluate": false,
351
  "should_log": false,
352
  "should_save": true,
353
- "should_training_stop": false
354
  },
355
  "attributes": {}
356
  }
357
  },
358
- "total_flos": 3.3238963080658944e+17,
359
  "train_batch_size": 2,
360
  "trial_name": null,
361
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.1672043800354004,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
+ "epoch": 0.03976498891550934,
5
  "eval_steps": 100,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
327
  "eval_samples_per_second": 1.991,
328
  "eval_steps_per_second": 0.996,
329
  "step": 400
330
+ },
331
+ {
332
+ "epoch": 0.03260729091071766,
333
+ "grad_norm": 0.5597347021102905,
334
+ "learning_rate": 5e-06,
335
+ "loss": 0.9983,
336
+ "step": 410
337
+ },
338
+ {
339
+ "epoch": 0.033402590689027845,
340
+ "grad_norm": 0.7700969576835632,
341
+ "learning_rate": 5e-06,
342
+ "loss": 1.0358,
343
+ "step": 420
344
+ },
345
+ {
346
+ "epoch": 0.03419789046733803,
347
+ "grad_norm": 0.719897449016571,
348
+ "learning_rate": 5e-06,
349
+ "loss": 0.9576,
350
+ "step": 430
351
+ },
352
+ {
353
+ "epoch": 0.03499319024564822,
354
+ "grad_norm": 0.6075172424316406,
355
+ "learning_rate": 5e-06,
356
+ "loss": 0.9278,
357
+ "step": 440
358
+ },
359
+ {
360
+ "epoch": 0.035788490023958404,
361
+ "grad_norm": 0.595727801322937,
362
+ "learning_rate": 5e-06,
363
+ "loss": 0.9158,
364
+ "step": 450
365
+ },
366
+ {
367
+ "epoch": 0.03658378980226859,
368
+ "grad_norm": 0.8409410715103149,
369
+ "learning_rate": 5e-06,
370
+ "loss": 1.0138,
371
+ "step": 460
372
+ },
373
+ {
374
+ "epoch": 0.03737908958057878,
375
+ "grad_norm": 0.8422474265098572,
376
+ "learning_rate": 5e-06,
377
+ "loss": 0.9747,
378
+ "step": 470
379
+ },
380
+ {
381
+ "epoch": 0.03817438935888896,
382
+ "grad_norm": 0.7934198975563049,
383
+ "learning_rate": 5e-06,
384
+ "loss": 0.9566,
385
+ "step": 480
386
+ },
387
+ {
388
+ "epoch": 0.03896968913719915,
389
+ "grad_norm": 0.8485593795776367,
390
+ "learning_rate": 5e-06,
391
+ "loss": 1.0365,
392
+ "step": 490
393
+ },
394
+ {
395
+ "epoch": 0.03976498891550934,
396
+ "grad_norm": 0.7482882142066956,
397
+ "learning_rate": 5e-06,
398
+ "loss": 0.9254,
399
+ "step": 500
400
+ },
401
+ {
402
+ "epoch": 0.03976498891550934,
403
+ "eval_loss": 1.1672043800354004,
404
+ "eval_runtime": 5317.9321,
405
+ "eval_samples_per_second": 1.991,
406
+ "eval_steps_per_second": 0.996,
407
+ "step": 500
408
  }
409
  ],
410
  "logging_steps": 10,
 
428
  "should_evaluate": false,
429
  "should_log": false,
430
  "should_save": true,
431
+ "should_training_stop": true
432
  },
433
  "attributes": {}
434
  }
435
  },
436
+ "total_flos": 4.154870385082368e+17,
437
  "train_batch_size": 2,
438
  "trial_name": null,
439
  "trial_params": null