shibajustfor's picture
Training in progress, step 200, checkpoint
dcb9d7d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.35842293906810035,
"eval_steps": 50,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0017921146953405018,
"eval_loss": 3.68882155418396,
"eval_runtime": 2.8556,
"eval_samples_per_second": 82.293,
"eval_steps_per_second": 41.322,
"step": 1
},
{
"epoch": 0.017921146953405017,
"grad_norm": 4.795637607574463,
"learning_rate": 0.0002,
"loss": 14.3965,
"step": 10
},
{
"epoch": 0.035842293906810034,
"grad_norm": 5.258082389831543,
"learning_rate": 0.0002,
"loss": 13.813,
"step": 20
},
{
"epoch": 0.053763440860215055,
"grad_norm": 4.523959636688232,
"learning_rate": 0.0002,
"loss": 13.6853,
"step": 30
},
{
"epoch": 0.07168458781362007,
"grad_norm": 4.891673564910889,
"learning_rate": 0.0002,
"loss": 13.4352,
"step": 40
},
{
"epoch": 0.08960573476702509,
"grad_norm": 5.062295436859131,
"learning_rate": 0.0002,
"loss": 13.0299,
"step": 50
},
{
"epoch": 0.08960573476702509,
"eval_loss": 3.3310461044311523,
"eval_runtime": 2.9394,
"eval_samples_per_second": 79.947,
"eval_steps_per_second": 40.144,
"step": 50
},
{
"epoch": 0.10752688172043011,
"grad_norm": 5.369076728820801,
"learning_rate": 0.0002,
"loss": 13.1517,
"step": 60
},
{
"epoch": 0.12544802867383512,
"grad_norm": 5.329068183898926,
"learning_rate": 0.0002,
"loss": 13.13,
"step": 70
},
{
"epoch": 0.14336917562724014,
"grad_norm": 5.421701431274414,
"learning_rate": 0.0002,
"loss": 13.0658,
"step": 80
},
{
"epoch": 0.16129032258064516,
"grad_norm": 5.6926751136779785,
"learning_rate": 0.0002,
"loss": 13.0606,
"step": 90
},
{
"epoch": 0.17921146953405018,
"grad_norm": 4.89196252822876,
"learning_rate": 0.0002,
"loss": 13.2831,
"step": 100
},
{
"epoch": 0.17921146953405018,
"eval_loss": 3.270508289337158,
"eval_runtime": 3.02,
"eval_samples_per_second": 77.814,
"eval_steps_per_second": 39.073,
"step": 100
},
{
"epoch": 0.1971326164874552,
"grad_norm": 5.2668070793151855,
"learning_rate": 0.0002,
"loss": 13.1221,
"step": 110
},
{
"epoch": 0.21505376344086022,
"grad_norm": 4.916939735412598,
"learning_rate": 0.0002,
"loss": 12.991,
"step": 120
},
{
"epoch": 0.23297491039426524,
"grad_norm": 5.837742805480957,
"learning_rate": 0.0002,
"loss": 13.1578,
"step": 130
},
{
"epoch": 0.25089605734767023,
"grad_norm": 5.586097240447998,
"learning_rate": 0.0002,
"loss": 12.6136,
"step": 140
},
{
"epoch": 0.26881720430107525,
"grad_norm": 5.033875465393066,
"learning_rate": 0.0002,
"loss": 13.0117,
"step": 150
},
{
"epoch": 0.26881720430107525,
"eval_loss": 3.2495903968811035,
"eval_runtime": 2.9557,
"eval_samples_per_second": 79.507,
"eval_steps_per_second": 39.923,
"step": 150
},
{
"epoch": 0.2867383512544803,
"grad_norm": 4.852773189544678,
"learning_rate": 0.0002,
"loss": 12.8406,
"step": 160
},
{
"epoch": 0.3046594982078853,
"grad_norm": 5.590361595153809,
"learning_rate": 0.0002,
"loss": 12.821,
"step": 170
},
{
"epoch": 0.3225806451612903,
"grad_norm": 6.373524188995361,
"learning_rate": 0.0002,
"loss": 12.7874,
"step": 180
},
{
"epoch": 0.34050179211469533,
"grad_norm": 4.901456356048584,
"learning_rate": 0.0002,
"loss": 12.9965,
"step": 190
},
{
"epoch": 0.35842293906810035,
"grad_norm": 5.205903053283691,
"learning_rate": 0.0002,
"loss": 12.7484,
"step": 200
},
{
"epoch": 0.35842293906810035,
"eval_loss": 3.230069875717163,
"eval_runtime": 3.0502,
"eval_samples_per_second": 77.044,
"eval_steps_per_second": 38.686,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1501393059840000.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}