File size: 2,392 Bytes
abe04a2 a335c8f abe04a2 a335c8f abe04a2 a335c8f abe04a2 a335c8f abe04a2 a335c8f abe04a2 a335c8f abe04a2 a335c8f abe04a2 a335c8f abe04a2 a335c8f abe04a2 a335c8f abe04a2 a335c8f abe04a2 a335c8f abe04a2 a335c8f abe04a2 a335c8f abe04a2 a335c8f abe04a2 a335c8f abe04a2 a335c8f abe04a2 a335c8f abe04a2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 500,
"global_step": 2060,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.4854368932038835,
"grad_norm": 0.6541855931282043,
"learning_rate": 8.786407766990292e-05,
"loss": 0.8179,
"step": 250
},
{
"epoch": 0.970873786407767,
"grad_norm": 0.48072507977485657,
"learning_rate": 7.572815533980583e-05,
"loss": 0.4138,
"step": 500
},
{
"epoch": 1.4563106796116505,
"grad_norm": 0.4883740544319153,
"learning_rate": 6.359223300970875e-05,
"loss": 0.3734,
"step": 750
},
{
"epoch": 1.941747572815534,
"grad_norm": 0.45032238960266113,
"learning_rate": 5.145631067961165e-05,
"loss": 0.3606,
"step": 1000
},
{
"epoch": 2.4271844660194173,
"grad_norm": 0.42356109619140625,
"learning_rate": 3.9320388349514564e-05,
"loss": 0.3521,
"step": 1250
},
{
"epoch": 2.912621359223301,
"grad_norm": 0.4464420676231384,
"learning_rate": 2.7184466019417475e-05,
"loss": 0.3473,
"step": 1500
},
{
"epoch": 3.3980582524271843,
"grad_norm": 0.45505988597869873,
"learning_rate": 1.5048543689320387e-05,
"loss": 0.3394,
"step": 1750
},
{
"epoch": 3.883495145631068,
"grad_norm": 0.40663233399391174,
"learning_rate": 2.912621359223301e-06,
"loss": 0.3358,
"step": 2000
},
{
"epoch": 4.0,
"step": 2060,
"total_flos": 5.02338223728341e+17,
"train_loss": 0.41515450986843666,
"train_runtime": 8856.6028,
"train_samples_per_second": 59.543,
"train_steps_per_second": 0.233
}
],
"logging_steps": 250,
"max_steps": 2060,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5.02338223728341e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
|