{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0025662959794697,
"eval_steps": 147,
"global_step": 293,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003421727972626176,
"eval_loss": 4.355165481567383,
"eval_runtime": 52.6403,
"eval_samples_per_second": 18.712,
"eval_steps_per_second": 1.178,
"step": 1
},
{
"epoch": 0.03421727972626176,
"grad_norm": 4.47415018081665,
"learning_rate": 2e-05,
"loss": 4.2505,
"step": 10
},
{
"epoch": 0.06843455945252352,
"grad_norm": 1.6644771099090576,
"learning_rate": 4e-05,
"loss": 3.3993,
"step": 20
},
{
"epoch": 0.10265183917878529,
"grad_norm": 1.3752275705337524,
"learning_rate": 6e-05,
"loss": 2.8647,
"step": 30
},
{
"epoch": 0.13686911890504705,
"grad_norm": 1.3559530973434448,
"learning_rate": 8e-05,
"loss": 2.6811,
"step": 40
},
{
"epoch": 0.1710863986313088,
"grad_norm": 1.0810866355895996,
"learning_rate": 0.0001,
"loss": 2.5526,
"step": 50
},
{
"epoch": 0.20530367835757057,
"grad_norm": 1.128179669380188,
"learning_rate": 0.00012,
"loss": 2.5003,
"step": 60
},
{
"epoch": 0.23952095808383234,
"grad_norm": 1.2589479684829712,
"learning_rate": 0.00014,
"loss": 2.421,
"step": 70
},
{
"epoch": 0.2737382378100941,
"grad_norm": 1.1778737306594849,
"learning_rate": 0.00016,
"loss": 2.3997,
"step": 80
},
{
"epoch": 0.30795551753635586,
"grad_norm": 1.136731743812561,
"learning_rate": 0.00018,
"loss": 2.3794,
"step": 90
},
{
"epoch": 0.3421727972626176,
"grad_norm": 1.1050846576690674,
"learning_rate": 0.0002,
"loss": 2.339,
"step": 100
},
{
"epoch": 0.3763900769888794,
"grad_norm": 1.1404505968093872,
"learning_rate": 0.0002,
"loss": 2.3492,
"step": 110
},
{
"epoch": 0.41060735671514115,
"grad_norm": 1.0500866174697876,
"learning_rate": 0.0002,
"loss": 2.2997,
"step": 120
},
{
"epoch": 0.4448246364414029,
"grad_norm": 1.2117984294891357,
"learning_rate": 0.0002,
"loss": 2.325,
"step": 130
},
{
"epoch": 0.47904191616766467,
"grad_norm": 1.127273440361023,
"learning_rate": 0.0002,
"loss": 2.2696,
"step": 140
},
{
"epoch": 0.5029940119760479,
"eval_loss": 2.2770884037017822,
"eval_runtime": 53.1905,
"eval_samples_per_second": 18.518,
"eval_steps_per_second": 1.166,
"step": 147
},
{
"epoch": 0.5132591958939264,
"grad_norm": 1.189348578453064,
"learning_rate": 0.0002,
"loss": 2.3341,
"step": 150
},
{
"epoch": 0.5474764756201882,
"grad_norm": 1.0971434116363525,
"learning_rate": 0.0002,
"loss": 2.2627,
"step": 160
},
{
"epoch": 0.58169375534645,
"grad_norm": 1.0656123161315918,
"learning_rate": 0.0002,
"loss": 2.2739,
"step": 170
},
{
"epoch": 0.6159110350727117,
"grad_norm": 1.1617597341537476,
"learning_rate": 0.0002,
"loss": 2.2446,
"step": 180
},
{
"epoch": 0.6501283147989735,
"grad_norm": 1.1609177589416504,
"learning_rate": 0.0002,
"loss": 2.2269,
"step": 190
},
{
"epoch": 0.6843455945252352,
"grad_norm": 1.0725802183151245,
"learning_rate": 0.0002,
"loss": 2.2471,
"step": 200
},
{
"epoch": 0.718562874251497,
"grad_norm": 1.1435920000076294,
"learning_rate": 0.0002,
"loss": 2.2892,
"step": 210
},
{
"epoch": 0.7527801539777588,
"grad_norm": 1.1242313385009766,
"learning_rate": 0.0002,
"loss": 2.2795,
"step": 220
},
{
"epoch": 0.7869974337040205,
"grad_norm": 1.0565266609191895,
"learning_rate": 0.0002,
"loss": 2.2515,
"step": 230
},
{
"epoch": 0.8212147134302823,
"grad_norm": 1.1212342977523804,
"learning_rate": 0.0002,
"loss": 2.2805,
"step": 240
},
{
"epoch": 0.8554319931565441,
"grad_norm": 1.124230146408081,
"learning_rate": 0.0002,
"loss": 2.2378,
"step": 250
},
{
"epoch": 0.8896492728828058,
"grad_norm": 1.0999276638031006,
"learning_rate": 0.0002,
"loss": 2.222,
"step": 260
},
{
"epoch": 0.9238665526090676,
"grad_norm": 1.1538196802139282,
"learning_rate": 0.0002,
"loss": 2.2267,
"step": 270
},
{
"epoch": 0.9580838323353293,
"grad_norm": 1.2899906635284424,
"learning_rate": 0.0002,
"loss": 2.2384,
"step": 280
},
{
"epoch": 0.9923011120615911,
"grad_norm": 1.3248995542526245,
"learning_rate": 0.0002,
"loss": 2.236,
"step": 290
}
],
"logging_steps": 10,
"max_steps": 293,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 147,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.826112117351383e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
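
For reference, a minimal sketch (not part of the checkpoint itself) of reading the log_history above back with plain Python and summarizing the logged train/eval losses. The checkpoint-293/trainer_state.json path is an assumption about where this file is saved; adjust it to the actual checkpoint directory.

# Sketch: load trainer_state.json and pull out the loss curve recorded above.
import json

with open("checkpoint-293/trainer_state.json") as f:  # hypothetical path
    state = json.load(f)

# Entries with "loss" are training logs (every logging_steps = 10 steps);
# entries with "eval_loss" are evaluation logs (every eval_steps = 147 steps).
train_points = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_points = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

print(f"training log points: {len(train_points)}")
print(f"eval points: {eval_points}")
last_step, last_loss = train_points[-1]
print(f"last logged training loss: {last_loss:.4f} at step {last_step}")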