{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.973821989528796,
"eval_steps": 500,
"global_step": 285,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10471204188481675,
"grad_norm": 1.821414025646637,
"learning_rate": 1.5517241379310346e-05,
"loss": 0.4862,
"step": 10
},
{
"epoch": 0.2094240837696335,
"grad_norm": 0.3942393541183772,
"learning_rate": 3.275862068965517e-05,
"loss": 0.3896,
"step": 20
},
{
"epoch": 0.31413612565445026,
"grad_norm": 0.8255719356243104,
"learning_rate": 5e-05,
"loss": 0.3473,
"step": 30
},
{
"epoch": 0.418848167539267,
"grad_norm": 0.37799831218550856,
"learning_rate": 4.981198836496775e-05,
"loss": 0.3207,
"step": 40
},
{
"epoch": 0.5235602094240838,
"grad_norm": 0.2838496737375545,
"learning_rate": 4.9250781329863606e-05,
"loss": 0.3069,
"step": 50
},
{
"epoch": 0.6282722513089005,
"grad_norm": 0.35016464634119265,
"learning_rate": 4.8324819970868473e-05,
"loss": 0.2959,
"step": 60
},
{
"epoch": 0.7329842931937173,
"grad_norm": 0.265970946973345,
"learning_rate": 4.7048031608708876e-05,
"loss": 0.2888,
"step": 70
},
{
"epoch": 0.837696335078534,
"grad_norm": 0.3283794303490618,
"learning_rate": 4.5439620328789593e-05,
"loss": 0.2812,
"step": 80
},
{
"epoch": 0.9424083769633508,
"grad_norm": 0.2933681591769702,
"learning_rate": 4.352377813387398e-05,
"loss": 0.2821,
"step": 90
},
{
"epoch": 1.0418848167539267,
"grad_norm": 0.784193010247334,
"learning_rate": 4.1329321073844415e-05,
"loss": 0.2646,
"step": 100
},
{
"epoch": 1.1465968586387434,
"grad_norm": 0.43774945809310173,
"learning_rate": 3.888925582549006e-05,
"loss": 0.2466,
"step": 110
},
{
"epoch": 1.2513089005235603,
"grad_norm": 0.24107111209505053,
"learning_rate": 3.624028324136517e-05,
"loss": 0.2429,
"step": 120
},
{
"epoch": 1.356020942408377,
"grad_norm": 0.20357460785884127,
"learning_rate": 3.34222463348055e-05,
"loss": 0.2408,
"step": 130
},
{
"epoch": 1.4607329842931938,
"grad_norm": 0.22761632175708865,
"learning_rate": 3.0477531003921745e-05,
"loss": 0.2407,
"step": 140
},
{
"epoch": 1.5654450261780104,
"grad_norm": 0.201935897320554,
"learning_rate": 2.7450428508239024e-05,
"loss": 0.237,
"step": 150
},
{
"epoch": 1.6701570680628273,
"grad_norm": 0.17116285919322816,
"learning_rate": 2.4386469286927196e-05,
"loss": 0.2366,
"step": 160
},
{
"epoch": 1.7748691099476441,
"grad_norm": 0.16436756820682696,
"learning_rate": 2.1331738138615958e-05,
"loss": 0.2357,
"step": 170
},
{
"epoch": 1.8795811518324608,
"grad_norm": 0.16052539184685044,
"learning_rate": 1.8332181063127545e-05,
"loss": 0.2338,
"step": 180
},
{
"epoch": 1.9842931937172774,
"grad_norm": 0.1441672107096996,
"learning_rate": 1.5432914190872757e-05,
"loss": 0.235,
"step": 190
},
{
"epoch": 2.0837696335078535,
"grad_norm": 0.18363079335917978,
"learning_rate": 1.2677545194255402e-05,
"loss": 0.207,
"step": 200
},
{
"epoch": 2.18848167539267,
"grad_norm": 0.15661072667443002,
"learning_rate": 1.0107517387689166e-05,
"loss": 0.2003,
"step": 210
},
{
"epoch": 2.2931937172774868,
"grad_norm": 0.13738033977955236,
"learning_rate": 7.761486381573327e-06,
"loss": 0.2002,
"step": 220
},
{
"epoch": 2.3979057591623034,
"grad_norm": 0.11864758554083908,
"learning_rate": 5.674738665931575e-06,
"loss": 0.2016,
"step": 230
},
{
"epoch": 2.5026178010471205,
"grad_norm": 0.11174553605583903,
"learning_rate": 3.878660868757323e-06,
"loss": 0.2018,
"step": 240
},
{
"epoch": 2.607329842931937,
"grad_norm": 0.10468557565480724,
"learning_rate": 2.4002676719139166e-06,
"loss": 0.1992,
"step": 250
},
{
"epoch": 2.712041884816754,
"grad_norm": 0.11078267776032583,
"learning_rate": 1.2617954851740832e-06,
"loss": 0.1994,
"step": 260
},
{
"epoch": 2.816753926701571,
"grad_norm": 0.09962367034420198,
"learning_rate": 4.803679899192392e-07,
"loss": 0.2002,
"step": 270
},
{
"epoch": 2.9214659685863875,
"grad_norm": 0.10060873138847887,
"learning_rate": 6.773858303274483e-08,
"loss": 0.1993,
"step": 280
},
{
"epoch": 2.973821989528796,
"step": 285,
"total_flos": 2039941919932416.0,
"train_loss": 0.25715315007326894,
"train_runtime": 42232.1226,
"train_samples_per_second": 3.469,
"train_steps_per_second": 0.007
}
],
"logging_steps": 10,
"max_steps": 285,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2039941919932416.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}