MLM_pretrained_RomanUrdu_Urdu / trainer_state.json
Muhammad Umer Tariq Butt
Upload MLM pretrained RomanUrdu-Urdu model
71564f6
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.999846360668331,
"eval_steps": 500,
"global_step": 52066,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.19204916458613405,
"grad_norm": 3.8910398483276367,
"learning_rate": 4.8080276550797006e-05,
"loss": 3.5063,
"step": 5000
},
{
"epoch": 0.3840983291722681,
"grad_norm": 4.762826919555664,
"learning_rate": 4.616055310159401e-05,
"loss": 2.4176,
"step": 10000
},
{
"epoch": 0.5761474937584021,
"grad_norm": 10.465607643127441,
"learning_rate": 4.424044555406184e-05,
"loss": 2.13,
"step": 15000
},
{
"epoch": 0.7681966583445362,
"grad_norm": 3.7013731002807617,
"learning_rate": 4.232072210485885e-05,
"loss": 1.9656,
"step": 20000
},
{
"epoch": 0.9602458229306703,
"grad_norm": 2.268207311630249,
"learning_rate": 4.040061455732668e-05,
"loss": 1.8511,
"step": 25000
},
{
"epoch": 1.0,
"eval_loss": 1.610188364982605,
"eval_runtime": 6.8213,
"eval_samples_per_second": 977.969,
"eval_steps_per_second": 7.77,
"step": 26035
},
{
"epoch": 1.1522949875168043,
"grad_norm": 6.3099284172058105,
"learning_rate": 3.8480891108123676e-05,
"loss": 1.7593,
"step": 30000
},
{
"epoch": 1.3443441521029382,
"grad_norm": 3.523637294769287,
"learning_rate": 3.656116765892069e-05,
"loss": 1.6915,
"step": 35000
},
{
"epoch": 1.5363933166890724,
"grad_norm": 3.6293234825134277,
"learning_rate": 3.464144420971769e-05,
"loss": 1.632,
"step": 40000
},
{
"epoch": 1.7284424812752066,
"grad_norm": 4.846088886260986,
"learning_rate": 3.272210485884387e-05,
"loss": 1.584,
"step": 45000
},
{
"epoch": 1.9204916458613406,
"grad_norm": 2.4185822010040283,
"learning_rate": 3.08019973113117e-05,
"loss": 1.5442,
"step": 50000
}
],
"logging_steps": 5000,
"max_steps": 130175,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 26033,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 7.221258085338186e+18,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}