{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999921630708223, "eval_steps": 700, "global_step": 7975, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08777360678991544, "grad_norm": 15.821257591247559, "learning_rate": 9.179810725552051e-05, "loss": 4.3692, "step": 700 }, { "epoch": 0.08777360678991544, "eval_loss": 0.2736443877220154, "eval_mae": 0.39691290259361267, "eval_mse": 0.2736443877220154, "eval_pearson_r": 0.5351819396018982, "eval_r2": 0.2831069827079773, "eval_rmse": 0.5231103301048279, "eval_runtime": 633.0966, "eval_samples_per_second": 19.348, "eval_spearman_r": 0.5498853126880646, "eval_steps_per_second": 2.42, "step": 700 }, { "epoch": 0.17554721357983089, "grad_norm": 12.040508270263672, "learning_rate": 8.296529968454258e-05, "loss": 3.658, "step": 1400 }, { "epoch": 0.17554721357983089, "eval_loss": 0.25718629360198975, "eval_mae": 0.3839031159877777, "eval_mse": 0.25718629360198975, "eval_pearson_r": 0.5725416541099548, "eval_r2": 0.32622385025024414, "eval_rmse": 0.5071353912353516, "eval_runtime": 627.8848, "eval_samples_per_second": 19.508, "eval_spearman_r": 0.5926255817362731, "eval_steps_per_second": 2.44, "step": 1400 }, { "epoch": 0.2633208203697463, "grad_norm": 30.66448974609375, "learning_rate": 7.413249211356468e-05, "loss": 3.4843, "step": 2100 }, { "epoch": 0.2633208203697463, "eval_loss": 0.26160550117492676, "eval_mae": 0.3920997679233551, "eval_mse": 0.26160547137260437, "eval_pearson_r": 0.5790643692016602, "eval_r2": 0.31464648246765137, "eval_rmse": 0.5114738345146179, "eval_runtime": 628.627, "eval_samples_per_second": 19.485, "eval_spearman_r": 0.5891729768312147, "eval_steps_per_second": 2.437, "step": 2100 }, { "epoch": 0.35109442715966177, "grad_norm": 18.40130043029785, "learning_rate": 6.529968454258676e-05, "loss": 3.4036, "step": 2800 }, { "epoch": 0.35109442715966177, "eval_loss": 0.2504981458187103, "eval_mae": 0.378262460231781, "eval_mse": 0.2504981458187103, "eval_pearson_r": 0.5949272513389587, "eval_r2": 0.34374547004699707, "eval_rmse": 0.5004978775978088, "eval_runtime": 628.0494, "eval_samples_per_second": 19.503, "eval_spearman_r": 0.6035960603894734, "eval_steps_per_second": 2.439, "step": 2800 }, { "epoch": 0.4388680339495772, "grad_norm": 11.223097801208496, "learning_rate": 5.646687697160884e-05, "loss": 3.2658, "step": 3500 }, { "epoch": 0.4388680339495772, "eval_loss": 0.2516283392906189, "eval_mae": 0.3826421797275543, "eval_mse": 0.2516283392906189, "eval_pearson_r": 0.6112059354782104, "eval_r2": 0.34078454971313477, "eval_rmse": 0.5016257166862488, "eval_runtime": 628.4233, "eval_samples_per_second": 19.492, "eval_spearman_r": 0.6174392220462328, "eval_steps_per_second": 2.438, "step": 3500 }, { "epoch": 0.5266416407394926, "grad_norm": 34.326786041259766, "learning_rate": 4.763406940063092e-05, "loss": 3.1724, "step": 4200 }, { "epoch": 0.5266416407394926, "eval_loss": 0.24652785062789917, "eval_mae": 0.37860819697380066, "eval_mse": 0.24652785062789917, "eval_pearson_r": 0.6118167638778687, "eval_r2": 0.3541467785835266, "eval_rmse": 0.4965157210826874, "eval_runtime": 628.1591, "eval_samples_per_second": 19.5, "eval_spearman_r": 0.6173150995319716, "eval_steps_per_second": 2.439, "step": 4200 }, { "epoch": 0.6144152475294081, "grad_norm": 7.161226272583008, "learning_rate": 3.8801261829652994e-05, "loss": 3.1014, "step": 4900 }, { "epoch": 0.6144152475294081, "eval_loss": 0.2447061538696289, "eval_mae": 0.3736642599105835, "eval_mse": 0.2447061538696289, "eval_pearson_r": 0.6133831739425659, "eval_r2": 0.35891926288604736, "eval_rmse": 0.4946778416633606, "eval_runtime": 630.4288, "eval_samples_per_second": 19.43, "eval_spearman_r": 0.6261022939088873, "eval_steps_per_second": 2.43, "step": 4900 }, { "epoch": 0.7021888543193235, "grad_norm": 36.81498718261719, "learning_rate": 2.9968454258675084e-05, "loss": 3.0568, "step": 5600 }, { "epoch": 0.7021888543193235, "eval_loss": 0.23911960422992706, "eval_mae": 0.37206539511680603, "eval_mse": 0.23911964893341064, "eval_pearson_r": 0.6180469989776611, "eval_r2": 0.3735548257827759, "eval_rmse": 0.4889986217021942, "eval_runtime": 628.247, "eval_samples_per_second": 19.497, "eval_spearman_r": 0.620481976822377, "eval_steps_per_second": 2.439, "step": 5600 }, { "epoch": 0.789962461109239, "grad_norm": 8.716201782226562, "learning_rate": 2.113564668769716e-05, "loss": 3.0154, "step": 6300 }, { "epoch": 0.789962461109239, "eval_loss": 0.24396386742591858, "eval_mae": 0.37591251730918884, "eval_mse": 0.24396386742591858, "eval_pearson_r": 0.6165634989738464, "eval_r2": 0.36086392402648926, "eval_rmse": 0.4939269721508026, "eval_runtime": 628.8587, "eval_samples_per_second": 19.478, "eval_spearman_r": 0.6244525948547685, "eval_steps_per_second": 2.436, "step": 6300 }, { "epoch": 0.8777360678991544, "grad_norm": 10.887434005737305, "learning_rate": 1.2302839116719243e-05, "loss": 2.939, "step": 7000 }, { "epoch": 0.8777360678991544, "eval_loss": 0.24175813794136047, "eval_mae": 0.3743629455566406, "eval_mse": 0.24175813794136047, "eval_pearson_r": 0.6214854717254639, "eval_r2": 0.3666425347328186, "eval_rmse": 0.4916890561580658, "eval_runtime": 628.0862, "eval_samples_per_second": 19.502, "eval_spearman_r": 0.6260799567516182, "eval_steps_per_second": 2.439, "step": 7000 }, { "epoch": 0.9655096746890698, "grad_norm": 11.310564994812012, "learning_rate": 3.470031545741325e-06, "loss": 2.9347, "step": 7700 }, { "epoch": 0.9655096746890698, "eval_loss": 0.24253901839256287, "eval_mae": 0.3738330006599426, "eval_mse": 0.24253901839256287, "eval_pearson_r": 0.622931182384491, "eval_r2": 0.36459678411483765, "eval_rmse": 0.4924825131893158, "eval_runtime": 629.0849, "eval_samples_per_second": 19.471, "eval_spearman_r": 0.6275805173207804, "eval_steps_per_second": 2.435, "step": 7700 } ], "logging_steps": 700, "max_steps": 7975, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 700, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.751589692941271e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }