{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9999921630708223,
  "eval_steps": 700,
  "global_step": 7975,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08777360678991544,
      "grad_norm": 15.821257591247559,
      "learning_rate": 9.179810725552051e-05,
      "loss": 4.3692,
      "step": 700
    },
    {
      "epoch": 0.08777360678991544,
      "eval_loss": 0.2736443877220154,
      "eval_mae": 0.39691290259361267,
      "eval_mse": 0.2736443877220154,
      "eval_pearson_r": 0.5351819396018982,
      "eval_r2": 0.2831069827079773,
      "eval_rmse": 0.5231103301048279,
      "eval_runtime": 633.0966,
      "eval_samples_per_second": 19.348,
      "eval_spearman_r": 0.5498853126880646,
      "eval_steps_per_second": 2.42,
      "step": 700
    },
    {
      "epoch": 0.17554721357983089,
      "grad_norm": 12.040508270263672,
      "learning_rate": 8.296529968454258e-05,
      "loss": 3.658,
      "step": 1400
    },
    {
      "epoch": 0.17554721357983089,
      "eval_loss": 0.25718629360198975,
      "eval_mae": 0.3839031159877777,
      "eval_mse": 0.25718629360198975,
      "eval_pearson_r": 0.5725416541099548,
      "eval_r2": 0.32622385025024414,
      "eval_rmse": 0.5071353912353516,
      "eval_runtime": 627.8848,
      "eval_samples_per_second": 19.508,
      "eval_spearman_r": 0.5926255817362731,
      "eval_steps_per_second": 2.44,
      "step": 1400
    },
    {
      "epoch": 0.2633208203697463,
      "grad_norm": 30.66448974609375,
      "learning_rate": 7.413249211356468e-05,
      "loss": 3.4843,
      "step": 2100
    },
    {
      "epoch": 0.2633208203697463,
      "eval_loss": 0.26160550117492676,
      "eval_mae": 0.3920997679233551,
      "eval_mse": 0.26160547137260437,
      "eval_pearson_r": 0.5790643692016602,
      "eval_r2": 0.31464648246765137,
      "eval_rmse": 0.5114738345146179,
      "eval_runtime": 628.627,
      "eval_samples_per_second": 19.485,
      "eval_spearman_r": 0.5891729768312147,
      "eval_steps_per_second": 2.437,
      "step": 2100
    },
    {
      "epoch": 0.35109442715966177,
      "grad_norm": 18.40130043029785,
      "learning_rate": 6.529968454258676e-05,
      "loss": 3.4036,
      "step": 2800
    },
    {
      "epoch": 0.35109442715966177,
      "eval_loss": 0.2504981458187103,
      "eval_mae": 0.378262460231781,
      "eval_mse": 0.2504981458187103,
      "eval_pearson_r": 0.5949272513389587,
      "eval_r2": 0.34374547004699707,
      "eval_rmse": 0.5004978775978088,
      "eval_runtime": 628.0494,
      "eval_samples_per_second": 19.503,
      "eval_spearman_r": 0.6035960603894734,
      "eval_steps_per_second": 2.439,
      "step": 2800
    },
    {
      "epoch": 0.4388680339495772,
      "grad_norm": 11.223097801208496,
      "learning_rate": 5.646687697160884e-05,
      "loss": 3.2658,
      "step": 3500
    },
    {
      "epoch": 0.4388680339495772,
      "eval_loss": 0.2516283392906189,
      "eval_mae": 0.3826421797275543,
      "eval_mse": 0.2516283392906189,
      "eval_pearson_r": 0.6112059354782104,
      "eval_r2": 0.34078454971313477,
      "eval_rmse": 0.5016257166862488,
      "eval_runtime": 628.4233,
      "eval_samples_per_second": 19.492,
      "eval_spearman_r": 0.6174392220462328,
      "eval_steps_per_second": 2.438,
      "step": 3500
    },
    {
      "epoch": 0.5266416407394926,
      "grad_norm": 34.326786041259766,
      "learning_rate": 4.763406940063092e-05,
      "loss": 3.1724,
      "step": 4200
    },
    {
      "epoch": 0.5266416407394926,
      "eval_loss": 0.24652785062789917,
      "eval_mae": 0.37860819697380066,
      "eval_mse": 0.24652785062789917,
      "eval_pearson_r": 0.6118167638778687,
      "eval_r2": 0.3541467785835266,
      "eval_rmse": 0.4965157210826874,
      "eval_runtime": 628.1591,
      "eval_samples_per_second": 19.5,
      "eval_spearman_r": 0.6173150995319716,
      "eval_steps_per_second": 2.439,
      "step": 4200
    },
    {
      "epoch": 0.6144152475294081,
      "grad_norm": 7.161226272583008,
      "learning_rate": 3.8801261829652994e-05,
      "loss": 3.1014,
      "step": 4900
    },
    {
      "epoch": 0.6144152475294081,
      "eval_loss": 0.2447061538696289,
      "eval_mae": 0.3736642599105835,
      "eval_mse": 0.2447061538696289,
      "eval_pearson_r": 0.6133831739425659,
      "eval_r2": 0.35891926288604736,
      "eval_rmse": 0.4946778416633606,
      "eval_runtime": 630.4288,
      "eval_samples_per_second": 19.43,
      "eval_spearman_r": 0.6261022939088873,
      "eval_steps_per_second": 2.43,
      "step": 4900
    },
    {
      "epoch": 0.7021888543193235,
      "grad_norm": 36.81498718261719,
      "learning_rate": 2.9968454258675084e-05,
      "loss": 3.0568,
      "step": 5600
    },
    {
      "epoch": 0.7021888543193235,
      "eval_loss": 0.23911960422992706,
      "eval_mae": 0.37206539511680603,
      "eval_mse": 0.23911964893341064,
      "eval_pearson_r": 0.6180469989776611,
      "eval_r2": 0.3735548257827759,
      "eval_rmse": 0.4889986217021942,
      "eval_runtime": 628.247,
      "eval_samples_per_second": 19.497,
      "eval_spearman_r": 0.620481976822377,
      "eval_steps_per_second": 2.439,
      "step": 5600
    },
    {
      "epoch": 0.789962461109239,
      "grad_norm": 8.716201782226562,
      "learning_rate": 2.113564668769716e-05,
      "loss": 3.0154,
      "step": 6300
    },
    {
      "epoch": 0.789962461109239,
      "eval_loss": 0.24396386742591858,
      "eval_mae": 0.37591251730918884,
      "eval_mse": 0.24396386742591858,
      "eval_pearson_r": 0.6165634989738464,
      "eval_r2": 0.36086392402648926,
      "eval_rmse": 0.4939269721508026,
      "eval_runtime": 628.8587,
      "eval_samples_per_second": 19.478,
      "eval_spearman_r": 0.6244525948547685,
      "eval_steps_per_second": 2.436,
      "step": 6300
    },
    {
      "epoch": 0.8777360678991544,
      "grad_norm": 10.887434005737305,
      "learning_rate": 1.2302839116719243e-05,
      "loss": 2.939,
      "step": 7000
    },
    {
      "epoch": 0.8777360678991544,
      "eval_loss": 0.24175813794136047,
      "eval_mae": 0.3743629455566406,
      "eval_mse": 0.24175813794136047,
      "eval_pearson_r": 0.6214854717254639,
      "eval_r2": 0.3666425347328186,
      "eval_rmse": 0.4916890561580658,
      "eval_runtime": 628.0862,
      "eval_samples_per_second": 19.502,
      "eval_spearman_r": 0.6260799567516182,
      "eval_steps_per_second": 2.439,
      "step": 7000
    },
    {
      "epoch": 0.9655096746890698,
      "grad_norm": 11.310564994812012,
      "learning_rate": 3.470031545741325e-06,
      "loss": 2.9347,
      "step": 7700
    },
    {
      "epoch": 0.9655096746890698,
      "eval_loss": 0.24253901839256287,
      "eval_mae": 0.3738330006599426,
      "eval_mse": 0.24253901839256287,
      "eval_pearson_r": 0.622931182384491,
      "eval_r2": 0.36459678411483765,
      "eval_rmse": 0.4924825131893158,
      "eval_runtime": 629.0849,
      "eval_samples_per_second": 19.471,
      "eval_spearman_r": 0.6275805173207804,
      "eval_steps_per_second": 2.435,
      "step": 7700
    }
  ],
  "logging_steps": 700,
  "max_steps": 7975,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 700,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.751589692941271e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}