{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9970845481049563, "eval_steps": 20, "global_step": 171, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11661807580174927, "grad_norm": 19.55031967163086, "learning_rate": 9.37888198757764e-05, "loss": 2.0774, "step": 20 }, { "epoch": 0.11661807580174927, "eval_loss": 0.9485672116279602, "eval_mae": 0.717108964920044, "eval_mse": 0.9485672116279602, "eval_pearson_r": 0.21942388970474297, "eval_r2": -1.1478704282307026, "eval_rmse": 0.9739441275596619, "eval_runtime": 27.489, "eval_samples_per_second": 199.789, "eval_spearman_r": 0.2441831605140419, "eval_steps_per_second": 3.129, "step": 20 }, { "epoch": 0.23323615160349853, "grad_norm": 16.93649673461914, "learning_rate": 8.136645962732919e-05, "loss": 1.8236, "step": 40 }, { "epoch": 0.23323615160349853, "eval_loss": 0.6921948790550232, "eval_mae": 0.6231020092964172, "eval_mse": 0.6921948790550232, "eval_pearson_r": 0.23120592870479417, "eval_r2": -0.5673585547339739, "eval_rmse": 0.8319824934005737, "eval_runtime": 26.5119, "eval_samples_per_second": 207.153, "eval_spearman_r": 0.26638960986513716, "eval_steps_per_second": 3.244, "step": 40 }, { "epoch": 0.3498542274052478, "grad_norm": 3.22672176361084, "learning_rate": 6.894409937888199e-05, "loss": 1.0444, "step": 60 }, { "epoch": 0.3498542274052478, "eval_loss": 0.4125833511352539, "eval_mae": 0.5312852263450623, "eval_mse": 0.4125833511352539, "eval_pearson_r": 0.26151310278769524, "eval_r2": 0.06577464645927622, "eval_rmse": 0.6423265337944031, "eval_runtime": 26.527, "eval_samples_per_second": 207.034, "eval_spearman_r": 0.30201007562837395, "eval_steps_per_second": 3.242, "step": 60 }, { "epoch": 0.46647230320699706, "grad_norm": 1.3443107604980469, "learning_rate": 5.652173913043478e-05, "loss": 0.8421, "step": 80 }, { "epoch": 0.46647230320699706, "eval_loss": 0.3800634741783142, "eval_mae": 0.5007564425468445, "eval_mse": 0.38006341457366943, "eval_pearson_r": 0.37522868136490395, "eval_r2": 0.13941050286913825, "eval_rmse": 0.6164928078651428, "eval_runtime": 26.5625, "eval_samples_per_second": 206.758, "eval_spearman_r": 0.41593231510610396, "eval_steps_per_second": 3.238, "step": 80 }, { "epoch": 0.5830903790087464, "grad_norm": 1.7762072086334229, "learning_rate": 4.409937888198758e-05, "loss": 0.8386, "step": 100 }, { "epoch": 0.5830903790087464, "eval_loss": 0.3888755738735199, "eval_mae": 0.5195034742355347, "eval_mse": 0.3888755738735199, "eval_pearson_r": 0.39786698142523913, "eval_r2": 0.11945691237345535, "eval_rmse": 0.6235988736152649, "eval_runtime": 26.5775, "eval_samples_per_second": 206.641, "eval_spearman_r": 0.43613497450085337, "eval_steps_per_second": 3.236, "step": 100 }, { "epoch": 0.6997084548104956, "grad_norm": 0.7590553164482117, "learning_rate": 3.167701863354037e-05, "loss": 0.8053, "step": 120 }, { "epoch": 0.6997084548104956, "eval_loss": 0.3734019696712494, "eval_mae": 0.5031423568725586, "eval_mse": 0.37340202927589417, "eval_pearson_r": 0.41265944072737076, "eval_r2": 0.15449414536591266, "eval_rmse": 0.6110662817955017, "eval_runtime": 26.5797, "eval_samples_per_second": 206.624, "eval_spearman_r": 0.4522267158298543, "eval_steps_per_second": 3.236, "step": 120 }, { "epoch": 0.8163265306122449, "grad_norm": 3.719794988632202, "learning_rate": 1.9254658385093167e-05, "loss": 0.7713, "step": 140 }, { "epoch": 0.8163265306122449, "eval_loss": 0.3640034794807434, "eval_mae": 0.49189862608909607, "eval_mse": 0.3640034794807434, "eval_pearson_r": 0.42289288702451056, "eval_r2": 0.1757755694952038, "eval_rmse": 0.6033270359039307, "eval_runtime": 26.5717, "eval_samples_per_second": 206.686, "eval_spearman_r": 0.4647961386487348, "eval_steps_per_second": 3.237, "step": 140 }, { "epoch": 0.9329446064139941, "grad_norm": 1.1577078104019165, "learning_rate": 6.832298136645963e-06, "loss": 0.8134, "step": 160 }, { "epoch": 0.9329446064139941, "eval_loss": 0.36333543062210083, "eval_mae": 0.49393317103385925, "eval_mse": 0.36333543062210083, "eval_pearson_r": 0.42798981712264383, "eval_r2": 0.17728821172156528, "eval_rmse": 0.602773129940033, "eval_runtime": 26.574, "eval_samples_per_second": 206.668, "eval_spearman_r": 0.4676909073177228, "eval_steps_per_second": 3.236, "step": 160 } ], "logging_steps": 20, "max_steps": 171, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1713642315776e+16, "train_batch_size": 64, "trial_name": null, "trial_params": null }