| { | |
| "best_metric": 0.5050071530758227, | |
| "best_model_checkpoint": "/RuleBert-v0.0-k0/checkpoint-400", | |
| "epoch": 0.8849557522123894, | |
| "eval_steps": 50, | |
| "global_step": 400, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.999951808959328e-05, | |
| "loss": 0.6589, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.9998072404820648e-05, | |
| "loss": 0.554, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_accuracy": 0.0, | |
| "eval_f1": 0.4280639431616341, | |
| "eval_loss": 0.49144598841667175, | |
| "eval_roc_auc": 0.6450392140327205, | |
| "eval_runtime": 2.6644, | |
| "eval_samples_per_second": 90.076, | |
| "eval_steps_per_second": 3.003, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.9995663085020215e-05, | |
| "loss": 0.471, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.9992290362407232e-05, | |
| "loss": 0.4187, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_accuracy": 0.0, | |
| "eval_f1": 0.4875621890547264, | |
| "eval_loss": 0.40552660822868347, | |
| "eval_roc_auc": 0.6698800387923765, | |
| "eval_runtime": 2.7272, | |
| "eval_samples_per_second": 88.003, | |
| "eval_steps_per_second": 2.933, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.9987954562051724e-05, | |
| "loss": 0.3947, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.998265610184716e-05, | |
| "loss": 0.3621, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.008333333333333333, | |
| "eval_f1": 0.49451303155006865, | |
| "eval_loss": 0.3777387738227844, | |
| "eval_roc_auc": 0.6711924439197166, | |
| "eval_runtime": 2.8618, | |
| "eval_samples_per_second": 83.864, | |
| "eval_steps_per_second": 2.795, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.997639549247016e-05, | |
| "loss": 0.3516, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.9969173337331283e-05, | |
| "loss": 0.3541, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.025, | |
| "eval_f1": 0.4965277777777778, | |
| "eval_loss": 0.3699280917644501, | |
| "eval_roc_auc": 0.6713821892393321, | |
| "eval_runtime": 2.8771, | |
| "eval_samples_per_second": 83.417, | |
| "eval_steps_per_second": 2.781, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.9960990332516875e-05, | |
| "loss": 0.3512, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 1.995184726672197e-05, | |
| "loss": 0.3322, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_accuracy": 0.0, | |
| "eval_f1": 0.4925675675675676, | |
| "eval_loss": 0.36883607506752014, | |
| "eval_roc_auc": 0.6711713611064261, | |
| "eval_runtime": 2.8206, | |
| "eval_samples_per_second": 85.088, | |
| "eval_steps_per_second": 2.836, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9941745021174284e-05, | |
| "loss": 0.3399, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.9930684569549265e-05, | |
| "loss": 0.3276, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_accuracy": 0.008333333333333333, | |
| "eval_f1": 0.49861495844875353, | |
| "eval_loss": 0.36757946014404297, | |
| "eval_roc_auc": 0.6724995783437342, | |
| "eval_runtime": 2.7494, | |
| "eval_samples_per_second": 87.293, | |
| "eval_steps_per_second": 2.91, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.991866697787626e-05, | |
| "loss": 0.3467, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.990569340443577e-05, | |
| "loss": 0.3078, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_accuracy": 0.016666666666666666, | |
| "eval_f1": 0.5012285012285013, | |
| "eval_loss": 0.367951363325119, | |
| "eval_roc_auc": 0.6728632568729971, | |
| "eval_runtime": 2.7213, | |
| "eval_samples_per_second": 88.192, | |
| "eval_steps_per_second": 2.94, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.989176509964781e-05, | |
| "loss": 0.3211, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 1.9876883405951378e-05, | |
| "loss": 0.3293, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_accuracy": 0.0375, | |
| "eval_f1": 0.5050071530758227, | |
| "eval_loss": 0.36856791377067566, | |
| "eval_roc_auc": 0.6734061393152301, | |
| "eval_runtime": 2.704, | |
| "eval_samples_per_second": 88.757, | |
| "eval_steps_per_second": 2.959, | |
| "step": 400 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 8000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 18, | |
| "save_steps": 100, | |
| "total_flos": 842272879411200.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |