| { | |
| "best_metric": 1.0769927501678467, | |
| "best_model_checkpoint": "miner_id_24/checkpoint-50", | |
| "epoch": 0.004265938613143357, | |
| "eval_steps": 25, | |
| "global_step": 50, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 8.531877226286714e-05, | |
| "grad_norm": 1.6899114847183228, | |
| "learning_rate": 2e-05, | |
| "loss": 4.2326, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 8.531877226286714e-05, | |
| "eval_loss": 1.5156238079071045, | |
| "eval_runtime": 235.2477, | |
| "eval_samples_per_second": 83.912, | |
| "eval_steps_per_second": 10.491, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.00017063754452573428, | |
| "grad_norm": 2.0952723026275635, | |
| "learning_rate": 4e-05, | |
| "loss": 4.6728, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00025595631678860143, | |
| "grad_norm": 2.2400150299072266, | |
| "learning_rate": 6e-05, | |
| "loss": 4.6282, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.00034127508905146855, | |
| "grad_norm": 2.11881947517395, | |
| "learning_rate": 8e-05, | |
| "loss": 5.1554, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0004265938613143357, | |
| "grad_norm": 2.3352315425872803, | |
| "learning_rate": 0.0001, | |
| "loss": 5.5338, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0005119126335772029, | |
| "grad_norm": 2.7037458419799805, | |
| "learning_rate": 9.987820251299122e-05, | |
| "loss": 5.1749, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0005972314058400699, | |
| "grad_norm": 3.01145339012146, | |
| "learning_rate": 9.951340343707852e-05, | |
| "loss": 5.2913, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0006825501781029371, | |
| "grad_norm": 3.2242839336395264, | |
| "learning_rate": 9.890738003669029e-05, | |
| "loss": 5.104, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0007678689503658043, | |
| "grad_norm": 3.0797693729400635, | |
| "learning_rate": 9.806308479691595e-05, | |
| "loss": 5.4467, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0008531877226286714, | |
| "grad_norm": 3.211855888366699, | |
| "learning_rate": 9.698463103929542e-05, | |
| "loss": 5.4264, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0009385064948915385, | |
| "grad_norm": 2.159902572631836, | |
| "learning_rate": 9.567727288213005e-05, | |
| "loss": 4.8241, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0010238252671544057, | |
| "grad_norm": 2.3554375171661377, | |
| "learning_rate": 9.414737964294636e-05, | |
| "loss": 5.0948, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0011091440394172729, | |
| "grad_norm": 2.441880702972412, | |
| "learning_rate": 9.24024048078213e-05, | |
| "loss": 5.3515, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.0011944628116801399, | |
| "grad_norm": 2.6415164470672607, | |
| "learning_rate": 9.045084971874738e-05, | |
| "loss": 5.6319, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.001279781583943007, | |
| "grad_norm": 2.364596366882324, | |
| "learning_rate": 8.83022221559489e-05, | |
| "loss": 4.9701, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0013651003562058742, | |
| "grad_norm": 2.503241777420044, | |
| "learning_rate": 8.596699001693255e-05, | |
| "loss": 5.4936, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0014504191284687414, | |
| "grad_norm": 2.6059958934783936, | |
| "learning_rate": 8.345653031794292e-05, | |
| "loss": 5.0535, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0015357379007316086, | |
| "grad_norm": 2.8563730716705322, | |
| "learning_rate": 8.07830737662829e-05, | |
| "loss": 5.8835, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0016210566729944755, | |
| "grad_norm": 2.718285083770752, | |
| "learning_rate": 7.795964517353735e-05, | |
| "loss": 5.4478, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.0017063754452573427, | |
| "grad_norm": 2.870326042175293, | |
| "learning_rate": 7.500000000000001e-05, | |
| "loss": 5.0726, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0017916942175202099, | |
| "grad_norm": 2.8775277137756348, | |
| "learning_rate": 7.191855733945387e-05, | |
| "loss": 5.6933, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.001877012989783077, | |
| "grad_norm": 2.822960615158081, | |
| "learning_rate": 6.873032967079561e-05, | |
| "loss": 5.5182, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.001962331762045944, | |
| "grad_norm": 2.784461259841919, | |
| "learning_rate": 6.545084971874738e-05, | |
| "loss": 5.0475, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.0020476505343088114, | |
| "grad_norm": 3.1153011322021484, | |
| "learning_rate": 6.209609477998338e-05, | |
| "loss": 5.5212, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0021329693065716784, | |
| "grad_norm": 2.521488904953003, | |
| "learning_rate": 5.868240888334653e-05, | |
| "loss": 4.9293, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.0021329693065716784, | |
| "eval_loss": 1.1216108798980713, | |
| "eval_runtime": 236.9445, | |
| "eval_samples_per_second": 83.311, | |
| "eval_steps_per_second": 10.416, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.0022182880788345458, | |
| "grad_norm": 2.925053834915161, | |
| "learning_rate": 5.522642316338268e-05, | |
| "loss": 5.5309, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.0023036068510974127, | |
| "grad_norm": 3.0282070636749268, | |
| "learning_rate": 5.174497483512506e-05, | |
| "loss": 6.1285, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.0023889256233602797, | |
| "grad_norm": 2.7216405868530273, | |
| "learning_rate": 4.825502516487497e-05, | |
| "loss": 5.2937, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.002474244395623147, | |
| "grad_norm": 2.9761924743652344, | |
| "learning_rate": 4.477357683661734e-05, | |
| "loss": 5.4515, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.002559563167886014, | |
| "grad_norm": 3.0058114528656006, | |
| "learning_rate": 4.131759111665349e-05, | |
| "loss": 5.3687, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0026448819401488815, | |
| "grad_norm": 2.876269817352295, | |
| "learning_rate": 3.790390522001662e-05, | |
| "loss": 4.9538, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.0027302007124117484, | |
| "grad_norm": 2.80983304977417, | |
| "learning_rate": 3.4549150281252636e-05, | |
| "loss": 4.7287, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0028155194846746154, | |
| "grad_norm": 3.582716703414917, | |
| "learning_rate": 3.12696703292044e-05, | |
| "loss": 5.5357, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.0029008382569374828, | |
| "grad_norm": 3.126375675201416, | |
| "learning_rate": 2.8081442660546125e-05, | |
| "loss": 5.0843, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0029861570292003497, | |
| "grad_norm": 2.9568567276000977, | |
| "learning_rate": 2.500000000000001e-05, | |
| "loss": 4.7067, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.003071475801463217, | |
| "grad_norm": 2.714190721511841, | |
| "learning_rate": 2.2040354826462668e-05, | |
| "loss": 3.7083, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.003156794573726084, | |
| "grad_norm": 3.0618250370025635, | |
| "learning_rate": 1.9216926233717085e-05, | |
| "loss": 3.3322, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.003242113345988951, | |
| "grad_norm": 2.9036190509796143, | |
| "learning_rate": 1.6543469682057106e-05, | |
| "loss": 3.4382, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.0033274321182518185, | |
| "grad_norm": 2.511988401412964, | |
| "learning_rate": 1.4033009983067452e-05, | |
| "loss": 3.1006, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.0034127508905146854, | |
| "grad_norm": 2.61822772026062, | |
| "learning_rate": 1.1697777844051105e-05, | |
| "loss": 2.9746, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.003498069662777553, | |
| "grad_norm": 2.7285141944885254, | |
| "learning_rate": 9.549150281252633e-06, | |
| "loss": 3.3553, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.0035833884350404198, | |
| "grad_norm": 2.4301650524139404, | |
| "learning_rate": 7.597595192178702e-06, | |
| "loss": 3.1297, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.0036687072073032867, | |
| "grad_norm": 2.53094220161438, | |
| "learning_rate": 5.852620357053651e-06, | |
| "loss": 3.2159, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.003754025979566154, | |
| "grad_norm": 2.4559786319732666, | |
| "learning_rate": 4.322727117869951e-06, | |
| "loss": 2.9607, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.003839344751829021, | |
| "grad_norm": 2.5060479640960693, | |
| "learning_rate": 3.0153689607045845e-06, | |
| "loss": 3.117, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.003924663524091888, | |
| "grad_norm": 2.513113260269165, | |
| "learning_rate": 1.9369152030840556e-06, | |
| "loss": 3.2779, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.0040099822963547554, | |
| "grad_norm": 2.574754476547241, | |
| "learning_rate": 1.0926199633097157e-06, | |
| "loss": 3.263, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.004095301068617623, | |
| "grad_norm": 2.6792495250701904, | |
| "learning_rate": 4.865965629214819e-07, | |
| "loss": 3.1335, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.004180619840880489, | |
| "grad_norm": 2.810391902923584, | |
| "learning_rate": 1.2179748700879012e-07, | |
| "loss": 3.3076, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.004265938613143357, | |
| "grad_norm": 3.56740403175354, | |
| "learning_rate": 0.0, | |
| "loss": 3.6467, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.004265938613143357, | |
| "eval_loss": 1.0769927501678467, | |
| "eval_runtime": 237.1606, | |
| "eval_samples_per_second": 83.235, | |
| "eval_steps_per_second": 10.406, | |
| "step": 50 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 50, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 25, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8128561856643072.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |