| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 410, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04878048780487805, | |
| "grad_norm": 0.4036892056465149, | |
| "learning_rate": 0.00019999478113897612, | |
| "loss": 1.0282, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0975609756097561, | |
| "grad_norm": 0.3629762828350067, | |
| "learning_rate": 0.0001999791251006346, | |
| "loss": 0.7875, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.14634146341463414, | |
| "grad_norm": 0.4877622425556183, | |
| "learning_rate": 0.0001999530335191093, | |
| "loss": 0.5942, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.1951219512195122, | |
| "grad_norm": 0.4466260075569153, | |
| "learning_rate": 0.00019991650911776695, | |
| "loss": 0.3866, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.24390243902439024, | |
| "grad_norm": 0.649118959903717, | |
| "learning_rate": 0.000199869555708923, | |
| "loss": 0.3928, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.2926829268292683, | |
| "grad_norm": 0.8762800097465515, | |
| "learning_rate": 0.0001998121781934438, | |
| "loss": 0.3258, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.34146341463414637, | |
| "grad_norm": 0.9195622801780701, | |
| "learning_rate": 0.0001997443825602349, | |
| "loss": 0.2885, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.3902439024390244, | |
| "grad_norm": 0.5856262445449829, | |
| "learning_rate": 0.00019966617588561609, | |
| "loss": 0.2888, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.43902439024390244, | |
| "grad_norm": 0.5520443320274353, | |
| "learning_rate": 0.00019957756633258265, | |
| "loss": 0.2242, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.4878048780487805, | |
| "grad_norm": 0.9435800909996033, | |
| "learning_rate": 0.00019947856314995349, | |
| "loss": 0.1629, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.5365853658536586, | |
| "grad_norm": 0.9416623115539551, | |
| "learning_rate": 0.00019936917667140555, | |
| "loss": 0.1555, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.5853658536585366, | |
| "grad_norm": 0.802065372467041, | |
| "learning_rate": 0.0001992494183143955, | |
| "loss": 0.1339, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.6341463414634146, | |
| "grad_norm": 0.7007794380187988, | |
| "learning_rate": 0.00019911930057896774, | |
| "loss": 0.1191, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.6829268292682927, | |
| "grad_norm": 0.6755990386009216, | |
| "learning_rate": 0.00019897883704644983, | |
| "loss": 0.1571, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.7317073170731707, | |
| "grad_norm": 1.6951078176498413, | |
| "learning_rate": 0.00019882804237803488, | |
| "loss": 0.1309, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.7804878048780488, | |
| "grad_norm": 0.567158579826355, | |
| "learning_rate": 0.0001986669323132512, | |
| "loss": 0.0766, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.8292682926829268, | |
| "grad_norm": 0.8820038437843323, | |
| "learning_rate": 0.0001984955236683196, | |
| "loss": 0.0839, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.8780487804878049, | |
| "grad_norm": 0.6520794034004211, | |
| "learning_rate": 0.00019831383433439797, | |
| "loss": 0.0863, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.926829268292683, | |
| "grad_norm": 0.45519864559173584, | |
| "learning_rate": 0.00019812188327571399, | |
| "loss": 0.0889, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.975609756097561, | |
| "grad_norm": 0.614235520362854, | |
| "learning_rate": 0.00019791969052758562, | |
| "loss": 0.0725, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.024390243902439, | |
| "grad_norm": 0.2764686644077301, | |
| "learning_rate": 0.00019770727719432994, | |
| "loss": 0.0407, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.0731707317073171, | |
| "grad_norm": 0.6082726716995239, | |
| "learning_rate": 0.00019748466544706022, | |
| "loss": 0.044, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.1219512195121952, | |
| "grad_norm": 0.9295619130134583, | |
| "learning_rate": 0.00019725187852137195, | |
| "loss": 0.0675, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.170731707317073, | |
| "grad_norm": 0.3758924603462219, | |
| "learning_rate": 0.00019700894071491732, | |
| "loss": 0.0439, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.2195121951219512, | |
| "grad_norm": 0.46514585614204407, | |
| "learning_rate": 0.00019675587738486936, | |
| "loss": 0.0398, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.2682926829268293, | |
| "grad_norm": 0.5870018005371094, | |
| "learning_rate": 0.0001964927149452751, | |
| "loss": 0.0406, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.3170731707317074, | |
| "grad_norm": 0.30292996764183044, | |
| "learning_rate": 0.00019621948086429844, | |
| "loss": 0.028, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.3658536585365852, | |
| "grad_norm": 0.47037121653556824, | |
| "learning_rate": 0.00019593620366135337, | |
| "loss": 0.0239, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.4146341463414633, | |
| "grad_norm": 0.4176475405693054, | |
| "learning_rate": 0.00019564291290412688, | |
| "loss": 0.0281, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.4634146341463414, | |
| "grad_norm": 0.3179157078266144, | |
| "learning_rate": 0.00019533963920549306, | |
| "loss": 0.0281, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.5121951219512195, | |
| "grad_norm": 0.5817562937736511, | |
| "learning_rate": 0.00019502641422031763, | |
| "loss": 0.0296, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.5609756097560976, | |
| "grad_norm": 0.7409655451774597, | |
| "learning_rate": 0.00019470327064215383, | |
| "loss": 0.029, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.6097560975609757, | |
| "grad_norm": 0.4418310225009918, | |
| "learning_rate": 0.00019437024219983028, | |
| "loss": 0.0583, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.6585365853658538, | |
| "grad_norm": 0.31637728214263916, | |
| "learning_rate": 0.0001940273636539301, | |
| "loss": 0.0354, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.7073170731707317, | |
| "grad_norm": 0.22175493836402893, | |
| "learning_rate": 0.00019367467079316279, | |
| "loss": 0.0514, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.7560975609756098, | |
| "grad_norm": 0.6636152267456055, | |
| "learning_rate": 0.00019331220043062894, | |
| "loss": 0.034, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.8048780487804879, | |
| "grad_norm": 0.8424332141876221, | |
| "learning_rate": 0.00019293999039997746, | |
| "loss": 0.0299, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.8536585365853657, | |
| "grad_norm": 0.6435155272483826, | |
| "learning_rate": 0.00019255807955145677, | |
| "loss": 0.0508, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.9024390243902438, | |
| "grad_norm": 0.7734220027923584, | |
| "learning_rate": 0.00019216650774785972, | |
| "loss": 0.035, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.951219512195122, | |
| "grad_norm": 0.2854250967502594, | |
| "learning_rate": 0.0001917653158603628, | |
| "loss": 0.0339, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.6165639758110046, | |
| "learning_rate": 0.0001913545457642601, | |
| "loss": 0.0323, | |
| "step": 410 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3075, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 15, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.64317874233344e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |