| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 1089, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.8348623853211011e-06, | |
| "loss": 1.1016, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 9.174311926605506e-06, | |
| "loss": 1.1129, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.834862385321101e-05, | |
| "loss": 1.1294, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.7522935779816515e-05, | |
| "loss": 1.1021, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.669724770642202e-05, | |
| "loss": 1.0547, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.587155963302753e-05, | |
| "loss": 1.0457, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.504587155963303e-05, | |
| "loss": 1.0129, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.422018348623854e-05, | |
| "loss": 1.024, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.339449541284404e-05, | |
| "loss": 0.9981, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 8.256880733944955e-05, | |
| "loss": 0.9936, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.174311926605506e-05, | |
| "loss": 0.9912, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00010091743119266055, | |
| "loss": 1.0031, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00011009174311926606, | |
| "loss": 0.9909, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00011926605504587157, | |
| "loss": 0.9596, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00012844036697247707, | |
| "loss": 0.997, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00013761467889908258, | |
| "loss": 0.9934, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0001467889908256881, | |
| "loss": 0.9994, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0001559633027522936, | |
| "loss": 0.9851, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0001651376146788991, | |
| "loss": 0.9936, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00017431192660550458, | |
| "loss": 0.9802, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00018348623853211012, | |
| "loss": 0.9816, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0001926605504587156, | |
| "loss": 0.9631, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0001999994861726391, | |
| "loss": 0.9779, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00019998150276943902, | |
| "loss": 0.9625, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0001999378332783191, | |
| "loss": 0.9878, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00019986848891833845, | |
| "loss": 0.9804, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001997734875046456, | |
| "loss": 0.9817, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00019965285344390184, | |
| "loss": 0.9599, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00019950661772801063, | |
| "loss": 0.9643, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00019933481792615583, | |
| "loss": 0.9789, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00019913749817514963, | |
| "loss": 0.981, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00019891470916809362, | |
| "loss": 0.9719, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00019866650814135518, | |
| "loss": 0.9998, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00019839295885986296, | |
| "loss": 0.9661, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00019809413160072528, | |
| "loss": 0.9625, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00019777010313517518, | |
| "loss": 0.9753, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00019742095670884728, | |
| "loss": 0.9702, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0001970467820203915, | |
| "loss": 0.9556, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0001966476751984283, | |
| "loss": 0.9778, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0001962237387768529, | |
| "loss": 0.9602, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00019577508166849304, | |
| "loss": 0.9522, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00019530181913712872, | |
| "loss": 0.9537, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00019480407276787967, | |
| "loss": 0.9742, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0001942819704359693, | |
| "loss": 0.974, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00019373564627387242, | |
| "loss": 0.9579, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0001931652406368554, | |
| "loss": 0.9471, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00019257090006691798, | |
| "loss": 0.9992, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001919527772551451, | |
| "loss": 0.9303, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00019131103100247934, | |
| "loss": 0.9675, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001906458261789238, | |
| "loss": 0.9913, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00018995733368118556, | |
| "loss": 0.948, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0001892457303887706, | |
| "loss": 0.9665, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00018851119911854233, | |
| "loss": 0.9388, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00018775392857775432, | |
| "loss": 0.9669, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00018697411331556956, | |
| "loss": 0.9585, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0001861719536730795, | |
| "loss": 0.9585, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0001853476557318346, | |
| "loss": 0.9554, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00018450143126090015, | |
| "loss": 0.9552, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0001836334976624511, | |
| "loss": 0.9557, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00018274407791591966, | |
| "loss": 0.9785, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00018183340052070997, | |
| "loss": 0.9687, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00018090169943749476, | |
| "loss": 0.9393, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0001799492140281086, | |
| "loss": 0.9778, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00017897618899405423, | |
| "loss": 0.966, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00017798287431363641, | |
| "loss": 0.9505, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00017696952517774062, | |
| "loss": 0.9471, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.000175936401924272, | |
| "loss": 0.9617, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00017488376997127283, | |
| "loss": 0.9513, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00017381189974873407, | |
| "loss": 0.9546, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00017272106662911973, | |
| "loss": 0.9711, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00017161155085662145, | |
| "loss": 0.9634, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00017048363747516117, | |
| "loss": 0.9582, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0001693376162551613, | |
| "loss": 0.9362, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00016817378161909996, | |
| "loss": 0.9715, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00016699243256587153, | |
| "loss": 0.967, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00016579387259397127, | |
| "loss": 0.9536, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00016457840962352403, | |
| "loss": 0.9626, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00016334635591717703, | |
| "loss": 0.972, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00016209802799987673, | |
| "loss": 0.9434, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00016083374657755134, | |
| "loss": 0.9736, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00015955383645471828, | |
| "loss": 0.9399, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001582586264510396, | |
| "loss": 0.9475, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001569484493168452, | |
| "loss": 0.9579, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001556236416476465, | |
| "loss": 0.9465, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00015428454379766223, | |
| "loss": 0.9673, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00015293149979237876, | |
| "loss": 0.9768, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001515648572401667, | |
| "loss": 0.9679, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00015018496724297778, | |
| "loss": 0.922, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00014879218430614345, | |
| "loss": 0.9468, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014738686624729986, | |
| "loss": 0.9662, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014596937410446117, | |
| "loss": 0.9704, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0001445400720432659, | |
| "loss": 0.9509, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00014309932726342005, | |
| "loss": 0.9885, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0001416475099043599, | |
| "loss": 0.9552, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00014018499295016056, | |
| "loss": 0.9807, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00013871215213371284, | |
| "loss": 0.9364, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00013722936584019453, | |
| "loss": 0.9528, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0001357370150098601, | |
| "loss": 0.9643, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0001342354830401738, | |
| "loss": 0.9651, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0001327251556873117, | |
| "loss": 0.9518, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00013120642096705774, | |
| "loss": 0.9606, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00012967966905511906, | |
| "loss": 0.9207, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00012814529218688686, | |
| "loss": 0.9577, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00012660368455666752, | |
| "loss": 0.9394, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00012505524221641096, | |
| "loss": 0.9342, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00012350036297396154, | |
| "loss": 0.9473, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00012193944629085778, | |
| "loss": 1.0108, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00012037289317970757, | |
| "loss": 0.9225, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00011880110610116437, | |
| "loss": 0.9348, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0001172244888605319, | |
| "loss": 0.9507, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001156434465040231, | |
| "loss": 0.9306, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00011405838521470029, | |
| "loss": 0.9389, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00011246971220812347, | |
| "loss": 0.9629, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00011087783562773311, | |
| "loss": 0.974, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00010928316443999462, | |
| "loss": 0.9713, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00010768610832933168, | |
| "loss": 0.9443, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00010608707759287452, | |
| "loss": 0.9231, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00010448648303505151, | |
| "loss": 0.9639, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00010288473586204969, | |
| "loss": 0.9409, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00010128224757617274, | |
| "loss": 0.9514, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 9.967942987012241e-05, | |
| "loss": 0.9604, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.80766945212313e-05, | |
| "loss": 0.9722, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.647445328567368e-05, | |
| "loss": 0.9405, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.48731177926821e-05, | |
| "loss": 0.9357, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 9.327309943879604e-05, | |
| "loss": 0.9592, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 9.167480928217108e-05, | |
| "loss": 0.9321, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 9.007865793697426e-05, | |
| "loss": 0.9443, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 8.848505546789408e-05, | |
| "loss": 0.9386, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 8.689441128479134e-05, | |
| "loss": 0.9513, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 8.530713403751821e-05, | |
| "loss": 0.9461, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 8.372363151093301e-05, | |
| "loss": 0.9398, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 8.214431052013634e-05, | |
| "loss": 0.9749, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 8.056957680595732e-05, | |
| "loss": 0.948, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 7.899983493071507e-05, | |
| "loss": 0.9494, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 7.743548817428339e-05, | |
| "loss": 0.9153, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 7.587693843048475e-05, | |
| "loss": 0.9553, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 7.432458610384036e-05, | |
| "loss": 0.9365, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.27788300067029e-05, | |
| "loss": 0.9432, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.124006725679828e-05, | |
| "loss": 0.9331, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.97086931752028e-05, | |
| "loss": 0.9295, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.818510118478172e-05, | |
| "loss": 0.9287, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 6.666968270911584e-05, | |
| "loss": 0.9447, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 6.516282707194119e-05, | |
| "loss": 0.9446, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 6.366492139712886e-05, | |
| "loss": 0.9712, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 6.217635050922923e-05, | |
| "loss": 0.9212, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 6.069749683460765e-05, | |
| "loss": 0.9444, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.9228740303195674e-05, | |
| "loss": 0.9824, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.777045825088404e-05, | |
| "loss": 0.9366, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 5.632302532258169e-05, | |
| "loss": 0.9358, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 5.488681337596653e-05, | |
| "loss": 0.9328, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 5.346219138595214e-05, | |
| "loss": 0.95, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 5.2049525349894625e-05, | |
| "loss": 0.9323, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 5.0649178193565314e-05, | |
| "loss": 0.9357, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.92615096779118e-05, | |
| "loss": 0.9721, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.788687630663231e-05, | |
| "loss": 0.9523, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.6525631234587034e-05, | |
| "loss": 0.9674, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.517812417706967e-05, | |
| "loss": 0.9295, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.384470131996252e-05, | |
| "loss": 0.9476, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.252570523079852e-05, | |
| "loss": 0.9567, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.12214747707527e-05, | |
| "loss": 0.9573, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.9932345007585966e-05, | |
| "loss": 0.9339, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.8658647129563364e-05, | |
| "loss": 0.9426, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.740070836036893e-05, | |
| "loss": 0.9521, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.615885187503946e-05, | |
| "loss": 0.9348, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.493339671693765e-05, | |
| "loss": 0.9561, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.372465771578771e-05, | |
| "loss": 0.9384, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.253294540679257e-05, | |
| "loss": 0.9705, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.135856595085498e-05, | |
| "loss": 0.9701, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.0201821055922098e-05, | |
| "loss": 0.9482, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.9063007899474216e-05, | |
| "loss": 0.9649, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.7942419052177525e-05, | |
| "loss": 0.9524, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.6840342402719866e-05, | |
| "loss": 0.9355, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.5757061083850154e-05, | |
| "loss": 0.957, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.4692853399638917e-05, | |
| "loss": 0.9647, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.3647992753979696e-05, | |
| "loss": 0.9562, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.2622747580349314e-05, | |
| "loss": 0.9409, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 2.161738127284517e-05, | |
| "loss": 0.9251, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 2.063215211851678e-05, | |
| "loss": 0.9449, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.9667313231009953e-05, | |
| "loss": 0.9702, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.872311248553974e-05, | |
| "loss": 0.9406, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.7799792455209018e-05, | |
| "loss": 0.9599, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.689759034868961e-05, | |
| "loss": 0.9306, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.601673794928127e-05, | |
| "loss": 0.9432, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.5157461555364772e-05, | |
| "loss": 0.9606, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.4319981922263637e-05, | |
| "loss": 0.9364, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.350451420553065e-05, | |
| "loss": 0.954, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.2711267905672231e-05, | |
| "loss": 0.952, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.19404468143262e-05, | |
| "loss": 0.9437, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.1192248961905949e-05, | |
| "loss": 0.9802, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.0466866566724698e-05, | |
| "loss": 0.9283, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 9.764485985613092e-06, | |
| "loss": 0.9563, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 9.085287666042508e-06, | |
| "loss": 0.9498, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 8.429446099766614e-06, | |
| "loss": 0.9462, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 7.797129777992952e-06, | |
| "loss": 0.9434, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 7.1885011480961164e-06, | |
| "loss": 0.9766, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 6.603716571883689e-06, | |
| "loss": 0.952, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 6.042926285425576e-06, | |
| "loss": 0.9489, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.506274360457086e-06, | |
| "loss": 0.9433, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.993898667365671e-06, | |
| "loss": 0.9068, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.505930839770967e-06, | |
| "loss": 0.9383, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.0424962407068166e-06, | |
| "loss": 0.9663, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.6037139304146762e-06, | |
| "loss": 0.9182, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.1896966357558675e-06, | |
| "loss": 0.9272, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.800550721251216e-06, | |
| "loss": 0.9122, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.4363761617550053e-06, | |
| "loss": 0.9536, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.0972665167707126e-06, | |
| "loss": 0.9397, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.7833089064146824e-06, | |
| "loss": 0.9482, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.4945839890343261e-06, | |
| "loss": 0.9568, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.231165940486234e-06, | |
| "loss": 0.9517, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 9.931224350798185e-07, | |
| "loss": 0.9572, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 7.805146281912201e-07, | |
| "loss": 0.961, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 5.933971405519656e-07, | |
| "loss": 0.9365, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.3181804421645875e-07, | |
| "loss": 0.9814, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.9581885021181533e-07, | |
| "loss": 0.9549, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.8543449787338242e-07, | |
| "loss": 0.9529, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.0069334586854107e-07, | |
| "loss": 0.9556, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.161716491105638e-08, | |
| "loss": 0.9656, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 8.221132168073631e-09, | |
| "loss": 0.9504, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.9492650628089905, | |
| "eval_runtime": 481.0248, | |
| "eval_samples_per_second": 15.987, | |
| "eval_steps_per_second": 1.0, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 1089, | |
| "total_flos": 6.125664424440103e+18, | |
| "train_loss": 0.9597015198049904, | |
| "train_runtime": 15408.8999, | |
| "train_samples_per_second": 4.52, | |
| "train_steps_per_second": 0.071 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1089, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "total_flos": 6.125664424440103e+18, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |