| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 2000, | |
| "global_step": 6326, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "document_regularizer_loss": 0.7471, | |
| "epoch": 0.006323110970597534, | |
| "grad_norm": 10069.7373046875, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 787.2433, | |
| "query_regularizer_loss": 0.746, | |
| "sparse_loss": 785.7502, | |
| "step": 20 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.7124, | |
| "epoch": 0.012646221941195067, | |
| "grad_norm": 13459.8525390625, | |
| "learning_rate": 4.105263157894737e-06, | |
| "loss": 440.0618, | |
| "query_regularizer_loss": 0.712, | |
| "sparse_loss": 438.6375, | |
| "step": 40 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.6455, | |
| "epoch": 0.0189693329117926, | |
| "grad_norm": 3622.762451171875, | |
| "learning_rate": 6.2105263157894745e-06, | |
| "loss": 271.8732, | |
| "query_regularizer_loss": 0.6464, | |
| "sparse_loss": 270.5813, | |
| "step": 60 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.5798, | |
| "epoch": 0.025292443882390134, | |
| "grad_norm": 21794.759765625, | |
| "learning_rate": 8.315789473684212e-06, | |
| "loss": 159.8073, | |
| "query_regularizer_loss": 0.5803, | |
| "sparse_loss": 158.6473, | |
| "step": 80 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.5264, | |
| "epoch": 0.03161555485298767, | |
| "grad_norm": 3427.578369140625, | |
| "learning_rate": 1.0421052631578948e-05, | |
| "loss": 115.2051, | |
| "query_regularizer_loss": 0.526, | |
| "sparse_loss": 114.1528, | |
| "step": 100 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.4789, | |
| "epoch": 0.0379386658235852, | |
| "grad_norm": 1716.6964111328125, | |
| "learning_rate": 1.2526315789473686e-05, | |
| "loss": 68.2212, | |
| "query_regularizer_loss": 0.4789, | |
| "sparse_loss": 67.2635, | |
| "step": 120 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.4385, | |
| "epoch": 0.044261776794182736, | |
| "grad_norm": 745.2850341796875, | |
| "learning_rate": 1.4631578947368422e-05, | |
| "loss": 37.2703, | |
| "query_regularizer_loss": 0.4386, | |
| "sparse_loss": 36.3932, | |
| "step": 140 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.405, | |
| "epoch": 0.05058488776478027, | |
| "grad_norm": 1856.085693359375, | |
| "learning_rate": 1.673684210526316e-05, | |
| "loss": 21.9191, | |
| "query_regularizer_loss": 0.405, | |
| "sparse_loss": 21.109, | |
| "step": 160 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.3851, | |
| "epoch": 0.05690799873537781, | |
| "grad_norm": 309.21807861328125, | |
| "learning_rate": 1.8842105263157894e-05, | |
| "loss": 13.6624, | |
| "query_regularizer_loss": 0.3852, | |
| "sparse_loss": 12.8921, | |
| "step": 180 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.3724, | |
| "epoch": 0.06323110970597534, | |
| "grad_norm": 76.83500671386719, | |
| "learning_rate": 2.0947368421052632e-05, | |
| "loss": 6.6098, | |
| "query_regularizer_loss": 0.3724, | |
| "sparse_loss": 5.865, | |
| "step": 200 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.3636, | |
| "epoch": 0.06955422067657287, | |
| "grad_norm": 31.61189842224121, | |
| "learning_rate": 2.305263157894737e-05, | |
| "loss": 4.905, | |
| "query_regularizer_loss": 0.3636, | |
| "sparse_loss": 4.1779, | |
| "step": 220 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.3525, | |
| "epoch": 0.0758773316471704, | |
| "grad_norm": 54.798858642578125, | |
| "learning_rate": 2.5157894736842108e-05, | |
| "loss": 4.4195, | |
| "query_regularizer_loss": 0.3525, | |
| "sparse_loss": 3.7144, | |
| "step": 240 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.3441, | |
| "epoch": 0.08220044261776795, | |
| "grad_norm": 35.759185791015625, | |
| "learning_rate": 2.7263157894736846e-05, | |
| "loss": 3.7145, | |
| "query_regularizer_loss": 0.3441, | |
| "sparse_loss": 3.0263, | |
| "step": 260 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.333, | |
| "epoch": 0.08852355358836547, | |
| "grad_norm": 28.589937210083008, | |
| "learning_rate": 2.9368421052631577e-05, | |
| "loss": 3.3807, | |
| "query_regularizer_loss": 0.333, | |
| "sparse_loss": 2.7146, | |
| "step": 280 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.3202, | |
| "epoch": 0.09484666455896301, | |
| "grad_norm": 21.61874008178711, | |
| "learning_rate": 3.147368421052632e-05, | |
| "loss": 3.3389, | |
| "query_regularizer_loss": 0.3202, | |
| "sparse_loss": 2.6984, | |
| "step": 300 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.3065, | |
| "epoch": 0.10116977552956054, | |
| "grad_norm": 23.4547176361084, | |
| "learning_rate": 3.357894736842105e-05, | |
| "loss": 3.1854, | |
| "query_regularizer_loss": 0.3065, | |
| "sparse_loss": 2.5724, | |
| "step": 320 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.2938, | |
| "epoch": 0.10749288650015808, | |
| "grad_norm": 30.065744400024414, | |
| "learning_rate": 3.5684210526315794e-05, | |
| "loss": 3.0942, | |
| "query_regularizer_loss": 0.2938, | |
| "sparse_loss": 2.5066, | |
| "step": 340 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.2794, | |
| "epoch": 0.11381599747075562, | |
| "grad_norm": 24.388168334960938, | |
| "learning_rate": 3.778947368421053e-05, | |
| "loss": 2.8802, | |
| "query_regularizer_loss": 0.2794, | |
| "sparse_loss": 2.3214, | |
| "step": 360 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.2675, | |
| "epoch": 0.12013910844135314, | |
| "grad_norm": 30.463876724243164, | |
| "learning_rate": 3.989473684210526e-05, | |
| "loss": 2.8744, | |
| "query_regularizer_loss": 0.2676, | |
| "sparse_loss": 2.3393, | |
| "step": 380 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.2574, | |
| "epoch": 0.12646221941195068, | |
| "grad_norm": 8.956077575683594, | |
| "learning_rate": 4.2e-05, | |
| "loss": 2.9323, | |
| "query_regularizer_loss": 0.2574, | |
| "sparse_loss": 2.4174, | |
| "step": 400 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.2454, | |
| "epoch": 0.13278533038254822, | |
| "grad_norm": 17.979278564453125, | |
| "learning_rate": 4.410526315789474e-05, | |
| "loss": 2.805, | |
| "query_regularizer_loss": 0.2454, | |
| "sparse_loss": 2.3142, | |
| "step": 420 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.235, | |
| "epoch": 0.13910844135314573, | |
| "grad_norm": 16.181251525878906, | |
| "learning_rate": 4.6210526315789473e-05, | |
| "loss": 2.7811, | |
| "query_regularizer_loss": 0.235, | |
| "sparse_loss": 2.3111, | |
| "step": 440 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.2264, | |
| "epoch": 0.14543155232374327, | |
| "grad_norm": 17.261810302734375, | |
| "learning_rate": 4.8315789473684215e-05, | |
| "loss": 2.7447, | |
| "query_regularizer_loss": 0.2264, | |
| "sparse_loss": 2.2919, | |
| "step": 460 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.2184, | |
| "epoch": 0.1517546632943408, | |
| "grad_norm": 16.512298583984375, | |
| "learning_rate": 4.999994234069837e-05, | |
| "loss": 2.7353, | |
| "query_regularizer_loss": 0.2184, | |
| "sparse_loss": 2.2986, | |
| "step": 480 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.21, | |
| "epoch": 0.15807777426493835, | |
| "grad_norm": 36.3756217956543, | |
| "learning_rate": 4.9997924293067854e-05, | |
| "loss": 2.6297, | |
| "query_regularizer_loss": 0.21, | |
| "sparse_loss": 2.2096, | |
| "step": 500 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.202, | |
| "epoch": 0.1644008852355359, | |
| "grad_norm": 54.841121673583984, | |
| "learning_rate": 4.9993023546318026e-05, | |
| "loss": 2.6286, | |
| "query_regularizer_loss": 0.2021, | |
| "sparse_loss": 2.2245, | |
| "step": 520 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1967, | |
| "epoch": 0.1707239962061334, | |
| "grad_norm": 104.60382080078125, | |
| "learning_rate": 4.998524066559095e-05, | |
| "loss": 2.6424, | |
| "query_regularizer_loss": 0.1966, | |
| "sparse_loss": 2.2491, | |
| "step": 540 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.193, | |
| "epoch": 0.17704710717673094, | |
| "grad_norm": 61.492584228515625, | |
| "learning_rate": 4.997457654838927e-05, | |
| "loss": 2.5846, | |
| "query_regularizer_loss": 0.193, | |
| "sparse_loss": 2.1985, | |
| "step": 560 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1896, | |
| "epoch": 0.18337021814732848, | |
| "grad_norm": 53.26578140258789, | |
| "learning_rate": 4.9961032424472766e-05, | |
| "loss": 2.6457, | |
| "query_regularizer_loss": 0.1896, | |
| "sparse_loss": 2.2664, | |
| "step": 580 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1867, | |
| "epoch": 0.18969332911792602, | |
| "grad_norm": 13.084442138671875, | |
| "learning_rate": 4.9944609855716445e-05, | |
| "loss": 2.4484, | |
| "query_regularizer_loss": 0.1867, | |
| "sparse_loss": 2.075, | |
| "step": 600 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1808, | |
| "epoch": 0.19601644008852356, | |
| "grad_norm": 15.175243377685547, | |
| "learning_rate": 4.992531073593055e-05, | |
| "loss": 2.5683, | |
| "query_regularizer_loss": 0.1808, | |
| "sparse_loss": 2.2068, | |
| "step": 620 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1747, | |
| "epoch": 0.20233955105912108, | |
| "grad_norm": 30.665386199951172, | |
| "learning_rate": 4.990313729064209e-05, | |
| "loss": 2.5078, | |
| "query_regularizer_loss": 0.1747, | |
| "sparse_loss": 2.1584, | |
| "step": 640 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1695, | |
| "epoch": 0.20866266202971862, | |
| "grad_norm": 18.14386749267578, | |
| "learning_rate": 4.98780920768382e-05, | |
| "loss": 2.4674, | |
| "query_regularizer_loss": 0.1695, | |
| "sparse_loss": 2.1284, | |
| "step": 660 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.165, | |
| "epoch": 0.21498577300031615, | |
| "grad_norm": 25.72368812561035, | |
| "learning_rate": 4.985017798267132e-05, | |
| "loss": 2.4734, | |
| "query_regularizer_loss": 0.165, | |
| "sparse_loss": 2.1434, | |
| "step": 680 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1575, | |
| "epoch": 0.2213088839709137, | |
| "grad_norm": 13.785391807556152, | |
| "learning_rate": 4.981939822712609e-05, | |
| "loss": 2.3857, | |
| "query_regularizer_loss": 0.1575, | |
| "sparse_loss": 2.0707, | |
| "step": 700 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.151, | |
| "epoch": 0.22763199494151123, | |
| "grad_norm": 59.33202362060547, | |
| "learning_rate": 4.9785756359648204e-05, | |
| "loss": 2.3447, | |
| "query_regularizer_loss": 0.151, | |
| "sparse_loss": 2.0427, | |
| "step": 720 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1457, | |
| "epoch": 0.23395510591210875, | |
| "grad_norm": 11.074970245361328, | |
| "learning_rate": 4.9749256259735024e-05, | |
| "loss": 2.3187, | |
| "query_regularizer_loss": 0.1457, | |
| "sparse_loss": 2.0272, | |
| "step": 740 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1402, | |
| "epoch": 0.24027821688270629, | |
| "grad_norm": 14.643407821655273, | |
| "learning_rate": 4.9709902136488276e-05, | |
| "loss": 2.4015, | |
| "query_regularizer_loss": 0.1402, | |
| "sparse_loss": 2.121, | |
| "step": 760 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1392, | |
| "epoch": 0.24660132785330383, | |
| "grad_norm": 27.94011688232422, | |
| "learning_rate": 4.9667698528128593e-05, | |
| "loss": 2.3671, | |
| "query_regularizer_loss": 0.1392, | |
| "sparse_loss": 2.0887, | |
| "step": 780 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1338, | |
| "epoch": 0.25292443882390137, | |
| "grad_norm": 17.952852249145508, | |
| "learning_rate": 4.9622650301472265e-05, | |
| "loss": 2.2929, | |
| "query_regularizer_loss": 0.1338, | |
| "sparse_loss": 2.0252, | |
| "step": 800 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.128, | |
| "epoch": 0.2592475497944989, | |
| "grad_norm": 12.585227012634277, | |
| "learning_rate": 4.957476265136993e-05, | |
| "loss": 2.3264, | |
| "query_regularizer_loss": 0.128, | |
| "sparse_loss": 2.0704, | |
| "step": 820 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1269, | |
| "epoch": 0.26557066076509644, | |
| "grad_norm": 16.949398040771484, | |
| "learning_rate": 4.952404110010757e-05, | |
| "loss": 2.7711, | |
| "query_regularizer_loss": 0.1269, | |
| "sparse_loss": 2.5173, | |
| "step": 840 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1213, | |
| "epoch": 0.27189377173569396, | |
| "grad_norm": 11.61090087890625, | |
| "learning_rate": 4.947049149676968e-05, | |
| "loss": 2.2928, | |
| "query_regularizer_loss": 0.1213, | |
| "sparse_loss": 2.0502, | |
| "step": 860 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1157, | |
| "epoch": 0.27821688270629147, | |
| "grad_norm": 10.377798080444336, | |
| "learning_rate": 4.941412001656474e-05, | |
| "loss": 2.1857, | |
| "query_regularizer_loss": 0.1157, | |
| "sparse_loss": 1.9544, | |
| "step": 880 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1112, | |
| "epoch": 0.28453999367688904, | |
| "grad_norm": 12.839447021484375, | |
| "learning_rate": 4.9354933160113135e-05, | |
| "loss": 2.1, | |
| "query_regularizer_loss": 0.1111, | |
| "sparse_loss": 1.8777, | |
| "step": 900 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1094, | |
| "epoch": 0.29086310464748655, | |
| "grad_norm": 15.7372465133667, | |
| "learning_rate": 4.929293775269754e-05, | |
| "loss": 2.1598, | |
| "query_regularizer_loss": 0.1094, | |
| "sparse_loss": 1.941, | |
| "step": 920 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1069, | |
| "epoch": 0.2971862156180841, | |
| "grad_norm": 85.94547271728516, | |
| "learning_rate": 4.9228140943475766e-05, | |
| "loss": 1.9527, | |
| "query_regularizer_loss": 0.1069, | |
| "sparse_loss": 1.7389, | |
| "step": 940 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1045, | |
| "epoch": 0.3035093265886816, | |
| "grad_norm": 16.49176025390625, | |
| "learning_rate": 4.9160550204656416e-05, | |
| "loss": 2.0608, | |
| "query_regularizer_loss": 0.1045, | |
| "sparse_loss": 1.8519, | |
| "step": 960 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.1019, | |
| "epoch": 0.30983243755927914, | |
| "grad_norm": 11.689447402954102, | |
| "learning_rate": 4.909017333063719e-05, | |
| "loss": 2.0235, | |
| "query_regularizer_loss": 0.1019, | |
| "sparse_loss": 1.8198, | |
| "step": 980 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0988, | |
| "epoch": 0.3161555485298767, | |
| "grad_norm": 15.240221977233887, | |
| "learning_rate": 4.901701843710602e-05, | |
| "loss": 1.9305, | |
| "query_regularizer_loss": 0.0988, | |
| "sparse_loss": 1.7328, | |
| "step": 1000 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0962, | |
| "epoch": 0.3224786595004742, | |
| "grad_norm": 8.189220428466797, | |
| "learning_rate": 4.894109396010522e-05, | |
| "loss": 1.9598, | |
| "query_regularizer_loss": 0.0962, | |
| "sparse_loss": 1.7673, | |
| "step": 1020 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0945, | |
| "epoch": 0.3288017704710718, | |
| "grad_norm": 17.443077087402344, | |
| "learning_rate": 4.886240865505865e-05, | |
| "loss": 1.9558, | |
| "query_regularizer_loss": 0.0945, | |
| "sparse_loss": 1.7667, | |
| "step": 1040 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.094, | |
| "epoch": 0.3351248814416693, | |
| "grad_norm": 10.96996784210205, | |
| "learning_rate": 4.87809715957621e-05, | |
| "loss": 2.0087, | |
| "query_regularizer_loss": 0.094, | |
| "sparse_loss": 1.8206, | |
| "step": 1060 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0934, | |
| "epoch": 0.3414479924122668, | |
| "grad_norm": 9.809027671813965, | |
| "learning_rate": 4.8696792173336845e-05, | |
| "loss": 1.9493, | |
| "query_regularizer_loss": 0.0934, | |
| "sparse_loss": 1.7625, | |
| "step": 1080 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0914, | |
| "epoch": 0.3477711033828644, | |
| "grad_norm": 20.714750289916992, | |
| "learning_rate": 4.860988009514675e-05, | |
| "loss": 1.7575, | |
| "query_regularizer_loss": 0.0914, | |
| "sparse_loss": 1.5748, | |
| "step": 1100 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0898, | |
| "epoch": 0.3540942143534619, | |
| "grad_norm": 16.20149040222168, | |
| "learning_rate": 4.852024538367882e-05, | |
| "loss": 1.7915, | |
| "query_regularizer_loss": 0.0898, | |
| "sparse_loss": 1.6119, | |
| "step": 1120 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0883, | |
| "epoch": 0.36041732532405946, | |
| "grad_norm": 43.65206527709961, | |
| "learning_rate": 4.842789837538741e-05, | |
| "loss": 1.8282, | |
| "query_regularizer_loss": 0.0883, | |
| "sparse_loss": 1.6516, | |
| "step": 1140 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0866, | |
| "epoch": 0.36674043629465697, | |
| "grad_norm": 9.418038368225098, | |
| "learning_rate": 4.83328497195023e-05, | |
| "loss": 1.774, | |
| "query_regularizer_loss": 0.0867, | |
| "sparse_loss": 1.6007, | |
| "step": 1160 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0845, | |
| "epoch": 0.3730635472652545, | |
| "grad_norm": 13.31879997253418, | |
| "learning_rate": 4.82351103768006e-05, | |
| "loss": 1.7967, | |
| "query_regularizer_loss": 0.0845, | |
| "sparse_loss": 1.6277, | |
| "step": 1180 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0831, | |
| "epoch": 0.37938665823585205, | |
| "grad_norm": 14.40135383605957, | |
| "learning_rate": 4.813469161834282e-05, | |
| "loss": 1.7661, | |
| "query_regularizer_loss": 0.0831, | |
| "sparse_loss": 1.5999, | |
| "step": 1200 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0817, | |
| "epoch": 0.38570976920644956, | |
| "grad_norm": 9.40664005279541, | |
| "learning_rate": 4.803160502417309e-05, | |
| "loss": 1.7127, | |
| "query_regularizer_loss": 0.0817, | |
| "sparse_loss": 1.5493, | |
| "step": 1220 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0799, | |
| "epoch": 0.3920328801770471, | |
| "grad_norm": 7.551331520080566, | |
| "learning_rate": 4.7925862481983794e-05, | |
| "loss": 1.6856, | |
| "query_regularizer_loss": 0.0799, | |
| "sparse_loss": 1.5259, | |
| "step": 1240 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0792, | |
| "epoch": 0.39835599114764464, | |
| "grad_norm": 8.512903213500977, | |
| "learning_rate": 4.7817476185744705e-05, | |
| "loss": 1.737, | |
| "query_regularizer_loss": 0.0792, | |
| "sparse_loss": 1.5786, | |
| "step": 1260 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0777, | |
| "epoch": 0.40467910211824215, | |
| "grad_norm": 7.908294200897217, | |
| "learning_rate": 4.770645863429681e-05, | |
| "loss": 1.7078, | |
| "query_regularizer_loss": 0.0777, | |
| "sparse_loss": 1.5525, | |
| "step": 1280 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.076, | |
| "epoch": 0.4110022130888397, | |
| "grad_norm": 7.902525901794434, | |
| "learning_rate": 4.759282262991097e-05, | |
| "loss": 1.7971, | |
| "query_regularizer_loss": 0.076, | |
| "sparse_loss": 1.6451, | |
| "step": 1300 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0745, | |
| "epoch": 0.41732532405943723, | |
| "grad_norm": 7.720804214477539, | |
| "learning_rate": 4.7476581276811594e-05, | |
| "loss": 1.6587, | |
| "query_regularizer_loss": 0.0745, | |
| "sparse_loss": 1.5096, | |
| "step": 1320 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0728, | |
| "epoch": 0.4236484350300348, | |
| "grad_norm": 6.726785659790039, | |
| "learning_rate": 4.7357747979665504e-05, | |
| "loss": 1.6127, | |
| "query_regularizer_loss": 0.0728, | |
| "sparse_loss": 1.4672, | |
| "step": 1340 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0714, | |
| "epoch": 0.4299715460006323, | |
| "grad_norm": 8.149948120117188, | |
| "learning_rate": 4.723633644203612e-05, | |
| "loss": 1.5483, | |
| "query_regularizer_loss": 0.0714, | |
| "sparse_loss": 1.4055, | |
| "step": 1360 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0706, | |
| "epoch": 0.4362946569712298, | |
| "grad_norm": 11.196396827697754, | |
| "learning_rate": 4.711236066480322e-05, | |
| "loss": 1.5743, | |
| "query_regularizer_loss": 0.0706, | |
| "sparse_loss": 1.4331, | |
| "step": 1380 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0696, | |
| "epoch": 0.4426177679418274, | |
| "grad_norm": 8.784821510314941, | |
| "learning_rate": 4.698583494454837e-05, | |
| "loss": 1.6291, | |
| "query_regularizer_loss": 0.0696, | |
| "sparse_loss": 1.4899, | |
| "step": 1400 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0681, | |
| "epoch": 0.4489408789124249, | |
| "grad_norm": 8.496731758117676, | |
| "learning_rate": 4.68567738719063e-05, | |
| "loss": 1.6277, | |
| "query_regularizer_loss": 0.068, | |
| "sparse_loss": 1.4916, | |
| "step": 1420 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0666, | |
| "epoch": 0.45526398988302247, | |
| "grad_norm": 9.2264986038208, | |
| "learning_rate": 4.672519232988234e-05, | |
| "loss": 1.5486, | |
| "query_regularizer_loss": 0.0666, | |
| "sparse_loss": 1.4155, | |
| "step": 1440 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0654, | |
| "epoch": 0.46158710085362, | |
| "grad_norm": 8.807757377624512, | |
| "learning_rate": 4.659110549213615e-05, | |
| "loss": 1.5393, | |
| "query_regularizer_loss": 0.0654, | |
| "sparse_loss": 1.4084, | |
| "step": 1460 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.064, | |
| "epoch": 0.4679102118242175, | |
| "grad_norm": 6.820550441741943, | |
| "learning_rate": 4.645452882123192e-05, | |
| "loss": 1.5138, | |
| "query_regularizer_loss": 0.064, | |
| "sparse_loss": 1.3858, | |
| "step": 1480 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0622, | |
| "epoch": 0.47423332279481506, | |
| "grad_norm": 6.402284622192383, | |
| "learning_rate": 4.6315478066855274e-05, | |
| "loss": 1.5601, | |
| "query_regularizer_loss": 0.0621, | |
| "sparse_loss": 1.4358, | |
| "step": 1500 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0614, | |
| "epoch": 0.48055643376541257, | |
| "grad_norm": 24.8136043548584, | |
| "learning_rate": 4.617396926399706e-05, | |
| "loss": 1.5127, | |
| "query_regularizer_loss": 0.0614, | |
| "sparse_loss": 1.39, | |
| "step": 1520 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0615, | |
| "epoch": 0.48687954473601014, | |
| "grad_norm": 14.119754791259766, | |
| "learning_rate": 4.603001873110422e-05, | |
| "loss": 1.5186, | |
| "query_regularizer_loss": 0.0615, | |
| "sparse_loss": 1.3956, | |
| "step": 1540 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0606, | |
| "epoch": 0.49320265570660765, | |
| "grad_norm": 10.744440078735352, | |
| "learning_rate": 4.588364306819801e-05, | |
| "loss": 1.4835, | |
| "query_regularizer_loss": 0.0606, | |
| "sparse_loss": 1.3624, | |
| "step": 1560 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0598, | |
| "epoch": 0.49952576667720516, | |
| "grad_norm": 7.516956329345703, | |
| "learning_rate": 4.57348591549597e-05, | |
| "loss": 1.3831, | |
| "query_regularizer_loss": 0.0598, | |
| "sparse_loss": 1.2636, | |
| "step": 1580 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0586, | |
| "epoch": 0.5058488776478027, | |
| "grad_norm": 9.290154457092285, | |
| "learning_rate": 4.558368414878405e-05, | |
| "loss": 1.5297, | |
| "query_regularizer_loss": 0.0586, | |
| "sparse_loss": 1.4126, | |
| "step": 1600 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0575, | |
| "epoch": 0.5121719886184003, | |
| "grad_norm": 8.14932918548584, | |
| "learning_rate": 4.543013548280082e-05, | |
| "loss": 1.4104, | |
| "query_regularizer_loss": 0.0575, | |
| "sparse_loss": 1.2954, | |
| "step": 1620 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0563, | |
| "epoch": 0.5184950995889978, | |
| "grad_norm": 6.615036964416504, | |
| "learning_rate": 4.527423086386432e-05, | |
| "loss": 1.3922, | |
| "query_regularizer_loss": 0.0563, | |
| "sparse_loss": 1.2795, | |
| "step": 1640 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0554, | |
| "epoch": 0.5248182105595953, | |
| "grad_norm": 5.8693013191223145, | |
| "learning_rate": 4.51159882705116e-05, | |
| "loss": 1.4043, | |
| "query_regularizer_loss": 0.0554, | |
| "sparse_loss": 1.2935, | |
| "step": 1660 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0541, | |
| "epoch": 0.5311413215301929, | |
| "grad_norm": 6.5546650886535645, | |
| "learning_rate": 4.495542595088914e-05, | |
| "loss": 1.4286, | |
| "query_regularizer_loss": 0.0541, | |
| "sparse_loss": 1.3203, | |
| "step": 1680 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0535, | |
| "epoch": 0.5374644325007903, | |
| "grad_norm": 7.110738754272461, | |
| "learning_rate": 4.4792562420648574e-05, | |
| "loss": 1.3533, | |
| "query_regularizer_loss": 0.0535, | |
| "sparse_loss": 1.2462, | |
| "step": 1700 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.053, | |
| "epoch": 0.5437875434713879, | |
| "grad_norm": 7.050394058227539, | |
| "learning_rate": 4.462741646081145e-05, | |
| "loss": 1.3941, | |
| "query_regularizer_loss": 0.053, | |
| "sparse_loss": 1.288, | |
| "step": 1720 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0516, | |
| "epoch": 0.5501106544419855, | |
| "grad_norm": 7.823602199554443, | |
| "learning_rate": 4.446000711560351e-05, | |
| "loss": 1.3218, | |
| "query_regularizer_loss": 0.0516, | |
| "sparse_loss": 1.2186, | |
| "step": 1740 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0511, | |
| "epoch": 0.5564337654125829, | |
| "grad_norm": 8.4823579788208, | |
| "learning_rate": 4.42903536902585e-05, | |
| "loss": 1.3049, | |
| "query_regularizer_loss": 0.0511, | |
| "sparse_loss": 1.2027, | |
| "step": 1760 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.051, | |
| "epoch": 0.5627568763831805, | |
| "grad_norm": 6.614449977874756, | |
| "learning_rate": 4.4118475748791985e-05, | |
| "loss": 1.4483, | |
| "query_regularizer_loss": 0.051, | |
| "sparse_loss": 1.3464, | |
| "step": 1780 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0503, | |
| "epoch": 0.5690799873537781, | |
| "grad_norm": 6.088893890380859, | |
| "learning_rate": 4.3944393111745255e-05, | |
| "loss": 1.3819, | |
| "query_regularizer_loss": 0.0503, | |
| "sparse_loss": 1.2812, | |
| "step": 1800 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0497, | |
| "epoch": 0.5754030983243756, | |
| "grad_norm": 7.0833024978637695, | |
| "learning_rate": 4.376812585389967e-05, | |
| "loss": 1.3073, | |
| "query_regularizer_loss": 0.0497, | |
| "sparse_loss": 1.2078, | |
| "step": 1820 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0488, | |
| "epoch": 0.5817262092949731, | |
| "grad_norm": 6.979008197784424, | |
| "learning_rate": 4.358969430196166e-05, | |
| "loss": 1.3515, | |
| "query_regularizer_loss": 0.0488, | |
| "sparse_loss": 1.2538, | |
| "step": 1840 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0483, | |
| "epoch": 0.5880493202655707, | |
| "grad_norm": 6.331544399261475, | |
| "learning_rate": 4.340911903221875e-05, | |
| "loss": 1.3165, | |
| "query_regularizer_loss": 0.0482, | |
| "sparse_loss": 1.22, | |
| "step": 1860 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0476, | |
| "epoch": 0.5943724312361682, | |
| "grad_norm": 5.857104301452637, | |
| "learning_rate": 4.322642086816674e-05, | |
| "loss": 1.2582, | |
| "query_regularizer_loss": 0.0476, | |
| "sparse_loss": 1.163, | |
| "step": 1880 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0471, | |
| "epoch": 0.6006955422067657, | |
| "grad_norm": 11.978568077087402, | |
| "learning_rate": 4.3041620878108336e-05, | |
| "loss": 1.2801, | |
| "query_regularizer_loss": 0.0471, | |
| "sparse_loss": 1.1858, | |
| "step": 1900 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0463, | |
| "epoch": 0.6070186531773633, | |
| "grad_norm": 7.875554084777832, | |
| "learning_rate": 4.2854740372723686e-05, | |
| "loss": 1.2912, | |
| "query_regularizer_loss": 0.0463, | |
| "sparse_loss": 1.1985, | |
| "step": 1920 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0455, | |
| "epoch": 0.6133417641479608, | |
| "grad_norm": 7.675542831420898, | |
| "learning_rate": 4.266580090261282e-05, | |
| "loss": 1.2768, | |
| "query_regularizer_loss": 0.0455, | |
| "sparse_loss": 1.1858, | |
| "step": 1940 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.045, | |
| "epoch": 0.6196648751185583, | |
| "grad_norm": 14.170219421386719, | |
| "learning_rate": 4.247482425581053e-05, | |
| "loss": 1.2681, | |
| "query_regularizer_loss": 0.045, | |
| "sparse_loss": 1.178, | |
| "step": 1960 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.045, | |
| "epoch": 0.6259879860891558, | |
| "grad_norm": 15.395133972167969, | |
| "learning_rate": 4.2281832455273805e-05, | |
| "loss": 1.2818, | |
| "query_regularizer_loss": 0.045, | |
| "sparse_loss": 1.1918, | |
| "step": 1980 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0447, | |
| "epoch": 0.6323110970597534, | |
| "grad_norm": 10.426234245300293, | |
| "learning_rate": 4.208684775634221e-05, | |
| "loss": 1.2085, | |
| "query_regularizer_loss": 0.0447, | |
| "sparse_loss": 1.1191, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.6323110970597534, | |
| "eval_runtime": 274.6851, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sparse-ir-eval_avg_flops": 893.3501586914062, | |
| "eval_sparse-ir-eval_corpus_active_dims": 1024.0, | |
| "eval_sparse-ir-eval_corpus_sparsity_ratio": 0.9796696315120712, | |
| "eval_sparse-ir-eval_dot_accuracy@1": 0.044191161767646474, | |
| "eval_sparse-ir-eval_dot_accuracy@100": 0.34593081383723256, | |
| "eval_sparse-ir-eval_dot_accuracy@50": 0.2571485702859428, | |
| "eval_sparse-ir-eval_dot_accuracy@8": 0.1227754449110178, | |
| "eval_sparse-ir-eval_dot_map@100": 0.0739891059526251, | |
| "eval_sparse-ir-eval_dot_mrr@10": 0.06742675274468914, | |
| "eval_sparse-ir-eval_dot_ndcg@10": 0.08339501666788006, | |
| "eval_sparse-ir-eval_dot_precision@1": 0.044191161767646474, | |
| "eval_sparse-ir-eval_dot_precision@100": 0.0034593081383723257, | |
| "eval_sparse-ir-eval_dot_precision@50": 0.005142971405718857, | |
| "eval_sparse-ir-eval_dot_precision@8": 0.015346930613877225, | |
| "eval_sparse-ir-eval_dot_recall@1": 0.044191161767646474, | |
| "eval_sparse-ir-eval_dot_recall@100": 0.34593081383723256, | |
| "eval_sparse-ir-eval_dot_recall@50": 0.2571485702859428, | |
| "eval_sparse-ir-eval_dot_recall@8": 0.1227754449110178, | |
| "eval_sparse-ir-eval_query_active_dims": 1024.0, | |
| "eval_sparse-ir-eval_query_sparsity_ratio": 0.9796696315120712, | |
| "eval_steps_per_second": 0.0, | |
| "step": 2000 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0442, | |
| "epoch": 0.638634208030351, | |
| "grad_norm": 6.597539901733398, | |
| "learning_rate": 4.1889892644171435e-05, | |
| "loss": 1.2319, | |
| "query_regularizer_loss": 0.0442, | |
| "sparse_loss": 1.1435, | |
| "step": 2020 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0436, | |
| "epoch": 0.6449573190009484, | |
| "grad_norm": 5.0735087394714355, | |
| "learning_rate": 4.1690989831140394e-05, | |
| "loss": 1.2843, | |
| "query_regularizer_loss": 0.0436, | |
| "sparse_loss": 1.1971, | |
| "step": 2040 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0429, | |
| "epoch": 0.651280429971546, | |
| "grad_norm": 6.517344951629639, | |
| "learning_rate": 4.1490162254232054e-05, | |
| "loss": 1.2895, | |
| "query_regularizer_loss": 0.0429, | |
| "sparse_loss": 1.2036, | |
| "step": 2060 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0426, | |
| "epoch": 0.6576035409421436, | |
| "grad_norm": 21.777257919311523, | |
| "learning_rate": 4.1287433072388436e-05, | |
| "loss": 1.2754, | |
| "query_regularizer_loss": 0.0426, | |
| "sparse_loss": 1.1902, | |
| "step": 2080 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0423, | |
| "epoch": 0.663926651912741, | |
| "grad_norm": 7.3678975105285645, | |
| "learning_rate": 4.108282566383994e-05, | |
| "loss": 1.3094, | |
| "query_regularizer_loss": 0.0423, | |
| "sparse_loss": 1.2248, | |
| "step": 2100 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0422, | |
| "epoch": 0.6702497628833386, | |
| "grad_norm": 6.312955379486084, | |
| "learning_rate": 4.087636362340948e-05, | |
| "loss": 1.1937, | |
| "query_regularizer_loss": 0.0422, | |
| "sparse_loss": 1.1092, | |
| "step": 2120 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0417, | |
| "epoch": 0.6765728738539362, | |
| "grad_norm": 5.551113128662109, | |
| "learning_rate": 4.0668070759791524e-05, | |
| "loss": 1.2294, | |
| "query_regularizer_loss": 0.0417, | |
| "sparse_loss": 1.1461, | |
| "step": 2140 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0408, | |
| "epoch": 0.6828959848245336, | |
| "grad_norm": 7.7479023933410645, | |
| "learning_rate": 4.0457971092806566e-05, | |
| "loss": 1.2211, | |
| "query_regularizer_loss": 0.0408, | |
| "sparse_loss": 1.1395, | |
| "step": 2160 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0402, | |
| "epoch": 0.6892190957951312, | |
| "grad_norm": 13.669305801391602, | |
| "learning_rate": 4.0246088850631246e-05, | |
| "loss": 1.3088, | |
| "query_regularizer_loss": 0.0402, | |
| "sparse_loss": 1.2285, | |
| "step": 2180 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0398, | |
| "epoch": 0.6955422067657288, | |
| "grad_norm": 28.209056854248047, | |
| "learning_rate": 4.003244846700437e-05, | |
| "loss": 1.1989, | |
| "query_regularizer_loss": 0.0398, | |
| "sparse_loss": 1.1193, | |
| "step": 2200 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0396, | |
| "epoch": 0.7018653177363263, | |
| "grad_norm": 7.1696319580078125, | |
| "learning_rate": 3.981707457840927e-05, | |
| "loss": 1.2486, | |
| "query_regularizer_loss": 0.0396, | |
| "sparse_loss": 1.1695, | |
| "step": 2220 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0394, | |
| "epoch": 0.7081884287069238, | |
| "grad_norm": 8.591996192932129, | |
| "learning_rate": 3.9599992021232865e-05, | |
| "loss": 1.1296, | |
| "query_regularizer_loss": 0.0394, | |
| "sparse_loss": 1.0508, | |
| "step": 2240 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0393, | |
| "epoch": 0.7145115396775213, | |
| "grad_norm": 9.680275917053223, | |
| "learning_rate": 3.938122582890147e-05, | |
| "loss": 1.1456, | |
| "query_regularizer_loss": 0.0393, | |
| "sparse_loss": 1.0669, | |
| "step": 2260 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0384, | |
| "epoch": 0.7208346506481189, | |
| "grad_norm": 6.841869354248047, | |
| "learning_rate": 3.916080122899408e-05, | |
| "loss": 1.2594, | |
| "query_regularizer_loss": 0.0384, | |
| "sparse_loss": 1.1827, | |
| "step": 2280 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0381, | |
| "epoch": 0.7271577616187164, | |
| "grad_norm": 7.08558988571167, | |
| "learning_rate": 3.893874364033319e-05, | |
| "loss": 1.1598, | |
| "query_regularizer_loss": 0.0381, | |
| "sparse_loss": 1.0835, | |
| "step": 2300 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0373, | |
| "epoch": 0.7334808725893139, | |
| "grad_norm": 4.713133811950684, | |
| "learning_rate": 3.871507867005353e-05, | |
| "loss": 1.1291, | |
| "query_regularizer_loss": 0.0373, | |
| "sparse_loss": 1.0544, | |
| "step": 2320 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0372, | |
| "epoch": 0.7398039835599115, | |
| "grad_norm": 6.019435405731201, | |
| "learning_rate": 3.8489832110649106e-05, | |
| "loss": 1.1203, | |
| "query_regularizer_loss": 0.0372, | |
| "sparse_loss": 1.0459, | |
| "step": 2340 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.037, | |
| "epoch": 0.746127094530509, | |
| "grad_norm": 5.9214887619018555, | |
| "learning_rate": 3.8263029936998914e-05, | |
| "loss": 1.1708, | |
| "query_regularizer_loss": 0.037, | |
| "sparse_loss": 1.0969, | |
| "step": 2360 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0365, | |
| "epoch": 0.7524502055011065, | |
| "grad_norm": 13.986381530761719, | |
| "learning_rate": 3.803469830337154e-05, | |
| "loss": 1.175, | |
| "query_regularizer_loss": 0.0365, | |
| "sparse_loss": 1.102, | |
| "step": 2380 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0363, | |
| "epoch": 0.7587733164717041, | |
| "grad_norm": 5.55244255065918, | |
| "learning_rate": 3.7804863540409155e-05, | |
| "loss": 1.2057, | |
| "query_regularizer_loss": 0.0363, | |
| "sparse_loss": 1.1331, | |
| "step": 2400 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0359, | |
| "epoch": 0.7650964274423017, | |
| "grad_norm": 4.60949182510376, | |
| "learning_rate": 3.7573552152091065e-05, | |
| "loss": 1.2125, | |
| "query_regularizer_loss": 0.0359, | |
| "sparse_loss": 1.1407, | |
| "step": 2420 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0353, | |
| "epoch": 0.7714195384128991, | |
| "grad_norm": 6.324008941650391, | |
| "learning_rate": 3.7340790812677426e-05, | |
| "loss": 1.2678, | |
| "query_regularizer_loss": 0.0353, | |
| "sparse_loss": 1.1972, | |
| "step": 2440 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0349, | |
| "epoch": 0.7777426493834967, | |
| "grad_norm": 6.179075241088867, | |
| "learning_rate": 3.710660636363315e-05, | |
| "loss": 1.1447, | |
| "query_regularizer_loss": 0.0349, | |
| "sparse_loss": 1.0749, | |
| "step": 2460 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0346, | |
| "epoch": 0.7840657603540943, | |
| "grad_norm": 5.324189186096191, | |
| "learning_rate": 3.687102581053267e-05, | |
| "loss": 1.2268, | |
| "query_regularizer_loss": 0.0346, | |
| "sparse_loss": 1.1575, | |
| "step": 2480 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0342, | |
| "epoch": 0.7903888713246917, | |
| "grad_norm": 6.188036918640137, | |
| "learning_rate": 3.6634076319945706e-05, | |
| "loss": 1.1557, | |
| "query_regularizer_loss": 0.0342, | |
| "sparse_loss": 1.0872, | |
| "step": 2500 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0335, | |
| "epoch": 0.7967119822952893, | |
| "grad_norm": 5.936458587646484, | |
| "learning_rate": 3.639578521630445e-05, | |
| "loss": 1.1321, | |
| "query_regularizer_loss": 0.0335, | |
| "sparse_loss": 1.0651, | |
| "step": 2520 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0331, | |
| "epoch": 0.8030350932658868, | |
| "grad_norm": 5.506819248199463, | |
| "learning_rate": 3.615617997875265e-05, | |
| "loss": 1.1172, | |
| "query_regularizer_loss": 0.0331, | |
| "sparse_loss": 1.051, | |
| "step": 2540 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0332, | |
| "epoch": 0.8093582042364843, | |
| "grad_norm": 4.83391809463501, | |
| "learning_rate": 3.591528823797672e-05, | |
| "loss": 1.1761, | |
| "query_regularizer_loss": 0.0332, | |
| "sparse_loss": 1.1097, | |
| "step": 2560 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0327, | |
| "epoch": 0.8156813152070819, | |
| "grad_norm": 5.821810722351074, | |
| "learning_rate": 3.567313777301946e-05, | |
| "loss": 1.1746, | |
| "query_regularizer_loss": 0.0327, | |
| "sparse_loss": 1.1091, | |
| "step": 2580 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0323, | |
| "epoch": 0.8220044261776794, | |
| "grad_norm": 5.580266952514648, | |
| "learning_rate": 3.5429756508076664e-05, | |
| "loss": 1.1864, | |
| "query_regularizer_loss": 0.0323, | |
| "sparse_loss": 1.1218, | |
| "step": 2600 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.032, | |
| "epoch": 0.828327537148277, | |
| "grad_norm": 6.912046909332275, | |
| "learning_rate": 3.5185172509276926e-05, | |
| "loss": 1.096, | |
| "query_regularizer_loss": 0.032, | |
| "sparse_loss": 1.0319, | |
| "step": 2620 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0316, | |
| "epoch": 0.8346506481188745, | |
| "grad_norm": 4.675662517547607, | |
| "learning_rate": 3.4939413981445165e-05, | |
| "loss": 1.0784, | |
| "query_regularizer_loss": 0.0315, | |
| "sparse_loss": 1.0153, | |
| "step": 2640 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0313, | |
| "epoch": 0.840973759089472, | |
| "grad_norm": 6.675909042358398, | |
| "learning_rate": 3.46925092648501e-05, | |
| "loss": 1.1665, | |
| "query_regularizer_loss": 0.0313, | |
| "sparse_loss": 1.104, | |
| "step": 2660 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0309, | |
| "epoch": 0.8472968700600696, | |
| "grad_norm": 6.421684741973877, | |
| "learning_rate": 3.444448683193611e-05, | |
| "loss": 1.0553, | |
| "query_regularizer_loss": 0.0309, | |
| "sparse_loss": 0.9936, | |
| "step": 2680 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0307, | |
| "epoch": 0.853619981030667, | |
| "grad_norm": 8.149559020996094, | |
| "learning_rate": 3.419537528403986e-05, | |
| "loss": 1.0657, | |
| "query_regularizer_loss": 0.0307, | |
| "sparse_loss": 1.0042, | |
| "step": 2700 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0305, | |
| "epoch": 0.8599430920012646, | |
| "grad_norm": 7.086170196533203, | |
| "learning_rate": 3.39452033480921e-05, | |
| "loss": 1.0973, | |
| "query_regularizer_loss": 0.0305, | |
| "sparse_loss": 1.0362, | |
| "step": 2720 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0302, | |
| "epoch": 0.8662662029718622, | |
| "grad_norm": 6.212243556976318, | |
| "learning_rate": 3.3693999873304904e-05, | |
| "loss": 1.0824, | |
| "query_regularizer_loss": 0.0302, | |
| "sparse_loss": 1.0219, | |
| "step": 2740 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0296, | |
| "epoch": 0.8725893139424596, | |
| "grad_norm": 6.539682865142822, | |
| "learning_rate": 3.344179382784488e-05, | |
| "loss": 1.0886, | |
| "query_regularizer_loss": 0.0296, | |
| "sparse_loss": 1.0295, | |
| "step": 2760 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.029, | |
| "epoch": 0.8789124249130572, | |
| "grad_norm": 5.484647274017334, | |
| "learning_rate": 3.3188614295492595e-05, | |
| "loss": 1.1338, | |
| "query_regularizer_loss": 0.029, | |
| "sparse_loss": 1.0757, | |
| "step": 2780 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0292, | |
| "epoch": 0.8852355358836548, | |
| "grad_norm": 6.082838535308838, | |
| "learning_rate": 3.293449047228874e-05, | |
| "loss": 1.1033, | |
| "query_regularizer_loss": 0.0292, | |
| "sparse_loss": 1.0449, | |
| "step": 2800 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0288, | |
| "epoch": 0.8915586468542523, | |
| "grad_norm": 7.450719356536865, | |
| "learning_rate": 3.2679451663167326e-05, | |
| "loss": 1.0429, | |
| "query_regularizer_loss": 0.0288, | |
| "sparse_loss": 0.9852, | |
| "step": 2820 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0285, | |
| "epoch": 0.8978817578248498, | |
| "grad_norm": 6.752073287963867, | |
| "learning_rate": 3.242352727857625e-05, | |
| "loss": 1.0102, | |
| "query_regularizer_loss": 0.0285, | |
| "sparse_loss": 0.9532, | |
| "step": 2840 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0281, | |
| "epoch": 0.9042048687954474, | |
| "grad_norm": 5.853407859802246, | |
| "learning_rate": 3.216674683108583e-05, | |
| "loss": 1.1599, | |
| "query_regularizer_loss": 0.0281, | |
| "sparse_loss": 1.1036, | |
| "step": 2860 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0281, | |
| "epoch": 0.9105279797660449, | |
| "grad_norm": 7.191678524017334, | |
| "learning_rate": 3.1909139931985415e-05, | |
| "loss": 1.0423, | |
| "query_regularizer_loss": 0.0281, | |
| "sparse_loss": 0.9862, | |
| "step": 2880 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.028, | |
| "epoch": 0.9168510907366424, | |
| "grad_norm": 10.293112754821777, | |
| "learning_rate": 3.165073628786876e-05, | |
| "loss": 1.0815, | |
| "query_regularizer_loss": 0.028, | |
| "sparse_loss": 1.0256, | |
| "step": 2900 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0278, | |
| "epoch": 0.92317420170724, | |
| "grad_norm": 5.882568836212158, | |
| "learning_rate": 3.139156569720826e-05, | |
| "loss": 1.0804, | |
| "query_regularizer_loss": 0.0278, | |
| "sparse_loss": 1.0248, | |
| "step": 2920 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0276, | |
| "epoch": 0.9294973126778375, | |
| "grad_norm": 5.085528373718262, | |
| "learning_rate": 3.113165804691871e-05, | |
| "loss": 1.1668, | |
| "query_regularizer_loss": 0.0276, | |
| "sparse_loss": 1.1115, | |
| "step": 2940 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0273, | |
| "epoch": 0.935820423648435, | |
| "grad_norm": 5.272675037384033, | |
| "learning_rate": 3.0871043308910816e-05, | |
| "loss": 1.0606, | |
| "query_regularizer_loss": 0.0273, | |
| "sparse_loss": 1.006, | |
| "step": 2960 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0267, | |
| "epoch": 0.9421435346190326, | |
| "grad_norm": 5.916753768920898, | |
| "learning_rate": 3.06097515366349e-05, | |
| "loss": 1.0705, | |
| "query_regularizer_loss": 0.0267, | |
| "sparse_loss": 1.0172, | |
| "step": 2980 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0265, | |
| "epoch": 0.9484666455896301, | |
| "grad_norm": 6.121260166168213, | |
| "learning_rate": 3.034781286161519e-05, | |
| "loss": 1.072, | |
| "query_regularizer_loss": 0.0265, | |
| "sparse_loss": 1.0189, | |
| "step": 3000 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0264, | |
| "epoch": 0.9547897565602277, | |
| "grad_norm": 5.811629295349121, | |
| "learning_rate": 3.0085257489975167e-05, | |
| "loss": 1.1239, | |
| "query_regularizer_loss": 0.0264, | |
| "sparse_loss": 1.0711, | |
| "step": 3020 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0263, | |
| "epoch": 0.9611128675308251, | |
| "grad_norm": 14.449254989624023, | |
| "learning_rate": 2.982211569895424e-05, | |
| "loss": 1.112, | |
| "query_regularizer_loss": 0.0263, | |
| "sparse_loss": 1.0594, | |
| "step": 3040 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0264, | |
| "epoch": 0.9674359785014227, | |
| "grad_norm": 7.664610862731934, | |
| "learning_rate": 2.9558417833416264e-05, | |
| "loss": 1.0759, | |
| "query_regularizer_loss": 0.0264, | |
| "sparse_loss": 1.0231, | |
| "step": 3060 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0257, | |
| "epoch": 0.9737590894720203, | |
| "grad_norm": 6.444000720977783, | |
| "learning_rate": 2.9294194302350225e-05, | |
| "loss": 0.956, | |
| "query_regularizer_loss": 0.0257, | |
| "sparse_loss": 0.9047, | |
| "step": 3080 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0255, | |
| "epoch": 0.9800822004426177, | |
| "grad_norm": 5.407084941864014, | |
| "learning_rate": 2.902947557536359e-05, | |
| "loss": 0.9945, | |
| "query_regularizer_loss": 0.0255, | |
| "sparse_loss": 0.9435, | |
| "step": 3100 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0253, | |
| "epoch": 0.9864053114132153, | |
| "grad_norm": 7.782375335693359, | |
| "learning_rate": 2.8764292179168566e-05, | |
| "loss": 1.0119, | |
| "query_regularizer_loss": 0.0253, | |
| "sparse_loss": 0.9613, | |
| "step": 3120 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.025, | |
| "epoch": 0.9927284223838129, | |
| "grad_norm": 5.379085540771484, | |
| "learning_rate": 2.849867469406191e-05, | |
| "loss": 0.9965, | |
| "query_regularizer_loss": 0.025, | |
| "sparse_loss": 0.9465, | |
| "step": 3140 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0247, | |
| "epoch": 0.9990515333544103, | |
| "grad_norm": 13.918062210083008, | |
| "learning_rate": 2.8232653750398404e-05, | |
| "loss": 1.1177, | |
| "query_regularizer_loss": 0.0247, | |
| "sparse_loss": 1.0683, | |
| "step": 3160 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0243, | |
| "epoch": 1.005374644325008, | |
| "grad_norm": 5.923994541168213, | |
| "learning_rate": 2.796626002505871e-05, | |
| "loss": 0.8884, | |
| "query_regularizer_loss": 0.0243, | |
| "sparse_loss": 0.8398, | |
| "step": 3180 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0243, | |
| "epoch": 1.0116977552956055, | |
| "grad_norm": 5.905787467956543, | |
| "learning_rate": 2.7699524237911735e-05, | |
| "loss": 0.9041, | |
| "query_regularizer_loss": 0.0243, | |
| "sparse_loss": 0.8555, | |
| "step": 3200 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0242, | |
| "epoch": 1.018020866266203, | |
| "grad_norm": 7.144820213317871, | |
| "learning_rate": 2.7432477148272124e-05, | |
| "loss": 0.9367, | |
| "query_regularizer_loss": 0.0242, | |
| "sparse_loss": 0.8882, | |
| "step": 3220 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.024, | |
| "epoch": 1.0243439772368006, | |
| "grad_norm": 5.734910011291504, | |
| "learning_rate": 2.7165149551353152e-05, | |
| "loss": 0.8253, | |
| "query_regularizer_loss": 0.024, | |
| "sparse_loss": 0.7774, | |
| "step": 3240 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0237, | |
| "epoch": 1.030667088207398, | |
| "grad_norm": 4.406752586364746, | |
| "learning_rate": 2.689757227471551e-05, | |
| "loss": 0.8637, | |
| "query_regularizer_loss": 0.0237, | |
| "sparse_loss": 0.8163, | |
| "step": 3260 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0235, | |
| "epoch": 1.0369901991779955, | |
| "grad_norm": 18.512943267822266, | |
| "learning_rate": 2.662977617471234e-05, | |
| "loss": 0.8665, | |
| "query_regularizer_loss": 0.0235, | |
| "sparse_loss": 0.8195, | |
| "step": 3280 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0236, | |
| "epoch": 1.0433133101485932, | |
| "grad_norm": 4.601492404937744, | |
| "learning_rate": 2.636179213293094e-05, | |
| "loss": 0.8306, | |
| "query_regularizer_loss": 0.0236, | |
| "sparse_loss": 0.7835, | |
| "step": 3300 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0234, | |
| "epoch": 1.0496364211191906, | |
| "grad_norm": 6.115499973297119, | |
| "learning_rate": 2.609365105263162e-05, | |
| "loss": 0.8374, | |
| "query_regularizer_loss": 0.0234, | |
| "sparse_loss": 0.7906, | |
| "step": 3320 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0235, | |
| "epoch": 1.055959532089788, | |
| "grad_norm": 16.041154861450195, | |
| "learning_rate": 2.5825383855183954e-05, | |
| "loss": 0.9326, | |
| "query_regularizer_loss": 0.0235, | |
| "sparse_loss": 0.8855, | |
| "step": 3340 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0233, | |
| "epoch": 1.0622826430603858, | |
| "grad_norm": 6.99527645111084, | |
| "learning_rate": 2.5557021476501058e-05, | |
| "loss": 0.8675, | |
| "query_regularizer_loss": 0.0233, | |
| "sparse_loss": 0.8209, | |
| "step": 3360 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0232, | |
| "epoch": 1.0686057540309832, | |
| "grad_norm": 9.15439224243164, | |
| "learning_rate": 2.528859486347211e-05, | |
| "loss": 0.8846, | |
| "query_regularizer_loss": 0.0232, | |
| "sparse_loss": 0.8383, | |
| "step": 3380 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0232, | |
| "epoch": 1.0749288650015807, | |
| "grad_norm": 6.056853771209717, | |
| "learning_rate": 2.502013497039362e-05, | |
| "loss": 0.8782, | |
| "query_regularizer_loss": 0.0232, | |
| "sparse_loss": 0.8318, | |
| "step": 3400 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0228, | |
| "epoch": 1.0812519759721784, | |
| "grad_norm": 11.15111255645752, | |
| "learning_rate": 2.4751672755399892e-05, | |
| "loss": 0.9058, | |
| "query_regularizer_loss": 0.0228, | |
| "sparse_loss": 0.8602, | |
| "step": 3420 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0226, | |
| "epoch": 1.0875750869427758, | |
| "grad_norm": 5.096249103546143, | |
| "learning_rate": 2.4483239176892978e-05, | |
| "loss": 0.8242, | |
| "query_regularizer_loss": 0.0226, | |
| "sparse_loss": 0.7789, | |
| "step": 3440 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0224, | |
| "epoch": 1.0938981979133733, | |
| "grad_norm": 4.854412078857422, | |
| "learning_rate": 2.4214865189972626e-05, | |
| "loss": 0.8406, | |
| "query_regularizer_loss": 0.0224, | |
| "sparse_loss": 0.7958, | |
| "step": 3460 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0222, | |
| "epoch": 1.100221308883971, | |
| "grad_norm": 5.57534122467041, | |
| "learning_rate": 2.3946581742866662e-05, | |
| "loss": 0.8854, | |
| "query_regularizer_loss": 0.0222, | |
| "sparse_loss": 0.841, | |
| "step": 3480 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.022, | |
| "epoch": 1.1065444198545684, | |
| "grad_norm": 35.59614562988281, | |
| "learning_rate": 2.367841977336206e-05, | |
| "loss": 0.9114, | |
| "query_regularizer_loss": 0.022, | |
| "sparse_loss": 0.8674, | |
| "step": 3500 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0218, | |
| "epoch": 1.112867530825166, | |
| "grad_norm": 5.315453052520752, | |
| "learning_rate": 2.3410410205237292e-05, | |
| "loss": 0.7916, | |
| "query_regularizer_loss": 0.0218, | |
| "sparse_loss": 0.7481, | |
| "step": 3520 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0217, | |
| "epoch": 1.1191906417957636, | |
| "grad_norm": 6.13749361038208, | |
| "learning_rate": 2.31425839446963e-05, | |
| "loss": 0.8902, | |
| "query_regularizer_loss": 0.0217, | |
| "sparse_loss": 0.8469, | |
| "step": 3540 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0212, | |
| "epoch": 1.125513752766361, | |
| "grad_norm": 6.6804962158203125, | |
| "learning_rate": 2.2874971876804425e-05, | |
| "loss": 0.8235, | |
| "query_regularizer_loss": 0.0212, | |
| "sparse_loss": 0.7812, | |
| "step": 3560 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.021, | |
| "epoch": 1.1318368637369587, | |
| "grad_norm": 7.544855117797852, | |
| "learning_rate": 2.2607604861926847e-05, | |
| "loss": 0.8662, | |
| "query_regularizer_loss": 0.021, | |
| "sparse_loss": 0.8241, | |
| "step": 3580 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0209, | |
| "epoch": 1.1381599747075561, | |
| "grad_norm": 5.754782199859619, | |
| "learning_rate": 2.2340513732169845e-05, | |
| "loss": 0.8252, | |
| "query_regularizer_loss": 0.0209, | |
| "sparse_loss": 0.7835, | |
| "step": 3600 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0208, | |
| "epoch": 1.1444830856781536, | |
| "grad_norm": 4.892688751220703, | |
| "learning_rate": 2.2073729287825283e-05, | |
| "loss": 0.8636, | |
| "query_regularizer_loss": 0.0208, | |
| "sparse_loss": 0.822, | |
| "step": 3620 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0207, | |
| "epoch": 1.1508061966487513, | |
| "grad_norm": 7.930765628814697, | |
| "learning_rate": 2.1807282293818827e-05, | |
| "loss": 0.8013, | |
| "query_regularizer_loss": 0.0207, | |
| "sparse_loss": 0.7599, | |
| "step": 3640 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0209, | |
| "epoch": 1.1571293076193487, | |
| "grad_norm": 5.00435733795166, | |
| "learning_rate": 2.1541203476162222e-05, | |
| "loss": 0.8126, | |
| "query_regularizer_loss": 0.0209, | |
| "sparse_loss": 0.7708, | |
| "step": 3660 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0208, | |
| "epoch": 1.1634524185899462, | |
| "grad_norm": 5.13680362701416, | |
| "learning_rate": 2.1275523518409994e-05, | |
| "loss": 0.8361, | |
| "query_regularizer_loss": 0.0208, | |
| "sparse_loss": 0.7945, | |
| "step": 3680 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0205, | |
| "epoch": 1.1697755295605439, | |
| "grad_norm": 11.393424034118652, | |
| "learning_rate": 2.101027305812113e-05, | |
| "loss": 0.8975, | |
| "query_regularizer_loss": 0.0205, | |
| "sparse_loss": 0.8566, | |
| "step": 3700 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0202, | |
| "epoch": 1.1760986405311413, | |
| "grad_norm": 5.252847671508789, | |
| "learning_rate": 2.0745482683326047e-05, | |
| "loss": 0.8723, | |
| "query_regularizer_loss": 0.0202, | |
| "sparse_loss": 0.832, | |
| "step": 3720 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0202, | |
| "epoch": 1.1824217515017388, | |
| "grad_norm": 6.195733547210693, | |
| "learning_rate": 2.0481182928999194e-05, | |
| "loss": 0.7598, | |
| "query_regularizer_loss": 0.0202, | |
| "sparse_loss": 0.7195, | |
| "step": 3740 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0202, | |
| "epoch": 1.1887448624723365, | |
| "grad_norm": 6.0683135986328125, | |
| "learning_rate": 2.0217404273537928e-05, | |
| "loss": 0.8172, | |
| "query_regularizer_loss": 0.0202, | |
| "sparse_loss": 0.7768, | |
| "step": 3760 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0201, | |
| "epoch": 1.195067973442934, | |
| "grad_norm": 6.833969593048096, | |
| "learning_rate": 1.9954177135247733e-05, | |
| "loss": 0.7955, | |
| "query_regularizer_loss": 0.0201, | |
| "sparse_loss": 0.7554, | |
| "step": 3780 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0198, | |
| "epoch": 1.2013910844135314, | |
| "grad_norm": 6.259845733642578, | |
| "learning_rate": 1.969153186883449e-05, | |
| "loss": 0.8491, | |
| "query_regularizer_loss": 0.0198, | |
| "sparse_loss": 0.8096, | |
| "step": 3800 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0196, | |
| "epoch": 1.207714195384129, | |
| "grad_norm": 6.139260768890381, | |
| "learning_rate": 1.942949876190405e-05, | |
| "loss": 0.8096, | |
| "query_regularizer_loss": 0.0196, | |
| "sparse_loss": 0.7705, | |
| "step": 3820 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0193, | |
| "epoch": 1.2140373063547265, | |
| "grad_norm": 7.0147175788879395, | |
| "learning_rate": 1.9168108031469556e-05, | |
| "loss": 0.8215, | |
| "query_regularizer_loss": 0.0193, | |
| "sparse_loss": 0.7829, | |
| "step": 3840 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0193, | |
| "epoch": 1.220360417325324, | |
| "grad_norm": 4.83867883682251, | |
| "learning_rate": 1.8907389820466858e-05, | |
| "loss": 0.8388, | |
| "query_regularizer_loss": 0.0193, | |
| "sparse_loss": 0.8003, | |
| "step": 3860 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0193, | |
| "epoch": 1.2266835282959216, | |
| "grad_norm": 5.086630344390869, | |
| "learning_rate": 1.8647374194278515e-05, | |
| "loss": 0.8766, | |
| "query_regularizer_loss": 0.0193, | |
| "sparse_loss": 0.8381, | |
| "step": 3880 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0193, | |
| "epoch": 1.233006639266519, | |
| "grad_norm": 7.497378826141357, | |
| "learning_rate": 1.8388091137266754e-05, | |
| "loss": 0.8822, | |
| "query_regularizer_loss": 0.0193, | |
| "sparse_loss": 0.8436, | |
| "step": 3900 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0191, | |
| "epoch": 1.2393297502371166, | |
| "grad_norm": 6.353434085845947, | |
| "learning_rate": 1.8129570549315694e-05, | |
| "loss": 0.7843, | |
| "query_regularizer_loss": 0.0191, | |
| "sparse_loss": 0.746, | |
| "step": 3920 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0189, | |
| "epoch": 1.2456528612077142, | |
| "grad_norm": 6.209091663360596, | |
| "learning_rate": 1.7871842242383447e-05, | |
| "loss": 0.7955, | |
| "query_regularizer_loss": 0.0189, | |
| "sparse_loss": 0.7578, | |
| "step": 3940 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0186, | |
| "epoch": 1.2519759721783117, | |
| "grad_norm": 5.476807594299316, | |
| "learning_rate": 1.761493593706418e-05, | |
| "loss": 0.7593, | |
| "query_regularizer_loss": 0.0186, | |
| "sparse_loss": 0.7221, | |
| "step": 3960 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0189, | |
| "epoch": 1.2582990831489091, | |
| "grad_norm": 5.646886825561523, | |
| "learning_rate": 1.7358881259160883e-05, | |
| "loss": 0.8728, | |
| "query_regularizer_loss": 0.0189, | |
| "sparse_loss": 0.8351, | |
| "step": 3980 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0187, | |
| "epoch": 1.2646221941195068, | |
| "grad_norm": 5.88949728012085, | |
| "learning_rate": 1.710370773626896e-05, | |
| "loss": 0.7812, | |
| "query_regularizer_loss": 0.0187, | |
| "sparse_loss": 0.7438, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.2646221941195068, | |
| "eval_runtime": 144.6586, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sparse-ir-eval_avg_flops": 853.5161743164062, | |
| "eval_sparse-ir-eval_corpus_active_dims": 1024.0, | |
| "eval_sparse-ir-eval_corpus_sparsity_ratio": 0.9796696315120712, | |
| "eval_sparse-ir-eval_dot_accuracy@1": 0.04979004199160168, | |
| "eval_sparse-ir-eval_dot_accuracy@100": 0.3879224155168966, | |
| "eval_sparse-ir-eval_dot_accuracy@50": 0.29734053189362125, | |
| "eval_sparse-ir-eval_dot_accuracy@8": 0.14277144571085784, | |
| "eval_sparse-ir-eval_dot_map@100": 0.08555792971172127, | |
| "eval_sparse-ir-eval_dot_mrr@10": 0.07802764843856622, | |
| "eval_sparse-ir-eval_dot_ndcg@10": 0.09659653047217633, | |
| "eval_sparse-ir-eval_dot_precision@1": 0.04979004199160168, | |
| "eval_sparse-ir-eval_dot_precision@100": 0.0038792241551689668, | |
| "eval_sparse-ir-eval_dot_precision@50": 0.005946810637872426, | |
| "eval_sparse-ir-eval_dot_precision@8": 0.01784643071385723, | |
| "eval_sparse-ir-eval_dot_recall@1": 0.04979004199160168, | |
| "eval_sparse-ir-eval_dot_recall@100": 0.3879224155168966, | |
| "eval_sparse-ir-eval_dot_recall@50": 0.29734053189362125, | |
| "eval_sparse-ir-eval_dot_recall@8": 0.14277144571085784, | |
| "eval_sparse-ir-eval_query_active_dims": 1024.0, | |
| "eval_sparse-ir-eval_query_sparsity_ratio": 0.9796696315120712, | |
| "eval_steps_per_second": 0.0, | |
| "step": 4000 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0185, | |
| "epoch": 1.2709453050901043, | |
| "grad_norm": 6.946842193603516, | |
| "learning_rate": 1.6849444794371173e-05, | |
| "loss": 0.7947, | |
| "query_regularizer_loss": 0.0185, | |
| "sparse_loss": 0.7577, | |
| "step": 4020 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0187, | |
| "epoch": 1.2772684160607017, | |
| "grad_norm": 5.774596214294434, | |
| "learning_rate": 1.6596121754444365e-05, | |
| "loss": 0.861, | |
| "query_regularizer_loss": 0.0187, | |
| "sparse_loss": 0.8236, | |
| "step": 4040 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0185, | |
| "epoch": 1.2835915270312994, | |
| "grad_norm": 5.410093307495117, | |
| "learning_rate": 1.6343767829078157e-05, | |
| "loss": 0.7238, | |
| "query_regularizer_loss": 0.0185, | |
| "sparse_loss": 0.6868, | |
| "step": 4060 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0183, | |
| "epoch": 1.2899146380018969, | |
| "grad_norm": 6.174851894378662, | |
| "learning_rate": 1.609241211910628e-05, | |
| "loss": 0.8105, | |
| "query_regularizer_loss": 0.0183, | |
| "sparse_loss": 0.7738, | |
| "step": 4080 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0181, | |
| "epoch": 1.2962377489724943, | |
| "grad_norm": 6.427083969116211, | |
| "learning_rate": 1.5842083610250713e-05, | |
| "loss": 0.804, | |
| "query_regularizer_loss": 0.0182, | |
| "sparse_loss": 0.7677, | |
| "step": 4100 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0181, | |
| "epoch": 1.302560859943092, | |
| "grad_norm": 5.061923503875732, | |
| "learning_rate": 1.5592811169779146e-05, | |
| "loss": 0.8112, | |
| "query_regularizer_loss": 0.0181, | |
| "sparse_loss": 0.775, | |
| "step": 4120 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0181, | |
| "epoch": 1.3088839709136895, | |
| "grad_norm": 9.3052339553833, | |
| "learning_rate": 1.5344623543176047e-05, | |
| "loss": 0.8061, | |
| "query_regularizer_loss": 0.0181, | |
| "sparse_loss": 0.7699, | |
| "step": 4140 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0181, | |
| "epoch": 1.3152070818842871, | |
| "grad_norm": 6.645140171051025, | |
| "learning_rate": 1.5097549350827823e-05, | |
| "loss": 0.8149, | |
| "query_regularizer_loss": 0.0181, | |
| "sparse_loss": 0.7786, | |
| "step": 4160 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0179, | |
| "epoch": 1.3215301928548846, | |
| "grad_norm": 9.232198715209961, | |
| "learning_rate": 1.4851617084722384e-05, | |
| "loss": 0.7243, | |
| "query_regularizer_loss": 0.0179, | |
| "sparse_loss": 0.6885, | |
| "step": 4180 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0179, | |
| "epoch": 1.327853303825482, | |
| "grad_norm": 6.141971588134766, | |
| "learning_rate": 1.4606855105163509e-05, | |
| "loss": 0.7487, | |
| "query_regularizer_loss": 0.0179, | |
| "sparse_loss": 0.713, | |
| "step": 4200 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0178, | |
| "epoch": 1.3341764147960797, | |
| "grad_norm": 7.69699239730835, | |
| "learning_rate": 1.436329163750042e-05, | |
| "loss": 0.789, | |
| "query_regularizer_loss": 0.0178, | |
| "sparse_loss": 0.7533, | |
| "step": 4220 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0179, | |
| "epoch": 1.3404995257666772, | |
| "grad_norm": 6.825509071350098, | |
| "learning_rate": 1.412095476887289e-05, | |
| "loss": 0.7696, | |
| "query_regularizer_loss": 0.0179, | |
| "sparse_loss": 0.7339, | |
| "step": 4240 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0177, | |
| "epoch": 1.3468226367372749, | |
| "grad_norm": 6.219385623931885, | |
| "learning_rate": 1.3879872444972326e-05, | |
| "loss": 0.7236, | |
| "query_regularizer_loss": 0.0177, | |
| "sparse_loss": 0.6883, | |
| "step": 4260 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0176, | |
| "epoch": 1.3531457477078723, | |
| "grad_norm": 9.367851257324219, | |
| "learning_rate": 1.3640072466819087e-05, | |
| "loss": 0.7761, | |
| "query_regularizer_loss": 0.0176, | |
| "sparse_loss": 0.7409, | |
| "step": 4280 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0174, | |
| "epoch": 1.3594688586784698, | |
| "grad_norm": 6.194346904754639, | |
| "learning_rate": 1.3401582487556613e-05, | |
| "loss": 0.7864, | |
| "query_regularizer_loss": 0.0174, | |
| "sparse_loss": 0.7516, | |
| "step": 4300 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0175, | |
| "epoch": 1.3657919696490675, | |
| "grad_norm": 6.247494697570801, | |
| "learning_rate": 1.3164430009262479e-05, | |
| "loss": 0.8002, | |
| "query_regularizer_loss": 0.0175, | |
| "sparse_loss": 0.7652, | |
| "step": 4320 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0175, | |
| "epoch": 1.372115080619665, | |
| "grad_norm": 5.456516265869141, | |
| "learning_rate": 1.2928642379776946e-05, | |
| "loss": 0.7939, | |
| "query_regularizer_loss": 0.0175, | |
| "sparse_loss": 0.759, | |
| "step": 4340 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0174, | |
| "epoch": 1.3784381915902624, | |
| "grad_norm": 6.565524578094482, | |
| "learning_rate": 1.2694246789549268e-05, | |
| "loss": 0.7647, | |
| "query_regularizer_loss": 0.0174, | |
| "sparse_loss": 0.73, | |
| "step": 4360 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.017, | |
| "epoch": 1.38476130256086, | |
| "grad_norm": 5.862294673919678, | |
| "learning_rate": 1.2461270268502138e-05, | |
| "loss": 0.7741, | |
| "query_regularizer_loss": 0.017, | |
| "sparse_loss": 0.74, | |
| "step": 4380 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0168, | |
| "epoch": 1.3910844135314575, | |
| "grad_norm": 4.879271030426025, | |
| "learning_rate": 1.2229739682914707e-05, | |
| "loss": 0.7361, | |
| "query_regularizer_loss": 0.0167, | |
| "sparse_loss": 0.7026, | |
| "step": 4400 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0169, | |
| "epoch": 1.397407524502055, | |
| "grad_norm": 6.44930362701416, | |
| "learning_rate": 1.1999681732324397e-05, | |
| "loss": 0.7732, | |
| "query_regularizer_loss": 0.0169, | |
| "sparse_loss": 0.7395, | |
| "step": 4420 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0168, | |
| "epoch": 1.4037306354726526, | |
| "grad_norm": 5.632932662963867, | |
| "learning_rate": 1.1771122946448002e-05, | |
| "loss": 0.79, | |
| "query_regularizer_loss": 0.0168, | |
| "sparse_loss": 0.7563, | |
| "step": 4440 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0166, | |
| "epoch": 1.41005374644325, | |
| "grad_norm": 11.790848731994629, | |
| "learning_rate": 1.1544089682122288e-05, | |
| "loss": 0.7661, | |
| "query_regularizer_loss": 0.0166, | |
| "sparse_loss": 0.7329, | |
| "step": 4460 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0168, | |
| "epoch": 1.4163768574138476, | |
| "grad_norm": 4.643637657165527, | |
| "learning_rate": 1.1318608120264676e-05, | |
| "loss": 0.7779, | |
| "query_regularizer_loss": 0.0168, | |
| "sparse_loss": 0.7442, | |
| "step": 4480 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0168, | |
| "epoch": 1.4226999683844452, | |
| "grad_norm": 6.348916530609131, | |
| "learning_rate": 1.1094704262854047e-05, | |
| "loss": 0.7711, | |
| "query_regularizer_loss": 0.0168, | |
| "sparse_loss": 0.7375, | |
| "step": 4500 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0166, | |
| "epoch": 1.4290230793550427, | |
| "grad_norm": 5.77072811126709, | |
| "learning_rate": 1.0872403929932312e-05, | |
| "loss": 0.7952, | |
| "query_regularizer_loss": 0.0166, | |
| "sparse_loss": 0.7619, | |
| "step": 4520 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0166, | |
| "epoch": 1.4353461903256401, | |
| "grad_norm": 6.92957067489624, | |
| "learning_rate": 1.0651732756626848e-05, | |
| "loss": 0.7743, | |
| "query_regularizer_loss": 0.0166, | |
| "sparse_loss": 0.741, | |
| "step": 4540 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0165, | |
| "epoch": 1.4416693012962378, | |
| "grad_norm": 5.602739334106445, | |
| "learning_rate": 1.0432716190194397e-05, | |
| "loss": 0.72, | |
| "query_regularizer_loss": 0.0165, | |
| "sparse_loss": 0.6869, | |
| "step": 4560 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0165, | |
| "epoch": 1.4479924122668353, | |
| "grad_norm": 5.612238883972168, | |
| "learning_rate": 1.0215379487086452e-05, | |
| "loss": 0.7801, | |
| "query_regularizer_loss": 0.0165, | |
| "sparse_loss": 0.7471, | |
| "step": 4580 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0163, | |
| "epoch": 1.4543155232374327, | |
| "grad_norm": 5.197407245635986, | |
| "learning_rate": 9.999747710036875e-06, | |
| "loss": 0.7453, | |
| "query_regularizer_loss": 0.0163, | |
| "sparse_loss": 0.7127, | |
| "step": 4600 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0163, | |
| "epoch": 1.4606386342080304, | |
| "grad_norm": 4.72848653793335, | |
| "learning_rate": 9.785845725171583e-06, | |
| "loss": 0.7509, | |
| "query_regularizer_loss": 0.0163, | |
| "sparse_loss": 0.7184, | |
| "step": 4620 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0163, | |
| "epoch": 1.4669617451786279, | |
| "grad_norm": 5.626763820648193, | |
| "learning_rate": 9.573698199141146e-06, | |
| "loss": 0.7558, | |
| "query_regularizer_loss": 0.0163, | |
| "sparse_loss": 0.7232, | |
| "step": 4640 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0162, | |
| "epoch": 1.4732848561492253, | |
| "grad_norm": 6.345512390136719, | |
| "learning_rate": 9.363329596276258e-06, | |
| "loss": 0.7718, | |
| "query_regularizer_loss": 0.0162, | |
| "sparse_loss": 0.7394, | |
| "step": 4660 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0161, | |
| "epoch": 1.479607967119823, | |
| "grad_norm": 20.017658233642578, | |
| "learning_rate": 9.15476417576656e-06, | |
| "loss": 0.6954, | |
| "query_regularizer_loss": 0.0161, | |
| "sparse_loss": 0.6632, | |
| "step": 4680 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0159, | |
| "epoch": 1.4859310780904205, | |
| "grad_norm": 5.100090026855469, | |
| "learning_rate": 8.948025988863163e-06, | |
| "loss": 0.705, | |
| "query_regularizer_loss": 0.0159, | |
| "sparse_loss": 0.6732, | |
| "step": 4700 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0159, | |
| "epoch": 1.492254189061018, | |
| "grad_norm": 7.6336750984191895, | |
| "learning_rate": 8.743138876105056e-06, | |
| "loss": 0.751, | |
| "query_regularizer_loss": 0.0159, | |
| "sparse_loss": 0.7192, | |
| "step": 4720 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0159, | |
| "epoch": 1.4985773000316156, | |
| "grad_norm": 5.1957783699035645, | |
| "learning_rate": 8.54012646456995e-06, | |
| "loss": 0.765, | |
| "query_regularizer_loss": 0.0159, | |
| "sparse_loss": 0.7333, | |
| "step": 4740 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0158, | |
| "epoch": 1.504900411002213, | |
| "grad_norm": 6.796875953674316, | |
| "learning_rate": 8.33901216514959e-06, | |
| "loss": 0.7983, | |
| "query_regularizer_loss": 0.0158, | |
| "sparse_loss": 0.7667, | |
| "step": 4760 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0158, | |
| "epoch": 1.5112235219728105, | |
| "grad_norm": 6.2827959060668945, | |
| "learning_rate": 8.139819169850152e-06, | |
| "loss": 0.7716, | |
| "query_regularizer_loss": 0.0158, | |
| "sparse_loss": 0.74, | |
| "step": 4780 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0157, | |
| "epoch": 1.5175466329434082, | |
| "grad_norm": 23.407991409301758, | |
| "learning_rate": 7.942570449117689e-06, | |
| "loss": 0.7747, | |
| "query_regularizer_loss": 0.0157, | |
| "sparse_loss": 0.7434, | |
| "step": 4800 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0155, | |
| "epoch": 1.5238697439140056, | |
| "grad_norm": 5.709648132324219, | |
| "learning_rate": 7.747288749189344e-06, | |
| "loss": 0.7613, | |
| "query_regularizer_loss": 0.0155, | |
| "sparse_loss": 0.7302, | |
| "step": 4820 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0156, | |
| "epoch": 1.530192854884603, | |
| "grad_norm": 7.462285041809082, | |
| "learning_rate": 7.553996589470214e-06, | |
| "loss": 0.7962, | |
| "query_regularizer_loss": 0.0156, | |
| "sparse_loss": 0.7651, | |
| "step": 4840 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0155, | |
| "epoch": 1.5365159658552008, | |
| "grad_norm": 6.551488399505615, | |
| "learning_rate": 7.362716259936572e-06, | |
| "loss": 0.7893, | |
| "query_regularizer_loss": 0.0155, | |
| "sparse_loss": 0.7583, | |
| "step": 4860 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0156, | |
| "epoch": 1.5428390768257982, | |
| "grad_norm": 4.285749912261963, | |
| "learning_rate": 7.173469818565334e-06, | |
| "loss": 0.7291, | |
| "query_regularizer_loss": 0.0156, | |
| "sparse_loss": 0.6979, | |
| "step": 4880 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0156, | |
| "epoch": 1.5491621877963957, | |
| "grad_norm": 5.514683246612549, | |
| "learning_rate": 6.986279088790468e-06, | |
| "loss": 0.6982, | |
| "query_regularizer_loss": 0.0155, | |
| "sparse_loss": 0.6671, | |
| "step": 4900 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0154, | |
| "epoch": 1.5554852987669934, | |
| "grad_norm": 4.710526466369629, | |
| "learning_rate": 6.801165656986317e-06, | |
| "loss": 0.7057, | |
| "query_regularizer_loss": 0.0154, | |
| "sparse_loss": 0.6748, | |
| "step": 4920 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0155, | |
| "epoch": 1.561808409737591, | |
| "grad_norm": 6.305178642272949, | |
| "learning_rate": 6.618150869978346e-06, | |
| "loss": 0.7883, | |
| "query_regularizer_loss": 0.0155, | |
| "sparse_loss": 0.7574, | |
| "step": 4940 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0155, | |
| "epoch": 1.5681315207081883, | |
| "grad_norm": 7.8116044998168945, | |
| "learning_rate": 6.43725583258147e-06, | |
| "loss": 0.782, | |
| "query_regularizer_loss": 0.0155, | |
| "sparse_loss": 0.751, | |
| "step": 4960 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0153, | |
| "epoch": 1.574454631678786, | |
| "grad_norm": 6.840033531188965, | |
| "learning_rate": 6.25850140516629e-06, | |
| "loss": 0.7625, | |
| "query_regularizer_loss": 0.0153, | |
| "sparse_loss": 0.7318, | |
| "step": 4980 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0153, | |
| "epoch": 1.5807777426493836, | |
| "grad_norm": 4.293910980224609, | |
| "learning_rate": 6.08190820125353e-06, | |
| "loss": 0.7101, | |
| "query_regularizer_loss": 0.0153, | |
| "sparse_loss": 0.6795, | |
| "step": 5000 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0152, | |
| "epoch": 1.5871008536199809, | |
| "grad_norm": 4.924117088317871, | |
| "learning_rate": 5.907496585136932e-06, | |
| "loss": 0.7394, | |
| "query_regularizer_loss": 0.0152, | |
| "sparse_loss": 0.709, | |
| "step": 5020 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0152, | |
| "epoch": 1.5934239645905786, | |
| "grad_norm": 5.151610851287842, | |
| "learning_rate": 5.735286669534912e-06, | |
| "loss": 0.6894, | |
| "query_regularizer_loss": 0.0152, | |
| "sparse_loss": 0.659, | |
| "step": 5040 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0152, | |
| "epoch": 1.5997470755611762, | |
| "grad_norm": 6.865243434906006, | |
| "learning_rate": 5.5652983132711946e-06, | |
| "loss": 0.6992, | |
| "query_regularizer_loss": 0.0152, | |
| "sparse_loss": 0.6689, | |
| "step": 5060 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0152, | |
| "epoch": 1.6060701865317735, | |
| "grad_norm": 5.148654460906982, | |
| "learning_rate": 5.397551118984756e-06, | |
| "loss": 0.7032, | |
| "query_regularizer_loss": 0.0152, | |
| "sparse_loss": 0.6729, | |
| "step": 5080 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0151, | |
| "epoch": 1.6123932975023711, | |
| "grad_norm": 7.1046929359436035, | |
| "learning_rate": 5.232064430869266e-06, | |
| "loss": 0.7659, | |
| "query_regularizer_loss": 0.0151, | |
| "sparse_loss": 0.7356, | |
| "step": 5100 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0152, | |
| "epoch": 1.6187164084729688, | |
| "grad_norm": 6.476480484008789, | |
| "learning_rate": 5.068857332442408e-06, | |
| "loss": 0.7268, | |
| "query_regularizer_loss": 0.0152, | |
| "sparse_loss": 0.6965, | |
| "step": 5120 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0151, | |
| "epoch": 1.6250395194435663, | |
| "grad_norm": 5.051494598388672, | |
| "learning_rate": 4.907948644345184e-06, | |
| "loss": 0.6928, | |
| "query_regularizer_loss": 0.0151, | |
| "sparse_loss": 0.6627, | |
| "step": 5140 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.015, | |
| "epoch": 1.6313626304141637, | |
| "grad_norm": 9.967106819152832, | |
| "learning_rate": 4.7493569221715776e-06, | |
| "loss": 0.7134, | |
| "query_regularizer_loss": 0.015, | |
| "sparse_loss": 0.6833, | |
| "step": 5160 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0149, | |
| "epoch": 1.6376857413847614, | |
| "grad_norm": 7.947037220001221, | |
| "learning_rate": 4.593100454328744e-06, | |
| "loss": 0.8233, | |
| "query_regularizer_loss": 0.015, | |
| "sparse_loss": 0.7934, | |
| "step": 5180 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0148, | |
| "epoch": 1.6440088523553589, | |
| "grad_norm": 5.4551825523376465, | |
| "learning_rate": 4.439197259928082e-06, | |
| "loss": 0.7258, | |
| "query_regularizer_loss": 0.0148, | |
| "sparse_loss": 0.6962, | |
| "step": 5200 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0148, | |
| "epoch": 1.6503319633259563, | |
| "grad_norm": 5.254171848297119, | |
| "learning_rate": 4.2876650867072516e-06, | |
| "loss": 0.653, | |
| "query_regularizer_loss": 0.0148, | |
| "sparse_loss": 0.6234, | |
| "step": 5220 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0148, | |
| "epoch": 1.656655074296554, | |
| "grad_norm": 59.719482421875, | |
| "learning_rate": 4.1385214089836365e-06, | |
| "loss": 0.764, | |
| "query_regularizer_loss": 0.0148, | |
| "sparse_loss": 0.7344, | |
| "step": 5240 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0148, | |
| "epoch": 1.6629781852671515, | |
| "grad_norm": 6.208061218261719, | |
| "learning_rate": 3.991783425639148e-06, | |
| "loss": 0.8153, | |
| "query_regularizer_loss": 0.0148, | |
| "sparse_loss": 0.7858, | |
| "step": 5260 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0148, | |
| "epoch": 1.669301296237749, | |
| "grad_norm": 5.488613128662109, | |
| "learning_rate": 3.8474680581369635e-06, | |
| "loss": 0.6717, | |
| "query_regularizer_loss": 0.0148, | |
| "sparse_loss": 0.6422, | |
| "step": 5280 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0147, | |
| "epoch": 1.6756244072083466, | |
| "grad_norm": 13.9483642578125, | |
| "learning_rate": 3.7055919485701613e-06, | |
| "loss": 0.7592, | |
| "query_regularizer_loss": 0.0147, | |
| "sparse_loss": 0.7298, | |
| "step": 5300 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0147, | |
| "epoch": 1.681947518178944, | |
| "grad_norm": 7.517942905426025, | |
| "learning_rate": 3.5661714577425954e-06, | |
| "loss": 0.7114, | |
| "query_regularizer_loss": 0.0147, | |
| "sparse_loss": 0.6821, | |
| "step": 5320 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0147, | |
| "epoch": 1.6882706291495415, | |
| "grad_norm": 4.885865211486816, | |
| "learning_rate": 3.429222663282211e-06, | |
| "loss": 0.7035, | |
| "query_regularizer_loss": 0.0147, | |
| "sparse_loss": 0.6741, | |
| "step": 5340 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0146, | |
| "epoch": 1.6945937401201392, | |
| "grad_norm": 6.0214715003967285, | |
| "learning_rate": 3.2947613577870017e-06, | |
| "loss": 0.702, | |
| "query_regularizer_loss": 0.0146, | |
| "sparse_loss": 0.6728, | |
| "step": 5360 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0147, | |
| "epoch": 1.7009168510907366, | |
| "grad_norm": 6.653244495391846, | |
| "learning_rate": 3.162803047003865e-06, | |
| "loss": 0.735, | |
| "query_regularizer_loss": 0.0147, | |
| "sparse_loss": 0.7057, | |
| "step": 5380 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0147, | |
| "epoch": 1.707239962061334, | |
| "grad_norm": 7.448154449462891, | |
| "learning_rate": 3.0333629480404915e-06, | |
| "loss": 0.7298, | |
| "query_regularizer_loss": 0.0147, | |
| "sparse_loss": 0.7005, | |
| "step": 5400 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0146, | |
| "epoch": 1.7135630730319318, | |
| "grad_norm": 5.637879848480225, | |
| "learning_rate": 2.9064559876106097e-06, | |
| "loss": 0.7082, | |
| "query_regularizer_loss": 0.0145, | |
| "sparse_loss": 0.6791, | |
| "step": 5420 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0145, | |
| "epoch": 1.7198861840025292, | |
| "grad_norm": 6.466550827026367, | |
| "learning_rate": 2.7820968003126143e-06, | |
| "loss": 0.693, | |
| "query_regularizer_loss": 0.0146, | |
| "sparse_loss": 0.6639, | |
| "step": 5440 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0146, | |
| "epoch": 1.7262092949731267, | |
| "grad_norm": 6.480766773223877, | |
| "learning_rate": 2.660299726941995e-06, | |
| "loss": 0.7466, | |
| "query_regularizer_loss": 0.0146, | |
| "sparse_loss": 0.7175, | |
| "step": 5460 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0145, | |
| "epoch": 1.7325324059437244, | |
| "grad_norm": 5.0478315353393555, | |
| "learning_rate": 2.541078812837544e-06, | |
| "loss": 0.691, | |
| "query_regularizer_loss": 0.0145, | |
| "sparse_loss": 0.6619, | |
| "step": 5480 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0145, | |
| "epoch": 1.7388555169143218, | |
| "grad_norm": 18.3403263092041, | |
| "learning_rate": 2.4244478062617285e-06, | |
| "loss": 0.8491, | |
| "query_regularizer_loss": 0.0145, | |
| "sparse_loss": 0.8201, | |
| "step": 5500 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0145, | |
| "epoch": 1.7451786278849193, | |
| "grad_norm": 4.109490871429443, | |
| "learning_rate": 2.3104201568152406e-06, | |
| "loss": 0.7267, | |
| "query_regularizer_loss": 0.0145, | |
| "sparse_loss": 0.6977, | |
| "step": 5520 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0145, | |
| "epoch": 1.751501738855517, | |
| "grad_norm": 5.1971540451049805, | |
| "learning_rate": 2.1990090138860443e-06, | |
| "loss": 0.6938, | |
| "query_regularizer_loss": 0.0145, | |
| "sparse_loss": 0.6649, | |
| "step": 5540 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0144, | |
| "epoch": 1.7578248498261144, | |
| "grad_norm": 5.344772815704346, | |
| "learning_rate": 2.090227225132993e-06, | |
| "loss": 0.7251, | |
| "query_regularizer_loss": 0.0144, | |
| "sparse_loss": 0.6962, | |
| "step": 5560 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0144, | |
| "epoch": 1.7641479607967119, | |
| "grad_norm": 6.859626770019531, | |
| "learning_rate": 1.9840873350042975e-06, | |
| "loss": 0.6835, | |
| "query_regularizer_loss": 0.0144, | |
| "sparse_loss": 0.6546, | |
| "step": 5580 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0144, | |
| "epoch": 1.7704710717673096, | |
| "grad_norm": 7.862534046173096, | |
| "learning_rate": 1.8806015832909223e-06, | |
| "loss": 0.7431, | |
| "query_regularizer_loss": 0.0144, | |
| "sparse_loss": 0.7143, | |
| "step": 5600 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0144, | |
| "epoch": 1.776794182737907, | |
| "grad_norm": 7.577550411224365, | |
| "learning_rate": 1.7797819037151137e-06, | |
| "loss": 0.7031, | |
| "query_regularizer_loss": 0.0144, | |
| "sparse_loss": 0.6744, | |
| "step": 5620 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0143, | |
| "epoch": 1.7831172937085045, | |
| "grad_norm": 5.902777194976807, | |
| "learning_rate": 1.6816399225542512e-06, | |
| "loss": 0.6999, | |
| "query_regularizer_loss": 0.0143, | |
| "sparse_loss": 0.6713, | |
| "step": 5640 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0143, | |
| "epoch": 1.7894404046791021, | |
| "grad_norm": 7.284171104431152, | |
| "learning_rate": 1.5861869573000982e-06, | |
| "loss": 0.7097, | |
| "query_regularizer_loss": 0.0143, | |
| "sparse_loss": 0.6811, | |
| "step": 5660 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0143, | |
| "epoch": 1.7957635156496996, | |
| "grad_norm": 5.959704875946045, | |
| "learning_rate": 1.4934340153537424e-06, | |
| "loss": 0.7125, | |
| "query_regularizer_loss": 0.0143, | |
| "sparse_loss": 0.6839, | |
| "step": 5680 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0143, | |
| "epoch": 1.802086626620297, | |
| "grad_norm": 6.5475568771362305, | |
| "learning_rate": 1.4033917927562228e-06, | |
| "loss": 0.7399, | |
| "query_regularizer_loss": 0.0143, | |
| "sparse_loss": 0.7113, | |
| "step": 5700 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0143, | |
| "epoch": 1.8084097375908947, | |
| "grad_norm": 4.185171604156494, | |
| "learning_rate": 1.3160706729550886e-06, | |
| "loss": 0.677, | |
| "query_regularizer_loss": 0.0143, | |
| "sparse_loss": 0.6484, | |
| "step": 5720 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.8147328485614924, | |
| "grad_norm": 25.05087661743164, | |
| "learning_rate": 1.2314807256070093e-06, | |
| "loss": 0.7428, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.7143, | |
| "step": 5740 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.8210559595320897, | |
| "grad_norm": 6.057918548583984, | |
| "learning_rate": 1.1496317054165734e-06, | |
| "loss": 0.7495, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.7211, | |
| "step": 5760 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.8273790705026873, | |
| "grad_norm": 5.152764797210693, | |
| "learning_rate": 1.070533051011388e-06, | |
| "loss": 0.7266, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.6981, | |
| "step": 5780 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.833702181473285, | |
| "grad_norm": 6.547135829925537, | |
| "learning_rate": 9.94193883853653e-07, | |
| "loss": 0.6984, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.67, | |
| "step": 5800 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.8400252924438822, | |
| "grad_norm": 4.288388729095459, | |
| "learning_rate": 9.20623007188276e-07, | |
| "loss": 0.7527, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.7243, | |
| "step": 5820 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.84634840341448, | |
| "grad_norm": 10.613944053649902, | |
| "learning_rate": 8.498289050277331e-07, | |
| "loss": 0.6564, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.6281, | |
| "step": 5840 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.8526715143850776, | |
| "grad_norm": 5.5082902908325195, | |
| "learning_rate": 7.81819741173681e-07, | |
| "loss": 0.7028, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.6744, | |
| "step": 5860 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.8589946253556748, | |
| "grad_norm": 10.326964378356934, | |
| "learning_rate": 7.166033582755583e-07, | |
| "loss": 0.7015, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.6732, | |
| "step": 5880 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.8653177363262725, | |
| "grad_norm": 4.219756603240967, | |
| "learning_rate": 6.541872769261631e-07, | |
| "loss": 0.7219, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.6936, | |
| "step": 5900 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.8716408472968702, | |
| "grad_norm": 6.046718597412109, | |
| "learning_rate": 5.945786947944176e-07, | |
| "loss": 0.7569, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.7285, | |
| "step": 5920 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.8779639582674676, | |
| "grad_norm": 6.10089635848999, | |
| "learning_rate": 5.377844857953423e-07, | |
| "loss": 0.6832, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.6548, | |
| "step": 5940 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.884287069238065, | |
| "grad_norm": 9.817451477050781, | |
| "learning_rate": 4.838111992973627e-07, | |
| "loss": 0.72, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.6916, | |
| "step": 5960 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.8906101802086628, | |
| "grad_norm": 6.2373433113098145, | |
| "learning_rate": 4.3266505936708226e-07, | |
| "loss": 0.6878, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.6595, | |
| "step": 5980 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.8969332911792602, | |
| "grad_norm": 5.504240036010742, | |
| "learning_rate": 3.843519640514937e-07, | |
| "loss": 0.6468, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.6185, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.8969332911792602, | |
| "eval_runtime": 119.8241, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sparse-ir-eval_avg_flops": 828.1011962890625, | |
| "eval_sparse-ir-eval_corpus_active_dims": 1024.0, | |
| "eval_sparse-ir-eval_corpus_sparsity_ratio": 0.9796696315120712, | |
| "eval_sparse-ir-eval_dot_accuracy@1": 0.05598880223955209, | |
| "eval_sparse-ir-eval_dot_accuracy@100": 0.41011797640471903, | |
| "eval_sparse-ir-eval_dot_accuracy@50": 0.32113577284543093, | |
| "eval_sparse-ir-eval_dot_accuracy@8": 0.15736852629474105, | |
| "eval_sparse-ir-eval_dot_map@100": 0.09527335248492443, | |
| "eval_sparse-ir-eval_dot_mrr@10": 0.08729730244427296, | |
| "eval_sparse-ir-eval_dot_ndcg@10": 0.10700833927390947, | |
| "eval_sparse-ir-eval_dot_precision@1": 0.05598880223955209, | |
| "eval_sparse-ir-eval_dot_precision@100": 0.004101179764047191, | |
| "eval_sparse-ir-eval_dot_precision@50": 0.006422715456908619, | |
| "eval_sparse-ir-eval_dot_precision@8": 0.01967106578684263, | |
| "eval_sparse-ir-eval_dot_recall@1": 0.05598880223955209, | |
| "eval_sparse-ir-eval_dot_recall@100": 0.41011797640471903, | |
| "eval_sparse-ir-eval_dot_recall@50": 0.32113577284543093, | |
| "eval_sparse-ir-eval_dot_recall@8": 0.15736852629474105, | |
| "eval_sparse-ir-eval_query_active_dims": 1024.0, | |
| "eval_sparse-ir-eval_query_sparsity_ratio": 0.9796696315120712, | |
| "eval_steps_per_second": 0.0, | |
| "step": 6000 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.9032564021498577, | |
| "grad_norm": 24.536941528320312, | |
| "learning_rate": 3.388774846978804e-07, | |
| "loss": 0.6901, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.6618, | |
| "step": 6020 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.9095795131204554, | |
| "grad_norm": 4.5274786949157715, | |
| "learning_rate": 2.9624686531129766e-07, | |
| "loss": 0.7066, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.6782, | |
| "step": 6040 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.9159026240910528, | |
| "grad_norm": 5.511288642883301, | |
| "learning_rate": 2.5646502194988097e-07, | |
| "loss": 0.6818, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.6535, | |
| "step": 6060 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.9222257350616503, | |
| "grad_norm": 5.5782904624938965, | |
| "learning_rate": 2.1953654215791653e-07, | |
| "loss": 0.735, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.7067, | |
| "step": 6080 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.928548846032248, | |
| "grad_norm": 8.528367042541504, | |
| "learning_rate": 1.8546568443683077e-07, | |
| "loss": 0.7364, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.708, | |
| "step": 6100 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.9348719570028454, | |
| "grad_norm": 6.339615821838379, | |
| "learning_rate": 1.5425637775409728e-07, | |
| "loss": 0.7485, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.7202, | |
| "step": 6120 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.9411950679734429, | |
| "grad_norm": 5.221982955932617, | |
| "learning_rate": 1.2591222109017143e-07, | |
| "loss": 0.7123, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.684, | |
| "step": 6140 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.9475181789440406, | |
| "grad_norm": 6.821455478668213, | |
| "learning_rate": 1.0043648302345276e-07, | |
| "loss": 0.7488, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.7205, | |
| "step": 6160 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.953841289914638, | |
| "grad_norm": 6.010687351226807, | |
| "learning_rate": 7.783210135337282e-08, | |
| "loss": 0.7161, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.6878, | |
| "step": 6180 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.9601644008852355, | |
| "grad_norm": 5.301513195037842, | |
| "learning_rate": 5.810168276160211e-08, | |
| "loss": 0.6795, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.6512, | |
| "step": 6200 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0141, | |
| "epoch": 1.9664875118558331, | |
| "grad_norm": 5.3588480949401855, | |
| "learning_rate": 4.1247502511465585e-08, | |
| "loss": 0.6925, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.6642, | |
| "step": 6220 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.9728106228264306, | |
| "grad_norm": 5.97401237487793, | |
| "learning_rate": 2.7271504185558126e-08, | |
| "loss": 0.8108, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.7825, | |
| "step": 6240 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.979133733797028, | |
| "grad_norm": 6.50218391418457, | |
| "learning_rate": 1.6175299461615447e-08, | |
| "loss": 0.7295, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.7012, | |
| "step": 6260 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.9854568447676257, | |
| "grad_norm": 6.084305286407471, | |
| "learning_rate": 7.96016792666554e-09, | |
| "loss": 0.7232, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.6949, | |
| "step": 6280 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.9917799557382232, | |
| "grad_norm": 5.208037376403809, | |
| "learning_rate": 2.627056929460636e-09, | |
| "loss": 0.7575, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.7292, | |
| "step": 6300 | |
| }, | |
| { | |
| "document_regularizer_loss": 0.0142, | |
| "epoch": 1.9981030667088207, | |
| "grad_norm": 5.697127819061279, | |
| "learning_rate": 1.7658147123955637e-10, | |
| "loss": 0.7006, | |
| "query_regularizer_loss": 0.0142, | |
| "sparse_loss": 0.6723, | |
| "step": 6320 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 6326, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 2000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
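
The JSON above is the standard state file the Hugging Face `Trainer` writes alongside a checkpoint: `log_history` holds the periodic training logs (every `logging_steps` = 20 steps) interleaved with the `sparse-ir-eval` evaluation entries at steps 4000 and 6000, and the trailing fields record the run configuration. A minimal sketch of how such a file can be inspected follows; the filename `trainer_state.json` is an assumption about where the JSON is saved, not something stated in the log itself.

```python
# Minimal sketch, assuming the JSON above is saved locally as
# "trainer_state.json" (the path is an assumption, not taken from the log).
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Training log points carry a "loss" key; evaluation entries instead carry
# keys prefixed with "eval_" (e.g. the sparse-ir-eval metrics at steps 4000/6000).
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if any(k.startswith("eval_") for k in e)]

# Training loss curve as (step, loss) pairs, and nDCG@10 at each eval step.
loss_curve = [(e["step"], e["loss"]) for e in train_logs]
ndcg_curve = [
    (e["step"], e["eval_sparse-ir-eval_dot_ndcg@10"])
    for e in eval_logs
    if "eval_sparse-ir-eval_dot_ndcg@10" in e
]

print(f"{len(train_logs)} training log points, final loss {loss_curve[-1][1]:.4f}")
print("nDCG@10 by eval step:", ndcg_curve)
```

Run against this state file, the sketch would report the last logged training loss (step 6320) and the two nDCG@10 values recorded at the step-4000 and step-6000 evaluations.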