splade-large-mean / trainer_state.json
charsiu's picture
Upload checkpoints from checkpoint-6326
42151a9 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 2000,
"global_step": 6326,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"document_regularizer_loss": 0.7471,
"epoch": 0.006323110970597534,
"grad_norm": 10069.7373046875,
"learning_rate": 2.0000000000000003e-06,
"loss": 787.2433,
"query_regularizer_loss": 0.746,
"sparse_loss": 785.7502,
"step": 20
},
{
"document_regularizer_loss": 0.7124,
"epoch": 0.012646221941195067,
"grad_norm": 13459.8525390625,
"learning_rate": 4.105263157894737e-06,
"loss": 440.0618,
"query_regularizer_loss": 0.712,
"sparse_loss": 438.6375,
"step": 40
},
{
"document_regularizer_loss": 0.6455,
"epoch": 0.0189693329117926,
"grad_norm": 3622.762451171875,
"learning_rate": 6.2105263157894745e-06,
"loss": 271.8732,
"query_regularizer_loss": 0.6464,
"sparse_loss": 270.5813,
"step": 60
},
{
"document_regularizer_loss": 0.5798,
"epoch": 0.025292443882390134,
"grad_norm": 21794.759765625,
"learning_rate": 8.315789473684212e-06,
"loss": 159.8073,
"query_regularizer_loss": 0.5803,
"sparse_loss": 158.6473,
"step": 80
},
{
"document_regularizer_loss": 0.5264,
"epoch": 0.03161555485298767,
"grad_norm": 3427.578369140625,
"learning_rate": 1.0421052631578948e-05,
"loss": 115.2051,
"query_regularizer_loss": 0.526,
"sparse_loss": 114.1528,
"step": 100
},
{
"document_regularizer_loss": 0.4789,
"epoch": 0.0379386658235852,
"grad_norm": 1716.6964111328125,
"learning_rate": 1.2526315789473686e-05,
"loss": 68.2212,
"query_regularizer_loss": 0.4789,
"sparse_loss": 67.2635,
"step": 120
},
{
"document_regularizer_loss": 0.4385,
"epoch": 0.044261776794182736,
"grad_norm": 745.2850341796875,
"learning_rate": 1.4631578947368422e-05,
"loss": 37.2703,
"query_regularizer_loss": 0.4386,
"sparse_loss": 36.3932,
"step": 140
},
{
"document_regularizer_loss": 0.405,
"epoch": 0.05058488776478027,
"grad_norm": 1856.085693359375,
"learning_rate": 1.673684210526316e-05,
"loss": 21.9191,
"query_regularizer_loss": 0.405,
"sparse_loss": 21.109,
"step": 160
},
{
"document_regularizer_loss": 0.3851,
"epoch": 0.05690799873537781,
"grad_norm": 309.21807861328125,
"learning_rate": 1.8842105263157894e-05,
"loss": 13.6624,
"query_regularizer_loss": 0.3852,
"sparse_loss": 12.8921,
"step": 180
},
{
"document_regularizer_loss": 0.3724,
"epoch": 0.06323110970597534,
"grad_norm": 76.83500671386719,
"learning_rate": 2.0947368421052632e-05,
"loss": 6.6098,
"query_regularizer_loss": 0.3724,
"sparse_loss": 5.865,
"step": 200
},
{
"document_regularizer_loss": 0.3636,
"epoch": 0.06955422067657287,
"grad_norm": 31.61189842224121,
"learning_rate": 2.305263157894737e-05,
"loss": 4.905,
"query_regularizer_loss": 0.3636,
"sparse_loss": 4.1779,
"step": 220
},
{
"document_regularizer_loss": 0.3525,
"epoch": 0.0758773316471704,
"grad_norm": 54.798858642578125,
"learning_rate": 2.5157894736842108e-05,
"loss": 4.4195,
"query_regularizer_loss": 0.3525,
"sparse_loss": 3.7144,
"step": 240
},
{
"document_regularizer_loss": 0.3441,
"epoch": 0.08220044261776795,
"grad_norm": 35.759185791015625,
"learning_rate": 2.7263157894736846e-05,
"loss": 3.7145,
"query_regularizer_loss": 0.3441,
"sparse_loss": 3.0263,
"step": 260
},
{
"document_regularizer_loss": 0.333,
"epoch": 0.08852355358836547,
"grad_norm": 28.589937210083008,
"learning_rate": 2.9368421052631577e-05,
"loss": 3.3807,
"query_regularizer_loss": 0.333,
"sparse_loss": 2.7146,
"step": 280
},
{
"document_regularizer_loss": 0.3202,
"epoch": 0.09484666455896301,
"grad_norm": 21.61874008178711,
"learning_rate": 3.147368421052632e-05,
"loss": 3.3389,
"query_regularizer_loss": 0.3202,
"sparse_loss": 2.6984,
"step": 300
},
{
"document_regularizer_loss": 0.3065,
"epoch": 0.10116977552956054,
"grad_norm": 23.4547176361084,
"learning_rate": 3.357894736842105e-05,
"loss": 3.1854,
"query_regularizer_loss": 0.3065,
"sparse_loss": 2.5724,
"step": 320
},
{
"document_regularizer_loss": 0.2938,
"epoch": 0.10749288650015808,
"grad_norm": 30.065744400024414,
"learning_rate": 3.5684210526315794e-05,
"loss": 3.0942,
"query_regularizer_loss": 0.2938,
"sparse_loss": 2.5066,
"step": 340
},
{
"document_regularizer_loss": 0.2794,
"epoch": 0.11381599747075562,
"grad_norm": 24.388168334960938,
"learning_rate": 3.778947368421053e-05,
"loss": 2.8802,
"query_regularizer_loss": 0.2794,
"sparse_loss": 2.3214,
"step": 360
},
{
"document_regularizer_loss": 0.2675,
"epoch": 0.12013910844135314,
"grad_norm": 30.463876724243164,
"learning_rate": 3.989473684210526e-05,
"loss": 2.8744,
"query_regularizer_loss": 0.2676,
"sparse_loss": 2.3393,
"step": 380
},
{
"document_regularizer_loss": 0.2574,
"epoch": 0.12646221941195068,
"grad_norm": 8.956077575683594,
"learning_rate": 4.2e-05,
"loss": 2.9323,
"query_regularizer_loss": 0.2574,
"sparse_loss": 2.4174,
"step": 400
},
{
"document_regularizer_loss": 0.2454,
"epoch": 0.13278533038254822,
"grad_norm": 17.979278564453125,
"learning_rate": 4.410526315789474e-05,
"loss": 2.805,
"query_regularizer_loss": 0.2454,
"sparse_loss": 2.3142,
"step": 420
},
{
"document_regularizer_loss": 0.235,
"epoch": 0.13910844135314573,
"grad_norm": 16.181251525878906,
"learning_rate": 4.6210526315789473e-05,
"loss": 2.7811,
"query_regularizer_loss": 0.235,
"sparse_loss": 2.3111,
"step": 440
},
{
"document_regularizer_loss": 0.2264,
"epoch": 0.14543155232374327,
"grad_norm": 17.261810302734375,
"learning_rate": 4.8315789473684215e-05,
"loss": 2.7447,
"query_regularizer_loss": 0.2264,
"sparse_loss": 2.2919,
"step": 460
},
{
"document_regularizer_loss": 0.2184,
"epoch": 0.1517546632943408,
"grad_norm": 16.512298583984375,
"learning_rate": 4.999994234069837e-05,
"loss": 2.7353,
"query_regularizer_loss": 0.2184,
"sparse_loss": 2.2986,
"step": 480
},
{
"document_regularizer_loss": 0.21,
"epoch": 0.15807777426493835,
"grad_norm": 36.3756217956543,
"learning_rate": 4.9997924293067854e-05,
"loss": 2.6297,
"query_regularizer_loss": 0.21,
"sparse_loss": 2.2096,
"step": 500
},
{
"document_regularizer_loss": 0.202,
"epoch": 0.1644008852355359,
"grad_norm": 54.841121673583984,
"learning_rate": 4.9993023546318026e-05,
"loss": 2.6286,
"query_regularizer_loss": 0.2021,
"sparse_loss": 2.2245,
"step": 520
},
{
"document_regularizer_loss": 0.1967,
"epoch": 0.1707239962061334,
"grad_norm": 104.60382080078125,
"learning_rate": 4.998524066559095e-05,
"loss": 2.6424,
"query_regularizer_loss": 0.1966,
"sparse_loss": 2.2491,
"step": 540
},
{
"document_regularizer_loss": 0.193,
"epoch": 0.17704710717673094,
"grad_norm": 61.492584228515625,
"learning_rate": 4.997457654838927e-05,
"loss": 2.5846,
"query_regularizer_loss": 0.193,
"sparse_loss": 2.1985,
"step": 560
},
{
"document_regularizer_loss": 0.1896,
"epoch": 0.18337021814732848,
"grad_norm": 53.26578140258789,
"learning_rate": 4.9961032424472766e-05,
"loss": 2.6457,
"query_regularizer_loss": 0.1896,
"sparse_loss": 2.2664,
"step": 580
},
{
"document_regularizer_loss": 0.1867,
"epoch": 0.18969332911792602,
"grad_norm": 13.084442138671875,
"learning_rate": 4.9944609855716445e-05,
"loss": 2.4484,
"query_regularizer_loss": 0.1867,
"sparse_loss": 2.075,
"step": 600
},
{
"document_regularizer_loss": 0.1808,
"epoch": 0.19601644008852356,
"grad_norm": 15.175243377685547,
"learning_rate": 4.992531073593055e-05,
"loss": 2.5683,
"query_regularizer_loss": 0.1808,
"sparse_loss": 2.2068,
"step": 620
},
{
"document_regularizer_loss": 0.1747,
"epoch": 0.20233955105912108,
"grad_norm": 30.665386199951172,
"learning_rate": 4.990313729064209e-05,
"loss": 2.5078,
"query_regularizer_loss": 0.1747,
"sparse_loss": 2.1584,
"step": 640
},
{
"document_regularizer_loss": 0.1695,
"epoch": 0.20866266202971862,
"grad_norm": 18.14386749267578,
"learning_rate": 4.98780920768382e-05,
"loss": 2.4674,
"query_regularizer_loss": 0.1695,
"sparse_loss": 2.1284,
"step": 660
},
{
"document_regularizer_loss": 0.165,
"epoch": 0.21498577300031615,
"grad_norm": 25.72368812561035,
"learning_rate": 4.985017798267132e-05,
"loss": 2.4734,
"query_regularizer_loss": 0.165,
"sparse_loss": 2.1434,
"step": 680
},
{
"document_regularizer_loss": 0.1575,
"epoch": 0.2213088839709137,
"grad_norm": 13.785391807556152,
"learning_rate": 4.981939822712609e-05,
"loss": 2.3857,
"query_regularizer_loss": 0.1575,
"sparse_loss": 2.0707,
"step": 700
},
{
"document_regularizer_loss": 0.151,
"epoch": 0.22763199494151123,
"grad_norm": 59.33202362060547,
"learning_rate": 4.9785756359648204e-05,
"loss": 2.3447,
"query_regularizer_loss": 0.151,
"sparse_loss": 2.0427,
"step": 720
},
{
"document_regularizer_loss": 0.1457,
"epoch": 0.23395510591210875,
"grad_norm": 11.074970245361328,
"learning_rate": 4.9749256259735024e-05,
"loss": 2.3187,
"query_regularizer_loss": 0.1457,
"sparse_loss": 2.0272,
"step": 740
},
{
"document_regularizer_loss": 0.1402,
"epoch": 0.24027821688270629,
"grad_norm": 14.643407821655273,
"learning_rate": 4.9709902136488276e-05,
"loss": 2.4015,
"query_regularizer_loss": 0.1402,
"sparse_loss": 2.121,
"step": 760
},
{
"document_regularizer_loss": 0.1392,
"epoch": 0.24660132785330383,
"grad_norm": 27.94011688232422,
"learning_rate": 4.9667698528128593e-05,
"loss": 2.3671,
"query_regularizer_loss": 0.1392,
"sparse_loss": 2.0887,
"step": 780
},
{
"document_regularizer_loss": 0.1338,
"epoch": 0.25292443882390137,
"grad_norm": 17.952852249145508,
"learning_rate": 4.9622650301472265e-05,
"loss": 2.2929,
"query_regularizer_loss": 0.1338,
"sparse_loss": 2.0252,
"step": 800
},
{
"document_regularizer_loss": 0.128,
"epoch": 0.2592475497944989,
"grad_norm": 12.585227012634277,
"learning_rate": 4.957476265136993e-05,
"loss": 2.3264,
"query_regularizer_loss": 0.128,
"sparse_loss": 2.0704,
"step": 820
},
{
"document_regularizer_loss": 0.1269,
"epoch": 0.26557066076509644,
"grad_norm": 16.949398040771484,
"learning_rate": 4.952404110010757e-05,
"loss": 2.7711,
"query_regularizer_loss": 0.1269,
"sparse_loss": 2.5173,
"step": 840
},
{
"document_regularizer_loss": 0.1213,
"epoch": 0.27189377173569396,
"grad_norm": 11.61090087890625,
"learning_rate": 4.947049149676968e-05,
"loss": 2.2928,
"query_regularizer_loss": 0.1213,
"sparse_loss": 2.0502,
"step": 860
},
{
"document_regularizer_loss": 0.1157,
"epoch": 0.27821688270629147,
"grad_norm": 10.377798080444336,
"learning_rate": 4.941412001656474e-05,
"loss": 2.1857,
"query_regularizer_loss": 0.1157,
"sparse_loss": 1.9544,
"step": 880
},
{
"document_regularizer_loss": 0.1112,
"epoch": 0.28453999367688904,
"grad_norm": 12.839447021484375,
"learning_rate": 4.9354933160113135e-05,
"loss": 2.1,
"query_regularizer_loss": 0.1111,
"sparse_loss": 1.8777,
"step": 900
},
{
"document_regularizer_loss": 0.1094,
"epoch": 0.29086310464748655,
"grad_norm": 15.7372465133667,
"learning_rate": 4.929293775269754e-05,
"loss": 2.1598,
"query_regularizer_loss": 0.1094,
"sparse_loss": 1.941,
"step": 920
},
{
"document_regularizer_loss": 0.1069,
"epoch": 0.2971862156180841,
"grad_norm": 85.94547271728516,
"learning_rate": 4.9228140943475766e-05,
"loss": 1.9527,
"query_regularizer_loss": 0.1069,
"sparse_loss": 1.7389,
"step": 940
},
{
"document_regularizer_loss": 0.1045,
"epoch": 0.3035093265886816,
"grad_norm": 16.49176025390625,
"learning_rate": 4.9160550204656416e-05,
"loss": 2.0608,
"query_regularizer_loss": 0.1045,
"sparse_loss": 1.8519,
"step": 960
},
{
"document_regularizer_loss": 0.1019,
"epoch": 0.30983243755927914,
"grad_norm": 11.689447402954102,
"learning_rate": 4.909017333063719e-05,
"loss": 2.0235,
"query_regularizer_loss": 0.1019,
"sparse_loss": 1.8198,
"step": 980
},
{
"document_regularizer_loss": 0.0988,
"epoch": 0.3161555485298767,
"grad_norm": 15.240221977233887,
"learning_rate": 4.901701843710602e-05,
"loss": 1.9305,
"query_regularizer_loss": 0.0988,
"sparse_loss": 1.7328,
"step": 1000
},
{
"document_regularizer_loss": 0.0962,
"epoch": 0.3224786595004742,
"grad_norm": 8.189220428466797,
"learning_rate": 4.894109396010522e-05,
"loss": 1.9598,
"query_regularizer_loss": 0.0962,
"sparse_loss": 1.7673,
"step": 1020
},
{
"document_regularizer_loss": 0.0945,
"epoch": 0.3288017704710718,
"grad_norm": 17.443077087402344,
"learning_rate": 4.886240865505865e-05,
"loss": 1.9558,
"query_regularizer_loss": 0.0945,
"sparse_loss": 1.7667,
"step": 1040
},
{
"document_regularizer_loss": 0.094,
"epoch": 0.3351248814416693,
"grad_norm": 10.96996784210205,
"learning_rate": 4.87809715957621e-05,
"loss": 2.0087,
"query_regularizer_loss": 0.094,
"sparse_loss": 1.8206,
"step": 1060
},
{
"document_regularizer_loss": 0.0934,
"epoch": 0.3414479924122668,
"grad_norm": 9.809027671813965,
"learning_rate": 4.8696792173336845e-05,
"loss": 1.9493,
"query_regularizer_loss": 0.0934,
"sparse_loss": 1.7625,
"step": 1080
},
{
"document_regularizer_loss": 0.0914,
"epoch": 0.3477711033828644,
"grad_norm": 20.714750289916992,
"learning_rate": 4.860988009514675e-05,
"loss": 1.7575,
"query_regularizer_loss": 0.0914,
"sparse_loss": 1.5748,
"step": 1100
},
{
"document_regularizer_loss": 0.0898,
"epoch": 0.3540942143534619,
"grad_norm": 16.20149040222168,
"learning_rate": 4.852024538367882e-05,
"loss": 1.7915,
"query_regularizer_loss": 0.0898,
"sparse_loss": 1.6119,
"step": 1120
},
{
"document_regularizer_loss": 0.0883,
"epoch": 0.36041732532405946,
"grad_norm": 43.65206527709961,
"learning_rate": 4.842789837538741e-05,
"loss": 1.8282,
"query_regularizer_loss": 0.0883,
"sparse_loss": 1.6516,
"step": 1140
},
{
"document_regularizer_loss": 0.0866,
"epoch": 0.36674043629465697,
"grad_norm": 9.418038368225098,
"learning_rate": 4.83328497195023e-05,
"loss": 1.774,
"query_regularizer_loss": 0.0867,
"sparse_loss": 1.6007,
"step": 1160
},
{
"document_regularizer_loss": 0.0845,
"epoch": 0.3730635472652545,
"grad_norm": 13.31879997253418,
"learning_rate": 4.82351103768006e-05,
"loss": 1.7967,
"query_regularizer_loss": 0.0845,
"sparse_loss": 1.6277,
"step": 1180
},
{
"document_regularizer_loss": 0.0831,
"epoch": 0.37938665823585205,
"grad_norm": 14.40135383605957,
"learning_rate": 4.813469161834282e-05,
"loss": 1.7661,
"query_regularizer_loss": 0.0831,
"sparse_loss": 1.5999,
"step": 1200
},
{
"document_regularizer_loss": 0.0817,
"epoch": 0.38570976920644956,
"grad_norm": 9.40664005279541,
"learning_rate": 4.803160502417309e-05,
"loss": 1.7127,
"query_regularizer_loss": 0.0817,
"sparse_loss": 1.5493,
"step": 1220
},
{
"document_regularizer_loss": 0.0799,
"epoch": 0.3920328801770471,
"grad_norm": 7.551331520080566,
"learning_rate": 4.7925862481983794e-05,
"loss": 1.6856,
"query_regularizer_loss": 0.0799,
"sparse_loss": 1.5259,
"step": 1240
},
{
"document_regularizer_loss": 0.0792,
"epoch": 0.39835599114764464,
"grad_norm": 8.512903213500977,
"learning_rate": 4.7817476185744705e-05,
"loss": 1.737,
"query_regularizer_loss": 0.0792,
"sparse_loss": 1.5786,
"step": 1260
},
{
"document_regularizer_loss": 0.0777,
"epoch": 0.40467910211824215,
"grad_norm": 7.908294200897217,
"learning_rate": 4.770645863429681e-05,
"loss": 1.7078,
"query_regularizer_loss": 0.0777,
"sparse_loss": 1.5525,
"step": 1280
},
{
"document_regularizer_loss": 0.076,
"epoch": 0.4110022130888397,
"grad_norm": 7.902525901794434,
"learning_rate": 4.759282262991097e-05,
"loss": 1.7971,
"query_regularizer_loss": 0.076,
"sparse_loss": 1.6451,
"step": 1300
},
{
"document_regularizer_loss": 0.0745,
"epoch": 0.41732532405943723,
"grad_norm": 7.720804214477539,
"learning_rate": 4.7476581276811594e-05,
"loss": 1.6587,
"query_regularizer_loss": 0.0745,
"sparse_loss": 1.5096,
"step": 1320
},
{
"document_regularizer_loss": 0.0728,
"epoch": 0.4236484350300348,
"grad_norm": 6.726785659790039,
"learning_rate": 4.7357747979665504e-05,
"loss": 1.6127,
"query_regularizer_loss": 0.0728,
"sparse_loss": 1.4672,
"step": 1340
},
{
"document_regularizer_loss": 0.0714,
"epoch": 0.4299715460006323,
"grad_norm": 8.149948120117188,
"learning_rate": 4.723633644203612e-05,
"loss": 1.5483,
"query_regularizer_loss": 0.0714,
"sparse_loss": 1.4055,
"step": 1360
},
{
"document_regularizer_loss": 0.0706,
"epoch": 0.4362946569712298,
"grad_norm": 11.196396827697754,
"learning_rate": 4.711236066480322e-05,
"loss": 1.5743,
"query_regularizer_loss": 0.0706,
"sparse_loss": 1.4331,
"step": 1380
},
{
"document_regularizer_loss": 0.0696,
"epoch": 0.4426177679418274,
"grad_norm": 8.784821510314941,
"learning_rate": 4.698583494454837e-05,
"loss": 1.6291,
"query_regularizer_loss": 0.0696,
"sparse_loss": 1.4899,
"step": 1400
},
{
"document_regularizer_loss": 0.0681,
"epoch": 0.4489408789124249,
"grad_norm": 8.496731758117676,
"learning_rate": 4.68567738719063e-05,
"loss": 1.6277,
"query_regularizer_loss": 0.068,
"sparse_loss": 1.4916,
"step": 1420
},
{
"document_regularizer_loss": 0.0666,
"epoch": 0.45526398988302247,
"grad_norm": 9.2264986038208,
"learning_rate": 4.672519232988234e-05,
"loss": 1.5486,
"query_regularizer_loss": 0.0666,
"sparse_loss": 1.4155,
"step": 1440
},
{
"document_regularizer_loss": 0.0654,
"epoch": 0.46158710085362,
"grad_norm": 8.807757377624512,
"learning_rate": 4.659110549213615e-05,
"loss": 1.5393,
"query_regularizer_loss": 0.0654,
"sparse_loss": 1.4084,
"step": 1460
},
{
"document_regularizer_loss": 0.064,
"epoch": 0.4679102118242175,
"grad_norm": 6.820550441741943,
"learning_rate": 4.645452882123192e-05,
"loss": 1.5138,
"query_regularizer_loss": 0.064,
"sparse_loss": 1.3858,
"step": 1480
},
{
"document_regularizer_loss": 0.0622,
"epoch": 0.47423332279481506,
"grad_norm": 6.402284622192383,
"learning_rate": 4.6315478066855274e-05,
"loss": 1.5601,
"query_regularizer_loss": 0.0621,
"sparse_loss": 1.4358,
"step": 1500
},
{
"document_regularizer_loss": 0.0614,
"epoch": 0.48055643376541257,
"grad_norm": 24.8136043548584,
"learning_rate": 4.617396926399706e-05,
"loss": 1.5127,
"query_regularizer_loss": 0.0614,
"sparse_loss": 1.39,
"step": 1520
},
{
"document_regularizer_loss": 0.0615,
"epoch": 0.48687954473601014,
"grad_norm": 14.119754791259766,
"learning_rate": 4.603001873110422e-05,
"loss": 1.5186,
"query_regularizer_loss": 0.0615,
"sparse_loss": 1.3956,
"step": 1540
},
{
"document_regularizer_loss": 0.0606,
"epoch": 0.49320265570660765,
"grad_norm": 10.744440078735352,
"learning_rate": 4.588364306819801e-05,
"loss": 1.4835,
"query_regularizer_loss": 0.0606,
"sparse_loss": 1.3624,
"step": 1560
},
{
"document_regularizer_loss": 0.0598,
"epoch": 0.49952576667720516,
"grad_norm": 7.516956329345703,
"learning_rate": 4.57348591549597e-05,
"loss": 1.3831,
"query_regularizer_loss": 0.0598,
"sparse_loss": 1.2636,
"step": 1580
},
{
"document_regularizer_loss": 0.0586,
"epoch": 0.5058488776478027,
"grad_norm": 9.290154457092285,
"learning_rate": 4.558368414878405e-05,
"loss": 1.5297,
"query_regularizer_loss": 0.0586,
"sparse_loss": 1.4126,
"step": 1600
},
{
"document_regularizer_loss": 0.0575,
"epoch": 0.5121719886184003,
"grad_norm": 8.14932918548584,
"learning_rate": 4.543013548280082e-05,
"loss": 1.4104,
"query_regularizer_loss": 0.0575,
"sparse_loss": 1.2954,
"step": 1620
},
{
"document_regularizer_loss": 0.0563,
"epoch": 0.5184950995889978,
"grad_norm": 6.615036964416504,
"learning_rate": 4.527423086386432e-05,
"loss": 1.3922,
"query_regularizer_loss": 0.0563,
"sparse_loss": 1.2795,
"step": 1640
},
{
"document_regularizer_loss": 0.0554,
"epoch": 0.5248182105595953,
"grad_norm": 5.8693013191223145,
"learning_rate": 4.51159882705116e-05,
"loss": 1.4043,
"query_regularizer_loss": 0.0554,
"sparse_loss": 1.2935,
"step": 1660
},
{
"document_regularizer_loss": 0.0541,
"epoch": 0.5311413215301929,
"grad_norm": 6.5546650886535645,
"learning_rate": 4.495542595088914e-05,
"loss": 1.4286,
"query_regularizer_loss": 0.0541,
"sparse_loss": 1.3203,
"step": 1680
},
{
"document_regularizer_loss": 0.0535,
"epoch": 0.5374644325007903,
"grad_norm": 7.110738754272461,
"learning_rate": 4.4792562420648574e-05,
"loss": 1.3533,
"query_regularizer_loss": 0.0535,
"sparse_loss": 1.2462,
"step": 1700
},
{
"document_regularizer_loss": 0.053,
"epoch": 0.5437875434713879,
"grad_norm": 7.050394058227539,
"learning_rate": 4.462741646081145e-05,
"loss": 1.3941,
"query_regularizer_loss": 0.053,
"sparse_loss": 1.288,
"step": 1720
},
{
"document_regularizer_loss": 0.0516,
"epoch": 0.5501106544419855,
"grad_norm": 7.823602199554443,
"learning_rate": 4.446000711560351e-05,
"loss": 1.3218,
"query_regularizer_loss": 0.0516,
"sparse_loss": 1.2186,
"step": 1740
},
{
"document_regularizer_loss": 0.0511,
"epoch": 0.5564337654125829,
"grad_norm": 8.4823579788208,
"learning_rate": 4.42903536902585e-05,
"loss": 1.3049,
"query_regularizer_loss": 0.0511,
"sparse_loss": 1.2027,
"step": 1760
},
{
"document_regularizer_loss": 0.051,
"epoch": 0.5627568763831805,
"grad_norm": 6.614449977874756,
"learning_rate": 4.4118475748791985e-05,
"loss": 1.4483,
"query_regularizer_loss": 0.051,
"sparse_loss": 1.3464,
"step": 1780
},
{
"document_regularizer_loss": 0.0503,
"epoch": 0.5690799873537781,
"grad_norm": 6.088893890380859,
"learning_rate": 4.3944393111745255e-05,
"loss": 1.3819,
"query_regularizer_loss": 0.0503,
"sparse_loss": 1.2812,
"step": 1800
},
{
"document_regularizer_loss": 0.0497,
"epoch": 0.5754030983243756,
"grad_norm": 7.0833024978637695,
"learning_rate": 4.376812585389967e-05,
"loss": 1.3073,
"query_regularizer_loss": 0.0497,
"sparse_loss": 1.2078,
"step": 1820
},
{
"document_regularizer_loss": 0.0488,
"epoch": 0.5817262092949731,
"grad_norm": 6.979008197784424,
"learning_rate": 4.358969430196166e-05,
"loss": 1.3515,
"query_regularizer_loss": 0.0488,
"sparse_loss": 1.2538,
"step": 1840
},
{
"document_regularizer_loss": 0.0483,
"epoch": 0.5880493202655707,
"grad_norm": 6.331544399261475,
"learning_rate": 4.340911903221875e-05,
"loss": 1.3165,
"query_regularizer_loss": 0.0482,
"sparse_loss": 1.22,
"step": 1860
},
{
"document_regularizer_loss": 0.0476,
"epoch": 0.5943724312361682,
"grad_norm": 5.857104301452637,
"learning_rate": 4.322642086816674e-05,
"loss": 1.2582,
"query_regularizer_loss": 0.0476,
"sparse_loss": 1.163,
"step": 1880
},
{
"document_regularizer_loss": 0.0471,
"epoch": 0.6006955422067657,
"grad_norm": 11.978568077087402,
"learning_rate": 4.3041620878108336e-05,
"loss": 1.2801,
"query_regularizer_loss": 0.0471,
"sparse_loss": 1.1858,
"step": 1900
},
{
"document_regularizer_loss": 0.0463,
"epoch": 0.6070186531773633,
"grad_norm": 7.875554084777832,
"learning_rate": 4.2854740372723686e-05,
"loss": 1.2912,
"query_regularizer_loss": 0.0463,
"sparse_loss": 1.1985,
"step": 1920
},
{
"document_regularizer_loss": 0.0455,
"epoch": 0.6133417641479608,
"grad_norm": 7.675542831420898,
"learning_rate": 4.266580090261282e-05,
"loss": 1.2768,
"query_regularizer_loss": 0.0455,
"sparse_loss": 1.1858,
"step": 1940
},
{
"document_regularizer_loss": 0.045,
"epoch": 0.6196648751185583,
"grad_norm": 14.170219421386719,
"learning_rate": 4.247482425581053e-05,
"loss": 1.2681,
"query_regularizer_loss": 0.045,
"sparse_loss": 1.178,
"step": 1960
},
{
"document_regularizer_loss": 0.045,
"epoch": 0.6259879860891558,
"grad_norm": 15.395133972167969,
"learning_rate": 4.2281832455273805e-05,
"loss": 1.2818,
"query_regularizer_loss": 0.045,
"sparse_loss": 1.1918,
"step": 1980
},
{
"document_regularizer_loss": 0.0447,
"epoch": 0.6323110970597534,
"grad_norm": 10.426234245300293,
"learning_rate": 4.208684775634221e-05,
"loss": 1.2085,
"query_regularizer_loss": 0.0447,
"sparse_loss": 1.1191,
"step": 2000
},
{
"epoch": 0.6323110970597534,
"eval_runtime": 274.6851,
"eval_samples_per_second": 0.0,
"eval_sparse-ir-eval_avg_flops": 893.3501586914062,
"eval_sparse-ir-eval_corpus_active_dims": 1024.0,
"eval_sparse-ir-eval_corpus_sparsity_ratio": 0.9796696315120712,
"eval_sparse-ir-eval_dot_accuracy@1": 0.044191161767646474,
"eval_sparse-ir-eval_dot_accuracy@100": 0.34593081383723256,
"eval_sparse-ir-eval_dot_accuracy@50": 0.2571485702859428,
"eval_sparse-ir-eval_dot_accuracy@8": 0.1227754449110178,
"eval_sparse-ir-eval_dot_map@100": 0.0739891059526251,
"eval_sparse-ir-eval_dot_mrr@10": 0.06742675274468914,
"eval_sparse-ir-eval_dot_ndcg@10": 0.08339501666788006,
"eval_sparse-ir-eval_dot_precision@1": 0.044191161767646474,
"eval_sparse-ir-eval_dot_precision@100": 0.0034593081383723257,
"eval_sparse-ir-eval_dot_precision@50": 0.005142971405718857,
"eval_sparse-ir-eval_dot_precision@8": 0.015346930613877225,
"eval_sparse-ir-eval_dot_recall@1": 0.044191161767646474,
"eval_sparse-ir-eval_dot_recall@100": 0.34593081383723256,
"eval_sparse-ir-eval_dot_recall@50": 0.2571485702859428,
"eval_sparse-ir-eval_dot_recall@8": 0.1227754449110178,
"eval_sparse-ir-eval_query_active_dims": 1024.0,
"eval_sparse-ir-eval_query_sparsity_ratio": 0.9796696315120712,
"eval_steps_per_second": 0.0,
"step": 2000
},
{
"document_regularizer_loss": 0.0442,
"epoch": 0.638634208030351,
"grad_norm": 6.597539901733398,
"learning_rate": 4.1889892644171435e-05,
"loss": 1.2319,
"query_regularizer_loss": 0.0442,
"sparse_loss": 1.1435,
"step": 2020
},
{
"document_regularizer_loss": 0.0436,
"epoch": 0.6449573190009484,
"grad_norm": 5.0735087394714355,
"learning_rate": 4.1690989831140394e-05,
"loss": 1.2843,
"query_regularizer_loss": 0.0436,
"sparse_loss": 1.1971,
"step": 2040
},
{
"document_regularizer_loss": 0.0429,
"epoch": 0.651280429971546,
"grad_norm": 6.517344951629639,
"learning_rate": 4.1490162254232054e-05,
"loss": 1.2895,
"query_regularizer_loss": 0.0429,
"sparse_loss": 1.2036,
"step": 2060
},
{
"document_regularizer_loss": 0.0426,
"epoch": 0.6576035409421436,
"grad_norm": 21.777257919311523,
"learning_rate": 4.1287433072388436e-05,
"loss": 1.2754,
"query_regularizer_loss": 0.0426,
"sparse_loss": 1.1902,
"step": 2080
},
{
"document_regularizer_loss": 0.0423,
"epoch": 0.663926651912741,
"grad_norm": 7.3678975105285645,
"learning_rate": 4.108282566383994e-05,
"loss": 1.3094,
"query_regularizer_loss": 0.0423,
"sparse_loss": 1.2248,
"step": 2100
},
{
"document_regularizer_loss": 0.0422,
"epoch": 0.6702497628833386,
"grad_norm": 6.312955379486084,
"learning_rate": 4.087636362340948e-05,
"loss": 1.1937,
"query_regularizer_loss": 0.0422,
"sparse_loss": 1.1092,
"step": 2120
},
{
"document_regularizer_loss": 0.0417,
"epoch": 0.6765728738539362,
"grad_norm": 5.551113128662109,
"learning_rate": 4.0668070759791524e-05,
"loss": 1.2294,
"query_regularizer_loss": 0.0417,
"sparse_loss": 1.1461,
"step": 2140
},
{
"document_regularizer_loss": 0.0408,
"epoch": 0.6828959848245336,
"grad_norm": 7.7479023933410645,
"learning_rate": 4.0457971092806566e-05,
"loss": 1.2211,
"query_regularizer_loss": 0.0408,
"sparse_loss": 1.1395,
"step": 2160
},
{
"document_regularizer_loss": 0.0402,
"epoch": 0.6892190957951312,
"grad_norm": 13.669305801391602,
"learning_rate": 4.0246088850631246e-05,
"loss": 1.3088,
"query_regularizer_loss": 0.0402,
"sparse_loss": 1.2285,
"step": 2180
},
{
"document_regularizer_loss": 0.0398,
"epoch": 0.6955422067657288,
"grad_norm": 28.209056854248047,
"learning_rate": 4.003244846700437e-05,
"loss": 1.1989,
"query_regularizer_loss": 0.0398,
"sparse_loss": 1.1193,
"step": 2200
},
{
"document_regularizer_loss": 0.0396,
"epoch": 0.7018653177363263,
"grad_norm": 7.1696319580078125,
"learning_rate": 3.981707457840927e-05,
"loss": 1.2486,
"query_regularizer_loss": 0.0396,
"sparse_loss": 1.1695,
"step": 2220
},
{
"document_regularizer_loss": 0.0394,
"epoch": 0.7081884287069238,
"grad_norm": 8.591996192932129,
"learning_rate": 3.9599992021232865e-05,
"loss": 1.1296,
"query_regularizer_loss": 0.0394,
"sparse_loss": 1.0508,
"step": 2240
},
{
"document_regularizer_loss": 0.0393,
"epoch": 0.7145115396775213,
"grad_norm": 9.680275917053223,
"learning_rate": 3.938122582890147e-05,
"loss": 1.1456,
"query_regularizer_loss": 0.0393,
"sparse_loss": 1.0669,
"step": 2260
},
{
"document_regularizer_loss": 0.0384,
"epoch": 0.7208346506481189,
"grad_norm": 6.841869354248047,
"learning_rate": 3.916080122899408e-05,
"loss": 1.2594,
"query_regularizer_loss": 0.0384,
"sparse_loss": 1.1827,
"step": 2280
},
{
"document_regularizer_loss": 0.0381,
"epoch": 0.7271577616187164,
"grad_norm": 7.08558988571167,
"learning_rate": 3.893874364033319e-05,
"loss": 1.1598,
"query_regularizer_loss": 0.0381,
"sparse_loss": 1.0835,
"step": 2300
},
{
"document_regularizer_loss": 0.0373,
"epoch": 0.7334808725893139,
"grad_norm": 4.713133811950684,
"learning_rate": 3.871507867005353e-05,
"loss": 1.1291,
"query_regularizer_loss": 0.0373,
"sparse_loss": 1.0544,
"step": 2320
},
{
"document_regularizer_loss": 0.0372,
"epoch": 0.7398039835599115,
"grad_norm": 6.019435405731201,
"learning_rate": 3.8489832110649106e-05,
"loss": 1.1203,
"query_regularizer_loss": 0.0372,
"sparse_loss": 1.0459,
"step": 2340
},
{
"document_regularizer_loss": 0.037,
"epoch": 0.746127094530509,
"grad_norm": 5.9214887619018555,
"learning_rate": 3.8263029936998914e-05,
"loss": 1.1708,
"query_regularizer_loss": 0.037,
"sparse_loss": 1.0969,
"step": 2360
},
{
"document_regularizer_loss": 0.0365,
"epoch": 0.7524502055011065,
"grad_norm": 13.986381530761719,
"learning_rate": 3.803469830337154e-05,
"loss": 1.175,
"query_regularizer_loss": 0.0365,
"sparse_loss": 1.102,
"step": 2380
},
{
"document_regularizer_loss": 0.0363,
"epoch": 0.7587733164717041,
"grad_norm": 5.55244255065918,
"learning_rate": 3.7804863540409155e-05,
"loss": 1.2057,
"query_regularizer_loss": 0.0363,
"sparse_loss": 1.1331,
"step": 2400
},
{
"document_regularizer_loss": 0.0359,
"epoch": 0.7650964274423017,
"grad_norm": 4.60949182510376,
"learning_rate": 3.7573552152091065e-05,
"loss": 1.2125,
"query_regularizer_loss": 0.0359,
"sparse_loss": 1.1407,
"step": 2420
},
{
"document_regularizer_loss": 0.0353,
"epoch": 0.7714195384128991,
"grad_norm": 6.324008941650391,
"learning_rate": 3.7340790812677426e-05,
"loss": 1.2678,
"query_regularizer_loss": 0.0353,
"sparse_loss": 1.1972,
"step": 2440
},
{
"document_regularizer_loss": 0.0349,
"epoch": 0.7777426493834967,
"grad_norm": 6.179075241088867,
"learning_rate": 3.710660636363315e-05,
"loss": 1.1447,
"query_regularizer_loss": 0.0349,
"sparse_loss": 1.0749,
"step": 2460
},
{
"document_regularizer_loss": 0.0346,
"epoch": 0.7840657603540943,
"grad_norm": 5.324189186096191,
"learning_rate": 3.687102581053267e-05,
"loss": 1.2268,
"query_regularizer_loss": 0.0346,
"sparse_loss": 1.1575,
"step": 2480
},
{
"document_regularizer_loss": 0.0342,
"epoch": 0.7903888713246917,
"grad_norm": 6.188036918640137,
"learning_rate": 3.6634076319945706e-05,
"loss": 1.1557,
"query_regularizer_loss": 0.0342,
"sparse_loss": 1.0872,
"step": 2500
},
{
"document_regularizer_loss": 0.0335,
"epoch": 0.7967119822952893,
"grad_norm": 5.936458587646484,
"learning_rate": 3.639578521630445e-05,
"loss": 1.1321,
"query_regularizer_loss": 0.0335,
"sparse_loss": 1.0651,
"step": 2520
},
{
"document_regularizer_loss": 0.0331,
"epoch": 0.8030350932658868,
"grad_norm": 5.506819248199463,
"learning_rate": 3.615617997875265e-05,
"loss": 1.1172,
"query_regularizer_loss": 0.0331,
"sparse_loss": 1.051,
"step": 2540
},
{
"document_regularizer_loss": 0.0332,
"epoch": 0.8093582042364843,
"grad_norm": 4.83391809463501,
"learning_rate": 3.591528823797672e-05,
"loss": 1.1761,
"query_regularizer_loss": 0.0332,
"sparse_loss": 1.1097,
"step": 2560
},
{
"document_regularizer_loss": 0.0327,
"epoch": 0.8156813152070819,
"grad_norm": 5.821810722351074,
"learning_rate": 3.567313777301946e-05,
"loss": 1.1746,
"query_regularizer_loss": 0.0327,
"sparse_loss": 1.1091,
"step": 2580
},
{
"document_regularizer_loss": 0.0323,
"epoch": 0.8220044261776794,
"grad_norm": 5.580266952514648,
"learning_rate": 3.5429756508076664e-05,
"loss": 1.1864,
"query_regularizer_loss": 0.0323,
"sparse_loss": 1.1218,
"step": 2600
},
{
"document_regularizer_loss": 0.032,
"epoch": 0.828327537148277,
"grad_norm": 6.912046909332275,
"learning_rate": 3.5185172509276926e-05,
"loss": 1.096,
"query_regularizer_loss": 0.032,
"sparse_loss": 1.0319,
"step": 2620
},
{
"document_regularizer_loss": 0.0316,
"epoch": 0.8346506481188745,
"grad_norm": 4.675662517547607,
"learning_rate": 3.4939413981445165e-05,
"loss": 1.0784,
"query_regularizer_loss": 0.0315,
"sparse_loss": 1.0153,
"step": 2640
},
{
"document_regularizer_loss": 0.0313,
"epoch": 0.840973759089472,
"grad_norm": 6.675909042358398,
"learning_rate": 3.46925092648501e-05,
"loss": 1.1665,
"query_regularizer_loss": 0.0313,
"sparse_loss": 1.104,
"step": 2660
},
{
"document_regularizer_loss": 0.0309,
"epoch": 0.8472968700600696,
"grad_norm": 6.421684741973877,
"learning_rate": 3.444448683193611e-05,
"loss": 1.0553,
"query_regularizer_loss": 0.0309,
"sparse_loss": 0.9936,
"step": 2680
},
{
"document_regularizer_loss": 0.0307,
"epoch": 0.853619981030667,
"grad_norm": 8.149559020996094,
"learning_rate": 3.419537528403986e-05,
"loss": 1.0657,
"query_regularizer_loss": 0.0307,
"sparse_loss": 1.0042,
"step": 2700
},
{
"document_regularizer_loss": 0.0305,
"epoch": 0.8599430920012646,
"grad_norm": 7.086170196533203,
"learning_rate": 3.39452033480921e-05,
"loss": 1.0973,
"query_regularizer_loss": 0.0305,
"sparse_loss": 1.0362,
"step": 2720
},
{
"document_regularizer_loss": 0.0302,
"epoch": 0.8662662029718622,
"grad_norm": 6.212243556976318,
"learning_rate": 3.3693999873304904e-05,
"loss": 1.0824,
"query_regularizer_loss": 0.0302,
"sparse_loss": 1.0219,
"step": 2740
},
{
"document_regularizer_loss": 0.0296,
"epoch": 0.8725893139424596,
"grad_norm": 6.539682865142822,
"learning_rate": 3.344179382784488e-05,
"loss": 1.0886,
"query_regularizer_loss": 0.0296,
"sparse_loss": 1.0295,
"step": 2760
},
{
"document_regularizer_loss": 0.029,
"epoch": 0.8789124249130572,
"grad_norm": 5.484647274017334,
"learning_rate": 3.3188614295492595e-05,
"loss": 1.1338,
"query_regularizer_loss": 0.029,
"sparse_loss": 1.0757,
"step": 2780
},
{
"document_regularizer_loss": 0.0292,
"epoch": 0.8852355358836548,
"grad_norm": 6.082838535308838,
"learning_rate": 3.293449047228874e-05,
"loss": 1.1033,
"query_regularizer_loss": 0.0292,
"sparse_loss": 1.0449,
"step": 2800
},
{
"document_regularizer_loss": 0.0288,
"epoch": 0.8915586468542523,
"grad_norm": 7.450719356536865,
"learning_rate": 3.2679451663167326e-05,
"loss": 1.0429,
"query_regularizer_loss": 0.0288,
"sparse_loss": 0.9852,
"step": 2820
},
{
"document_regularizer_loss": 0.0285,
"epoch": 0.8978817578248498,
"grad_norm": 6.752073287963867,
"learning_rate": 3.242352727857625e-05,
"loss": 1.0102,
"query_regularizer_loss": 0.0285,
"sparse_loss": 0.9532,
"step": 2840
},
{
"document_regularizer_loss": 0.0281,
"epoch": 0.9042048687954474,
"grad_norm": 5.853407859802246,
"learning_rate": 3.216674683108583e-05,
"loss": 1.1599,
"query_regularizer_loss": 0.0281,
"sparse_loss": 1.1036,
"step": 2860
},
{
"document_regularizer_loss": 0.0281,
"epoch": 0.9105279797660449,
"grad_norm": 7.191678524017334,
"learning_rate": 3.1909139931985415e-05,
"loss": 1.0423,
"query_regularizer_loss": 0.0281,
"sparse_loss": 0.9862,
"step": 2880
},
{
"document_regularizer_loss": 0.028,
"epoch": 0.9168510907366424,
"grad_norm": 10.293112754821777,
"learning_rate": 3.165073628786876e-05,
"loss": 1.0815,
"query_regularizer_loss": 0.028,
"sparse_loss": 1.0256,
"step": 2900
},
{
"document_regularizer_loss": 0.0278,
"epoch": 0.92317420170724,
"grad_norm": 5.882568836212158,
"learning_rate": 3.139156569720826e-05,
"loss": 1.0804,
"query_regularizer_loss": 0.0278,
"sparse_loss": 1.0248,
"step": 2920
},
{
"document_regularizer_loss": 0.0276,
"epoch": 0.9294973126778375,
"grad_norm": 5.085528373718262,
"learning_rate": 3.113165804691871e-05,
"loss": 1.1668,
"query_regularizer_loss": 0.0276,
"sparse_loss": 1.1115,
"step": 2940
},
{
"document_regularizer_loss": 0.0273,
"epoch": 0.935820423648435,
"grad_norm": 5.272675037384033,
"learning_rate": 3.0871043308910816e-05,
"loss": 1.0606,
"query_regularizer_loss": 0.0273,
"sparse_loss": 1.006,
"step": 2960
},
{
"document_regularizer_loss": 0.0267,
"epoch": 0.9421435346190326,
"grad_norm": 5.916753768920898,
"learning_rate": 3.06097515366349e-05,
"loss": 1.0705,
"query_regularizer_loss": 0.0267,
"sparse_loss": 1.0172,
"step": 2980
},
{
"document_regularizer_loss": 0.0265,
"epoch": 0.9484666455896301,
"grad_norm": 6.121260166168213,
"learning_rate": 3.034781286161519e-05,
"loss": 1.072,
"query_regularizer_loss": 0.0265,
"sparse_loss": 1.0189,
"step": 3000
},
{
"document_regularizer_loss": 0.0264,
"epoch": 0.9547897565602277,
"grad_norm": 5.811629295349121,
"learning_rate": 3.0085257489975167e-05,
"loss": 1.1239,
"query_regularizer_loss": 0.0264,
"sparse_loss": 1.0711,
"step": 3020
},
{
"document_regularizer_loss": 0.0263,
"epoch": 0.9611128675308251,
"grad_norm": 14.449254989624023,
"learning_rate": 2.982211569895424e-05,
"loss": 1.112,
"query_regularizer_loss": 0.0263,
"sparse_loss": 1.0594,
"step": 3040
},
{
"document_regularizer_loss": 0.0264,
"epoch": 0.9674359785014227,
"grad_norm": 7.664610862731934,
"learning_rate": 2.9558417833416264e-05,
"loss": 1.0759,
"query_regularizer_loss": 0.0264,
"sparse_loss": 1.0231,
"step": 3060
},
{
"document_regularizer_loss": 0.0257,
"epoch": 0.9737590894720203,
"grad_norm": 6.444000720977783,
"learning_rate": 2.9294194302350225e-05,
"loss": 0.956,
"query_regularizer_loss": 0.0257,
"sparse_loss": 0.9047,
"step": 3080
},
{
"document_regularizer_loss": 0.0255,
"epoch": 0.9800822004426177,
"grad_norm": 5.407084941864014,
"learning_rate": 2.902947557536359e-05,
"loss": 0.9945,
"query_regularizer_loss": 0.0255,
"sparse_loss": 0.9435,
"step": 3100
},
{
"document_regularizer_loss": 0.0253,
"epoch": 0.9864053114132153,
"grad_norm": 7.782375335693359,
"learning_rate": 2.8764292179168566e-05,
"loss": 1.0119,
"query_regularizer_loss": 0.0253,
"sparse_loss": 0.9613,
"step": 3120
},
{
"document_regularizer_loss": 0.025,
"epoch": 0.9927284223838129,
"grad_norm": 5.379085540771484,
"learning_rate": 2.849867469406191e-05,
"loss": 0.9965,
"query_regularizer_loss": 0.025,
"sparse_loss": 0.9465,
"step": 3140
},
{
"document_regularizer_loss": 0.0247,
"epoch": 0.9990515333544103,
"grad_norm": 13.918062210083008,
"learning_rate": 2.8232653750398404e-05,
"loss": 1.1177,
"query_regularizer_loss": 0.0247,
"sparse_loss": 1.0683,
"step": 3160
},
{
"document_regularizer_loss": 0.0243,
"epoch": 1.005374644325008,
"grad_norm": 5.923994541168213,
"learning_rate": 2.796626002505871e-05,
"loss": 0.8884,
"query_regularizer_loss": 0.0243,
"sparse_loss": 0.8398,
"step": 3180
},
{
"document_regularizer_loss": 0.0243,
"epoch": 1.0116977552956055,
"grad_norm": 5.905787467956543,
"learning_rate": 2.7699524237911735e-05,
"loss": 0.9041,
"query_regularizer_loss": 0.0243,
"sparse_loss": 0.8555,
"step": 3200
},
{
"document_regularizer_loss": 0.0242,
"epoch": 1.018020866266203,
"grad_norm": 7.144820213317871,
"learning_rate": 2.7432477148272124e-05,
"loss": 0.9367,
"query_regularizer_loss": 0.0242,
"sparse_loss": 0.8882,
"step": 3220
},
{
"document_regularizer_loss": 0.024,
"epoch": 1.0243439772368006,
"grad_norm": 5.734910011291504,
"learning_rate": 2.7165149551353152e-05,
"loss": 0.8253,
"query_regularizer_loss": 0.024,
"sparse_loss": 0.7774,
"step": 3240
},
{
"document_regularizer_loss": 0.0237,
"epoch": 1.030667088207398,
"grad_norm": 4.406752586364746,
"learning_rate": 2.689757227471551e-05,
"loss": 0.8637,
"query_regularizer_loss": 0.0237,
"sparse_loss": 0.8163,
"step": 3260
},
{
"document_regularizer_loss": 0.0235,
"epoch": 1.0369901991779955,
"grad_norm": 18.512943267822266,
"learning_rate": 2.662977617471234e-05,
"loss": 0.8665,
"query_regularizer_loss": 0.0235,
"sparse_loss": 0.8195,
"step": 3280
},
{
"document_regularizer_loss": 0.0236,
"epoch": 1.0433133101485932,
"grad_norm": 4.601492404937744,
"learning_rate": 2.636179213293094e-05,
"loss": 0.8306,
"query_regularizer_loss": 0.0236,
"sparse_loss": 0.7835,
"step": 3300
},
{
"document_regularizer_loss": 0.0234,
"epoch": 1.0496364211191906,
"grad_norm": 6.115499973297119,
"learning_rate": 2.609365105263162e-05,
"loss": 0.8374,
"query_regularizer_loss": 0.0234,
"sparse_loss": 0.7906,
"step": 3320
},
{
"document_regularizer_loss": 0.0235,
"epoch": 1.055959532089788,
"grad_norm": 16.041154861450195,
"learning_rate": 2.5825383855183954e-05,
"loss": 0.9326,
"query_regularizer_loss": 0.0235,
"sparse_loss": 0.8855,
"step": 3340
},
{
"document_regularizer_loss": 0.0233,
"epoch": 1.0622826430603858,
"grad_norm": 6.99527645111084,
"learning_rate": 2.5557021476501058e-05,
"loss": 0.8675,
"query_regularizer_loss": 0.0233,
"sparse_loss": 0.8209,
"step": 3360
},
{
"document_regularizer_loss": 0.0232,
"epoch": 1.0686057540309832,
"grad_norm": 9.15439224243164,
"learning_rate": 2.528859486347211e-05,
"loss": 0.8846,
"query_regularizer_loss": 0.0232,
"sparse_loss": 0.8383,
"step": 3380
},
{
"document_regularizer_loss": 0.0232,
"epoch": 1.0749288650015807,
"grad_norm": 6.056853771209717,
"learning_rate": 2.502013497039362e-05,
"loss": 0.8782,
"query_regularizer_loss": 0.0232,
"sparse_loss": 0.8318,
"step": 3400
},
{
"document_regularizer_loss": 0.0228,
"epoch": 1.0812519759721784,
"grad_norm": 11.15111255645752,
"learning_rate": 2.4751672755399892e-05,
"loss": 0.9058,
"query_regularizer_loss": 0.0228,
"sparse_loss": 0.8602,
"step": 3420
},
{
"document_regularizer_loss": 0.0226,
"epoch": 1.0875750869427758,
"grad_norm": 5.096249103546143,
"learning_rate": 2.4483239176892978e-05,
"loss": 0.8242,
"query_regularizer_loss": 0.0226,
"sparse_loss": 0.7789,
"step": 3440
},
{
"document_regularizer_loss": 0.0224,
"epoch": 1.0938981979133733,
"grad_norm": 4.854412078857422,
"learning_rate": 2.4214865189972626e-05,
"loss": 0.8406,
"query_regularizer_loss": 0.0224,
"sparse_loss": 0.7958,
"step": 3460
},
{
"document_regularizer_loss": 0.0222,
"epoch": 1.100221308883971,
"grad_norm": 5.57534122467041,
"learning_rate": 2.3946581742866662e-05,
"loss": 0.8854,
"query_regularizer_loss": 0.0222,
"sparse_loss": 0.841,
"step": 3480
},
{
"document_regularizer_loss": 0.022,
"epoch": 1.1065444198545684,
"grad_norm": 35.59614562988281,
"learning_rate": 2.367841977336206e-05,
"loss": 0.9114,
"query_regularizer_loss": 0.022,
"sparse_loss": 0.8674,
"step": 3500
},
{
"document_regularizer_loss": 0.0218,
"epoch": 1.112867530825166,
"grad_norm": 5.315453052520752,
"learning_rate": 2.3410410205237292e-05,
"loss": 0.7916,
"query_regularizer_loss": 0.0218,
"sparse_loss": 0.7481,
"step": 3520
},
{
"document_regularizer_loss": 0.0217,
"epoch": 1.1191906417957636,
"grad_norm": 6.13749361038208,
"learning_rate": 2.31425839446963e-05,
"loss": 0.8902,
"query_regularizer_loss": 0.0217,
"sparse_loss": 0.8469,
"step": 3540
},
{
"document_regularizer_loss": 0.0212,
"epoch": 1.125513752766361,
"grad_norm": 6.6804962158203125,
"learning_rate": 2.2874971876804425e-05,
"loss": 0.8235,
"query_regularizer_loss": 0.0212,
"sparse_loss": 0.7812,
"step": 3560
},
{
"document_regularizer_loss": 0.021,
"epoch": 1.1318368637369587,
"grad_norm": 7.544855117797852,
"learning_rate": 2.2607604861926847e-05,
"loss": 0.8662,
"query_regularizer_loss": 0.021,
"sparse_loss": 0.8241,
"step": 3580
},
{
"document_regularizer_loss": 0.0209,
"epoch": 1.1381599747075561,
"grad_norm": 5.754782199859619,
"learning_rate": 2.2340513732169845e-05,
"loss": 0.8252,
"query_regularizer_loss": 0.0209,
"sparse_loss": 0.7835,
"step": 3600
},
{
"document_regularizer_loss": 0.0208,
"epoch": 1.1444830856781536,
"grad_norm": 4.892688751220703,
"learning_rate": 2.2073729287825283e-05,
"loss": 0.8636,
"query_regularizer_loss": 0.0208,
"sparse_loss": 0.822,
"step": 3620
},
{
"document_regularizer_loss": 0.0207,
"epoch": 1.1508061966487513,
"grad_norm": 7.930765628814697,
"learning_rate": 2.1807282293818827e-05,
"loss": 0.8013,
"query_regularizer_loss": 0.0207,
"sparse_loss": 0.7599,
"step": 3640
},
{
"document_regularizer_loss": 0.0209,
"epoch": 1.1571293076193487,
"grad_norm": 5.00435733795166,
"learning_rate": 2.1541203476162222e-05,
"loss": 0.8126,
"query_regularizer_loss": 0.0209,
"sparse_loss": 0.7708,
"step": 3660
},
{
"document_regularizer_loss": 0.0208,
"epoch": 1.1634524185899462,
"grad_norm": 5.13680362701416,
"learning_rate": 2.1275523518409994e-05,
"loss": 0.8361,
"query_regularizer_loss": 0.0208,
"sparse_loss": 0.7945,
"step": 3680
},
{
"document_regularizer_loss": 0.0205,
"epoch": 1.1697755295605439,
"grad_norm": 11.393424034118652,
"learning_rate": 2.101027305812113e-05,
"loss": 0.8975,
"query_regularizer_loss": 0.0205,
"sparse_loss": 0.8566,
"step": 3700
},
{
"document_regularizer_loss": 0.0202,
"epoch": 1.1760986405311413,
"grad_norm": 5.252847671508789,
"learning_rate": 2.0745482683326047e-05,
"loss": 0.8723,
"query_regularizer_loss": 0.0202,
"sparse_loss": 0.832,
"step": 3720
},
{
"document_regularizer_loss": 0.0202,
"epoch": 1.1824217515017388,
"grad_norm": 6.195733547210693,
"learning_rate": 2.0481182928999194e-05,
"loss": 0.7598,
"query_regularizer_loss": 0.0202,
"sparse_loss": 0.7195,
"step": 3740
},
{
"document_regularizer_loss": 0.0202,
"epoch": 1.1887448624723365,
"grad_norm": 6.0683135986328125,
"learning_rate": 2.0217404273537928e-05,
"loss": 0.8172,
"query_regularizer_loss": 0.0202,
"sparse_loss": 0.7768,
"step": 3760
},
{
"document_regularizer_loss": 0.0201,
"epoch": 1.195067973442934,
"grad_norm": 6.833969593048096,
"learning_rate": 1.9954177135247733e-05,
"loss": 0.7955,
"query_regularizer_loss": 0.0201,
"sparse_loss": 0.7554,
"step": 3780
},
{
"document_regularizer_loss": 0.0198,
"epoch": 1.2013910844135314,
"grad_norm": 6.259845733642578,
"learning_rate": 1.969153186883449e-05,
"loss": 0.8491,
"query_regularizer_loss": 0.0198,
"sparse_loss": 0.8096,
"step": 3800
},
{
"document_regularizer_loss": 0.0196,
"epoch": 1.207714195384129,
"grad_norm": 6.139260768890381,
"learning_rate": 1.942949876190405e-05,
"loss": 0.8096,
"query_regularizer_loss": 0.0196,
"sparse_loss": 0.7705,
"step": 3820
},
{
"document_regularizer_loss": 0.0193,
"epoch": 1.2140373063547265,
"grad_norm": 7.0147175788879395,
"learning_rate": 1.9168108031469556e-05,
"loss": 0.8215,
"query_regularizer_loss": 0.0193,
"sparse_loss": 0.7829,
"step": 3840
},
{
"document_regularizer_loss": 0.0193,
"epoch": 1.220360417325324,
"grad_norm": 4.83867883682251,
"learning_rate": 1.8907389820466858e-05,
"loss": 0.8388,
"query_regularizer_loss": 0.0193,
"sparse_loss": 0.8003,
"step": 3860
},
{
"document_regularizer_loss": 0.0193,
"epoch": 1.2266835282959216,
"grad_norm": 5.086630344390869,
"learning_rate": 1.8647374194278515e-05,
"loss": 0.8766,
"query_regularizer_loss": 0.0193,
"sparse_loss": 0.8381,
"step": 3880
},
{
"document_regularizer_loss": 0.0193,
"epoch": 1.233006639266519,
"grad_norm": 7.497378826141357,
"learning_rate": 1.8388091137266754e-05,
"loss": 0.8822,
"query_regularizer_loss": 0.0193,
"sparse_loss": 0.8436,
"step": 3900
},
{
"document_regularizer_loss": 0.0191,
"epoch": 1.2393297502371166,
"grad_norm": 6.353434085845947,
"learning_rate": 1.8129570549315694e-05,
"loss": 0.7843,
"query_regularizer_loss": 0.0191,
"sparse_loss": 0.746,
"step": 3920
},
{
"document_regularizer_loss": 0.0189,
"epoch": 1.2456528612077142,
"grad_norm": 6.209091663360596,
"learning_rate": 1.7871842242383447e-05,
"loss": 0.7955,
"query_regularizer_loss": 0.0189,
"sparse_loss": 0.7578,
"step": 3940
},
{
"document_regularizer_loss": 0.0186,
"epoch": 1.2519759721783117,
"grad_norm": 5.476807594299316,
"learning_rate": 1.761493593706418e-05,
"loss": 0.7593,
"query_regularizer_loss": 0.0186,
"sparse_loss": 0.7221,
"step": 3960
},
{
"document_regularizer_loss": 0.0189,
"epoch": 1.2582990831489091,
"grad_norm": 5.646886825561523,
"learning_rate": 1.7358881259160883e-05,
"loss": 0.8728,
"query_regularizer_loss": 0.0189,
"sparse_loss": 0.8351,
"step": 3980
},
{
"document_regularizer_loss": 0.0187,
"epoch": 1.2646221941195068,
"grad_norm": 5.88949728012085,
"learning_rate": 1.710370773626896e-05,
"loss": 0.7812,
"query_regularizer_loss": 0.0187,
"sparse_loss": 0.7438,
"step": 4000
},
{
"epoch": 1.2646221941195068,
"eval_runtime": 144.6586,
"eval_samples_per_second": 0.0,
"eval_sparse-ir-eval_avg_flops": 853.5161743164062,
"eval_sparse-ir-eval_corpus_active_dims": 1024.0,
"eval_sparse-ir-eval_corpus_sparsity_ratio": 0.9796696315120712,
"eval_sparse-ir-eval_dot_accuracy@1": 0.04979004199160168,
"eval_sparse-ir-eval_dot_accuracy@100": 0.3879224155168966,
"eval_sparse-ir-eval_dot_accuracy@50": 0.29734053189362125,
"eval_sparse-ir-eval_dot_accuracy@8": 0.14277144571085784,
"eval_sparse-ir-eval_dot_map@100": 0.08555792971172127,
"eval_sparse-ir-eval_dot_mrr@10": 0.07802764843856622,
"eval_sparse-ir-eval_dot_ndcg@10": 0.09659653047217633,
"eval_sparse-ir-eval_dot_precision@1": 0.04979004199160168,
"eval_sparse-ir-eval_dot_precision@100": 0.0038792241551689668,
"eval_sparse-ir-eval_dot_precision@50": 0.005946810637872426,
"eval_sparse-ir-eval_dot_precision@8": 0.01784643071385723,
"eval_sparse-ir-eval_dot_recall@1": 0.04979004199160168,
"eval_sparse-ir-eval_dot_recall@100": 0.3879224155168966,
"eval_sparse-ir-eval_dot_recall@50": 0.29734053189362125,
"eval_sparse-ir-eval_dot_recall@8": 0.14277144571085784,
"eval_sparse-ir-eval_query_active_dims": 1024.0,
"eval_sparse-ir-eval_query_sparsity_ratio": 0.9796696315120712,
"eval_steps_per_second": 0.0,
"step": 4000
},
{
"document_regularizer_loss": 0.0185,
"epoch": 1.2709453050901043,
"grad_norm": 6.946842193603516,
"learning_rate": 1.6849444794371173e-05,
"loss": 0.7947,
"query_regularizer_loss": 0.0185,
"sparse_loss": 0.7577,
"step": 4020
},
{
"document_regularizer_loss": 0.0187,
"epoch": 1.2772684160607017,
"grad_norm": 5.774596214294434,
"learning_rate": 1.6596121754444365e-05,
"loss": 0.861,
"query_regularizer_loss": 0.0187,
"sparse_loss": 0.8236,
"step": 4040
},
{
"document_regularizer_loss": 0.0185,
"epoch": 1.2835915270312994,
"grad_norm": 5.410093307495117,
"learning_rate": 1.6343767829078157e-05,
"loss": 0.7238,
"query_regularizer_loss": 0.0185,
"sparse_loss": 0.6868,
"step": 4060
},
{
"document_regularizer_loss": 0.0183,
"epoch": 1.2899146380018969,
"grad_norm": 6.174851894378662,
"learning_rate": 1.609241211910628e-05,
"loss": 0.8105,
"query_regularizer_loss": 0.0183,
"sparse_loss": 0.7738,
"step": 4080
},
{
"document_regularizer_loss": 0.0181,
"epoch": 1.2962377489724943,
"grad_norm": 6.427083969116211,
"learning_rate": 1.5842083610250713e-05,
"loss": 0.804,
"query_regularizer_loss": 0.0182,
"sparse_loss": 0.7677,
"step": 4100
},
{
"document_regularizer_loss": 0.0181,
"epoch": 1.302560859943092,
"grad_norm": 5.061923503875732,
"learning_rate": 1.5592811169779146e-05,
"loss": 0.8112,
"query_regularizer_loss": 0.0181,
"sparse_loss": 0.775,
"step": 4120
},
{
"document_regularizer_loss": 0.0181,
"epoch": 1.3088839709136895,
"grad_norm": 9.3052339553833,
"learning_rate": 1.5344623543176047e-05,
"loss": 0.8061,
"query_regularizer_loss": 0.0181,
"sparse_loss": 0.7699,
"step": 4140
},
{
"document_regularizer_loss": 0.0181,
"epoch": 1.3152070818842871,
"grad_norm": 6.645140171051025,
"learning_rate": 1.5097549350827823e-05,
"loss": 0.8149,
"query_regularizer_loss": 0.0181,
"sparse_loss": 0.7786,
"step": 4160
},
{
"document_regularizer_loss": 0.0179,
"epoch": 1.3215301928548846,
"grad_norm": 9.232198715209961,
"learning_rate": 1.4851617084722384e-05,
"loss": 0.7243,
"query_regularizer_loss": 0.0179,
"sparse_loss": 0.6885,
"step": 4180
},
{
"document_regularizer_loss": 0.0179,
"epoch": 1.327853303825482,
"grad_norm": 6.141971588134766,
"learning_rate": 1.4606855105163509e-05,
"loss": 0.7487,
"query_regularizer_loss": 0.0179,
"sparse_loss": 0.713,
"step": 4200
},
{
"document_regularizer_loss": 0.0178,
"epoch": 1.3341764147960797,
"grad_norm": 7.69699239730835,
"learning_rate": 1.436329163750042e-05,
"loss": 0.789,
"query_regularizer_loss": 0.0178,
"sparse_loss": 0.7533,
"step": 4220
},
{
"document_regularizer_loss": 0.0179,
"epoch": 1.3404995257666772,
"grad_norm": 6.825509071350098,
"learning_rate": 1.412095476887289e-05,
"loss": 0.7696,
"query_regularizer_loss": 0.0179,
"sparse_loss": 0.7339,
"step": 4240
},
{
"document_regularizer_loss": 0.0177,
"epoch": 1.3468226367372749,
"grad_norm": 6.219385623931885,
"learning_rate": 1.3879872444972326e-05,
"loss": 0.7236,
"query_regularizer_loss": 0.0177,
"sparse_loss": 0.6883,
"step": 4260
},
{
"document_regularizer_loss": 0.0176,
"epoch": 1.3531457477078723,
"grad_norm": 9.367851257324219,
"learning_rate": 1.3640072466819087e-05,
"loss": 0.7761,
"query_regularizer_loss": 0.0176,
"sparse_loss": 0.7409,
"step": 4280
},
{
"document_regularizer_loss": 0.0174,
"epoch": 1.3594688586784698,
"grad_norm": 6.194346904754639,
"learning_rate": 1.3401582487556613e-05,
"loss": 0.7864,
"query_regularizer_loss": 0.0174,
"sparse_loss": 0.7516,
"step": 4300
},
{
"document_regularizer_loss": 0.0175,
"epoch": 1.3657919696490675,
"grad_norm": 6.247494697570801,
"learning_rate": 1.3164430009262479e-05,
"loss": 0.8002,
"query_regularizer_loss": 0.0175,
"sparse_loss": 0.7652,
"step": 4320
},
{
"document_regularizer_loss": 0.0175,
"epoch": 1.372115080619665,
"grad_norm": 5.456516265869141,
"learning_rate": 1.2928642379776946e-05,
"loss": 0.7939,
"query_regularizer_loss": 0.0175,
"sparse_loss": 0.759,
"step": 4340
},
{
"document_regularizer_loss": 0.0174,
"epoch": 1.3784381915902624,
"grad_norm": 6.565524578094482,
"learning_rate": 1.2694246789549268e-05,
"loss": 0.7647,
"query_regularizer_loss": 0.0174,
"sparse_loss": 0.73,
"step": 4360
},
{
"document_regularizer_loss": 0.017,
"epoch": 1.38476130256086,
"grad_norm": 5.862294673919678,
"learning_rate": 1.2461270268502138e-05,
"loss": 0.7741,
"query_regularizer_loss": 0.017,
"sparse_loss": 0.74,
"step": 4380
},
{
"document_regularizer_loss": 0.0168,
"epoch": 1.3910844135314575,
"grad_norm": 4.879271030426025,
"learning_rate": 1.2229739682914707e-05,
"loss": 0.7361,
"query_regularizer_loss": 0.0167,
"sparse_loss": 0.7026,
"step": 4400
},
{
"document_regularizer_loss": 0.0169,
"epoch": 1.397407524502055,
"grad_norm": 6.44930362701416,
"learning_rate": 1.1999681732324397e-05,
"loss": 0.7732,
"query_regularizer_loss": 0.0169,
"sparse_loss": 0.7395,
"step": 4420
},
{
"document_regularizer_loss": 0.0168,
"epoch": 1.4037306354726526,
"grad_norm": 5.632932662963867,
"learning_rate": 1.1771122946448002e-05,
"loss": 0.79,
"query_regularizer_loss": 0.0168,
"sparse_loss": 0.7563,
"step": 4440
},
{
"document_regularizer_loss": 0.0166,
"epoch": 1.41005374644325,
"grad_norm": 11.790848731994629,
"learning_rate": 1.1544089682122288e-05,
"loss": 0.7661,
"query_regularizer_loss": 0.0166,
"sparse_loss": 0.7329,
"step": 4460
},
{
"document_regularizer_loss": 0.0168,
"epoch": 1.4163768574138476,
"grad_norm": 4.643637657165527,
"learning_rate": 1.1318608120264676e-05,
"loss": 0.7779,
"query_regularizer_loss": 0.0168,
"sparse_loss": 0.7442,
"step": 4480
},
{
"document_regularizer_loss": 0.0168,
"epoch": 1.4226999683844452,
"grad_norm": 6.348916530609131,
"learning_rate": 1.1094704262854047e-05,
"loss": 0.7711,
"query_regularizer_loss": 0.0168,
"sparse_loss": 0.7375,
"step": 4500
},
{
"document_regularizer_loss": 0.0166,
"epoch": 1.4290230793550427,
"grad_norm": 5.77072811126709,
"learning_rate": 1.0872403929932312e-05,
"loss": 0.7952,
"query_regularizer_loss": 0.0166,
"sparse_loss": 0.7619,
"step": 4520
},
{
"document_regularizer_loss": 0.0166,
"epoch": 1.4353461903256401,
"grad_norm": 6.92957067489624,
"learning_rate": 1.0651732756626848e-05,
"loss": 0.7743,
"query_regularizer_loss": 0.0166,
"sparse_loss": 0.741,
"step": 4540
},
{
"document_regularizer_loss": 0.0165,
"epoch": 1.4416693012962378,
"grad_norm": 5.602739334106445,
"learning_rate": 1.0432716190194397e-05,
"loss": 0.72,
"query_regularizer_loss": 0.0165,
"sparse_loss": 0.6869,
"step": 4560
},
{
"document_regularizer_loss": 0.0165,
"epoch": 1.4479924122668353,
"grad_norm": 5.612238883972168,
"learning_rate": 1.0215379487086452e-05,
"loss": 0.7801,
"query_regularizer_loss": 0.0165,
"sparse_loss": 0.7471,
"step": 4580
},
{
"document_regularizer_loss": 0.0163,
"epoch": 1.4543155232374327,
"grad_norm": 5.197407245635986,
"learning_rate": 9.999747710036875e-06,
"loss": 0.7453,
"query_regularizer_loss": 0.0163,
"sparse_loss": 0.7127,
"step": 4600
},
{
"document_regularizer_loss": 0.0163,
"epoch": 1.4606386342080304,
"grad_norm": 4.72848653793335,
"learning_rate": 9.785845725171583e-06,
"loss": 0.7509,
"query_regularizer_loss": 0.0163,
"sparse_loss": 0.7184,
"step": 4620
},
{
"document_regularizer_loss": 0.0163,
"epoch": 1.4669617451786279,
"grad_norm": 5.626763820648193,
"learning_rate": 9.573698199141146e-06,
"loss": 0.7558,
"query_regularizer_loss": 0.0163,
"sparse_loss": 0.7232,
"step": 4640
},
{
"document_regularizer_loss": 0.0162,
"epoch": 1.4732848561492253,
"grad_norm": 6.345512390136719,
"learning_rate": 9.363329596276258e-06,
"loss": 0.7718,
"query_regularizer_loss": 0.0162,
"sparse_loss": 0.7394,
"step": 4660
},
{
"document_regularizer_loss": 0.0161,
"epoch": 1.479607967119823,
"grad_norm": 20.017658233642578,
"learning_rate": 9.15476417576656e-06,
"loss": 0.6954,
"query_regularizer_loss": 0.0161,
"sparse_loss": 0.6632,
"step": 4680
},
{
"document_regularizer_loss": 0.0159,
"epoch": 1.4859310780904205,
"grad_norm": 5.100090026855469,
"learning_rate": 8.948025988863163e-06,
"loss": 0.705,
"query_regularizer_loss": 0.0159,
"sparse_loss": 0.6732,
"step": 4700
},
{
"document_regularizer_loss": 0.0159,
"epoch": 1.492254189061018,
"grad_norm": 7.6336750984191895,
"learning_rate": 8.743138876105056e-06,
"loss": 0.751,
"query_regularizer_loss": 0.0159,
"sparse_loss": 0.7192,
"step": 4720
},
{
"document_regularizer_loss": 0.0159,
"epoch": 1.4985773000316156,
"grad_norm": 5.1957783699035645,
"learning_rate": 8.54012646456995e-06,
"loss": 0.765,
"query_regularizer_loss": 0.0159,
"sparse_loss": 0.7333,
"step": 4740
},
{
"document_regularizer_loss": 0.0158,
"epoch": 1.504900411002213,
"grad_norm": 6.796875953674316,
"learning_rate": 8.33901216514959e-06,
"loss": 0.7983,
"query_regularizer_loss": 0.0158,
"sparse_loss": 0.7667,
"step": 4760
},
{
"document_regularizer_loss": 0.0158,
"epoch": 1.5112235219728105,
"grad_norm": 6.2827959060668945,
"learning_rate": 8.139819169850152e-06,
"loss": 0.7716,
"query_regularizer_loss": 0.0158,
"sparse_loss": 0.74,
"step": 4780
},
{
"document_regularizer_loss": 0.0157,
"epoch": 1.5175466329434082,
"grad_norm": 23.407991409301758,
"learning_rate": 7.942570449117689e-06,
"loss": 0.7747,
"query_regularizer_loss": 0.0157,
"sparse_loss": 0.7434,
"step": 4800
},
{
"document_regularizer_loss": 0.0155,
"epoch": 1.5238697439140056,
"grad_norm": 5.709648132324219,
"learning_rate": 7.747288749189344e-06,
"loss": 0.7613,
"query_regularizer_loss": 0.0155,
"sparse_loss": 0.7302,
"step": 4820
},
{
"document_regularizer_loss": 0.0156,
"epoch": 1.530192854884603,
"grad_norm": 7.462285041809082,
"learning_rate": 7.553996589470214e-06,
"loss": 0.7962,
"query_regularizer_loss": 0.0156,
"sparse_loss": 0.7651,
"step": 4840
},
{
"document_regularizer_loss": 0.0155,
"epoch": 1.5365159658552008,
"grad_norm": 6.551488399505615,
"learning_rate": 7.362716259936572e-06,
"loss": 0.7893,
"query_regularizer_loss": 0.0155,
"sparse_loss": 0.7583,
"step": 4860
},
{
"document_regularizer_loss": 0.0156,
"epoch": 1.5428390768257982,
"grad_norm": 4.285749912261963,
"learning_rate": 7.173469818565334e-06,
"loss": 0.7291,
"query_regularizer_loss": 0.0156,
"sparse_loss": 0.6979,
"step": 4880
},
{
"document_regularizer_loss": 0.0156,
"epoch": 1.5491621877963957,
"grad_norm": 5.514683246612549,
"learning_rate": 6.986279088790468e-06,
"loss": 0.6982,
"query_regularizer_loss": 0.0155,
"sparse_loss": 0.6671,
"step": 4900
},
{
"document_regularizer_loss": 0.0154,
"epoch": 1.5554852987669934,
"grad_norm": 4.710526466369629,
"learning_rate": 6.801165656986317e-06,
"loss": 0.7057,
"query_regularizer_loss": 0.0154,
"sparse_loss": 0.6748,
"step": 4920
},
{
"document_regularizer_loss": 0.0155,
"epoch": 1.561808409737591,
"grad_norm": 6.305178642272949,
"learning_rate": 6.618150869978346e-06,
"loss": 0.7883,
"query_regularizer_loss": 0.0155,
"sparse_loss": 0.7574,
"step": 4940
},
{
"document_regularizer_loss": 0.0155,
"epoch": 1.5681315207081883,
"grad_norm": 7.8116044998168945,
"learning_rate": 6.43725583258147e-06,
"loss": 0.782,
"query_regularizer_loss": 0.0155,
"sparse_loss": 0.751,
"step": 4960
},
{
"document_regularizer_loss": 0.0153,
"epoch": 1.574454631678786,
"grad_norm": 6.840033531188965,
"learning_rate": 6.25850140516629e-06,
"loss": 0.7625,
"query_regularizer_loss": 0.0153,
"sparse_loss": 0.7318,
"step": 4980
},
{
"document_regularizer_loss": 0.0153,
"epoch": 1.5807777426493836,
"grad_norm": 4.293910980224609,
"learning_rate": 6.08190820125353e-06,
"loss": 0.7101,
"query_regularizer_loss": 0.0153,
"sparse_loss": 0.6795,
"step": 5000
},
{
"document_regularizer_loss": 0.0152,
"epoch": 1.5871008536199809,
"grad_norm": 4.924117088317871,
"learning_rate": 5.907496585136932e-06,
"loss": 0.7394,
"query_regularizer_loss": 0.0152,
"sparse_loss": 0.709,
"step": 5020
},
{
"document_regularizer_loss": 0.0152,
"epoch": 1.5934239645905786,
"grad_norm": 5.151610851287842,
"learning_rate": 5.735286669534912e-06,
"loss": 0.6894,
"query_regularizer_loss": 0.0152,
"sparse_loss": 0.659,
"step": 5040
},
{
"document_regularizer_loss": 0.0152,
"epoch": 1.5997470755611762,
"grad_norm": 6.865243434906006,
"learning_rate": 5.5652983132711946e-06,
"loss": 0.6992,
"query_regularizer_loss": 0.0152,
"sparse_loss": 0.6689,
"step": 5060
},
{
"document_regularizer_loss": 0.0152,
"epoch": 1.6060701865317735,
"grad_norm": 5.148654460906982,
"learning_rate": 5.397551118984756e-06,
"loss": 0.7032,
"query_regularizer_loss": 0.0152,
"sparse_loss": 0.6729,
"step": 5080
},
{
"document_regularizer_loss": 0.0151,
"epoch": 1.6123932975023711,
"grad_norm": 7.1046929359436035,
"learning_rate": 5.232064430869266e-06,
"loss": 0.7659,
"query_regularizer_loss": 0.0151,
"sparse_loss": 0.7356,
"step": 5100
},
{
"document_regularizer_loss": 0.0152,
"epoch": 1.6187164084729688,
"grad_norm": 6.476480484008789,
"learning_rate": 5.068857332442408e-06,
"loss": 0.7268,
"query_regularizer_loss": 0.0152,
"sparse_loss": 0.6965,
"step": 5120
},
{
"document_regularizer_loss": 0.0151,
"epoch": 1.6250395194435663,
"grad_norm": 5.051494598388672,
"learning_rate": 4.907948644345184e-06,
"loss": 0.6928,
"query_regularizer_loss": 0.0151,
"sparse_loss": 0.6627,
"step": 5140
},
{
"document_regularizer_loss": 0.015,
"epoch": 1.6313626304141637,
"grad_norm": 9.967106819152832,
"learning_rate": 4.7493569221715776e-06,
"loss": 0.7134,
"query_regularizer_loss": 0.015,
"sparse_loss": 0.6833,
"step": 5160
},
{
"document_regularizer_loss": 0.0149,
"epoch": 1.6376857413847614,
"grad_norm": 7.947037220001221,
"learning_rate": 4.593100454328744e-06,
"loss": 0.8233,
"query_regularizer_loss": 0.015,
"sparse_loss": 0.7934,
"step": 5180
},
{
"document_regularizer_loss": 0.0148,
"epoch": 1.6440088523553589,
"grad_norm": 5.4551825523376465,
"learning_rate": 4.439197259928082e-06,
"loss": 0.7258,
"query_regularizer_loss": 0.0148,
"sparse_loss": 0.6962,
"step": 5200
},
{
"document_regularizer_loss": 0.0148,
"epoch": 1.6503319633259563,
"grad_norm": 5.254171848297119,
"learning_rate": 4.2876650867072516e-06,
"loss": 0.653,
"query_regularizer_loss": 0.0148,
"sparse_loss": 0.6234,
"step": 5220
},
{
"document_regularizer_loss": 0.0148,
"epoch": 1.656655074296554,
"grad_norm": 59.719482421875,
"learning_rate": 4.1385214089836365e-06,
"loss": 0.764,
"query_regularizer_loss": 0.0148,
"sparse_loss": 0.7344,
"step": 5240
},
{
"document_regularizer_loss": 0.0148,
"epoch": 1.6629781852671515,
"grad_norm": 6.208061218261719,
"learning_rate": 3.991783425639148e-06,
"loss": 0.8153,
"query_regularizer_loss": 0.0148,
"sparse_loss": 0.7858,
"step": 5260
},
{
"document_regularizer_loss": 0.0148,
"epoch": 1.669301296237749,
"grad_norm": 5.488613128662109,
"learning_rate": 3.8474680581369635e-06,
"loss": 0.6717,
"query_regularizer_loss": 0.0148,
"sparse_loss": 0.6422,
"step": 5280
},
{
"document_regularizer_loss": 0.0147,
"epoch": 1.6756244072083466,
"grad_norm": 13.9483642578125,
"learning_rate": 3.7055919485701613e-06,
"loss": 0.7592,
"query_regularizer_loss": 0.0147,
"sparse_loss": 0.7298,
"step": 5300
},
{
"document_regularizer_loss": 0.0147,
"epoch": 1.681947518178944,
"grad_norm": 7.517942905426025,
"learning_rate": 3.5661714577425954e-06,
"loss": 0.7114,
"query_regularizer_loss": 0.0147,
"sparse_loss": 0.6821,
"step": 5320
},
{
"document_regularizer_loss": 0.0147,
"epoch": 1.6882706291495415,
"grad_norm": 4.885865211486816,
"learning_rate": 3.429222663282211e-06,
"loss": 0.7035,
"query_regularizer_loss": 0.0147,
"sparse_loss": 0.6741,
"step": 5340
},
{
"document_regularizer_loss": 0.0146,
"epoch": 1.6945937401201392,
"grad_norm": 6.0214715003967285,
"learning_rate": 3.2947613577870017e-06,
"loss": 0.702,
"query_regularizer_loss": 0.0146,
"sparse_loss": 0.6728,
"step": 5360
},
{
"document_regularizer_loss": 0.0147,
"epoch": 1.7009168510907366,
"grad_norm": 6.653244495391846,
"learning_rate": 3.162803047003865e-06,
"loss": 0.735,
"query_regularizer_loss": 0.0147,
"sparse_loss": 0.7057,
"step": 5380
},
{
"document_regularizer_loss": 0.0147,
"epoch": 1.707239962061334,
"grad_norm": 7.448154449462891,
"learning_rate": 3.0333629480404915e-06,
"loss": 0.7298,
"query_regularizer_loss": 0.0147,
"sparse_loss": 0.7005,
"step": 5400
},
{
"document_regularizer_loss": 0.0146,
"epoch": 1.7135630730319318,
"grad_norm": 5.637879848480225,
"learning_rate": 2.9064559876106097e-06,
"loss": 0.7082,
"query_regularizer_loss": 0.0145,
"sparse_loss": 0.6791,
"step": 5420
},
{
"document_regularizer_loss": 0.0145,
"epoch": 1.7198861840025292,
"grad_norm": 6.466550827026367,
"learning_rate": 2.7820968003126143e-06,
"loss": 0.693,
"query_regularizer_loss": 0.0146,
"sparse_loss": 0.6639,
"step": 5440
},
{
"document_regularizer_loss": 0.0146,
"epoch": 1.7262092949731267,
"grad_norm": 6.480766773223877,
"learning_rate": 2.660299726941995e-06,
"loss": 0.7466,
"query_regularizer_loss": 0.0146,
"sparse_loss": 0.7175,
"step": 5460
},
{
"document_regularizer_loss": 0.0145,
"epoch": 1.7325324059437244,
"grad_norm": 5.0478315353393555,
"learning_rate": 2.541078812837544e-06,
"loss": 0.691,
"query_regularizer_loss": 0.0145,
"sparse_loss": 0.6619,
"step": 5480
},
{
"document_regularizer_loss": 0.0145,
"epoch": 1.7388555169143218,
"grad_norm": 18.3403263092041,
"learning_rate": 2.4244478062617285e-06,
"loss": 0.8491,
"query_regularizer_loss": 0.0145,
"sparse_loss": 0.8201,
"step": 5500
},
{
"document_regularizer_loss": 0.0145,
"epoch": 1.7451786278849193,
"grad_norm": 4.109490871429443,
"learning_rate": 2.3104201568152406e-06,
"loss": 0.7267,
"query_regularizer_loss": 0.0145,
"sparse_loss": 0.6977,
"step": 5520
},
{
"document_regularizer_loss": 0.0145,
"epoch": 1.751501738855517,
"grad_norm": 5.1971540451049805,
"learning_rate": 2.1990090138860443e-06,
"loss": 0.6938,
"query_regularizer_loss": 0.0145,
"sparse_loss": 0.6649,
"step": 5540
},
{
"document_regularizer_loss": 0.0144,
"epoch": 1.7578248498261144,
"grad_norm": 5.344772815704346,
"learning_rate": 2.090227225132993e-06,
"loss": 0.7251,
"query_regularizer_loss": 0.0144,
"sparse_loss": 0.6962,
"step": 5560
},
{
"document_regularizer_loss": 0.0144,
"epoch": 1.7641479607967119,
"grad_norm": 6.859626770019531,
"learning_rate": 1.9840873350042975e-06,
"loss": 0.6835,
"query_regularizer_loss": 0.0144,
"sparse_loss": 0.6546,
"step": 5580
},
{
"document_regularizer_loss": 0.0144,
"epoch": 1.7704710717673096,
"grad_norm": 7.862534046173096,
"learning_rate": 1.8806015832909223e-06,
"loss": 0.7431,
"query_regularizer_loss": 0.0144,
"sparse_loss": 0.7143,
"step": 5600
},
{
"document_regularizer_loss": 0.0144,
"epoch": 1.776794182737907,
"grad_norm": 7.577550411224365,
"learning_rate": 1.7797819037151137e-06,
"loss": 0.7031,
"query_regularizer_loss": 0.0144,
"sparse_loss": 0.6744,
"step": 5620
},
{
"document_regularizer_loss": 0.0143,
"epoch": 1.7831172937085045,
"grad_norm": 5.902777194976807,
"learning_rate": 1.6816399225542512e-06,
"loss": 0.6999,
"query_regularizer_loss": 0.0143,
"sparse_loss": 0.6713,
"step": 5640
},
{
"document_regularizer_loss": 0.0143,
"epoch": 1.7894404046791021,
"grad_norm": 7.284171104431152,
"learning_rate": 1.5861869573000982e-06,
"loss": 0.7097,
"query_regularizer_loss": 0.0143,
"sparse_loss": 0.6811,
"step": 5660
},
{
"document_regularizer_loss": 0.0143,
"epoch": 1.7957635156496996,
"grad_norm": 5.959704875946045,
"learning_rate": 1.4934340153537424e-06,
"loss": 0.7125,
"query_regularizer_loss": 0.0143,
"sparse_loss": 0.6839,
"step": 5680
},
{
"document_regularizer_loss": 0.0143,
"epoch": 1.802086626620297,
"grad_norm": 6.5475568771362305,
"learning_rate": 1.4033917927562228e-06,
"loss": 0.7399,
"query_regularizer_loss": 0.0143,
"sparse_loss": 0.7113,
"step": 5700
},
{
"document_regularizer_loss": 0.0143,
"epoch": 1.8084097375908947,
"grad_norm": 4.185171604156494,
"learning_rate": 1.3160706729550886e-06,
"loss": 0.677,
"query_regularizer_loss": 0.0143,
"sparse_loss": 0.6484,
"step": 5720
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.8147328485614924,
"grad_norm": 25.05087661743164,
"learning_rate": 1.2314807256070093e-06,
"loss": 0.7428,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.7143,
"step": 5740
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.8210559595320897,
"grad_norm": 6.057918548583984,
"learning_rate": 1.1496317054165734e-06,
"loss": 0.7495,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.7211,
"step": 5760
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.8273790705026873,
"grad_norm": 5.152764797210693,
"learning_rate": 1.070533051011388e-06,
"loss": 0.7266,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.6981,
"step": 5780
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.833702181473285,
"grad_norm": 6.547135829925537,
"learning_rate": 9.94193883853653e-07,
"loss": 0.6984,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.67,
"step": 5800
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.8400252924438822,
"grad_norm": 4.288388729095459,
"learning_rate": 9.20623007188276e-07,
"loss": 0.7527,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.7243,
"step": 5820
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.84634840341448,
"grad_norm": 10.613944053649902,
"learning_rate": 8.498289050277331e-07,
"loss": 0.6564,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.6281,
"step": 5840
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.8526715143850776,
"grad_norm": 5.5082902908325195,
"learning_rate": 7.81819741173681e-07,
"loss": 0.7028,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.6744,
"step": 5860
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.8589946253556748,
"grad_norm": 10.326964378356934,
"learning_rate": 7.166033582755583e-07,
"loss": 0.7015,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.6732,
"step": 5880
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.8653177363262725,
"grad_norm": 4.219756603240967,
"learning_rate": 6.541872769261631e-07,
"loss": 0.7219,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.6936,
"step": 5900
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.8716408472968702,
"grad_norm": 6.046718597412109,
"learning_rate": 5.945786947944176e-07,
"loss": 0.7569,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.7285,
"step": 5920
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.8779639582674676,
"grad_norm": 6.10089635848999,
"learning_rate": 5.377844857953423e-07,
"loss": 0.6832,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.6548,
"step": 5940
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.884287069238065,
"grad_norm": 9.817451477050781,
"learning_rate": 4.838111992973627e-07,
"loss": 0.72,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.6916,
"step": 5960
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.8906101802086628,
"grad_norm": 6.2373433113098145,
"learning_rate": 4.3266505936708226e-07,
"loss": 0.6878,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.6595,
"step": 5980
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.8969332911792602,
"grad_norm": 5.504240036010742,
"learning_rate": 3.843519640514937e-07,
"loss": 0.6468,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.6185,
"step": 6000
},
{
"epoch": 1.8969332911792602,
"eval_runtime": 119.8241,
"eval_samples_per_second": 0.0,
"eval_sparse-ir-eval_avg_flops": 828.1011962890625,
"eval_sparse-ir-eval_corpus_active_dims": 1024.0,
"eval_sparse-ir-eval_corpus_sparsity_ratio": 0.9796696315120712,
"eval_sparse-ir-eval_dot_accuracy@1": 0.05598880223955209,
"eval_sparse-ir-eval_dot_accuracy@100": 0.41011797640471903,
"eval_sparse-ir-eval_dot_accuracy@50": 0.32113577284543093,
"eval_sparse-ir-eval_dot_accuracy@8": 0.15736852629474105,
"eval_sparse-ir-eval_dot_map@100": 0.09527335248492443,
"eval_sparse-ir-eval_dot_mrr@10": 0.08729730244427296,
"eval_sparse-ir-eval_dot_ndcg@10": 0.10700833927390947,
"eval_sparse-ir-eval_dot_precision@1": 0.05598880223955209,
"eval_sparse-ir-eval_dot_precision@100": 0.004101179764047191,
"eval_sparse-ir-eval_dot_precision@50": 0.006422715456908619,
"eval_sparse-ir-eval_dot_precision@8": 0.01967106578684263,
"eval_sparse-ir-eval_dot_recall@1": 0.05598880223955209,
"eval_sparse-ir-eval_dot_recall@100": 0.41011797640471903,
"eval_sparse-ir-eval_dot_recall@50": 0.32113577284543093,
"eval_sparse-ir-eval_dot_recall@8": 0.15736852629474105,
"eval_sparse-ir-eval_query_active_dims": 1024.0,
"eval_sparse-ir-eval_query_sparsity_ratio": 0.9796696315120712,
"eval_steps_per_second": 0.0,
"step": 6000
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.9032564021498577,
"grad_norm": 24.536941528320312,
"learning_rate": 3.388774846978804e-07,
"loss": 0.6901,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.6618,
"step": 6020
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.9095795131204554,
"grad_norm": 4.5274786949157715,
"learning_rate": 2.9624686531129766e-07,
"loss": 0.7066,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.6782,
"step": 6040
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.9159026240910528,
"grad_norm": 5.511288642883301,
"learning_rate": 2.5646502194988097e-07,
"loss": 0.6818,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.6535,
"step": 6060
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.9222257350616503,
"grad_norm": 5.5782904624938965,
"learning_rate": 2.1953654215791653e-07,
"loss": 0.735,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.7067,
"step": 6080
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.928548846032248,
"grad_norm": 8.528367042541504,
"learning_rate": 1.8546568443683077e-07,
"loss": 0.7364,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.708,
"step": 6100
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.9348719570028454,
"grad_norm": 6.339615821838379,
"learning_rate": 1.5425637775409728e-07,
"loss": 0.7485,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.7202,
"step": 6120
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.9411950679734429,
"grad_norm": 5.221982955932617,
"learning_rate": 1.2591222109017143e-07,
"loss": 0.7123,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.684,
"step": 6140
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.9475181789440406,
"grad_norm": 6.821455478668213,
"learning_rate": 1.0043648302345276e-07,
"loss": 0.7488,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.7205,
"step": 6160
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.953841289914638,
"grad_norm": 6.010687351226807,
"learning_rate": 7.783210135337282e-08,
"loss": 0.7161,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.6878,
"step": 6180
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.9601644008852355,
"grad_norm": 5.301513195037842,
"learning_rate": 5.810168276160211e-08,
"loss": 0.6795,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.6512,
"step": 6200
},
{
"document_regularizer_loss": 0.0141,
"epoch": 1.9664875118558331,
"grad_norm": 5.3588480949401855,
"learning_rate": 4.1247502511465585e-08,
"loss": 0.6925,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.6642,
"step": 6220
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.9728106228264306,
"grad_norm": 5.97401237487793,
"learning_rate": 2.7271504185558126e-08,
"loss": 0.8108,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.7825,
"step": 6240
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.979133733797028,
"grad_norm": 6.50218391418457,
"learning_rate": 1.6175299461615447e-08,
"loss": 0.7295,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.7012,
"step": 6260
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.9854568447676257,
"grad_norm": 6.084305286407471,
"learning_rate": 7.96016792666554e-09,
"loss": 0.7232,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.6949,
"step": 6280
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.9917799557382232,
"grad_norm": 5.208037376403809,
"learning_rate": 2.627056929460636e-09,
"loss": 0.7575,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.7292,
"step": 6300
},
{
"document_regularizer_loss": 0.0142,
"epoch": 1.9981030667088207,
"grad_norm": 5.697127819061279,
"learning_rate": 1.7658147123955637e-10,
"loss": 0.7006,
"query_regularizer_loss": 0.0142,
"sparse_loss": 0.6723,
"step": 6320
}
],
"logging_steps": 20,
"max_steps": 6326,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}